├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── blake2s.rs ├── ed25519.rs ├── ed448.rs ├── gf25519.rs ├── gf255e.rs ├── gf448.rs ├── gls254.rs ├── jq255e.rs ├── jq255s.rs ├── modint.rs ├── p256.rs ├── ristretto255.rs ├── sc448.rs ├── secp256k1.rs ├── util.rs ├── x25519.rs └── x448.rs ├── extra ├── frost-sample.rs ├── jq255-formulas.txt ├── mkuxcomp.sage └── truncsig.pdf └── src ├── backend ├── mod.rs ├── w32 │ ├── gf255.rs │ ├── gf448.rs │ ├── gfb254_m32.rs │ ├── gfgen.rs │ ├── gfsecp256k1.rs │ ├── lagrange.rs │ ├── mod.rs │ ├── modint.rs │ └── zz.rs └── w64 │ ├── gf255_m51.rs │ ├── gf255_m64.rs │ ├── gf448.rs │ ├── gfb254_arm64pmull.rs │ ├── gfb254_m64.rs │ ├── gfb254_x86clmul.rs │ ├── gfgen.rs │ ├── gfp256.rs │ ├── gfsecp256k1.rs │ ├── lagrange.rs │ ├── mod.rs │ ├── modint.rs │ ├── modint32.rs │ ├── util32.rs │ ├── zz.rs │ └── zz32.rs ├── blake2s.rs ├── decaf448.rs ├── ed25519.rs ├── ed448.rs ├── field.rs ├── frost.rs ├── gls254.rs ├── jq255e.rs ├── jq255s.rs ├── lib.rs ├── lms.rs ├── p256.rs ├── ristretto255.rs ├── secp256k1.rs ├── sha2.rs ├── sha3.rs ├── x25519.rs └── x448.rs /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crrl" 3 | version = "0.9.0" 4 | authors = ["Thomas Pornin "] 5 | edition = "2018" 6 | license = "MIT" 7 | description = "Library for cryptographic research" 8 | repository = "https://github.com/pornin/crrl" 9 | readme = "README.md" 10 | categories = ["cryptography", "no-std"] 11 | keywords = ["cryptography", "ed25519", "ristretto255", "p256", "jq255e"] 12 | exclude = ["extra/*"] 13 | 14 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 15 | 16 | [dependencies] 17 | rand_core = "0.6" 18 | 19 | [dev-dependencies] 20 | hex = "0.4.3" 21 | num-bigint = "0.4.3" 22 | 23 | # Set feature "w32_backend" or "w64_backend" to force use of the 32-bit or 24 | # 64-bit backend explicitly. 
If neither is set then the backend will be 25 | # selected automatically (architectures with 64-bit pointers get the 64-bit 26 | # backend, while architectures with 32-bit pointers get the 32-bit backend). 27 | # The "64-bit backend on 32-bit arch" combination might not be fully 28 | # constant-time (depending on how the compiler implements multiplications 29 | # on multi-limb integers) and thus should be avoided. 30 | # 31 | # Default feature 'std' enables uses of heap allocation, which is used by 32 | # some functions. By disabling it, a core-only library can be obtained. 33 | [features] 34 | default = [ "std", "omnes" ] 35 | std = [ "alloc" ] 36 | alloc = [] 37 | w32_backend = [] 38 | w64_backend = [] 39 | gf255_m51 = [] 40 | gf255_m64 = [] 41 | gfb254_m64 = [] 42 | gfb254_x86clmul = [] 43 | gfb254_arm64pmull = [] 44 | omnes = [ "decaf448", "ed25519", "ed448", "frost", "jq255e", "jq255s", "lms", "p256", "ristretto255", "secp256k1", "gls254", "x25519", "x448", "modint256", "gf255", "gfgen" ] 45 | decaf448 = [ "ed448" ] 46 | ed25519 = [ "gf25519", "modint256" ] 47 | ed448 = [ "gf448", "gfgen" ] 48 | frost = [ "alloc" ] 49 | jq255e = [ "gf255e", "modint256", "blake2s" ] 50 | jq255s = [ "gf255s", "modint256", "blake2s" ] 51 | lms = [] 52 | p256 = [ "gfp256", "modint256" ] 53 | ristretto255 = [ "ed25519" ] 54 | secp256k1 = [ "gfsecp256k1", "modint256" ] 55 | gls254 = [ "gfb254", "modint256", "blake2s" ] 56 | x25519 = [ "ed25519" ] 57 | x448 = [ "ed448" ] 58 | gfgen = [] 59 | gf255 = [] 60 | gf255e = [] 61 | gf255s = [] 62 | gf25519 = [] 63 | gfp256 = [] 64 | gfsecp256k1 = [] 65 | gf448 = [] 66 | modint256 = [] 67 | gfb254 = [] 68 | gls254bench = [] 69 | zz32 = [] 70 | zz64 = [] 71 | blake2s = [] 72 | 73 | [[bench]] 74 | name = "modint" 75 | path = "benches/modint.rs" 76 | harness = false 77 | required-features = [ "modint256" ] 78 | 79 | [[bench]] 80 | name = "gf255e" 81 | path = "benches/gf255e.rs" 82 | harness = false 83 | required-features = [ "gf255e" ] 84 | 85 | 
[[bench]] 86 | name = "gf25519" 87 | path = "benches/gf25519.rs" 88 | harness = false 89 | required-features = [ "gf25519" ] 90 | 91 | [[bench]] 92 | name = "ed25519" 93 | path = "benches/ed25519.rs" 94 | harness = false 95 | required-features = [ "ed25519" ] 96 | 97 | [[bench]] 98 | name = "x25519" 99 | path = "benches/x25519.rs" 100 | harness = false 101 | required-features = [ "x25519" ] 102 | 103 | [[bench]] 104 | name = "p256" 105 | path = "benches/p256.rs" 106 | harness = false 107 | required-features = [ "p256" ] 108 | 109 | [[bench]] 110 | name = "ristretto255" 111 | path = "benches/ristretto255.rs" 112 | harness = false 113 | required-features = [ "ristretto255" ] 114 | 115 | [[bench]] 116 | name = "jq255e" 117 | path = "benches/jq255e.rs" 118 | harness = false 119 | required-features = [ "jq255e" ] 120 | 121 | [[bench]] 122 | name = "jq255s" 123 | path = "benches/jq255s.rs" 124 | harness = false 125 | required-features = [ "jq255s" ] 126 | 127 | [[bench]] 128 | name = "secp256k1" 129 | path = "benches/secp256k1.rs" 130 | harness = false 131 | required-features = [ "secp256k1" ] 132 | 133 | [[bench]] 134 | name = "gls254" 135 | path = "benches/gls254.rs" 136 | harness = false 137 | required-features = [ "gls254" ] 138 | 139 | [[bench]] 140 | name = "gf448" 141 | path = "benches/gf448.rs" 142 | harness = false 143 | required-features = [ "gf448" ] 144 | 145 | [[bench]] 146 | name = "sc448" 147 | path = "benches/sc448.rs" 148 | harness = false 149 | required-features = [ "ed448" ] 150 | 151 | [[bench]] 152 | name = "ed448" 153 | path = "benches/ed448.rs" 154 | harness = false 155 | required-features = [ "ed448" ] 156 | 157 | [[bench]] 158 | name = "x448" 159 | path = "benches/x448.rs" 160 | harness = false 161 | required-features = [ "x448" ] 162 | 163 | [[bench]] 164 | name = "blake2s" 165 | path = "benches/blake2s.rs" 166 | harness = false 167 | required-features = [ "blake2s" ] 168 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Thomas Pornin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # crrl 2 | 3 | This library implements some primitives for purposes of cryptographic 4 | research. Its point is to provide efficient, optimized and constant-time 5 | implementations that are supposed to be representative of 6 | production-ready code, so that realistic performance benchmarks may be 7 | performed. Thus, while meant primarily for research, the code here 8 | *should* be fine for production use (though of course I offer no such 9 | guarantee; use at your own risks). 
10 | 11 | So far, only some primitives related to elliptic curve cryptography 12 | are implemented: 13 | 14 | - A generic type `GF255` for finite fields of integers modulo a 15 | prime 2^255-`MQ` (for a value of `MQ` between 1 and 32767). The `MQ` 16 | value is provided as a type parameter, i.e. the exact field is known 17 | at compile time. This type covers the usual modulus 2^255-19 (used 18 | in Curve25519) as well as 2^255-18651 and 2^255-3957 (used in 19 | [double-odd curves do255e and do255s](https://doubleodd.group/)). 20 | 21 | - A generic type `ModInt256` for arbitrary finite 22 | fields of integers modulo a prime between 2^192 and 2^256. 23 | Montgomery representation is internally used. The modulus is 24 | provided as type parameters, allowing the compiler to apply 25 | optimizations when some parts of the modulus allow them (in 26 | particular with the modulus used for NIST curve P-256). 27 | 28 | - Type `GFsecp256k1` implements the specific base field for curve 29 | secp256k1 (integers modulo 2^256-4294968273). The 64-bit backend 30 | has a dedicated implementation, while the 32-bit version of this 31 | type uses `ModInt256`. 32 | 33 | - The macro `define_gfgen` allows defining arbitrary finite fields 34 | of integers modulo a prime, with a large range of modulus size. 35 | It uses Montgomery representation internally. 36 | 37 | - Type `GF448` implements the specific base field for Curve448. 38 | The 64-bit backend has a dedicated implementation, while the 32-bit 39 | backend uses `define_gfgen`. 40 | 41 | - Type `ed25519::Point` provides generic group operations in the 42 | twisted Edwards curve Curve25519. Ed25519 signatures (as per [RFC 43 | 8032](https://datatracker.ietf.org/doc/html/rfc8032)) are 44 | implemented. Type `ed25519::Scalar` implements operations on 45 | integers modulo the curve subgroup order. 
46 | 47 | - Type `ristretto255::Point` provides generic group operations in the 48 | [ristretto255 group](https://ristretto.group/), whose prime order is 49 | exactly the size of the interesting subgroup of Curve25519. 50 | 51 | - Type `ed448::Point` provides generic group operations in the 52 | Edwards curve edwards448. Ed448 signatures (as per [RFC 53 | 8032](https://datatracker.ietf.org/doc/html/rfc8032)) are 54 | implemented. Type `ed448::Scalar` implements operations on 55 | integers modulo the curve subgroup order. 56 | 57 | - Type `decaf448::Point` provides generic group operations in the 58 | [decaf448 group](https://ristretto.group/), whose prime order is 59 | exactly the size of the interesting subgroup of Curve448. 60 | 61 | - Type `p256::Point` provides generic group operations in the NIST 62 | P-256 curve (aka "secp256r1" aka "prime256v1"). ECDSA signatures are 63 | supported. The `p256::Scalar` type implements the corresponding 64 | scalars (integers modulo the curve order). 65 | 66 | - Type `secp256k1::Point` provides generic group operations in the 67 | secp256k1 curve (aka "the Bitcoin curve"). ECDSA signatures are 68 | supported. The `secp256k1::Scalar` type implements the corresponding 69 | scalars (integers modulo the curve order). The GLV endomorphism is 70 | leveraged to speed-up point multiplication (key exchange) and 71 | signature verification. 72 | 73 | - Types `jq255e::Point` and `jq255s::Point` implement the 74 | [double-odd curves](https://doubleodd.group/) jq255e and jq255s 75 | (along with the corresponding scalar types `jq255e::Scalar` and 76 | `jq255s::Scalar`). Key exchange and Schnorr signatures are 77 | implemented. These curves provide a prime-order group abstraction, 78 | similar to ristretto255, but with somewhat better performance at the 79 | same security level. Moreover, the relevant signatures are both 80 | shorter (48 bytes instead of 64) and faster than the usual Ed25519 81 | signatures. 
82 | 83 | - Function `x25519::x25519()` implements the 84 | [X25519 function](https://datatracker.ietf.org/doc/html/rfc7748#section-5). 85 | An optimized `x25519::x25519_base()` function is provided when X25519 86 | is applied to the conventional base point. Similarly, `x448::x448()` 87 | and `x448::x448_base()` provide the same functionality for the 88 | X448 function. 89 | 90 | - Type `gls254::Point` implements the GLS254 curve (or, more precisely, 91 | a prime-order group homomorphic to a subgroup of that curve), which is 92 | defined over a binary field. `gls254::Scalar` is the type for integers 93 | modulo the curve order. `gls254::PrivateKey` and `gls254::PublicKey` 94 | implement high-level operations such as key exchange and signatures, 95 | using that group. 96 | 97 | - Module `blake2s` contains some BLAKE2s implementations, with 98 | optional SSE2 and AVX2 optimizations. 99 | 100 | Types `GF255` and `ModInt256` have 32-bit and 64-bit implementations 101 | each (actually two 64-bit implementations, see later the discussion 102 | about the `gf255_m51` feature). The code is portable (it was tested on 103 | 32-bit and 64-bit x86, 64-bit aarch64, and 64-bit riscv64). Performance 104 | is quite decent; e.g. Ed25519 signatures are computed in about 51500 105 | cycles, and verified in about 114000 cycles, on an Intel "Coffee Lake" 106 | CPU; this is not too far from the best assembly-optimized 107 | implementations. At the same time, use of operator overloading allows one to 108 | express formulas on points and scalars with about the same syntax as 109 | their mathematical description. 
For instance, the core of the X25519 110 | implementation looks like this: 111 | 112 | ``` 113 | let A = x2 + z2; 114 | let B = x2 - z2; 115 | let AA = A.square(); 116 | let BB = B.square(); 117 | let C = x3 + z3; 118 | let D = x3 - z3; 119 | let E = AA - BB; 120 | let DA = D * A; 121 | let CB = C * B; 122 | x3 = (DA + CB).square(); 123 | z3 = x1 * (DA - CB).square(); 124 | x2 = AA * BB; 125 | z2 = E * (AA + E.mul_small(121665)); 126 | ``` 127 | 128 | which is quite close to the corresponding description in RFC 7748: 129 | 130 | ``` 131 | A = x_2 + z_2 132 | AA = A^2 133 | B = x_2 - z_2 134 | BB = B^2 135 | E = AA - BB 136 | C = x_3 + z_3 137 | D = x_3 - z_3 138 | DA = D * A 139 | CB = C * B 140 | x_3 = (DA + CB)^2 141 | z_3 = x_1 * (DA - CB)^2 142 | x_2 = AA * BB 143 | z_2 = E * (AA + a24 * E) 144 | ``` 145 | 146 | # Optional Features 147 | 148 | By default, everything in crrl is compiled, which unfortunately makes for 149 | a relatively long compilation time, especially on not-so-fast systems. 150 | To only compile support for some primitives, use `--no-default-features` 151 | then add selectively the features you are interested in with `-F`; e.g. 152 | use `cargo build --no-default-features -F ed25519` to only compile the 153 | Ed25519 support (and the primitives that it needs, such as its base 154 | field). The defined primitive-controlling features are the following: 155 | 156 | - `omnes`: enables all of the following. 
157 | 158 | - `decaf448`: decaf448 prime-order group (based on edwards448) 159 | 160 | - `ed25519`: edwards25519 curve and signatures (RFC 8032: Ed25519) 161 | 162 | - `ed448`: edwards448 curve and signatures (RFC 8032: Ed448) 163 | 164 | - `frost`: FROST threshold signatures (support macros + standard 165 | ciphersuites, but only for the curves which are also enabled in 166 | this build) 167 | 168 | - `jq255e`: jq255e prime-order group and signatures 169 | 170 | - `jq255s`: jq255s prime-order group and signatures 171 | 172 | - `lms`: LMS support (hash-based signatures) 173 | 174 | - `p256`: NIST P-256 curve and signatures (ECDSA) 175 | 176 | - `ristretto255`: ristretto255 prime-order group (based on edwards25519) 177 | 178 | - `secp256k1`: secp256k1 curve and signatures (ECDSA) 179 | 180 | - `x25519`: X25519 key exchange primitive (RFC 7748) 181 | 182 | - `x448`: X448 key exchange primitive (RFC 7748) 183 | 184 | - `modint256`: generic finite field implementation (prime order of up to 185 | 256 bits) 186 | 187 | - `gf255`: generic finite field implementation (for prime order 188 | `q = 2^255 - MQ` with `MQ < 2^15`) 189 | 190 | - `gfgen`: generic finite field implementation (generating macro; prime 191 | modulus of arbitrary length) 192 | 193 | - `gls254`: GLS254 prime-order group and signatures 194 | 195 | - `gls254bench`: additional benchmarking code for GLS254 196 | 197 | - `blake2s`: BLAKE2s hash function 198 | 199 | Some operations have multiple backends. An appropriate backend is selected 200 | at compile-time, but this can be overridden by enabling some features: 201 | 202 | - `w32_backend`: enforce use of the 32-bit code, even on 64-bit systems. 203 | 204 | - `w64_backend`: enforce use of the 64-bit code, even on 32-bit systems. 205 | 206 | - `gf255_m64`: enforce use of 64-bit limbs for `GF255`; this is the 207 | default on 64-bit machines, except RISC-V (riscv64) where 51-bit 208 | limbs are used by default. 
This feature has no effect if the 32-bit code 209 | is used. 210 | 211 | - `gf255_m51`: enforce use of 51-bit limbs for `GF255`; this is the 212 | default on 64-bit RISC-V targets (riscv64), but not on other 64-bit 213 | architectures where 64-bit limbs are normally preferred. This feature 214 | has no effect if the 32-bit code is used. 215 | 216 | - `gfb254_m64`: enforce use of the generic implementation of the 217 | binary field GF(2^254). This feature has no effect if the 32-bit code 218 | is used. 219 | 220 | - `gfb254_x86clmul`: enforce use of the AVX2+pclmulqdq implementation of 221 | the binary field GF(2^254). This code is used automatically if the 222 | compilation target is an x86 with the relevant hardware support; this 223 | feature bypasses the automatic detection. This feature has no effect 224 | if the 32-bit code is used. 225 | 226 | - `gfb254_arm64pmull`: enforce use of the NEON+pmull implementation of 227 | the binary field GF(2^254). This code is used automatically if the 228 | compilation target is an aarch64 system; this feature bypasses the 229 | automatic detection. This feature has no effect if the 32-bit code 230 | is used. 231 | 232 | # Security and Compliance 233 | 234 | All the code is strict, both in terms of timing-based side-channels 235 | (everything is constant-time, except if explicitly stated otherwise, 236 | e.g. in a function whose name includes `vartime`) and in compliance with 237 | relevant standards. For instance, the Ed25519 signature support applies 238 | and enforces canonical encodings of both points and scalars. 239 | 240 | There is no attempt at "zeroizing memory" anywhere in the code. In 241 | general, such memory cleansing is a fool's quest. Note that since most 242 | of the library uses `no_std` rules, dynamic allocation happens only on 243 | the stack, thereby limiting the risk of leaving secret information 244 | lingering all over the RAM. The only functions that use heap allocation 245 | only store public data there. 
246 | 247 | **WARNING:** I reiterate what was written above: although all of the 248 | code aims at being representative of optimized production-ready code, it 249 | is still fairly recent and some bugs might still lurk, however careful I 250 | am when writing code. Any assertion of suitability to any purpose is 251 | explicitly denied. The primary purpose is to help with "trying out stuff" 252 | in cryptographic research, by offering an easy-to-use API backed by 253 | performance close enough to what can be done in actual applications. 254 | 255 | # Truncated Signatures 256 | 257 | Support for truncated signatures is implemented for Ed25519 and 258 | ECDSA/P-256. Standard signatures can be shortened by 8 to 32 bits (i.e. 259 | the size may shrink from 64 down to 60 bytes), and the verifier rebuilds 260 | the original signature during verification (at some computational cost). 261 | This is not a ground-breaking feature, but it can be very convenient in 262 | some situations with tight constraints on bandwidth and a requirement to 263 | work with standard signature formats. See 264 | `ed25519::PublicKey::verify_trunc_raw()` and 265 | `p256::PublicKey::verify_trunc_hash()` for details. 266 | 267 | # FROST Threshold Schnorr Signatures 268 | 269 | The FROST protocol for a distributed Schnorr signature generation scheme 270 | has been implemented, as per the v14 draft specification: 271 | [draft-irtf-cfrg-frost-14](https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-frost-14). 272 | Five ciphersuites are provided, with similar APIs, in the 273 | `frost::ed25519`, `frost::ristretto255`, `frost::ed448`, `frost::p256` and 274 | `frost::secp256k1` modules. Sample code showing how to use the API is 275 | provided in the [frost-sample.rs](extra/frost-sample.rs) file. 276 | 277 | While FROST is inherently a distributed scheme, the implementation can 278 | also be used in a single signer mode by using the "group" private key 279 | directly. 
280 | 281 | # Benchmarks 282 | 283 | `cargo bench` runs some benchmarks, but there are a few caveats: 284 | 285 | - The cycle counter is used on x86. If frequency scaling ("TurboBoost") 286 | is not disabled, then you'll get wrong and meaningless results. 287 | 288 | - On aarch64, the cycle counter is also accessed directly, which will 289 | in general fail with some CPU exception. Access to the counter must 290 | first be enabled, which requires (on Linux) a specific kernel 291 | module. [This 292 | one](https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0) 293 | works for me. 294 | 295 | - On riscv64gc, the cycle counter is accessed directly. In general, 296 | that counter is not enabled and all benches return zero; to enable 297 | the cycle counter, run the benchmark binary inside the `perf` 298 | tool (which comes with the `linux-tools`). 299 | 300 | - On architectures other than i386, x86-64, aarch64 and riscv64gc, 301 | benchmark code will simply not compile. 302 | 303 | # TODO 304 | 305 | In general, about anything related to cryptography may show up here, 306 | if there is a use case for it. 307 | -------------------------------------------------------------------------------- /benches/blake2s.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "blake2s")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::blake2s::Blake2s256; 8 | 9 | fn bench_blake2s_short() -> (f64, u8) { 10 | let z = core_cycles(); 11 | let mut seed = [0u8; 32]; 12 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 13 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 14 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 15 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 16 | let mut tt = [0; 100]; 17 | let mut sh = Blake2s256::new(); 18 | for i in 0..(tt.len() + 1000) { 19 | let begin = core_cycles(); 20 | for _ in 0..100 { 21 | sh.update(&seed); 22 | sh.finalize_reset_write(&mut seed); 23 | } 24 | let end = core_cycles(); 25 | if i >= 1000 { 26 | tt[i - 1000] = end.wrapping_sub(begin); 27 | } 28 | } 29 | tt.sort(); 30 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 31 | } 32 | 33 | fn bench_blake2s_4096() -> (f64, u8) { 34 | let z = core_cycles(); 35 | let mut sh = Blake2s256::new(); 36 | let mut buf = [0u8; 4096]; 37 | for i in 0..(buf.len() >> 5) { 38 | sh.update(&z.to_le_bytes()); 39 | sh.update(&(i as u64).to_le_bytes()); 40 | sh.finalize_reset_write(&mut buf[(i << 5)..]); 41 | } 42 | let mut tt = [0; 100]; 43 | for i in 0..tt.len() { 44 | let begin = core_cycles(); 45 | for _ in 0..(buf.len() >> 5) { 46 | sh.update(&buf); 47 | sh.finalize_reset_write(&mut buf[(i << 5)..]); 48 | } 49 | let end = core_cycles(); 50 | tt[i] = end.wrapping_sub(begin); 51 | } 52 | tt.sort(); 53 | ((tt[tt.len() >> 1] as f64) / ((buf.len() >> 5) as f64), buf[0]) 54 | } 55 | 56 | fn main() { 57 | let mut bx = 0u8; 58 | 59 | let (v, x) = bench_blake2s_short(); 60 | bx ^= x; 61 | println!("BLAKE2s (short): {:13.2}", v); 62 | let (v, x) = bench_blake2s_4096(); 63 | bx ^= x; 64 | println!("BLAKE2s (4096 bytes): {:13.2}", v); 65 | 66 | println!("{}", bx); 67 | } 68 | -------------------------------------------------------------------------------- /benches/ed25519.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "ed25519")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::ed25519::{PrivateKey, Point, Scalar}; 8 | use sha2::{Sha256, Digest}; 9 | 10 | fn 
bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 32]; 13 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | let mut s = Scalar::decode_reduce(&seed); 18 | let mut tt = [0; 100]; 19 | for i in 0..tt.len() { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let P = Point::mulgen(&s); 23 | if P.isneutral() != 0 { 24 | s += Scalar::ZERO; 25 | } else { 26 | s += Scalar::ONE; 27 | } 28 | } 29 | let end = core_cycles(); 30 | tt[i] = end.wrapping_sub(begin); 31 | } 32 | tt.sort(); 33 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 34 | } 35 | 36 | fn bench_mul() -> (f64, u8) { 37 | let z = core_cycles(); 38 | let mut seed = [0u8; 32]; 39 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 40 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 41 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 42 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 43 | let mut s = Scalar::decode_reduce(&seed); 44 | let mut P = Point::mulgen(&s); 45 | let mut tt = [0; 100]; 46 | for i in 0..tt.len() { 47 | let begin = core_cycles(); 48 | for _ in 0..100 { 49 | P *= s; 50 | if P.isneutral() != 0 { 51 | s += Scalar::ZERO; 52 | } else { 53 | s += Scalar::ONE; 54 | } 55 | } 56 | let end = core_cycles(); 57 | tt[i] = end.wrapping_sub(begin); 58 | } 59 | tt.sort(); 60 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 61 | } 62 | 63 | fn bench_mul_add_mulgen() -> (f64, u8) { 64 | let z = core_cycles(); 65 | let mut uu = [Scalar::ZERO; 128]; 66 | let mut vv = [Scalar::ZERO; 128]; 67 | let mut sh = Sha256::new(); 68 | for i in 0..128 { 69 | sh.update(z.to_le_bytes()); 70 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 71 | let b1 = sh.finalize_reset(); 72 | sh.update(z.to_le_bytes()); 73 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 74 | let b2 = sh.finalize_reset(); 75 | uu[i] = 
Scalar::decode_reduce(&b1); 76 | vv[i] = Scalar::decode_reduce(&b2); 77 | } 78 | let mut tt = [0; 100]; 79 | let mut P = Point::mulgen(&uu[127]); 80 | for i in 0..tt.len() { 81 | let begin = core_cycles(); 82 | for j in 0..128 { 83 | let ku = (i + j) & 127; 84 | let kv = i.wrapping_sub(j) & 127; 85 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 86 | P += Q; 87 | } 88 | let end = core_cycles(); 89 | tt[i] = end.wrapping_sub(begin); 90 | } 91 | tt.sort(); 92 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode()[0]) 93 | } 94 | 95 | fn bench_skey_load() -> (f64, u8) { 96 | let z = core_cycles(); 97 | let mut seed = [0u8; 32]; 98 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 99 | let mut tt = [0; 100]; 100 | for i in 0..tt.len() { 101 | let begin = core_cycles(); 102 | for _ in 0..100 { 103 | let skey = PrivateKey::from_seed(&seed); 104 | seed[..].copy_from_slice(&skey.public_key.encode()); 105 | } 106 | let end = core_cycles(); 107 | tt[i] = end.wrapping_sub(begin); 108 | } 109 | tt.sort(); 110 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 111 | } 112 | 113 | fn bench_skey_sign() -> (f64, u8) { 114 | let z = core_cycles(); 115 | let mut seed = [0u8; 32]; 116 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 117 | let skey = PrivateKey::from_seed(&seed); 118 | let mut tt = [0; 100]; 119 | let mut msg = [0u8; 32]; 120 | for i in 0..tt.len() { 121 | let begin = core_cycles(); 122 | for _ in 0..100 { 123 | let sig = skey.sign_raw(&msg); 124 | msg[..].copy_from_slice(&sig[0..32]); 125 | } 126 | let end = core_cycles(); 127 | tt[i] = end.wrapping_sub(begin); 128 | } 129 | tt.sort(); 130 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 131 | } 132 | 133 | fn bench_pkey_verify() -> (f64, u8) { 134 | let z = core_cycles(); 135 | let mut seed = [0u8; 32]; 136 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 137 | let skey = PrivateKey::from_seed(&seed); 138 | let pkey = skey.public_key; 139 | let mut sigs = [[0u8; 64]; 128]; 140 | for i in 0..128 { 141 | let msg = [i as u8; 
32]; 142 | let sig = skey.sign_raw(&msg); 143 | sigs[i][..].copy_from_slice(&sig); 144 | } 145 | let mut tt = [0; 100]; 146 | let mut msg = [0u8; 32]; 147 | for i in 0..tt.len() { 148 | let begin = core_cycles(); 149 | for j in 0..128 { 150 | let ff = pkey.verify_raw(&sigs[j], &msg); 151 | sigs[j][40] ^= 1u8.wrapping_add(ff as u8); 152 | msg[3] ^= 3u8.wrapping_sub(ff as u8); 153 | } 154 | let end = core_cycles(); 155 | tt[i] = end.wrapping_sub(begin); 156 | } 157 | tt.sort(); 158 | ((tt[tt.len() >> 1] as f64) / 128.0, msg[0]) 159 | } 160 | 161 | fn bench_decode() -> (f64, u8) { 162 | let z = core_cycles(); 163 | let mut buf = [0u8; 32]; 164 | buf[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 165 | buf[ 8..16].copy_from_slice(&z.to_le_bytes()); 166 | buf[16..24].copy_from_slice(&z.to_le_bytes()); 167 | buf[24..32].copy_from_slice(&z.to_le_bytes()); 168 | let mut tt = [0; 10]; 169 | let mut P = Point::NEUTRAL; 170 | let Q = Point::BASE * z; 171 | for i in 0..10 { 172 | let begin = core_cycles(); 173 | for _ in 0..100 { 174 | let r = P.set_decode(&buf); 175 | buf[0] = buf[0].wrapping_add(1); 176 | buf[1] = buf[1].wrapping_add(r as u8); 177 | buf[2] = buf[2].wrapping_add(P.equals(Q) as u8); 178 | } 179 | let end = core_cycles(); 180 | tt[i] = end.wrapping_sub(begin); 181 | } 182 | tt.sort(); 183 | ((tt[4] as f64) / 100.0, buf[0]) 184 | } 185 | 186 | fn bench_encode() -> (f64, u8) { 187 | let z = core_cycles(); 188 | let mut P = Point::BASE * z; 189 | let mut tt = [0; 10]; 190 | for i in 0..10 { 191 | let begin = core_cycles(); 192 | for _ in 0..100 { 193 | let x = P.encode()[0]; 194 | if x & 1 == 0 { 195 | P = -P; 196 | } 197 | } 198 | let end = core_cycles(); 199 | tt[i] = end.wrapping_sub(begin); 200 | } 201 | tt.sort(); 202 | ((tt[4] as f64) / 100.0, P.encode()[0]) 203 | } 204 | 205 | /* 206 | * Old benchmark for the old is_in_subgroup() implementation. 
207 | fn bench_subgroup_old() -> (f64, u8) { 208 | let z = core_cycles(); 209 | let Q = Point::BASE * z; 210 | let mut P = Point::NEUTRAL; 211 | let mut tt = [0; 10]; 212 | for i in 0..10 { 213 | let begin = core_cycles(); 214 | for _ in 0..100 { 215 | let r = P.old_is_in_subgroup(); 216 | P.set_cond(&(P + Q), r); 217 | } 218 | let end = core_cycles(); 219 | tt[i] = end.wrapping_sub(begin); 220 | } 221 | tt.sort(); 222 | ((tt[4] as f64) / 100.0, P.encode()[0]) 223 | } 224 | */ 225 | 226 | fn bench_subgroup() -> (f64, u8) { 227 | let z = core_cycles(); 228 | let Q = Point::BASE * z; 229 | let mut P = Point::NEUTRAL; 230 | let mut tt = [0; 10]; 231 | for i in 0..10 { 232 | let begin = core_cycles(); 233 | for _ in 0..100 { 234 | let r = P.is_in_subgroup(); 235 | P.set_cond(&(P + Q), r); 236 | } 237 | let end = core_cycles(); 238 | tt[i] = end.wrapping_sub(begin); 239 | } 240 | tt.sort(); 241 | ((tt[4] as f64) / 100.0, P.encode()[0]) 242 | } 243 | 244 | fn bench_pkey_verify_trunc(rm: usize) -> (f64, f64, u8) { 245 | let z = core_cycles(); 246 | let mut seed = [0u8; 32]; 247 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 248 | let skey = PrivateKey::from_seed(&seed); 249 | let pkey = skey.public_key; 250 | let mut sigs = [[0u8; 64]; 256]; 251 | for i in 0..256 { 252 | let msg = [i as u8; 32]; 253 | let sig = skey.sign_raw(&msg); 254 | sigs[i][..].copy_from_slice(&sig); 255 | } 256 | let mut x = 0; 257 | 258 | // Phase 1: all signatures are correct. 259 | let mut tt = [0; 2048]; 260 | for i in 0..tt.len() { 261 | let msg = [i as u8; 32]; 262 | let begin = core_cycles(); 263 | x ^= (pkey.verify_trunc_raw(&sigs[i % 256], rm, &msg).is_some()) as u8; 264 | let end = core_cycles(); 265 | tt[i] = end.wrapping_sub(begin); 266 | } 267 | tt.sort(); 268 | // Remove 10% slowest and 10% fastest, make an average of the rest. 
269 | let n10 = tt.len() / 10; 270 | let n80 = tt.len() - 2 * n10; 271 | let mut s = 0u64; 272 | for i in n10..(tt.len() - n10) { 273 | s += tt[i]; 274 | } 275 | let res1 = (s as f64) / (n80 as f64); 276 | 277 | // Phase 2: all signatures are incorrect. 278 | // We expect much lower variance in that case. 279 | let mut tt = [0; 128]; 280 | for i in 0..tt.len() { 281 | let msg = [(i + 1) as u8; 32]; 282 | let begin = core_cycles(); 283 | x ^= (pkey.verify_trunc_raw(&sigs[i % 256], rm, &msg).is_some()) as u8; 284 | let end = core_cycles(); 285 | tt[i] = end.wrapping_sub(begin); 286 | } 287 | tt.sort(); 288 | // Remove 10% slowest and 10% fastest, make an average of the rest. 289 | let n10 = tt.len() / 10; 290 | let n80 = tt.len() - 2 * n10; 291 | let mut s = 0u64; 292 | for i in n10..(tt.len() - n10) { 293 | s += tt[i]; 294 | } 295 | let res2 = (s as f64) / (n80 as f64); 296 | 297 | (res1, res2, x) 298 | } 299 | 300 | fn main() { 301 | let mut bx = 0u8; 302 | 303 | let (v, x) = bench_mul(); 304 | bx ^= x; 305 | println!("Ed25519 point mul: {:13.2}", v); 306 | let (v, x) = bench_mulgen(); 307 | bx ^= x; 308 | println!("Ed25519 point mulgen: {:13.2}", v); 309 | let (v, x) = bench_mul_add_mulgen(); 310 | bx ^= x; 311 | println!("Ed25519 point mul_add_mulgen: {:13.2}", v); 312 | let (v, x) = bench_skey_load(); 313 | bx ^= x; 314 | println!("Ed25519 skey_load: {:13.2}", v); 315 | let (v, x) = bench_skey_sign(); 316 | bx ^= x; 317 | println!("Ed25519 sign: {:13.2}", v); 318 | let (v, x) = bench_pkey_verify(); 319 | bx ^= x; 320 | println!("Ed25519 verify: {:13.2}", v); 321 | let (v, x) = bench_decode(); 322 | bx ^= x; 323 | println!("Ed25519 decode: {:13.2}", v); 324 | let (v, x) = bench_encode(); 325 | bx ^= x; 326 | println!("Ed25519 encode: {:13.2}", v); 327 | let (v, x) = bench_subgroup(); 328 | bx ^= x; 329 | println!("Ed25519 subgroup: {:13.2}", v); 330 | 331 | let (v1, v2, x) = bench_pkey_verify_trunc(8); 332 | bx ^= x; 333 | println!("Ed25519 verify_trunc8: {:13.2} 
{:13.2}", v1, v2); 334 | let (v1, v2, x) = bench_pkey_verify_trunc(16); 335 | bx ^= x; 336 | println!("Ed25519 verify_trunc16: {:13.2} {:13.2}", v1, v2); 337 | /* 338 | let (v1, v2, x) = bench_pkey_verify_trunc(24); 339 | bx ^= x; 340 | println!("Ed25519 verify_trunc24: {:13.2} {:13.2}", v1, v2); 341 | let (v1, v2, x) = bench_pkey_verify_trunc(28); 342 | bx ^= x; 343 | println!("Ed25519 verify_trunc28: {:13.2} {:13.2}", v1, v2); 344 | let (v1, v2, x) = bench_pkey_verify_trunc(32); 345 | bx ^= x; 346 | println!("Ed25519 verify_trunc32: {:13.2} {:13.2}", v1, v2); 347 | */ 348 | 349 | println!("{}", bx); 350 | } 351 | -------------------------------------------------------------------------------- /benches/ed448.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "ed448")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::ed448::{PrivateKey, Point, Scalar}; 8 | use sha2::{Sha512, Digest}; 9 | 10 | fn bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 64]; 13 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | seed[32..40].copy_from_slice(&z.to_le_bytes()); 18 | seed[40..48].copy_from_slice(&z.to_le_bytes()); 19 | seed[48..56].copy_from_slice(&z.to_le_bytes()); 20 | seed[56..64].copy_from_slice(&z.to_le_bytes()); 21 | let mut s = Scalar::decode_reduce(&seed); 22 | let mut tt = [0; 100]; 23 | for i in 0..tt.len() { 24 | let begin = core_cycles(); 25 | for _ in 0..100 { 26 | let P = Point::mulgen(&s); 27 | if P.isneutral() != 0 { 28 | s += Scalar::ZERO; 29 | } else { 30 | s += Scalar::ONE; 31 | } 32 | } 33 | let end = core_cycles(); 34 | tt[i] = end.wrapping_sub(begin); 35 | } 36 | tt.sort(); 37 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode()[0]) 38 | } 39 | 40 | fn bench_mul() -> (f64, u8) { 41 | let z = core_cycles(); 42 | let mut seed = [0u8; 64]; 43 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 44 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 45 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 46 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 47 | seed[32..40].copy_from_slice(&z.to_le_bytes()); 48 | seed[40..48].copy_from_slice(&z.to_le_bytes()); 49 | seed[48..56].copy_from_slice(&z.to_le_bytes()); 50 | seed[56..64].copy_from_slice(&z.to_le_bytes()); 51 | let mut s = Scalar::decode_reduce(&seed); 52 | let mut P = Point::mulgen(&s); 53 | let mut tt = [0; 100]; 54 | for i in 0..tt.len() { 55 | let begin = core_cycles(); 56 | for _ in 0..100 { 57 | P *= s; 58 | if P.isneutral() != 0 { 59 | s += Scalar::ZERO; 60 | } else { 61 | s += Scalar::ONE; 62 | } 63 | } 64 | let end = core_cycles(); 65 | tt[i] = end.wrapping_sub(begin); 66 | } 67 | tt.sort(); 68 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode()[0]) 69 | } 70 | 71 | fn bench_mul_add_mulgen() -> (f64, u8) { 72 | let z = core_cycles(); 73 | let mut uu = [Scalar::ZERO; 128]; 74 | let mut vv = [Scalar::ZERO; 
128]; 75 | let mut sh = Sha512::new(); 76 | for i in 0..128 { 77 | sh.update(z.to_le_bytes()); 78 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 79 | let b1 = sh.finalize_reset(); 80 | sh.update(z.to_le_bytes()); 81 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 82 | let b2 = sh.finalize_reset(); 83 | uu[i] = Scalar::decode_reduce(&b1); 84 | vv[i] = Scalar::decode_reduce(&b2); 85 | } 86 | let mut tt = [0; 100]; 87 | let mut P = Point::mulgen(&uu[127]); 88 | for i in 0..tt.len() { 89 | let begin = core_cycles(); 90 | for j in 0..128 { 91 | let ku = (i + j) & 127; 92 | let kv = i.wrapping_sub(j) & 127; 93 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 94 | P += Q; 95 | } 96 | let end = core_cycles(); 97 | tt[i] = end.wrapping_sub(begin); 98 | } 99 | tt.sort(); 100 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode()[0]) 101 | } 102 | 103 | fn bench_skey_load() -> (f64, u8) { 104 | let z = core_cycles(); 105 | let mut seed = [0u8; 57]; 106 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 107 | let mut tt = [0; 100]; 108 | for i in 0..tt.len() { 109 | let begin = core_cycles(); 110 | for _ in 0..100 { 111 | let skey = PrivateKey::from_seed(&seed); 112 | seed[..].copy_from_slice(&skey.public_key.encode()); 113 | } 114 | let end = core_cycles(); 115 | tt[i] = end.wrapping_sub(begin); 116 | } 117 | tt.sort(); 118 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 119 | } 120 | 121 | fn bench_skey_sign() -> (f64, u8) { 122 | let z = core_cycles(); 123 | let mut seed = [0u8; 57]; 124 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 125 | let skey = PrivateKey::from_seed(&seed); 126 | let mut tt = [0; 100]; 127 | let mut msg = [0u8; 32]; 128 | for i in 0..tt.len() { 129 | let begin = core_cycles(); 130 | for _ in 0..100 { 131 | let sig = skey.sign_raw(&msg); 132 | msg[..].copy_from_slice(&sig[0..32]); 133 | } 134 | let end = core_cycles(); 135 | tt[i] = end.wrapping_sub(begin); 136 | } 137 | tt.sort(); 138 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 139 | } 140 | 141 | 
/// Measures raw signature verification (over a batch of 128 signatures).
fn bench_pkey_verify() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 57];
    seed[0..8].copy_from_slice(&z.to_le_bytes());
    let skey = PrivateKey::from_seed(&seed);
    let pkey = skey.public_key;
    // Pre-generate 128 signatures over distinct messages.
    let mut sigs = [[0u8; 114]; 128];
    for i in 0..128 {
        let msg = [i as u8; 32];
        let sig = skey.sign_raw(&msg);
        sigs[i][..].copy_from_slice(&sig);
    }
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for j in 0..128 {
            let ff = pkey.verify_raw(&sigs[j], &msg);
            // Perturb signature and message based on the outcome so the
            // verification result is actually used.
            sigs[j][40] ^= 1u8.wrapping_add(ff as u8);
            msg[3] ^= 3u8.wrapping_sub(ff as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 128.0, msg[0])
}

/// Measures point decoding from its 57-byte representation.
fn bench_decode() -> (f64, u8) {
    let z = core_cycles();
    let mut buf = [0u8; 57];
    for k in 0..7 {
        buf[(8 * k)..(8 * k + 8)].copy_from_slice(&z.to_le_bytes());
    }
    buf[56] = z as u8;
    let mut tt = [0; 10];
    let mut P = Point::NEUTRAL;
    let Q = Point::BASE * z;
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let r = P.set_decode(&buf);
            buf[0] = buf[0].wrapping_add(1);
            buf[1] = buf[1].wrapping_add(r as u8);
            buf[2] = buf[2].wrapping_add(P.equals(Q) as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, buf[0])
}

/// Measures point encoding.
fn bench_encode() -> (f64, u8) {
    let z = core_cycles();
    let mut P = Point::BASE * z;
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let x = P.encode()[0];
            // Conditionally negate so successive encodings differ.
            if x & 1 == 0 {
                P = -P;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, P.encode()[0])
}

/*
 * Old benchmark for the old is_in_subgroup() implementation.
fn bench_subgroup_old() -> (f64, u8) {
    let z = core_cycles();
    let Q = Point::BASE * z;
    let mut P = Point::NEUTRAL;
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let r = P.old_is_in_subgroup();
            P.set_cond(&(P + Q), r);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, P.encode()[0])
}
*/

/// Measures the prime-order subgroup membership test.
fn bench_subgroup() -> (f64, u8) {
    let z = core_cycles();
    let Q = Point::BASE * z;
    let mut P = Point::NEUTRAL;
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let r = P.is_in_subgroup();
            P.set_cond(&(P + Q), r);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, P.encode()[0])
}

fn main() {
    // 'bx' accumulates one byte from every benchmark so the measured
    // computations cannot be optimized away.
    let mut bx = 0u8;

    let (v, x) = bench_mul();
    bx ^= x;
    println!("Ed448 point mul: {:13.2}", v);
    let (v, x) = bench_mulgen();
    bx ^= x;
    println!("Ed448 point mulgen: {:13.2}", v);
    let (v, x) = bench_mul_add_mulgen();
    bx ^= x;
    println!("Ed448 point mul_add_mulgen: {:13.2}", v);
    let (v, x) = bench_skey_load();
    bx ^= x;
    println!("Ed448 skey_load: {:13.2}", v);
    let (v, x) = bench_skey_sign();
    bx ^= x;
    println!("Ed448 sign: {:13.2}", v);
    let (v, x) = bench_pkey_verify();
    bx ^= x;
    println!("Ed448 verify: {:13.2}", v);
277 | let (v, x) = bench_decode(); 278 | bx ^= x; 279 | println!("Ed448 decode: {:13.2}", v); 280 | let (v, x) = bench_encode(); 281 | bx ^= x; 282 | println!("Ed448 encode: {:13.2}", v); 283 | let (v, x) = bench_subgroup(); 284 | bx ^= x; 285 | println!("Ed448 subgroup: {:13.2}", v); 286 | 287 | println!("{}", bx); 288 | } 289 | -------------------------------------------------------------------------------- /benches/gf25519.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "gf25519")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::field::GF25519; 7 | 8 | fn bench_gf25519_add() { 9 | let z = core_cycles(); 10 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 11 | z.wrapping_mul(5), z.wrapping_mul(7)); 12 | let mut y = x + GF25519::ONE; 13 | let mut tt = [0; 10]; 14 | for i in 0..10 { 15 | let begin = core_cycles(); 16 | for _ in 0..1000 { 17 | x += y; 18 | y += x; 19 | x += y; 20 | y += x; 21 | x += y; 22 | y += x; 23 | } 24 | let end = core_cycles(); 25 | tt[i] = end.wrapping_sub(begin); 26 | } 27 | tt.sort(); 28 | println!("GF25519 add: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 29 | } 30 | 31 | fn bench_gf25519_sub() { 32 | let z = core_cycles(); 33 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 34 | z.wrapping_mul(5), z.wrapping_mul(7)); 35 | let mut y = x + GF25519::ONE; 36 | let mut tt = [0; 10]; 37 | for i in 0..10 { 38 | let begin = core_cycles(); 39 | for _ in 0..1000 { 40 | x -= y; 41 | y -= x; 42 | x -= y; 43 | y -= x; 44 | x -= y; 45 | y -= x; 46 | } 47 | let end = core_cycles(); 48 | tt[i] = end.wrapping_sub(begin); 49 | } 50 | tt.sort(); 51 | println!("GF25519 sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 52 | } 53 | 54 | fn bench_gf25519_mul() { 55 | let z = core_cycles(); 56 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 57 | z.wrapping_mul(5), z.wrapping_mul(7)); 58 | let mut y = x + GF25519::ONE; 59 | let mut tt = [0; 10]; 60 | for i 
in 0..10 { 61 | let begin = core_cycles(); 62 | for _ in 0..1000 { 63 | x *= y; 64 | y *= x; 65 | x *= y; 66 | y *= x; 67 | x *= y; 68 | y *= x; 69 | } 70 | let end = core_cycles(); 71 | tt[i] = end.wrapping_sub(begin); 72 | } 73 | tt.sort(); 74 | println!("GF25519 mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 75 | } 76 | 77 | fn bench_gf25519_square() { 78 | let z = core_cycles(); 79 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 80 | z.wrapping_mul(5), z.wrapping_mul(7)); 81 | let mut tt = [0; 10]; 82 | for i in 0..10 { 83 | let begin = core_cycles(); 84 | x = x.xsquare(6000); 85 | let end = core_cycles(); 86 | tt[i] = end.wrapping_sub(begin); 87 | } 88 | tt.sort(); 89 | println!("GF25519 square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 90 | } 91 | 92 | fn bench_gf25519_div() { 93 | let z = core_cycles(); 94 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 95 | z.wrapping_mul(5), z.wrapping_mul(7)); 96 | let mut y = x + GF25519::ONE; 97 | let mut tt = [0; 10]; 98 | for i in 0..10 { 99 | let begin = core_cycles(); 100 | for _ in 0..1000 { 101 | x /= y; 102 | y /= x; 103 | x /= y; 104 | y /= x; 105 | x /= y; 106 | y /= x; 107 | } 108 | let end = core_cycles(); 109 | tt[i] = end.wrapping_sub(begin); 110 | } 111 | tt.sort(); 112 | println!("GF25519 div: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 113 | } 114 | 115 | fn bench_gf25519_sqrt() { 116 | let z = core_cycles(); 117 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 118 | z.wrapping_mul(5), z.wrapping_mul(7)); 119 | let mut tt = [0; 10]; 120 | for i in 0..10 { 121 | let begin = core_cycles(); 122 | for _ in 0..6000 { 123 | let (x2, _) = x.sqrt(); 124 | x = x2 + GF25519::ONE; 125 | } 126 | let end = core_cycles(); 127 | tt[i] = end.wrapping_sub(begin); 128 | } 129 | tt.sort(); 130 | println!("GF25519 sqrt: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 131 | } 132 | 133 | fn bench_gf25519_legendre() { 134 | let z = core_cycles(); 135 | let mut x 
= GF25519::w64le(z, z.wrapping_mul(3), 136 | z.wrapping_mul(5), z.wrapping_mul(7)); 137 | let mut tt = [0; 10]; 138 | for i in 0..10 { 139 | let begin = core_cycles(); 140 | for _ in 0..6000 { 141 | let ls = x.legendre(); 142 | x += GF25519::w64le(ls as u64, ls as u64, ls as u64, ls as u64); 143 | } 144 | let end = core_cycles(); 145 | tt[i] = end.wrapping_sub(begin); 146 | } 147 | tt.sort(); 148 | println!("GF25519 legendre: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 149 | } 150 | 151 | fn main() { 152 | bench_gf25519_add(); 153 | bench_gf25519_sub(); 154 | bench_gf25519_mul(); 155 | bench_gf25519_square(); 156 | bench_gf25519_div(); 157 | bench_gf25519_sqrt(); 158 | bench_gf25519_legendre(); 159 | } 160 | -------------------------------------------------------------------------------- /benches/gf255e.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "gf255e")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::field::GF255e; 7 | 8 | fn bench_gf255e_add() { 9 | let z = core_cycles(); 10 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 11 | z.wrapping_mul(5), z.wrapping_mul(7)); 12 | let mut y = x + GF255e::ONE; 13 | let mut tt = [0; 10]; 14 | for i in 0..10 { 15 | let begin = core_cycles(); 16 | for _ in 0..1000 { 17 | x += y; 18 | y += x; 19 | x += y; 20 | y += x; 21 | x += y; 22 | y += x; 23 | } 24 | let end = core_cycles(); 25 | tt[i] = end.wrapping_sub(begin); 26 | } 27 | tt.sort(); 28 | println!("GF255e add: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 29 | } 30 | 31 | fn bench_gf255e_sub() { 32 | let z = core_cycles(); 33 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 34 | z.wrapping_mul(5), z.wrapping_mul(7)); 35 | let mut y = x + GF255e::ONE; 36 | let mut tt = [0; 10]; 37 | for i in 0..10 { 38 | let begin = core_cycles(); 39 | for _ in 0..1000 { 40 | x -= y; 41 | y -= x; 42 | x -= y; 43 | y -= x; 44 | x -= y; 45 | y -= x; 46 | } 47 | let end = core_cycles(); 
48 | tt[i] = end.wrapping_sub(begin); 49 | } 50 | tt.sort(); 51 | println!("GF255e sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 52 | } 53 | 54 | fn bench_gf255e_mul() { 55 | let z = core_cycles(); 56 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 57 | z.wrapping_mul(5), z.wrapping_mul(7)); 58 | let mut y = x + GF255e::ONE; 59 | let mut tt = [0; 10]; 60 | for i in 0..10 { 61 | let begin = core_cycles(); 62 | for _ in 0..1000 { 63 | x *= y; 64 | y *= x; 65 | x *= y; 66 | y *= x; 67 | x *= y; 68 | y *= x; 69 | } 70 | let end = core_cycles(); 71 | tt[i] = end.wrapping_sub(begin); 72 | } 73 | tt.sort(); 74 | println!("GF255e mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 75 | } 76 | 77 | fn bench_gf255e_square() { 78 | let z = core_cycles(); 79 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 80 | z.wrapping_mul(5), z.wrapping_mul(7)); 81 | let mut tt = [0; 10]; 82 | for i in 0..10 { 83 | let begin = core_cycles(); 84 | x = x.xsquare(6000); 85 | let end = core_cycles(); 86 | tt[i] = end.wrapping_sub(begin); 87 | } 88 | tt.sort(); 89 | println!("GF255e square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 90 | } 91 | 92 | fn bench_gf255e_div() { 93 | let z = core_cycles(); 94 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 95 | z.wrapping_mul(5), z.wrapping_mul(7)); 96 | let mut y = x + GF255e::ONE; 97 | let mut tt = [0; 10]; 98 | for i in 0..10 { 99 | let begin = core_cycles(); 100 | for _ in 0..1000 { 101 | x /= y; 102 | y /= x; 103 | x /= y; 104 | y /= x; 105 | x /= y; 106 | y /= x; 107 | } 108 | let end = core_cycles(); 109 | tt[i] = end.wrapping_sub(begin); 110 | } 111 | tt.sort(); 112 | println!("GF255e div: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 113 | } 114 | 115 | fn bench_gf255e_sqrt() { 116 | let z = core_cycles(); 117 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 118 | z.wrapping_mul(5), z.wrapping_mul(7)); 119 | let mut tt = [0; 10]; 120 | for i in 0..10 { 121 | let begin = 
core_cycles(); 122 | for _ in 0..6000 { 123 | let (x2, _) = x.sqrt(); 124 | x = x2 + GF255e::ONE; 125 | } 126 | let end = core_cycles(); 127 | tt[i] = end.wrapping_sub(begin); 128 | } 129 | tt.sort(); 130 | println!("GF255e sqrt: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 131 | } 132 | 133 | fn bench_gf255e_legendre() { 134 | let z = core_cycles(); 135 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 136 | z.wrapping_mul(5), z.wrapping_mul(7)); 137 | let mut tt = [0; 10]; 138 | for i in 0..10 { 139 | let begin = core_cycles(); 140 | for _ in 0..6000 { 141 | let ls = x.legendre(); 142 | x += GF255e::w64le(ls as u64, ls as u64, ls as u64, ls as u64); 143 | } 144 | let end = core_cycles(); 145 | tt[i] = end.wrapping_sub(begin); 146 | } 147 | tt.sort(); 148 | println!("GF255e legendre: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 149 | } 150 | 151 | fn main() { 152 | bench_gf255e_add(); 153 | bench_gf255e_sub(); 154 | bench_gf255e_mul(); 155 | bench_gf255e_square(); 156 | bench_gf255e_div(); 157 | bench_gf255e_sqrt(); 158 | bench_gf255e_legendre(); 159 | } 160 | -------------------------------------------------------------------------------- /benches/gf448.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "gf448")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::field::GF448; 7 | 8 | fn bench_gf448_add() { 9 | let z = core_cycles(); 10 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 11 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 12 | z.wrapping_mul(13) ]); 13 | let mut y = x + GF448::ONE; 14 | let mut tt = [0; 10]; 15 | for i in 0..30 { 16 | let begin = core_cycles(); 17 | for _ in 0..1000 { 18 | x += y; 19 | y += x; 20 | x += y; 21 | y += x; 22 | x += y; 23 | y += x; 24 | } 25 | let end = core_cycles(); 26 | if i >= 20 { 27 | tt[i - 20] = end.wrapping_sub(begin); 28 | } 29 | } 30 | tt.sort(); 31 | println!("GF448 add: {:11.2} 
({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 32 | } 33 | 34 | fn bench_gf448_sub() { 35 | let z = core_cycles(); 36 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 37 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 38 | z.wrapping_mul(13) ]); 39 | let mut y = x + GF448::ONE; 40 | let mut tt = [0; 10]; 41 | for i in 0..30 { 42 | let begin = core_cycles(); 43 | for _ in 0..1000 { 44 | x -= y; 45 | y -= x; 46 | x -= y; 47 | y -= x; 48 | x -= y; 49 | y -= x; 50 | } 51 | let end = core_cycles(); 52 | if i >= 20 { 53 | tt[i - 20] = end.wrapping_sub(begin); 54 | } 55 | } 56 | tt.sort(); 57 | println!("GF448 sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 58 | } 59 | 60 | fn bench_gf448_mul() { 61 | let z = core_cycles(); 62 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 63 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 64 | z.wrapping_mul(13) ]); 65 | let mut y = x + GF448::ONE; 66 | let mut tt = [0; 10]; 67 | for i in 0..30 { 68 | let begin = core_cycles(); 69 | for _ in 0..1000 { 70 | x *= y; 71 | y *= x; 72 | x *= y; 73 | y *= x; 74 | x *= y; 75 | y *= x; 76 | } 77 | let end = core_cycles(); 78 | if i >= 20 { 79 | tt[i - 20] = end.wrapping_sub(begin); 80 | } 81 | } 82 | tt.sort(); 83 | println!("GF448 mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 84 | } 85 | 86 | fn bench_gf448_square() { 87 | let z = core_cycles(); 88 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 89 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 90 | z.wrapping_mul(13) ]); 91 | let mut tt = [0; 10]; 92 | for i in 0..30 { 93 | let begin = core_cycles(); 94 | x = x.xsquare(6000); 95 | let end = core_cycles(); 96 | if i >= 20 { 97 | tt[i - 20] = end.wrapping_sub(begin); 98 | } 99 | } 100 | tt.sort(); 101 | println!("GF448 square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 102 | } 103 | 104 | fn bench_gf448_div() { 105 | let z = 
core_cycles(); 106 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 107 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 108 | z.wrapping_mul(13) ]); 109 | let mut y = x + GF448::ONE; 110 | let mut tt = [0; 10]; 111 | for i in 0..30 { 112 | let begin = core_cycles(); 113 | for _ in 0..1000 { 114 | x /= y; 115 | y /= x; 116 | x /= y; 117 | y /= x; 118 | x /= y; 119 | y /= x; 120 | } 121 | let end = core_cycles(); 122 | if i >= 20 { 123 | tt[i - 20] = end.wrapping_sub(begin); 124 | } 125 | } 126 | tt.sort(); 127 | println!("GF448 div: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 128 | } 129 | 130 | fn bench_gf448_sqrt() { 131 | let z = core_cycles(); 132 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 133 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 134 | z.wrapping_mul(13) ]); 135 | let mut tt = [0; 10]; 136 | for i in 0..30 { 137 | let begin = core_cycles(); 138 | for _ in 0..6000 { 139 | let (x2, _) = x.sqrt(); 140 | x += x2 + GF448::ONE; 141 | } 142 | let end = core_cycles(); 143 | if i >= 20 { 144 | tt[i - 20] = end.wrapping_sub(begin); 145 | } 146 | } 147 | tt.sort(); 148 | println!("GF448 sqrt: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 149 | } 150 | 151 | fn bench_gf448_legendre() { 152 | let z = core_cycles(); 153 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 154 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 155 | z.wrapping_mul(13) ]); 156 | let mut tt = [0; 10]; 157 | for i in 0..30 { 158 | let begin = core_cycles(); 159 | for _ in 0..6000 { 160 | let ls = x.legendre(); 161 | let ls2 = ls as u64; 162 | x += GF448::w64le([ ls2, ls2, ls2, ls2, ls2, ls2, ls2 ]); 163 | } 164 | let end = core_cycles(); 165 | if i >= 20 { 166 | tt[i - 20] = end.wrapping_sub(begin); 167 | } 168 | } 169 | tt.sort(); 170 | println!("GF448 legendre: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 171 | } 172 | 173 | fn main() { 174 
| bench_gf448_add(); 175 | bench_gf448_sub(); 176 | bench_gf448_mul(); 177 | bench_gf448_square(); 178 | bench_gf448_div(); 179 | bench_gf448_sqrt(); 180 | bench_gf448_legendre(); 181 | 182 | /* 183 | bench_fiat_add(); 184 | bench_fiat_sub(); 185 | bench_fiat_mul(); 186 | bench_fiat_square(); 187 | */ 188 | } 189 | 190 | /* 191 | extern crate fiat_crypto; 192 | use fiat_crypto::p448_solinas_64::*; 193 | 194 | fn bench_fiat_add() { 195 | let z = core_cycles(); 196 | let mut x: fiat_p448_tight_field_element = [ 197 | z & 0x00FFFFFFFFFFFFFF, 198 | z.wrapping_mul(3) & 0x00FFFFFFFFFFFFFF, 199 | z.wrapping_mul(5) & 0x00FFFFFFFFFFFFFF, 200 | z.wrapping_mul(7) & 0x00FFFFFFFFFFFFFF, 201 | z.wrapping_mul(9) & 0x00FFFFFFFFFFFFFF, 202 | z.wrapping_mul(11) & 0x00FFFFFFFFFFFFFF, 203 | z.wrapping_mul(13) & 0x00FFFFFFFFFFFFFF, 204 | z.wrapping_mul(15) & 0x00FFFFFFFFFFFFFF, 205 | ]; 206 | let mut y = x; 207 | y[0] += 1; 208 | let mut tt = [0; 10]; 209 | for i in 0..30 { 210 | let mut z: fiat_p448_loose_field_element = [0u64; 8]; 211 | let begin = core_cycles(); 212 | for _ in 0..1000 { 213 | fiat_p448_add(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 214 | fiat_p448_add(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 215 | fiat_p448_add(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 216 | fiat_p448_add(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 217 | fiat_p448_add(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 218 | fiat_p448_add(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 219 | } 220 | let end = core_cycles(); 221 | if i >= 20 { 222 | tt[i - 20] = end.wrapping_sub(begin); 223 | } 224 | } 225 | tt.sort(); 226 | println!("fc448 add: {:11.2} ({})", (tt[4] as f64) / 6000.0, x[0] as u8); 227 | } 228 | 229 | fn bench_fiat_sub() { 230 | let z = core_cycles(); 231 | let mut x: fiat_p448_tight_field_element = [ 232 | z & 0x00FFFFFFFFFFFFFF, 233 | z.wrapping_mul(3) & 0x00FFFFFFFFFFFFFF, 234 | z.wrapping_mul(5) & 0x00FFFFFFFFFFFFFF, 235 | z.wrapping_mul(7) & 0x00FFFFFFFFFFFFFF, 236 | 
z.wrapping_mul(9) & 0x00FFFFFFFFFFFFFF, 237 | z.wrapping_mul(11) & 0x00FFFFFFFFFFFFFF, 238 | z.wrapping_mul(13) & 0x00FFFFFFFFFFFFFF, 239 | z.wrapping_mul(15) & 0x00FFFFFFFFFFFFFF, 240 | ]; 241 | let mut y = x; 242 | y[0] += 1; 243 | let mut tt = [0; 10]; 244 | for i in 0..30 { 245 | let mut z: fiat_p448_loose_field_element = [0u64; 8]; 246 | let begin = core_cycles(); 247 | for _ in 0..1000 { 248 | fiat_p448_sub(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 249 | fiat_p448_sub(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 250 | fiat_p448_sub(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 251 | fiat_p448_sub(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 252 | fiat_p448_sub(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 253 | fiat_p448_sub(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 254 | } 255 | let end = core_cycles(); 256 | if i >= 20 { 257 | tt[i - 20] = end.wrapping_sub(begin); 258 | } 259 | } 260 | tt.sort(); 261 | println!("fc448 sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x[0] as u8); 262 | } 263 | 264 | fn bench_fiat_mul() { 265 | let z = core_cycles(); 266 | let mut x: fiat_p448_loose_field_element = [ 267 | z & 0x00FFFFFFFFFFFFFF, 268 | z.wrapping_mul(3) & 0x00FFFFFFFFFFFFFF, 269 | z.wrapping_mul(5) & 0x00FFFFFFFFFFFFFF, 270 | z.wrapping_mul(7) & 0x00FFFFFFFFFFFFFF, 271 | z.wrapping_mul(9) & 0x00FFFFFFFFFFFFFF, 272 | z.wrapping_mul(11) & 0x00FFFFFFFFFFFFFF, 273 | z.wrapping_mul(13) & 0x00FFFFFFFFFFFFFF, 274 | z.wrapping_mul(15) & 0x00FFFFFFFFFFFFFF, 275 | ]; 276 | let mut y = x; 277 | y[0] += 1; 278 | let mut tt = [0; 10]; 279 | for i in 0..30 { 280 | let mut z: fiat_p448_tight_field_element = [0u64; 8]; 281 | let begin = core_cycles(); 282 | for _ in 0..1000 { 283 | fiat_p448_carry_mul(&mut z, &x, &y); fiat_p448_relax(&mut x, &z); 284 | fiat_p448_carry_mul(&mut z, &y, &x); fiat_p448_relax(&mut y, &z); 285 | fiat_p448_carry_mul(&mut z, &x, &y); fiat_p448_relax(&mut x, &z); 286 | fiat_p448_carry_mul(&mut z, &y, &x); fiat_p448_relax(&mut y, &z); 287 | 
fiat_p448_carry_mul(&mut z, &x, &y); fiat_p448_relax(&mut x, &z); 288 | fiat_p448_carry_mul(&mut z, &y, &x); fiat_p448_relax(&mut y, &z); 289 | } 290 | let end = core_cycles(); 291 | if i >= 20 { 292 | tt[i - 20] = end.wrapping_sub(begin); 293 | } 294 | } 295 | tt.sort(); 296 | println!("fc448 mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x[0] as u8); 297 | } 298 | 299 | fn bench_fiat_square() { 300 | let z = core_cycles(); 301 | let mut x: fiat_p448_loose_field_element = [ 302 | z & 0x00FFFFFFFFFFFFFF, 303 | z.wrapping_mul(3) & 0x00FFFFFFFFFFFFFF, 304 | z.wrapping_mul(5) & 0x00FFFFFFFFFFFFFF, 305 | z.wrapping_mul(7) & 0x00FFFFFFFFFFFFFF, 306 | z.wrapping_mul(9) & 0x00FFFFFFFFFFFFFF, 307 | z.wrapping_mul(11) & 0x00FFFFFFFFFFFFFF, 308 | z.wrapping_mul(13) & 0x00FFFFFFFFFFFFFF, 309 | z.wrapping_mul(15) & 0x00FFFFFFFFFFFFFF, 310 | ]; 311 | let mut tt = [0; 10]; 312 | for i in 0..30 { 313 | let mut z: fiat_p448_tight_field_element = [0u64; 8]; 314 | let begin = core_cycles(); 315 | for _ in 0..1000 { 316 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 317 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 318 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 319 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 320 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 321 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 322 | } 323 | let end = core_cycles(); 324 | if i >= 20 { 325 | tt[i - 20] = end.wrapping_sub(begin); 326 | } 327 | } 328 | tt.sort(); 329 | println!("fc448 square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x[0] as u8); 330 | } 331 | */ 332 | -------------------------------------------------------------------------------- /benches/gls254.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "gls254")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use 
crrl::gls254::{Point, Scalar, PrivateKey};
use sha2::{Sha256, Digest};

/// Measures a generator multiplication (cycles per operation); the returned
/// byte keeps the computation observable.
fn bench_mulgen() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    // Fill the seed with four copies of the cycle counter.
    for k in 0..4 {
        seed[(8 * k)..(8 * k + 8)].copy_from_slice(&z.to_le_bytes());
    }
    let mut s = Scalar::decode_reduce(&seed);
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let P = Point::mulgen(&s);
            // Data-dependent update to chain the iterations.
            if P.isneutral() != 0 {
                s += Scalar::ZERO;
            } else {
                s += Scalar::ONE;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    // Report the median of the per-batch timings.
    ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0])
}

/// Measures a generic point multiplication (cycles per operation).
fn bench_mul() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    for k in 0..4 {
        seed[(8 * k)..(8 * k + 8)].copy_from_slice(&z.to_le_bytes());
    }
    let mut s = Scalar::decode_reduce(&seed);
    let mut P = Point::mulgen(&s);
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            P *= s;
            if P.isneutral() != 0 {
                s += Scalar::ZERO;
            } else {
                s += Scalar::ONE;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0])
}

/// Measures private-key decoding; the top bits of the seed are masked
/// so the candidate stays in the valid range.
fn bench_skey_load() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    seed[0..8].copy_from_slice(&z.to_le_bytes());
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let skey = PrivateKey::decode(&seed).unwrap();
            seed[..].copy_from_slice(&skey.public_key.encode());
            seed[31] &= 0x1Fu8;
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, seed[0])
}

/// Measures signature generation (empty domain separation string).
fn bench_skey_sign() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    seed[0..8].copy_from_slice(&z.to_le_bytes());
    let mut sh = Sha256::new();
    sh.update(&seed);
    seed[..].copy_from_slice(&sh.finalize());
    seed[31] &= 0x1Fu8;
    let skey = PrivateKey::decode(&seed).unwrap();
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let sig = skey.sign("", &msg);
            // Chain: next message depends on the previous signature.
            msg[..].copy_from_slice(&sig[0..32]);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, msg[0])
}

/// Measures signature verification (over a batch of 128 signatures).
fn bench_pkey_verify() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    seed[0..8].copy_from_slice(&z.to_le_bytes());
    let mut sh = Sha256::new();
    sh.update(&seed);
    seed[..].copy_from_slice(&sh.finalize());
    seed[31] &= 0x1Fu8;
    let skey = PrivateKey::decode(&seed).unwrap();
    let pkey = skey.public_key;
    // Pre-generate 128 signatures over distinct messages.
    let mut sigs = [[0u8; 48]; 128];
    for i in 0..128 {
        let msg = [i as u8; 32];
        let sig = skey.sign("", &msg);
        sigs[i][..].copy_from_slice(&sig);
    }
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for j in 0..128 {
            let ff = pkey.verify(&sigs[j], "", &msg);
            // Perturb signature and message based on the outcome so the
            // verification result is actually used.
            sigs[j][40] ^= 1u8.wrapping_add(ff as u8);
            msg[3] ^= 3u8.wrapping_sub(ff as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 128.0, msg[0])
}

/// Measures point decoding from its 32-byte representation.
fn bench_decode() -> (f64, u8) {
    let z = core_cycles();
    let mut buf = [0u8; 32];
    for k in 0..4 {
        buf[(8 * k)..(8 * k + 8)].copy_from_slice(&z.to_le_bytes());
    }
    let mut tt = [0; 10];
    let mut P = Point::NEUTRAL;
    let Q = Point::BASE * z;
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let r = P.set_decode(&buf);
            buf[0] = buf[0].wrapping_add(1);
            buf[1] = buf[1].wrapping_add(r as u8);
            buf[2] = buf[2].wrapping_add(P.equals(Q) as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, buf[0])
}

/// Measures point encoding.
fn bench_encode() -> (f64, u8) {
    let z = core_cycles();
    let mut P = Point::BASE * z;
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let x = P.encode()[0];
            // Conditionally negate so successive encodings differ.
            if x & 1 == 0 {
                P = -P;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, P.encode()[0])
}

/// Measures hash-to-curve (empty domain separation string).
fn bench_hash_to_curve() -> (f64, u8) {
    let mut buf = [0u8; 32];
    for i in 0..4 {
        let z = core_cycles();
        buf[(8 * i)..(8 * i + 8)].copy_from_slice(&z.to_le_bytes());
    }
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let P = Point::hash_to_curve("", &buf);
            buf[0] += P.isneutral() as u8;
            buf[1] += 3;
            buf[2] += 5;
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, buf[0])
}

fn bench_split_mu() -> (f64, u8) {
    let z = core_cycles();
    let mut x = Scalar::from_u64(z);
    x.set_xsquare(5);
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..1000
{ 212 | let (k0, s0, k1, s1) = Point::split_mu(&x); 213 | let mut buf = [0u8; 24]; 214 | buf[..16].copy_from_slice(&(k0 ^ k1).to_le_bytes()); 215 | buf[16..20].copy_from_slice(&s0.to_le_bytes()); 216 | buf[20..24].copy_from_slice(&s1.to_le_bytes()); 217 | x.set_decode_reduce(&buf); 218 | } 219 | let end = core_cycles(); 220 | tt[i] = end.wrapping_sub(begin); 221 | } 222 | tt.sort(); 223 | ((tt[4] as f64) / 1000.0, x.encode()[0]) 224 | } 225 | 226 | #[cfg(feature = "gls254bench")] 227 | fn bench_raw_ecdh_1dt_3() -> (f64, u8) { 228 | let z = core_cycles(); 229 | let mut seed = [0u8; 32]; 230 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 231 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 232 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 233 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 234 | let mut sk = Scalar::decode_reduce(&seed).encode(); 235 | let mut pp: [u8; 64] = [ 236 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 237 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 238 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 239 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 240 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 241 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 242 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 243 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 244 | ]; 245 | pp = Point::for_benchmarks_only_1dt_3(&pp, &sk).unwrap(); 246 | let mut tt = [0; 100]; 247 | for i in 0..tt.len() { 248 | let begin = core_cycles(); 249 | for _ in 0..100 { 250 | sk[..].copy_from_slice(&pp[..32]); 251 | sk[31] &= 0x1F; 252 | pp = Point::for_benchmarks_only_1dt_3(&pp, &sk).unwrap(); 253 | } 254 | let end = core_cycles(); 255 | tt[i] = end.wrapping_sub(begin); 256 | } 257 | tt.sort(); 258 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 259 | } 260 | 261 | #[cfg(feature = "gls254bench")] 262 | fn bench_raw_ecdh_1dt_4() -> (f64, u8) { 263 | let z = core_cycles(); 264 | let mut seed = [0u8; 32]; 265 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 266 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 267 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 268 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 269 | let mut sk = Scalar::decode_reduce(&seed).encode(); 270 | let mut pp: [u8; 64] = [ 271 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 272 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 273 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 274 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 275 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 276 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 277 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 278 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 279 | ]; 280 | pp = Point::for_benchmarks_only_1dt_4(&pp, &sk).unwrap(); 281 | let mut tt = [0; 100]; 282 | for i in 0..tt.len() { 283 | let begin = core_cycles(); 284 | for _ in 0..100 { 285 | sk[..].copy_from_slice(&pp[..32]); 286 | sk[31] &= 0x1F; 287 | pp = Point::for_benchmarks_only_1dt_4(&pp, &sk).unwrap(); 288 | } 289 | let end = core_cycles(); 290 | tt[i] = end.wrapping_sub(begin); 291 | } 292 | tt.sort(); 293 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 294 | } 295 | 296 | #[cfg(feature = "gls254bench")] 297 | fn bench_raw_ecdh_1dt_5() -> (f64, u8) { 298 | let z = core_cycles(); 299 | let mut seed = [0u8; 32]; 300 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 301 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 302 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 303 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 304 | let mut sk = Scalar::decode_reduce(&seed).encode(); 305 | let mut pp: [u8; 64] = [ 306 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 307 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 308 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 309 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 310 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 311 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 312 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 313 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 314 | ]; 315 | pp = Point::for_benchmarks_only_1dt_5(&pp, &sk).unwrap(); 316 | let mut tt = [0; 100]; 317 | for i in 0..tt.len() { 318 | let begin = core_cycles(); 319 | for _ in 0..100 { 320 | sk[..].copy_from_slice(&pp[..32]); 321 | sk[31] &= 0x1F; 322 | pp = Point::for_benchmarks_only_1dt_5(&pp, &sk).unwrap(); 323 | } 324 | let end = core_cycles(); 325 | tt[i] = end.wrapping_sub(begin); 326 | } 327 | tt.sort(); 328 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 329 | } 330 | 331 | #[cfg(feature = "gls254bench")] 332 | fn bench_raw_ecdh_2dt_2() -> (f64, u8) { 333 | let z = core_cycles(); 334 | let mut seed = [0u8; 32]; 335 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 336 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 337 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 338 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 339 | let mut sk = Scalar::decode_reduce(&seed).encode(); 340 | let mut pp: [u8; 64] = [ 341 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 342 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 343 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 344 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 345 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 346 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 347 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 348 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 349 | ]; 350 | pp = Point::for_benchmarks_only_2dt_2(&pp, &sk).unwrap(); 351 | let mut tt = [0; 100]; 352 | for i in 0..tt.len() { 353 | let begin = core_cycles(); 354 | for _ in 0..100 { 355 | sk[..].copy_from_slice(&pp[..32]); 356 | sk[31] &= 0x1F; 357 | pp = Point::for_benchmarks_only_2dt_2(&pp, &sk).unwrap(); 358 | } 359 | let end = core_cycles(); 360 | tt[i] = end.wrapping_sub(begin); 361 | } 362 | tt.sort(); 363 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 364 | } 365 | 366 | #[cfg(feature = "gls254bench")] 367 | fn bench_raw_ecdh_2dt_3() -> (f64, u8) { 368 | let z = core_cycles(); 369 | let mut seed = [0u8; 32]; 370 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 371 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 372 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 373 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 374 | let mut sk = Scalar::decode_reduce(&seed).encode(); 375 | let mut pp: [u8; 64] = [ 376 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 377 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 378 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 379 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 380 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 381 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 382 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 383 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 384 | ]; 385 | pp = Point::for_benchmarks_only_2dt_3(&pp, &sk).unwrap(); 386 | let mut tt = [0; 100]; 387 | for i in 0..tt.len() { 388 | let begin = core_cycles(); 389 | for _ in 0..100 { 390 | sk[..].copy_from_slice(&pp[..32]); 391 | sk[31] &= 0x1F; 392 | pp = Point::for_benchmarks_only_2dt_3(&pp, &sk).unwrap(); 393 | } 394 | let end = core_cycles(); 395 | tt[i] = end.wrapping_sub(begin); 396 | } 397 | tt.sort(); 398 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 399 | } 400 | 401 | fn main() { 402 | let mut bx = 0u8; 403 | 404 | let (v, x) = bench_mul(); 405 | bx ^= x; 406 | println!("GLS254 point mul: {:13.2}", v); 407 | #[cfg(feature = "gls254bench")] 408 | { 409 | let (v, x) = bench_raw_ecdh_1dt_3(); 410 | bx ^= x; 411 | println!("GLS254 raw_ECDH 1DT-3: {:13.2}", v); 412 | let (v, x) = bench_raw_ecdh_1dt_4(); 413 | bx ^= x; 414 | println!("GLS254 raw_ECDH 1DT-4: {:13.2}", v); 415 | let (v, x) = bench_raw_ecdh_1dt_5(); 416 | bx ^= x; 417 | println!("GLS254 raw_ECDH 1DT-5: {:13.2}", v); 418 | let (v, x) = bench_raw_ecdh_2dt_2(); 419 | bx ^= x; 420 | println!("GLS254 raw_ECDH 2DT-2: {:13.2}", v); 421 | let (v, x) = bench_raw_ecdh_2dt_3(); 422 | bx ^= x; 423 | println!("GLS254 raw_ECDH 2DT-3: {:13.2}", v); 424 | } 425 | let (v, x) = bench_mulgen(); 426 | bx 
^= x; 427 | println!("GLS254 point mulgen: {:13.2}", v); 428 | let (v, x) = bench_skey_load(); 429 | bx ^= x; 430 | println!("GLS254 skey_load: {:13.2}", v); 431 | let (v, x) = bench_skey_sign(); 432 | bx ^= x; 433 | println!("GLS254 sign: {:13.2}", v); 434 | let (v, x) = bench_pkey_verify(); 435 | bx ^= x; 436 | println!("GLS254 verify: {:13.2}", v); 437 | let (v, x) = bench_decode(); 438 | bx ^= x; 439 | println!("GLS254 decode: {:13.2}", v); 440 | let (v, x) = bench_encode(); 441 | bx ^= x; 442 | println!("GLS254 encode: {:13.2}", v); 443 | let (v, x) = bench_hash_to_curve(); 444 | bx ^= x; 445 | println!("GLS254 hash-to-curve: {:13.2}", v); 446 | let (v, x) = bench_split_mu(); 447 | bx ^= x; 448 | println!("GLS254 split_mu: {:13.2}", v); 449 | 450 | println!("{}", bx); 451 | } 452 | -------------------------------------------------------------------------------- /benches/jq255e.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "jq255e")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::jq255e::{Point, Scalar, PrivateKey}; 8 | use sha2::{Sha256, Digest}; 9 | 10 | fn bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 32]; 13 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | let mut s = Scalar::decode_reduce(&seed); 18 | let mut tt = [0; 100]; 19 | for i in 0..tt.len() { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let P = Point::mulgen(&s); 23 | if P.isneutral() != 0 { 24 | s += Scalar::ZERO; 25 | } else { 26 | s += Scalar::ONE; 27 | } 28 | } 29 | let end = core_cycles(); 30 | tt[i] = end.wrapping_sub(begin); 31 | } 32 | tt.sort(); 33 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 34 | } 35 | 36 | fn bench_mul() -> (f64, u8) { 37 | let z = core_cycles(); 38 | let mut seed = [0u8; 32]; 39 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 40 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 41 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 42 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 43 | let mut s = Scalar::decode_reduce(&seed); 44 | let mut P = Point::mulgen(&s); 45 | let mut tt = [0; 100]; 46 | for i in 0..tt.len() { 47 | let begin = core_cycles(); 48 | for _ in 0..100 { 49 | P *= s; 50 | if P.isneutral() != 0 { 51 | s += Scalar::ZERO; 52 | } else { 53 | s += Scalar::ONE; 54 | } 55 | } 56 | let end = core_cycles(); 57 | tt[i] = end.wrapping_sub(begin); 58 | } 59 | tt.sort(); 60 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 61 | } 62 | 63 | fn bench_mul_add_mulgen() -> (f64, u8) { 64 | let z = core_cycles(); 65 | let mut uu = [Scalar::ZERO; 128]; 66 | let mut vv = [Scalar::ZERO; 128]; 67 | let mut sh = Sha256::new(); 68 | for i in 0..128 { 69 | sh.update(z.to_le_bytes()); 70 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 71 | let b1 = sh.finalize_reset(); 72 | sh.update(z.to_le_bytes()); 73 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 74 | let b2 = sh.finalize_reset(); 75 | uu[i] = Scalar::decode_reduce(&b1); 76 | vv[i] = Scalar::decode_reduce(&b2); 77 | } 78 | let mut tt = [0; 100]; 79 
| let mut P = Point::mulgen(&uu[127]); 80 | for i in 0..tt.len() { 81 | let begin = core_cycles(); 82 | for j in 0..128 { 83 | let ku = (i + j) & 127; 84 | let kv = i.wrapping_sub(j) & 127; 85 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 86 | P += Q; 87 | } 88 | let end = core_cycles(); 89 | tt[i] = end.wrapping_sub(begin); 90 | } 91 | tt.sort(); 92 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode()[0]) 93 | } 94 | 95 | fn bench_skey_load() -> (f64, u8) { 96 | let z = core_cycles(); 97 | let mut seed = [0u8; 32]; 98 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 99 | let mut tt = [0; 100]; 100 | for i in 0..tt.len() { 101 | let begin = core_cycles(); 102 | for _ in 0..100 { 103 | let skey = PrivateKey::decode(&seed).unwrap(); 104 | seed[..].copy_from_slice(&skey.public_key.encode()); 105 | seed[31] &= 0x1Fu8; 106 | } 107 | let end = core_cycles(); 108 | tt[i] = end.wrapping_sub(begin); 109 | } 110 | tt.sort(); 111 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 112 | } 113 | 114 | fn bench_skey_sign() -> (f64, u8) { 115 | let z = core_cycles(); 116 | let mut seed = [0u8; 32]; 117 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 118 | let mut sh = Sha256::new(); 119 | sh.update(&seed); 120 | seed[..].copy_from_slice(&sh.finalize()); 121 | seed[31] &= 0x1Fu8; 122 | let skey = PrivateKey::decode(&seed).unwrap(); 123 | let mut tt = [0; 100]; 124 | let mut msg = [0u8; 32]; 125 | for i in 0..tt.len() { 126 | let begin = core_cycles(); 127 | for _ in 0..100 { 128 | let sig = skey.sign("", &msg); 129 | msg[..].copy_from_slice(&sig[0..32]); 130 | } 131 | let end = core_cycles(); 132 | tt[i] = end.wrapping_sub(begin); 133 | } 134 | tt.sort(); 135 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 136 | } 137 | 138 | fn bench_pkey_verify() -> (f64, u8) { 139 | let z = core_cycles(); 140 | let mut seed = [0u8; 32]; 141 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 142 | let mut sh = Sha256::new(); 143 | sh.update(&seed); 144 | seed[..].copy_from_slice(&sh.finalize()); 
145 | seed[31] &= 0x1Fu8; 146 | let skey = PrivateKey::decode(&seed).unwrap(); 147 | let pkey = skey.public_key; 148 | let mut sigs = [[0u8; 48]; 128]; 149 | for i in 0..128 { 150 | let msg = [i as u8; 32]; 151 | let sig = skey.sign("", &msg); 152 | sigs[i][..].copy_from_slice(&sig); 153 | } 154 | let mut tt = [0; 100]; 155 | let mut msg = [0u8; 32]; 156 | for i in 0..tt.len() { 157 | let begin = core_cycles(); 158 | for j in 0..128 { 159 | let ff = pkey.verify(&sigs[j], "", &msg); 160 | sigs[j][40] ^= 1u8.wrapping_add(ff as u8); 161 | msg[3] ^= 3u8.wrapping_sub(ff as u8); 162 | } 163 | let end = core_cycles(); 164 | tt[i] = end.wrapping_sub(begin); 165 | } 166 | tt.sort(); 167 | ((tt[tt.len() >> 1] as f64) / 128.0, msg[0]) 168 | } 169 | 170 | fn bench_decode() -> (f64, u8) { 171 | let z = core_cycles(); 172 | let mut buf = [0u8; 32]; 173 | buf[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 174 | buf[ 8..16].copy_from_slice(&z.to_le_bytes()); 175 | buf[16..24].copy_from_slice(&z.to_le_bytes()); 176 | buf[24..32].copy_from_slice(&z.to_le_bytes()); 177 | let mut tt = [0; 10]; 178 | let mut P = Point::NEUTRAL; 179 | let Q = Point::BASE * z; 180 | for i in 0..10 { 181 | let begin = core_cycles(); 182 | for _ in 0..100 { 183 | let r = P.set_decode(&buf); 184 | buf[0] = buf[0].wrapping_add(1); 185 | buf[1] = buf[1].wrapping_add(r as u8); 186 | buf[2] = buf[2].wrapping_add(P.equals(Q) as u8); 187 | } 188 | let end = core_cycles(); 189 | tt[i] = end.wrapping_sub(begin); 190 | } 191 | tt.sort(); 192 | ((tt[4] as f64) / 100.0, buf[0]) 193 | } 194 | 195 | fn bench_encode() -> (f64, u8) { 196 | let z = core_cycles(); 197 | let mut P = Point::BASE * z; 198 | let mut tt = [0; 10]; 199 | for i in 0..10 { 200 | let begin = core_cycles(); 201 | for _ in 0..100 { 202 | let x = P.encode()[0]; 203 | if x & 1 == 0 { 204 | P = -P; 205 | } 206 | } 207 | let end = core_cycles(); 208 | tt[i] = end.wrapping_sub(begin); 209 | } 210 | tt.sort(); 211 | ((tt[4] as f64) / 100.0, P.encode()[0]) 212 | 
} 213 | 214 | fn main() { 215 | let mut bx = 0u8; 216 | 217 | let (v, x) = bench_mul(); 218 | bx ^= x; 219 | println!("Jq255e point mul: {:13.2}", v); 220 | let (v, x) = bench_mulgen(); 221 | bx ^= x; 222 | println!("Jq255e point mulgen: {:13.2}", v); 223 | let (v, x) = bench_mul_add_mulgen(); 224 | bx ^= x; 225 | println!("Jq255e point mul_add_mulgen: {:13.2}", v); 226 | let (v, x) = bench_skey_load(); 227 | bx ^= x; 228 | println!("Jq255e skey_load: {:13.2}", v); 229 | let (v, x) = bench_skey_sign(); 230 | bx ^= x; 231 | println!("Jq255e sign: {:13.2}", v); 232 | let (v, x) = bench_pkey_verify(); 233 | bx ^= x; 234 | println!("Jq255e verify: {:13.2}", v); 235 | let (v, x) = bench_decode(); 236 | bx ^= x; 237 | println!("Jq255e decode: {:13.2}", v); 238 | let (v, x) = bench_encode(); 239 | bx ^= x; 240 | println!("Jq255e encode: {:13.2}", v); 241 | 242 | println!("{}", bx); 243 | } 244 | -------------------------------------------------------------------------------- /benches/jq255s.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "jq255s")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::jq255s::{Point, Scalar, PrivateKey}; 8 | use sha2::{Sha256, Digest}; 9 | 10 | fn bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 32]; 13 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | let mut s = Scalar::decode_reduce(&seed); 18 | let mut tt = [0; 100]; 19 | for i in 0..tt.len() { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let P = Point::mulgen(&s); 23 | if P.isneutral() != 0 { 24 | s += Scalar::ZERO; 25 | } else { 26 | s += Scalar::ONE; 27 | } 28 | } 29 | let end = core_cycles(); 30 | tt[i] = end.wrapping_sub(begin); 31 | } 32 | tt.sort(); 33 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 34 | } 35 | 36 | fn bench_mul() -> (f64, u8) { 37 | let z = core_cycles(); 38 | let mut seed = [0u8; 32]; 39 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 40 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 41 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 42 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 43 | let mut s = Scalar::decode_reduce(&seed); 44 | let mut P = Point::mulgen(&s); 45 | let mut tt = [0; 100]; 46 | for i in 0..tt.len() { 47 | let begin = core_cycles(); 48 | for _ in 0..100 { 49 | P *= s; 50 | if P.isneutral() != 0 { 51 | s += Scalar::ZERO; 52 | } else { 53 | s += Scalar::ONE; 54 | } 55 | } 56 | let end = core_cycles(); 57 | tt[i] = end.wrapping_sub(begin); 58 | } 59 | tt.sort(); 60 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 61 | } 62 | 63 | fn bench_mul_add_mulgen() -> (f64, u8) { 64 | let z = core_cycles(); 65 | let mut uu = [Scalar::ZERO; 128]; 66 | let mut vv = [Scalar::ZERO; 128]; 67 | let mut sh = Sha256::new(); 68 | for i in 0..128 { 69 | sh.update(z.to_le_bytes()); 70 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 71 | let b1 = sh.finalize_reset(); 72 | sh.update(z.to_le_bytes()); 73 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 74 | let b2 = sh.finalize_reset(); 75 | uu[i] = Scalar::decode_reduce(&b1); 76 | vv[i] = Scalar::decode_reduce(&b2); 77 | } 78 | let mut tt = [0; 100]; 79 
| let mut P = Point::mulgen(&uu[127]); 80 | for i in 0..tt.len() { 81 | let begin = core_cycles(); 82 | for j in 0..128 { 83 | let ku = (i + j) & 127; 84 | let kv = i.wrapping_sub(j) & 127; 85 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 86 | P += Q; 87 | } 88 | let end = core_cycles(); 89 | tt[i] = end.wrapping_sub(begin); 90 | } 91 | tt.sort(); 92 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode()[0]) 93 | } 94 | 95 | fn bench_skey_load() -> (f64, u8) { 96 | let z = core_cycles(); 97 | let mut seed = [0u8; 32]; 98 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 99 | let mut tt = [0; 100]; 100 | for i in 0..tt.len() { 101 | let begin = core_cycles(); 102 | for _ in 0..100 { 103 | let skey = PrivateKey::decode(&seed).unwrap(); 104 | seed[..].copy_from_slice(&skey.public_key.encode()); 105 | seed[31] &= 0x1Fu8; 106 | } 107 | let end = core_cycles(); 108 | tt[i] = end.wrapping_sub(begin); 109 | } 110 | tt.sort(); 111 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 112 | } 113 | 114 | fn bench_skey_sign() -> (f64, u8) { 115 | let z = core_cycles(); 116 | let mut seed = [0u8; 32]; 117 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 118 | let mut sh = Sha256::new(); 119 | sh.update(&seed); 120 | seed[..].copy_from_slice(&sh.finalize()); 121 | seed[31] &= 0x1Fu8; 122 | let skey = PrivateKey::decode(&seed).unwrap(); 123 | let mut tt = [0; 100]; 124 | let mut msg = [0u8; 32]; 125 | for i in 0..tt.len() { 126 | let begin = core_cycles(); 127 | for _ in 0..100 { 128 | let sig = skey.sign("", &msg); 129 | msg[..].copy_from_slice(&sig[0..32]); 130 | } 131 | let end = core_cycles(); 132 | tt[i] = end.wrapping_sub(begin); 133 | } 134 | tt.sort(); 135 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 136 | } 137 | 138 | fn bench_pkey_verify() -> (f64, u8) { 139 | let z = core_cycles(); 140 | let mut seed = [0u8; 32]; 141 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 142 | let mut sh = Sha256::new(); 143 | sh.update(&seed); 144 | seed[..].copy_from_slice(&sh.finalize()); 
145 | seed[31] &= 0x1Fu8; 146 | let skey = PrivateKey::decode(&seed).unwrap(); 147 | let pkey = skey.public_key; 148 | let mut sigs = [[0u8; 48]; 128]; 149 | for i in 0..128 { 150 | let msg = [i as u8; 32]; 151 | let sig = skey.sign("", &msg); 152 | sigs[i][..].copy_from_slice(&sig); 153 | } 154 | let mut tt = [0; 100]; 155 | let mut msg = [0u8; 32]; 156 | for i in 0..tt.len() { 157 | let begin = core_cycles(); 158 | for j in 0..128 { 159 | let ff = pkey.verify(&sigs[j], "", &msg); 160 | sigs[j][40] ^= 1u8.wrapping_add(ff as u8); 161 | msg[3] ^= 3u8.wrapping_sub(ff as u8); 162 | } 163 | let end = core_cycles(); 164 | tt[i] = end.wrapping_sub(begin); 165 | } 166 | tt.sort(); 167 | ((tt[tt.len() >> 1] as f64) / 128.0, msg[0]) 168 | } 169 | 170 | fn bench_decode() -> (f64, u8) { 171 | let z = core_cycles(); 172 | let mut buf = [0u8; 32]; 173 | buf[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 174 | buf[ 8..16].copy_from_slice(&z.to_le_bytes()); 175 | buf[16..24].copy_from_slice(&z.to_le_bytes()); 176 | buf[24..32].copy_from_slice(&z.to_le_bytes()); 177 | let mut tt = [0; 10]; 178 | let mut P = Point::NEUTRAL; 179 | let Q = Point::BASE * z; 180 | for i in 0..10 { 181 | let begin = core_cycles(); 182 | for _ in 0..100 { 183 | let r = P.set_decode(&buf); 184 | buf[0] = buf[0].wrapping_add(1); 185 | buf[1] = buf[1].wrapping_add(r as u8); 186 | buf[2] = buf[2].wrapping_add(P.equals(Q) as u8); 187 | } 188 | let end = core_cycles(); 189 | tt[i] = end.wrapping_sub(begin); 190 | } 191 | tt.sort(); 192 | ((tt[4] as f64) / 100.0, buf[0]) 193 | } 194 | 195 | fn bench_encode() -> (f64, u8) { 196 | let z = core_cycles(); 197 | let mut P = Point::BASE * z; 198 | let mut tt = [0; 10]; 199 | for i in 0..10 { 200 | let begin = core_cycles(); 201 | for _ in 0..100 { 202 | let x = P.encode()[0]; 203 | if x & 1 == 0 { 204 | P = -P; 205 | } 206 | } 207 | let end = core_cycles(); 208 | tt[i] = end.wrapping_sub(begin); 209 | } 210 | tt.sort(); 211 | ((tt[4] as f64) / 100.0, P.encode()[0]) 212 | 
} 213 | 214 | fn main() { 215 | let mut bx = 0u8; 216 | 217 | let (v, x) = bench_mul(); 218 | bx ^= x; 219 | println!("Jq255s point mul: {:13.2}", v); 220 | let (v, x) = bench_mulgen(); 221 | bx ^= x; 222 | println!("Jq255s point mulgen: {:13.2}", v); 223 | let (v, x) = bench_mul_add_mulgen(); 224 | bx ^= x; 225 | println!("Jq255s point mul_add_mulgen: {:13.2}", v); 226 | let (v, x) = bench_skey_load(); 227 | bx ^= x; 228 | println!("Jq255s skey_load: {:13.2}", v); 229 | let (v, x) = bench_skey_sign(); 230 | bx ^= x; 231 | println!("Jq255s sign: {:13.2}", v); 232 | let (v, x) = bench_pkey_verify(); 233 | bx ^= x; 234 | println!("Jq255s verify: {:13.2}", v); 235 | let (v, x) = bench_decode(); 236 | bx ^= x; 237 | println!("Jq255s decode: {:13.2}", v); 238 | let (v, x) = bench_encode(); 239 | bx ^= x; 240 | println!("Jq255s encode: {:13.2}", v); 241 | 242 | println!("{}", bx); 243 | } 244 | -------------------------------------------------------------------------------- /benches/modint.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "modint256")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::field::ModInt256; 7 | use sha2::{Sha256, Digest}; 8 | 9 | fn bench_modint256_add() -> (f64, u8) 11 | { 12 | let z = core_cycles(); 13 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 14 | z.wrapping_mul(5), z.wrapping_mul(7)); 15 | let mut y = x + ModInt256::::ONE; 16 | let mut tt = [0; 10]; 17 | for i in 0..10 { 18 | let begin = core_cycles(); 19 | for _ in 0..10000 { 20 | x += y; 21 | y += x; 22 | x += y; 23 | y += x; 24 | x += y; 25 | y += x; 26 | } 27 | let end = core_cycles(); 28 | tt[i] = end.wrapping_sub(begin); 29 | } 30 | tt.sort(); 31 | ((tt[4] as f64) / 60000.0, x.encode32()[0]) 32 | } 33 | 34 | fn bench_modint256_sub() -> (f64, u8) 36 | { 37 | let z = core_cycles(); 38 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 39 | z.wrapping_mul(5), z.wrapping_mul(7)); 40 | let mut y = x + 
ModInt256::::ONE; 41 | let mut tt = [0; 10]; 42 | for i in 0..10 { 43 | let begin = core_cycles(); 44 | for _ in 0..10000 { 45 | x -= y; 46 | y -= x; 47 | x -= y; 48 | y -= x; 49 | x -= y; 50 | y -= x; 51 | } 52 | let end = core_cycles(); 53 | tt[i] = end.wrapping_sub(begin); 54 | } 55 | tt.sort(); 56 | ((tt[4] as f64) / 60000.0, x.encode32()[0]) 57 | } 58 | 59 | fn bench_modint256_mul() -> (f64, u8) 61 | { 62 | let z = core_cycles(); 63 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 64 | z.wrapping_mul(5), z.wrapping_mul(7)); 65 | let mut y = x + ModInt256::::ONE; 66 | let mut tt = [0; 10]; 67 | for i in 0..10 { 68 | let begin = core_cycles(); 69 | for _ in 0..10000 { 70 | x *= y; 71 | y *= x; 72 | x *= y; 73 | y *= x; 74 | x *= y; 75 | y *= x; 76 | } 77 | let end = core_cycles(); 78 | tt[i] = end.wrapping_sub(begin); 79 | } 80 | tt.sort(); 81 | ((tt[4] as f64) / 60000.0, x.encode32()[0]) 82 | } 83 | 84 | fn bench_modint256_square() -> (f64, u8) 86 | { 87 | let z = core_cycles(); 88 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 89 | z.wrapping_mul(5), z.wrapping_mul(7)); 90 | let mut tt = [0; 10]; 91 | for i in 0..10 { 92 | let begin = core_cycles(); 93 | x = x.xsquare(60000); 94 | let end = core_cycles(); 95 | tt[i] = end.wrapping_sub(begin); 96 | } 97 | tt.sort(); 98 | ((tt[4] as f64) / 60000.0, x.encode32()[0]) 99 | } 100 | 101 | fn bench_modint256_div() -> (f64, u8) 103 | { 104 | let z = core_cycles(); 105 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 106 | z.wrapping_mul(5), z.wrapping_mul(7)); 107 | let mut y = x + ModInt256::::ONE; 108 | let mut tt = [0; 10]; 109 | for i in 0..10 { 110 | let begin = core_cycles(); 111 | for _ in 0..1000 { 112 | x /= y; 113 | y /= x; 114 | x /= y; 115 | y /= x; 116 | x /= y; 117 | y /= x; 118 | } 119 | let end = core_cycles(); 120 | tt[i] = end.wrapping_sub(begin); 121 | } 122 | tt.sort(); 123 | ((tt[4] as f64) / 6000.0, x.encode32()[0]) 124 | } 125 | 126 | fn bench_modint256_sqrt() -> (f64, u8) 128 
{
    let z = core_cycles();
    let mut x = ModInt256::<M0, M1, M2, M3>::w64le(z, z.wrapping_mul(3),
        z.wrapping_mul(5), z.wrapping_mul(7));
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..1000 {
            // Chain the output back into the input so that the loop
            // cannot be optimized away.
            let (x2, _) = x.sqrt();
            x = x2 + ModInt256::<M0, M1, M2, M3>::ONE;
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    // Median of the ten measurements; one output byte defeats dead-code
    // elimination in the caller.
    ((tt[4] as f64) / 1000.0, x.encode32()[0])
}

/// Benchmark for the Legendre symbol computation on `ModInt256` with
/// modulus `M3:M2:M1:M0` (64-bit limbs, little-endian order).
///
/// Returns `(cycles_per_op, byte)` where `byte` is one byte of the final
/// accumulator (used by the caller to prevent dead-code elimination).
fn bench_modint256_legendre<const M0: u64, const M1: u64,
                            const M2: u64, const M3: u64>() -> (f64, u8)
{
    let z = core_cycles();
    let mut x = ModInt256::<M0, M1, M2, M3>::w64le(z, z.wrapping_mul(3),
        z.wrapping_mul(5), z.wrapping_mul(7));
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..1000 {
            let ls = x.legendre();
            x += ModInt256::<M0, M1, M2, M3>::w64le(
                ls as u64, ls as u64, ls as u64, ls as u64);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 1000.0, x.encode32()[0])
}

/// Benchmark for the (variable-time) lattice basis splitting on
/// `ModInt256` with modulus `M3:M2:M1:M0`.
fn bench_modint256_split<const M0: u64, const M1: u64,
                         const M2: u64, const M3: u64>() -> (f64, u8)
{
    let z = core_cycles();

    // Generate 512 pseudorandom elements. Number 512 was chosen so that
    // the total in-RAM size is 16 kB, which should fit in L1 cache with
    // enough room.
    let mut vv = [ModInt256::<M0, M1, M2, M3>::ZERO; 512];
    let mut sh = Sha256::new();
    for i in 0..512 {
        sh.update(z.to_le_bytes());
        sh.update((i as u64).to_le_bytes());
        let bb = sh.finalize_reset();
        vv[i] = ModInt256::<M0, M1, M2, M3>::decode_reduce(&bb);
    }

    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for j in 0..512 {
            let (c0, c1) = vv[j].split_vartime();
            let x = c0.wrapping_add(c1);
            vv[(j + 1) & 511] += ModInt256::<M0, M1, M2, M3>::from_i128(x);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 512.0, vv[0].encode32()[0])
}

/// Benchmark for decode-and-reduce of a 48-byte input into `ModInt256`
/// with a generic (non-special) modulus `M3:M2:M1:M0`.
fn bench_modint256_reduce<const M0: u64, const M1: u64,
                          const M2: u64, const M3: u64>() -> (f64, u8)
{
    let mut x = ModInt256::<M0, M1, M2, M3>::ZERO;
    let mut buf = [0u8; 48];
    for i in 0..12 {
        buf[(4 * i)..(4 * i + 4)].copy_from_slice(
            &(core_cycles() as u32).to_le_bytes());
    }
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..10000 {
            x.set_decode_reduce(&buf);
            let xe = x.encode32();
            // Recycle the output into the next input.
            buf[16..].copy_from_slice(&xe);
            buf[..16].copy_from_slice(&xe[8..24]);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 10000.0, buf[0])
}

fn main() {
    let mut bx = 0u8;

    // The three moduli used below are: the P-256 base field prime,
    // 2^255-19, and 2^256-189.
    let (f1, b1) = bench_modint256_add::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_add::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_add::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 add: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_sub::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_sub::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_sub::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 sub: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_mul::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_mul::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_mul::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 mul: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_square::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_square::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_square::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 square: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_div::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_div::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_div::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 div: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_sqrt::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_sqrt::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_sqrt::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 sqrt: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_legendre::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_legendre::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_legendre::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 legendre: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_split::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_split::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_split::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 split (var) {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    // Generic reduction benchmark, with the BN254 scalar field modulus.
    let (f1, b1) = bench_modint256_reduce::<0x43E1F593F0000001, 0x2833E84879B97091, 0xB85045B68181585D, 0x30644E72E131A029>();
    println!("ModInt256 reduce (gen) {:11.2}", f1);
    bx ^= b1;

    println!("{}", bx);
}

// ===== benches/p256.rs =====

#![allow(non_snake_case)]
#![cfg(feature = "p256")]

mod util;
use util::core_cycles;

use crrl::p256::{Point, Scalar, PrivateKey};
use sha2::{Sha256, Digest};

/// Benchmark for the fixed-base (generator) point multiplication.
fn bench_mulgen() -> (f64, u8) {
    let z =
core_cycles();
    let mut seed = [0u8; 32];
    seed[ 0.. 8].copy_from_slice(&z.to_le_bytes());
    seed[ 8..16].copy_from_slice(&z.to_le_bytes());
    seed[16..24].copy_from_slice(&z.to_le_bytes());
    seed[24..32].copy_from_slice(&z.to_le_bytes());
    let mut s = Scalar::decode_reduce(&seed);
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let P = Point::mulgen(&s);
            // Data-dependent update so the multiplication is not elided.
            if P.isneutral() != 0 {
                s += Scalar::ZERO;
            } else {
                s += Scalar::ONE;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0])
}

/// Benchmark for the generic point multiplication.
fn bench_mul() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    seed[ 0.. 8].copy_from_slice(&z.to_le_bytes());
    seed[ 8..16].copy_from_slice(&z.to_le_bytes());
    seed[16..24].copy_from_slice(&z.to_le_bytes());
    seed[24..32].copy_from_slice(&z.to_le_bytes());
    let mut s = Scalar::decode_reduce(&seed);
    let mut P = Point::mulgen(&s);
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            P *= s;
            if P.isneutral() != 0 {
                s += Scalar::ZERO;
            } else {
                s += Scalar::ONE;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0])
}

/// Benchmark for the combined u*P + v*G computation (variable-time),
/// as used in ECDSA signature verification.
fn bench_mul_add_mulgen() -> (f64, u8) {
    let z = core_cycles();
    let mut uu = [Scalar::ZERO; 128];
    let mut vv = [Scalar::ZERO; 128];
    let mut sh = Sha256::new();
    for i in 0..128 {
        sh.update(z.to_le_bytes());
        sh.update(((2 * i + 0) as u64).to_le_bytes());
        let b1 = sh.finalize_reset();
        sh.update(z.to_le_bytes());
        sh.update(((2 * i + 1) as u64).to_le_bytes());
        let b2 = sh.finalize_reset();
        uu[i] = Scalar::decode_reduce(&b1);
        vv[i] = Scalar::decode_reduce(&b2);
    }
    let mut tt = [0; 100];
    let mut P = Point::mulgen(&uu[127]);
    for i in 0..tt.len() {
        let begin = core_cycles();
        for j in 0..128 {
            let ku = (i + j) & 127;
            let kv = i.wrapping_sub(j) & 127;
            let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]);
            P += Q;
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 128.0, P.encode_compressed()[0])
}

/// Benchmark for ECDSA signature generation over a pre-hashed message.
fn bench_skey_sign() -> (f64, u8) {
    let z = core_cycles();
    let mut sh = Sha256::new();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x00u8]);
    let s1 = sh.finalize_reset();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x01u8]);
    let s2 = sh.finalize_reset();
    let mut seed = [0u8; 48];
    seed[..32].copy_from_slice(&s1);
    seed[32..].copy_from_slice(&s2[..16]);
    let skey = PrivateKey::from_seed(&seed);
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let sig = skey.sign_hash(&msg, &[]);
            // Feed part of the signature back as the next message.
            msg[..].copy_from_slice(&sig[0..32]);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, msg[0])
}

/// Benchmark for ECDSA signature verification over a pre-hashed message.
fn bench_pkey_verify() -> (f64, u8) {
    let z = core_cycles();
    let mut sh = Sha256::new();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x00u8]);
    let s1 = sh.finalize_reset();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x01u8]);
    let s2 = sh.finalize_reset();
    let mut seed = [0u8; 48];
    seed[..32].copy_from_slice(&s1);
    seed[32..].copy_from_slice(&s2[..16]);
    let skey = PrivateKey::from_seed(&seed);
    let pkey = skey.to_public_key();
    let mut sigs = [[0u8; 64]; 128];
    for i in 0..128 {
        let msg = [i as u8; 32];
        let sig = skey.sign_hash(&msg, &[]);
        sigs[i][..].copy_from_slice(&sig);
    }
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for j in 0..128 {
            let ff = pkey.verify_hash(&sigs[j], &msg);
            // Mangle the inputs depending on the verification outcome.
            sigs[j][40] ^= 1u8.wrapping_add(ff as u8);
            msg[3] ^= 3u8.wrapping_sub(ff as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 128.0, msg[0])
}

/// Benchmark for verification of truncated signatures (`rm` removed bits).
/// Returns (valid-signature cost, invalid-signature cost, check byte).
#[cfg(feature = "alloc")]
fn bench_pkey_verify_trunc(rm: usize) -> (f64, f64, u8) {
    let z = core_cycles();
    let mut sh = Sha256::new();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x00u8]);
    let s1 = sh.finalize_reset();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x01u8]);
    let s2 = sh.finalize_reset();
    let mut seed = [0u8; 48];
    seed[..32].copy_from_slice(&s1);
    seed[32..].copy_from_slice(&s2[..16]);
    let skey = PrivateKey::from_seed(&seed);
    let pkey = skey.to_public_key();
    let mut sigs = [[0u8; 64]; 256];
    for i in 0..256 {
        let hv = [i as u8; 32];
        let sig = skey.sign_hash(&hv, &[]);
        let sig = PrivateKey::prepare_truncate(&sig).unwrap();
        sigs[i][..].copy_from_slice(&sig);
    }
    let mut x = 0;

    // Phase 1: all signatures are correct.
    let mut tt = [0; 2048];
    for i in 0..tt.len() {
        let hv = [i as u8; 32];
        let begin = core_cycles();
        x ^= (pkey.verify_trunc_hash(&sigs[i % 256], rm, &hv).is_some()) as u8;
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    // Remove 10% slowest and 10% fastest, make an average of the rest.
    let n10 = tt.len() / 10;
    let n80 = tt.len() - 2 * n10;
    let mut s = 0u64;
    for i in n10..(tt.len() - n10) {
        s += tt[i];
    }
    let res1 = (s as f64) / (n80 as f64);

    // Phase 2: all signatures are invalid.
    // Much less variance is expected in that case; we can use a lower
    // number of samples.
    let mut tt = [0; 128];
    for i in 0..tt.len() {
        let hv = [(i + 1) as u8; 32];
        let begin = core_cycles();
        x ^= (pkey.verify_trunc_hash(&sigs[i % 256], rm, &hv).is_some()) as u8;
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    // Remove 10% slowest and 10% fastest, make an average of the rest.
    let n10 = tt.len() / 10;
    let n80 = tt.len() - 2 * n10;
    let mut s = 0u64;
    for i in n10..(tt.len() - n10) {
        s += tt[i];
    }
    let res2 = (s as f64) / (n80 as f64);

    (res1, res2, x)
}

fn main() {
    let mut bx = 0u8;

    let (v, x) = bench_mul();
    bx ^= x;
    println!("P-256 point mul: {:13.2}", v);
    let (v, x) = bench_mulgen();
    bx ^= x;
    println!("P-256 point mulgen: {:13.2}", v);
    let (v, x) = bench_mul_add_mulgen();
    bx ^= x;
    println!("P-256 point mul_add_mulgen: {:13.2}", v);
    let (v, x) = bench_skey_sign();
    bx ^= x;
    println!("P-256 sign: {:13.2}", v);
    let (v, x) = bench_pkey_verify();
    bx ^= x;
    println!("P-256 verify: {:13.2}", v);

    #[cfg(feature = "alloc")]
    {
        let (v1, v2, x) = bench_pkey_verify_trunc(8);
        bx ^= x;
        println!("P-256 verify_trunc8: {:13.2} {:13.2}", v1, v2);
        let (v1, v2, x) = bench_pkey_verify_trunc(16);
        bx ^= x;
        println!("P-256 verify_trunc16: {:13.2} {:13.2}", v1, v2);
        /*
        let (v1, v2, x) = bench_pkey_verify_trunc(24);
        bx ^= x;
        println!("P-256 verify_trunc24: {:13.2} {:13.2}", v1, v2);
        let (v1, v2, x) =
bench_pkey_verify_trunc(28); 258 | bx ^= x; 259 | println!("P-256 verify_trunc28: {:13.2} {:13.2}", v1, v2); 260 | let (v1, v2, x) = bench_pkey_verify_trunc(32); 261 | bx ^= x; 262 | println!("P-256 verify_trunc32: {:13.2} {:13.2}", v1, v2); 263 | */ 264 | } 265 | 266 | println!("{}", bx); 267 | } 268 | -------------------------------------------------------------------------------- /benches/ristretto255.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "ristretto255")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::ristretto255::Point; 8 | 9 | fn bench_decode() -> (f64, u8) { 10 | let z = core_cycles(); 11 | let mut buf = [0u8; 32]; 12 | buf[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 13 | buf[ 8..16].copy_from_slice(&z.to_le_bytes()); 14 | buf[16..24].copy_from_slice(&z.to_le_bytes()); 15 | buf[24..32].copy_from_slice(&z.to_le_bytes()); 16 | let mut tt = [0; 10]; 17 | let mut P = Point::NEUTRAL; 18 | let Q = Point::BASE * z; 19 | for i in 0..10 { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let r = P.set_decode(&buf); 23 | buf[0] = buf[0].wrapping_add(1); 24 | buf[1] = buf[1].wrapping_add(r as u8); 25 | buf[2] = buf[2].wrapping_add(P.equals(Q) as u8); 26 | } 27 | let end = core_cycles(); 28 | tt[i] = end.wrapping_sub(begin); 29 | } 30 | tt.sort(); 31 | ((tt[4] as f64) / 100.0, buf[0]) 32 | } 33 | 34 | fn bench_encode() -> (f64, u8) { 35 | let z = core_cycles(); 36 | let mut P = Point::BASE * z; 37 | let mut tt = [0; 10]; 38 | for i in 0..10 { 39 | let begin = core_cycles(); 40 | for _ in 0..100 { 41 | let x = P.encode()[0]; 42 | if x & 1 == 0 { 43 | P = -P; 44 | } 45 | } 46 | let end = core_cycles(); 47 | tt[i] = end.wrapping_sub(begin); 48 | } 49 | tt.sort(); 50 | ((tt[4] as f64) / 100.0, P.encode()[0]) 51 | } 52 | 53 | fn main() { 54 | let mut bx = 0u8; 55 | 56 | let (v, x) = bench_decode(); 57 | bx ^= x; 58 | println!("Ristretto255 decode: 
{:13.2}", v); 59 | let (v, x) = bench_encode(); 60 | bx ^= x; 61 | println!("Ristretto255 encode: {:13.2}", v); 62 | 63 | println!("{}", bx); 64 | } 65 | -------------------------------------------------------------------------------- /benches/sc448.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "ed448")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::ed448::Scalar; 7 | 8 | fn bench_sc448_add() { 9 | let z = core_cycles(); 10 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 11 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 12 | z.wrapping_mul(13) ]); 13 | let mut y = x + Scalar::ONE; 14 | let mut tt = [0; 10]; 15 | for i in 0..10 { 16 | let begin = core_cycles(); 17 | for _ in 0..1000 { 18 | x += y; 19 | y += x; 20 | x += y; 21 | y += x; 22 | x += y; 23 | y += x; 24 | } 25 | let end = core_cycles(); 26 | tt[i] = end.wrapping_sub(begin); 27 | } 28 | tt.sort(); 29 | println!("sc448 add: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 30 | } 31 | 32 | fn bench_sc448_sub() { 33 | let z = core_cycles(); 34 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 35 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 36 | z.wrapping_mul(13) ]); 37 | let mut y = x + Scalar::ONE; 38 | let mut tt = [0; 10]; 39 | for i in 0..10 { 40 | let begin = core_cycles(); 41 | for _ in 0..1000 { 42 | x -= y; 43 | y -= x; 44 | x -= y; 45 | y -= x; 46 | x -= y; 47 | y -= x; 48 | } 49 | let end = core_cycles(); 50 | tt[i] = end.wrapping_sub(begin); 51 | } 52 | tt.sort(); 53 | println!("sc448 sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 54 | } 55 | 56 | fn bench_sc448_mul() { 57 | let z = core_cycles(); 58 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 59 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 60 | z.wrapping_mul(13) ]); 61 | let mut y = x + Scalar::ONE; 62 | let mut tt = [0; 
10]; 63 | for i in 0..10 { 64 | let begin = core_cycles(); 65 | for _ in 0..1000 { 66 | x *= y; 67 | y *= x; 68 | x *= y; 69 | y *= x; 70 | x *= y; 71 | y *= x; 72 | } 73 | let end = core_cycles(); 74 | tt[i] = end.wrapping_sub(begin); 75 | } 76 | tt.sort(); 77 | println!("sc448 mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 78 | } 79 | 80 | fn bench_sc448_square() { 81 | let z = core_cycles(); 82 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 83 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 84 | z.wrapping_mul(13) ]); 85 | let mut tt = [0; 10]; 86 | for i in 0..10 { 87 | let begin = core_cycles(); 88 | x = x.xsquare(6000); 89 | let end = core_cycles(); 90 | tt[i] = end.wrapping_sub(begin); 91 | } 92 | tt.sort(); 93 | println!("sc448 square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 94 | } 95 | 96 | fn bench_sc448_div() { 97 | let z = core_cycles(); 98 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 99 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 100 | z.wrapping_mul(13) ]); 101 | let mut y = x + Scalar::ONE; 102 | let mut tt = [0; 10]; 103 | for i in 0..10 { 104 | let begin = core_cycles(); 105 | for _ in 0..1000 { 106 | x /= y; 107 | y /= x; 108 | x /= y; 109 | y /= x; 110 | x /= y; 111 | y /= x; 112 | } 113 | let end = core_cycles(); 114 | tt[i] = end.wrapping_sub(begin); 115 | } 116 | tt.sort(); 117 | println!("sc448 div: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 118 | } 119 | 120 | fn bench_sc448_sqrt() { 121 | let z = core_cycles(); 122 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 123 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 124 | z.wrapping_mul(13) ]); 125 | let mut tt = [0; 10]; 126 | for i in 0..10 { 127 | let begin = core_cycles(); 128 | for _ in 0..6000 { 129 | let (x2, _) = x.sqrt(); 130 | x += x2 + Scalar::ONE; 131 | } 132 | let end = core_cycles(); 133 | tt[i] = 
end.wrapping_sub(begin); 134 | } 135 | tt.sort(); 136 | println!("sc448 sqrt: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 137 | } 138 | 139 | fn bench_sc448_legendre() { 140 | let z = core_cycles(); 141 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 142 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 143 | z.wrapping_mul(13) ]); 144 | let mut tt = [0; 10]; 145 | for i in 0..10 { 146 | let begin = core_cycles(); 147 | for _ in 0..6000 { 148 | let ls = x.legendre(); 149 | x += Scalar::from_w64le([ ls as u64, ls as u64, ls as u64, 150 | ls as u64, ls as u64, ls as u64, ls as u64 ]); 151 | } 152 | let end = core_cycles(); 153 | tt[i] = end.wrapping_sub(begin); 154 | } 155 | tt.sort(); 156 | println!("sc448 legendre: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 157 | } 158 | 159 | fn main() { 160 | bench_sc448_add(); 161 | bench_sc448_sub(); 162 | bench_sc448_mul(); 163 | bench_sc448_square(); 164 | bench_sc448_div(); 165 | bench_sc448_sqrt(); 166 | bench_sc448_legendre(); 167 | } 168 | -------------------------------------------------------------------------------- /benches/secp256k1.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "secp256k1")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::secp256k1::{Point, Scalar, PrivateKey}; 8 | use sha2::{Sha256, Digest}; 9 | 10 | fn bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 32]; 13 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | let mut s = Scalar::decode_reduce(&seed); 18 | let mut tt = [0; 100]; 19 | for i in 0..tt.len() { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let P = Point::mulgen(&s); 23 | if P.isneutral() != 0 { 24 | s += Scalar::ZERO; 25 | } else { 26 | s += Scalar::ONE; 27 | } 28 | } 29 | let end = core_cycles(); 30 | tt[i] = end.wrapping_sub(begin); 31 | } 32 | tt.sort(); 33 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 34 | } 35 | 36 | fn bench_mul() -> (f64, u8) { 37 | let z = core_cycles(); 38 | let mut seed = [0u8; 32]; 39 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 40 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 41 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 42 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 43 | let mut s = Scalar::decode_reduce(&seed); 44 | let mut P = Point::mulgen(&s); 45 | let mut tt = [0; 100]; 46 | for i in 0..tt.len() { 47 | let begin = core_cycles(); 48 | for _ in 0..100 { 49 | P *= s; 50 | if P.isneutral() != 0 { 51 | s += Scalar::ZERO; 52 | } else { 53 | s += Scalar::ONE; 54 | } 55 | } 56 | let end = core_cycles(); 57 | tt[i] = end.wrapping_sub(begin); 58 | } 59 | tt.sort(); 60 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 61 | } 62 | 63 | fn bench_mul_add_mulgen() -> (f64, u8) { 64 | let z = core_cycles(); 65 | let mut uu = [Scalar::ZERO; 128]; 66 | let mut vv = [Scalar::ZERO; 128]; 67 | let mut sh = Sha256::new(); 68 | for i in 0..128 { 69 | sh.update(z.to_le_bytes()); 70 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 71 | let b1 = sh.finalize_reset(); 72 | sh.update(z.to_le_bytes()); 73 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 74 | let b2 = sh.finalize_reset(); 75 | uu[i] = Scalar::decode_reduce(&b1); 76 | vv[i] = Scalar::decode_reduce(&b2); 77 | } 78 | let mut tt = [0; 100]; 79 
| let mut P = Point::mulgen(&uu[127]); 80 | for i in 0..tt.len() { 81 | let begin = core_cycles(); 82 | for j in 0..128 { 83 | let ku = (i + j) & 127; 84 | let kv = i.wrapping_sub(j) & 127; 85 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 86 | P += Q; 87 | } 88 | let end = core_cycles(); 89 | tt[i] = end.wrapping_sub(begin); 90 | } 91 | tt.sort(); 92 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode_compressed()[0]) 93 | } 94 | 95 | fn bench_skey_sign() -> (f64, u8) { 96 | let z = core_cycles(); 97 | let mut sh = Sha256::new(); 98 | sh.update(&z.to_le_bytes()); 99 | sh.update(&[0x00u8]); 100 | let s1 = sh.finalize_reset(); 101 | sh.update(&z.to_le_bytes()); 102 | sh.update(&[0x01u8]); 103 | let s2 = sh.finalize_reset(); 104 | let mut seed = [0u8; 48]; 105 | seed[..32].copy_from_slice(&s1); 106 | seed[32..].copy_from_slice(&s2[..16]); 107 | let skey = PrivateKey::from_seed(&seed); 108 | let mut tt = [0; 100]; 109 | let mut msg = [0u8; 32]; 110 | for i in 0..tt.len() { 111 | let begin = core_cycles(); 112 | for _ in 0..100 { 113 | let sig = skey.sign_hash(&msg, &[]); 114 | msg[..].copy_from_slice(&sig[0..32]); 115 | } 116 | let end = core_cycles(); 117 | tt[i] = end.wrapping_sub(begin); 118 | } 119 | tt.sort(); 120 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 121 | } 122 | 123 | fn bench_pkey_verify() -> (f64, u8) { 124 | let z = core_cycles(); 125 | let mut sh = Sha256::new(); 126 | sh.update(&z.to_le_bytes()); 127 | sh.update(&[0x00u8]); 128 | let s1 = sh.finalize_reset(); 129 | sh.update(&z.to_le_bytes()); 130 | sh.update(&[0x01u8]); 131 | let s2 = sh.finalize_reset(); 132 | let mut seed = [0u8; 48]; 133 | seed[..32].copy_from_slice(&s1); 134 | seed[32..].copy_from_slice(&s2[..16]); 135 | let skey = PrivateKey::from_seed(&seed); 136 | let pkey = skey.to_public_key(); 137 | let mut sigs = [[0u8; 64]; 128]; 138 | for i in 0..128 { 139 | let msg = [i as u8; 32]; 140 | let sig = skey.sign_hash(&msg, &[]); 141 | sigs[i][..].copy_from_slice(&sig); 142 | } 143 | 
let mut tt = [0; 100]; 144 | let mut msg = [0u8; 32]; 145 | for i in 0..tt.len() { 146 | let begin = core_cycles(); 147 | for j in 0..128 { 148 | let ff = pkey.verify_hash(&sigs[j], &msg); 149 | sigs[j][40] ^= 1u8.wrapping_add(ff as u8); 150 | msg[3] ^= 3u8.wrapping_sub(ff as u8); 151 | } 152 | let end = core_cycles(); 153 | tt[i] = end.wrapping_sub(begin); 154 | } 155 | tt.sort(); 156 | ((tt[tt.len() >> 1] as f64) / 128.0, msg[0]) 157 | } 158 | 159 | fn main() { 160 | let mut bx = 0u8; 161 | 162 | let (v, x) = bench_mul(); 163 | bx ^= x; 164 | println!("secp256k1 point mul: {:13.2}", v); 165 | let (v, x) = bench_mulgen(); 166 | bx ^= x; 167 | println!("secp256k1 point mulgen: {:13.2}", v); 168 | let (v, x) = bench_mul_add_mulgen(); 169 | bx ^= x; 170 | println!("secp256k1 point mul_add_mulgen:{:13.2}", v); 171 | let (v, x) = bench_skey_sign(); 172 | bx ^= x; 173 | println!("secp256k1 sign: {:13.2}", v); 174 | let (v, x) = bench_pkey_verify(); 175 | bx ^= x; 176 | println!("secp256k1 verify: {:13.2}", v); 177 | 178 | println!("{}", bx); 179 | } 180 | -------------------------------------------------------------------------------- /benches/util.rs: -------------------------------------------------------------------------------- 1 | #[cfg(target_arch = "x86")] 2 | pub fn core_cycles() -> u64 { 3 | use core::arch::x86::{_mm_lfence, _rdtsc}; 4 | unsafe { 5 | _mm_lfence(); 6 | _rdtsc() 7 | } 8 | } 9 | 10 | #[cfg(target_arch = "x86_64")] 11 | pub fn core_cycles() -> u64 { 12 | use core::arch::x86_64::{_mm_lfence, _rdtsc}; 13 | unsafe { 14 | _mm_lfence(); 15 | _rdtsc() 16 | } 17 | } 18 | 19 | #[cfg(target_arch = "aarch64")] 20 | pub fn core_cycles() -> u64 { 21 | use core::arch::asm; 22 | let mut x: u64; 23 | unsafe { 24 | asm!("dsb sy", "mrs {}, pmccntr_el0", out(reg) x); 25 | } 26 | x 27 | } 28 | 29 | #[cfg(target_arch = "riscv64")] 30 | pub fn core_cycles() -> u64 { 31 | use core::arch::asm; 32 | let mut x: u64; 33 | unsafe { 34 | asm!("rdcycle {}", out(reg) x); 35 | } 
36 | x 37 | } 38 | -------------------------------------------------------------------------------- /benches/x25519.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "x25519")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::x25519::{x25519, x25519_base}; 8 | 9 | fn bench_x25519() -> (f64, u8) { 10 | let z = core_cycles(); 11 | let mut b = [0u8; 32]; 12 | b[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 13 | b[ 8..16].copy_from_slice(&z.to_le_bytes()); 14 | b[16..24].copy_from_slice(&z.to_le_bytes()); 15 | b[24..32].copy_from_slice(&z.to_le_bytes()); 16 | let mut tt = [0; 100]; 17 | for i in 0..tt.len() { 18 | let begin = core_cycles(); 19 | for _ in 0..100 { 20 | b = x25519(&b, &b); 21 | } 22 | let end = core_cycles(); 23 | tt[i] = end.wrapping_sub(begin); 24 | } 25 | tt.sort(); 26 | ((tt[tt.len() >> 1] as f64) / 100.0, b[0]) 27 | } 28 | 29 | fn bench_x25519_base() -> (f64, u8) { 30 | let z = core_cycles(); 31 | let mut b = [0u8; 32]; 32 | b[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 33 | b[ 8..16].copy_from_slice(&z.to_le_bytes()); 34 | b[16..24].copy_from_slice(&z.to_le_bytes()); 35 | b[24..32].copy_from_slice(&z.to_le_bytes()); 36 | let mut tt = [0; 100]; 37 | for i in 0..tt.len() { 38 | let begin = core_cycles(); 39 | for _ in 0..100 { 40 | b = x25519_base(&b); 41 | } 42 | let end = core_cycles(); 43 | tt[i] = end.wrapping_sub(begin); 44 | } 45 | tt.sort(); 46 | ((tt[tt.len() >> 1] as f64) / 100.0, b[0]) 47 | } 48 | 49 | fn main() { 50 | let mut bx = 0u8; 51 | 52 | let (v, x) = bench_x25519(); 53 | bx ^= x; 54 | println!("X25519 (generic): {:13.2}", v); 55 | let (v, x) = bench_x25519_base(); 56 | bx ^= x; 57 | println!("X25519 (base point): {:13.2}", v); 58 | 59 | println!("{}", bx); 60 | } 61 | -------------------------------------------------------------------------------- /benches/x448.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "x448")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::x448::{x448, x448_base}; 8 | 9 | fn bench_x448() -> (f64, u8) { 10 | let z = core_cycles(); 11 | let mut b = [0u8; 56]; 12 | b[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 13 | b[ 8..16].copy_from_slice(&z.to_le_bytes()); 14 | b[16..24].copy_from_slice(&z.to_le_bytes()); 15 | b[24..32].copy_from_slice(&z.to_le_bytes()); 16 | b[32..40].copy_from_slice(&z.to_le_bytes()); 17 | b[40..48].copy_from_slice(&z.to_le_bytes()); 18 | b[48..56].copy_from_slice(&z.to_le_bytes()); 19 | let mut tt = [0; 100]; 20 | for i in 0..tt.len() { 21 | let begin = core_cycles(); 22 | for _ in 0..100 { 23 | b = x448(&b, &b); 24 | } 25 | let end = core_cycles(); 26 | tt[i] = end.wrapping_sub(begin); 27 | } 28 | tt.sort(); 29 | ((tt[tt.len() >> 1] as f64) / 100.0, b[0]) 30 | } 31 | 32 | fn bench_x448_base() -> (f64, u8) { 33 | let z = core_cycles(); 34 | let mut b = [0u8; 56]; 35 | b[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 36 | b[ 8..16].copy_from_slice(&z.to_le_bytes()); 37 | b[16..24].copy_from_slice(&z.to_le_bytes()); 38 | b[24..32].copy_from_slice(&z.to_le_bytes()); 39 | b[32..40].copy_from_slice(&z.to_le_bytes()); 40 | b[40..48].copy_from_slice(&z.to_le_bytes()); 41 | b[48..56].copy_from_slice(&z.to_le_bytes()); 42 | let mut tt = [0; 100]; 43 | for i in 0..tt.len() { 44 | let begin = core_cycles(); 45 | for _ in 0..100 { 46 | b = x448_base(&b); 47 | } 48 | let end = core_cycles(); 49 | tt[i] = end.wrapping_sub(begin); 50 | } 51 | tt.sort(); 52 | ((tt[tt.len() >> 1] as f64) / 100.0, b[0]) 53 | } 54 | 55 | fn main() { 56 | let mut bx = 0u8; 57 | 58 | let (v, x) = bench_x448(); 59 | bx ^= x; 60 | println!("X448 (generic): {:13.2}", v); 61 | let (v, x) = bench_x448_base(); 62 | bx ^= x; 63 | println!("X448 (base point): {:13.2}", v); 64 | 65 | println!("{}", bx); 66 | } 67 | -------------------------------------------------------------------------------- /extra/frost-sample.rs: -------------------------------------------------------------------------------- 1 | // This sample code shows how to use the crrl FROST implementation. 2 | 3 | use crrl::frost::ristretto255::{ 4 | GroupPrivateKey, 5 | SignerPrivateKeyShare, 6 | SignerPublicKey, 7 | KeySplitter, 8 | VSSElement, 9 | SignatureShare, 10 | Commitment, 11 | Nonce, 12 | Coordinator, 13 | }; 14 | use rand::RngCore; 15 | use rand::rngs::OsRng; 16 | use std::vec::Vec; 17 | 18 | fn main() { 19 | 20 | // We want `max_signers` individual signers, such that a threshold 21 | // of `min_signers` of them is required to compute a signature. 22 | // Rules: 2 <= min_signers <= max_signers <= 65535 23 | let max_signers = 5; 24 | let min_signers = 3; 25 | 26 | // ==================================================================== 27 | // KEY GENERATION 28 | // 29 | // This step happens once. A trusted dealer generates the group private 30 | // key and splits it into individual key shares. 
Each signer receives 31 | // one key share. The signers can verify a VSS commitment by the dealer 32 | // to validate that the split was performed correctly (though the trusted 33 | // dealer is still trusted with using a proper entropy source for the 34 | // private key, and not remembering any secret afterwards). 35 | 36 | // =========== trusted dealer =========== 37 | 38 | // Generate a group private key. 39 | let mut rng = OsRng::default(); 40 | let group_sk = GroupPrivateKey::generate(&mut rng); 41 | 42 | // Split the key into individual signer key shares. 43 | let (sk_share, vss) = KeySplitter::trusted_split( 44 | &mut rng, group_sk, min_signers, max_signers); 45 | 46 | // Send its key share to each signer. 47 | // Optionally: also send the VSS commitment that allows each signer 48 | // to verify that the share was properly generated. 49 | let mut enc_sk_share: Vec<[u8; SignerPrivateKeyShare::ENC_LEN]> = 50 | Vec::new(); 51 | for sks in sk_share.iter() { 52 | enc_sk_share.push(sks.encode()); 53 | } 54 | let enc_vss = VSSElement::encode_list(&vss); 55 | 56 | // Also extract the group public key and each individual signer public 57 | // key; they should be "published" (everybody knows them). 58 | let group_pk = group_sk.get_public_key(); 59 | let mut signer_pk: Vec = Vec::new(); 60 | for sks in sk_share.iter() { 61 | signer_pk.push(sks.get_public_key()); 62 | } 63 | 64 | // =========== signers =========== 65 | 66 | // Each signer receives its private key share, decodes it, and 67 | // optionally verifies the VSS commitment that demonstrates proper 68 | // generation of the share. 69 | // In this example code we simulate all signers in a loop. 70 | 71 | let mut signer_sk_share: Vec = Vec::new(); 72 | for esks in enc_sk_share.iter() { 73 | // All decoding operations return Option so that None 74 | // is obtained on decoding failure. In this example we use unwrap(), 75 | // but this is where some error handling should happen. 
76 | let sks = SignerPrivateKeyShare::decode(esks).unwrap(); 77 | 78 | // Verify the VSS commitment (optional; needed only if the dealing 79 | // process is such that accidental or malicious alteration of shares 80 | // may happen). 81 | let vss = VSSElement::decode_list(&enc_vss).unwrap(); 82 | if !sks.verify_split(&vss) { 83 | panic!("invalid key share"); 84 | } 85 | 86 | // The signer stores its private key share (securely! It's secret). 87 | // As shown above, it can be encoded and decoded, for storage in 88 | // a file or equivalent. In this example, we keep an in-RAM 89 | // structure. 90 | signer_sk_share.push(sks); 91 | } 92 | 93 | // ==================================================================== 94 | // SIGNATURE GENERATION 95 | // 96 | // Whenever a signature must be computed, over a given message, a 97 | // two-round protocol happens: 98 | // 99 | // Round 1: each signer generates a per-signature nonce and associated 100 | // commitment; the commitments are sent to the coordinator. Each signer 101 | // remembers its nonce and commitment. 102 | // 103 | // Round 2: the coordinator selects enough signers (among received 104 | // commitments) to meet the threshold. The corresponding list of 105 | // commitments is sent to the signers, along with the message. Each 106 | // signer computes and sends back to the coordinator a signature 107 | // share. The coordinator assembles the signature shares into the 108 | // signature value. 109 | 110 | // =========== signers =========== 111 | 112 | // Each signer generates a nonce and a commitment. The commitment is 113 | // sent to the coordinator. 
114 | let mut signer_nonce: Vec = Vec::new(); 115 | let mut signer_comm: Vec = Vec::new(); 116 | let mut enc_signer_comm: Vec<[u8; Commitment::ENC_LEN]> = Vec::new(); 117 | for sks in signer_sk_share.iter() { 118 | let (nonce, comm) = sks.commit(&mut rng); 119 | signer_nonce.push(nonce); 120 | signer_comm.push(comm); 121 | enc_signer_comm.push(comm.encode()); 122 | } 123 | 124 | // =========== coordinator =========== 125 | 126 | // The coordinator knows the group public key and the signature 127 | // threshold. 128 | let coordinator = Coordinator::new(min_signers, group_pk).unwrap(); 129 | 130 | // This is the message to sign. 131 | let msg: &[u8] = b"sample"; 132 | 133 | // The coordinator receives _some_ commitments. The commitments may 134 | // be obtained in any order; some may be missing; duplicates are tolerated 135 | // (they are automatically ignored). 136 | // In this example, we apply a random permutation to the 137 | // encoded commitments to simulate some network-induced shuffling. 138 | for i in 0..enc_signer_comm.len() - 1 { 139 | let j = i + (rng.next_u64() as usize) % (enc_signer_comm.len() - i); 140 | if i != j { 141 | let t = enc_signer_comm[i]; 142 | enc_signer_comm[i] = enc_signer_comm[j]; 143 | enc_signer_comm[j] = t; 144 | } 145 | } 146 | 147 | // Decode the commitments and use them to select a proper subset. 148 | // The encoded commitments are sent to the selected signers (the 149 | // selected signers are identified by the 'ident' fields of the 150 | // commitments that have been chosen). 151 | let mut received_signer_comm: Vec = Vec::new(); 152 | for esc in enc_signer_comm.iter() { 153 | let sc = Commitment::decode(esc).unwrap(); 154 | received_signer_comm.push(sc); 155 | } 156 | let chosen_comm = coordinator.choose(&received_signer_comm).unwrap(); 157 | let enc_chosen_comm = Commitment::encode_list(&chosen_comm); 158 | 159 | // =========== signers =========== 160 | 161 | // The selected signers receive the encoded commitments.
The coordinator 162 | // may know who are the selected signers by looking at the identifiers 163 | // (the `Commitment`, `SignerPublicKey` and `SignerPrivateKeyShare` 164 | // all have matching public `ident` fields). Another option (which is 165 | // used below) is to send the encoded commitments to everybody and see 166 | // what they answer; only actually selected signers will respond. 167 | let mut enc_sig_share: Vec<[u8; SignatureShare::ENC_LEN]> = Vec::new(); 168 | for (sks, (nonce, comm)) in signer_sk_share.iter().zip( 169 | signer_nonce.iter().zip(signer_comm)) 170 | { 171 | // The signer knows its private key share (sks), nonce, 172 | // and commitment. 173 | // Note: the commitment could also be recomputed from the nonce, 174 | // using `nonce.get_commitment()`. Remembering the commitment 175 | // saves a few clock cycles. 176 | 177 | // Decode the received commitment list. 178 | let comm_list = Commitment::decode_list(&enc_chosen_comm).unwrap(); 179 | 180 | // Compute the signature share from this signer. This may fail 181 | // if the commitment list is incorrect, but also if this signer 182 | // was not actually selected in the list. 183 | match sks.sign(*nonce, comm, msg, &comm_list) { 184 | Some(ss) => { enc_sig_share.push(ss.encode()); } 185 | None => { } 186 | } 187 | } 188 | 189 | // =========== coordinator =========== 190 | 191 | // The coordinator receives the encoded signature shares (in any order), 192 | // decodes them, then assembles them into the signature. We again 193 | // (for this example) randomly shuffle the list of encoded shares. 194 | for i in 0..enc_sig_share.len() - 1 { 195 | let j = i + (rng.next_u64() as usize) % (enc_sig_share.len() - i); 196 | if i != j { 197 | let t = enc_sig_share[i]; 198 | enc_sig_share[i] = enc_sig_share[j]; 199 | enc_sig_share[j] = t; 200 | } 201 | } 202 | 203 | // Decode the encoded signature shares. 
204 | let mut sig_share: Vec = Vec::new(); 205 | for ess in enc_sig_share.iter() { 206 | sig_share.push(SignatureShare::decode(ess).unwrap()); 207 | } 208 | 209 | // Assemble the signature. This also verifies each share, _and_ checks 210 | // that the assembled signature is valid. 211 | // The coordinator uses the known signer public keys (signer_pk list); 212 | // that list can be provided in any order and also contain public keys of 213 | // signers that were not selected for this signature generation. 214 | let sig = coordinator.assemble_signature( 215 | &sig_share, &chosen_comm, &signer_pk, msg).unwrap(); 216 | 217 | // The signature can be encoded into bytes. 218 | let esig = sig.encode(); 219 | 220 | // ==================================================================== 221 | // SIGNATURE VERIFICATION 222 | 223 | // Generated signatures can be verified against the group public key. 224 | if !group_pk.verify_esig(&esig, msg) { 225 | panic!("signature verification failed"); 226 | } 227 | println!("OK"); 228 | } 229 | -------------------------------------------------------------------------------- /extra/mkuxcomp.sage: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env sage 2 | 3 | # This Sage script computes and prints the UX_COMP[] array of precomputed 4 | # values that are used to support efficient verification of truncated 5 | # Ed25519 signatures. 6 | # 7 | # We work in the Montgomery domain of Curve25519: y^2 = x^3 + 486662*x^2 + x 8 | # B = conventional generator 9 | # For i = 0 to 16384, let U_i = i*(2^240)*B, and x_i = x coordinate of U_i. 10 | # We consider x_i as an integer (with 0 <= x_i < 2^255-19) and define: 11 | # z_i = (x_i % 2^48)*2^16 + i (as an unsigned 64-bit integer) 12 | # The produced UX_COMP[] array contains the 16385 values z_i in ascending 13 | # numerical order. 
14 | 15 | import importlib 16 | import hashlib 17 | 18 | def mkuxcomp(): 19 | p = 2**255 - 19 20 | K = Zmod(p) 21 | E = EllipticCurve(K, [0, 486662, 0, 1, 0]) 22 | B = E.point([9, 14781619447589544791020593568409986887264606134616475288964881837755586237401]) 23 | tt = [] 24 | T = E.point([0, 1, 0]) 25 | P = (2**240)*B 26 | for i in range(0, 16385): 27 | if T.is_zero(): 28 | x = K(0) 29 | else: 30 | x = T.xy()[0] 31 | tt.append(int(i) + ((int(x) % 2**48) << 16)) 32 | T = T + P 33 | tt.sort() 34 | print('static UX_COMP: [u64; 16385] = [', end='') 35 | for i in range(0, len(tt)): 36 | if (i % 3) == 0: 37 | print() 38 | print(' ', end='') 39 | else: 40 | print(' ', end='') 41 | print('0x%016X,' % int(tt[i]), end='') 42 | print() 43 | print('];') 44 | 45 | mkuxcomp() 46 | -------------------------------------------------------------------------------- /extra/truncsig.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pornin/crrl/4cc7cbbe8796ee8d459b815d81318603279879e4/extra/truncsig.pdf -------------------------------------------------------------------------------- /src/backend/w32/gf448.rs: -------------------------------------------------------------------------------- 1 | // TODO: make a dedicated GF448 implementation, leveraging the special 2 | // modulus format. For now, we use the generic code. 
3 | 4 | use super::gfgen::{define_gfgen, define_gfgen_tests}; 5 | 6 | struct GF448Params; 7 | impl GF448Params { 8 | 9 | const MODULUS: [u64; 7] = [ 10 | 0xFFFFFFFFFFFFFFFF, 11 | 0xFFFFFFFFFFFFFFFF, 12 | 0xFFFFFFFFFFFFFFFF, 13 | 0xFFFFFFFEFFFFFFFF, 14 | 0xFFFFFFFFFFFFFFFF, 15 | 0xFFFFFFFFFFFFFFFF, 16 | 0xFFFFFFFFFFFFFFFF, 17 | ]; 18 | } 19 | 20 | define_gfgen!(GF448, GF448Params, gf448mod, false); 21 | define_gfgen_tests!(GF448, 7, test_gf448mod); 22 | -------------------------------------------------------------------------------- /src/backend/w32/gfsecp256k1.rs: -------------------------------------------------------------------------------- 1 | pub type GFsecp256k1 = super::modint::ModInt256< 2 | 0xFFFFFFFEFFFFFC2F, 0xFFFFFFFFFFFFFFFF, 3 | 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>; 4 | 5 | impl GFsecp256k1 { 6 | 7 | // TODO: replace these functions with set_mul_small(), when the latter 8 | // is implemented. 9 | pub fn set_mul21(&mut self) { 10 | *self *= Self::w64be(0, 0, 0, 21); 11 | } 12 | pub fn mul21(self) -> Self { 13 | self * Self::w64be(0, 0, 0, 21) 14 | } 15 | 16 | #[inline(always)] 17 | pub fn encode(self) -> [u8; Self::ENC_LEN] { 18 | self.encode32() 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/backend/w32/mod.rs: -------------------------------------------------------------------------------- 1 | mod zz; 2 | pub use zz::{Zu128, Zu256, Zu384}; 3 | 4 | #[cfg(any( 5 | feature = "gf255", 6 | feature = "gf255e", 7 | feature = "gf255s", 8 | feature = "gf25519", 9 | ))] 10 | pub mod gf255; 11 | 12 | #[cfg(any( 13 | feature = "gf255", 14 | feature = "gf255e", 15 | feature = "gf255s", 16 | feature = "gf25519", 17 | ))] 18 | pub use gf255::GF255; 19 | 20 | #[cfg(feature = "gf255e")] 21 | pub type GF255e = GF255<18651>; 22 | 23 | #[cfg(feature = "gf255s")] 24 | pub type GF255s = GF255<3957>; 25 | 26 | #[cfg(feature = "gf25519")] 27 | pub type GF25519 = GF255<19>; 28 | 29 | #[cfg(any( 30 | feature = 
"modint256", 31 | feature = "gfp256", 32 | ))] 33 | pub mod modint; 34 | 35 | #[cfg(feature = "modint256")] 36 | pub use modint::ModInt256; 37 | 38 | #[cfg(feature = "modint256")] 39 | pub type ModInt256ct = ModInt256; 40 | 41 | #[cfg(feature = "gfp256")] 42 | pub type GFp256 = modint::ModInt256< 43 | 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 44 | 0x0000000000000000, 0xFFFFFFFF00000001>; 45 | 46 | #[cfg(feature = "gfp256")] 47 | impl GFp256 { 48 | /// Encodes a scalar element into bytes (little-endian). 49 | pub fn encode(self) -> [u8; 32] { 50 | self.encode32() 51 | } 52 | } 53 | 54 | #[cfg(feature = "secp256k1")] 55 | pub mod gfsecp256k1; 56 | 57 | #[cfg(feature = "secp256k1")] 58 | pub use gfsecp256k1::GFsecp256k1; 59 | 60 | #[cfg(feature = "gf448")] 61 | pub mod gf448; 62 | 63 | #[cfg(feature = "gf448")] 64 | pub use gf448::GF448; 65 | 66 | pub mod lagrange; 67 | 68 | #[cfg(feature = "gfgen")] 69 | pub mod gfgen; 70 | 71 | #[cfg(feature = "gfb254")] 72 | pub mod gfb254_m32; 73 | 74 | #[cfg(feature = "gfb254")] 75 | pub use gfb254_m32::{GFb127, GFb254}; 76 | 77 | // Carrying addition and subtraction should use u32::carrying_add() 78 | // and u32::borrowing_sub(), but these functions are currently only 79 | // experimental. 80 | 81 | // Add with carry; carry is 0 or 1. 82 | // (x, y, c_in) -> x + y + c_in mod 2^32, c_out 83 | 84 | #[cfg(target_arch = "x86")] 85 | #[allow(dead_code)] 86 | #[inline(always)] 87 | pub(crate) fn addcarry_u32(x: u32, y: u32, c: u8) -> (u32, u8) { 88 | use core::arch::x86::_addcarry_u32; 89 | unsafe { 90 | let mut d = 0u32; 91 | let cc = _addcarry_u32(c, x, y, &mut d); 92 | (d, cc) 93 | } 94 | } 95 | 96 | #[cfg(not(target_arch = "x86"))] 97 | #[allow(dead_code)] 98 | #[inline(always)] 99 | pub(crate) const fn addcarry_u32(x: u32, y: u32, c: u8) -> (u32, u8) { 100 | let z = (x as u64).wrapping_add(y as u64).wrapping_add(c as u64); 101 | (z as u32, (z >> 32) as u8) 102 | } 103 | 104 | // Subtract with borrow; borrow is 0 or 1. 
105 | // (x, y, c_in) -> x - y - c_in mod 2^32, c_out 106 | 107 | #[cfg(target_arch = "x86")] 108 | #[allow(dead_code)] 109 | #[inline(always)] 110 | pub(crate) fn subborrow_u32(x: u32, y: u32, c: u8) -> (u32, u8) { 111 | use core::arch::x86::_subborrow_u32; 112 | unsafe { 113 | let mut d = 0u32; 114 | let cc = _subborrow_u32(c, x, y, &mut d); 115 | (d, cc) 116 | } 117 | } 118 | 119 | #[cfg(not(target_arch = "x86"))] 120 | #[allow(dead_code)] 121 | #[inline(always)] 122 | pub(crate) const fn subborrow_u32(x: u32, y: u32, c: u8) -> (u32, u8) { 123 | let z = (x as u64).wrapping_sub(y as u64).wrapping_sub(c as u64); 124 | (z as u32, (z >> 63) as u8) 125 | } 126 | 127 | // Compute x*y over 64 bits, returned as two 32-bit words (lo, hi) 128 | #[allow(dead_code)] 129 | #[inline(always)] 130 | pub(crate) const fn umull(x: u32, y: u32) -> (u32, u32) { 131 | let z = (x as u64) * (y as u64); 132 | (z as u32, (z >> 32) as u32) 133 | } 134 | 135 | // Compute x*y+z over 64 bits, returned as two 32-bit words (lo, hi) 136 | #[allow(dead_code)] 137 | #[inline(always)] 138 | pub(crate) const fn umull_add(x: u32, y: u32, z: u32) -> (u32, u32) { 139 | let t = ((x as u64) * (y as u64)).wrapping_add(z as u64); 140 | (t as u32, (t >> 32) as u32) 141 | } 142 | 143 | // Compute x*y+z1+z2 over 64 bits, returned as two 32-bit words (lo, hi) 144 | #[allow(dead_code)] 145 | #[inline(always)] 146 | pub(crate) const fn umull_add2(x: u32, y: u32, z1: u32, z2: u32) -> (u32, u32) { 147 | let t = ((x as u64) * (y as u64)) 148 | .wrapping_add(z1 as u64).wrapping_add(z2 as u64); 149 | (t as u32, (t >> 32) as u32) 150 | } 151 | 152 | // Compute x1*y1+x2*y2 over 64 bits, returned as two 32-bit words (lo, hi) 153 | #[allow(dead_code)] 154 | #[inline(always)] 155 | pub(crate) const fn umull_x2(x1: u32, y1: u32, x2: u32, y2: u32) -> (u32, u32) { 156 | let z1 = (x1 as u64) * (y1 as u64); 157 | let z2 = (x2 as u64) * (y2 as u64); 158 | let z = z1.wrapping_add(z2); 159 | (z as u32, (z >> 32) as u32) 160 | } 
161 | 162 | // Compute x1*y1+x2*y2+z3 over 64 bits, returned as two 32-bit words (lo, hi) 163 | #[allow(dead_code)] 164 | #[inline(always)] 165 | pub(crate) const fn umull_x2_add(x1: u32, y1: u32, x2: u32, y2: u32, z3: u32) -> (u32, u32) { 166 | let z1 = (x1 as u64) * (y1 as u64); 167 | let z2 = (x2 as u64) * (y2 as u64); 168 | let z = z1.wrapping_add(z2).wrapping_add(z3 as u64); 169 | (z as u32, (z >> 32) as u32) 170 | } 171 | 172 | // Return 0xFFFFFFFF if x >= 0x80000000, 0 otherwise (i.e. take the sign 173 | // bit of the signed interpretation, and expand it to 32 bits). 174 | #[allow(dead_code)] 175 | #[inline(always)] 176 | pub(crate) const fn sgnw(x: u32) -> u32 { 177 | ((x as i32) >> 31) as u32 178 | } 179 | 180 | // Get the number of leading zeros in a 32-bit value. 181 | // On some platforms, u32::leading_zeros() performs the computation with 182 | // a code sequence that will be constant-time on most/all CPUs 183 | // compatible with that platforms (e.g. any 32-bit x86 with support for 184 | // the LZCNT opcode); on others, a non-constant-time sequence would be 185 | // used, and we must instead rely on a safe (but slower) routine. 186 | // 187 | // On x86 without LZCNT, u32::leading_zeros() uses a BSR opcode, but since 188 | // BSR yields an undefined result on an input of value 0, u32::leading_zeros() 189 | // includes an explicit test and a conditional jump for that case, and that 190 | // is not (in general) constant-time. 
191 | #[cfg(any( 192 | all(target_arch = "x86", target_feature = "lzcnt"), 193 | ))] 194 | #[allow(dead_code)] 195 | #[inline(always)] 196 | pub(crate) const fn lzcnt(x: u32) -> u32 { 197 | x.leading_zeros() 198 | } 199 | 200 | #[cfg(not(any( 201 | all(target_arch = "x86", target_feature = "lzcnt"), 202 | )))] 203 | #[allow(dead_code)] 204 | pub(crate) const fn lzcnt(x: u32) -> u32 { 205 | let m = sgnw((x >> 16).wrapping_sub(1)); 206 | let s = m & 16; 207 | let x = (x >> 16) ^ (m & (x ^ (x >> 16))); 208 | 209 | let m = sgnw((x >> 8).wrapping_sub(1)); 210 | let s = s | (m & 8); 211 | let x = (x >> 8) ^ (m & (x ^ (x >> 8))); 212 | 213 | let m = sgnw((x >> 4).wrapping_sub(1)); 214 | let s = s | (m & 4); 215 | let x = (x >> 4) ^ (m & (x ^ (x >> 4))); 216 | 217 | let m = sgnw((x >> 2).wrapping_sub(1)); 218 | let s = s | (m & 2); 219 | let x = (x >> 2) ^ (m & (x ^ (x >> 2))); 220 | 221 | // At this point, x fits on 2 bits. Number of leading zeros is then: 222 | // x = 0 -> 2 223 | // x = 1 -> 1 224 | // x = 2 -> 0 225 | // x = 3 -> 0 226 | let s = s.wrapping_add(2u32.wrapping_sub(x) & ((x.wrapping_sub(3) >> 2))); 227 | 228 | s as u32 229 | } 230 | -------------------------------------------------------------------------------- /src/backend/w32/zz.rs: -------------------------------------------------------------------------------- 1 | use core::convert::TryFrom; 2 | 3 | use super::{addcarry_u32, subborrow_u32, umull, umull_add, umull_add2, sgnw}; 4 | 5 | /// A custom 128-bit integer with some constant-time operations. 
6 | #[derive(Clone, Copy, Debug)] 7 | pub struct Zu128([u32; 4]); 8 | 9 | impl Zu128 { 10 | 11 | pub const ZERO: Self = Self([0; 4]); 12 | 13 | #[inline(always)] 14 | pub const fn w64le(x0: u64, x1: u64) -> Self { 15 | Self([ x0 as u32, (x0 >> 32) as u32, x1 as u32, (x1 >> 32) as u32 ]) 16 | } 17 | 18 | #[inline(always)] 19 | pub fn decode(buf: &[u8]) -> Option { 20 | if buf.len() != 16 { 21 | None 22 | } else { 23 | let mut x = Self::ZERO; 24 | for i in 0..4 { 25 | x.0[i] = u32::from_le_bytes(*<&[u8; 4]>::try_from( 26 | &buf[(4 * i)..(4 * i + 4)]).unwrap()); 27 | } 28 | Some(x) 29 | } 30 | } 31 | 32 | #[inline(always)] 33 | pub fn mul128x128(self, b: &Self) -> Zu256 { 34 | let mut d = [0u32; 8]; 35 | for i in 0..4 { 36 | let f = self.0[i]; 37 | let mut hi = 0; 38 | for j in 0..4 { 39 | (d[i + j], hi) = umull_add2(f, b.0[j], d[i + j], hi); 40 | } 41 | d[i + 4] = hi; 42 | } 43 | Zu256(d) 44 | } 45 | 46 | #[inline(always)] 47 | pub fn mul128x128trunc(self, b: &Self) -> Self { 48 | let f = self.0[0]; 49 | let (d0, hi) = umull(f, b.0[0]); 50 | let (d1, hi) = umull_add(f, b.0[1], hi); 51 | let (d2, hi) = umull_add(f, b.0[2], hi); 52 | let d3 = f.wrapping_mul(b.0[3]).wrapping_add(hi); 53 | let f = self.0[1]; 54 | let (d1, hi) = umull_add(f, b.0[0], d1); 55 | let (d2, hi) = umull_add2(f, b.0[1], d2, hi); 56 | let d3 = f.wrapping_mul(b.0[2]).wrapping_add(d3).wrapping_add(hi); 57 | let f = self.0[2]; 58 | let (d2, hi) = umull_add(f, b.0[0], d2); 59 | let d3 = f.wrapping_mul(b.0[1]).wrapping_add(d3).wrapping_add(hi); 60 | let f = self.0[3]; 61 | let d3 = f.wrapping_mul(b.0[0]).wrapping_add(d3); 62 | Self([ d0, d1, d2, d3 ]) 63 | } 64 | 65 | /// Interpreting this value as a signed 128-bit integer, return its 66 | /// absolute value (in a `u128` type) and the original sign (0xFFFFFFFF 67 | /// for negative, 0x00000000 for non-negative). 
68 | #[inline(always)] 69 | pub fn abs(self) -> (u128, u32) { 70 | let s = sgnw(self.0[3]); 71 | let (d0, cc) = subborrow_u32(self.0[0] ^ s, s, 0); 72 | let (d1, cc) = subborrow_u32(self.0[1] ^ s, s, cc); 73 | let (d2, cc) = subborrow_u32(self.0[2] ^ s, s, cc); 74 | let (d3, _) = subborrow_u32(self.0[3] ^ s, s, cc); 75 | ((d0 as u128) | ((d1 as u128) << 32) 76 | | ((d2 as u128) << 64) | ((d3 as u128) << 96), s) 77 | } 78 | 79 | /// Interpreting this value as a signed 128-bit integer `x`, return 80 | /// the absolute value of `2*x+1` (as a `u128` type) and the original 81 | /// sign (0xFFFFFFFF for negative, 0x00000000 for non-negative). 82 | #[inline(always)] 83 | pub fn double_inc_abs(self) -> (u128, u32) { 84 | let s = sgnw(self.0[3]); 85 | let b0 = (self.0[0] << 1) | 1; 86 | let b1 = (self.0[0] >> 31) | (self.0[1] << 1); 87 | let b2 = (self.0[1] >> 31) | (self.0[2] << 1); 88 | let b3 = (self.0[2] >> 31) | (self.0[3] << 1); 89 | let (d0, cc) = subborrow_u32(b0 ^ s, s, 0); 90 | let (d1, cc) = subborrow_u32(b1 ^ s, s, cc); 91 | let (d2, cc) = subborrow_u32(b2 ^ s, s, cc); 92 | let (d3, _) = subborrow_u32(b3 ^ s, s, cc); 93 | ((d0 as u128) | ((d1 as u128) << 32) 94 | | ((d2 as u128) << 64) | ((d3 as u128) << 96), s) 95 | } 96 | 97 | #[inline(always)] 98 | pub fn set_sub(&mut self, b: &Self) { 99 | let mut cc = 0; 100 | for i in 0..4 { 101 | (self.0[i], cc) = subborrow_u32(self.0[i], b.0[i], cc); 102 | } 103 | } 104 | 105 | #[inline(always)] 106 | pub fn set_sub_u32(&mut self, b: u32) { 107 | let mut cc; 108 | (self.0[0], cc) = subborrow_u32(self.0[0], b, 0); 109 | for i in 1..4 { 110 | (self.0[i], cc) = subborrow_u32(self.0[i], 0, cc); 111 | } 112 | } 113 | } 114 | 115 | /// A custom 256-bit integer with some constant-time operations. 
116 | #[derive(Clone, Copy, Debug)] 117 | pub struct Zu256([u32; 8]); 118 | 119 | impl Zu256 { 120 | 121 | pub const ZERO: Self = Self([0; 8]); 122 | 123 | #[inline(always)] 124 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64) -> Self { 125 | Self([ 126 | x0 as u32, (x0 >> 32) as u32, 127 | x1 as u32, (x1 >> 32) as u32, 128 | x2 as u32, (x2 >> 32) as u32, 129 | x3 as u32, (x3 >> 32) as u32, 130 | ]) 131 | } 132 | 133 | #[inline(always)] 134 | pub fn decode(buf: &[u8]) -> Option { 135 | if buf.len() != 32 { 136 | None 137 | } else { 138 | let mut x = Self::ZERO; 139 | for i in 0..8 { 140 | x.0[i] = u32::from_le_bytes(*<&[u8; 4]>::try_from( 141 | &buf[(4 * i)..(4 * i + 4)]).unwrap()); 142 | } 143 | Some(x) 144 | } 145 | } 146 | 147 | #[inline(always)] 148 | pub fn trunc128(self) -> Zu128 { 149 | Zu128([ self.0[0], self.0[1], self.0[2], self.0[3] ]) 150 | } 151 | 152 | #[inline(always)] 153 | pub fn mul256x128(self, b: &Zu128) -> Zu384 { 154 | let mut d = [0u32; 12]; 155 | for i in 0..8 { 156 | let f = self.0[i]; 157 | let mut hi = 0; 158 | for j in 0..4 { 159 | (d[i + j], hi) = umull_add2(f, b.0[j], d[i + j], hi); 160 | } 161 | d[i + 4] = hi; 162 | } 163 | Zu384(d) 164 | } 165 | 166 | /// Return `floor((self + b)/2^224) mod 2^32` (i.e. addition truncated 167 | /// to 256 bits, then return the high 32 bits of the 256-bit result). 168 | #[inline(always)] 169 | pub fn add_rsh224(self, b: &Self) -> u32 { 170 | let mut cc; 171 | (_, cc) = addcarry_u32(self.0[0], b.0[0], 0); 172 | for i in 1..7 { 173 | (_, cc) = addcarry_u32(self.0[i], b.0[i], cc); 174 | } 175 | let (w, _) = addcarry_u32(self.0[7], b.0[7], cc); 176 | w 177 | } 178 | 179 | /// Return the borrow resulting from the subtraction of `b` from `self`; 180 | /// returned value is 1 in case of borrow, 0 otherwise. The subtraction 181 | /// result itself is discarded. 
182 | #[inline(always)] 183 | pub fn borrow(self, b: &Self) -> u32 { 184 | let mut cc; 185 | (_, cc) = subborrow_u32(self.0[0], b.0[0], 0); 186 | for i in 1..8 { 187 | (_, cc) = subborrow_u32(self.0[i], b.0[i], cc); 188 | } 189 | cc as u32 190 | } 191 | } 192 | 193 | /// A custom 384-bit integer with some constant-time operations. 194 | #[derive(Clone, Copy, Debug)] 195 | pub struct Zu384([u32; 12]); 196 | 197 | impl Zu384 { 198 | 199 | pub const ZERO: Self = Self([0; 12]); 200 | 201 | #[inline(always)] 202 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64, x4: u64, x5: u64) 203 | -> Self 204 | { 205 | Self([ 206 | x0 as u32, (x0 >> 32) as u32, 207 | x1 as u32, (x1 >> 32) as u32, 208 | x2 as u32, (x2 >> 32) as u32, 209 | x3 as u32, (x3 >> 32) as u32, 210 | x4 as u32, (x4 >> 32) as u32, 211 | x5 as u32, (x5 >> 32) as u32, 212 | ]) 213 | } 214 | 215 | #[inline(always)] 216 | pub fn set_add(&mut self, b: &Self) { 217 | let mut cc = 0; 218 | for i in 0..12 { 219 | (self.0[i], cc) = addcarry_u32(self.0[i], b.0[i], cc); 220 | } 221 | } 222 | 223 | /// Returns `self mod 2^n` and `(floor(self/2^n) + b) mod 2^128`. 224 | /// Shift count `n` MUST be between 225 and 255 (inclusive).
225 | #[inline(always)] 226 | pub fn trunc_and_rsh_cc(&mut self, b: u32, n: u32) -> (Zu256, Zu128) { 227 | let n1 = n - 224; 228 | let n2 = 32 - n1; 229 | let (d0, cc) = addcarry_u32( 230 | (self.0[7] >> n1) | (self.0[8] << n2), b, 0); 231 | let (d1, cc) = addcarry_u32( 232 | (self.0[8] >> n1) | (self.0[9] << n2), 0, cc); 233 | let (d2, cc) = addcarry_u32( 234 | (self.0[9] >> n1) | (self.0[10] << n2), 0, cc); 235 | let (d3, _) = addcarry_u32( 236 | (self.0[10] >> n1) | (self.0[11] << n2), 0, cc); 237 | let mut e = [0u32; 8]; 238 | e[..].copy_from_slice(&self.0[..8]); 239 | e[7] &= (!0u32) >> n2; 240 | (Zu256(e), Zu128([ d0, d1, d2, d3 ])) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /src/backend/w64/mod.rs: -------------------------------------------------------------------------------- 1 | // The zz module defines the Zu* type (custom non-modular integers with 2 | // sizes of 128, 256 or 384 bits, with some constant-time operation to 3 | // support scalar splitting in GLV and GLS curves). On aarch64 we use a 4 | // 32-bit version, because the Arm Cortex-A55 has non-constant-time 64-bit 5 | // multiplies (but 32-bit multiplies are constant-time). 6 | 7 | #[cfg(any( 8 | feature = "zz32", 9 | all( 10 | not(feature = "zz64"), 11 | target_arch = "aarch64")))] 12 | mod zz32; 13 | 14 | #[cfg(any( 15 | feature = "zz32", 16 | all( 17 | not(feature = "zz64"), 18 | target_arch = "aarch64")))] 19 | pub use zz32::{Zu128, Zu256, Zu384}; 20 | 21 | #[cfg(any( 22 | feature = "zz64", 23 | all( 24 | not(feature = "zz32"), 25 | not(target_arch = "aarch64"))))] 26 | mod zz; 27 | 28 | #[cfg(any( 29 | feature = "zz64", 30 | all( 31 | not(feature = "zz32"), 32 | not(target_arch = "aarch64"))))] 33 | pub use zz::{Zu128, Zu256, Zu384}; 34 | 35 | // Module gf255 defines the generic GF255 type, with 64-bit limbs. 36 | // It is used for GF255e and GF255s. 
For GF25519, an alternate implementation 37 | // with 51-bit limbs is provided (in module gf255_m51) and used in some cases. 38 | // - If feature gf255_m64 is set, then GF255<19> is used. 39 | // - If feature gf255_m51 is set, then the alternate implementation is used. 40 | // - If neither gf255_m64 nor gf255_m51 is set, then the selected 41 | // implementation depends on the target architecture. 42 | // - Features gf255_m51 and gf255_m64 are mutually incompatible; they 43 | // cannot be both set at the same time. 44 | #[cfg(all( 45 | feature = "gf255_m51", 46 | feature = "gf255_m64", 47 | ))] 48 | compile_error!("cannot use m51 and m64 GF255 implementations simultaneously"); 49 | 50 | #[cfg(all( 51 | any( 52 | feature = "gf255", 53 | feature = "gf255e", 54 | feature = "gf255s", 55 | feature = "gf25519"), 56 | not(feature = "gf255_m51"), 57 | any( 58 | feature = "gf255_m64", 59 | not(target_arch = "riscv64")), 60 | ))] 61 | pub mod gf255_m64; 62 | 63 | #[cfg(all( 64 | any( 65 | feature = "gf255", 66 | feature = "gf255e", 67 | feature = "gf255s", 68 | feature = "gf25519"), 69 | not(feature = "gf255_m51"), 70 | any( 71 | feature = "gf255_m64", 72 | not(target_arch = "riscv64")), 73 | ))] 74 | pub use gf255_m64::GF255; 75 | 76 | #[cfg(all( 77 | any( 78 | feature = "gf255", 79 | feature = "gf255e", 80 | feature = "gf255s", 81 | feature = "gf25519"), 82 | not(feature = "gf255_m64"), 83 | any( 84 | feature = "gf255_m51", 85 | target_arch = "riscv64"), 86 | ))] 87 | pub mod gf255_m51; 88 | 89 | #[cfg(all( 90 | any( 91 | feature = "gf255", 92 | feature = "gf255e", 93 | feature = "gf255s", 94 | feature = "gf25519"), 95 | not(feature = "gf255_m64"), 96 | any( 97 | feature = "gf255_m51", 98 | target_arch = "riscv64"), 99 | ))] 100 | pub use gf255_m51::GF255; 101 | 102 | #[cfg(feature = "gf255e")] 103 | pub type GF255e = GF255<18651>; 104 | 105 | #[cfg(feature = "gf255s")] 106 | pub type GF255s = GF255<3957>; 107 | 108 | #[cfg(feature = "gf25519")] 109 | pub type
GF25519 = GF255<19>; 110 | 111 | #[cfg(any( 112 | feature = "modint256", 113 | feature = "gfp256", 114 | ))] 115 | pub mod modint; 116 | 117 | #[cfg(feature = "modint256")] 118 | pub use modint::ModInt256; 119 | 120 | #[cfg(all( 121 | feature = "modint256", 122 | not(target_arch = "aarch64")))] 123 | pub type ModInt256ct = ModInt256; 124 | 125 | #[cfg(all( 126 | feature = "modint256", 127 | target_arch = "aarch64"))] 128 | pub mod modint32; 129 | 130 | #[cfg(all( 131 | feature = "modint256", 132 | target_arch = "aarch64"))] 133 | pub use modint32::ModInt256ct; 134 | 135 | /* disabled -- not faster than the generic code 136 | #[cfg(feature = "gfp256")] 137 | pub mod gfp256; 138 | 139 | #[cfg(feature = "gfp256")] 140 | pub use gfp256::GFp256; 141 | */ 142 | 143 | #[cfg(feature = "gfp256")] 144 | pub type GFp256 = modint::ModInt256< 145 | 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 146 | 0x0000000000000000, 0xFFFFFFFF00000001>; 147 | 148 | #[cfg(feature = "gfp256")] 149 | impl GFp256 { 150 | /// Encodes a scalar element into bytes (little-endian). 
151 | pub fn encode(self) -> [u8; 32] { 152 | self.encode32() 153 | } 154 | } 155 | 156 | #[cfg(feature = "secp256k1")] 157 | pub mod gfsecp256k1; 158 | 159 | #[cfg(feature = "secp256k1")] 160 | pub use gfsecp256k1::GFsecp256k1; 161 | 162 | #[cfg(feature = "gf448")] 163 | pub mod gf448; 164 | 165 | #[cfg(feature = "gf448")] 166 | pub use gf448::GF448; 167 | 168 | pub mod lagrange; 169 | 170 | #[cfg(feature = "gfgen")] 171 | pub mod gfgen; 172 | 173 | #[cfg(all( 174 | feature = "gfb254", 175 | not(any( 176 | feature = "gfb254_m64", 177 | feature = "gfb254_arm64pmull")), 178 | any( 179 | feature = "gfb254_x86clmul", 180 | all( 181 | target_arch = "x86_64", 182 | target_feature = "sse4.1", 183 | target_feature = "pclmulqdq"))))] 184 | pub mod gfb254_x86clmul; 185 | 186 | #[cfg(all( 187 | feature = "gfb254", 188 | not(any( 189 | feature = "gfb254_m64", 190 | feature = "gfb254_arm64pmull")), 191 | any( 192 | feature = "gfb254_x86clmul", 193 | all( 194 | target_arch = "x86_64", 195 | target_feature = "sse4.1", 196 | target_feature = "pclmulqdq"))))] 197 | pub use gfb254_x86clmul::{GFb127, GFb254}; 198 | 199 | #[cfg(all( 200 | feature = "gfb254", 201 | not(any( 202 | feature = "gfb254_x86clmul", 203 | feature = "gfb254_m64")), 204 | any( 205 | feature = "gfb254_arm64pmull", 206 | all( 207 | target_arch = "aarch64", 208 | target_feature = "aes"))))] 209 | pub mod gfb254_arm64pmull; 210 | 211 | #[cfg(all( 212 | feature = "gfb254", 213 | not(any( 214 | feature = "gfb254_x86clmul", 215 | feature = "gfb254_m64")), 216 | any( 217 | feature = "gfb254_arm64pmull", 218 | all( 219 | target_arch = "aarch64", 220 | target_feature = "aes"))))] 221 | pub use gfb254_arm64pmull::{GFb127, GFb254}; 222 | 223 | #[cfg(all( 224 | feature = "gfb254", 225 | not(any( 226 | feature = "gfb254_x86clmul", 227 | feature = "gfb254_arm64pmull")), 228 | any( 229 | feature = "gfb254_m64", 230 | not(any( 231 | all( 232 | target_arch = "x86_64", 233 | target_feature = "sse4.1", 234 | target_feature = 
"pclmulqdq"), 235 | all( 236 | target_arch = "aarch64", 237 | target_feature = "aes"))))))] 238 | pub mod gfb254_m64; 239 | 240 | #[cfg(all( 241 | feature = "gfb254", 242 | not(any( 243 | feature = "gfb254_x86clmul", 244 | feature = "gfb254_arm64pmull")), 245 | any( 246 | feature = "gfb254_m64", 247 | not(any( 248 | all( 249 | target_arch = "x86_64", 250 | target_feature = "sse4.1", 251 | target_feature = "pclmulqdq"), 252 | all( 253 | target_arch = "aarch64", 254 | target_feature = "aes"))))))] 255 | pub use gfb254_m64::{GFb127, GFb254}; 256 | 257 | // The 32-bit variants of the addcarry, umull,... functions. 258 | pub(crate) mod util32; 259 | 260 | // Carrying addition and subtraction should use u64::carrying_add() 261 | // and u64::borrowing_sub(), but these functions are currently only 262 | // experimental. 263 | 264 | // Add with carry; carry is 0 or 1. 265 | // (x, y, c_in) -> x + y + c_in mod 2^64, c_out 266 | 267 | #[cfg(target_arch = "x86_64")] 268 | #[allow(dead_code)] 269 | #[inline(always)] 270 | pub fn addcarry_u64(x: u64, y: u64, c: u8) -> (u64, u8) { 271 | use core::arch::x86_64::_addcarry_u64; 272 | unsafe { 273 | let mut d = 0u64; 274 | let cc = _addcarry_u64(c, x, y, &mut d); 275 | (d, cc) 276 | } 277 | } 278 | 279 | #[cfg(not(target_arch = "x86_64"))] 280 | #[allow(dead_code)] 281 | #[inline(always)] 282 | pub const fn addcarry_u64(x: u64, y: u64, c: u8) -> (u64, u8) { 283 | let z = (x as u128).wrapping_add(y as u128).wrapping_add(c as u128); 284 | (z as u64, (z >> 64) as u8) 285 | } 286 | 287 | // Subtract with borrow; borrow is 0 or 1. 
288 | // (x, y, c_in) -> x - y - c_in mod 2^64, c_out 289 | 290 | #[cfg(target_arch = "x86_64")] 291 | #[allow(dead_code)] 292 | #[inline(always)] 293 | pub fn subborrow_u64(x: u64, y: u64, c: u8) -> (u64, u8) { 294 | use core::arch::x86_64::_subborrow_u64; 295 | unsafe { 296 | let mut d = 0u64; 297 | let cc = _subborrow_u64(c, x, y, &mut d); 298 | (d, cc) 299 | } 300 | } 301 | 302 | #[cfg(not(target_arch = "x86_64"))] 303 | #[allow(dead_code)] 304 | #[inline(always)] 305 | pub const fn subborrow_u64(x: u64, y: u64, c: u8) -> (u64, u8) { 306 | let z = (x as u128).wrapping_sub(y as u128).wrapping_sub(c as u128); 307 | (z as u64, (z >> 127) as u8) 308 | } 309 | 310 | // Compute x*y over 128 bits, returned as two 64-bit words (lo, hi) 311 | #[allow(dead_code)] 312 | #[inline(always)] 313 | pub const fn umull(x: u64, y: u64) -> (u64, u64) { 314 | let z = (x as u128) * (y as u128); 315 | (z as u64, (z >> 64) as u64) 316 | } 317 | 318 | // Compute x*y+z over 128 bits, returned as two 64-bit words (lo, hi) 319 | #[allow(dead_code)] 320 | #[inline(always)] 321 | pub const fn umull_add(x: u64, y: u64, z: u64) -> (u64, u64) { 322 | let t = ((x as u128) * (y as u128)).wrapping_add(z as u128); 323 | (t as u64, (t >> 64) as u64) 324 | } 325 | 326 | // Compute x*y+z1+z2 over 128 bits, returned as two 64-bit words (lo, hi) 327 | #[allow(dead_code)] 328 | #[inline(always)] 329 | pub const fn umull_add2(x: u64, y: u64, z1: u64, z2: u64) -> (u64, u64) { 330 | let t = ((x as u128) * (y as u128)) 331 | .wrapping_add(z1 as u128).wrapping_add(z2 as u128); 332 | (t as u64, (t >> 64) as u64) 333 | } 334 | 335 | // Compute x1*y1+x2*y2 over 128 bits, returned as two 64-bit words (lo, hi) 336 | #[allow(dead_code)] 337 | #[inline(always)] 338 | pub const fn umull_x2(x1: u64, y1: u64, x2: u64, y2: u64) -> (u64, u64) { 339 | let z1 = (x1 as u128) * (y1 as u128); 340 | let z2 = (x2 as u128) * (y2 as u128); 341 | let z = z1.wrapping_add(z2); 342 | (z as u64, (z >> 64) as u64) 343 | } 344 | 345 | 
// Compute x1*y1+x2*y2+z3 over 128 bits, returned as two 64-bit words (lo, hi)
// (the sum is taken modulo 2^128)
#[allow(dead_code)]
#[inline(always)]
pub const fn umull_x2_add(x1: u64, y1: u64, x2: u64, y2: u64, z3: u64) -> (u64, u64) {
    let w = ((x1 as u128) * (y1 as u128))
        .wrapping_add((x2 as u128) * (y2 as u128))
        .wrapping_add(z3 as u128);
    (w as u64, (w >> 64) as u64)
}

// Return 0xFFFFFFFFFFFFFFFF if x >= 0x8000000000000000, 0 otherwise
// (i.e. take the sign bit of the signed interpretation, and expand it
// to 64 bits).
#[allow(dead_code)]
#[inline(always)]
pub const fn sgnw(x: u64) -> u64 {
    // Arithmetic right shift replicates the sign bit over the whole word.
    ((x as i64) >> 63) as u64
}

// Get the number of leading zeros in a 64-bit value.
// On some platforms, u64::leading_zeros() performs the computation with
// a code sequence that will be constant-time on most/all CPUs
// compatible with that platform (e.g. any 64-bit x86 with support for
// the LZCNT opcode); on others, a non-constant-time sequence would be
// used, and we must instead rely on a safe (but slower) routine.
//
// On x86 without LZCNT, u64::leading_zeros() uses a BSR opcode, but since
// BSR yields an undefined result on an input of value 0, u64::leading_zeros()
// includes an explicit test and a conditional jump for that case, and that
// is not (in general) constant-time.
#[cfg(any(
    all(target_arch = "x86_64", target_feature = "lzcnt"),
    target_arch = "aarch64",
))]
#[allow(dead_code)]
#[inline(always)]
pub const fn lzcnt(x: u64) -> u32 {
    // On these targets, leading_zeros() maps to a dedicated opcode
    // (LZCNT / CLZ) with no data-dependent branch.
    x.leading_zeros()
}

#[cfg(not(any(
    all(target_arch = "x86_64", target_feature = "lzcnt"),
    target_arch = "aarch64",
)))]
#[allow(dead_code)]
pub const fn lzcnt(x: u64) -> u32 {
    // Local sign-extension helper (same semantics as sgnw()).
    const fn ext(v: u64) -> u64 {
        ((v as i64) >> 63) as u64
    }

    // Constant-time binary search: at each step, m is all-ones exactly
    // when the upper half of the current window is zero; in that case we
    // add the half-width to the count and keep the lower half, otherwise
    // we keep the upper half.
    let m = ext((x >> 32).wrapping_sub(1));
    let s = m & 32;
    let x = (x >> 32) ^ (m & (x ^ (x >> 32)));

    let m = ext((x >> 16).wrapping_sub(1));
    let s = s | (m & 16);
    let x = (x >> 16) ^ (m & (x ^ (x >> 16)));

    let m = ext((x >> 8).wrapping_sub(1));
    let s = s | (m & 8);
    let x = (x >> 8) ^ (m & (x ^ (x >> 8)));

    let m = ext((x >> 4).wrapping_sub(1));
    let s = s | (m & 4);
    let x = (x >> 4) ^ (m & (x ^ (x >> 4)));

    let m = ext((x >> 2).wrapping_sub(1));
    let s = s | (m & 2);
    let x = (x >> 2) ^ (m & (x ^ (x >> 2)));

    // At this point, x fits on 2 bits. Number of leading zeros is then:
    //   x = 0 -> 2
    //   x = 1 -> 1
    //   x = 2 -> 0
    //   x = 3 -> 0
    let s = s.wrapping_add(2u64.wrapping_sub(x) & (x.wrapping_sub(3) >> 2));

    s as u32
}

// ----- /src/backend/w64/util32.rs: -----

// We define here the 32-bit variants of addcarry, umull,...
// They are meant to be used by code that requires 32-bit computations
// even on 64-bit platforms, because the architecture's 64-bit multiplication
// opcode is not constant-time.

// Carrying addition and subtraction should use u32::carrying_add()
// and u32::borrowing_sub(), but these functions are currently only
// experimental.
// Add with carry; carry is 0 or 1.
// (x, y, c_in) -> x + y + c_in mod 2^32, c_out

// x86-64 path: use the ADC-based intrinsic.
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn addcarry_u32(x: u32, y: u32, c: u8) -> (u32, u8) {
    use core::arch::x86_64::_addcarry_u32;
    unsafe {
        let mut out = 0u32;
        let cc = _addcarry_u32(c, x, y, &mut out);
        (out, cc)
    }
}

// Portable path: a 64-bit sum cannot overflow here; the outgoing carry
// is bit 32 of that sum.
#[cfg(not(target_arch = "x86_64"))]
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn addcarry_u32(x: u32, y: u32, c: u8) -> (u32, u8) {
    let w = (x as u64).wrapping_add(y as u64).wrapping_add(c as u64);
    (w as u32, (w >> 32) as u8)
}

// Subtract with borrow; borrow is 0 or 1.
// (x, y, c_in) -> x - y - c_in mod 2^32, c_out

// x86-64 path: use the SBB-based intrinsic.
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn subborrow_u32(x: u32, y: u32, c: u8) -> (u32, u8) {
    use core::arch::x86_64::_subborrow_u32;
    unsafe {
        let mut out = 0u32;
        let bb = _subborrow_u32(c, x, y, &mut out);
        (out, bb)
    }
}

// Portable path: when a borrow occurs, the 64-bit difference wraps
// around and its top bit (bit 63) is set.
#[cfg(not(target_arch = "x86_64"))]
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn subborrow_u32(x: u32, y: u32, c: u8) -> (u32, u8) {
    let w = (x as u64).wrapping_sub(y as u64).wrapping_sub(c as u64);
    (w as u32, (w >> 63) as u8)
}

// Compute x*y over 64 bits, returned as two 32-bit words (lo, hi)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull(x: u32, y: u32) -> (u32, u32) {
    let w = (x as u64) * (y as u64);
    (w as u32, (w >> 32) as u32)
}

// Compute x*y+z over 64 bits, returned as two 32-bit words (lo, hi)
// (this cannot overflow: (2^32-1)^2 + (2^32-1) < 2^64)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull_add(x: u32, y: u32, z: u32) -> (u32, u32) {
    let w = ((x as u64) * (y as u64)).wrapping_add(z as u64);
    (w as u32, (w >> 32) as u32)
}

// Compute x*y+z1+z2
// over 64 bits, returned as two 32-bit words (lo, hi)
// (this cannot overflow: (2^32-1)^2 + 2*(2^32-1) < 2^64)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull_add2(x: u32, y: u32, z1: u32, z2: u32) -> (u32, u32) {
    let w = ((x as u64) * (y as u64))
        .wrapping_add(z1 as u64)
        .wrapping_add(z2 as u64);
    (w as u32, (w >> 32) as u32)
}

// Compute x1*y1+x2*y2 over 64 bits, returned as two 32-bit words (lo, hi)
// (the sum is taken modulo 2^64)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull_x2(x1: u32, y1: u32, x2: u32, y2: u32) -> (u32, u32) {
    let w = ((x1 as u64) * (y1 as u64))
        .wrapping_add((x2 as u64) * (y2 as u64));
    (w as u32, (w >> 32) as u32)
}

// Compute x1*y1+x2*y2+z3 over 64 bits, returned as two 32-bit words (lo, hi)
// (the sum is taken modulo 2^64)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull_x2_add(x1: u32, y1: u32, x2: u32, y2: u32, z3: u32) -> (u32, u32) {
    let w = ((x1 as u64) * (y1 as u64))
        .wrapping_add((x2 as u64) * (y2 as u64))
        .wrapping_add(z3 as u64);
    (w as u32, (w >> 32) as u32)
}

// Return 0xFFFFFFFF if x >= 0x80000000, 0 otherwise (i.e. take the sign
// bit of the signed interpretation, and expand it to 32 bits).
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn sgnw(x: u32) -> u32 {
    // Arithmetic right shift replicates the sign bit over the whole word.
    ((x as i32) >> 31) as u32
}

// Get the number of leading zeros in a 32-bit value.
// On some platforms, u32::leading_zeros() performs the computation with
// a code sequence that will be constant-time on most/all CPUs
// compatible with that platform (e.g. any 32-bit x86 with support for
// the LZCNT opcode); on others, a non-constant-time sequence would be
// used, and we must instead rely on a safe (but slower) routine.
115 | // 116 | // On x86 without LZCNT, u32::leading_zeros() uses a BSR opcode, but since 117 | // BSR yields an undefined result on an input of value 0, u32::leading_zeros() 118 | // includes an explicit test and a conditional jump for that case, and that 119 | // is not (in general) constant-time. 120 | #[cfg(any( 121 | all(target_arch = "x86_64", target_feature = "lzcnt"), 122 | target_arch = "aarch64", 123 | ))] 124 | #[allow(dead_code)] 125 | #[inline(always)] 126 | pub(crate) const fn lzcnt(x: u32) -> u32 { 127 | x.leading_zeros() 128 | } 129 | 130 | #[cfg(not(any( 131 | all(target_arch = "x86_64", target_feature = "lzcnt"), 132 | target_arch = "aarch64", 133 | )))] 134 | #[allow(dead_code)] 135 | pub(crate) const fn lzcnt(x: u32) -> u32 { 136 | let m = sgnw((x >> 16).wrapping_sub(1)); 137 | let s = m & 16; 138 | let x = (x >> 16) ^ (m & (x ^ (x >> 16))); 139 | 140 | let m = sgnw((x >> 8).wrapping_sub(1)); 141 | let s = s | (m & 8); 142 | let x = (x >> 8) ^ (m & (x ^ (x >> 8))); 143 | 144 | let m = sgnw((x >> 4).wrapping_sub(1)); 145 | let s = s | (m & 4); 146 | let x = (x >> 4) ^ (m & (x ^ (x >> 4))); 147 | 148 | let m = sgnw((x >> 2).wrapping_sub(1)); 149 | let s = s | (m & 2); 150 | let x = (x >> 2) ^ (m & (x ^ (x >> 2))); 151 | 152 | // At this point, x fits on 2 bits. Number of leading zeros is then: 153 | // x = 0 -> 2 154 | // x = 1 -> 1 155 | // x = 2 -> 0 156 | // x = 3 -> 0 157 | let s = s.wrapping_add(2u32.wrapping_sub(x) & ((x.wrapping_sub(3) >> 2))); 158 | 159 | s as u32 160 | } 161 | -------------------------------------------------------------------------------- /src/backend/w64/zz.rs: -------------------------------------------------------------------------------- 1 | use core::convert::TryFrom; 2 | 3 | use super::{addcarry_u64, subborrow_u64, umull, umull_add, umull_add2, sgnw}; 4 | 5 | /// A custom 128-bit integer with some constant-time operations. 
#[derive(Clone, Copy, Debug)]
// Two 64-bit limbs, little-endian limb order.
pub struct Zu128([u64; 2]);

impl Zu128 {

    pub const ZERO: Self = Self([0; 2]);

    // Build a value from its two 64-bit limbs (little-endian order).
    #[inline(always)]
    pub const fn w64le(x0: u64, x1: u64) -> Self {
        Self([ x0, x1 ])
    }

    // Decode exactly 16 little-endian bytes into a value; returns None
    // if the source slice does not have length 16.
    // (NOTE(review): the return type reads `Option` in the flattened dump;
    // the `<Self>` parameter was evidently stripped by the export tool and
    // is restored here.)
    #[inline(always)]
    pub fn decode(buf: &[u8]) -> Option<Self> {
        if buf.len() != 16 {
            None
        } else {
            let mut x = Self::ZERO;
            for i in 0..2 {
                // try_from cannot fail here: each chunk is exactly 8 bytes.
                x.0[i] = u64::from_le_bytes(*<&[u8; 8]>::try_from(
                    &buf[(8 * i)..(8 * i + 8)]).unwrap());
            }
            Some(x)
        }
    }

    // Full 128x128->256 unsigned product (schoolbook over 64-bit limbs).
    #[inline(always)]
    pub fn mul128x128(self, b: &Self) -> Zu256 {
        let (a0, a1) = (self.0[0], self.0[1]);
        let (b0, b1) = (b.0[0], b.0[1]);
        let mut d = [0u64; 4];
        let mut hi;
        (d[0], hi) = umull(a0, b0);
        (d[1], d[2]) = umull_add(a1, b0, hi);
        (d[1], hi) = umull_add(a0, b1, d[1]);
        (d[2], d[3]) = umull_add2(a1, b1, d[2], hi);
        Zu256(d)
    }

    // 128x128 product truncated to its low 128 bits; the a1*b1 term only
    // contributes at bit 128 and above, so it is omitted, and the cross
    // products are accumulated with plain wrapping arithmetic.
    #[inline(always)]
    pub fn mul128x128trunc(self, b: &Self) -> Self {
        let (a0, a1) = (self.0[0], self.0[1]);
        let (b0, b1) = (b.0[0], b.0[1]);
        let (d0, hi) = umull(a0, b0);
        let d1 = a0.wrapping_mul(b1)
            .wrapping_add(a1.wrapping_mul(b0))
            .wrapping_add(hi);
        Self([ d0, d1 ])
    }

    /// Interpreting this value as a signed 128-bit integer, return its
    /// absolute value (in a `u128` type) and the original sign (0xFFFFFFFF
    /// for negative, 0x00000000 for non-negative).
    #[inline(always)]
    pub fn abs(self) -> (u128, u32) {
        let (a0, a1) = (self.0[0], self.0[1]);
        // s is all-ones if the value is negative, all-zeros otherwise;
        // (x ^ s) - s is then a constant-time conditional negation.
        let s = sgnw(a1);
        let (d0, cc) = subborrow_u64(a0 ^ s, s, 0);
        let (d1, _) = subborrow_u64(a1 ^ s, s, cc);
        ((d0 as u128) | ((d1 as u128) << 64), s as u32)
    }

    /// Interpreting this value as a signed 128-bit integer `x`, return
    /// the absolute value of `2*x+1` (as a `u128` type) and the original
    /// sign (0xFFFFFFFF for negative, 0x00000000 for non-negative).
71 | #[inline(always)] 72 | pub fn double_inc_abs(self) -> (u128, u32) { 73 | let (a0, a1) = (self.0[0], self.0[1]); 74 | let s = sgnw(a1); 75 | let b0 = (a0 << 1) | 1; 76 | let b1 = (a0 >> 63) | (a1 << 1); 77 | let (d0, cc) = subborrow_u64(b0 ^ s, s, 0); 78 | let (d1, _) = subborrow_u64(b1 ^ s, s, cc); 79 | ((d0 as u128) | ((d1 as u128) << 64), s as u32) 80 | } 81 | 82 | #[inline(always)] 83 | pub fn set_sub(&mut self, b: &Self) { 84 | let cc; 85 | (self.0[0], cc) = subborrow_u64(self.0[0], b.0[0], 0); 86 | (self.0[1], _) = subborrow_u64(self.0[1], b.0[1], cc); 87 | } 88 | 89 | #[inline(always)] 90 | pub fn set_sub_u32(&mut self, b: u32) { 91 | let cc; 92 | (self.0[0], cc) = subborrow_u64(self.0[0], b as u64, 0); 93 | (self.0[1], _) = subborrow_u64(self.0[1], 0, cc); 94 | } 95 | } 96 | 97 | /// A custom 256-bit integer with some constant-time operations. 98 | #[derive(Clone, Copy, Debug)] 99 | pub struct Zu256([u64; 4]); 100 | 101 | impl Zu256 { 102 | 103 | pub const ZERO: Self = Self([0; 4]); 104 | 105 | #[inline(always)] 106 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64) -> Self { 107 | Self([ x0, x1, x2, x3 ]) 108 | } 109 | 110 | #[inline(always)] 111 | pub fn decode(buf: &[u8]) -> Option { 112 | if buf.len() != 32 { 113 | None 114 | } else { 115 | let mut x = Self::ZERO; 116 | for i in 0..4 { 117 | x.0[i] = u64::from_le_bytes(*<&[u8; 8]>::try_from( 118 | &buf[(8 * i)..(8 * i + 8)]).unwrap()); 119 | } 120 | Some(x) 121 | } 122 | } 123 | 124 | #[inline(always)] 125 | pub fn trunc128(self) -> Zu128 { 126 | Zu128([ self.0[0], self.0[1] ]) 127 | } 128 | 129 | #[inline(always)] 130 | pub fn mul256x128(self, b: &Zu128) -> Zu384 { 131 | let (a0, a1, a2, a3) = (self.0[0], self.0[1], self.0[2], self.0[3]); 132 | let (b0, b1) = (b.0[0], b.0[1]); 133 | let mut d = [0u64; 6]; 134 | let mut hi; 135 | (d[0], hi) = umull(a0, b0); 136 | (d[1], hi) = umull_add(a1, b0, hi); 137 | (d[2], hi) = umull_add(a2, b0, hi); 138 | (d[3], d[4]) = umull_add(a3, b0, hi); 139 | 
(d[1], hi) = umull_add(a0, b1, d[1]); 140 | (d[2], hi) = umull_add2(a1, b1, d[2], hi); 141 | (d[3], hi) = umull_add2(a2, b1, d[3], hi); 142 | (d[4], d[5]) = umull_add2(a3, b1, d[4], hi); 143 | Zu384(d) 144 | } 145 | 146 | /// Return `floor((self + b)/2^224) mod 2^32` (i.e. addition truncated 147 | /// to 256 bits, then return the high 32 bits of the 256-bit result). 148 | #[inline(always)] 149 | pub fn add_rsh224(self, b: &Self) -> u32 { 150 | let mut cc; 151 | (_, cc) = addcarry_u64(self.0[0], b.0[0], 0); 152 | for i in 1..3 { 153 | (_, cc) = addcarry_u64(self.0[i], b.0[i], cc); 154 | } 155 | let (w, _) = addcarry_u64(self.0[3], b.0[3], cc); 156 | (w >> 32) as u32 157 | } 158 | 159 | /// Return the borrow resulting from the subtraction of `b` from `self`; 160 | /// returned value is 1 in case of borrow, 0 otherwise. The subtraction 161 | /// result itself is discarded. 162 | #[inline(always)] 163 | pub fn borrow(self, b: &Self) -> u32 { 164 | let mut cc; 165 | (_, cc) = subborrow_u64(self.0[0], b.0[0], 0); 166 | for i in 1..4 { 167 | (_, cc) = subborrow_u64(self.0[i], b.0[i], cc); 168 | } 169 | cc as u32 170 | } 171 | } 172 | 173 | /// A custom 384-bit integer with some constant-time operations. 174 | #[derive(Clone, Copy, Debug)] 175 | pub struct Zu384([u64; 6]); 176 | 177 | impl Zu384 { 178 | 179 | pub const ZERO: Self = Self([0; 6]); 180 | 181 | #[inline(always)] 182 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64, x4: u64, x5: u64) 183 | -> Self 184 | { 185 | Self([ x0, x1, x2, x3, x4, x5 ]) 186 | } 187 | 188 | #[inline(always)] 189 | pub fn set_add(&mut self, b: &Self) { 190 | let mut cc = 0; 191 | for i in 0..6 { 192 | (self.0[i], cc) = addcarry_u64(self.0[i], b.0[i], cc); 193 | } 194 | } 195 | 196 | /// Returns `self mod 2^n` and `(floor(self/2^n) + cc) mod 2^128`. 197 | /// Shift count `n` MUST be between 225 and 255 (inclusive). 
198 | #[inline(always)] 199 | pub fn trunc_and_rsh_cc(&mut self, b: u32, n: u32) -> (Zu256, Zu128) { 200 | let n1 = n - 192; 201 | let n2 = 64 - n1; 202 | let (d0, cc) = addcarry_u64( 203 | (self.0[3] >> n1) | (self.0[4] << n2), b as u64, 0); 204 | let (d1, _) = addcarry_u64( 205 | (self.0[4] >> n1) | (self.0[5] << n2), 0, cc); 206 | let c0 = self.0[0]; 207 | let c1 = self.0[1]; 208 | let c2 = self.0[2]; 209 | let c3 = self.0[3] & ((!0u64) >> n2); 210 | (Zu256([ c0, c1, c2, c3 ]), Zu128([ d0, d1 ])) 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/backend/w64/zz32.rs: -------------------------------------------------------------------------------- 1 | // This file is used for the Zu* types (used in splitting scalars for some 2 | // GLV and GLS curves) on architectures where 32x32->64 multiplications are 3 | // constant-time, but 64x64->128 multiplications are not (e.g. the ARM 4 | // Cortex-A55). 5 | 6 | use core::convert::TryFrom; 7 | 8 | use super::util32::{addcarry_u32, subborrow_u32, umull, umull_add, umull_add2, sgnw}; 9 | 10 | /// A custom 128-bit integer with some constant-time operations. 
11 | #[derive(Clone, Copy, Debug)] 12 | pub struct Zu128([u32; 4]); 13 | 14 | impl Zu128 { 15 | 16 | pub const ZERO: Self = Self([0; 4]); 17 | 18 | #[inline(always)] 19 | pub const fn w64le(x0: u64, x1: u64) -> Self { 20 | Self([ x0 as u32, (x0 >> 32) as u32, x1 as u32, (x1 >> 32) as u32 ]) 21 | } 22 | 23 | #[inline(always)] 24 | pub fn decode(buf: &[u8]) -> Option { 25 | if buf.len() != 16 { 26 | None 27 | } else { 28 | let mut x = Self::ZERO; 29 | for i in 0..4 { 30 | x.0[i] = u32::from_le_bytes(*<&[u8; 4]>::try_from( 31 | &buf[(4 * i)..(4 * i + 4)]).unwrap()); 32 | } 33 | Some(x) 34 | } 35 | } 36 | 37 | #[inline(always)] 38 | pub fn mul128x128(self, b: &Self) -> Zu256 { 39 | let mut d = [0u32; 8]; 40 | for i in 0..4 { 41 | let f = self.0[i]; 42 | let mut hi = 0; 43 | for j in 0..4 { 44 | (d[i + j], hi) = umull_add2(f, b.0[j], d[i + j], hi); 45 | } 46 | d[i + 4] = hi; 47 | } 48 | Zu256(d) 49 | } 50 | 51 | #[inline(always)] 52 | pub fn mul128x128trunc(self, b: &Self) -> Self { 53 | let f = self.0[0]; 54 | let (d0, hi) = umull(f, b.0[0]); 55 | let (d1, hi) = umull_add(f, b.0[1], hi); 56 | let (d2, hi) = umull_add(f, b.0[2], hi); 57 | let d3 = f.wrapping_mul(b.0[3]).wrapping_add(hi); 58 | let f = self.0[1]; 59 | let (d1, hi) = umull_add(f, b.0[0], d1); 60 | let (d2, hi) = umull_add2(f, b.0[1], d2, hi); 61 | let d3 = f.wrapping_mul(b.0[2]).wrapping_add(d3).wrapping_add(hi); 62 | let f = self.0[2]; 63 | let (d2, hi) = umull_add(f, b.0[0], d2); 64 | let d3 = f.wrapping_mul(b.0[1]).wrapping_add(d3).wrapping_add(hi); 65 | let f = self.0[3]; 66 | let d3 = f.wrapping_mul(b.0[0]).wrapping_add(d3); 67 | Self([ d0, d1, d2, d3 ]) 68 | } 69 | 70 | /// Interpreting this value as a signed 128-bit integer, return its 71 | /// absolute value (in a `u128` type) and the original sign (0xFFFFFFFF 72 | /// for negative, 0x00000000 for non-negative). 
73 | #[inline(always)] 74 | pub fn abs(self) -> (u128, u32) { 75 | let s = sgnw(self.0[3]); 76 | let (d0, cc) = subborrow_u32(self.0[0] ^ s, s, 0); 77 | let (d1, cc) = subborrow_u32(self.0[1] ^ s, s, cc); 78 | let (d2, cc) = subborrow_u32(self.0[2] ^ s, s, cc); 79 | let (d3, _) = subborrow_u32(self.0[3] ^ s, s, cc); 80 | ((d0 as u128) | ((d1 as u128) << 32) 81 | | ((d2 as u128) << 64) | ((d3 as u128) << 96), s) 82 | } 83 | 84 | /// Interpreting this value as a signed 128-bit integer `x`, return 85 | /// the absolute value of `2*x+1` (as a `u128` type) and the original 86 | /// sign (0xFFFFFFFF for negative, 0x00000000 for non-negative). 87 | #[inline(always)] 88 | pub fn double_inc_abs(self) -> (u128, u32) { 89 | let s = sgnw(self.0[3]); 90 | let b0 = (self.0[0] << 1) | 1; 91 | let b1 = (self.0[0] >> 31) | (self.0[1] << 1); 92 | let b2 = (self.0[1] >> 31) | (self.0[2] << 1); 93 | let b3 = (self.0[2] >> 31) | (self.0[3] << 1); 94 | let (d0, cc) = subborrow_u32(b0 ^ s, s, 0); 95 | let (d1, cc) = subborrow_u32(b1 ^ s, s, cc); 96 | let (d2, cc) = subborrow_u32(b2 ^ s, s, cc); 97 | let (d3, _) = subborrow_u32(b3 ^ s, s, cc); 98 | ((d0 as u128) | ((d1 as u128) << 32) 99 | | ((d2 as u128) << 64) | ((d3 as u128) << 96), s) 100 | } 101 | 102 | #[inline(always)] 103 | pub fn set_sub(&mut self, b: &Self) { 104 | let mut cc = 0; 105 | for i in 0..4 { 106 | (self.0[i], cc) = subborrow_u32(self.0[i], b.0[i], cc); 107 | } 108 | } 109 | 110 | #[inline(always)] 111 | pub fn set_sub_u32(&mut self, b: u32) { 112 | let mut cc; 113 | (self.0[0], cc) = subborrow_u32(self.0[0], b, 0); 114 | for i in 1..4 { 115 | (self.0[i], cc) = subborrow_u32(self.0[i], 0, cc); 116 | } 117 | } 118 | } 119 | 120 | /// A custom 256-bit integer with some constant-time operations. 
121 | #[derive(Clone, Copy, Debug)] 122 | pub struct Zu256([u32; 8]); 123 | 124 | impl Zu256 { 125 | 126 | pub const ZERO: Self = Self([0; 8]); 127 | 128 | #[inline(always)] 129 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64) -> Self { 130 | Self([ 131 | x0 as u32, (x0 >> 32) as u32, 132 | x1 as u32, (x1 >> 32) as u32, 133 | x2 as u32, (x2 >> 32) as u32, 134 | x3 as u32, (x3 >> 32) as u32, 135 | ]) 136 | } 137 | 138 | #[inline(always)] 139 | pub fn decode(buf: &[u8]) -> Option { 140 | if buf.len() != 32 { 141 | None 142 | } else { 143 | let mut x = Self::ZERO; 144 | for i in 0..8 { 145 | x.0[i] = u32::from_le_bytes(*<&[u8; 4]>::try_from( 146 | &buf[(4 * i)..(4 * i + 4)]).unwrap()); 147 | } 148 | Some(x) 149 | } 150 | } 151 | 152 | #[inline(always)] 153 | pub fn trunc128(self) -> Zu128 { 154 | Zu128([ self.0[0], self.0[1], self.0[2], self.0[3] ]) 155 | } 156 | 157 | #[inline(always)] 158 | pub fn mul256x128(self, b: &Zu128) -> Zu384 { 159 | let mut d = [0u32; 12]; 160 | for i in 0..8 { 161 | let f = self.0[i]; 162 | let mut hi = 0; 163 | for j in 0..4 { 164 | (d[i + j], hi) = umull_add2(f, b.0[j], d[i + j], hi); 165 | } 166 | d[i + 4] = hi; 167 | } 168 | Zu384(d) 169 | } 170 | 171 | /// Return `floor((self + b)/2^224) mod 2^32` (i.e. addition truncated 172 | /// to 256 bits, then return the high 32 bits of the 256-bit result). 173 | #[inline(always)] 174 | pub fn add_rsh224(self, b: &Self) -> u32 { 175 | let mut cc; 176 | (_, cc) = addcarry_u32(self.0[0], b.0[0], 0); 177 | for i in 1..7 { 178 | (_, cc) = addcarry_u32(self.0[i], b.0[i], cc); 179 | } 180 | let (w, _) = addcarry_u32(self.0[7], b.0[7], cc); 181 | w 182 | } 183 | 184 | /// Return the borrow resulting from the subtraction of `b` from `self`; 185 | /// returned value is 1 in case of borrow, 0 otherwise. The subtraction 186 | /// result itself is discarded. 
187 | #[inline(always)] 188 | pub fn borrow(self, b: &Self) -> u32 { 189 | let mut cc; 190 | (_, cc) = subborrow_u32(self.0[0], b.0[0], 0); 191 | for i in 1..8 { 192 | (_, cc) = subborrow_u32(self.0[i], b.0[i], cc); 193 | } 194 | cc as u32 195 | } 196 | } 197 | 198 | /// A custom 384-bit integer with some constant-time operations. 199 | #[derive(Clone, Copy, Debug)] 200 | pub struct Zu384([u32; 12]); 201 | 202 | impl Zu384 { 203 | 204 | pub const ZERO: Self = Self([0; 12]); 205 | 206 | #[inline(always)] 207 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64, x4: u64, x5: u64) 208 | -> Self 209 | { 210 | Self([ 211 | x0 as u32, (x0 >> 32) as u32, 212 | x1 as u32, (x1 >> 32) as u32, 213 | x2 as u32, (x2 >> 32) as u32, 214 | x3 as u32, (x3 >> 32) as u32, 215 | x4 as u32, (x4 >> 32) as u32, 216 | x5 as u32, (x5 >> 32) as u32, 217 | ]) 218 | } 219 | 220 | #[inline(always)] 221 | pub fn set_add(&mut self, b: &Self) { 222 | let mut cc = 0; 223 | for i in 0..12 { 224 | (self.0[i], cc) = addcarry_u32(self.0[i], b.0[i], cc); 225 | } 226 | } 227 | 228 | /// Returns `self mod 2^n` and `(floor(self/2^n) + cc) mod 2^128`. 229 | /// Shift count `n` MUST be between 225 and 255 (inclusive). 
230 | #[inline(always)] 231 | pub fn trunc_and_rsh_cc(&mut self, b: u32, n: u32) -> (Zu256, Zu128) { 232 | let n1 = n - 224; 233 | let n2 = 32 - n1; 234 | let (d0, cc) = addcarry_u32( 235 | (self.0[7] >> n1) | (self.0[8] << n2), b, 0); 236 | let (d1, cc) = addcarry_u32( 237 | (self.0[8] >> n1) | (self.0[9] << n2), 0, cc); 238 | let (d2, cc) = addcarry_u32( 239 | (self.0[9] >> n1) | (self.0[10] << n2), 0, cc); 240 | let (d3, _) = addcarry_u32( 241 | (self.0[10] >> n1) | (self.0[11] << n2), 0, cc); 242 | let mut e = [0u32; 8]; 243 | e[..].copy_from_slice(&self.0[..8]); 244 | e[7] &= (!0u32) >> n2; 245 | (Zu256(e), Zu128([ d0, d1, d2, d3 ])) 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/field.rs: -------------------------------------------------------------------------------- 1 | //! Finite fields. 2 | //! 3 | //! This module defines a few specific finite fields, used as base fields 4 | //! by various curves. These are merely specializations of the 5 | //! backend-provided `GF255` and `ModInt256` types. 6 | 7 | #[cfg(feature = "gf255e")] 8 | pub use crate::backend::GF255e; 9 | 10 | #[cfg(feature = "gf255s")] 11 | pub use crate::backend::GF255s; 12 | 13 | #[cfg(feature = "gf25519")] 14 | pub use crate::backend::GF25519; 15 | 16 | #[cfg(feature = "modint256")] 17 | pub use crate::backend::ModInt256; 18 | 19 | #[cfg(feature = "modint256")] 20 | pub use crate::backend::ModInt256ct; 21 | 22 | #[cfg(feature = "gfsecp256k1")] 23 | pub use crate::backend::GFsecp256k1; 24 | 25 | #[cfg(feature = "gfp256")] 26 | pub use crate::backend::GFp256; 27 | 28 | #[cfg(feature = "gf448")] 29 | pub use crate::backend::GF448; 30 | 31 | #[cfg(feature = "gfb254")] 32 | pub use crate::backend::{GFb127, GFb254}; 33 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! 
Crrl is a Rust library for cryptographic research. 2 | //! 3 | //! This library implements computations in some finite fields and 4 | //! elliptic curves. It aims at providing efficient and secure 5 | //! (constant-time) implementations, but with portable code, and with a 6 | //! convenient API so that scalars, curve points, and other field 7 | //! elements may be used in straightforward expressions with normal 8 | //! arithmetic operators. 9 | //! 10 | //! Finite fields are implemented through some customizable types defined 11 | //! in `backend` (a 32-bit and a 64-bit backend are provided, the "right 12 | //! one" is automatically selected, unless overridden by a compile-time 13 | //! feature). The types may support several distinct moduli, chosen 14 | //! through a compile-time type parameter. 15 | //! 16 | //! Curve edwards25519 is implemented in the `ed25519` module. The 17 | //! specialized X25519 function is in `x25519`. The prime-order group 18 | //! ristretto255 (internally based on edwards25519) is defined in the 19 | //! `ristretto255` module. NIST curve P-256 (aka "secp256r1" and 20 | //! "prime256v1") is implemented in the `p256` module (with the ECDSA 21 | //! signature algorithm). Double-odd curves jq255e and jq255s are 22 | //! implemented by `jq255e` and `jq255s`, respectively (including 23 | //! signature and key exchange schemes). Secp256k1 is implemented in 24 | //! `secp256k1`. Edwards448 is in `ed448`, while the specialized X448 25 | //! function is in `x448`. The prime-order decaf448 group is implemented 26 | //! in `decaf448`. 27 | //! 28 | //! # Usage 29 | //! 30 | //! The library is "mostly `no_std`". By default, it compiles against the 31 | //! standard library. It can be compiled in `no_std` mode, in which case 32 | //! all functionality is still available, except verification of truncated 33 | //! ECDSA signatures with curve P-256. 34 | //! 35 | //! # Conventions 36 | //! 37 | //!
All implemented functions should be strictly constant-time, unless 38 | //! explicitly documented otherwise (non-constant-time functions normally 39 | //! have "vartime" in their name). In order to avoid unwanted side-channel 40 | //! leaks, Booleans are avoided (compilers tend to "optimize" things a bit 41 | //! too eagerly when handling `bool` values). All functions that return or 42 | //! use a potentially secret Boolean value use the `u32` type; the convention 43 | //! is that 0xFFFFFFFF means "true", and 0x00000000 means "false". No other 44 | //! value shall be used, for they would lead to unpredictable results. 45 | //! Similarly, the `Eq` or `PartialEq` traits are not implemented. 46 | //! 47 | //! Algebraic operations on field elements and curve points are performed 48 | //! with the usual operators (e.g. `+`); appropriate traits are defined 49 | //! so that structure types and pointers to structure types can be used 50 | //! more or less interchangeably. Throughout the code, functions that 51 | //! modify the object on which they are called tend to have a name in 52 | //! `set_*()` (e.g. for a curve point `P`, if we want to compute the 53 | //! double of that point, then `P.set_double()` modifies the point 54 | //! structure in place, while `P.double()` leaves `P` unmodified and 55 | //! returns the double as a new structure instance). 56 | //! 57 | //! # Truncated Signatures 58 | //! 59 | //! Apart from standard support for curve operations and signature 60 | //! algorithms, _truncated signatures_ are implemented for both Ed25519 61 | //! (Schnorr signatures over edwards25519) and ECDSA (over P-256). A 62 | //! truncated signature is a shrunk version, by up to 32 bits, of a 63 | //! normal signature; the verification process is then more expensive, 64 | //! though not necessarily intolerably expensive, depending on usage 65 | //! context (the most expensive verification function is for ECDSA on 66 | //! 
P-256, with maximal 32-bit truncation; in that case, verification 67 | //! cost can be up to about 0.65 seconds on a 500 MHz ARM Cortex A53; but 68 | //! Ed25519 signatures with 32-bit truncation can be verified in less 69 | //! than 0.05 seconds on the same hardware). Signature truncation can be 70 | //! useful in situations with strong I/O constraints, where every data 71 | //! bit counts, but where use of fully standard Ed25519 or ECDSA 72 | //! signature generators is made mandatory because of some regulatory or 73 | //! physical constraints of the signing hardware. 74 | //! 75 | //! # Performance 76 | //! 77 | //! On an Intel i5-8259U CPU (Coffee Lake core), Ed25519 signatures have 78 | //! been benchmarked at about 51600 cycles for signing, 111000 cycles for 79 | //! verification; these are not bad values, and are competitive or at 80 | //! least within 30% of performance obtained from assembly-optimized 81 | //! implementations on the same hardware. For P-256, signing time is 82 | //! about 125000 cycles, verification is 256000 cycles. For the jq255e 83 | //! curve, signatures are generated in about 54700 cycles, and verified 84 | //! in only 82800 cycles (56200 and 86800, respectively, for jq255s). 85 | //! These figures have been obtained by compiling with Rust 1.59 in 86 | //! release mode, with the flags `-C target-cpu=native`. 87 | //! 88 | //! On an ARM Cortex A53 (RaspberryPi Model 3B), Ed25519 signing was 89 | //! measured at 213000 cycles, verification at 479000 cycles; for P-256, 90 | //! the figures were 389000 and 991000, respectively. With jq255e, 91 | //! signature generation and verification use 241000 and 358000 cycles, 92 | //! respectively (248000 and 369000 for jq255s). 93 | //! 94 | //! No inline assembly is used. On x86-64 architectures, the 95 | //! `_addcarry_u64()` and `_subborrow_u64()` intrinsics are used 96 | //! (from `core::arch::x86_64`); however, plain implementations with 97 | //! 
no intrinsics are available (and used on, for instance, aarch64). 98 | 99 | #![no_std] 100 | 101 | #[cfg(all(feature = "alloc", not(feature = "std")))] 102 | #[macro_use] 103 | #[allow(unused_imports)] 104 | extern crate alloc; 105 | 106 | #[cfg(feature = "std")] 107 | #[macro_use] 108 | #[allow(unused_imports)] 109 | extern crate std; 110 | 111 | #[cfg(all(feature = "alloc", not(feature = "std")))] 112 | #[allow(unused_imports)] 113 | pub(crate) use alloc::vec::Vec; 114 | 115 | #[cfg(feature = "std")] 116 | #[allow(unused_imports)] 117 | pub(crate) use std::vec::Vec; 118 | 119 | /// The `rand_core` types are re-exported so that users of crrl do not 120 | /// have to worry about using the exact correct version of `rand_core`. 121 | pub use rand_core::{CryptoRng, RngCore, Error as RngError}; 122 | 123 | #[allow(unused_macros)] 124 | macro_rules! static_assert { 125 | ($condition:expr) => { 126 | let _ = &[()][1 - ($condition) as usize]; 127 | } 128 | } 129 | 130 | pub mod backend; 131 | pub mod field; 132 | 133 | pub use backend::{Zu128, Zu256, Zu384}; 134 | 135 | #[cfg(feature = "ed25519")] 136 | pub mod ed25519; 137 | 138 | #[cfg(feature = "x25519")] 139 | pub mod x25519; 140 | 141 | #[cfg(feature = "ristretto255")] 142 | pub mod ristretto255; 143 | 144 | #[cfg(feature = "jq255e")] 145 | pub mod jq255e; 146 | 147 | #[cfg(feature = "jq255s")] 148 | pub mod jq255s; 149 | 150 | #[cfg(feature = "p256")] 151 | pub mod p256; 152 | 153 | #[cfg(feature = "secp256k1")] 154 | pub mod secp256k1; 155 | 156 | #[cfg(feature = "gls254")] 157 | pub mod gls254; 158 | 159 | #[cfg(feature = "ed448")] 160 | pub mod ed448; 161 | 162 | #[cfg(feature = "x448")] 163 | pub mod x448; 164 | 165 | #[cfg(feature = "decaf448")] 166 | pub mod decaf448; 167 | 168 | #[cfg(all(feature = "alloc", feature = "frost"))] 169 | pub mod frost; 170 | 171 | #[cfg(feature = "lms")] 172 | pub mod lms; 173 | 174 | #[cfg(feature = "blake2s")] 175 | pub mod blake2s; 176 | 177 | pub mod sha2; 178 | pub mod sha3; 
179 | -------------------------------------------------------------------------------- /src/x25519.rs: -------------------------------------------------------------------------------- 1 | //! X25519 key-exchange algorithm. 2 | //! 3 | //! This module implements the X25519 primitive, as defined by [RFC 4 | //! 7748]. The primitive takes as input two 32-byte values, the first 5 | //! being the representation of a point on Curve25519 (a Montgomery 6 | //! curve) or on the quadratic twist of Curve25519, and the second being 7 | //! a scalar (a big integer). The scalar is internally "clamped" (some 8 | //! bits are set to specific values), then the point is multiplied by the 9 | //! scalar, and the output point is reencoded into 32 bytes. 10 | //! 11 | //! The `x25519()` function implements exactly the process described in 12 | //! RFC 7748 (section 5). The `x25519_base()` function is an optimization 13 | //! of the specific case of the input point being the conventional 14 | //! generator point on Curve25519; `x25519_base()` is fully compatible 15 | //! with `x25519()`, but also substantially faster. 16 | //! 17 | //! The `x25519()` function does NOT filter out any value from its input; 18 | //! any input sequence of 32 bytes is accepted, even if it encodes a 19 | //! low-order curve point. As per RFC 7748 requirements, the top point 20 | //! bit (most significant bit of the last byte) is ignored. As for 21 | //! scalars, the clamping process ensures that the integer used for the 22 | //! multiplication is a multiple of 8, at least 2^254, and lower than 23 | //! 2^255; the three least significant bits of the first byte, and two 24 | //! most significant bits of the last byte, are ignored. 25 | //! 26 | //! [RFC 7748]: https://datatracker.ietf.org/doc/html/rfc7748 27 | 28 | // Projective/fractional coordinates traditionally use uppercase letters, 29 | // using lowercase only for affine coordinates. 
30 | #![allow(non_snake_case)] 31 | 32 | use super::field::GF25519; 33 | use super::ed25519::{Point, Scalar}; 34 | 35 | /// X25519 function (from RFC 7748), general case. 36 | /// 37 | /// The source point is provided as an array of 32 bytes (`point`), as 38 | /// well as the scalar (`scalar`). In RFC 7748 terminology, the `point` 39 | /// parameter is the little-endian encoding of the u coordinate of a 40 | /// point on the Montgomery curve or on its quadratic twist, and the 41 | /// `scalar` parameter is the little-endian encoding of the scalar. The 42 | /// function "clamps" the scalar (bits 0, 1, 2 and 255 are cleared, bit 43 | /// 254 is set) then interprets the clamped scalar as an integer 44 | /// (little-endian convention), with which the provided curve point is 45 | /// multiplied; the u coordinate of the resulting point is then encoded 46 | /// and returned. 47 | pub fn x25519(point: &[u8; 32], scalar: &[u8; 32]) -> [u8; 32] { 48 | // Make clamped scalar. 49 | let mut s = *scalar; 50 | s[0] &= 248; 51 | s[31] &= 127; 52 | s[31] |= 64; 53 | 54 | // Decode the source point. As per RFC 7748 rules, the top bit is 55 | // ignored, and non-canonical values are acceptable. 56 | let mut u = *point; 57 | u[31] &= 127; 58 | let x1 = GF25519::decode_reduce(&u[..]); 59 | 60 | // Apply the RFC 7748 section 5 algorithm. 
61 | let mut x2 = GF25519::ONE; 62 | let mut z2 = GF25519::ZERO; 63 | let mut x3 = x1; 64 | let mut z3 = GF25519::ONE; 65 | let mut swap = 0u32; 66 | 67 | for t in (0..255).rev() { 68 | let kt = (((s[t >> 3] >> (t & 7)) & 1) as u32).wrapping_neg(); 69 | swap ^= kt; 70 | GF25519::cswap(&mut x2, &mut x3, swap); 71 | GF25519::cswap(&mut z2, &mut z3, swap); 72 | swap = kt; 73 | 74 | let A = x2 + z2; 75 | let B = x2 - z2; 76 | let AA = A.square(); 77 | let BB = B.square(); 78 | let C = x3 + z3; 79 | let D = x3 - z3; 80 | let E = AA - BB; 81 | let DA = D * A; 82 | let CB = C * B; 83 | x3 = (DA + CB).square(); 84 | z3 = x1 * (DA - CB).square(); 85 | x2 = AA * BB; 86 | z2 = E * (AA + E.mul_small(121665)); 87 | } 88 | GF25519::cswap(&mut x2, &mut x3, swap); 89 | GF25519::cswap(&mut z2, &mut z3, swap); 90 | 91 | (x2 / z2).encode() 92 | } 93 | 94 | /// Specialized version of X25519, when applied to the conventional 95 | /// generator point (u = 9). 96 | /// 97 | /// See `x25519()` for details. This function is significantly faster than 98 | /// the general `x25519()` function. 99 | pub fn x25519_base(scalar: &[u8; 32]) -> [u8; 32] { 100 | // Make clamped scalar, and decode it as an integer modulo L. 101 | let mut sb = *scalar; 102 | sb[0] &= 248; 103 | sb[31] &= 127; 104 | sb[31] |= 64; 105 | let s = Scalar::decode_reduce(&sb[..]); 106 | 107 | // Perform the multiplication on the Edwards curve. 108 | let P = Point::mulgen(&s); 109 | 110 | // Apply the birational map to get the Montgomery point (u coordinate 111 | // only). When the point is the neutral, we want to return 0. 
112 | let u = P.to_montgomery_u(); 113 | u.encode() 114 | } 115 | 116 | // ======================================================================== 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | 121 | use super::{x25519, x25519_base}; 122 | use crate::sha2::Sha256; 123 | 124 | #[test] 125 | fn x25519_mc() { 126 | let mut k = [0u8; 32]; 127 | k[0] = 9; 128 | let mut u = k; 129 | let mut ref1 = [0u8; 32]; 130 | hex::decode_to_slice("422c8e7a6227d7bca1350b3e2bb7279f7897b87bb6854b783c60e80311ae3079", &mut ref1[..]).unwrap(); 131 | let mut ref1000 = [0u8; 32]; 132 | hex::decode_to_slice("684cf59ba83309552800ef566f2f4d3c1c3887c49360e3875f2eb94d99532c51", &mut ref1000[..]).unwrap(); 133 | for i in 0..1000 { 134 | let old_k = k; 135 | k = x25519(&u, &k); 136 | u = old_k; 137 | if i == 0 { 138 | assert!(k == ref1); 139 | } 140 | } 141 | assert!(k == ref1000); 142 | } 143 | 144 | #[test] 145 | fn x25519_basepoint() { 146 | let mut sh = Sha256::new(); 147 | let mut b = [0u8; 32]; 148 | b[0] = 9; 149 | for i in 0..20 { 150 | sh.update(&(i as u64).to_le_bytes()); 151 | let v = sh.finalize_reset(); 152 | let mut k = [0u8; 32]; 153 | k[..].copy_from_slice(&v); 154 | assert!(x25519(&b, &k) == x25519_base(&k)); 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/x448.rs: -------------------------------------------------------------------------------- 1 | //! X448 key-exchange algorithm. 2 | //! 3 | //! This module implements the X448 primitive, as defined by [RFC 7748]. 4 | //! The primitive takes as input two 56-byte values, the first 5 | //! being the representation of a point on Curve448 (a Montgomery 6 | //! curve) or on the quadratic twist of Curve448, and the second being 7 | //! a scalar (a big integer). The scalar is internally "clamped" (some 8 | //! bits are set to specific values), then the point is multiplied by the 9 | //! scalar, and the output point is reencoded into 56 bytes. 10 | //! 11 | //! 
//! The `x448()` function implements exactly the process described in
//! RFC 7748 (section 5). The `x448_base()` function is an optimization
//! of the specific case of the input point being the conventional
//! generator point on Curve448; `x448_base()` is fully compatible
//! with `x448()`, but also substantially faster.
//!
//! The `x448()` function does NOT filter out any value from its input;
//! any input sequence of 56 bytes is accepted, even if it encodes a
//! low-order curve point. As per RFC 7748 rules, no bit of the input
//! point is masked for X448 (unlike X25519): non-canonical u values
//! (not lower than the field modulus) are accepted and implicitly
//! reduced. As for scalars, the clamping process ensures that the
//! integer used for the multiplication is a multiple of 4, at least
//! 2^447, and lower than 2^448; the two least significant bits of the
//! first byte, and the most significant bit of the last byte, are
//! ignored.
//!
//! [RFC 7748]: https://datatracker.ietf.org/doc/html/rfc7748

// Projective/fractional coordinates traditionally use uppercase letters,
// using lowercase only for affine coordinates.
#![allow(non_snake_case)]

use super::field::GF448;
use super::ed448::{Point, Scalar};

/// X448 function (from RFC 7748), general case.
///
/// The source point is provided as an array of 56 bytes (`point`), as
/// well as the scalar (`scalar`). In RFC 7748 terminology, the `point`
/// parameter is the little-endian encoding of the u coordinate of a
/// point on the Montgomery curve or on its quadratic twist, and the
/// `scalar` parameter is the little-endian encoding of the scalar.
The 42 | /// function "clamps" the scalar (bits 0 and 1 are cleared, bit 447 is 43 | /// set) then interprets the clamped scalar as an integer (little-endian 44 | /// convention), with which the provided curve point is multiplied; the u 45 | /// coordinate of the resulting point is then encoded and returned. 46 | pub fn x448(point: &[u8; 56], scalar: &[u8; 56]) -> [u8; 56] { 47 | // Make clamped scalar. 48 | let mut s = *scalar; 49 | s[0] &= 252; 50 | s[55] |= 128; 51 | 52 | // Decode the source point. As per RFC 7748 rules, non-canonical 53 | // values are acceptable. 54 | let x1 = GF448::decode_reduce(point); 55 | 56 | // Apply the RFC 7748 section 5 algorithm. 57 | let mut x2 = GF448::ONE; 58 | let mut z2 = GF448::ZERO; 59 | let mut x3 = x1; 60 | let mut z3 = GF448::ONE; 61 | let mut swap = 0u32; 62 | 63 | for t in (0..448).rev() { 64 | let kt = (((s[t >> 3] >> (t & 7)) & 1) as u32).wrapping_neg(); 65 | swap ^= kt; 66 | GF448::cswap(&mut x2, &mut x3, swap); 67 | GF448::cswap(&mut z2, &mut z3, swap); 68 | swap = kt; 69 | 70 | let A = x2 + z2; 71 | let B = x2 - z2; 72 | let AA = A.square(); 73 | let BB = B.square(); 74 | let C = x3 + z3; 75 | let D = x3 - z3; 76 | let E = AA - BB; 77 | let DA = D * A; 78 | let CB = C * B; 79 | x3 = (DA + CB).square(); 80 | z3 = x1 * (DA - CB).square(); 81 | x2 = AA * BB; 82 | z2 = E * (AA + E.mul_small(39081)); 83 | } 84 | GF448::cswap(&mut x2, &mut x3, swap); 85 | GF448::cswap(&mut z2, &mut z3, swap); 86 | 87 | (x2 / z2).encode() 88 | } 89 | 90 | /// Specialized version of X448, when applied to the conventional 91 | /// generator point (u = 9). 92 | /// 93 | /// See `x448()` for details. This function is significantly faster than 94 | /// the general `x448()` function. 95 | pub fn x448_base(scalar: &[u8; 56]) -> [u8; 56] { 96 | // Make clamped scalar, and decode it as an integer modulo L. 
97 | let mut sb = *scalar; 98 | sb[0] &= 252; 99 | sb[55] |= 128; 100 | let s = Scalar::decode_reduce(&sb[..]); 101 | 102 | // Perform the multiplication on the Edwards curve. 103 | let P = Point::mulgen(&s); 104 | 105 | // Apply the birational map to get the Montgomery point (u coordinate 106 | // only). When the point is the neutral, we want to return 0. 107 | let u = P.to_montgomery_u(); 108 | u.encode() 109 | } 110 | 111 | // ======================================================================== 112 | 113 | #[cfg(test)] 114 | mod tests { 115 | 116 | use super::{x448, x448_base}; 117 | use crate::sha2::Sha512; 118 | 119 | #[test] 120 | fn x448_mc() { 121 | let mut k = [0u8; 56]; 122 | k[0] = 5; 123 | let mut u = k; 124 | let mut ref1 = [0u8; 56]; 125 | hex::decode_to_slice("3f482c8a9f19b01e6c46ee9711d9dc14fd4bf67af30765c2ae2b846a4d23a8cd0db897086239492caf350b51f833868b9bc2b3bca9cf4113", &mut ref1[..]).unwrap(); 126 | let mut ref1000 = [0u8; 56]; 127 | hex::decode_to_slice("aa3b4749d55b9daf1e5b00288826c467274ce3ebbdd5c17b975e09d4af6c67cf10d087202db88286e2b79fceea3ec353ef54faa26e219f38", &mut ref1000[..]).unwrap(); 128 | for i in 0..1000 { 129 | let old_k = k; 130 | k = x448(&u, &k); 131 | u = old_k; 132 | if i == 0 { 133 | assert!(k == ref1); 134 | } 135 | } 136 | assert!(k == ref1000); 137 | } 138 | 139 | #[test] 140 | fn x448_basepoint() { 141 | let mut sh = Sha512::new(); 142 | let mut b = [0u8; 56]; 143 | b[0] = 5; 144 | for i in 0..20 { 145 | sh.update(&(i as u64).to_le_bytes()); 146 | let v = sh.finalize_reset(); 147 | let mut k = [0u8; 56]; 148 | k[..].copy_from_slice(&v[..56]); 149 | assert!(x448(&b, &k) == x448_base(&k)); 150 | } 151 | } 152 | } 153 | --------------------------------------------------------------------------------