├── .DS_Store
├── .gitignore
├── .gitmodules
├── README.md
├── bls
    ├── Cargo.lock
    ├── Cargo.toml
    ├── bls-embedded.h
    ├── cbindgen.toml
    └── src
    │   ├── bls
    │       ├── keys.rs
    │       └── mod.rs
    │   ├── build.rs
    │   ├── error.rs
    │   └── lib.rs
└── bls12_377
    ├── .DS_Store
    ├── .gitignore
    ├── COPYRIGHT
    ├── Cargo.toml
    ├── LICENSE-APACHE
    ├── LICENSE-MIT
    ├── Makefile
    ├── benches
        └── groups.rs
    ├── rust-toolchain
    └── src
        ├── fp.rs
        ├── fp2.rs
        ├── fp_asm.S
        ├── fp_mont.cpp
        ├── fp_mont.h
        ├── fpc.cpp
        ├── fq_asm.s
        ├── g1.rs
        ├── g2.rs
        ├── lib.rs
        ├── scalar.rs
        └── util.rs


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/celo-org/bls-embedded/931ab609987daa4b2d6ca68edf7aab077039f553/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/*.rs.bk
2 | *.swp
3 | go/cmd/example/example
4 | Cargo.lock
5 | target
6 | testproj
7 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/celo-org/bls-embedded/931ab609987daa4b2d6ca68edf7aab077039f553/.gitmodules


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # bls-embedded
 2 | 
 3 | Install Rust:
 4 | 
 5 | `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`
 6 | 
 7 | Run with:
 8 | ```
 9 | cd bls12_377
10 | cargo build
11 | cargo test
12 | cargo bench
13 | cargo bench -- Fp_m
14 | cargo bench -- G2Projective_s
15 | ```
16 | 


--------------------------------------------------------------------------------
/bls/Cargo.lock:
--------------------------------------------------------------------------------
 1 | # This file is automatically @generated by Cargo.
 2 | # It is not intended for manual editing.
 3 | [[package]]
 4 | name = "bls12_377"
 5 | version = "0.0.0"
 6 | dependencies = [
 7 |  "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
 8 |  "subtle 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 9 | ]
10 | 
11 | [[package]]
12 | name = "bls_embedded"
13 | version = "0.1.0"
14 | dependencies = [
15 |  "bls12_377 0.0.0",
16 |  "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
17 |  "subtle 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
18 | ]
19 | 
20 | [[package]]
21 | name = "byteorder"
22 | version = "1.3.2"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | 
25 | [[package]]
26 | name = "libc"
27 | version = "0.2.65"
28 | source = "registry+https://github.com/rust-lang/crates.io-index"
29 | 
30 | [[package]]
31 | name = "subtle"
32 | version = "2.1.1"
33 | source = "registry+https://github.com/rust-lang/crates.io-index"
34 | 
35 | [metadata]
36 | "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
37 | "checksum libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)" = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8"
38 | "checksum subtle 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "01f40907d9ffc762709e4ff3eb4a6f6b41b650375a3f09ac92b641942b7fb082"
39 | 


--------------------------------------------------------------------------------
/bls/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "bls_embedded"
 3 | version = "0.1.0"
 4 | authors = ["Michael Straka <mstraka@celo.org>"]
 5 | edition = "2018"
 6 | build = "src/build.rs"
 7 | 
 8 | [dependencies]
 9 | bls12_377 = { path = "../bls12_377" }
10 | libc = "0.2"
11 | 
12 | [dependencies.subtle]
13 | version = "2.1"
14 | default-features = false
15 | 
16 | [lib]
17 | name = "bls_embedded"
18 | path = "src/lib.rs"
19 | crate-type = ["staticlib"]
20 | opt-level = "s"
21 | 
22 | [features]
23 | gen_header = []
24 | 
25 | [profile.dev]
26 | panic= "abort"
27 | 
28 | [profile.release]
29 | opt-level = 3
30 | panic= "abort"
31 | 


--------------------------------------------------------------------------------
/bls/bls-embedded.h:
--------------------------------------------------------------------------------
 1 | #include <stdarg.h>
 2 | #include <stdbool.h>
 3 | #include <stdint.h>
 4 | #include <stdlib.h>
 5 | 
 6 | typedef struct PrivateKey PrivateKey;
 7 | 
 8 | typedef struct PublicKey PublicKey;
 9 | 
10 | typedef struct Signature Signature;
11 | 
12 | bool aggregate_public_keys(const PublicKey *const *_in_public_keys,
13 |                            int32_t _in_public_keys_len,
14 |                            PublicKey **_out_public_key);
15 | 
16 | bool aggregate_public_keys_subtract(const PublicKey *_in_aggregated_public_key,
17 |                                     const PublicKey *const *_in_public_keys,
18 |                                     int32_t _in_public_keys_len,
19 |                                     PublicKey **_out_public_key);
20 | 
21 | bool aggregate_signatures(const Signature *const *_in_signatures,
22 |                           int32_t _in_signatures_len,
23 |                           Signature **_out_signature);
24 | 
25 | bool deserialize_private_key(const uint8_t *_in_private_key_bytes,
26 |                              int32_t _in_private_key_bytes_len,
27 |                              PrivateKey **_out_private_key);
28 | 
29 | bool deserialize_public_key(const uint8_t *_in_public_key_bytes,
30 |                             int32_t _in_public_key_bytes_len,
31 |                             PublicKey **_out_public_key);
32 | 
33 | bool deserialize_signature(const uint8_t *_in_signature_bytes,
34 |                            int32_t _in_signature_bytes_len,
35 |                            Signature **_out_signature);
36 | 
37 | void destroy_private_key(PrivateKey *_private_key);
38 | 
39 | void destroy_public_key(PublicKey *_public_key);
40 | 
41 | void destroy_signature(Signature *_signature);
42 | 
43 | void free_vec(uint8_t *_bytes, int32_t _len);
44 | 
45 | bool generate_private_key(PrivateKey **out_private_key);
46 | 
47 | bool generate_signature(Signature **out_signature);
48 | 
49 | bool get_pubkey(uint64_t *in_private_key, uint8_t *out_public_key);
50 | 
51 | bool is_valid_key(const uint8_t *in_private_key);
52 | 
53 | bool private_key_to_public_key(const PrivateKey *in_private_key, PublicKey **out_public_key);
54 | 
55 | bool serialize_private_key(const PrivateKey *_in_private_key,
56 |                            uint8_t **_out_bytes,
57 |                            int32_t *_out_len);
58 | 
59 | bool serialize_public_key(const PublicKey *_in_public_key, uint8_t **_out_bytes, int32_t *_out_len);
60 | 
61 | bool serialize_signature(const Signature *_in_signature, uint8_t **_out_bytes, int32_t *_out_len);
62 | 
63 | bool sign_hash(uint64_t *in_private_key, uint8_t *in_hash, uint8_t *out_signature);
64 | 
65 | bool sign_message(uint64_t *in_private_key,
66 |                   const uint8_t *in_message,
67 |                   int32_t in_message_len,
68 |                   const uint8_t *in_extra_data,
69 |                   int32_t in_extra_data_len,
70 |                   bool should_use_composite);
71 | 
72 | bool sign_pop(const PrivateKey *_in_private_key, Signature **_out_signature);
73 | 
74 | bool verify_pop(const PublicKey *_in_public_key,
75 |                 const Signature *_in_signature,
76 |                 bool *_out_verified);
77 | 
78 | bool verify_signature(const PublicKey *_in_public_key,
79 |                       const uint8_t *_in_message,
80 |                       int32_t _in_message_len,
81 |                       const uint8_t *_in_extra_data,
82 |                       int32_t _in_extra_data_len,
83 |                       const Signature *_in_signature,
84 |                       bool _should_use_composite,
85 |                       bool *_out_verified);
86 | 


--------------------------------------------------------------------------------
/bls/cbindgen.toml:
--------------------------------------------------------------------------------
1 | language = "C"
2 | 


--------------------------------------------------------------------------------
/bls/src/bls/keys.rs:
--------------------------------------------------------------------------------
  1 | use bls12_377::{Scalar, G1Affine, G2Affine, G1Projective, G2Projective};
  2 | use crate::error::ErrorCode;
  3 | use core::ops::Mul;
  4 | 
  5 | pub struct PrivateKey {
  6 |     sk: Scalar,
  7 | }
  8 | 
  9 | impl PrivateKey {
 10 |     pub fn from_scalar(s: &Scalar) -> Self {
 11 |         Self { sk: s.clone() }
 12 |     }
 13 | 
 14 |     pub fn to_public(&self) -> PublicKey {
 15 |         PublicKey::from_pk(&(G2Projective::generator() * &self.sk))
 16 |     }
 17 | 
 18 |     #[inline(always)]
 19 |     pub fn sign_hash(&self, hash: &[u8; 96]) -> Result<Signature, ErrorCode> {
 20 |        let hash_elem = G1Affine::from_uncompressed_unchecked_vartime(hash).unwrap(); 
 21 |        Ok(Signature::from_sig(&hash_elem.mul(&self.sk)))
 22 |     }
 23 | }
 24 | 
 25 | #[derive(Copy, Clone, Debug)]
 26 | pub struct PublicKey {
 27 |     pk: G2Projective,
 28 | }
 29 | 
 30 | impl PublicKey {
 31 |     pub fn from_pk(pk: &G2Projective) -> PublicKey {
 32 |         PublicKey { pk: pk.clone() }
 33 |     }
 34 | 
 35 |     #[inline(always)]
 36 |     pub fn serialize(&self) -> [u8; 192] {
 37 |         G2Affine::from(&self.pk).to_uncompressed_littleendian()
 38 |     }
 39 | }
 40 | impl Eq for PublicKey {}
 41 | impl PartialEq for PublicKey {
 42 |     fn eq(&self, other: &Self) -> bool {
 43 |         self.pk == other.pk
 44 |     }
 45 | }
 46 | 
 47 | #[derive(Copy, Clone, Debug)]
 48 | pub struct Signature {
 49 |     sig: G1Projective,
 50 | }
 51 | 
 52 | impl Signature {
 53 |     #[inline(always)]
 54 |     pub fn from_sig(sig: &G1Projective) -> Signature {
 55 |         Signature { sig: sig.clone() }
 56 |     }
 57 | 
 58 |     #[inline(always)]
 59 |     pub fn serialize(&self) -> [u8; 96] {
 60 |         G1Affine::from(self.sig).to_uncompressed_littleendian()
 61 |     }
 62 | }
 63 | impl Eq for Signature {} impl PartialEq for Signature {
 64 |     fn eq(&self, other: &Self) -> bool {
 65 |         self.sig == other.sig
 66 |     }
 67 | }
 68 | 
 69 | #[test]
 70 | fn test_signature_serialization() {
 71 |     let elem = [0, 23, 5, 45, 78, 62, 182, 66, 211, 46, 244, 152, 154, 242, 83, 204, 42, 48, 173, 55, 108, 232, 240, 178, 60, 146, 185, 135, 233, 92, 199, 24, 208, 32, 114, 187, 120, 211, 124, 9, 253, 118, 247, 1, 78, 236, 247, 151, 1, 108, 32, 107, 231, 56, 191, 70, 68, 250, 255, 16, 187, 130, 177, 159, 111, 7, 119, 153, 3, 166, 173, 37, 36, 128, 156, 226, 159, 148, 104, 59, 227, 43, 189, 208, 114, 236, 11, 230, 106, 224, 237, 13, 135, 129, 242, 119];
 72 |     let elem_result = Signature { sig: (G1Projective::generator() * &Scalar::from(5)) }.serialize();
 73 |     assert_eq!(&elem[..], &elem_result[..]);
 74 | }
 75 | 
 76 | #[test]
 77 | fn test_publickey_serialization() {
 78 |     let elem = [0, 31, 183, 170, 199, 212, 167, 3, 66, 81, 201, 4, 241, 48, 79, 223, 24, 52, 101, 225, 116, 36, 166, 246, 213, 127, 77, 200, 154, 183, 73, 53, 249, 207, 6, 102, 170, 157, 11, 128, 177, 20, 254, 185, 15, 142, 231, 68, 0, 222, 228, 89, 156, 13, 254, 199, 91, 133, 241, 129, 173, 74, 215, 198, 210, 32, 83, 154, 161, 153, 255, 92, 239, 64, 69, 147, 39, 48, 118, 242, 26, 126, 220, 109, 229, 226, 101, 150, 25, 228, 38, 133, 96, 89, 73, 238, 0, 105, 186, 188, 162, 17, 191, 123, 4, 159, 165, 161, 68, 105, 85, 121, 63, 19, 169, 22, 165, 195, 165, 66, 206, 1, 108, 166, 186, 198, 49, 232, 110, 212, 243, 6, 4, 6, 2, 95, 165, 241, 12, 160, 98, 34, 217, 143, 1, 42, 244, 0, 161, 173, 241, 170, 146, 11, 183, 159, 9, 30, 138, 40, 3, 30, 231, 111, 97, 118, 217, 229, 221, 205, 106, 218, 224, 24, 116, 233, 237, 223, 225, 180, 55, 239, 219, 248, 119, 10, 49, 96, 145, 22, 219, 26];
 79 |     let elem_result = PublicKey { pk: (G2Projective::generator() * &Scalar::from(5)) }.serialize();
 80 |     assert_eq!(&elem[..], &elem_result[..]);
 81 | }
 82 | 
 83 | #[test]
 84 | fn test_pubkey_derivation() {
 85 |     let priv_key = PrivateKey { sk: Scalar::from_bytes(&[52, 163, 121, 115, 149, 19, 242, 110, 13, 231, 110, 40, 146, 248, 62, 119, 87, 214, 200, 159, 51, 41, 164, 239, 155, 241, 173, 219, 230, 185, 133, 3]).unwrap() };
 86 |     let pub_key = PublicKey { pk: G2Projective::from(G2Affine::from_uncompressed(&[1, 65, 146, 224, 231, 36, 217, 8, 154, 9, 197, 85, 87, 10, 60, 10, 116, 199, 107, 77, 65, 110, 195, 241, 61, 149, 135, 254, 254, 231, 193, 180, 204, 158, 62, 152, 255, 162, 62, 57, 242, 63, 232, 173, 205, 118, 153, 74, 0, 33, 97, 106, 240, 49, 100, 155, 187, 111, 209, 35, 149, 158, 19, 5, 53, 161, 255, 29, 150, 27, 180, 76, 35, 128, 168, 52, 28, 185, 165, 29, 3, 171, 74, 204, 98, 167, 76, 26, 163, 61, 205, 9, 165, 185, 175, 92, 0, 255, 19, 80, 75, 234, 65, 82, 108, 145, 163, 112, 232, 187, 181, 136, 5, 148, 204, 65, 187, 54, 121, 249, 199, 164, 107, 239, 193, 46, 94, 130, 16, 4, 237, 46, 67, 32, 180, 185, 63, 12, 189, 114, 59, 70, 32, 214, 1, 103, 254, 116, 159, 104, 88, 88, 209, 241, 131, 173, 192, 119, 152, 28, 214, 52, 212, 168, 14, 233, 120, 89, 97, 233, 93, 236, 94, 172, 27, 173, 64, 49, 117, 213, 228, 168, 212, 232, 114, 121, 204, 16, 246, 121, 184, 81]).unwrap()) };
 87 |     let pub_key_result = priv_key.to_public();
 88 |     assert_eq!(G2Affine::from(pub_key.pk), G2Affine::from(pub_key_result.pk));
 89 | }
 90 | 
 91 | #[test]
 92 | fn test_sign_hash() {
 93 |     let pk = PrivateKey { sk: Scalar::from_bytes(&[10, 145, 220, 128, 41, 236, 187, 134, 47, 34, 61, 132, 196, 20, 201, 239, 33, 80, 184, 182, 49, 79, 15, 212, 4, 73, 201, 248, 74, 226, 158, 12]).unwrap() };
 94 |     let hash = [1, 95, 34, 213, 221, 202, 70, 0, 221, 118, 193, 93, 225, 200, 19, 73, 208, 8, 176, 53, 150, 73, 22, 154, 1, 71, 181, 38, 9, 102, 191, 35, 227, 112, 10, 208, 171, 43, 191, 43, 110, 164, 130, 8, 57, 101, 243, 19, 1, 47, 253, 198, 50, 95, 79, 61, 237, 164, 140, 88, 176, 124, 187, 181, 163, 22, 62, 109, 184, 189, 146, 112, 115, 9, 160, 33, 102, 163, 4, 181, 208, 41, 88, 149, 177, 103, 137, 99, 174, 49, 132, 6, 227, 20, 225, 203];
 95 |     let sig = Signature { sig: G1Projective::from(G1Affine::from_uncompressed(&[0, 197, 168, 175, 148, 226, 242, 59, 146, 38, 132, 5, 184, 97, 42, 143, 165, 173, 21, 4, 175, 57, 168, 90, 6, 88, 106, 216, 57, 126, 148, 208, 236, 146, 120, 249, 251, 21, 170, 84, 108, 46, 219, 72, 123, 118, 141, 23, 0, 137, 235, 28, 241, 199, 129, 202, 64, 124, 156, 28, 68, 75, 151, 18, 63, 110, 16, 210, 132, 222, 210, 134, 75, 135, 25, 6, 230, 9, 243, 11, 153, 183, 8, 154, 242, 128, 46, 134, 60, 59, 123, 187, 193, 124, 30, 238]).unwrap()) };
 96 | 
 97 |     let sig_result = pk.sign_hash(&hash).unwrap();
 98 |     assert_eq!(G1Affine::from(sig.sig), G1Affine::from(sig_result.sig));
 99 | }
100 | 


--------------------------------------------------------------------------------
/bls/src/bls/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod keys;
2 | 


--------------------------------------------------------------------------------
/bls/src/build.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(feature="gen_header")]
 2 | extern crate cbindgen;
 3 | 
 4 | #[cfg(feature="gen_header")]
 5 | fn main() {
 6 |     let crate_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
 7 | 
 8 |     cbindgen::generate(crate_dir)
 9 |         .expect("Unable to generate C bindings.")
10 |         .write_to_file("./bls-embedded.h");
11 | }
12 | 
13 | #[cfg(not(feature="gen_header"))]
14 | fn main() {
15 | }
16 | 


--------------------------------------------------------------------------------
/bls/src/error.rs:
--------------------------------------------------------------------------------
/// Error codes reported by this crate, represented as `u16` values.
///
/// The type is `Copy` and comparable so callers and tests can match on and
/// compare codes without borrowing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u16)]
pub enum ErrorCode {
    /// Generic failure; the only code currently defined (value `1`).
    Error = 1,
}
6 | 


--------------------------------------------------------------------------------
/bls/src/lib.rs:
--------------------------------------------------------------------------------
 1 | #![cfg_attr(not(gen_header), no_std)]
 2 | #![no_std]
 3 | extern crate libc;
 4 | 
 5 | pub mod bls;
 6 | pub mod error;
 7 | 
 8 | use bls12_377::Scalar;
 9 | use crate::bls::keys::PrivateKey;
10 | use subtle::CtOption;
11 | use core::ptr::copy;
12 | 
13 | use core::slice;
14 | 
15 | #[cfg(not(gen_header))]
16 | #[panic_handler]
17 | fn my_panic(_info: &core::panic::PanicInfo) -> ! {
18 |     loop {}
19 | }
20 | 
/// Runs `f` and reports whether it succeeded.
///
/// Returns `true` when `f` yields `Ok(_)` and `false` when it yields
/// `Err(_)`; the payload of either variant is discarded.
fn convert_result_to_bool<T, E, F: Fn() -> Result<T, E>>(f: F) -> bool {
    f().is_ok()
}
29 | 
30 | #[no_mangle]
31 | pub extern "C" fn is_valid_key(in_private_key: *const u8) -> bool {
32 |    let pk_array = in_private_key as *const [u8; 32];
33 |    let priv_key = unsafe { Scalar::from_bytes(&*pk_array) } ;
34 |    bool::from(CtOption::is_some(&priv_key))
35 | }
36 | 
37 | #[no_mangle]
38 | pub extern "C" fn sign_hash(
39 |     in_private_key: *mut u64,
40 |     in_hash: *mut u8,
41 |     out_signature: *mut u8,
42 | ) -> bool {
43 |     let pk_array = in_private_key as *mut [u64; 4];
44 |     let private_key = unsafe { PrivateKey::from_scalar(&Scalar::from_raw(*pk_array)) };
45 |     let hash = unsafe { slice::from_raw_parts(in_hash, 96) };
46 |     let mut hash_arr: [u8; 96] = [0; 96];
47 |     hash_arr.copy_from_slice(&hash[0..96]);
48 |     let sig = private_key.sign_hash(&hash_arr).unwrap();
49 |     let sig_arr = sig.serialize();
50 |     unsafe { copy(sig_arr.as_ptr(), out_signature, 96); };
51 |     true
52 | }
53 | 
54 | #[no_mangle]
55 | pub extern "C" fn get_pubkey(
56 |     in_private_key: *mut u64,
57 |     out_public_key: *mut u8,
58 | ) -> bool {
59 |     let private_key = unsafe { PrivateKey::from_scalar(&Scalar::from_raw(*(in_private_key as *mut [u64; 4]))) }; 
60 |     let pub_arr = private_key.to_public().serialize();
61 |     unsafe { copy(pub_arr.as_ptr(), out_public_key, 192) };
62 |     true
63 | }
64 | 


--------------------------------------------------------------------------------
/bls12_377/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/celo-org/bls-embedded/931ab609987daa4b2d6ca68edf7aab077039f553/bls12_377/.DS_Store


--------------------------------------------------------------------------------
/bls12_377/.gitignore:
--------------------------------------------------------------------------------
1 | libfpc.a
2 | 


--------------------------------------------------------------------------------
/bls12_377/COPYRIGHT:
--------------------------------------------------------------------------------
 1 | Copyrights in the "bls12_377" library are retained by their contributors. No
 2 | copyright assignment is required to contribute to the "bls12_377" library.
 3 | 
 4 | The "bls12_377" library is licensed under either of
 5 | 
 6 |  * Apache License, Version 2.0, (see ./LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0)
 7 |  * MIT license (see ./LICENSE-MIT or http://opensource.org/licenses/MIT)
 8 | 
 9 | at your option.
10 | 
11 | Unless you explicitly state otherwise, any contribution intentionally
12 | submitted for inclusion in the work by you, as defined in the Apache-2.0
13 | license, shall be dual licensed as above, without any additional terms or
14 | conditions.
15 | 


--------------------------------------------------------------------------------
/bls12_377/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | authors = ["Sean Bowe <ewillbefull@gmail.com>", "Michael Straka <mstraka@celo.org>"]
 3 | description = "Implementation of the BLS12-377 pairing-friendly elliptic curve construction"
 4 | license = "MIT/Apache-2.0"
 5 | name = "bls12_377"
 6 | repository = "https://github.com/zkcrypto/bls12_381"
 7 | version = "0.0.0"
 8 | edition = "2018"
 9 | 
10 | [package.metadata.docs.rs]
11 | rustdoc-args = [ "--html-in-header", "katex-header.html" ]
12 | 
13 | [dev-dependencies]
14 | criterion = "0.2.11"
15 | 
16 | [[bench]]
17 | name = "groups"
18 | harness = false
19 | required-features = ["groups"]
20 | 
21 | [dependencies.subtle]
22 | version = "2.1"
23 | default-features = false
24 | 
25 | [dependencies.byteorder]
26 | version = "1"
27 | default-features = false
28 | 
29 | [features]
30 | default = ["groups", "pairings"]
31 | groups = []
32 | pairings = ["groups"]
33 | nightly = ["subtle/nightly"]
34 | 


--------------------------------------------------------------------------------
/bls12_377/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 |                               Apache License
  2 |                         Version 2.0, January 2004
  3 |                      http://www.apache.org/licenses/
  4 | 
  5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 | 1. Definitions.
  8 | 
  9 |    "License" shall mean the terms and conditions for use, reproduction,
 10 |    and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |    "Licensor" shall mean the copyright owner or entity authorized by
 13 |    the copyright owner that is granting the License.
 14 | 
 15 |    "Legal Entity" shall mean the union of the acting entity and all
 16 |    other entities that control, are controlled by, or are under common
 17 |    control with that entity. For the purposes of this definition,
 18 |    "control" means (i) the power, direct or indirect, to cause the
 19 |    direction or management of such entity, whether by contract or
 20 |    otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |    outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |    "You" (or "Your") shall mean an individual or Legal Entity
 24 |    exercising permissions granted by this License.
 25 | 
 26 |    "Source" form shall mean the preferred form for making modifications,
 27 |    including but not limited to software source code, documentation
 28 |    source, and configuration files.
 29 | 
 30 |    "Object" form shall mean any form resulting from mechanical
 31 |    transformation or translation of a Source form, including but
 32 |    not limited to compiled object code, generated documentation,
 33 |    and conversions to other media types.
 34 | 
 35 |    "Work" shall mean the work of authorship, whether in Source or
 36 |    Object form, made available under the License, as indicated by a
 37 |    copyright notice that is included in or attached to the work
 38 |    (an example is provided in the Appendix below).
 39 | 
 40 |    "Derivative Works" shall mean any work, whether in Source or Object
 41 |    form, that is based on (or derived from) the Work and for which the
 42 |    editorial revisions, annotations, elaborations, or other modifications
 43 |    represent, as a whole, an original work of authorship. For the purposes
 44 |    of this License, Derivative Works shall not include works that remain
 45 |    separable from, or merely link (or bind by name) to the interfaces of,
 46 |    the Work and Derivative Works thereof.
 47 | 
 48 |    "Contribution" shall mean any work of authorship, including
 49 |    the original version of the Work and any modifications or additions
 50 |    to that Work or Derivative Works thereof, that is intentionally
 51 |    submitted to Licensor for inclusion in the Work by the copyright owner
 52 |    or by an individual or Legal Entity authorized to submit on behalf of
 53 |    the copyright owner. For the purposes of this definition, "submitted"
 54 |    means any form of electronic, verbal, or written communication sent
 55 |    to the Licensor or its representatives, including but not limited to
 56 |    communication on electronic mailing lists, source code control systems,
 57 |    and issue tracking systems that are managed by, or on behalf of, the
 58 |    Licensor for the purpose of discussing and improving the Work, but
 59 |    excluding communication that is conspicuously marked or otherwise
 60 |    designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |    "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |    on behalf of whom a Contribution has been received by Licensor and
 64 |    subsequently incorporated within the Work.
 65 | 
 66 | 2. Grant of Copyright License. Subject to the terms and conditions of
 67 |    this License, each Contributor hereby grants to You a perpetual,
 68 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |    copyright license to reproduce, prepare Derivative Works of,
 70 |    publicly display, publicly perform, sublicense, and distribute the
 71 |    Work and such Derivative Works in Source or Object form.
 72 | 
 73 | 3. Grant of Patent License. Subject to the terms and conditions of
 74 |    this License, each Contributor hereby grants to You a perpetual,
 75 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |    (except as stated in this section) patent license to make, have made,
 77 |    use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |    where such license applies only to those patent claims licensable
 79 |    by such Contributor that are necessarily infringed by their
 80 |    Contribution(s) alone or by combination of their Contribution(s)
 81 |    with the Work to which such Contribution(s) was submitted. If You
 82 |    institute patent litigation against any entity (including a
 83 |    cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |    or a Contribution incorporated within the Work constitutes direct
 85 |    or contributory patent infringement, then any patent licenses
 86 |    granted to You under this License for that Work shall terminate
 87 |    as of the date such litigation is filed.
 88 | 
 89 | 4. Redistribution. You may reproduce and distribute copies of the
 90 |    Work or Derivative Works thereof in any medium, with or without
 91 |    modifications, and in Source or Object form, provided that You
 92 |    meet the following conditions:
 93 | 
 94 |    (a) You must give any other recipients of the Work or
 95 |        Derivative Works a copy of this License; and
 96 | 
 97 |    (b) You must cause any modified files to carry prominent notices
 98 |        stating that You changed the files; and
 99 | 
100 |    (c) You must retain, in the Source form of any Derivative Works
101 |        that You distribute, all copyright, patent, trademark, and
102 |        attribution notices from the Source form of the Work,
103 |        excluding those notices that do not pertain to any part of
104 |        the Derivative Works; and
105 | 
106 |    (d) If the Work includes a "NOTICE" text file as part of its
107 |        distribution, then any Derivative Works that You distribute must
108 |        include a readable copy of the attribution notices contained
109 |        within such NOTICE file, excluding those notices that do not
110 |        pertain to any part of the Derivative Works, in at least one
111 |        of the following places: within a NOTICE text file distributed
112 |        as part of the Derivative Works; within the Source form or
113 |        documentation, if provided along with the Derivative Works; or,
114 |        within a display generated by the Derivative Works, if and
115 |        wherever such third-party notices normally appear. The contents
116 |        of the NOTICE file are for informational purposes only and
117 |        do not modify the License. You may add Your own attribution
118 |        notices within Derivative Works that You distribute, alongside
119 |        or as an addendum to the NOTICE text from the Work, provided
120 |        that such additional attribution notices cannot be construed
121 |        as modifying the License.
122 | 
123 |    You may add Your own copyright statement to Your modifications and
124 |    may provide additional or different license terms and conditions
125 |    for use, reproduction, or distribution of Your modifications, or
126 |    for any such Derivative Works as a whole, provided Your use,
127 |    reproduction, and distribution of the Work otherwise complies with
128 |    the conditions stated in this License.
129 | 
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 |    any Contribution intentionally submitted for inclusion in the Work
132 |    by You to the Licensor shall be under the terms and conditions of
133 |    this License, without any additional terms or conditions.
134 |    Notwithstanding the above, nothing herein shall supersede or modify
135 |    the terms of any separate license agreement you may have executed
136 |    with Licensor regarding such Contributions.
137 | 
138 | 6. Trademarks. This License does not grant permission to use the trade
139 |    names, trademarks, service marks, or product names of the Licensor,
140 |    except as required for reasonable and customary use in describing the
141 |    origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 |    agreed to in writing, Licensor provides the Work (and each
145 |    Contributor provides its Contributions) on an "AS IS" BASIS,
146 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |    implied, including, without limitation, any warranties or conditions
148 |    of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |    PARTICULAR PURPOSE. You are solely responsible for determining the
150 |    appropriateness of using or redistributing the Work and assume any
151 |    risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 |    whether in tort (including negligence), contract, or otherwise,
155 |    unless required by applicable law (such as deliberate and grossly
156 |    negligent acts) or agreed to in writing, shall any Contributor be
157 |    liable to You for damages, including any direct, indirect, special,
158 |    incidental, or consequential damages of any character arising as a
159 |    result of this License or out of the use or inability to use the
160 |    Work (including but not limited to damages for loss of goodwill,
161 |    work stoppage, computer failure or malfunction, or any and all
162 |    other commercial damages or losses), even if such Contributor
163 |    has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 |    the Work or Derivative Works thereof, You may choose to offer,
167 |    and charge a fee for, acceptance of support, warranty, indemnity,
168 |    or other liability obligations and/or rights consistent with this
169 |    License. However, in accepting such obligations, You may act only
170 |    on Your own behalf and on Your sole responsibility, not on behalf
171 |    of any other Contributor, and only if You agree to indemnify,
172 |    defend, and hold each Contributor harmless for any liability
173 |    incurred by, or claims asserted against, such Contributor by reason
174 |    of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 |    To apply the Apache License to your work, attach the following
181 |    boilerplate notice, with the fields enclosed by brackets "[]"
182 |    replaced with your own identifying information. (Don't include
183 |    the brackets!)  The text should be enclosed in the appropriate
184 |    comment syntax for the file format. We also recommend that a
185 |    file or class name and description of purpose be included on the
186 |    same "printed page" as the copyright notice for easier
187 |    identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | 	http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 


--------------------------------------------------------------------------------
/bls12_377/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | Permission is hereby granted, free of charge, to any
 2 | person obtaining a copy of this software and associated
 3 | documentation files (the "Software"), to deal in the
 4 | Software without restriction, including without
 5 | limitation the rights to use, copy, modify, merge,
 6 | publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software
 8 | is furnished to do so, subject to the following
 9 | conditions:
10 | 
11 | The above copyright notice and this permission notice
12 | shall be included in all copies or substantial portions
13 | of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/bls12_377/Makefile:
--------------------------------------------------------------------------------
 1 | AR=ar
 2 | CC=gcc
 3 | CXX=g++
 4 | CPPFLAGS=-march=armv7-m -mcpu=cortex-m3 -mthumb -O3 -funroll-all-loops -fconserve-stack
 5 | #CPPFLAGS=-march=native -O2 -funroll-all-loops
 6 | 
 7 | all: libfpc.a
 8 | 	touch src/fp.rs
 9 | 
10 | libfpc.a: libfpc.a(fpc.o fp_mont.o fp_asm.o fq_asm.o)
11 | 
12 | %.o: src/%.cpp
13 | 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
14 | 
15 | %.o: src/%.c
16 | 	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
17 | 
18 | %.o: src/%.s
19 | 	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
20 | 
21 | %.o: src/%.S
22 | 	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
23 | 
24 | test: libfpc.a
25 | 	RUSTFLAGS="-L `pwd`" cargo test
26 | 
27 | bench: libfpc.a
28 | 	RUSTFLAGS="-L `pwd`" cargo bench -- Fp_m
29 | 
30 | bench2: libfpc.a
31 | 	RUSTFLAGS="-L `pwd`" cargo bench -- G2Projective_s
32 | 
33 | bench1: libfpc.a
34 | 	RUSTFLAGS="-L `pwd`" cargo bench -- G1Projective_s
35 | 
36 | # None of these targets creates a file named after itself; always run them.
37 | .PHONY: all test bench bench1 bench2 rsync
38 | rsync:
39 | 	rsync -ar --progress --exclude=target --exclude='*.a' . tinkerboard:bls-embedded/bls12_377
40 | 


--------------------------------------------------------------------------------
/bls12_377/benches/groups.rs:
--------------------------------------------------------------------------------
  1 | #[macro_use]
  2 | extern crate criterion;
  3 | 
  4 | extern crate bls12_377;
  5 | use bls12_377::*;
  6 | use bls12_377::fp::Fp;
  7 | 
  8 | use criterion::{black_box, Criterion};
  9 | 
 10 | fn criterion_benchmark(c: &mut Criterion) {
 11 |     // G1Affine: curve/subgroup checks, equality, scalar multiplication, decoding
 12 |     {
 13 |         let name = "G1Affine";
 14 |         let a = G1Affine::generator();
 15 |         let s = Scalar::from_raw([1, 2, 3, 4]);
 16 |         // Only the uncompressed encoding is benchmarked, so no compressed buffer is needed.
 17 |         let uncompressed = [0u8; 96];
 18 |         c.bench_function(&format!("{}_check_on_curve", name), move |b| {
 19 |             b.iter(|| black_box(a).is_on_curve())
 20 |         });
 21 |         c.bench_function(&format!("{}_check_equality", name), move |b| {
 22 |             b.iter(|| black_box(a) == black_box(a))
 23 |         });
 24 |         c.bench_function(&format!("{}_scalar_multiplication", name), move |b| {
 25 |             b.iter(|| black_box(a) * black_box(s))
 26 |         });
 27 |         c.bench_function(&format!("{}_subgroup_check", name), move |b| {
 28 |             b.iter(|| black_box(a).is_torsion_free())
 29 |         });
 30 |         c.bench_function(
 31 |             &format!("{} deserialize uncompressed point", name),
 32 |             move |b| b.iter(|| G1Affine::from_uncompressed(black_box(&uncompressed))),
 33 |         );
 34 |     }
 35 | 
 36 |     // G1Projective: group operations plus batch normalization of N points
 37 |     {
 38 |         let name = "G1Projective";
 39 |         let a = G1Projective::generator();
 40 |         let a_affine = G1Affine::generator();
 41 |         let s = Scalar::from_raw([1, 2, 3, 4]);
 42 | 
 43 |         const N: usize = 10000;
 44 |         let v = vec![G1Projective::generator(); N];
 45 |         let mut q = vec![G1Affine::identity(); N];
 46 | 
 47 |         c.bench_function(&format!("{}_check_on_curve", name), move |b| {
 48 |             b.iter(|| black_box(a).is_on_curve())
 49 |         });
 50 |         c.bench_function(&format!("{}_check_equality", name), move |b| {
 51 |             b.iter(|| black_box(a) == black_box(a))
 52 |         });
 53 |         c.bench_function(&format!("{}_to_affine", name), move |b| {
 54 |             b.iter(|| G1Affine::from(black_box(a)))
 55 |         });
 56 |         c.bench_function(&format!("{}_doubling", name), move |b| {
 57 |             b.iter(|| black_box(a).double())
 58 |         });
 59 |         c.bench_function(&format!("{}_addition", name), move |b| {
 60 |             b.iter(|| black_box(a).add(&a))
 61 |         });
 62 |         c.bench_function(&format!("{}_mixed_addition", name), move |b| {
 63 |             b.iter(|| black_box(a).add_mixed(&a_affine))
 64 |         });
 65 |         c.bench_function(&format!("{}_scalar_multiplication", name), move |b| {
 66 |             b.iter(|| black_box(a) * black_box(s))
 67 |         });
 68 |         c.bench_function(&format!("{}_batch_to_affine_n={}", name, N), move |b| {
 69 |             b.iter(|| {
 70 |                 G1Projective::batch_normalize(black_box(&v), black_box(&mut q));
 71 |                 black_box(&q)[0]
 72 |             })
 73 |         });
 74 |     }
 75 | 
 76 |     // G2Affine: curve/subgroup checks, equality, scalar multiplication, decoding
 77 |     {
 78 |         let name = "G2Affine";
 79 |         let a = G2Affine::generator();
 80 |         let s = Scalar::from_raw([1, 2, 3, 4]);
 81 |         // Only the uncompressed encoding is benchmarked, so no compressed buffer is needed.
 82 |         let uncompressed = [0u8; 192];
 83 |         c.bench_function(&format!("{}_check_on_curve", name), move |b| {
 84 |             b.iter(|| black_box(a).is_on_curve())
 85 |         });
 86 |         c.bench_function(&format!("{}_check_equality", name), move |b| {
 87 |             b.iter(|| black_box(a) == black_box(a))
 88 |         });
 89 |         c.bench_function(&format!("{}_scalar_multiplication", name), move |b| {
 90 |             b.iter(|| black_box(a) * black_box(s))
 91 |         });
 92 |         c.bench_function(&format!("{}_subgroup_check", name), move |b| {
 93 |             b.iter(|| black_box(a).is_torsion_free())
 94 |         });
 95 |         c.bench_function(
 96 |             &format!("{} deserialize uncompressed point", name),
 97 |             move |b| b.iter(|| G2Affine::from_uncompressed(black_box(&uncompressed))),
 98 |         );
 99 |     }
100 | 
101 |     // G2Projective: group operations plus batch normalization of N points
102 |     {
103 |         let name = "G2Projective";
104 |         let a = G2Projective::generator();
105 |         let a_affine = G2Affine::generator();
106 |         let s = Scalar::from_raw([1, 2, 3, 4]);
107 | 
108 |         const N: usize = 10000;
109 |         let v = vec![G2Projective::generator(); N];
110 |         let mut q = vec![G2Affine::identity(); N];
111 | 
112 |         c.bench_function(&format!("{}_check_on_curve", name), move |b| {
113 |             b.iter(|| black_box(a).is_on_curve())
114 |         });
115 |         c.bench_function(&format!("{}_check_equality", name), move |b| {
116 |             b.iter(|| black_box(a) == black_box(a))
117 |         });
118 |         c.bench_function(&format!("{}_to_affine", name), move |b| {
119 |             b.iter(|| G2Affine::from(black_box(a)))
120 |         });
121 |         c.bench_function(&format!("{}_doubling", name), move |b| {
122 |             b.iter(|| black_box(a).double())
123 |         });
124 |         c.bench_function(&format!("{}_addition", name), move |b| {
125 |             b.iter(|| black_box(a).add(&a))
126 |         });
127 |         c.bench_function(&format!("{}_mixed_addition", name), move |b| {
128 |             b.iter(|| black_box(a).add_mixed(&a_affine))
129 |         });
130 |         c.bench_function(&format!("{}_scalar_multiplication", name), move |b| {
131 |             b.iter(|| black_box(a) * black_box(s))
132 |         });
133 |         c.bench_function(&format!("{}_batch_to_affine_n={}", name, N), move |b| {
134 |             b.iter(|| {
135 |                 G2Projective::batch_normalize(black_box(&v), black_box(&mut q));
136 |                 black_box(&q)[0]
137 |             })
138 |         });
139 |     }
140 |     // Fp arithmetic: both multiplication routines (`mul`, `mul_old`) and inversion
141 |     {
142 |         let x = Fp::one();
143 |         let y = Fp::one();
144 | 
145 |         c.bench_function("Fp_multiplication_new", move |b| {
146 |             b.iter(|| black_box(x).mul(&black_box(y)))
147 |         });
148 | 
149 |         c.bench_function("Fp_multiplication_old", move |b| {
150 |             b.iter(|| black_box(x).mul_old(&black_box(y)))
151 |         });
152 | 
153 |         c.bench_function("Fp_inverse", move |b| {
154 |             b.iter(|| black_box(x).invert())
155 |         });
156 |     }
157 | }
158 | 
159 | criterion_group!(benches, criterion_benchmark);
160 | criterion_main!(benches);
161 | 


--------------------------------------------------------------------------------
/bls12_377/rust-toolchain:
--------------------------------------------------------------------------------
1 | 1.36.0


--------------------------------------------------------------------------------
/bls12_377/src/fp.rs:
--------------------------------------------------------------------------------
   1 | //! This module provides an implementation of the BLS12-377 base field `GF(p)` where `p = 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177`
   2 | 
   3 | use core::mem;
   4 | use core::fmt;
   5 | use core::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign};
   6 | 
   7 | use byteorder::{BigEndian, ByteOrder, LittleEndian};
   8 | use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};
   9 | 
  10 | use crate::util::{adc, mac, sbb, LegendreSymbol};
  11 | 
  12 | // Raw entry points into the statically linked `fpc` library. Judging by
  13 | // their names they perform a multiplication and a Montgomery reduction on
  14 | // `u64` limb buffers; the buffer lengths are not visible from this
  15 | // declaration, so check the C/assembly sources before calling them.
  16 | #[link(name = "fpc", kind = "static")]
  17 | extern "C" {
  18 |     /// NOTE(review): presumably writes `left * right` to `output` -- confirm.
  19 |     fn c_mul(output: *mut u64, left: *const u64, right: *const u64);
  20 | 
  21 |     /// NOTE(review): presumably reduces `tmp` into `output` -- confirm.
  22 |     /// The spelling must match the symbol exported by the library.
  23 |     fn c_montgomry(output: *mut u64, tmp: *const u64);
  24 | }
  25 | 
  26 | // The internal representation of this type is six 64-bit unsigned
  27 | // limbs, least-significant limb first. `Fp` values are always in
  28 | // Montgomery form; i.e., Fp(a) = aR mod p, with R = 2^384.
  29 | #[derive(Copy, Clone)]
  30 | pub struct Fp([u64; 6]);
  31 | 
  32 | impl fmt::Debug for Fp {
  33 |     /// Writes the element as `0x` followed by the lowercase hex digits of
  34 |     /// the 48 bytes returned by `to_bytes`.
  35 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  36 |         f.write_str("0x")?;
  37 |         self.to_bytes()
  38 |             .iter()
  39 |             .try_for_each(|byte| write!(f, "{:02x}", byte))
  40 |     }
  41 | }
  42 | 
  43 | impl Default for Fp {
  44 |     fn default() -> Self {
  45 |         Self::zero() // the additive identity
  46 |     }
  47 | }
  48 | 
  49 | impl ConstantTimeEq for Fp {
  50 |     /// Compares all six limbs without short-circuiting and ANDs the
  51 |     /// per-limb results together.
  52 |     fn ct_eq(&self, other: &Self) -> Choice {
  53 |         self.0
  54 |             .iter()
  55 |             .zip(other.0.iter())
  56 |             .fold(Choice::from(1u8), |acc, (lhs, rhs)| acc & lhs.ct_eq(rhs))
  57 |     }
  58 | }
  59 | 
  60 | impl Eq for Fp {}
  61 | impl PartialEq for Fp {
  62 |     fn eq(&self, other: &Self) -> bool {
  63 |         bool::from(self.ct_eq(other)) // equality is decided by the constant-time comparison
  64 |     }
  65 | }
  66 | 
  67 | impl ConditionallySelectable for Fp {
  68 |     /// Picks between `a` and `b` one limb at a time, delegating each
  69 |     /// choice to `u64::conditional_select` so that the selection itself
  70 |     /// never branches on `choice` in this function.
  71 |     fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self {
  72 |         let mut limbs = [0u64; 6];
  73 |         for (i, limb) in limbs.iter_mut().enumerate() {
  74 |             *limb = u64::conditional_select(&a.0[i], &b.0[i], choice);
  75 |         }
  76 |         Fp(limbs)
  77 |     }
  78 | }
  79 | 
  80 | /// The modulus p = 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177, as six limbs, least-significant limb first.
  81 | #[inline(always)]
  82 | pub const fn modulus() -> [u64; 6] {
  83 |     [
  84 |         0x8508c00000000001,
  85 |         0x170b5d4430000000,
  86 |         0x1ef3622fba094800,
  87 |         0x1a22d9f300f5138f,
  88 |         0xc63b05c06ca1493b,
  89 |         0x1ae3a4617c510ea,
  90 |     ]
  91 | }
  92 | 
  93 | 
  94 | /// INV = -(p^{-1} mod 2^64) mod 2^64, the per-limb factor used by Montgomery reduction.
  95 | #[inline]
  96 | const fn inv() -> u64 {
  97 |     9586122913090633727u64
  98 | }
  99 | 
 100 | const fn two_adicity() -> u32 {
 101 |     46u32 // the exponent s in p - 1 = 2^s * t with t odd
 102 | }
 103 | 
 104 | /// R = 2^384 mod p, i.e. the Montgomery form of one (returned by `Fp::one`).
 105 | const fn r1() -> Fp {
 106 |     Fp([
 107 |         0x2cdffffffffff68,
 108 |         0x51409f837fffffb1,
 109 |         0x9f7db3a98a7d3ff2,
 110 |         0x7b4e97b76e7c6305,
 111 |         0x4cf495bf803c84e8,
 112 |         0x8d6661e2fdf49a,
 113 |     ])
 114 | }
 115 | 
 116 | /// R^2 = 2^(384*2) mod p; multiplying a raw value by it converts that value to Montgomery form.
 117 | const fn r_squared() -> Fp {
 118 |     Fp([
 119 |         0xb786686c9400cd22,
 120 |         0x329fcaab00431b1,
 121 |         0x22a5f11162d6b46d,
 122 |         0xbfdf7d03827dc3ac,
 123 |         0x837e92f041790bf9,
 124 |         0x6dfccb1e914b88,
 125 |     ])
 126 | }
 127 | 
 128 | /// c^t, where p - 1 = 2^s*t and t odd; the initial `z` of `sqrt_vartime`. NOTE(review): c is presumably a fixed quadratic non-residue -- confirm.
 129 | const fn root_of_unity() -> Fp {
 130 |    Fp([
 131 |     0x1c104955744e6e0f,
 132 |     0xf1bd15c3898dd1af,
 133 |     0x76da78169a7f3950,
 134 |     0xee086c1fe367c337,
 135 |     0xf95564f4cbc1b61f,
 136 |     0xf3c1414ef58c54,
 137 |    ])
 138 | }
 139 | 
 140 | const fn t_minus_one_div_two() -> [u64; 6] { // NOTE(review): per the name, (t - 1) / 2 with p - 1 = 2^s * t -- confirm
 141 |     [
 142 |         0xba88600000010a11,
 143 |         0xc45f741290002e16,
 144 |         0xb3e601ea271e3de6,
 145 |         0xb80d94292763445,
 146 |         0x748c2f8a21d58c76,
 147 |         0x35c,
 148 |     ]
 149 | }
 150 | 
 151 | const fn modulus_minus_one_div_two() -> [u64; 6] { // (p - 1) / 2, least-significant limb first
 152 |     [
 153 |         0x4284600000000000,
 154 |         0xb85aea218000000,
 155 |         0x8f79b117dd04a400,
 156 |         0x8d116cf9807a89c7,
 157 |         0x631d82e03650a49d,
 158 |         0xd71d230be28875,
 159 |     ]
 160 | }
 161 | 
 162 | impl<'a> Neg for &'a Fp {
 163 |     type Output = Fp;
 164 | 
 165 |     #[inline(always)]
 166 |     fn neg(self) -> Fp {
 167 |         Fp::neg(self) // the inherent `Fp::neg`, not this trait method
 168 |     }
 169 | }
 170 | 
 171 | impl Neg for Fp {
 172 |     type Output = Fp;
 173 | 
 174 |     #[inline(always)]
 175 |     fn neg(self) -> Fp {
 176 |         Fp::neg(&self) // the inherent, by-reference negation
 177 |     }
 178 | }
 179 | 
 180 | impl<'a, 'b> Sub<&'b Fp> for &'a Fp {
 181 |     type Output = Fp;
 182 | 
 183 |     #[inline]
 184 |     fn sub(self, rhs: &'b Fp) -> Fp {
 185 |         Fp::sub(self, rhs) // the inherent modular subtraction
 186 |     }
 187 | }
 188 | 
 189 | impl<'a, 'b> Add<&'b Fp> for &'a Fp {
 190 |     type Output = Fp;
 191 | 
 192 |     #[inline(always)]
 193 |     fn add(self, rhs: &'b Fp) -> Fp {
 194 |         Fp::add(self, rhs) // the inherent modular addition
 195 |     }
 196 | }
 197 | 
 198 | impl<'a, 'b> Mul<&'b Fp> for &'a Fp {
 199 |     type Output = Fp;
 200 | 
 201 |     #[inline]
 202 |     fn mul(self, rhs: &'b Fp) -> Fp {
 203 |         self.mul(rhs) // NOTE(review): presumably resolves to an inherent `Fp::mul` defined further down -- confirm
 204 |     }
 205 | }
 206 | 
 207 | impl_binops_additive!(Fp, Fp);
 208 | impl_binops_multiplicative!(Fp, Fp);
 209 | 
 210 | impl Fp {
 211 |     /// Returns zero, the additive identity (all six limbs are zero).
 212 |     #[inline]
 213 |     pub const fn zero() -> Fp {
 214 |         Fp([0; 6])
 215 |     }
 216 | 
 217 |     /// Returns one, the multiplicative identity, in Montgomery form (the constant from `r1`).
 218 |     #[inline]
 219 |     pub const fn one() -> Fp {
 220 |         r1()
 221 |     }
 222 | 
 223 |     #[inline]
 224 |     pub fn is_zero(&self) -> Choice {
 225 |         Fp::zero().ct_eq(self) // constant-time test against the additive identity
 226 |     }
 227 | 
 228 |     #[inline]
 229 |     pub fn is_one(&self) -> Choice {
 230 |         Fp::one().ct_eq(self) // constant-time test against the multiplicative identity
 231 |     }
 232 | 
 233 |     /// Attempts to convert a big-endian byte representation of
 234 |     /// a field element into an `Fp`, failing if the input is not canonical.
 235 |     #[inline(always)]
 236 |     pub fn from_bytes(bytes: &[u8; 48]) -> CtOption<Fp> {
 237 |         let mut tmp = Fp([0, 0, 0, 0, 0, 0]);
 238 |         let modulus = modulus();
 239 | 
 240 |         tmp.0[5] = BigEndian::read_u64(&bytes[0..8]);
 241 |         tmp.0[4] = BigEndian::read_u64(&bytes[8..16]);
 242 |         tmp.0[3] = BigEndian::read_u64(&bytes[16..24]);
 243 |         tmp.0[2] = BigEndian::read_u64(&bytes[24..32]);
 244 |         tmp.0[1] = BigEndian::read_u64(&bytes[32..40]);
 245 |         tmp.0[0] = BigEndian::read_u64(&bytes[40..48]);
 246 | 
 247 |         // Try to subtract the modulus; only the final borrow is of interest
 248 |         let (_, borrow) = sbb(tmp.0[0], modulus[0], 0);
 249 |         let (_, borrow) = sbb(tmp.0[1], modulus[1], borrow);
 250 |         let (_, borrow) = sbb(tmp.0[2], modulus[2], borrow);
 251 |         let (_, borrow) = sbb(tmp.0[3], modulus[3], borrow);
 252 |         let (_, borrow) = sbb(tmp.0[4], modulus[4], borrow);
 253 |         let (_, borrow) = sbb(tmp.0[5], modulus[5], borrow);
 254 | 
 255 |         // If the element is smaller than MODULUS then the
 256 |         // subtraction will underflow, producing a borrow value
 257 |         // of 0xffff...ffff. Otherwise, it'll be zero.
 258 |         let is_some = (borrow as u8) & 1;
 259 | 
 260 |         // Convert to Montgomery form by computing
 261 |         // (a.R^0 * R^2) / R = a.R
 262 |         tmp *= &r_squared();
 263 | 
 264 |         CtOption::new(tmp, Choice::from(is_some))
 265 |     }
 266 | 
 267 |     /// Attempts to convert a little-endian byte representation of
 268 |     /// a field element into an `Fp`, returning `None` if the input is
 269 |     /// not canonical (i.e. not below the modulus). Not constant time.
 270 |     #[inline(always)]
 271 |     pub fn from_bytes_little_endian_vartime(bytes: &[u8; 48]) -> Option<Fp> {
 272 |         let mut tmp = Fp([0, 0, 0, 0, 0, 0]);
 273 |         let modulus = modulus();
 274 | 
 275 |         tmp.0[0] = LittleEndian::read_u64(&bytes[0..8]);
 276 |         tmp.0[1] = LittleEndian::read_u64(&bytes[8..16]);
 277 |         tmp.0[2] = LittleEndian::read_u64(&bytes[16..24]);
 278 |         tmp.0[3] = LittleEndian::read_u64(&bytes[24..32]);
 279 |         tmp.0[4] = LittleEndian::read_u64(&bytes[32..40]);
 280 |         tmp.0[5] = LittleEndian::read_u64(&bytes[40..48]);
 281 | 
 282 |         // Try to subtract the modulus; only the final borrow is of interest
 283 |         let (_, borrow) = sbb(tmp.0[0], modulus[0], 0);
 284 |         let (_, borrow) = sbb(tmp.0[1], modulus[1], borrow);
 285 |         let (_, borrow) = sbb(tmp.0[2], modulus[2], borrow);
 286 |         let (_, borrow) = sbb(tmp.0[3], modulus[3], borrow);
 287 |         let (_, borrow) = sbb(tmp.0[4], modulus[4], borrow);
 288 |         let (_, borrow) = sbb(tmp.0[5], modulus[5], borrow);
 289 | 
 290 |         // If the element is smaller than MODULUS the subtraction
 291 |         // underflows and the borrow is 0xffff...ffff; otherwise it is
 292 |         // zero. This function is variable time anyway, so reject
 293 |         // non-canonical input before paying for the multiplication.
 294 |         if ((borrow as u8) & 1) == 0 {
 295 |             return None;
 296 |         }
 297 | 
 298 |         // Convert to Montgomery form by computing
 299 |         // (a.R^0 * R^2) / R = a.R
 300 |         tmp *= &r_squared();
 301 | 
 302 |         Some(tmp)
 303 |     }
 304 | 
 305 |     /// Converts an element of `Fp` into a byte representation in
 306 |     /// big-endian byte order: the most significant limb is written
 307 |     /// first, and each limb is itself written big-endian.
 308 |     pub fn to_bytes(&self) -> [u8; 48] {
 309 |         // Leave Montgomery form first by computing
 310 |         // (a.R) / R = a
 311 |         let canonical = Fp::montgomery_reduce(
 312 |             self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], 0, 0, 0, 0, 0, 0,
 313 |         );
 314 | 
 315 |         let mut res = [0; 48];
 316 |         // Pair each 8-byte window of the output with a limb, walking the
 317 |         // limbs from most to least significant.
 318 |         for (chunk, limb) in res.chunks_exact_mut(8).zip(canonical.0.iter().rev()) {
 319 |             BigEndian::write_u64(chunk, *limb);
 320 |         }
 321 | 
 322 |         res
 323 |     }
 324 | 
 325 |     /// Converts an element of `Fp` into a byte representation in
 326 |     /// little-endian byte order: the least significant limb is written
 327 |     /// first, and each limb is itself written little-endian.
 328 |     pub fn to_bytes_littleendian(&self) -> [u8; 48] {
 329 |         // Leave Montgomery form first by computing
 330 |         // (a.R) / R = a
 331 |         let canonical = Fp::montgomery_reduce(
 332 |             self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], 0, 0, 0, 0, 0, 0,
 333 |         );
 334 | 
 335 |         let mut res = [0; 48];
 336 |         // Pair each 8-byte window of the output with a limb, walking the
 337 |         // limbs from least to most significant.
 338 |         for (chunk, limb) in res.chunks_exact_mut(8).zip(canonical.0.iter()) {
 339 |             LittleEndian::write_u64(chunk, *limb);
 340 |         }
 341 | 
 342 |         res
 343 |     }
 344 | 
 345 |     /// Returns whether or not this element is strictly lexicographically
 346 |     /// larger than its negation, i.e. whether its canonical value exceeds (p - 1) / 2.
 347 |     pub fn lexicographically_largest(&self) -> Choice {
 348 |         // This can be determined by checking to see if the element is
 349 |         // larger than (p - 1) // 2. If we subtract by ((p - 1) // 2) + 1
 350 |         // and there is no underflow, then the element must be larger than
 351 |         // (p - 1) // 2
 352 | 
 353 |         // First, because self is in Montgomery form we need to reduce it
 354 |         let tmp = Fp::montgomery_reduce(
 355 |             self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], 0, 0, 0, 0, 0, 0,
 356 |         );
 357 | 
 358 |         let (_, borrow) = sbb(tmp.0[0], 0x4284600000000001, 0);
 359 |         let (_, borrow) = sbb(tmp.0[1], 0x0b85aea218000000, borrow);
 360 |         let (_, borrow) = sbb(tmp.0[2], 0x8f79b117dd04a400, borrow);
 361 |         let (_, borrow) = sbb(tmp.0[3], 0x8d116cf9807a89c7, borrow);
 362 |         let (_, borrow) = sbb(tmp.0[4], 0x631d82e03650a49d, borrow);
 363 |         let (_, borrow) = sbb(tmp.0[5], 0xd71d230be28875, borrow);
 364 | 
 365 |         // If the element was smaller, the subtraction will underflow
 366 |         // producing a borrow value of 0xffff...ffff, otherwise it will
 367 |         // be zero. We create a Choice representing true if there was
 368 |         // underflow (and so this element is not lexicographically larger
 369 |         // than its negation) and then negate it.
 370 | 
 371 |         !Choice::from((borrow as u8) & 1)
 372 |     }
 373 | 
 374 |     /// Constructs an element of `Fp` directly from six limbs, without checking that
 375 |     /// it is canonical; the limbs are stored as given, with no Montgomery conversion.
 376 |     #[inline(always)]
 377 |     pub const fn from_raw_unchecked(v: [u64; 6]) -> Fp {
 378 |         Fp(v)
 379 |     }
 380 | 
 381 |     /// Square-and-multiply exponentiation by `by` (limbs least significant first).
 382 |     /// Variable time, but only with respect to the exponent.
 383 |     #[inline(always)]
 384 |     pub fn pow_vartime(&self, by: &[u64; 6]) -> Self {
 385 |         let bits = by
 386 |             .iter()
 387 |             .rev()
 388 |             .flat_map(|limb| (0..64).rev().map(move |i| ((*limb >> i) & 1) == 1));
 389 |         let mut acc = Self::one();
 390 |         for bit_is_set in bits {
 391 |             acc = acc.square();
 392 |             if bit_is_set {
 393 |                 acc = acc.mul(self);
 394 |             }
 395 |         }
 396 |         acc
 397 |     }
 398 | 
 399 |     /// Classifies `self` via Euler's criterion: `self^((p - 1) / 2)` is
 400 |     /// zero for zero and one for a non-zero quadratic residue; any
 401 |     /// other value is reported as a non-residue.
 402 |     pub fn legendre(&self) -> LegendreSymbol {
 403 |         match self.pow_vartime(&modulus_minus_one_div_two()) {
 404 |             s if s == Self::zero() => LegendreSymbol::Zero,
 405 |             s if s == Self::one() => LegendreSymbol::QuadraticResidue,
 406 |             _ => LegendreSymbol::QuadraticNonResidue,
 407 |         }
 408 |     }
 409 | 
 410 |     pub fn sqrt_vartime(&self) -> Option<Self> { // variable-time square root; `None` for non-residues
 411 |         match self.legendre() {
 412 |             LegendreSymbol::Zero => Some(*self),
 413 |             LegendreSymbol::QuadraticNonResidue => None,
 414 |             LegendreSymbol::QuadraticResidue => {
 415 |                 let mut z = root_of_unity();
 416 |                 let mut w = self.pow_vartime(&t_minus_one_div_two());
 417 |                 let mut x = w * self;
 418 |                 let mut b = x * &w;
 419 |                 let mut v = two_adicity() as usize;
 420 | 
 421 |                 // b = self * w^2; sanity check that b^(2^(v-1)) == 1 before iterating
 422 |                 {
 423 |                     let mut check = b;
 424 |                     for _ in 0..(v-1) {
 425 |                         check = check.square();
 426 |                     }
 427 |                     if check != Fp::one() {
 428 |                         panic!("Input is not a square root, but passed the QR test")
 429 |                     }
 430 |                 }
 431 | 
 432 |                 while b != Fp::one() {
 433 |                     let mut k = 0usize;
 434 | 
 435 |                     let mut b2k = b;
 436 |                     while b2k != Fp::one() {
 437 |                         // invariant: b2k = b^(2^k) on entry; on exit k is the least k with b^(2^k) == 1
 438 |                         b2k = b2k.square();
 439 |                         k += 1;
 440 |                     }
 441 | 
 442 |                     let j = v - k - 1;
 443 |                     w = z;
 444 |                     for _ in 0..j {
 445 |                         w = w.square();
 446 |                     }
 447 | 
 448 |                     z = w.square();
 449 |                     b *= &z;
 450 |                     x *= &w;
 451 |                     v = k;
 452 |                 }
 453 |                 Some(x)
 454 |             },
 455 |         }
 456 |     }
 457 | 
 458 |     /// Square-and-multiply over one 64-bit exponent limb: returns
 459 |     /// `acc^(2^64) * self^by`, consuming `by` from its top bit down.
 460 |     #[inline(always)]
 461 |     fn pow_acc(&self, mut acc: Fp, by: u64) -> Self {
 462 |         for i in (0..64).rev() {
 463 |             acc = acc.square();
 464 |             if ((by >> i) & 1) == 1 {
 465 |                 acc = acc.mul(self);
 466 |             }
 467 |         }
 468 |         acc
 469 |     }
 470 | 
 471 |     /// Computes the multiplicative inverse as `self^(p - 2)`; the result is empty for zero.
 472 |     #[inline(always)]
 473 |     pub fn invert(&self) -> CtOption<Self> {
 474 |         const P_MINUS_TWO: [u64; 6] = [ // most significant limb first
 475 |             0x1ae3a4617c510ea,
 476 |             0xc63b05c06ca1493b,
 477 |             0x1a22d9f300f5138f,
 478 |             0x1ef3622fba094800,
 479 |             0x170b5d4430000000,
 480 |             0x8508bfffffffffff,
 481 |         ];
 482 |         let acc = P_MINUS_TWO.iter().fold(Self::one(), |acc, &limb| self.pow_acc(acc, limb));
 483 |         CtOption::new(acc, !self.is_zero())
 484 |     }
 485 | 
 486 |     #[inline(always)]
 487 |     fn subtract_p(&self) -> Fp {
 488 |         let modulus = modulus();
 489 |         let (r0, borrow) = sbb(self.0[0], modulus[0], 0);
 490 |         let (r1, borrow) = sbb(self.0[1], modulus[1], borrow);
 491 |         let (r2, borrow) = sbb(self.0[2], modulus[2], borrow);
 492 |         let (r3, borrow) = sbb(self.0[3], modulus[3], borrow);
 493 |         let (r4, borrow) = sbb(self.0[4], modulus[4], borrow);
 494 |         let (r5, borrow) = sbb(self.0[5], modulus[5], borrow);
 495 | 
 496 |         // `borrow` is all ones if the subtraction underflowed (self < p) and zero
 497 |         // otherwise, so use it as a mask: keep `self` on underflow, else `self - p`.
 498 |         let r0 = (self.0[0] & borrow) | (r0 & !borrow);
 499 |         let r1 = (self.0[1] & borrow) | (r1 & !borrow);
 500 |         let r2 = (self.0[2] & borrow) | (r2 & !borrow);
 501 |         let r3 = (self.0[3] & borrow) | (r3 & !borrow);
 502 |         let r4 = (self.0[4] & borrow) | (r4 & !borrow);
 503 |         let r5 = (self.0[5] & borrow) | (r5 & !borrow);
 504 |         Fp([r0, r1, r2, r3, r4, r5])
 505 |     }
 506 | 
 507 |     #[inline(always)]
 508 |     pub fn add(&self, rhs: &Fp) -> Fp {
 509 |         let (d0, carry) = adc(self.0[0], rhs.0[0], 0);
 510 |         let (d1, carry) = adc(self.0[1], rhs.0[1], carry);
 511 |         let (d2, carry) = adc(self.0[2], rhs.0[2], carry);
 512 |         let (d3, carry) = adc(self.0[3], rhs.0[3], carry);
 513 |         let (d4, carry) = adc(self.0[4], rhs.0[4], carry);
 514 |         let (d5, _) = adc(self.0[5], rhs.0[5], carry);
 515 | 
 516 |         // Conditionally subtract the modulus so that the sum is again
 517 |         // smaller than the modulus; the carry out of the top limb is ignored.
 518 |         (&Fp([d0, d1, d2, d3, d4, d5])).subtract_p()
 519 |     }
 520 | 
 521 |     #[inline(always)]
 522 |     pub fn neg(&self) -> Fp {
 523 |         let modulus = modulus();
 524 |         let (d0, borrow) = sbb(modulus[0], self.0[0], 0);
 525 |         let (d1, borrow) = sbb(modulus[1], self.0[1], borrow);
 526 |         let (d2, borrow) = sbb(modulus[2], self.0[2], borrow);
 527 |         let (d3, borrow) = sbb(modulus[3], self.0[3], borrow);
 528 |         let (d4, borrow) = sbb(modulus[4], self.0[4], borrow);
 529 |         let (d5, _) = sbb(modulus[5], self.0[5], borrow);
 530 | 
 531 |         // p - 0 = p is not canonical, so build a mask that is zero when
 532 |         // `self` is zero (forcing the result to 0) and all ones otherwise.
 533 |         let mask = (((self.0[0] | self.0[1] | self.0[2] | self.0[3] | self.0[4] | self.0[5]) == 0)
 534 |             as u64)
 535 |             .wrapping_sub(1);
 536 | 
 537 |         Fp([
 538 |             d0 & mask,
 539 |             d1 & mask,
 540 |             d2 & mask,
 541 |             d3 & mask,
 542 |             d4 & mask,
 543 |             d5 & mask,
 544 |         ])
 545 |     }
 546 | 
 547 |     #[inline(always)]
 548 |     pub fn sub(&self, rhs: &Fp) -> Fp {
 549 |         (&rhs.neg()).add(self)
 550 |     }
 551 | 
 552 |     #[inline(always)]
 553 |     pub fn square(&self) -> Fp {
 554 |         self * self
 555 |     }
 556 | 
    /// Pure-Rust Montgomery reduction of a 12-limb value to a six-limb `Fp`.
    ///
    /// `t0` is the least-significant limb and `t11` the most-significant one
    /// (this is the order in which `mul_old` produces them). The reduction
    /// runs six rounds; each round multiplies the current lowest live limb by
    /// `inv()` (wrapping) to obtain `k`, accumulates `k * modulus` into the
    /// running value with `mac`, and folds the round's carry into the next
    /// high limb with `adc`. The upper six limbs are then passed through
    /// `subtract_p`.
    ///
    /// NOTE(review): presumably `inv()` is `-p^{-1} mod 2^64`, which is what
    /// makes the discarded low limb of each round zero -- confirm against its
    /// definition.
    #[inline(always)]
    fn montgomery_reduce_old(
        t0: u64,
        t1: u64,
        t2: u64,
        t3: u64,
        t4: u64,
        t5: u64,
        t6: u64,
        t7: u64,
        t8: u64,
        t9: u64,
        t10: u64,
        t11: u64,
    ) -> Self {
        // The Montgomery reduction here is based on Algorithm 14.32 in
        // Handbook of Applied Cryptography
        // <http://cacr.uwaterloo.ca/hac/about/chap14.pdf>.
        let inv = inv();
        let modulus = modulus();

        // Round 0: works on t0; its low result limb is discarded.
        let k = t0.wrapping_mul(inv);
        let (_, carry) = mac(t0, k, modulus[0], 0);
        let (r1, carry) = mac(t1, k, modulus[1], carry);
        let (r2, carry) = mac(t2, k, modulus[2], carry);
        let (r3, carry) = mac(t3, k, modulus[3], carry);
        let (r4, carry) = mac(t4, k, modulus[4], carry);
        let (r5, carry) = mac(t5, k, modulus[5], carry);
        // r7 is the carry out of this round, consumed by the next round's adc.
        let (r6, r7) = adc(t6, 0, carry);

        // Round 1: works on r1.
        let k = r1.wrapping_mul(inv);
        let (_, carry) = mac(r1, k, modulus[0], 0);
        let (r2, carry) = mac(r2, k, modulus[1], carry);
        let (r3, carry) = mac(r3, k, modulus[2], carry);
        let (r4, carry) = mac(r4, k, modulus[3], carry);
        let (r5, carry) = mac(r5, k, modulus[4], carry);
        let (r6, carry) = mac(r6, k, modulus[5], carry);
        let (r7, r8) = adc(t7, r7, carry);

        // Round 2: works on r2.
        let k = r2.wrapping_mul(inv);
        let (_, carry) = mac(r2, k, modulus[0], 0);
        let (r3, carry) = mac(r3, k, modulus[1], carry);
        let (r4, carry) = mac(r4, k, modulus[2], carry);
        let (r5, carry) = mac(r5, k, modulus[3], carry);
        let (r6, carry) = mac(r6, k, modulus[4], carry);
        let (r7, carry) = mac(r7, k, modulus[5], carry);
        let (r8, r9) = adc(t8, r8, carry);

        // Round 3: works on r3.
        let k = r3.wrapping_mul(inv);
        let (_, carry) = mac(r3, k, modulus[0], 0);
        let (r4, carry) = mac(r4, k, modulus[1], carry);
        let (r5, carry) = mac(r5, k, modulus[2], carry);
        let (r6, carry) = mac(r6, k, modulus[3], carry);
        let (r7, carry) = mac(r7, k, modulus[4], carry);
        let (r8, carry) = mac(r8, k, modulus[5], carry);
        let (r9, r10) = adc(t9, r9, carry);

        // Round 4: works on r4.
        let k = r4.wrapping_mul(inv);
        let (_, carry) = mac(r4, k, modulus[0], 0);
        let (r5, carry) = mac(r5, k, modulus[1], carry);
        let (r6, carry) = mac(r6, k, modulus[2], carry);
        let (r7, carry) = mac(r7, k, modulus[3], carry);
        let (r8, carry) = mac(r8, k, modulus[4], carry);
        let (r9, carry) = mac(r9, k, modulus[5], carry);
        let (r10, r11) = adc(t10, r10, carry);

        // Round 5: works on r5; the final carry out of the top limb is dropped.
        let k = r5.wrapping_mul(inv);
        let (_, carry) = mac(r5, k, modulus[0], 0);
        let (r6, carry) = mac(r6, k, modulus[1], carry);
        let (r7, carry) = mac(r7, k, modulus[2], carry);
        let (r8, carry) = mac(r8, k, modulus[3], carry);
        let (r9, carry) = mac(r9, k, modulus[4], carry);
        let (r10, carry) = mac(r10, k, modulus[5], carry);
        let (r11, _) = adc(t11, r11, carry);

        // Attempt to subtract the modulus, to ensure the value
        // is smaller than the modulus.
        (&Fp([r6, r7, r8, r9, r10, r11])).subtract_p()
    }
 636 | 
 637 |     #[inline(always)]
 638 |     pub fn mul_old(&self, rhs: &Fp) -> Fp {
 639 |         let (t0, carry) = mac(0, self.0[0], rhs.0[0], 0);
 640 |         let (t1, carry) = mac(0, self.0[0], rhs.0[1], carry);
 641 |         let (t2, carry) = mac(0, self.0[0], rhs.0[2], carry);
 642 |         let (t3, carry) = mac(0, self.0[0], rhs.0[3], carry);
 643 |         let (t4, carry) = mac(0, self.0[0], rhs.0[4], carry);
 644 |         let (t5, t6) = mac(0, self.0[0], rhs.0[5], carry);
 645 | 
 646 |         let (t1, carry) = mac(t1, self.0[1], rhs.0[0], 0);
 647 |         let (t2, carry) = mac(t2, self.0[1], rhs.0[1], carry);
 648 |         let (t3, carry) = mac(t3, self.0[1], rhs.0[2], carry);
 649 |         let (t4, carry) = mac(t4, self.0[1], rhs.0[3], carry);
 650 |         let (t5, carry) = mac(t5, self.0[1], rhs.0[4], carry);
 651 |         let (t6, t7) = mac(t6, self.0[1], rhs.0[5], carry);
 652 | 
 653 |         let (t2, carry) = mac(t2, self.0[2], rhs.0[0], 0);
 654 |         let (t3, carry) = mac(t3, self.0[2], rhs.0[1], carry);
 655 |         let (t4, carry) = mac(t4, self.0[2], rhs.0[2], carry);
 656 |         let (t5, carry) = mac(t5, self.0[2], rhs.0[3], carry);
 657 |         let (t6, carry) = mac(t6, self.0[2], rhs.0[4], carry);
 658 |         let (t7, t8) = mac(t7, self.0[2], rhs.0[5], carry);
 659 | 
 660 |         let (t3, carry) = mac(t3, self.0[3], rhs.0[0], 0);
 661 |         let (t4, carry) = mac(t4, self.0[3], rhs.0[1], carry);
 662 |         let (t5, carry) = mac(t5, self.0[3], rhs.0[2], carry);
 663 |         let (t6, carry) = mac(t6, self.0[3], rhs.0[3], carry);
 664 |         let (t7, carry) = mac(t7, self.0[3], rhs.0[4], carry);
 665 |         let (t8, t9) = mac(t8, self.0[3], rhs.0[5], carry);
 666 | 
 667 |         let (t4, carry) = mac(t4, self.0[4], rhs.0[0], 0);
 668 |         let (t5, carry) = mac(t5, self.0[4], rhs.0[1], carry);
 669 |         let (t6, carry) = mac(t6, self.0[4], rhs.0[2], carry);
 670 |         let (t7, carry) = mac(t7, self.0[4], rhs.0[3], carry);
 671 |         let (t8, carry) = mac(t8, self.0[4], rhs.0[4], carry);
 672 |         let (t9, t10) = mac(t9, self.0[4], rhs.0[5], carry);
 673 | 
 674 |         let (t5, carry) = mac(t5, self.0[5], rhs.0[0], 0);
 675 |         let (t6, carry) = mac(t6, self.0[5], rhs.0[1], carry);
 676 |         let (t7, carry) = mac(t7, self.0[5], rhs.0[2], carry);
 677 |         let (t8, carry) = mac(t8, self.0[5], rhs.0[3], carry);
 678 |         let (t9, carry) = mac(t9, self.0[5], rhs.0[4], carry);
 679 |         let (t10, t11) = mac(t10, self.0[5], rhs.0[5], carry);
 680 |         
 681 |         Self::montgomery_reduce_old(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
 682 |     }
 683 | 
 684 |     fn montgomery_reduce(
 685 |         t0: u64,
 686 |         t1: u64,
 687 |         t2: u64,
 688 |         t3: u64,
 689 |         t4: u64,
 690 |         t5: u64,
 691 |         t6: u64,
 692 |         t7: u64,
 693 |         t8: u64,
 694 |         t9: u64,
 695 |         t10: u64,
 696 |         t11: u64,
 697 |     ) -> Self {
 698 |         unsafe {
 699 |             let mut res: [u64; 6] = mem::uninitialized();
 700 |             let mut tmp: [u64; 12] = [t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11];
 701 |             c_montgomry(res.as_mut_ptr(), tmp.as_mut_ptr());
 702 |             Fp(res).subtract_p()
 703 |         }
 704 |     }
 705 | 
 706 |     #[inline(always)]
 707 |     fn mul_helper(&self, rhs: &Fp) -> [u64; 6] {
 708 |         unsafe {
 709 |             let mut res: [u64; 6] = mem::uninitialized();
 710 |             let mut tmp: [u64; 12] = mem::uninitialized();
 711 |             c_mul(tmp.as_mut_ptr(), self.0.as_ptr(), rhs.0.as_ptr());
 712 |             c_montgomry(res.as_mut_ptr(), tmp.as_mut_ptr());
 713 |             res
 714 |         }
 715 |     }
 716 | 
 717 |     #[inline(always)]
 718 |     pub fn mul(&self, rhs: &Fp) -> Fp {
 719 |         let res = self.mul_helper(&rhs);
 720 |         Fp(res).subtract_p()
 721 |     }
 722 | }
 723 | 
 724 | #[test]
 725 | fn test_conditional_selection() {
 726 |     let a = Fp([1, 2, 3, 4, 5, 6]);
 727 |     let b = Fp([7, 8, 9, 10, 11, 12]);
 728 | 
 729 |     assert_eq!(
 730 |         ConditionallySelectable::conditional_select(&a, &b, Choice::from(0u8)),
 731 |         a
 732 |     );
 733 |     assert_eq!(
 734 |         ConditionallySelectable::conditional_select(&a, &b, Choice::from(1u8)),
 735 |         b
 736 |     );
 737 | }
 738 | 
 739 | #[test]
 740 | fn test_legendre() {
 741 |     let a = Fp::from_raw_unchecked([
 742 |         0xf8397a163b69bed0, 
 743 |         0xf175823c7236735c, 
 744 |         0x5569469835f84b92, 
 745 |         0x714deebc8c061c3c, 
 746 |         0x7adcc0994eb519c8, 
 747 |         0x230d716ceafd4b,
 748 |     ]);
 749 |     assert_eq!(a.legendre(), LegendreSymbol::QuadraticResidue);
 750 | }
 751 |     
 752 | #[test]
 753 | fn test_equality() {
 754 |     fn is_equal(a: &Fp, b: &Fp) -> bool {
 755 |         let eq = a == b;
 756 |         let ct_eq = a.ct_eq(&b);
 757 | 
 758 |         assert_eq!(eq, ct_eq.unwrap_u8() == 1);
 759 | 
 760 |         eq
 761 |     }
 762 | 
 763 |     assert_eq!(&Fp([1, 2, 3, 4, 5, 6]), &Fp([1, 2, 3, 4, 5, 6]));
 764 | 
 765 |     let a = Fp([7, 2, 3, 4, 5, 6]);
 766 |     let b = Fp([1, 2, 3, 4, 5, 6]);
 767 |     assert_ne!(&a, &b);
 768 |     assert!(!is_equal(&Fp([1, 7, 3, 4, 5, 6]), &Fp([1, 2, 3, 4, 5, 6])));
 769 |     assert!(!is_equal(&Fp([1, 2, 7, 4, 5, 6]), &Fp([1, 2, 3, 4, 5, 6])));
 770 |     assert!(!is_equal(&Fp([1, 2, 3, 7, 5, 6]), &Fp([1, 2, 3, 4, 5, 6])));
 771 |     assert!(!is_equal(&Fp([1, 2, 3, 4, 7, 6]), &Fp([1, 2, 3, 4, 5, 6])));
 772 |     assert!(!is_equal(&Fp([1, 2, 3, 4, 5, 7]), &Fp([1, 2, 3, 4, 5, 6])));
 773 | }
 774 | 
 775 | #[test]
 776 | fn test_squaring() {
 777 |     let a = Fp([
 778 |         0xd215d2768e83191b,
 779 |         0x5085d80f8fb28261,
 780 |         0xce9a032ddf393a56,
 781 |         0x3e9c4fff2ca0c4bb,
 782 |         0x6436b6f7f4d95dfb,
 783 |         0x10606628ad4a4d90,
 784 |     ]);
 785 |     let b = Fp([
 786 |         0xc27f4faf338e6e7, 
 787 |         0xb9363389626f355, 
 788 |         0x2677a23d5ff9b701, 
 789 |         0xaa7da7ecaa317421, 
 790 |         0xd813d973bd2c6c51, 
 791 |         0x1363906dc99b15d,
 792 |     ]);
 793 | 
 794 |     assert_eq!(a.square(), b);
 795 | }
 796 | 
 797 | #[test]
 798 | fn test_multiplication() {
 799 |     let a = Fp([
 800 |         0x397a38320170cd4,
 801 |         0x734c1b2c9e761d30,
 802 |         0x5ed255ad9a48beb5,
 803 |         0x95a3c6b22a7fcfc,
 804 |         0x2294ce75d4e26a27,
 805 |         0x13338bd870011ebb,
 806 |     ]);
 807 |     let b = Fp([
 808 |         0xb9c3c7c5b1196af7,
 809 |         0x2580e2086ce335c1,
 810 |         0xf49aed3d8a57ef42,
 811 |         0x41f281e49846e878,
 812 |         0xe0762346c38452ce,
 813 |         0x652e89326e57dc0,
 814 |     ]);
 815 |     let c = Fp([
 816 |         0x797a886e0e8e8d85, 
 817 |         0x518df0f1d1732800, 
 818 |         0xb7098a12c4a10c5, 
 819 |         0x6338f6a9ec896084, 
 820 |         0xec6b4921810a39fc, 
 821 |         0x1751097d914d4be
 822 |     ]);
 823 | 
 824 |     assert_eq!(a * b, c);
 825 | }
 826 | 
 827 | #[test]
 828 | fn test_addition() {
 829 |     let a = Fp([
 830 |         0x5360bb5978678032,
 831 |         0x7dd275ae799e128e,
 832 |         0x5c5b5071ce4f4dcf,
 833 |         0xcdb21f93078dbb3e,
 834 |         0xc32365c5e73f474a,
 835 |         0x115a2a5489babe5b,
 836 |     ]);
 837 |     let b = Fp([
 838 |         0x9fd287733d23dda0,
 839 |         0xb16bf2af738b3554,
 840 |         0x3e57a75bd3cc6d1d,
 841 |         0x900bc0bd627fd6d6,
 842 |         0xd319a080efb245fe,
 843 |         0x15fdcaa4e4bb2091,
 844 |     ]);
 845 |     let c = Fp([
 846 |         0x6e2a82ccb58b5dd1,
 847 |         0x18330b19bd2947e2,
 848 |         0x7bbf959de81272ed,
 849 |         0x439b065d69187e85,
 850 |         0xd00200866a50440e,
 851 |         0x25a9bab356b0ce02,
 852 |     ]);
 853 | 
 854 |     assert_eq!(a + b, c);
 855 | }
 856 | 
 857 | #[test]
 858 | fn test_subtraction() {
 859 |     let a = Fp([
 860 |         0xaa270000000cfff3,
 861 |         0x53cc0032fc34000a,
 862 |         0x478fe97a6b0a807f,
 863 |         0xb1d37ebee6ba24d7,
 864 |         0x8ec9733bbf78ab2f,
 865 |         0x9d645513d83de7e,
 866 |     ]);
 867 |     let b = Fp([
 868 |         0x7d828664baf4f566,
 869 |         0xd17e663996ec7339,
 870 |         0x679ead55cb4078d0,
 871 |         0xfe3b2260e001ec28,
 872 |         0x305993d043d91b68,
 873 |         0x626f03c0489b72d,
 874 |     ]);
 875 |     let c = Fp([
 876 |         0x2ca4799b45180a8d, 
 877 |         0x824d99f965478cd1, 
 878 |         0xdff13c249fca07ae, 
 879 |         0xb3985c5e06b838ae, 
 880 |         0x5e6fdf6b7b9f8fc6, 
 881 |         0x3af551538fa2751,
 882 |     ]);
 883 | 
 884 |     assert_eq!(a - b, c);
 885 | }
 886 | 
 887 | #[test]
 888 | fn test_negation() {
 889 |     let a = Fp([
 890 |         0x5360bb5978678032,
 891 |         0x7dd275ae799e128e,
 892 |         0x5c5b5071ce4f4dcf,
 893 |         0xcdb21f93078dbb3e,
 894 |         0xc32365c5e73f474a,
 895 |         0x115a2a5489babe5b,
 896 |     ]);
 897 |     let b = Fp([
 898 |         0x31a804a687987fcf, 
 899 |         0x9938e795b661ed72, 
 900 |         0xc29811bdebb9fa30, 
 901 |         0x4c70ba5ff9675850, 
 902 |         0x3179ffa856201f0, 
 903 |         0xf0540ff18e0a528f,
 904 |     ]);
 905 | 
 906 |     assert_eq!(-a, b);
 907 | }
 908 | 
 909 | #[test]
 910 | fn test_debug() {
 911 |     assert_eq!(
 912 |         format!(
 913 |             "{:?}",
 914 |             Fp([0x5360bb5978678032, 0x7dd275ae799e128e, 0x5c5b5071ce4f4dcf, 0xcdb21f93078dbb3e, 0xc32365c5e73f474a, 0x115a2a5489babe5b])
 915 |         ),
 916 |         "0x01649f72ed7210935e96e9afd102e59eb0043d3eccd7606e797520db60fc0d2c5f8ec5dde3c6df9ddc6db87323948bdc"
 917 |     );
 918 | }
 919 | 
 920 | #[test]
 921 | fn test_from_bytes() {
 922 |     let mut a = Fp([
 923 |         0xdc906d9be3f95dc8,
 924 |         0x8755caf7459691a1,
 925 |         0xcff1a7f4e9583ab3,
 926 |         0x9b43821f849e2284,
 927 |         0xf57554f3a2974f3f,
 928 |         0x85dbea84ed47f79,
 929 |     ]);
 930 | 
 931 |     for _ in 0..100 {
 932 |         a = a.square();
 933 |         let tmp = a.to_bytes();
 934 |         let b = Fp::from_bytes(&tmp).unwrap();
 935 | 
 936 |         assert_eq!(a, b);
 937 |     }
 938 | 
 939 |     assert_eq!(
 940 |         -Fp::one(),
 941 |     Fp::from_bytes(&[1, 174, 58, 70, 23, 197, 16, 234, 198, 59, 5, 192, 108, 161, 73, 59, 26, 34, 217, 243, 0, 245, 19, 143, 30, 243, 98, 47, 186, 9, 72, 0, 23, 11, 93, 68, 48, 0, 0, 0, 133, 8, 192, 0, 0, 0, 0, 0]).unwrap()
 942 |     );
 943 | 
 944 |     assert!(
 945 |         Fp::from_bytes(&[
 946 |             27, 1, 17, 234, 57, 127, 230, 154, 75, 27, 167, 182, 67, 75, 172, 215, 100, 119, 75,
 947 |             132, 243, 133, 18, 191, 103, 48, 210, 160, 246, 176, 246, 36, 30, 171, 255, 254, 177,
 948 |             83, 255, 255, 185, 254, 255, 255, 255, 255, 170, 170
 949 |         ])
 950 |         .is_none()
 951 |         .unwrap_u8()
 952 |             == 1
 953 |     );
 954 | 
 955 |     assert!(Fp::from_bytes(&[0xff; 48]).is_none().unwrap_u8() == 1);
 956 | }
 957 | 
 958 | #[test]
 959 | fn test_sqrt_vartime() {
 960 |     let a = Fp::from_raw_unchecked([
 961 |         0xaa270000000cfff3,
 962 |         0x53cc0032fc34000a,
 963 |         0x478fe97a6b0a807f,
 964 |         0xb1d37ebee6ba24d7,
 965 |         0x8ec9733bbf78ab2f,
 966 |         0x9d645513d83de7e,
 967 |     ]);
 968 | 
 969 |     assert_eq!(
 970 |         a.sqrt_vartime().unwrap(),
 971 |         Fp::from_raw_unchecked([
 972 |             0xb7365bc1527cc225,
 973 |             0x80c4410c13dad980, 
 974 |             0x405a608866ec9af9, 
 975 |             0xbae77f06775d9e86, 
 976 |             0x631d7a2378887188, 
 977 |             0x24475d61e565d7,
 978 |         ])
 979 |     );
 980 | }
 981 | 
 982 | #[test]
 983 | fn test_inversion() {
 984 |     let a = Fp([
 985 |         0x43b43a5078ac2076,
 986 |         0x1ce0763046f8962b,
 987 |         0x724a5276486d735c,
 988 |         0x6f05c2a6282d48fd,
 989 |         0x2095bd5bb4ca9331,
 990 |         0x3b35b3894b0f7da,
 991 |     ]);
 992 |     let b = Fp([
 993 |         0x46e62daa07fc3fba,
 994 |         0x7a3ba1598ea4f941, 
 995 |         0x675f586198cad5e3, 
 996 |         0xd3c06c64199ca906, 
 997 |         0x61617cc7f1012816, 
 998 |         0xefb2f069ef448e,
 999 |     ]);
1000 | 
1001 |     assert_eq!(a.invert().unwrap(), b);
1002 |     assert!(Fp::zero().invert().is_none().unwrap_u8() == 1);
1003 | }
1004 | 
/// Cross-checks `mul` against `mul_old` on fixed operands.
#[test]
fn test_multiply() {
    let x = Fp([
        0x43b43a5078ac2076,
        0x1ce0763046f8962b,
        0x724a5276486d735c,
        0x6f05c2a6282d48fd,
        0x2095bd5bb4ca9331,
        0x3b35b3894b0f7da,
    ]);
    let y = Fp([
        0x46e62daa07fc3fba,
        0x7a3ba1598ea4f941,
        0x675f586198cad5e3,
        0xd3c06c64199ca906,
        0x61617cc7f1012816,
        0xefb2f069ef448e,
    ]);
    // Holds the same limbs as `y`.
    let z = Fp([
        0x46e62daa07fc3fba,
        0x7a3ba1598ea4f941,
        0x675f586198cad5e3,
        0xd3c06c64199ca906,
        0x61617cc7f1012816,
        0xefb2f069ef448e,
    ]);

    // Both multiplication paths must agree on every pairing.
    assert_eq!(x.mul(&y), x.mul_old(&y));
    assert_eq!(x.mul(&z), x.mul_old(&z));
    assert_eq!(y.mul(&z), y.mul_old(&z));
}
1035 | 
/// Checks `lexicographically_largest` on zero, one and three fixed elements.
#[test]
fn test_lexicographic_largest() {
    let zero_is_largest = bool::from(Fp::zero().lexicographically_largest());
    assert!(!zero_is_largest);

    let one_is_largest = bool::from(Fp::one().lexicographically_largest());
    assert!(!one_is_largest);

    // Two elements that must not be reported as largest.
    let first_small = Fp::from_raw_unchecked([
        0xa1fafffffffe5557,
        0x995bfff976a3fffe,
        0x3f41d24d174ceb4,
        0xf6547998c1995dbd,
        0x778a468f507a6034,
        0x20559931f7f8103,
    ]);
    assert!(!bool::from(first_small.lexicographically_largest()));

    let second_small = Fp::from_raw_unchecked([
        0x1804000000015554,
        0x855000053ab00001,
        0x633cb57c253c276f,
        0x6e22d1ec31ebb502,
        0xd3916126f2d14ca2,
        0x17fbb8571a006596,
    ]);
    assert!(!bool::from(second_small.lexicographically_largest()));

    // One element that must be reported as largest.
    let large = Fp::from_raw_unchecked([
        0x43f5fffffffcaaae,
        0x32b7fff2ed47fffd,
        0x7e83a49a2e99d69,
        0xeca8f3318332bb7a,
        0xef148d1ea0f4c069,
        0x40ab3263eff0206,
    ]);
    assert!(bool::from(large.lexicographically_largest()));
}
1074 | 


--------------------------------------------------------------------------------
/bls12_377/src/fp2.rs:
--------------------------------------------------------------------------------
  1 | //! This module implements arithmetic over the quadratic extension field Fp2.
  2 | 
  3 | use core::fmt;
  4 | use core::ops::{Add, AddAssign, Mul, MulAssign, Neg, Not, Sub, SubAssign};
  5 | 
  6 | use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};
  7 | 
  8 | use crate::fp::{Fp};
  9 | use crate::util::LegendreSymbol;
 10 | 
 11 | 
 12 | /// beta = -5
 13 | #[inline(always)]
 14 | const fn nonresidue() -> Fp {
 15 |     Fp::from_raw_unchecked([
 16 |         0xfc0b8000000002fa,
 17 |         0x97d39cf6e000018b,
 18 |         0x2072420fbfa05044,
 19 |         0xcbbcbd50d97c3802,
 20 |         0xbaf1ec35813f9eb,
 21 |         0x9974a2c0945ad2,
 22 |     ])
 23 | }
 24 | 
 25 | #[derive(Copy, Clone)]
 26 | pub struct Fp2 {
 27 |     pub c0: Fp,
 28 |     pub c1: Fp,
 29 | }
 30 | 
 31 | impl fmt::Debug for Fp2 {
 32 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 33 |         write!(f, "{:?} + {:?}*u", self.c0, self.c1)
 34 |     }
 35 | }
 36 | 
 37 | impl Default for Fp2 {
 38 |     fn default() -> Self {
 39 |         Fp2::zero()
 40 |     }
 41 | }
 42 | 
 43 | impl From<Fp> for Fp2 {
 44 |     fn from(f: Fp) -> Fp2 {
 45 |         Fp2 {
 46 |             c0: f,
 47 |             c1: Fp::zero(),
 48 |         }
 49 |     }
 50 | }
 51 | 
 52 | impl ConstantTimeEq for Fp2 {
 53 |     #[inline]
 54 |     fn ct_eq(&self, other: &Self) -> Choice {
 55 |         self.c0.ct_eq(&other.c0) & self.c1.ct_eq(&other.c1)
 56 |     }
 57 | }
 58 | 
 59 | impl Eq for Fp2 {}
 60 | impl PartialEq for Fp2 {
 61 |     fn eq(&self, other: &Self) -> bool {
 62 |         self.ct_eq(other).unwrap_u8() == 1
 63 |     }
 64 | }
 65 | 
 66 | impl ConditionallySelectable for Fp2 {
 67 |     #[inline]
 68 |     fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self {
 69 |         Fp2 {
 70 |             c0: Fp::conditional_select(&a.c0, &b.c0, choice),
 71 |             c1: Fp::conditional_select(&a.c1, &b.c1, choice),
 72 |         }
 73 |     }
 74 | }
 75 | 
 76 | impl<'a> Neg for &'a Fp2 {
 77 |     type Output = Fp2;
 78 | 
 79 |     #[inline(always)]
 80 |     fn neg(self) -> Fp2 {
 81 |         self.neg()
 82 |     }
 83 | }
 84 | 
 85 | impl Neg for Fp2 {
 86 |     type Output = Fp2;
 87 | 
 88 |     #[inline(always)]
 89 |     fn neg(self) -> Fp2 {
 90 |         -&self
 91 |     }
 92 | }
 93 | 
 94 | impl<'a, 'b> Sub<&'b Fp2> for &'a Fp2 {
 95 |     type Output = Fp2;
 96 | 
 97 |     #[inline(always)]
 98 |     fn sub(self, rhs: &'b Fp2) -> Fp2 {
 99 |         self.sub(rhs)
100 |     }
101 | }
102 | 
103 | impl<'a, 'b> Add<&'b Fp2> for &'a Fp2 {
104 |     type Output = Fp2;
105 | 
106 |    #[inline(always)]
107 |     fn add(self, rhs: &'b Fp2) -> Fp2 {
108 |         self.add(rhs)
109 |     }
110 | }
111 | 
112 | impl<'a, 'b> Mul<&'b Fp2> for &'a Fp2 {
113 |     type Output = Fp2;
114 | 
115 |     #[inline(always)]
116 |     fn mul(self, rhs: &'b Fp2) -> Fp2 {
117 |         self.mul(rhs)
118 |     }
119 | }
120 | 
// NOTE(review): presumably these project macros derive the remaining
// owned/borrowed operator combinations (and the `*Assign` forms used further
// down in this file) from the reference-based impls -- confirm against the
// macro definitions.
impl_binops_additive!(Fp2, Fp2);
impl_binops_multiplicative!(Fp2, Fp2);
123 | 
impl Fp2 {
    /// Returns the additive identity `0 + 0*u`.
    #[inline(always)]
    pub const fn zero() -> Fp2 {
        Fp2 {
            c0: Fp::zero(),
            c1: Fp::zero(),
        }
    }

    /// Returns the multiplicative identity `1 + 0*u`.
    #[inline(always)]
    pub const fn one() -> Fp2 {
        Fp2 {
            c0: Fp::one(),
            c1: Fp::zero(),
        }
    }

    /// Returns a set `Choice` when both coefficients are zero.
    #[inline(always)]
    pub fn is_zero(&self) -> Choice {
        self.c0.is_zero() & self.c1.is_zero()
    }

    /// Raises this element to p.
    pub fn frobenius_map(&self) -> Self {
        // This is always just a conjugation
        self.conjugate()
    }

    /// Returns `c0 - c1*u`, i.e. the element with its `c1` coefficient
    /// negated.
    pub fn conjugate(&self) -> Self {
        Fp2 {
            c0: self.c0,
            c1: -self.c1,
        }
    }

    /// Multiplies this element by `u + 1`.
    pub fn mul_by_nonresidue(&self) -> Fp2 {
        // Multiply a + bu by u + 1, getting
        // au + a + bu^2 + bu
        // and because u^2 = beta (the value returned by `nonresidue()`),
        // we get
        // (a + beta*b) + (a + b)u

        Fp2 {
            c0: self.c0 + (nonresidue() * self.c1),
            c1: self.c0 + self.c1,
        }
    }

    /// Returns whether or not this element is strictly lexicographically
    /// larger than its negation.
    pub fn lexicographically_largest(&self) -> Choice {
        // If this element's c1 coefficient is lexicographically largest
        // then it is lexicographically largest. Otherwise, in the event
        // the c1 coefficient is zero and the c0 coefficient is
        // lexicographically largest, then this element is lexicographically
        // largest.

        self.c1.lexicographically_largest()
            | (self.c1.is_zero() & self.c0.lexicographically_largest())
    }

    /// Squares this element.
    #[inline]
    pub fn square(&self) -> Fp2 {
        // Complex squaring, as computed below:
        //
        // v2  = c0 * c1
        // v0  = (c0 - c1) * (c0 - \beta*c1) + v2
        // c0' = v0 + \beta * v2        (= c0^2 + \beta * c1^2)
        // c1' = 2 * v2
        //
        let mut v0 = (&self.c0).sub(&self.c1);
        let v3 = (&self.c0).sub(&(&self.c1).mul(&nonresidue()));
        let v2 = (&self.c0).mul(&self.c1);
        v0 = (&v0).mul(&v3);
        v0 = (&v0).add(&v2);

        Fp2 {
            c0: (&v0).add(&((&v2).mul(&nonresidue()))),
            c1: (&v2).add(&v2),
        }
    }

    /// Multiplies this element by `rhs`.
    #[inline(always)]
    pub fn mul(&self, rhs: &Fp2) -> Fp2 {
        // Karatsuba multiplication:
        //
        // v0  = a0 * b0
        // v1  = a1 * b1
        // c0 = v0 + \beta * v1
        // c1 = (a0 + a1) * (b0 + b1) - v0 - v1

        let v0 = (&self.c0).mul(&rhs.c0);
        let v1 = (&self.c1).mul(&rhs.c1);
        let c0 = (&v0).add(&(&nonresidue()).mul(&v1));
        let mut c1 = (&(&self.c0).add(&self.c1)).mul(&(&rhs.c0).add(&rhs.c1));
        c1 = (&c1).sub(&v0);
        c1 = (&c1).sub(&v1);

        Fp2 { c0, c1 }
    }

    /// Adds `rhs` to this element coefficient by coefficient.
    #[inline(always)]
    pub fn add(&self, rhs: &Fp2) -> Fp2 {
        Fp2 {
            c0: (&self.c0).add(&rhs.c0),
            c1: (&self.c1).add(&rhs.c1),
        }
    }

    /// Subtracts `rhs` from this element coefficient by coefficient.
    #[inline(always)]
    pub fn sub(&self, rhs: &Fp2) -> Fp2 {
        Fp2 {
            c0: (&self.c0).sub(&rhs.c0),
            c1: (&self.c1).sub(&rhs.c1),
        }
    }

    /// Negates both coefficients.
    #[inline(always)]
    pub fn neg(&self) -> Fp2 {
        Fp2 {
            c0: (&self.c0).neg(),
            c1: (&self.c1).neg(),
        }
    }

    /// Computes `c0^2 - \beta * c1^2` as an element of the base field.
    fn norm(&self) -> Fp {
        let t0 = self.c0.square();
        let mut t1 = self.c1.square();
        // The unary minus applies to the whole product `t1 * \beta`.
        t1 = -(&t1).mul(&nonresidue());
        t1.add_assign(&t0);
        t1
    }

    /// Returns the Legendre symbol of this element's norm.
    fn legendre(&self) -> LegendreSymbol {
        self.norm().legendre()
    }


    /// Algorithm 8, https://eprint.iacr.org/2012/685.pdf
    ///
    /// Returns a square root of this element, or `None` when the Legendre
    /// symbol of its norm is `QuadraticNonResidue` (or, for `c1 == 0`, when
    /// the base-field `sqrt_vartime` of `c0` returns `None`).
    // TODO: Investigate switching to algo 10
    // TODO: Add sqrt test coverage
    pub fn sqrt_vartime(&self) -> Option<Self> {
        // With no `u` component the root is looked for in the base field only.
        if self.c1 == Fp::zero() {
            return self.c0.sqrt_vartime().map(|c0| Self { c0, c1: Fp::zero() } )
        }

        match self.legendre() {
            LegendreSymbol::Zero => Some(*self),
            LegendreSymbol::QuadraticNonResidue => None,
            LegendreSymbol::QuadraticResidue => {
               // two_inv = 1 / 2
               let two_inv = Fp::one()
                   .add(Fp::one())
                   .invert()
                   .unwrap();
               // alpha = sqrt(norm(self))
               let alpha = self
                   .norm()
                   .sqrt_vartime()
                   .unwrap();
               // delta = (alpha + c0) / 2, replaced by delta - alpha when
               // the first candidate is a quadratic non-residue.
               let mut delta = (alpha + self.c0) * two_inv;
               if delta.legendre() == LegendreSymbol::QuadraticNonResidue {
                   delta -= alpha;
               }
               // Result: sqrt(delta) + (c1 / (2 * sqrt(delta))) * u
               let c0 = delta.sqrt_vartime().unwrap();
               let c0_inv = c0.invert().unwrap();
               Some(Self { c0: c0, c1: self.c1 * two_inv *c0_inv })
            },
        }
    }

    /// Computes the multiplicative inverse of this field
    /// element, returning None in the case that this element
    /// is zero.
    #[inline(always)]
    pub fn invert(&self) -> CtOption<Self> {
        // We wish to find the multiplicative inverse of a nonzero
        // element a + bu in Fp2. Algorithm 5.19
        // from Guide to Pairing Based Cryptography

        // v0 = 1 / (c0^2 - \beta * c1^2); zero is substituted when that
        // base-field inversion yields nothing, and the returned CtOption is
        // flagged as none whenever `self` is zero.
        let mut v0 = self.c0.square();
        v0 = v0 - nonresidue() * self.c1.square();
        v0 = v0.invert().unwrap_or(Fp::zero());
        CtOption::new(Fp2 {
            c0: self.c0 * v0,
            c1: -(self.c1 * v0),
        }, Choice::not(self.is_zero()))
    }

    /// Although this is labeled "vartime", it is only
    /// variable time with respect to the exponent. It
    /// is also not exposed in the public API.
    ///
    /// `by` is consumed from its last limb to its first, and each limb from
    /// bit 63 down to bit 0 (square-and-multiply).
    // NOTE(review): the method itself is declared `pub`; whether it is
    // reachable from outside the crate depends on the module's visibility --
    // confirm.
    pub fn pow_vartime(&self, by: &[u64; 6]) -> Self {
        let mut res = Self::one();
        for e in by.iter().rev() {
            for i in (0..64).rev() {
                res = res.square();

                if ((*e >> i) & 1) == 1 {
                    res *= self;
                }
            }
        }
        res
    }
}
326 | 
/// `conditional_select` must return the first operand for a cleared choice
/// and the second operand for a set choice.
#[test]
fn test_conditional_selection() {
    let first = Fp2 {
        c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]),
        c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]),
    };
    let second = Fp2 {
        c0: Fp::from_raw_unchecked([13, 14, 15, 16, 17, 18]),
        c1: Fp::from_raw_unchecked([19, 20, 21, 22, 23, 24]),
    };

    let with_cleared_choice =
        ConditionallySelectable::conditional_select(&first, &second, Choice::from(0u8));
    assert_eq!(with_cleared_choice, first);

    let with_set_choice =
        ConditionallySelectable::conditional_select(&first, &second, Choice::from(1u8));
    assert_eq!(with_set_choice, second);
}
347 | 
/// `norm` on a fixed element must return the recorded base-field value.
#[test]
fn test_norm() {
    let element = Fp2 {
        c0: Fp::from_raw_unchecked([
            0x2beed14627d7f9e9,
            0xb6617e06660e5dce,
            0x6c4cc7c2f91d42c,
            0x996dc8474b7a63cc,
            0xebaebc4c820d574e,
            0x18865e12d93fd845,
        ]),
        c1: Fp::from_raw_unchecked([
            0x7d828664baf4f566,
            0xd17e663996ec7339,
            0x679ead55cb4078d0,
            0xfe3b2260e001ec28,
            0x305993d043d91b68,
            0x626f03c0489b72d,
        ]),
    };
    let expected_norm = Fp::from_raw_unchecked([
        0xf8397a163b69bed0,
        0xf175823c7236735c,
        0x5569469835f84b92,
        0x714deebc8c061c3c,
        0x7adcc0994eb519c8,
        0x230d716ceafd4b,
    ]);

    let computed = element.norm();
    assert_eq!(computed, expected_norm);
}
378 | 
/// `==` and `ct_eq` must agree, and a difference in either coefficient must
/// make two elements unequal.
#[test]
fn test_equality() {
    // Compares with both `==` and `ct_eq`, asserting that they agree.
    fn is_equal(lhs: &Fp2, rhs: &Fp2) -> bool {
        let plain = lhs == rhs;
        let constant_time = lhs.ct_eq(rhs).unwrap_u8() == 1;

        assert_eq!(plain, constant_time);

        plain
    }

    let reference = Fp2 {
        c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]),
        c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]),
    };

    // Identical limbs in both coefficients.
    let same = Fp2 {
        c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]),
        c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]),
    };
    assert!(is_equal(&same, &reference));

    // Difference in c0 only.
    let c0_differs = Fp2 {
        c0: Fp::from_raw_unchecked([2, 2, 3, 4, 5, 6]),
        c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]),
    };
    assert!(!is_equal(&c0_differs, &reference));

    // Difference in c1 only.
    let c1_differs = Fp2 {
        c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]),
        c1: Fp::from_raw_unchecked([2, 8, 9, 10, 11, 12]),
    };
    assert!(!is_equal(&c1_differs, &reference));
}
423 | 
424 | #[test]
425 | fn test_squaring() {
426 |     let expected = Fp2 {
427 |         c0: Fp::from_raw_unchecked([
428 |             0x9180cfbd5231eb92,
429 |             0x80ba5cc15826ee06,
430 |             0x6e4810398ff8110a,
431 |             0x17b1565c3b5de972,
432 |             0xadfa03c911c9f3d,
433 |             0x45616e22b1a459,
434 |         ]),
435 |         c1: Fp::from_raw_unchecked([
436 |             0xde372dea33981b66,
437 |             0x235f7eb8baf88c85,
438 |             0x3837e2636f0d07bc,
439 |             0xba39294a74709e4b,
440 |             0x274cb0edb1fdd1e2,
441 |             0x11abe141195cea6,
442 |         ]),
443 |     };
444 |     let input = Fp2 {
445 |         c0: Fp::from_raw_unchecked([
446 |             0xc9a2183163ee70d4,
447 |             0xbc3770a7196b5c91,
448 |             0xa247f8c1304c5f44,
449 |             0xb01fc2a3726c80b5,
450 |             0xe1d293e5bbd919c9,
451 |             0x4b78e80020ef2ca,
452 |         ]),
453 |         c1: Fp::from_raw_unchecked([
454 |             0x952ea4460462618f,
455 |             0x238d5eddf025c62f,
456 |             0xf6c94b012ea92e72,
457 |             0x3ce24eac1c93808,
458 |             0x55950f945da483c,
459 |             0x10a768d0df4eabc,
460 |         ]),
461 |     };
462 | 
463 |     assert_eq!(input.square(), expected); // square() must reproduce the fixed element above
464 | }
465 | 
466 | #[test]
467 | fn test_multiplication() {
468 |     let lhs = Fp2 {
469 |         c0: Fp::from_raw_unchecked([
470 |             0xc9a2183163ee70d4,
471 |             0xbc3770a7196b5c91,
472 |             0xa247f8c1304c5f44,
473 |             0xb01fc2a3726c80b5,
474 |             0xe1d293e5bbd919c9,
475 |             0x4b78e80020ef2ca,
476 |         ]),
477 |         c1: Fp::from_raw_unchecked([
478 |             0x952ea4460462618f,
479 |             0x238d5eddf025c62f,
480 |             0xf6c94b012ea92e72,
481 |             0x3ce24eac1c93808,
482 |             0x55950f945da483c,
483 |             0x10a768d0df4eabc,
484 |         ]),
485 |     };
486 |     let rhs = Fp2 {
487 |         c0: Fp::from_raw_unchecked([
488 |             0xa1e09175a4d2c1fe,
489 |             0x8b33acfc204eff12,
490 |             0xe24415a11b456e42,
491 |             0x61d996b1b6ee1936,
492 |             0x1164dbe8667c853c,
493 |             0x788557acc7d9c79,
494 |         ]),
495 |         c1: Fp::from_raw_unchecked([
496 |             0xda6a87cc6f48fa36,
497 |             0xfc7b488277c1903,
498 |             0x9445ac4adc448187,
499 |             0x2616d5bc9099209,
500 |             0xdbed46772db58d48,
501 |             0x11b94d5076c7b7b1,
502 |         ]),
503 |     };
504 |     let expected_product = Fp2 {
505 |         c0: Fp::from_raw_unchecked([
506 |             0xa2332499367dd291,
507 |             0x41882f1e421e6c04,
508 |             0xbc6a01cea4131ffb,
509 |             0xd5ccc0ffed5730d8,
510 |             0x28c08d93d3196725,
511 |             0x113a0b1f3ec936b,
512 |         ]),
513 |         c1: Fp::from_raw_unchecked([
514 |             0xc00e498bee3a3b12,
515 |             0x3ac6975d105a3631,
516 |             0x99d635ebdedee2ca,
517 |             0xbc815bde58a6ecc8,
518 |             0x26382035f22c7652,
519 |             0x54f5a96fa8aef8,
520 |         ]),
521 |     };
522 | 
523 |     assert_eq!(lhs * rhs, expected_product); // `*` must reproduce the fixed element above
524 | }
525 | 
526 | #[test]
527 | fn test_addition() {
528 |     let lhs = Fp2 {
529 |         c0: Fp::from_raw_unchecked([
530 |             0xc9a2183163ee70d4,
531 |             0xbc3770a7196b5c91,
532 |             0xa247f8c1304c5f44,
533 |             0xb01fc2a3726c80b5,
534 |             0xe1d293e5bbd919c9,
535 |             0x4b78e80020ef2ca,
536 |         ]),
537 |         c1: Fp::from_raw_unchecked([
538 |             0x952ea4460462618f,
539 |             0x238d5eddf025c62f,
540 |             0xf6c94b012ea92e72,
541 |             0x3ce24eac1c93808,
542 |             0x55950f945da483c,
543 |             0x10a768d0df4eabc,
544 |         ]),
545 |     };
546 |     let rhs = Fp2 {
547 |         c0: Fp::from_raw_unchecked([
548 |             0xa1e09175a4d2c1fe,
549 |             0x8b33acfc204eff12,
550 |             0xe24415a11b456e42,
551 |             0x61d996b1b6ee1936,
552 |             0x1164dbe8667c853c,
553 |             0x788557acc7d9c79,
554 |         ]),
555 |         c1: Fp::from_raw_unchecked([
556 |             0xda6a87cc6f48fa36,
557 |             0xfc7b488277c1903,
558 |             0x9445ac4adc448187,
559 |             0x2616d5bc9099209,
560 |             0xdbed46772db58d48,
561 |             0x11b94d5076c7b7b1,
562 |         ]),
563 |     };
564 |     let expected_sum = Fp2 {
565 |         c0: Fp::from_raw_unchecked([
566 |             0xe679e9a708c132d1,
567 |             0x305fc05f09ba5ba3,
568 |             0x6598ac3291888587,
569 |             0xf7d67f622865865d,
570 |             0x2cfc6a0db5b455ca,
571 |             0xa91a9b4b6c77e59,
572 |         ]),
573 |         c1: Fp::from_raw_unchecked([
574 |             0xea906c1273ab5bc4,
575 |             0x1c49b621e7a1df32,
576 |             0x6c1b951c50e467f9,
577 |             0xec0cb85389ddb683,
578 |             0x1b0b91b006ee8c48,
579 |             0x111589976cf79183,
580 |         ]),
581 |     };
582 | 
583 |     assert_eq!(lhs + rhs, expected_sum); // `+` must reproduce the fixed element above
584 | }
585 | 
586 | #[test]
587 | fn test_subtraction() {
588 |     let subtrahend = Fp2 {
589 |         c0: Fp::from_raw_unchecked([
590 |             0xc9a2183163ee70d4,
591 |             0xbc3770a7196b5c91,
592 |             0xa247f8c1304c5f44,
593 |             0xb01fc2a3726c80b5,
594 |             0xe1d293e5bbd919c9,
595 |             0x4b78e80020ef2ca,
596 |         ]),
597 |         c1: Fp::from_raw_unchecked([
598 |             0x952ea4460462618f,
599 |             0x238d5eddf025c62f,
600 |             0xf6c94b012ea92e72,
601 |             0x3ce24eac1c93808,
602 |             0x55950f945da483c,
603 |             0x10a768d0df4eabc,
604 |         ]),
605 |     };
606 |     let minuend = Fp2 {
607 |         c0: Fp::from_raw_unchecked([
608 |             0xa1e09175a4d2c1fe,
609 |             0x8b33acfc204eff12,
610 |             0xe24415a11b456e42,
611 |             0x61d996b1b6ee1936,
612 |             0x1164dbe8667c853c,
613 |             0x788557acc7d9c79,
614 |         ]),
615 |         c1: Fp::from_raw_unchecked([
616 |             0xda6a87cc6f48fa36,
617 |             0xfc7b488277c1903,
618 |             0x9445ac4adc448187,
619 |             0x2616d5bc9099209,
620 |             0xdbed46772db58d48,
621 |             0x11b94d5076c7b7b1,
622 |         ]),
623 |     };
624 |     let expected_difference = Fp2 {
625 |         c0: Fp::from_raw_unchecked([
626 |             0xd83e794440e4512a,
627 |             0xcefc3c5506e3a280,
628 |             0x3ffc1cdfeaf90efd,
629 |             0xb1b9d40e44819881,
630 |             0x2f924802aaa36b72,
631 |             0x2d0c6faca6ea9ae,
632 |         ]),
633 |         c1: Fp::from_raw_unchecked([
634 |             0x453be3866ae698a7,
635 |             0xec3a55aa375652d4,
636 |             0x9d7c6149ad9b5314,
637 |             0xfe93487107405a00,
638 |             0xd693f57de7db450b,
639 |             0x10aed6c368d2ccf5,
640 |         ]),
641 |     };
642 | 
643 |     assert_eq!(minuend - subtrahend, expected_difference); // second operand minus the first
644 | }
645 | 
646 | #[test]
647 | fn test_negation() {
648 |     let input = Fp2 {
649 |         c0: Fp::from_raw_unchecked([
650 |             0xc9a2183163ee70d4,
651 |             0xbc3770a7196b5c91,
652 |             0xa247f8c1304c5f44,
653 |             0xb01fc2a3726c80b5,
654 |             0xe1d293e5bbd919c9,
655 |             0x4b78e80020ef2ca,
656 |         ]),
657 |         c1: Fp::from_raw_unchecked([
658 |             0x952ea4460462618f,
659 |             0x238d5eddf025c62f,
660 |             0xf6c94b012ea92e72,
661 |             0x3ce24eac1c93808,
662 |             0x55950f945da483c,
663 |             0x10a768d0df4eabc,
664 |         ]),
665 |     };
666 |     let expected_negation = Fp2 {
667 |         c0: Fp::from_raw_unchecked([
668 |             0xbb66a7ce9c118f2d,
669 |             0x5ad3ec9d1694a36e,
670 |             0x7cab696e89bce8bb,
671 |             0x6a03174f8e8892d9,
672 |             0xe46871dab0c82f71,
673 |             0xfcf6abc615b61e1f,
674 |         ]),
675 |         c1: Fp::from_raw_unchecked([
676 |             0xefda1bb9fb9d9e72,
677 |             0xf37dfe663fda39d0,
678 |             0x282a172e8b60198d,
679 |             0x1654b5083f2bdb86,
680 |             0xc0e1b4c726c700ff,
681 |             0xa3c3b909d0262e,
682 |         ]),
683 |     };
684 |     // NOTE(review): input c0's top limb exceeds the modulus' (0x01ae3a4617c510ea); looks unreduced - confirm.
685 |     assert_eq!(-input, expected_negation);
686 | }
687 | 
688 | #[test]
689 | fn test_inversion() {
690 |     let input = Fp2 {
691 |         c0: Fp::from_raw_unchecked([
692 |             0x1128ecad67549455,
693 |             0x9e7a1cff3a4ea1a8,
694 |             0xeb208d51e08bcf27,
695 |             0xe98ad40811f5fc2b,
696 |             0x736c3a59232d511d,
697 |             0x10acd42d29cfcbb6,
698 |         ]),
699 |         c1: Fp::from_raw_unchecked([
700 |             0xd328e37cc2f58d41,
701 |             0x948df0858a605869,
702 |             0x6032f9d56f93a573,
703 |             0x2be483ef3fffdc87,
704 |             0x30ef61f88f483c2a,
705 |             0x1333f55a35725be0,
706 |         ]),
707 |     };
708 | 
709 |     let expected_inverse = Fp2 {
710 |         c0: Fp::from_raw_unchecked([
711 |             0xa972fe45912ab0b0,
712 |             0x2fad422c707d2a7a,
713 |             0x1e0c99ca54b14292,
714 |             0x12b35bad27bfbb4b,
715 |             0xaac12849e9ca08be,
716 |             0x9ca440f7d792c1,
717 |         ]),
718 |         c1: Fp::from_raw_unchecked([
719 |             0x93f803dee0c6aee,
720 |             0x85be5ff1bf7a8b20,
721 |             0x9343d05ec64f00b6,
722 |             0x91a1db9f810ce2ac,
723 |             0xc7a4b33169335bd,
724 |             0xa9202f9769f137,
725 |         ]),
726 |     };
727 | 
728 |     assert_eq!(input.invert().unwrap(), expected_inverse);
729 |     // Inverting zero must report "none" rather than produce a value.
730 |     assert!(bool::from(Fp2::zero().invert().is_none()));
731 | }
732 | 
733 | #[test]
734 | fn test_lexicographic_largest() {
735 |     // Each call builds a fresh copy of the sample coefficient shared by the cases below.
736 |     fn sample_c0() -> Fp {
737 |         Fp::from_raw_unchecked([
738 |             0x1128ecad67549455,
739 |             0x9e7a1cff3a4ea1a8,
740 |             0xeb208d51e08bcf27,
741 |             0xe98ad40811f5fc2b,
742 |             0x736c3a59232d511d,
743 |             0x10acd42d29cfcbb6,
744 |         ])
745 |     }
746 |     fn sample_c1() -> Fp {
747 |         Fp::from_raw_unchecked([
748 |             0xd328e37cc2f58d41,
749 |             0x948df0858a605869,
750 |             0x6032f9d56f93a573,
751 |             0x2be483ef3fffdc87,
752 |             0x30ef61f88f483c2a,
753 |             0x1333f55a35725be0,
754 |         ])
755 |     }
756 |     let is_largest = |c0: Fp, c1: Fp| bool::from(Fp2 { c0, c1 }.lexicographically_largest());
757 | 
758 |     assert!(!bool::from(Fp2::zero().lexicographically_largest()));
759 |     assert!(!bool::from(Fp2::one().lexicographically_largest()));
760 |     assert!(is_largest(sample_c0(), sample_c1()));
761 |     assert!(!is_largest(-sample_c0(), -sample_c1()));
762 |     assert!(!is_largest(sample_c0(), Fp::zero()));
763 |     assert!(is_largest(-sample_c0(), Fp::zero()));
764 | }
808 | 


--------------------------------------------------------------------------------
/bls12_377/src/fp_asm.S:
--------------------------------------------------------------------------------
   1 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
   2 | @
   3 | @ Low-level operations on Fp values
   4 | @
   5 | @ Each Fp value is stored as a word-aligned 12-word array
   6 | @
   7 | @ All functions work correctly with repeated arguments,
   8 | @ e.g. fp_sum(x, x, x)
   9 | @
  10 | @ All functions should take constant time on ARM SC300
  11 | @
  12 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  13 | 
  14 | .text
  15 | 
  16 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  17 | @
  18 | @ fp_is_zero: r0 = (all 12 words at [r0] are zero) ? 1 : 0
  19 | @
  20 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  21 | 
  22 | .align 3
  23 | .global fp_is_zero
  24 | .syntax unified
  25 | .thumb
  26 | .thumb_func
  27 | .type fp_is_zero,	%function
  28 | 
  29 | fp_is_zero:
  30 | 	ldr	r1, [r0,  #0]	@ r1 accumulates the OR of every word, starting with word 0
  31 | 
  32 | 	ldr	r2, [r0,  #4];	ldr	r3, [r0,  #8];	orr	r1, r2;	orr	r1, r3
  33 | 	ldr	r2, [r0, #12];	ldr	r3, [r0, #16];	orr	r1, r2;	orr	r1, r3
  34 | 	ldr	r2, [r0, #20];	ldr	r3, [r0, #24];	orr	r1, r2;	orr	r1, r3
  35 | 	ldr	r2, [r0, #28];	ldr	r3, [r0, #32];	orr	r1, r2;	orr	r1, r3
  36 | 	ldr	r2, [r0, #36];	ldr	r3, [r0, #40];	orr	r1, r2;	orr	r1, r3
  37 | 
  38 | 	ldr	r2, [r0, #44];	orrs	r1, r2	@ last word; orrs sets Z iff the combined OR is 0
  39 | 
  40 | 	ite	eq	@ result is selected with conditional moves, not a branch
  41 | 	moveq	r0, #1
  42 | 	movne	r0, #0
  43 | 
  44 | 	bx	lr
  45 | 
  46 | .size fp_is_zero, . - fp_is_zero
  47 | 
  48 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  49 | @
  50 | @ fp_cpy: Copy the 12 words at [r1] to [r0]
  51 | @
  52 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  53 | 
  54 | .align 3
  55 | .global fp_cpy
  56 | .syntax unified
  57 | .thumb
  58 | .thumb_func
  59 | .type fp_cpy,	%function
  60 | 
  61 | fp_cpy:
  62 | 	ldr	r2, [r1,  #0]; ldr	r3, [r1,  #4]	@ two words per step: load both ...
  63 | 	str	r2, [r0,  #0]; str	r3, [r0,  #4]	@ ... then store both
  64 | 
  65 | 	ldr	r2, [r1,  #8]; ldr	r3, [r1, #12]
  66 | 	str	r2, [r0,  #8]; str	r3, [r0, #12]
  67 | 
  68 | 	ldr	r2, [r1, #16]; ldr	r3, [r1, #20]
  69 | 	str	r2, [r0, #16]; str	r3, [r0, #20]
  70 | 
  71 | 	ldr	r2, [r1, #24]; ldr	r3, [r1, #28]
  72 | 	str	r2, [r0, #24]; str	r3, [r0, #28]
  73 | 
  74 | 	ldr	r2, [r1, #32]; ldr	r3, [r1, #36]
  75 | 	str	r2, [r0, #32]; str	r3, [r0, #36]
  76 | 
  77 | 	ldr	r2, [r1, #40]; ldr	r3, [r1, #44]
  78 | 	str	r2, [r0, #40]; str	r3, [r0, #44]
  79 | 
  80 | 	bx	lr
  81 | 
  82 | .size fp_cpy, . - fp_cpy
  83 | 
  84 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  85 | @
  86 | @ fp_eq: Check two Fp values ([r0] and [r1]) for equality; r0 = equal ? 1 : 0
  87 | @
  88 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  89 | 
  90 | .align 3
  91 | .global fp_eq
  92 | .syntax unified
  93 | .thumb
  94 | .thumb_func
  95 | .type fp_eq,	%function
  96 | 
  97 | fp_eq:
  98 | 	sub	sp, #4	@ one stack slot, used to preserve r4
  99 | 
 100 | 	ldr	r2, [r0,  #0]
 101 | 	ldr	r3, [r1,  #0]
 102 | 	str	r4, [sp]	@ save r4: it is used as scratch and restored below
 103 | 	sub	r2, r3	@ r2 = difference of word 0; collects the OR of all word differences
 104 | 
 105 | 	ldr	r3, [r0,  #4];	ldr	r4, [r1,  #4];	sub	r3, r4;	orr	r2, r3 	@ words 1-10: r2 |= difference
 106 | 	ldr	r3, [r0,  #8];	ldr	r4, [r1,  #8];	sub	r3, r4;	orr	r2, r3 
 107 | 	ldr	r3, [r0, #12];	ldr	r4, [r1, #12];	sub	r3, r4;	orr	r2, r3 
 108 | 	ldr	r3, [r0, #16];	ldr	r4, [r1, #16];	sub	r3, r4;	orr	r2, r3 
 109 | 	ldr	r3, [r0, #20];	ldr	r4, [r1, #20];	sub	r3, r4;	orr	r2, r3 
 110 | 	ldr	r3, [r0, #24];	ldr	r4, [r1, #24];	sub	r3, r4;	orr	r2, r3 
 111 | 	ldr	r3, [r0, #28];	ldr	r4, [r1, #28];	sub	r3, r4;	orr	r2, r3 
 112 | 	ldr	r3, [r0, #32];	ldr	r4, [r1, #32];	sub	r3, r4;	orr	r2, r3 
 113 | 	ldr	r3, [r0, #36];	ldr	r4, [r1, #36];	sub	r3, r4;	orr	r2, r3 
 114 | 	ldr	r3, [r0, #40];	ldr	r4, [r1, #40];	sub	r3, r4;	orr	r2, r3 
 115 | 
 116 | 	ldr	r4, [sp]	@ restore r4; the last word below only needs r0-r2
 117 | 
 118 | 	ldr	r0, [r0, #44];	ldr	r1, [r1, #44];	sub	r0, r1;	orrs	r0, r2 	@ Z set iff every difference is 0
 119 | 
 120 | 	ite	eq	@ result is selected with conditional moves, not a branch
 121 | 	moveq	r0, #1
 122 | 	movne	r0, #0
 123 | 
 124 | 	add	sp, #4	@ release the stack slot
 125 | 
 126 | 	bx	lr
 127 | 
 128 | .size fp_eq, . - fp_eq
 129 | 
 130 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 131 | @
 132 | @ fp_neg: Negate an Fp value
 133 | @
 134 | @ x = (y != 0) ? (p - y) : 0
 135 | @
 136 | @  x  = (y == 0) ? p : y
 137 | @  x  = -x
 138 | @  x += p
 139 | @
 140 | @  Note: -x == ~x + 1
 141 | @
 142 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 143 | 
 144 | .align 3
 145 | .global fp_neg
 146 | .syntax unified
 147 | .thumb
 148 | .thumb_func
 149 | .type fp_neg,	%function
 150 | 
 151 | fp_neg:
 152 | 	push	{ r4-r11, lr }	@ lr is saved so it can serve as a scratch register below
 153 | 	ldm	r1, { r1-r12 }	@ y -> r1..r12 (word 0 in r1); the pointer in r1 is overwritten
 154 | 
 155 | 	@ lr = OR of all words of y, so lr == 0 iff y == 0
 156 | 
 157 | 	orr	lr, r1, r2
 158 | 	orr	lr, r3
 159 | 	orr	lr, r4
 160 | 	orr	lr, r5
 161 | 	orr	lr, r6
 162 | 	orr	lr, r7
 163 | 	orr	lr, r8
 164 | 	orr	lr, r9
 165 | 	orr	lr, r10
 166 | 	orr	lr, r11
 167 | 	orrs	lr, r12	@ Z = (lr == 0) ? 1 : 0
 168 | 
 169 | 	@ x = Z ? p : y   (the moves below do not alter Z, so every IT block tests the same flag)
 170 | 
 171 | 	itttt	eq
 172 | 	moveq	r1, #0x00000001	@ words of p, word 0 first (same constants as "x += p" below)
 173 | 	moveq	r2, #0xc000
 174 | 	movteq	r2, #0x8508
 175 | 	moveq	r3, #0x30000000
 176 | 
 177 | 	itttt	eq
 178 | 	moveq	r4, #0x5D44
 179 | 	movteq	r4, #0x170B
 180 | 	moveq	r5, #0x4800
 181 | 	movteq	r5, #0xBA09
 182 | 
 183 | 	itttt	eq
 184 | 	moveq	r6, #0x622F
 185 | 	movteq	r6, #0x1EF3
 186 | 	moveq	r7, #0x138F
 187 | 	movteq	r7, #0x00F5
 188 | 
 189 | 	itttt	eq
 190 | 	moveq	r8, #0xD9F3
 191 | 	movteq	r8, #0x1A22
 192 | 	moveq	r9, #0x493B
 193 | 	movteq	r9, #0x6CA1
 194 | 
 195 | 	itttt	eq
 196 | 	moveq	r10, #0x05C0
 197 | 	movteq	r10, #0xC63B
 198 | 	moveq	r11, #0x10EA
 199 | 	movteq	r11, #0x17C5
 200 | 
 201 | 	itt	eq
 202 | 	moveq	r12, #0x3A46
 203 | 	movteq	r12, #0x01AE
 204 | 
 205 | 	@ x = ~x
 206 | 
 207 | 	mvn	 r1,  r1
 208 | 	mvn	 r2,  r2
 209 | 	mvn	 r3,  r3
 210 | 	mvn	 r4,  r4
 211 | 	mvn	 r5,  r5
 212 | 	mvn	 r6,  r6
 213 | 	mvn	 r7,  r7
 214 | 	mvn	 r8,  r8
 215 | 	mvn	 r9,  r9
 216 | 	mvn	r10, r10
 217 | 	mvn	r11, r11
 218 | 	mvn	r12, r12
 219 | 
 220 | 	@ x += 1
 221 | 
 222 | 	adds	 r1, #1
 223 | 	adcs	 r2, #0
 224 | 	adcs	 r3, #0
 225 | 	adcs	 r4, #0
 226 | 	adcs	 r5, #0
 227 | 	adcs	 r6, #0
 228 | 	adcs	 r7, #0
 229 | 	adcs	 r8, #0
 230 | 	adcs	 r9, #0
 231 | 	adcs	r10, #0
 232 | 	adcs	r11, #0
 233 | 	adc	r12, #0	@ top word: the carry out of the 12-word increment is dropped
 234 | 
 235 | 	@ x += p
 236 | 
 237 | 	adds	r1, #0x00000001
 238 | 
 239 | 	mov	lr, #0xc000;	movt	lr, #0x8508;	adcs	 r2, lr
 240 | 
 241 | 	adcs	r3, #0x30000000
 242 | 
 243 | 	mov	lr, #0x5D44;	movt	lr, #0x170B;	adcs	 r4, lr
 244 | 	mov	lr, #0x4800;	movt	lr, #0xBA09;	adcs	 r5, lr
 245 | 	mov	lr, #0x622F;	movt	lr, #0x1EF3;	adcs	 r6, lr
 246 | 	mov	lr, #0x138F;	movt	lr, #0x00F5;	adcs	 r7, lr
 247 | 	mov	lr, #0xD9F3;	movt	lr, #0x1A22;	adcs	 r8, lr
 248 | 	mov	lr, #0x493B;	movt	lr, #0x6CA1;	adcs	 r9, lr
 249 | 	mov	lr, #0x05C0;	movt	lr, #0xC63B;	adcs	r10, lr
 250 | 	mov	lr, #0x10EA;	movt	lr, #0x17C5;	adcs	r11, lr
 251 | 	mov	lr, #0x3A46;	movt	lr, #0x01AE;	adcs	r12, lr	@ final carry is not used
 252 | 
 253 | 	stm	r0, { r1-r12 }	@ write the 12 result words to [r0]
 254 | 	pop	    { r4-r11, pc }	@ restore registers; popping the saved lr into pc returns
 255 | 
 256 | .size fp_neg, . - fp_neg
 257 | 
 258 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 259 | @
 260 | @ fp_diff: x = y - z (mod p)
 261 | @
 262 | @  x = y - z
 263 | @  store x
 264 | @
 265 | @  C = (x < 0); x += p
 266 | @  if (C)	/* carry, because x was < 0 */
 267 | @    store x
 268 | @
 269 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 270 | 
 271 | .align 3
 272 | .global fp_diff
 273 | .syntax unified
 274 | .thumb
 275 | .thumb_func
 276 | .type fp_diff,	%function
 277 | 
 278 | fp_diff:
 279 | 	push	{ r4-r11, lr }	@ r0 = x (out), r1 = y, r2 = z; lr is saved so r14 can hold data
 280 | 
 281 | 	@ x = y - z
 282 | 
 283 | 	ldr	r14, [r1,  #0]	@ y words 0-4 -> r14, r12, r11, r10, r9
 284 | 	ldr	r12, [r1,  #4]
 285 | 	ldr	r11, [r1,  #8]
 286 | 	ldr	r10, [r1, #12]
 287 | 	ldr	 r9, [r1, #16]
 288 | 
 289 | 	ldr	 r8, [r2,  #0]	@ z words 0-4 -> r8, r7, r6, r5, r4
 290 | 	ldr	 r7, [r2,  #4]
 291 | 	ldr	 r6, [r2,  #8]
 292 | 	ldr	 r5, [r2, #12]
 293 | 	ldr	 r4, [r2, #16]
 294 | 
 295 | 	subs	r14,  r8	@ starts the borrow chain; the loads/stores in between leave the flags alone
 296 | 	sbcs	r12,  r7
 297 | 	sbcs	r11,  r6
 298 | 	sbcs	r10,  r5
 299 | 	sbcs	 r9,  r4
 300 | 
 301 | 	ldr	 r8, [r1, #20]	@ y words 5-7 (r8-r4 are free again)
 302 | 	ldr	 r7, [r1, #24]
 303 | 	ldr	 r6, [r1, #28]
 304 | 
 305 | 	ldr	 r5, [r2, #20]	@ z words 5-7
 306 | 	ldr	 r4, [r2, #24]
 307 | 	ldr	 r3, [r2, #28]
 308 | 
 309 | 	str	r14, [r0,  #0]	@ store early (0 cycles)
 310 | 
 311 | 	sbcs	 r8,  r5
 312 | 	sbcs	 r7,  r4
 313 | 	sbcs	 r6,  r3
 314 | 
 315 | 	ldr	 r5, [r1, #32]
 316 | 	ldr	 r4, [r1, #36]
 317 | 
 318 | 	ldr	 r3, [r2, #32]
 319 | 	ldr	r14, [r2, #36]	@ use r14 as temporary
 320 | 
 321 | 	str	r12, [r0,  #4]	@ store early (0 cycles)
 322 | 
 323 | 	sbcs	 r5,  r3
 324 | 	sbcs	 r4, r14
 325 | 
 326 | 	ldr	 r3, [r1, #40]
 327 | 	ldr	 r1, [r1, #44]	@ last read through the y pointer: r1 now holds word 11 of y
 328 | 
 329 | 	ldr	r14, [r2, #40]
 330 | 	ldr	 r2, [r2, #44]	@ last read through the z pointer: r2 now holds word 11 of z
 331 | 
 332 | 	str	r11, [r0,  #8]	@ store early (0 cycles)
 333 | 
 334 | 	sbcs	 r3, r14
 335 | 	sbcs	 r2,  r1,  r2	@ r2 = word 11 of the difference
 336 | 
 337 | 	ldr	r14, [r0,  #0]	@ restore r14 (word 0 of x, stored earlier)
 338 | 
 339 | 	str	r10, [r0, #12]
 340 | 	str	 r9, [r0, #16]
 341 | 	str	 r8, [r0, #20]
 342 | 	str	 r7, [r0, #24]
 343 | 	str	 r6, [r0, #28]
 344 | 	str	 r5, [r0, #32]
 345 | 	str	 r4, [r0, #36]
 346 | 	str	 r3, [r0, #40]
 347 | 	str	 r2, [r0, #44]
 348 | 
 349 | 	@ x += p, using r1 as temporary, x in { r14, r12-r2 }
 350 | 
 351 | 	adds	r14, #0x00000001
 352 | 
 353 | 	mov	r1, #0xc000;	movt	r1, #0x8508;	adcs	r12, r1
 354 | 
 355 | 	adcs	r11, #0x30000000
 356 | 
 357 | 	mov	r1, #0x5D44;	movt	r1, #0x170B;	adcs	r10, r1
 358 | 	mov	r1, #0x4800;	movt	r1, #0xBA09;	adcs	 r9, r1
 359 | 	mov	r1, #0x622F;	movt	r1, #0x1EF3;	adcs	 r8, r1
 360 | 	mov	r1, #0x138F;	movt	r1, #0x00F5;	adcs	 r7, r1
 361 | 	mov	r1, #0xD9F3;	movt	r1, #0x1A22;	adcs	 r6, r1
 362 | 	mov	r1, #0x493B;	movt	r1, #0x6CA1;	adcs	 r5, r1
 363 | 	mov	r1, #0x05C0;	movt	r1, #0xC63B;	adcs	 r4, r1
 364 | 	mov	r1, #0x10EA;	movt	r1, #0x17C5;	adcs	 r3, r1
 365 | 	mov	r1, #0x3A46;	movt	r1, #0x01AE;	adcs	 r2, r1
 366 | 
 367 | 	itttt	cs	@ carry set => x was negative => we need to store x
 368 | 	strcs	r14, [r0,  #0]
 369 | 	strcs	r12, [r0,  #4]
 370 | 	strcs	r11, [r0,  #8]
 371 | 	strcs	r10, [r0, #12]
 372 | 	itttt	cs
 373 | 	strcs	 r9, [r0, #16]
 374 | 	strcs	 r8, [r0, #20]
 375 | 	strcs	 r7, [r0, #24]
 376 | 	strcs	 r6, [r0, #28]
 377 | 	itttt	cs
 378 | 	strcs	 r5, [r0, #32]
 379 | 	strcs	 r4, [r0, #36]
 380 | 	strcs	 r3, [r0, #40]
 381 | 	strcs	 r2, [r0, #44]
 382 | 
 383 | 	pop	{ r4-r11, pc }	@ restore registers; popping the saved lr into pc returns
 384 | 
 385 | .size fp_diff, . - fp_diff
 386 | 
 387 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 388 | @
 389 | @ fp_sum: x = y + z (mod p)
 390 | @
 391 | @  x = y + z
 392 | @  store x
 393 | @
 394 | @  C = (x >= p); x -= p
 395 | @  if (C)
 396 | @    store x
 397 | @
 398 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 399 | 
 400 | .align 3
 401 | .global fp_sum
 402 | .syntax unified
 403 | .thumb
 404 | .thumb_func
 405 | .type fp_sum,	%function
 406 | 
 407 | fp_sum:
 408 | 	push	    { r4-r11, lr }	@ r0 = x (out), r1 = y, r2 = z; lr is saved so r14 can hold data
 409 | 
 410 | 	@ x = y + z
 411 | 
 412 | 	ldr	r14, [r1,  #0]	@ y words 0-4 -> r14, r12, r11, r10, r9
 413 | 	ldr	r12, [r1,  #4]
 414 | 	ldr	r11, [r1,  #8]
 415 | 	ldr	r10, [r1, #12]
 416 | 	ldr	 r9, [r1, #16]
 417 | 
 418 | 	ldr	 r8, [r2,  #0]	@ z words 0-4 -> r8, r7, r6, r5, r4
 419 | 	ldr	 r7, [r2,  #4]
 420 | 	ldr	 r6, [r2,  #8]
 421 | 	ldr	 r5, [r2, #12]
 422 | 	ldr	 r4, [r2, #16]
 423 | 
 424 | 	adds	r14,  r8	@ starts the carry chain; the loads/stores in between leave the flags alone
 425 | 	adcs	r12,  r7
 426 | 	adcs	r11,  r6
 427 | 	adcs	r10,  r5
 428 | 	adcs	 r9,  r4
 429 | 
 430 | 	ldr	 r8, [r1, #20]	@ y words 5-7 (r8-r4 are free again)
 431 | 	ldr	 r7, [r1, #24]
 432 | 	ldr	 r6, [r1, #28]
 433 | 
 434 | 	ldr	 r5, [r2, #20]	@ z words 5-7
 435 | 	ldr	 r4, [r2, #24]
 436 | 	ldr	 r3, [r2, #28]
 437 | 
 438 | 	str	r14, [r0,  #0]	@ store early (0 cycles)
 439 | 
 440 | 	adcs	 r8,  r5
 441 | 	adcs	 r7,  r4
 442 | 	adcs	 r6,  r3
 443 | 
 444 | 	ldr	 r5, [r1, #32]
 445 | 	ldr	 r4, [r1, #36]
 446 | 
 447 | 	ldr	 r3, [r2, #32]
 448 | 	ldr	r14, [r2, #36]	@ use r14 as temporary
 449 | 
 450 | 	str	r12, [r0,  #4]	@ store early (0 cycles)
 451 | 
 452 | 	adcs	 r5,  r3
 453 | 	adcs	 r4, r14
 454 | 
 455 | 	ldr	 r3, [r1, #40]
 456 | 	ldr	 r1, [r1, #44]	@ last read through the y pointer: r1 now holds word 11 of y
 457 | 
 458 | 	ldr	r14, [r2, #40]
 459 | 	ldr	 r2, [r2, #44]	@ last read through the z pointer: r2 now holds word 11 of z
 460 | 
 461 | 	str	r11, [r0,  #8]	@ store early (0 cycles)
 462 | 
 463 | 	adcs	 r3, r14
 464 | 	adcs	 r2,  r1,  r2	@ r2 = word 11 of the sum
 465 | 
 466 | 	ldr	r14, [r0,  #0]	@ restore r14 (word 0 of x, stored earlier)
 467 | 
 468 | 	str	r10, [r0, #12]
 469 | 	str	 r9, [r0, #16]
 470 | 	str	 r8, [r0, #20]
 471 | 	str	 r7, [r0, #24]
 472 | 	str	 r6, [r0, #28]
 473 | 	str	 r5, [r0, #32]
 474 | 	str	 r4, [r0, #36]
 475 | 	str	 r3, [r0, #40]
 476 | 	str	 r2, [r0, #44]
 477 | 
 478 | 	@ x -= p, using r1 as temporary, x in { r14, r12-r2 }
 479 | 
 480 | 	subs	r14, #0x00000001
 481 | 
 482 | 	mov	r1, #0xc000;	movt	r1, #0x8508;	sbcs	r12, r1
 483 | 
 484 | 	sbcs	r11, #0x30000000
 485 | 
 486 | 	mov	r1, #0x5D44;	movt	r1, #0x170B;	sbcs	r10, r1
 487 | 	mov	r1, #0x4800;	movt	r1, #0xBA09;	sbcs	 r9, r1
 488 | 	mov	r1, #0x622F;	movt	r1, #0x1EF3;	sbcs	 r8, r1
 489 | 	mov	r1, #0x138F;	movt	r1, #0x00F5;	sbcs	 r7, r1
 490 | 	mov	r1, #0xD9F3;	movt	r1, #0x1A22;	sbcs	 r6, r1
 491 | 	mov	r1, #0x493B;	movt	r1, #0x6CA1;	sbcs	 r5, r1
 492 | 	mov	r1, #0x05C0;	movt	r1, #0xC63B;	sbcs	 r4, r1
 493 | 	mov	r1, #0x10EA;	movt	r1, #0x17C5;	sbcs	 r3, r1
 494 | 	mov	r1, #0x3A46;	movt	r1, #0x01AE;	sbcs	 r2, r1
 495 | 
 496 | 	itttt	cs	@ carry set == no borrow => x was >= p => we need to store x
 497 | 	strcs	r14, [r0,  #0]
 498 | 	strcs	r12, [r0,  #4]
 499 | 	strcs	r11, [r0,  #8]
 500 | 	strcs	r10, [r0, #12]
 501 | 	itttt	cs
 502 | 	strcs	 r9, [r0, #16]
 503 | 	strcs	 r8, [r0, #20]
 504 | 	strcs	 r7, [r0, #24]
 505 | 	strcs	 r6, [r0, #28]
 506 | 	itttt	cs
 507 | 	strcs	 r5, [r0, #32]
 508 | 	strcs	 r4, [r0, #36]
 509 | 	strcs	 r3, [r0, #40]
 510 | 	strcs	 r2, [r0, #44]
 511 | 
 512 | 	pop	    { r4-r11, pc }	@ restore registers; popping the saved lr into pc returns
 513 | 
 514 | .size fp_sum, . - fp_sum
 515 | 
 516 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 517 | @
 518 | @ fp_cset: x = c ? y : x
 519 | @  r0 = x, r1 = y, r2 = c (y is stored over x when c != 0)
 520 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 521 | 
 522 | .align 3
 523 | .global fp_cset
 524 | .syntax unified
 525 | .thumb
 526 | .thumb_func
 527 | .type fp_cset,	%function
 528 | 
 529 | fp_cset:
 530 | 	movs	r2, r2	@ Z = (c == 0); nothing below changes the flags
 531 | 
 532 | 	ldr	r2, [r1,  #0]	@ r2 is reused as data now that c lives in the flags
 533 | 	ldr	r3, [r1,  #4]	@ loads always run; only the stores are conditional
 534 | 	itt	ne
 535 | 	strne	r2, [r0,  #0]
 536 | 	strne	r3, [r0,  #4]
 537 | 
 538 | 	ldr	r2, [r1,  #8]
 539 | 	ldr	r3, [r1, #12]
 540 | 	itt	ne
 541 | 	strne	r2, [r0,  #8]
 542 | 	strne	r3, [r0, #12]
 543 | 
 544 | 	ldr	r2, [r1, #16]
 545 | 	ldr	r3, [r1, #20]
 546 | 	itt	ne
 547 | 	strne	r2, [r0, #16]
 548 | 	strne	r3, [r0, #20]
 549 | 
 550 | 	ldr	r2, [r1, #24]
 551 | 	ldr	r3, [r1, #28]
 552 | 	itt	ne
 553 | 	strne	r2, [r0, #24]
 554 | 	strne	r3, [r0, #28]
 555 | 
 556 | 	ldr	r2, [r1, #32]
 557 | 	ldr	r3, [r1, #36]
 558 | 	itt	ne
 559 | 	strne	r2, [r0, #32]
 560 | 	strne	r3, [r0, #36]
 561 | 
 562 | 	ldr	r2, [r1, #40]
 563 | 	ldr	r3, [r1, #44]
 564 | 	itt	ne
 565 | 	strne	r2, [r0, #40]
 566 | 	strne	r3, [r0, #44]
 567 | 
 568 | 	bx	lr
 569 | 
 570 | .size fp_cset, . - fp_cset
 571 | 
 572 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 573 | @
 574 | @ fp_to_bytes: Write the 12 words at [r1] as 48 bytes at [r0],
 575 | @ word 11 first, and within each word the high byte first
 576 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 577 | 
 578 | .align 3
 579 | .global fp_to_bytes
 580 | .syntax unified
 581 | .thumb
 582 | .thumb_func
 583 | .type fp_to_bytes,	%function
 584 | 
 585 | fp_to_bytes:	@ NOTE: [r0] must not overlap [r1]; later words are read after earlier bytes are written
 586 | 				ldr	r2, [r1, #44]
 587 | 	strb	r2, [r0,  #3];	lsrs	r2, #8	@ low byte goes to the last address of the 4-byte group
 588 | 	strb	r2, [r0,  #2];	lsrs	r2, #8
 589 | 	strb	r2, [r0,  #1];	lsrs	r2, #8
 590 | 	strb	r2, [r0,  #0];	ldr	r2, [r1, #40]	@ high byte stored, then fetch the next lower word
 591 | 
 592 | 	strb	r2, [r0,  #7];	lsrs	r2, #8
 593 | 	strb	r2, [r0,  #6];	lsrs	r2, #8
 594 | 	strb	r2, [r0,  #5];	lsrs	r2, #8
 595 | 	strb	r2, [r0,  #4];	ldr	r2, [r1, #36]
 596 | 
 597 | 	strb	r2, [r0, #11];	lsrs	r2, #8
 598 | 	strb	r2, [r0, #10];	lsrs	r2, #8
 599 | 	strb	r2, [r0,  #9];	lsrs	r2, #8
 600 | 	strb	r2, [r0,  #8];	ldr	r2, [r1, #32]
 601 | 
 602 | 	strb	r2, [r0, #15];	lsrs	r2, #8
 603 | 	strb	r2, [r0, #14];	lsrs	r2, #8
 604 | 	strb	r2, [r0, #13];	lsrs	r2, #8
 605 | 	strb	r2, [r0, #12];	ldr	r2, [r1, #28]
 606 | 
 607 | 	strb	r2, [r0, #19];	lsrs	r2, #8
 608 | 	strb	r2, [r0, #18];	lsrs	r2, #8
 609 | 	strb	r2, [r0, #17];	lsrs	r2, #8
 610 | 	strb	r2, [r0, #16];	ldr	r2, [r1, #24]
 611 | 
 612 | 	strb	r2, [r0, #23];	lsrs	r2, #8
 613 | 	strb	r2, [r0, #22];	lsrs	r2, #8
 614 | 	strb	r2, [r0, #21];	lsrs	r2, #8
 615 | 	strb	r2, [r0, #20];	ldr	r2, [r1, #20]
 616 | 
 617 | 	strb	r2, [r0, #27];	lsrs	r2, #8
 618 | 	strb	r2, [r0, #26];	lsrs	r2, #8
 619 | 	strb	r2, [r0, #25];	lsrs	r2, #8
 620 | 	strb	r2, [r0, #24];	ldr	r2, [r1, #16]
 621 | 
 622 | 	strb	r2, [r0, #31];	lsrs	r2, #8
 623 | 	strb	r2, [r0, #30];	lsrs	r2, #8
 624 | 	strb	r2, [r0, #29];	lsrs	r2, #8
 625 | 	strb	r2, [r0, #28];	ldr	r2, [r1, #12]
 626 | 
 627 | 	strb	r2, [r0, #35];	lsrs	r2, #8
 628 | 	strb	r2, [r0, #34];	lsrs	r2, #8
 629 | 	strb	r2, [r0, #33];	lsrs	r2, #8
 630 | 	strb	r2, [r0, #32];	ldr	r2, [r1,  #8]
 631 | 
 632 | 	strb	r2, [r0, #39];	lsrs	r2, #8
 633 | 	strb	r2, [r0, #38];	lsrs	r2, #8
 634 | 	strb	r2, [r0, #37];	lsrs	r2, #8
 635 | 	strb	r2, [r0, #36];	ldr	r2, [r1,  #4]
 636 | 
 637 | 	strb	r2, [r0, #43];	lsrs	r2, #8
 638 | 	strb	r2, [r0, #42];	lsrs	r2, #8
 639 | 	strb	r2, [r0, #41];	lsrs	r2, #8
 640 | 	strb	r2, [r0, #40];	ldr	r2, [r1,  #0]
 641 | 
 642 | 	strb	r2, [r0, #47];	lsrs	r2, #8
 643 | 	strb	r2, [r0, #46];	lsrs	r2, #8
 644 | 	strb	r2, [r0, #45];	lsrs	r2, #8
 645 | 	strb	r2, [r0, #44]
 646 | 
 647 | 	bx	lr
 648 | 
 649 | .size fp_to_bytes, . - fp_to_bytes
 650 | 
 651 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 652 | @ fp_from_bytes
 653 | @   r0 = output: 48 bytes are written, then re-read as twelve 32-bit words
 654 | @   r1 = input:  48 bytes, input byte i is stored to output byte 47-i
 655 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 656 | @ If the byte-reversed value is >= the modulus, the output is overwritten with zeros. Scratch: r2, r3.
 657 | .align 3
 658 | .global fp_from_bytes
 659 | .syntax unified
 660 | .thumb
 661 | .thumb_func
 662 | .type fp_from_bytes,	%function
 663 | @ Byte reversal: read forwards from r1, write backwards into r0
 664 | fp_from_bytes:
 665 | 	ldrb	r2, [r1,  #0];	strb	r2, [r0, #47]
 666 | 	ldrb	r2, [r1,  #1];	strb	r2, [r0, #46]
 667 | 	ldrb	r2, [r1,  #2];	strb	r2, [r0, #45]
 668 | 	ldrb	r2, [r1,  #3];	strb	r2, [r0, #44]
 669 | 
 670 | 	ldrb	r2, [r1,  #4];	strb	r2, [r0, #43]
 671 | 	ldrb	r2, [r1,  #5];	strb	r2, [r0, #42]
 672 | 	ldrb	r2, [r1,  #6];	strb	r2, [r0, #41]
 673 | 	ldrb	r2, [r1,  #7];	strb	r2, [r0, #40]
 674 | 
 675 | 	ldrb	r2, [r1,  #8];	strb	r2, [r0, #39]
 676 | 	ldrb	r2, [r1,  #9];	strb	r2, [r0, #38]
 677 | 	ldrb	r2, [r1, #10];	strb	r2, [r0, #37]
 678 | 	ldrb	r2, [r1, #11];	strb	r2, [r0, #36]
 679 | 
 680 | 	ldrb	r2, [r1, #12];	strb	r2, [r0, #35]
 681 | 	ldrb	r2, [r1, #13];	strb	r2, [r0, #34]
 682 | 	ldrb	r2, [r1, #14];	strb	r2, [r0, #33]
 683 | 	ldrb	r2, [r1, #15];	strb	r2, [r0, #32]
 684 | 
 685 | 	ldrb	r2, [r1, #16];	strb	r2, [r0, #31]
 686 | 	ldrb	r2, [r1, #17];	strb	r2, [r0, #30]
 687 | 	ldrb	r2, [r1, #18];	strb	r2, [r0, #29]
 688 | 	ldrb	r2, [r1, #19];	strb	r2, [r0, #28]
 689 | 
 690 | 	ldrb	r2, [r1, #20];	strb	r2, [r0, #27]
 691 | 	ldrb	r2, [r1, #21];	strb	r2, [r0, #26]
 692 | 	ldrb	r2, [r1, #22];	strb	r2, [r0, #25]
 693 | 	ldrb	r2, [r1, #23];	strb	r2, [r0, #24]
 694 | 
 695 | 	ldrb	r2, [r1, #24];	strb	r2, [r0, #23]
 696 | 	ldrb	r2, [r1, #25];	strb	r2, [r0, #22]
 697 | 	ldrb	r2, [r1, #26];	strb	r2, [r0, #21]
 698 | 	ldrb	r2, [r1, #27];	strb	r2, [r0, #20]
 699 | 
 700 | 	ldrb	r2, [r1, #28];	strb	r2, [r0, #19]
 701 | 	ldrb	r2, [r1, #29];	strb	r2, [r0, #18]
 702 | 	ldrb	r2, [r1, #30];	strb	r2, [r0, #17]
 703 | 	ldrb	r2, [r1, #31];	strb	r2, [r0, #16]
 704 | 
 705 | 	ldrb	r2, [r1, #32];	strb	r2, [r0, #15]
 706 | 	ldrb	r2, [r1, #33];	strb	r2, [r0, #14]
 707 | 	ldrb	r2, [r1, #34];	strb	r2, [r0, #13]
 708 | 	ldrb	r2, [r1, #35];	strb	r2, [r0, #12]
 709 | 
 710 | 	ldrb	r2, [r1, #36];	strb	r2, [r0, #11]
 711 | 	ldrb	r2, [r1, #37];	strb	r2, [r0, #10]
 712 | 	ldrb	r2, [r1, #38];	strb	r2, [r0,  #9]
 713 | 	ldrb	r2, [r1, #39];	strb	r2, [r0,  #8]
 714 | 
 715 | 	ldrb	r2, [r1, #40];	strb	r2, [r0,  #7]
 716 | 	ldrb	r2, [r1, #41];	strb	r2, [r0,  #6]
 717 | 	ldrb	r2, [r1, #42];	strb	r2, [r0,  #5]
 718 | 	ldrb	r2, [r1, #43];	strb	r2, [r0,  #4]
 719 | 
 720 | 	ldrb	r2, [r1, #44];	strb	r2, [r0,  #3]
 721 | 	ldrb	r2, [r1, #45];	strb	r2, [r0,  #2]
 722 | 	ldrb	r2, [r1, #46];	strb	r2, [r0,  #1]
 723 | 	ldrb	r2, [r1, #47];	strb	r2, [r0,  #0]
 724 | 
 725 | 	@ Subtract modulus
 726 | 	@ Only the borrow chain is kept: every difference in r2 is discarded by the next ldr
 727 | 	ldr	r2, [r0,  #0]
 728 | 	mov	r3, #0x00000001
 729 | 	subs	r2, r3
 730 | 	@ ldr, mov and movt leave the flags alone, so the borrow survives from word to word
 731 | 	ldr	r2, [r0,  #4]
 732 | 	mov	r3, #0xC000
 733 | 	movt	r3, #0x8508	@ r3 = 0x8508C000
 734 | 	sbcs	r2, r3
 735 | 	@ The twelve constants equal modulus32[] in fp_redc (fp_mont.cpp), lowest word first
 736 | 	ldr	r2, [r0,  #8]
 737 | 	mov	r3, #0x30000000
 738 | 	sbcs	r2, r3
 739 | 
 740 | 	ldr	r2, [r0, #12]
 741 | 	mov	r3, #0x5D44
 742 | 	movt	r3, #0x170B	@ r3 = 0x170B5D44
 743 | 	sbcs	r2, r3
 744 | 
 745 | 	ldr	r2, [r0, #16]
 746 | 	mov	r3, #0x4800
 747 | 	movt	r3, #0xBA09	@ r3 = 0xBA094800
 748 | 	sbcs	r2, r3
 749 | 
 750 | 	ldr	r2, [r0, #20]
 751 | 	mov	r3, #0x622F
 752 | 	movt	r3, #0x1EF3	@ r3 = 0x1EF3622F
 753 | 	sbcs	r2, r3
 754 | 
 755 | 	ldr	r2, [r0, #24]
 756 | 	mov	r3, #0x138F
 757 | 	movt	r3, #0x00F5	@ r3 = 0x00F5138F
 758 | 	sbcs	r2, r3
 759 | 
 760 | 	ldr	r2, [r0, #28]
 761 | 	mov	r3, #0xD9F3
 762 | 	movt	r3, #0x1A22	@ r3 = 0x1A22D9F3
 763 | 	sbcs	r2, r3
 764 | 
 765 | 	ldr	r2, [r0, #32]
 766 | 	mov	r3, #0x493B
 767 | 	movt	r3, #0x6CA1	@ r3 = 0x6CA1493B
 768 | 	sbcs	r2, r3
 769 | 
 770 | 	ldr	r2, [r0, #36]
 771 | 	mov	r3, #0x05C0
 772 | 	movt	r3, #0xC63B	@ r3 = 0xC63B05C0
 773 | 	sbcs	r2, r3
 774 | 
 775 | 	ldr	r2, [r0, #40]
 776 | 	mov	r3, #0x10EA
 777 | 	movt	r3, #0x17C5	@ r3 = 0x17C510EA
 778 | 	sbcs	r2, r3
 779 | 
 780 | 	ldr	r2, [r0, #44]
 781 | 	mov	r3, #0x3A46
 782 | 	movt	r3, #0x01AE	@ r3 = 0x01AE3A46
 783 | 	sbcs	r2, r3
 784 | 
 785 | 	@ Malformed input (x>=m) => no borrow
 786 | 	@ After the final sbcs, C set means the whole subtraction did not borrow
 787 | 	mov	r2, #0	@ mov without the s suffix: the C flag from the last sbcs is still intact
 788 | 
 789 | 	itttt	cs	@ carry set == no borrow => x was >= p => we need to zero x
 790 | 	strcs	r2, [r0,  #0]
 791 | 	strcs	r2, [r0,  #4]
 792 | 	strcs	r2, [r0,  #8]
 793 | 	strcs	r2, [r0, #12]
 794 | 	@ An IT block covers at most four instructions, hence three blocks for twelve words
 795 | 	itttt	cs
 796 | 	strcs	r2, [r0, #16]
 797 | 	strcs	r2, [r0, #20]
 798 | 	strcs	r2, [r0, #24]
 799 | 	strcs	r2, [r0, #28]
 800 | 
 801 | 	itttt	cs
 802 | 	strcs	r2, [r0, #32]
 803 | 	strcs	r2, [r0, #36]
 804 | 	strcs	r2, [r0, #40]
 805 | 	strcs	r2, [r0, #44]
 806 | 	@ No status is returned in a register: r0 still holds the output pointer
 807 | 	bx	lr
 808 | 
 809 | .size fp_from_bytes, . - fp_from_bytes
 810 | 
 811 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 812 | @
 813 | @ mul378(uint32_t *z, const uint32_t *x, const uint32_t *y)
 814 | @   r0 = z (24 words written, byte offsets 0..92); r1 = x and r2 = y (12 words read from each)
 815 | @ Multiply 378-bit numbers, producing a 756-bit result
 816 | @   Bits 378..383 of x and y are masked off when the operands are split into halves
 817 | @ Uses Karatsuba, delegating 189/190-bit multiply to m190
 818 | @   Stack after the three pushes: sp+0 xl, +24 xh, +48 yl, +72 yh, +96 saved r0 (z)
 819 | @ ~1641 clock cycles
 820 | @   The bare @ N lines in the body appear to be running cycle counts (the last one reads 1641)
 821 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 822 | @ NOTE(review): no .type directive precedes this label, unlike fp_from_bytes - confirm that is intended
 823 | .align 3
 824 | .global mul378
 825 | .syntax unified
 826 | .thumb
 827 | .thumb_func
 828 | mul378:
 829 | 	push	{ r0-r2, r4-r11, lr }
 830 | @ 13
 831 | 	@@ Split x and y into 189-bit halves
 832 | 	@ Save them on the stack and forget input pointers
 833 | 
 834 | 	@ Load y
 835 | 
 836 | 	ldm	r2, { r2-r12, r14 }
 837 | @ 26
 838 | 	@ Split bits into two right-aligned halves of 189 bits each
 839 | 	@ 189 = 5*32 + 29: r2-r7 stay as the low half, r8-r12,r14 become bits 189 and up
 840 | 	lsl	r14, #3;	orr	r14, r14, r12, lsr #29
 841 | 	lsl	r12, #3;	orr	r12, r12, r11, lsr #29
 842 | 	lsl	r11, #3;	orr	r11, r11, r10, lsr #29
 843 | 	lsl	r10, #3;	orr	r10, r10, r9,  lsr #29
 844 | 	lsl	r9,  #3;	orr	r9,  r9,  r8,  lsr #29
 845 | 	lsl	r8,  #3;	orr	r8,  r8,  r7,  lsr #29
 846 | 
 847 | 	@ Limit to 189 bits each
 848 | 	@ r14 is the top word of the high half, r7 the top word of the low half
 849 | 	and	r14, #0x1fffffff
 850 | 	and	r7,  #0x1fffffff
 851 | @ 40
 852 | 	@ Store y on the stack
 853 | 
 854 | 	push	{ r2-r12, r14 }
 855 | @53
 856 | 	@ Load x
 857 | 
 858 | 	ldm	r1, { r2-r12, r14 }
 859 | 
 860 | 	@ Split bits into two right-aligned halves of 189 bits each
 861 | 
 862 | 	lsl	r14, #3;	orr	r14, r14, r12, lsr #29
 863 | 	lsl	r12, #3;	orr	r12, r12, r11, lsr #29
 864 | 	lsl	r11, #3;	orr	r11, r11, r10, lsr #29
 865 | 	lsl	r10, #3;	orr	r10, r10, r9,  lsr #29
 866 | 	lsl	r9,  #3;	orr	r9,  r9,  r8,  lsr #29
 867 | 	lsl	r8,  #3;	orr	r8,  r8,  r7,  lsr #29
 868 | 
 869 | 	@ Limit to 189 bits each
 870 | 
 871 | 	and	r14, #0x1fffffff
 872 | 	and	r7,  #0x1fffffff
 873 | 
 874 | 	@ Store x on the stack
 875 | 
 876 | 	push	{ r2-r12, r14 }
 877 | @ 93
 878 | 	@@ Compute xl*yl, place in zl
 879 | 	@ r0 has not been touched since entry, so it still points at z
 880 | 	mov	r1, sp
 881 | 	add	r2, sp, #48
 882 | 
 883 | 	bl	m190
 884 | @ 517
 885 | 	@@ Compute xh*yh, place in zh
 886 | 
 887 | @	ldr	r0, [sp, #96]	@ Skipped because m190 preserves r0
 888 | 	add	r1, sp, #24
 889 | 	add	r2, sp, #72
 890 | 	add	r0, r0, #48
 891 | 	@ r1 = xh, r2 = yh, r0 = z + 48 bytes (word 12)
 892 | 	bl	m190
 893 | 
 894 | @ 942
 895 | 	@@ Compute xh+xl and yh+yl, store on the stack
 896 | 
 897 | 	@ Load x from the stack
 898 | 
 899 | 	ldm	sp, { r2-r12, r14 }
 900 | 
 901 | 	@ Compute xh+xl
 902 | 	@ r2-r7 = xl, r8-r12,r14 = xh; two 189-bit values sum to at most 190 bits, so six words suffice
 903 | 	adds	r2, r8
 904 | 	adcs	r3, r9
 905 | 	adcs	r4, r10
 906 | 	adcs	r5, r11
 907 | 	adcs	r6, r12
 908 | 	adc	r7, r14
 909 | 
 910 | 	add	r1, sp, #48	@ point at y on the stack
 911 | @ 962
 912 | 	stm	sp, { r2-r7 }	@ overwrite xl with xh+xl
 913 | 
 914 | 	@ Load y from the stack
 915 | 
 916 | 	ldm	r1, { r2-r12, r14 }
 917 | @ 982
 918 | 	sub	r1, r1, #24	@ point at xh on the stack
 919 | 
 920 | 	@ Compute yh+yl
 921 | 
 922 | 	adds	r2, r8
 923 | 	adcs	r3, r9
 924 | 	adcs	r4, r10
 925 | 	adcs	r5, r11
 926 | 	adcs	r6, r12
 927 | 	adc	r7, r14
 928 | 
 929 | 	stm	r1, { r2-r7 }	@ overwrite xh with yh+yl
 930 | 
 931 | 	@@ Compute (xh+xl)*(yh+yl), overwrite y on the stack
 932 | 	@ r1 already points at yh+yl (sp+24); the 12-word product replaces y at sp+48
 933 | 	add	r0, sp, #48
 934 | 	add	r2, sp, #0	@ xh+xl
 935 | @ 998
 936 | 	bl	m190
 937 | @ 1420
 938 | 	@@ Subtract xl*yl and xh*yh from (xh+xl)*(yh+yl)
 939 | 	@ Done in three 4-word chunks; r14 collects the borrows to carry into the next chunk
 940 | 	ldr	r0,  [sp, #96]	@ point to z (xl*yl)
 941 | 
 942 | 	ldr	r1,  [sp, #48]
 943 | 	ldr	r2,  [sp, #52]
 944 | 	ldr	r3,  [sp, #56]
 945 | 	ldr	r4,  [sp, #60]
 946 | 
 947 | 	ldr	r5,  [r0,  #0]
 948 | 	ldr	r6,  [r0,  #4]
 949 | 	ldr	r7,  [r0,  #8]
 950 | 	ldr	r8,  [r0, #12]
 951 | 
 952 | 	ldr	r9,  [r0, #48]
 953 | 	ldr	r10, [r0, #52]
 954 | 	ldr	r11, [r0, #56]
 955 | 	ldr	r12, [r0, #60]
 956 | 
 957 | 	mov	r14, #0
 958 | 
 959 | 	subs	r1, r5
 960 | 	sbcs	r2, r6
 961 | 	sbcs	r3, r7
 962 | 	sbcs	r4, r8
 963 | 	sbc	r14, #0	@ r14 decreases by one if the chunk borrowed
 964 | 
 965 | 	subs	r1, r9
 966 | 	sbcs	r2, r10
 967 | 	sbcs	r3, r11
 968 | 	sbcs	r4, r12
 969 | 	sbc	r14, #0
 970 | 
 971 | 	str	r1, [sp, #48]
 972 | 	str	r2, [sp, #52]
 973 | 	str	r3, [sp, #56]
 974 | 	str	r4, [sp, #60]
 975 | 
 976 | 
 977 | 	ldr	r1,  [sp, #64]
 978 | 	ldr	r2,  [sp, #68]
 979 | 	ldr	r3,  [sp, #72]
 980 | 	ldr	r4,  [sp, #76]
 981 | 
 982 | 	ldr	r5,  [r0, #16]
 983 | 	ldr	r6,  [r0, #20]
 984 | 	ldr	r7,  [r0, #24]
 985 | 	ldr	r8,  [r0, #28]
 986 | 
 987 | 	ldr	r9,  [r0, #64]
 988 | 	ldr	r10, [r0, #68]
 989 | 	ldr	r11, [r0, #72]
 990 | 	ldr	r12, [r0, #76]
 991 | @ 1462
 992 | 	@ Propagate borrow
 993 | 
 994 | 	rsb	r14, #0	@ negate: r14 = number of borrows (0..2) owed by the previous chunk
 995 | 	subs	r1, r14;	mov	r14, #0
 996 | 	sbcs	r2, #0
 997 | 	sbcs	r3, #0
 998 | 	sbcs	r4, #0
 999 | 	sbc	r14, #0
1000 | 
1001 | 	subs	r1, r5
1002 | 	sbcs	r2, r6
1003 | 	sbcs	r3, r7
1004 | 	sbcs	r4, r8
1005 | 	sbc	r14, #0
1006 | 
1007 | 	subs	r1, r9
1008 | 	sbcs	r2, r10
1009 | 	sbcs	r3, r11
1010 | 	sbcs	r4, r12
1011 | 	sbc	r14, #0
1012 | 
1013 | 	str	r1, [sp, #64]
1014 | 	str	r2, [sp, #68]
1015 | 	str	r3, [sp, #72]
1016 | 	str	r4, [sp, #76]
1017 | @ 1482
1018 | 
1019 | 	ldr	r1,  [sp, #80]
1020 | 	ldr	r2,  [sp, #84]
1021 | 	ldr	r3,  [sp, #88]
1022 | 	ldr	r4,  [sp, #92]
1023 | 
1024 | 	ldr	r5,  [r0, #32]
1025 | 	ldr	r6,  [r0, #36]
1026 | 	ldr	r7,  [r0, #40]
1027 | 	ldr	r8,  [r0, #44]
1028 | 
1029 | 	ldr	r9,  [r0, #80]
1030 | 	ldr	r10, [r0, #84]
1031 | 	ldr	r11, [r0, #88]
1032 | 	ldr	r12, [r0, #92]
1033 | @ 1495
1034 | 	@ Propagate borrow
1035 | 	@ Ignore borrow out
1036 | 
1037 | 	rsb	r14, #0
1038 | 	subs	r1, r14;
1039 | 	sbcs	r2, #0
1040 | 	sbcs	r3, #0
1041 | 	sbc	r4, #0
1042 | 
1043 | 	subs	r1, r5
1044 | 	sbcs	r2, r6
1045 | 	sbcs	r3, r7
1046 | 	sbc	r4, r8
1047 | 
1048 | 	subs	r1, r9
1049 | 	sbcs	r2, r10
1050 | 	sbcs	r3, r11
1051 | 	sbc	r4, r12
1052 | 
1053 | 	str	r1, [sp, #80]
1054 | 	str	r2, [sp, #84]
1055 | 	str	r3, [sp, #88]
1056 | 	str	r4, [sp, #92]
1057 | 
1058 | 	@@ Add (((xh+xl)*(yh+yl) - xh*yh - xl*yl) << 189)
1059 | 	@@ to ((xh*yh << 378) + xl*yl)
1060 | 	@ 189 = 5*32 + 29: the middle term (at sp+48) enters at z word 5, shifted left by 29
1061 | 	ldr	r1,  [r0, #20]
1062 | 	ldr	r2,  [r0, #24]
1063 | 	ldr	r3,  [r0, #28]
1064 | 	ldr	r4,  [r0, #32]
1065 | 	ldr	r5,  [r0, #36]
1066 | 	ldr	r6,  [r0, #40]
1067 | 
1068 | 	ldr	r7,  [sp,  #0+48]
1069 | 	ldr	r8,  [sp,  #4+48]
1070 | 	ldr	r9,  [sp,  #8+48]
1071 | 	ldr	r10, [sp, #12+48]
1072 | 	ldr	r11, [sp, #16+48]
1073 | 	ldr	r12, [sp, #20+48]
1074 | @ 1525
1075 | 							adds	r1, r1, r7, lsl #29
1076 | 	lsr	r7,  #3; orr r7,  r7,  r8,  lsl #29;	adcs	r2, r7
1077 | 	lsr	r8,  #3; orr r8,  r8,  r9,  lsl #29;	adcs	r3, r8
1078 | 	lsr	r9,  #3; orr r9,  r9,  r10, lsl #29;	adcs	r4, r9
1079 | 	lsr	r10, #3; orr r10, r10, r11, lsl #29;	adcs	r5, r10
1080 | 	lsr	r11, #3; orr r11, r11, r12, lsl #29;	adcs	r6, r11
1081 | @ 1541
1082 | 	str	r1, [r0, #20];	ldr	r1, [r0, #44];	ldr	r7,  [sp, #24+48]
1083 | 	str	r2, [r0, #24];	ldr	r2, [r0, #48];	ldr	r8,  [sp, #28+48]
1084 | 	str	r3, [r0, #28];	ldr	r3, [r0, #52];	ldr	r9,  [sp, #32+48]
1085 | 	str	r4, [r0, #32];	ldr	r4, [r0, #56];	ldr	r10, [sp, #36+48]
1086 | 	str	r5, [r0, #36];	ldr	r5, [r0, #60];	ldr	r11, [sp, #40+48]
1087 | 	str	r6, [r0, #40];
1088 | @ 1557
1089 | 	orr	r1, r1, r2, lsl #26;	lsr	r2, #6	@ xh*yh was stored from bit 384 (word 12); slide it down 6 bits to bit 378
1090 | 	orr	r2, r2, r3, lsl #26;	lsr	r3, #6
1091 | 	orr	r3, r3, r4, lsl #26;	lsr	r4, #6
1092 | 	orr	r4, r4, r5, lsl #26;	lsr	r5, #6
1093 | @ 1565
1094 | 	lsr	r12, #3; orr r12, r12, r7,  lsl #29;	adcs	r1, r12
1095 | 	lsr	r7,  #3; orr r7,  r7,  r8,  lsl #29;	adcs	r2, r7
1096 | 	lsr	r8,  #3; orr r8,  r8,  r9,  lsl #29;	adcs	r3, r8
1097 | 	lsr	r9,  #3; orr r9,  r9,  r10, lsl #29;	adcs	r4, r9
1098 | 	lsr	r10, #3; orr r10, r10, r11, lsl #29
1099 | @ 1579
1100 | 	str	r1, [r0, #44];	ldr	r6, [r0, #64];	ldr	r12, [sp, #44+48]
1101 | 	str	r2, [r0, #48];	ldr	r1, [r0, #68]
1102 | 	str	r3, [r0, #52];	ldr	r2, [r0, #72]
1103 | 	str	r4, [r0, #56]
1104 | @ 1587
1105 | 	orr	r5, r5, r6, lsl #26;	lsr	r6, #6
1106 | 	orr	r6, r6, r1, lsl #26;	lsr	r1, #6
1107 | 	orr	r1, r1, r2, lsl #26;	lsr	r2, #6
1108 | @ 1593
1109 | 	lsr	r11, #3; orr r11, r11, r12, lsl #29;	adcs	r5, r10
1110 | 	lsr	r12, #3;				adcs	r6, r11
1111 | 							adcs	r1, r12
1112 | @ 1599
1113 | 	str	r5, [r0, #60]
1114 | 	ldr	r3, [r0, #76]
1115 | 	ldr	r4, [r0, #80]
1116 | 	str	r6, [r0, #64]
1117 | @ 1603
1118 | 	orr	r2, r2, r3, lsl #26;	lsr	r3, #6;	adcs	r2, #0
1119 | 	orr	r3, r3, r4, lsl #26;	lsr	r4, #6;	adcs	r3, #0
1120 | @ 1609
1121 | 	str	r1, [r0, #68]
1122 | 	ldr	r5, [r0, #84]
1123 | 	ldr	r6, [r0, #88]
1124 | 	ldr	r1, [r0, #92]
1125 | 	str	r2, [r0, #72]
1126 | @ 1614
1127 | 	orr	r4, r4, r5, lsl #26;	lsr	r5, #6;	adcs	r4, #0
1128 | 	orr	r5, r5, r6, lsl #26;	lsr	r6, #6;	adcs	r5, #0
1129 | 	orr	r6, r6, r1, lsl #26;	lsr	r1, #6;	adcs	r6, #0
1130 | 							adc	r1, #0
1131 | @ 1624
1132 | 	@ Deallocate stack
1133 | 
1134 | 	add	sp, #108	@ Discard 3+12+12 words
1135 | @ 1625
1136 | 	@ Store remaining outputs
1137 | 
1138 | 	str	r3, [r0, #76]
1139 | 	str	r4, [r0, #80]
1140 | 	str	r5, [r0, #84]
1141 | 	str	r6, [r0, #88]
1142 | 	str	r1, [r0, #92]
1143 | @ 1630
1144 | 	pop	{ r4-r11, pc }
1145 | @ 1641
1146 | 
1147 | .size   mul378, .-mul378
1148 | 
1149 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1150 | @
1151 | @ m190
1152 | @
1153 | @ Multiply 190-bit numbers, producing 380-bit result
1154 | @   Six words are read from each input and twelve words are written to the output
1155 | @ Uses Karatsuba, delegating 95/96-bit multiply to m96
1156 | @
1157 | @ Output pointer in r0
1158 | @ Input pointers in r1, r2
1159 | @   r1-r12 are overwritten; r0 and the return address are saved and restored with push/pop
1160 | @ Preserves r0
1161 | @
1162 | @ ~680 clock cycles
1163 | @ NOTE(review): the inline markers below end at 422 and the markers in mul378 advance by 424 across a call, so ~680 may be stale
1164 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1165 | 
1166 | .align 3
1167 | .syntax unified
1168 | .thumb
1169 | .thumb_func
1170 | m190:
1171 | 	push	{ r0, lr }
1172 | 
1173 | 	@ Load y
1174 | 
1175 | 	ldr	r12, [r2, #20]
1176 | 	ldr	r11, [r2, #16]
1177 | 	ldr	r10, [r2, #12]
1178 | 	ldr	r9,  [r2,  #8]
1179 | 	ldr	r8,  [r2,  #4]
1180 | 	ldr	r7,  [r2,  #0]
1181 | 
1182 | 	@ Load x
1183 | 
1184 | 	ldr	r6,  [r1, #20]
1185 | 	ldr	r5,  [r1, #16]
1186 | 	ldr	r4,  [r1, #12]
1187 | 	ldr	r3,  [r1,  #8]
1188 | 	ldr	r2,  [r1,  #4]
1189 | 	ldr	r1,  [r1,  #0]
1190 | @ 16
1191 | 	@ Split bits into two right-aligned halves of 95 bits each
1192 | 	@ 95 = 2*32 + 31: the low half is words 0,1 plus the low 31 bits of word 2
1193 | 	lsl	r12, #1;	orr	r12, r12, r11, lsr #31
1194 | 	lsl	r11, #1;	orr	r11, r11, r10, lsr #31
1195 | 	lsl	r10, #1;	orr	r10, r10, r9,  lsr #31
1196 | 
1197 | 	and	r12, #0x7fffffff
1198 | 	and	r9,  #0x7fffffff
1199 | 
1200 | 	lsl	r6,  #1;	orr	r6, r6, r5, lsr #31
1201 | 	lsl	r5,  #1;	orr	r5, r5, r4, lsr #31
1202 | 	lsl	r4,  #1;	orr	r4, r4, r3, lsr #31
1203 | 
1204 | 	and	r6, #0x7fffffff
1205 | 	and	r3, #0x7fffffff
1206 | @ 32
1207 | 	@ Store x and y on the stack
1208 | 	@ Layout: sp+0 xl, +12 xh, +24 yl, +36 yh, +48 saved r0, +52 saved lr
1209 | 	push	{ r1-r12 }
1210 | 
1211 | 	@@ Compute xl*yl, place in zl
1212 | 
1213 | 	mov	r1, sp
1214 | 	add	r2, sp, #24
1215 | @ 47
1216 | 	bl	m96
1217 | 
1218 | 	@@ Compute xh*yh, place in zh
1219 | 	@ r0 is unchanged by m96, so r0 + 24 addresses the upper six words of z
1220 | 	add	r1, sp, #12
1221 | 	add	r2, sp, #36
1222 | 	add	r0, r0, #24
1223 | @ 131
1224 | 	bl	m96
1225 | @ 215
1226 | 	@@ Compute xh+xl and yh+yl, store on the stack
1227 | 
1228 | 	@ Load x and y from the stack
1229 | 
1230 | 	ldm	sp, { r2-r12, r14 }
1231 | 	@ Now r2-r4 = xl, r5-r7 = xh, r8-r10 = yl, r11,r12,r14 = yh
1232 | 	@ Compute xh+xl
1233 | 
1234 | 	adds	r2,  r5
1235 | 	adcs	r3,  r6
1236 | 	adc 	r4,  r7
1237 | 
1238 | 	@ Compute yh+yl
1239 | 
1240 | 	adds	r8,  r11
1241 | 	adcs	r9,  r12
1242 | 	adc	r10, r14
1243 | @ 234
1244 | 	stm	sp, { r2-r4, r8-r10 }	@ overwrite x with xh+xl and yh+yl
1245 | 
1246 | 	@@ Compute (xh+xl)*(yh+yl), overwrite y on the stack
1247 | 
1248 | 	add	r0, sp, #24	@ y
1249 | 	add	r1, sp, #12	@ yh+yl
1250 | 	add	r2, sp, #0	@ xh+xl
1251 | @ 244
1252 | 	bl	m96
1253 | @ 328
1254 | 	@@ Subtract xl*yl and xh*yh from (xh+xl)*(yh+yl)
1255 | 
1256 | 	ldr	r0,  [sp, #48]	@ point to z (xl*yl)
1257 | 
1258 | 	@ Load (xh+xl)*(yh+yl)
1259 | 
1260 | 	ldr	r1,  [sp, #24]
1261 | 	ldr	r2,  [sp, #28]
1262 | 	ldr	r3,  [sp, #32]
1263 | 	ldr	r4,  [sp, #36]
1264 | 	ldr	r5,  [sp, #40]
1265 | 	ldr	r6,  [sp, #44]
1266 | 
1267 | 	@ Load and subtract xl*yl
1268 | 
1269 | 	ldr	r7,  [r0,  #0]
1270 | 	ldr	r8,  [r0,  #4]
1271 | 	ldr	r9,  [r0,  #8]
1272 | 	ldr	r10, [r0, #12]
1273 | 	ldr	r11, [r0, #16]
1274 | 	ldr	r12, [r0, #20]
1275 | @ 342
1276 | 	subs	r1, r7
1277 | 	sbcs	r2, r8
1278 | 	sbcs	r3, r9
1279 | 	sbcs	r4, r10
1280 | 	sbcs	r5, r11
1281 | 	sbc	r6, r12	@ There is no output borrow
1282 | 
1283 | 	@ Load and subtract xh*yh
1284 | 
1285 | 	ldr	r7,  [r0, #24]
1286 | 	ldr	r8,  [r0, #28]
1287 | 	ldr	r9,  [r0, #32]
1288 | 	ldr	r10, [r0, #36]
1289 | 	ldr	r11, [r0, #40]
1290 | 	ldr	r12, [r0, #44]
1291 | 
1292 | 	subs	r1, r7;		ldr	r7, [r0, #8]
1293 | 	sbcs	r2, r8
1294 | 	sbcs	r3, r9
1295 | 	sbcs	r4, r10
1296 | 	sbcs	r5, r11
1297 | 	sbc	r6, r12
1298 | 
1299 | 	@@ Add (xh+xl)*(yh+yl)-xl*yl-xh*yh to the middle of z
1300 | 	@ The middle term enters at bit 95 = 2*32 + 31, i.e. z word 2 (already in r7) shifted left by 31
1301 | 	adds	r7, r7, r1, lsl #31
1302 | @ 364
1303 | 	@ Shift down by 1 bit
1304 | 	@ (lsr and orr without the s suffix keep the carry from the adds above)
1305 | 	lsr	r1,  #1;	orr	r1, r1, r2, lsl #31
1306 | 	lsr	r2,  #1;	orr	r2, r2, r3, lsl #31
1307 | 	lsr	r3,  #1;	orr	r3, r3, r4, lsl #31
1308 | 	lsr	r4,  #1;	orr	r4, r4, r5, lsl #31
1309 | 	lsr	r5,  #1;	orr	r5, r5, r6, lsl #31
1310 | 	lsr	r6,  #1
1311 | @ 375
1312 | 	@ Add to middle of z
1313 | 	@ Note: operations on r7 already done above to save one cycle
1314 | 
1315 | 	ldr	r8,  [r0, #12]
1316 | 	ldr	r9,  [r0, #16]
1317 | 
1318 | 	adcs	r8, r1
1319 | 	adcs	r9, r2
1320 | 
1321 | 	str	r7,  [r0,  #8]
1322 | 	str	r8,  [r0, #12]
1323 | 	str	r9,  [r0, #16]
1324 | 
1325 | 	ldr	r7,  [r0, #20]
1326 | 	ldr	r8,  [r0, #24]
1327 | 	ldr	r9,  [r0, #28]
1328 | 	ldr	r10, [r0, #32]
1329 | 	ldr	r11, [r0, #36]
1330 | 	ldr	r12, [r0, #40]
1331 | 	ldr	r14, [r0, #44]
1332 | @ 391
1333 | 	@ Shift down top half of z by 2 bits before adding
1334 | 	@ xh*yh was stored from bit 192 (word 6) but belongs at bit 190 = 2*95
1335 | 	orr	r7,  r7,  r8,  lsl #30;		lsr	r8,  #2
1336 | 	orr	r8,  r8,  r9,  lsl #30;		lsr	r9,  #2
1337 | 	orr	r9,  r9,  r10, lsl #30;		lsr	r10, #2
1338 | 	orr	r10, r10, r11, lsl #30;		lsr	r11, #2
1339 | 	orr	r11, r11, r12, lsl #30;		lsr	r12, #2
1340 | 	orr	r12, r12, r14, lsl #30;		lsr	r14, #2
1341 | 
1342 | 	adcs	r7,  r3
1343 | 	adcs	r8,  r4
1344 | 	adcs	r9,  r5
1345 | 	adcs	r10, r6
1346 | @ 407
1347 | 	@ Propagate carry
1348 | 
1349 | 	adcs	r11, #0
1350 | 	adcs	r12, #0
1351 | 	adc	r14, #0
1352 | 
1353 | 	@ Deallocate stack
1354 | 
1355 | 	add	sp, #48
1356 | 
1357 | 	@ Save high three quarters of z
1358 | 
1359 | 	str	r7,  [r0, #20]
1360 | 	str	r8,  [r0, #24]
1361 | 	str	r9,  [r0, #28]
1362 | 	str	r10, [r0, #32]
1363 | 	str	r11, [r0, #36]
1364 | 	str	r12, [r0, #40]
1365 | 	str	r14, [r0, #44]
1366 | @ 418
1367 | 	pop	{ r0, pc }
1368 | @ 422
1369 | 
1370 | .size   m190, .-m190
1371 | 
1372 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1373 | @
1374 | @ m96: Multiply 96x96->192
1375 | @   Three words are read from each input; six words are written to the output
1376 | @ asm-only version using umull
1377 | @
1378 | @ Output pointer in r0
1379 | @ Input pointers in r1, r2
1380 | @   Both inputs are fully loaded into registers before the first store to the output
1381 | @ Preserves r0
1382 | @   r1-r12 are overwritten without being saved; returns with bx lr
1383 | @ Register assignments
1384 | @
1385 | @	r12	ro	zero
1386 | @	r11	ro	y2
1387 | @	r10	ro	y1
1388 | @	r9	ro	y0
1389 | @	r8	ro	x2
1390 | @	r7	ro	x1
1391 | @	r6	ro	x0
1392 | @	r5	rw	ah	accumulator, high word
1393 | @	r4	rw	am	accumulator, middle word
1394 | @	r3	rw	al	accumulator, low word
1395 | @	r2	rw	ph	partial product, high word
1396 | @	r1	rw	pl	partial product, low word
1397 | @	r0	ro		&z
1398 | @ Output words are produced lowest first, one column of partial products (equal i+j) at a time
1399 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1400 | 
1401 | #define	zero	r12
1402 | #define	y2	r11
1403 | #define	y1	r10
1404 | #define	y0	r9
1405 | #define	x2	r8
1406 | #define	x1	r7
1407 | #define	x0	r6
1408 | #define	ah	r5
1409 | #define	am	r4
1410 | #define	al	r3
1411 | #define	ph	r2
1412 | #define	pl	r1
1413 | 
1414 | .align 3
1415 | .syntax unified
1416 | .thumb
1417 | .thumb_func
1418 | m96:
1419 | 	mov	zero, #0
1420 | 
1421 | 	ldr	x0, [r1,  #0]
1422 | 	ldr	x1, [r1,  #4]
1423 | 	ldr	x2, [r1,  #8]
1424 | 
1425 | 	ldr	y0, [r2,  #0]
1426 | 	ldr	y1, [r2,  #4]
1427 | 	ldr	y2, [r2,  #8]
1428 | @ 8
1429 | 	umull	pl, ph, x0, y0	@ x0y0
1430 | 	@ Column 0: the low word of x0y0 is output word 0
1431 | 	str	pl, [r0, #0]
1432 | @ 14
1433 | 	umull	al, am, x0, y1	@ x0y1
1434 | 	@ Column 1: add the high word carried over from column 0
1435 | 	adds	al, ph
1436 | 	adc	am, zero	@ no overflow
1437 | @ 21
1438 | 	umull	pl, ph, x1, y0	@ x1y0
1439 | 
1440 | 	adds	al, pl
1441 | 	adcs	am, ph
1442 | 	adc	ah, zero, zero	@ start the high accumulator word from the carry alone
1443 | 
1444 | 	str	al, [r0, #4]
1445 | @ 30
1446 | 	@ Column 2: the accumulator shifts down one word as part of the first addition
1447 | 	umull	pl, ph, x0, y2	@ x0y2
1448 | 
1449 | 	adds	al, am, pl
1450 | 	adc	am, ah, ph	@ no overflow
1451 | @ 37
1452 | 	umull	pl, ph, x1, y1	@ x1y1
1453 | 
1454 | 	adds	al, pl
1455 | 	adcs	am, ph
1456 | 	adc	ah, zero, zero
1457 | @ 45
1458 | 	umull	pl, ph, x2, y0	@ x2y0
1459 | 
1460 | 	adds	al, pl
1461 | 	adcs	am, ph
1462 | 	adc	ah, zero	@ accumulate the carry into the existing high word
1463 | 
1464 | 	str	al, [r0, #8]
1465 | @ 54
1466 | 	@ Column 3
1467 | 	umull	pl, ph, x1, y2	@ x1y2
1468 | 
1469 | 	adds	al, am, pl
1470 | 	adcs	am, ah, ph
1471 | 	adc	ah, zero, zero
1472 | @ 62
1473 | 	umull	pl, ph, x2, y1	@ x2y1
1474 | 
1475 | 	adds	al, pl
1476 | 	adcs	am, ph
1477 | 	adc	ah, zero
1478 | 
1479 | 	str	al, [r0, #12]
1480 | @ 71
1481 | 	@ Column 4: the last product supplies output words 4 and 5
1482 | 	umull	pl, ph, x2, y2	@ x2y2
1483 | 
1484 | 	adds	al, am, pl
1485 | 	adc	am, ah, ph
1486 | @ 78
1487 | 	str	al, [r0, #16]
1488 | 	str	am, [r0, #20]
1489 | @ 80
1490 | 	bx	lr
1491 | @ 82
1492 | 
1493 | #undef zero
1494 | #undef y2
1495 | #undef y1
1496 | #undef y0
1497 | #undef x2
1498 | #undef x1
1499 | #undef x0
1500 | #undef ah
1501 | #undef am
1502 | #undef al
1503 | #undef ph
1504 | #undef pl
1505 | 
1506 | .size   m96, .-m96
1507 | 


--------------------------------------------------------------------------------
/bls12_377/src/fp_mont.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdint.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | 
  5 | #define uint128_t __uint128_t
  6 | 
  7 | #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  8 | #error This code depends on little-endian word order
  9 | #endif
 10 | 
 11 | // Multiply a (32 bit) by b (64 bit) and accumulate into the 96 bit value [o0, o1, o2]:
 12 | //   o2:o1:o0 = a*b + o0 + o1 + (o2 << 32)
 13 | // o0 is the existing 32 bit value; o1,o2 is the 64 bit carry (o1 has the same weight as o0).
 14 | // The sum is at most 2^96 - 1, so it always fits the three output words.
 15 | inline
 16 | void umaal96(
 17 |     uint32_t& o0,
 18 |     uint32_t& o1,
 19 |     uint32_t& o2,
 20 |     uint32_t a,
 21 |     uint64_t b
 22 |  ) {
 23 | #if __arm__
 24 |     #ifdef HAVE_UMAAL
 25 | 	const uint32_t b0 = (uint32_t) b;
 26 | 	const uint32_t b1 = (uint32_t) (b>>32);
 27 | 	// UMAAL lo, hi, n, m computes hi:lo = n*m + lo + hi; two of them chain through o1.
 28 |         asm (
 29 |             "UMAAL %[o0], %[o1], %[a0], %[b0]"
 30 |             : [o0] "+r" (o0),
 31 |             [o1] "+r" (o1)
 32 |             : [a0] "r" (a),
 33 |             [b0] "r" (b0)
 34 |         );
 35 | 
 36 |         asm (
 37 |             "UMAAL %[o1], %[o2], %[a0], %[b1]"
 38 |             : [o1] "+r" (o1),
 39 |             [o2] "+r" (o2)
 40 |             : [a0] "r" (a),
 41 |             [b1] "r" (b1)
 42 |         );
 43 |     #else
 44 | 	uint32_t b0, b1, p0, p1;	// plain locals: the register storage class was removed in C++17
 45 | 
 46 | 	b0 = b;
 47 | 	b1 = b >> 32;
 48 | 
 49 | 	asm (
 50 | 	    // b0:o0 = o0 + a*b0
 51 | 	    "UMULL	%[p0], %[p1], %[a], %[b0]\n\t"
 52 | 	    "ADDS	%[o0], %[p0]\n\t"
 53 | 	    "ADC	%[b0], %[p1], #0\n\t"
 54 | 	    : [p0] "=&r" (p0)
 55 | 	    , [p1] "=&r" (p1)
 56 | 	    , [o0]  "+r" (o0)
 57 | 	    , [b0]  "+r" (b0)
 58 | 
 59 | 	    : [a]    "r" (a)
 60 | 
 61 | 	    : "cc"
 62 | 	);
 63 | 
 64 | 	asm (
 65 | 	    // p1:p0 = o2 + a*b1
 66 | 	    "UMULL	%[p0], %[p1], %[a], %[b1]\n\t"
 67 | 	    "ADDS	%[p0], %[o2]\n\t"
 68 | 	    "ADC	%[p1], #0\n\t"
 69 | 	    : [p0] "=&r" (p0)
 70 | 	    , [p1] "=&r" (p1)
 71 | 
 72 | 	    : [a]    "r" (a)
 73 | 	    , [b1]   "r" (b1)
 74 | 	    , [o2]   "r" (o2)
 75 | 
 76 | 	    : "cc"
 77 | 	);
 78 | 
 79 | 	asm (
 80 | 	    // o2:o1:o0 = b0:o0 + p1:p0:o1
 81 | 	    "ADDS	%[o0], %[o1]\n\t"
 82 | 	    "ADCS	%[o1], %[b0], %[p0]\n\t"
 83 | 	    "ADC	%[o2], %[p1], #0\n\t"
 84 | 
 85 | 	    : [o0] "+r" (o0)
 86 | 	    , [o1] "+r" (o1)
 87 | 	    , [o2] "+r" (o2)
 88 | 
 89 | 	    : [p0]  "r" (p0)
 90 | 	    , [p1]  "r" (p1)
 91 | 	    , [b0]  "r" (b0)
 92 | 
 93 | 	    : "cc"
 94 | 	);
 95 |     #endif
 96 | #else
 97 |     uint128_t ret = ((uint128_t)a * (uint128_t)b) + (uint128_t)o0 + (uint128_t)o1 +  (((uint128_t)o2)<<32);
 98 |     o0 = (uint32_t)ret;
 99 |     o1 = (uint32_t)(ret >> 32);
100 |     o2 = (uint32_t)(ret >> 64);
101 | #endif
102 | }
103 | 
104 | // Seeded variant: the incoming value of o0 is ignored and replaced by c,
105 | // after which the five-argument umaal96 accumulates a*b into o2:o1:o0.
106 | inline
107 | void umaal96(uint32_t& o0, uint32_t& o1, uint32_t& o2,
108 |              uint32_t a, uint64_t b, uint32_t c)
109 | {
110 |     // Load the addend first so the shared routine sees it as the
111 |     // existing low word.
112 |     o0 = c;
113 |     umaal96(o0, o1, o2, a, b);
114 | }
115 | 
116 | #if __arm__
117 | // not needed
118 | #else
119 | 
120 | // output = left + right over n 32-bit limbs (index 0 is least significant); returns the carry out.
121 | inline uint64_t add32(uint32_t* output, const uint32_t* left, const uint32_t* right, int n) {
122 |     uint64_t acc = 0;  // low 32 bits: current limb, bits above: carry into the next limb
123 |     for (int idx = 0; idx < n; ++idx) {
124 |         acc += static_cast<uint64_t>(left[idx]) + right[idx];
125 |         output[idx] = static_cast<uint32_t>(acc);
126 |         acc >>= 32;
127 |     }
128 |     return acc;
129 | }
130 | 
131 | // Three-operand form: output = a + b + c over n limbs; the returned carry can exceed 1.
132 | inline uint64_t add32(uint32_t* output, const uint32_t* a, const uint32_t* b, const uint32_t* c, int n) {
133 |     uint64_t acc = 0;
134 |     for (int idx = 0; idx < n; ++idx) {
135 |         acc += static_cast<uint64_t>(a[idx]) + b[idx] + c[idx];  // all terms widen to 64 bits
136 |         output[idx] = static_cast<uint32_t>(acc);
137 |         acc >>= 32;
138 |     }
139 |     return acc;
140 | }
141 | #endif
142 | // In-place 64-bit accumulate: output[0..1] += b[0..1] + c0. Returns the carry out of the two-word sum.
143 | inline
144 | uint32_t acc_2_2_1(uint32_t* output, const uint32_t* b, uint32_t c0) {
145 | #if __arm__
146 |     uint32_t carry = 0;
147 |     uint32_t t0 = output[0];
148 |     uint32_t t1 = output[1];
149 |     uint32_t b0 = b[0];
150 |     uint32_t b1 = b[1];
151 |     asm (
152 |         "ADDS %[t0], %[b0]\n\t"
153 |         "ADCS %[t1], %[b1]\n\t"
154 |         "ADC %[carry], #0"
155 |         : [carry] "+r" (carry),
156 |           [t0] "+r" (t0),
157 |           [t1] "+r" (t1)
158 |         : [b0] "r" (b0), [b1] "r" (b1)
159 |         : "cc"  // ADDS/ADCS overwrite the condition flags, so the compiler must be told
160 |     );
161 |     asm (
162 |         "ADDS %[t0], %[c0]\n\t"
163 |         "ADCS %[t1], #0\n\t"
164 |         "ADC %[carry], #0"
165 |         : [carry] "+r" (carry),
166 |           [t0] "+r" (t0), [t1] "+r" (t1)
167 |         : [c0] "r" (c0)
168 |         : "cc"  // flags are clobbered here as well
169 |     );
170 |     output[0] = t0;
171 |     output[1] = t1;
172 |     return carry;
173 | #else
174 |     uint32_t _a[] = {output[0], output[1]};
175 |     uint32_t _b[] = {b[0], b[1]};
176 |     uint32_t _c[] = {c0, 0};
177 |     return add32(output, _a, _b, _c, 2);
178 | #endif
179 | }
180 | // output[0..1] = a[0..1] + b[0..1] + c0 as a 64-bit sum; any carry out of the top word is dropped.
181 | inline
182 | void add_2_2_1(uint32_t* output, uint32_t* a, const uint32_t* b, uint32_t c0) {
183 | #if __arm__
184 |     uint32_t t0 = 0;
185 |     uint32_t t1 = 0;
186 |     uint32_t a0 = a[0];
187 |     uint32_t a1 = a[1];
188 |     uint32_t b0 = b[0];
189 |     uint32_t b1 = b[1];
190 |     asm (
191 |         "ADDS %[t0], %[a0], %[b0]\n\t"
192 |         "ADC %[t1], %[a1], %[b1]\n\t"
193 |         : [t0] "+r" (t0), [t1] "+r" (t1)
194 |         : [a0] "r" (a0),
195 |           [a1] "r" (a1),
196 |           [b0] "r" (b0),
197 |           [b1] "r" (b1)
198 |         : "cc"  // ADDS overwrites the condition flags, so the compiler must be told
199 |     );
200 |     asm (
201 |         "ADDS %[t0], %[c0]\n\t"
202 |         "ADC  %[t1], #0 \n\t"
203 |         : [t0] "+r" (t0), [t1] "+r" (t1)
204 |         : [c0] "r" (c0)
205 |         : "cc"  // flags are clobbered here as well
206 |     );
207 |     output[0] = t0;
208 |     output[1] = t1;
209 | #else
210 |     uint32_t _a[] = {a[0], a[1]};
211 |     uint32_t _b[] = {b[0], b[1]};
212 |     uint32_t _c[] = {c0, 0};
213 |     add32(output, _a, _b, _c, 2);
214 | #endif
215 | }
216 | // Montgomery reduction: output[0..11] is congruent to t * 2^-384 mod p; no final conditional subtraction is done here.
217 | extern "C"
218 | void fp_redc(uint32_t* output, uint32_t* t) {  // t: 24 words, used as scratch and modified; output: 12 words
219 |     static const uint64_t inv = 9586122913090633727ull;  // inv * p == -1 (mod 2^64)
220 |     static const uint32_t modulus32[12] = {  // p as 32-bit limbs, least-significant first
221 |          0x00000001, 0x8508c000,
222 |          0x30000000, 0x170b5d44,
223 |          0xba094800, 0x1ef3622f,
224 |          0x00f5138f, 0x1a22d9f3,
225 |          0x6ca1493b, 0xc63b05c0,
226 |          0x17c510ea, 0x01ae3a46,
227 |     };
228 |     uint32_t altcarry = 0;  // carry out of the previous round's top two words
229 |     // Rounds 0..4: each adds k*p so that the two lowest live words cancel, then advances by two words.
230 |     for(int i=0; i<5; ++i){
231 |         uint32_t* r = t + 2*i;
232 |         const uint64_t k = (((uint64_t)r[1] << 32) | r[0]) * inv;  // built from the limbs: no uint64_t read through a uint32_t pointer
233 |         uint32_t carry[2] = {0};
234 |         uint32_t _;
235 |         // The first two results are zero by construction and are discarded into _.
236 |         umaal96(_,     carry[0], carry[1], modulus32[0],  k, r[0]);
237 |         umaal96(_,     carry[0], carry[1], modulus32[1],  k, r[1]);
238 |         umaal96(r[2],  carry[0], carry[1], modulus32[2],  k);
239 |         umaal96(r[3],  carry[0], carry[1], modulus32[3],  k);
240 |         umaal96(r[4],  carry[0], carry[1], modulus32[4],  k);
241 |         umaal96(r[5],  carry[0], carry[1], modulus32[5],  k);
242 |         umaal96(r[6],  carry[0], carry[1], modulus32[6],  k);
243 |         umaal96(r[7],  carry[0], carry[1], modulus32[7],  k);
244 |         umaal96(r[8],  carry[0], carry[1], modulus32[8],  k);
245 |         umaal96(r[9],  carry[0], carry[1], modulus32[9],  k);
246 |         umaal96(r[10], carry[0], carry[1], modulus32[10], k);
247 |         umaal96(r[11], carry[0], carry[1], modulus32[11], k);
248 |         altcarry = acc_2_2_1(&r[12], carry, altcarry);
249 |     }
250 |     // Round 5: same step, but the ten surviving words go straight to output.
251 |     {
252 |         uint32_t* r = t + 10;
253 |         const uint64_t k = (((uint64_t)r[1] << 32) | r[0]) * inv;
254 |         uint32_t carry[2] = {0};
255 |         uint32_t _;
256 | 
257 |         umaal96(_,         carry[0], carry[1], modulus32[0],  k, r[0]);
258 |         umaal96(_,         carry[0], carry[1], modulus32[1],  k, r[1]);
259 |         umaal96(output[0], carry[0], carry[1], modulus32[2],  k, r[2]);
260 |         umaal96(output[1], carry[0], carry[1], modulus32[3],  k, r[3]);
261 |         umaal96(output[2], carry[0], carry[1], modulus32[4],  k, r[4]);
262 |         umaal96(output[3], carry[0], carry[1], modulus32[5],  k, r[5]);
263 |         umaal96(output[4], carry[0], carry[1], modulus32[6],  k, r[6]);
264 |         umaal96(output[5], carry[0], carry[1], modulus32[7],  k, r[7]);
265 |         umaal96(output[6], carry[0], carry[1], modulus32[8],  k, r[8]);
266 |         umaal96(output[7], carry[0], carry[1], modulus32[9],  k, r[9]);
267 |         umaal96(output[8], carry[0], carry[1], modulus32[10], k, r[10]);
268 |         umaal96(output[9], carry[0], carry[1], modulus32[11], k, r[11]);
269 |         add_2_2_1(&output[10], &r[12], carry, altcarry);  // top two words: t[22..23] + carry + altcarry
270 |     }
271 | }
272 | 


--------------------------------------------------------------------------------
/bls12_377/src/fp_mont.h:
--------------------------------------------------------------------------------
 1 | #ifndef FPC_H
 2 | #define FPC_H
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | #if defined(__cplusplus) || defined(c_plusplus)
 7 | extern "C" {
 8 | #endif
 9 | // First argument: result buffer; the definition in fp_mont.cpp writes 12 words (output[0..11]).
10 | // Montgomery reduction function
11 | // Second argument: 24-word input; the definition uses it as scratch, so its contents are modified.
12 | void fp_redc(uint32_t *, uint32_t *);
13 | 
14 | #if defined(__cplusplus) || defined(c_plusplus)
15 | }
16 | #endif
17 | 
18 | #endif // FPC_H
19 | 


--------------------------------------------------------------------------------
/bls12_377/src/fpc.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdint.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | 
  5 | #define uint128_t __uint128_t
  6 | 
  7 | #define restrict
  8 | 
  9 | inline
 10 | void umaal96(uint32_t& restrict o0,
 11 |              uint32_t& restrict o1,
 12 |              uint32_t& restrict o2,
 13 |              uint32_t a,
 14 |              uint64_t b) {
 15 | // multiply a32 * b64 and accumulate in the 96 bit value stored in [o0, o1, o2]:
 16 | // o2:o1:o0 = a*b + o0 + o1 + (o2 << 32); o0 is the existing 32 bit value,
 17 | // o1,o2 is the 64 bit carry. The sum is at most 2^96 - 1, so it always fits.
 18 | #if __arm__
 19 |     const uint32_t b0 = (uint32_t) b;
 20 |     const uint32_t b1 = (uint32_t) (b>>32);
 21 |     #ifdef HAVE_UMAAL
 22 |         asm (
 23 |             "UMAAL %[o0], %[o1], %[a0], %[b0]"
 24 |             : [o0] "+r" (o0),
 25 |             [o1] "+r" (o1)
 26 |             : [a0] "r" (a),
 27 |             [b0] "r" (b0)
 28 |         );
 29 | 
 30 |         asm (
 31 |             "UMAAL %[o1], %[o2], %[a0], %[b1]"
 32 |             : [o1] "+r" (o1),
 33 |             [o2] "+r" (o2)
 34 |             : [a0] "r" (a),
 35 |             [b1] "r" (b1)
 36 |         );
 37 |     #else
 38 |         asm (
 39 |             "ADDS %[o0], %[o1]\n\t"
 40 |             "MOV  %[o1], #0\n\t"
 41 |             "UMLAL %[o0], %[o1], %[a0], %[b0]\n\t"
 42 |             "ADCS %[o1], %[o2]\n\t"
 43 |             "MOV  %[o2], #0\n\t"
 44 |             "ADC  %[o2], #0\n\t"
 45 |             "UMLAL %[o1], %[o2], %[a0], %[b1]\n\t"
 46 |             : [o0] "+r" (o0),
 47 |               [o1] "+r" (o1),
 48 |               [o2] "+r" (o2)
 49 |             : [a0] "r" (a),
 50 |               [b0] "r" (b0), [b1] "r" (b1)
 51 |             : "cc"  // ADDS/ADCS overwrite the condition flags, so the compiler must be told
 52 |         );
 53 |     #endif
 54 | #else
 55 |     uint128_t ret = ((uint128_t)a * (uint128_t)b) + (uint128_t)o0 + (uint128_t)o1 + (((uint128_t)o2)<<32);
 56 |     o0 = (uint32_t)ret;
 57 |     o1 = (uint32_t)(ret >> 32);
 58 |     o2 = (uint32_t)(ret >> 64);
 59 | #endif
 60 | }
 61 | 
 62 | // Seeded variant: the incoming value of o0 is ignored and replaced by c,
 63 | // after which the five-argument umaal96 accumulates a*b into o2:o1:o0.
 64 | inline
 65 | void umaal96(uint32_t& restrict o0, uint32_t& restrict o1, uint32_t& restrict o2,
 66 |              uint32_t a, uint64_t b, uint32_t c)
 67 | {
 68 |     // Load the addend first so the shared routine sees it as the
 69 |     // existing low word.
 70 |     o0 = c;
 71 |     umaal96(o0, o1, o2, a, b);
 72 | }
 73 | 
 74 | 
 75 | inline
 76 | void umaal96(uint32_t& restrict o0,
 77 |              uint32_t& restrict o1,
 78 |              uint32_t& restrict o2,
 79 |              uint32_t a,
 80 |              uint64_t b, 
 81 |              uint32_t c,
 82 |              uint32_t d)
 83 | {
 84 |     o0 = c;
 85 |     o1 = d;
 86 |     o2 = 0;
 87 |     umaal96(o0, o1, o2, a, b);
 88 | }
 89 | 
 90 | inline
 91 | void umull96(uint32_t& restrict o0,
 92 |              uint32_t& restrict o1,
 93 |              uint32_t& restrict o2,
 94 |              uint32_t a,
 95 |              uint64_t b) {
 96 |     o0 = 0;
 97 |     o1 = 0;
 98 |     o2 = 0;
 99 |     umaal96(o0, o1, o2, a, b);
100 | }
101 | 
102 | 
103 | inline
104 | void umlal96(uint32_t& restrict o0,
105 |              uint32_t& restrict o1,
106 |              uint32_t& restrict o2,
107 |              uint32_t a,
108 |              uint64_t b) {
109 |     o1 = 0;
110 |     o2 = 0;
111 |     umaal96(o0, o1, o2, a, b);
112 | }
113 | 
114 | #if __arm__
115 | // not needed
116 | #else
117 | 
// Adds two n-word integers: output = left + right.
//
// Words are processed from index 0 upward, with the overflow of each column
// carried into the next index. Both inputs of a column are read before the
// matching output word is written.
//
// Returns the carry left over after the last word (0 when n is 0).
inline
uint64_t add32(uint32_t* output, const uint32_t* left, const uint32_t* right, int n) {
    uint64_t running = 0;
    int word = 0;
    while (word < n) {
        const uint64_t column = static_cast<uint64_t>(left[word]) + static_cast<uint64_t>(right[word]);
        running += column;
        output[word] = static_cast<uint32_t>(running);  // keep the low 32 bits
        running >>= 32;                                 // the rest carries on
        ++word;
    }
    return running;
}
128 | 
// Adds three n-word integers: output = a + b + c.
//
// Words are processed from index 0 upward, with the overflow of each column
// carried into the next index. All three inputs of a column are read before
// the matching output word is written.
//
// Returns the carry left over after the last word (0 when n is 0).
inline
uint64_t add32(uint32_t* output, const uint32_t* a, const uint32_t* b, const uint32_t* c, int n) {
    uint64_t running = 0;
    int word = 0;
    while (word < n) {
        const uint64_t column = static_cast<uint64_t>(a[word])
                              + static_cast<uint64_t>(b[word])
                              + static_cast<uint64_t>(c[word]);
        running += column;
        output[word] = static_cast<uint32_t>(running);  // keep the low 32 bits
        running >>= 32;                                 // the rest carries on
        ++word;
    }
    return running;
}
139 | 
140 | #endif
141 | 
142 | inline
143 | uint32_t acc_2_2_1(uint32_t* restrict output, const uint32_t* restrict b, uint32_t c0) {
144 | #if __arm__
145 |     uint32_t carry = 0;
146 |     uint32_t t0 = output[0];
147 |     uint32_t t1 = output[1];
148 |     uint32_t b0 = b[0];
149 |     uint32_t b1 = b[1];
150 |     asm (
151 |         "ADDS %[t0], %[b0]\n\t"
152 |         "ADCS %[t1], %[b1]\n\t"
153 |         "ADC %[carry], #0"
154 |         : [carry] "+r" (carry),
155 |           [t0] "+r" (t0),
156 |           [t1] "+r" (t1)
157 |         : [b0] "r" (b0),
158 |           [b1] "r" (b1)
159 |     );
160 |     asm (
161 |         "ADDS %[t0], %[c0]\n\t"
162 |         "ADCS %[t1], #0\n\t"
163 |         "ADC %[carry], #0"
164 |         : [carry] "+r" (carry),
165 |           [t0] "+r" (t0),
166 |           [t1] "+r" (t1)
167 |         : [c0] "r" (c0)
168 |     );
169 |     output[0] = t0;
170 |     output[1] = t1;
171 |     return carry;
172 | #else
173 |     uint32_t _a[] = {output[0], output[1]};
174 |     uint32_t _b[] = {b[0], b[1]};
175 |     uint32_t _c[] = {c0, 0};
176 |     return add32(output, _a, _b, _c, 2);
177 | #endif
178 | }
179 | 
180 | inline
181 | void add_2_2_1(uint32_t* restrict output, uint32_t* restrict a, const uint32_t* restrict b, uint32_t c0) {
182 | #if __arm__
183 |     uint32_t t0 = 0;
184 |     uint32_t t1 = 0;
185 |     uint32_t a0 = a[0];
186 |     uint32_t a1 = a[1];
187 |     uint32_t b0 = b[0];
188 |     uint32_t b1 = b[1];
189 |     asm (
190 |         "ADDS %[t0], %[a0], %[b0]\n\t"
191 |         "ADC %[t1], %[a1], %[b1]\n\t"
192 |         : [t0] "+r" (t0),
193 |           [t1] "+r" (t1)
194 |         : [a0] "r" (a0),
195 |           [a1] "r" (a1),
196 |           [b0] "r" (b0),
197 |           [b1] "r" (b1)
198 |     );
199 |     asm (
200 |         "ADDS %[t0], %[c0]\n\t"
201 |         "ADC  %[t1], #0 \n\t"
202 |         : [t0] "+r" (t0),
203 |           [t1] "+r" (t1)
204 |         : [c0] "r" (c0)
205 |     );
206 |     output[0] = t0;
207 |     output[1] = t1;
208 | #else
209 |     uint32_t _a[] = {a[0], a[1]};
210 |     uint32_t _b[] = {b[0], b[1]};
211 |     uint32_t _c[] = {c0, 0};
212 |     add32(output, _a, _b, _c, 2);
213 | #endif
214 | }
215 | 
// Schoolbook multiplication of six 64-bit limbs by twelve 32-bit words.
//
//   output  24 words; receives the full product, least-significant word first
//   left    6 limbs of 64 bits; one limb is consumed per row below
//   right   12 words of 32 bits; every word is multiplied by the current limb
//
// Row i multiplies left[i] by right[0..11] and accumulates the partial
// products into columns o(2i) .. o(2i+11). carry0/carry1 hold the 64-bit
// running carry between columns, and at the end of a row they become the two
// new top columns. The two lowest columns of a row are final as soon as they
// are computed and are stored to `output` immediately; the last row stores
// everything that is left.
inline
void mul_hybrid(uint32_t* restrict output, const uint64_t* restrict left, const uint32_t* restrict right) {
    register uint32_t carry0;
    register uint32_t carry1;
    uint32_t o0;
    uint32_t o1;
    uint32_t o2;
    uint32_t o3;
    uint32_t o4;
    uint32_t o5;
    uint32_t o6;
    uint32_t o7;
    uint32_t o8;
    uint32_t o9;
    uint32_t o10;
    uint32_t o11;

    // Row 0: nothing to accumulate onto yet, so each column starts from 0.
    register uint64_t val = left[0];
    umull96(o0,  carry0, carry1, right[0],  val);
    output[0] = o0;
    umaal96(o1,  carry0, carry1, right[1],  val, 0);
    output[1] = o1;
    umaal96(o2,  carry0, carry1, right[2],  val, 0);
    umaal96(o3,  carry0, carry1, right[3],  val, 0);
    umaal96(o4,  carry0, carry1, right[4],  val, 0);
    umaal96(o5,  carry0, carry1, right[5],  val, 0);
    umaal96(o6,  carry0, carry1, right[6],  val, 0);
    umaal96(o7,  carry0, carry1, right[7],  val, 0);
    umaal96(o8,  carry0, carry1, right[8],  val, 0);
    umaal96(o9,  carry0, carry1, right[9],  val, 0);
    umaal96(o10, carry0, carry1, right[10], val, 0);
    umaal96(o11, carry0, carry1, right[11], val, 0);
    uint32_t o12 = carry0;
    uint32_t o13 = carry1;

    // Row 1: columns 2..13. umlal96 resets the carry for the first column.
    val = left[1];
    umlal96(o2,  carry0, carry1, right[0],  val);
    output[2] = o2;
    umaal96(o3,  carry0, carry1, right[1],  val);
    output[3] = o3;
    umaal96(o4,  carry0, carry1, right[2],  val);
    umaal96(o5,  carry0, carry1, right[3],  val);
    umaal96(o6,  carry0, carry1, right[4],  val);
    umaal96(o7,  carry0, carry1, right[5],  val);
    umaal96(o8,  carry0, carry1, right[6],  val);
    umaal96(o9,  carry0, carry1, right[7],  val);
    umaal96(o10, carry0, carry1, right[8],  val);
    umaal96(o11, carry0, carry1, right[9],  val);
    umaal96(o12, carry0, carry1, right[10], val);
    umaal96(o13, carry0, carry1, right[11], val);
    uint32_t o14 = carry0;
    uint32_t o15 = carry1;

    // Row 2: columns 4..15.
    val = left[2];
    umlal96(o4,  carry0, carry1, right[0],  val);
    output[4] = o4;
    umaal96(o5,  carry0, carry1, right[1],  val);
    output[5] = o5;
    umaal96(o6,  carry0, carry1, right[2],  val);
    umaal96(o7,  carry0, carry1, right[3],  val);
    umaal96(o8,  carry0, carry1, right[4],  val);
    umaal96(o9,  carry0, carry1, right[5],  val);
    umaal96(o10, carry0, carry1, right[6],  val);
    umaal96(o11, carry0, carry1, right[7],  val);
    umaal96(o12, carry0, carry1, right[8],  val);
    umaal96(o13, carry0, carry1, right[9],  val);
    umaal96(o14, carry0, carry1, right[10], val);
    umaal96(o15, carry0, carry1, right[11], val);
    uint32_t o16 = carry0;
    uint32_t o17 = carry1;


    // Row 3: columns 6..17.
    val = left[3];
    umlal96(o6,  carry0, carry1, right[0],  val);
    output[6] = o6;
    umaal96(o7,  carry0, carry1, right[1],  val);
    output[7] = o7;
    umaal96(o8,  carry0, carry1, right[2],  val);
    umaal96(o9,  carry0, carry1, right[3],  val);
    umaal96(o10, carry0, carry1, right[4],  val);
    umaal96(o11, carry0, carry1, right[5],  val);
    umaal96(o12, carry0, carry1, right[6],  val);
    umaal96(o13, carry0, carry1, right[7],  val);
    umaal96(o14, carry0, carry1, right[8],  val);
    umaal96(o15, carry0, carry1, right[9],  val);
    umaal96(o16, carry0, carry1, right[10], val);
    umaal96(o17, carry0, carry1, right[11], val);
    uint32_t o18 = carry0;
    uint32_t o19 = carry1;

    // Row 4: columns 8..19.
    val = left[4];
    umlal96(o8,  carry0, carry1, right[0],  val);
    output[8] = o8;
    umaal96(o9,  carry0, carry1, right[1],  val);
    output[9] = o9;
    umaal96(o10, carry0, carry1, right[2],  val);
    umaal96(o11, carry0, carry1, right[3],  val);
    umaal96(o12, carry0, carry1, right[4],  val);
    umaal96(o13, carry0, carry1, right[5],  val);
    umaal96(o14, carry0, carry1, right[6],  val);
    umaal96(o15, carry0, carry1, right[7],  val);
    umaal96(o16, carry0, carry1, right[8],  val);
    umaal96(o17, carry0, carry1, right[9],  val);
    umaal96(o18, carry0, carry1, right[10], val);
    umaal96(o19, carry0, carry1, right[11], val);
    uint32_t o20 = carry0;
    uint32_t o21 = carry1;

    // Row 5 (last): columns 10..21. No later row touches these columns, so
    // each one is stored as soon as it is computed.
    val = left[5];
    umlal96(o10, carry0, carry1, right[0],  val);
    output[10] = o10;
    umaal96(o11, carry0, carry1, right[1],  val);
    output[11] = o11;
    umaal96(o12, carry0, carry1, right[2],  val);
    output[12] = o12;
    umaal96(o13, carry0, carry1, right[3],  val);
    output[13] = o13;
    umaal96(o14, carry0, carry1, right[4],  val);
    output[14] = o14;
    umaal96(o15, carry0, carry1, right[5],  val);
    output[15] = o15;
    umaal96(o16, carry0, carry1, right[6],  val);
    output[16] = o16;
    umaal96(o17, carry0, carry1, right[7],  val);
    output[17] = o17;
    umaal96(o18, carry0, carry1, right[8],  val);
    output[18] = o18;
    umaal96(o19, carry0, carry1, right[9],  val);
    output[19] = o19;
    umaal96(o20, carry0, carry1, right[10], val);
    output[20] = o20;
    umaal96(o21, carry0, carry1, right[11], val);
    output[21] = o21;
    // The final carry forms the two most significant words of the product.
    output[22] = carry0;
    output[23] = carry1;
}
352 | 
// Montgomery reduction of a 24-word value to 12 words.
//
//   output  12 words; receives the result, least-significant word first
//   t       24 words, least-significant word first; used as scratch and
//           modified in place
//
// Six rounds are run, each working on a 64-bit-aligned window r = t + 2*i.
// A round picks k so that r[0..1] + k * modulus has zero low 64 bits, adds
// k * modulus across r[0..11], and pushes the resulting 64-bit carry into
// r[12..13]. The first five rounds update t in place; the sixth writes the
// surviving words to `output`.
//
// NOTE(review): no conditional subtraction of the modulus is performed here,
// so the result is presumably only partially reduced -- confirm what the
// caller expects.
void montgomery_reduce(uint32_t* restrict output, uint32_t* t) {
    // inv * modulus == -1 (mod 2^64): inv is 0x8508bfffffffffff and the low
    // 64 bits of the modulus are 0x8508c00000000001.
    const static uint64_t inv = 9586122913090633727ull;
    // The modulus as twelve 32-bit words, least-significant first (each word
    // is held in a uint64_t slot and narrowed when passed to umaal96).
    const static uint64_t modulus32[12] = {
         0x00000001, 0x8508c000,
         0x30000000, 0x170b5d44,
         0xba094800, 0x1ef3622f,
         0x00f5138f, 0x1a22d9f3,
         0x6ca1493b, 0xc63b05c0,
         0x17c510ea, 0x01ae3a46,
    };
    // Overflow out of r[12..13] from one round, fed into the next round.
    register uint32_t altcarry = 0;

    for(int i=0; i<5; ++i){
        uint32_t* r = t + 2*i;
        // Reads r[0] and r[1] as one 64-bit value.
        // NOTE(review): assumes little-endian word order and that t is
        // suitably aligned for a uint64_t load -- confirm.
        register uint64_t k = *(uint64_t*)r * inv;
        register uint32_t carry[2] = {0};
        uint32_t _;

        // The two lowest result words are discarded into `_`; only the carry
        // they generate is kept.
        umaal96(_,     carry[0], carry[1], modulus32[0],  k, r[0]);
        umaal96(_,     carry[0], carry[1], modulus32[1],  k, r[1]);
        // Remaining words accumulate k * modulus in place.
        umaal96(r[2],  carry[0], carry[1], modulus32[2],  k);
        umaal96(r[3],  carry[0], carry[1], modulus32[3],  k);
        umaal96(r[4],  carry[0], carry[1], modulus32[4],  k);
        umaal96(r[5],  carry[0], carry[1], modulus32[5],  k);
        umaal96(r[6],  carry[0], carry[1], modulus32[6],  k);
        umaal96(r[7],  carry[0], carry[1], modulus32[7],  k);
        umaal96(r[8],  carry[0], carry[1], modulus32[8],  k);
        umaal96(r[9],  carry[0], carry[1], modulus32[9],  k);
        umaal96(r[10], carry[0], carry[1], modulus32[10], k);
        umaal96(r[11], carry[0], carry[1], modulus32[11], k);
        // r[12..13] += carry + altcarry; remember the new overflow.
        altcarry = acc_2_2_1(&r[12], carry, altcarry);
    }

    // Sixth round: same computation on the window at t + 10, but the words
    // are seeded from r[] and written straight to `output`.
    {
        uint32_t* r = t + 10;
        uint64_t k = *(uint64_t*)r * inv;
        uint32_t carry[2] = {0};
        uint32_t _;

        umaal96(_,         carry[0], carry[1], modulus32[0],  k, r[0]);
        umaal96(_,         carry[0], carry[1], modulus32[1],  k, r[1]);
        umaal96(output[0], carry[0], carry[1], modulus32[2],  k, r[2]);
        umaal96(output[1], carry[0], carry[1], modulus32[3],  k, r[3]);
        umaal96(output[2], carry[0], carry[1], modulus32[4],  k, r[4]);
        umaal96(output[3], carry[0], carry[1], modulus32[5],  k, r[5]);
        umaal96(output[4], carry[0], carry[1], modulus32[6],  k, r[6]);
        umaal96(output[5], carry[0], carry[1], modulus32[7],  k, r[7]);
        umaal96(output[6], carry[0], carry[1], modulus32[8],  k, r[8]);
        umaal96(output[7], carry[0], carry[1], modulus32[9],  k, r[9]);
        umaal96(output[8], carry[0], carry[1], modulus32[10], k, r[10]);
        umaal96(output[9], carry[0], carry[1], modulus32[11], k, r[11]);
        // output[10..11] = r[12..13] + carry + altcarry; any overflow beyond
        // 64 bits is dropped by add_2_2_1.
        add_2_2_1(&output[10], &r[12], carry, altcarry);
    }
}
407 | 
408 | extern "C" void c_mul(uint64_t* restrict output, const uint64_t* restrict left, const uint64_t* restrict right) {
409 |     mul_hybrid((uint32_t*)output, left, (const uint32_t*)right);
410 | }
411 | 
412 | extern "C" void c_montgomry(uint64_t* restrict output, uint64_t* restrict tmp) {
413 |     montgomery_reduce((uint32_t*)output, (uint32_t*)tmp);
414 | }
415 | 


--------------------------------------------------------------------------------
/bls12_377/src/fq_asm.s:
--------------------------------------------------------------------------------
  1 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  2 | @
  3 | @ Low-level operations on Fq values
  4 | @
  5 | @ Each Fq value is stored as a word-aligned 8-word array
  6 | @
  7 | @ All functions work correctly with repeated arguments,
  8 | @ like e.g. fq_sum(x, x, x)
  9 | @
 10 | @ All functions should take constant time on ARM SC300
 11 | @
 12 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 13 | 
 14 | .text
 15 | 
 16 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 17 | @
 18 | @ fq_is_zero
 19 | @
 20 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 21 | 
 22 | .align 3
 23 | .global fq_is_zero
 24 | .syntax unified
 25 | .thumb
 26 | .thumb_func
 27 | .type fq_is_zero,	%function
 28 | 
 29 | fq_is_zero:
 30 | 	ldr	r1, [r0,  #0]
 31 | 
 32 | 	ldr	r2, [r0,  #4];	ldr	r3, [r0,  #8];	orr	r1, r2;	orr	r1, r3
 33 | 	ldr	r2, [r0, #12];	ldr	r3, [r0, #16];	orr	r1, r2;	orr	r1, r3
 34 | 	ldr	r2, [r0, #20];	ldr	r3, [r0, #24];	orr	r1, r2;	orr	r1, r3
 35 | 
 36 | 	ldr	r2, [r0, #20];	orrs	r1, r2
 37 | 
 38 | 	ite	eq
 39 | 	moveq	r0, #1
 40 | 	movne	r0, #0
 41 | 
 42 | 	bx	lr
 43 | 
 44 | .size fq_is_zero, . - fq_is_zero
 45 | 
 46 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 47 | @
 48 | @ fq_cpy: Copy
 49 | @
 50 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 51 | 
 52 | .align 3
 53 | .global fq_cpy
 54 | .syntax unified
 55 | .thumb
 56 | .thumb_func
 57 | .type fq_cpy,	%function
 58 | 
fq_cpy:
	@ In:   r0 = destination pointer, r1 = source pointer (8 words each)
	@ Uses: r2/r3 as scratch
	@
	@ Copies the eight words two at a time; each pair is loaded completely
	@ before it is stored.

	ldr	r2, [r1,  #0]; ldr	r3, [r1,  #4]
	str	r2, [r0,  #0]; str	r3, [r0,  #4]

	ldr	r2, [r1,  #8]; ldr	r3, [r1, #12]
	str	r2, [r0,  #8]; str	r3, [r0, #12]

	ldr	r2, [r1, #16]; ldr	r3, [r1, #20]
	str	r2, [r0, #16]; str	r3, [r0, #20]

	ldr	r2, [r1, #24]; ldr	r3, [r1, #28]
	str	r2, [r0, #24]; str	r3, [r0, #28]

	bx	lr

.size fq_cpy, . - fq_cpy
 75 | 
 76 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 77 | @
 78 | @ fq_eq: Check two Fq values for equality
 79 | @
 80 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 81 | 
 82 | .align 3
 83 | .global fq_eq
 84 | .syntax unified
 85 | .thumb
 86 | .thumb_func
 87 | .type fq_eq,	%function
 88 | 
fq_eq:
	@ In:   r0, r1 = pointers to the two Fq values (8 words each)
	@ Out:  r0 = 1 if all eight words match, 0 otherwise
	@ Uses: r2 (running OR of the word differences), r3, flags;
	@       r4 is saved in a 4-byte stack slot and restored before return

	sub	sp, #4

	ldr	r2, [r0,  #0]
	ldr	r3, [r1,  #0]
	str	r4, [sp]
	sub	r2, r3

	@ For every further word: difference -> OR into r2.  Any differing
	@ word leaves at least one bit set in r2.
	ldr	r3, [r0,  #4];	ldr	r4, [r1,  #4];	sub	r3, r4;	orr	r2, r3 
	ldr	r3, [r0,  #8];	ldr	r4, [r1,  #8];	sub	r3, r4;	orr	r2, r3 
	ldr	r3, [r0, #12];	ldr	r4, [r1, #12];	sub	r3, r4;	orr	r2, r3 
	ldr	r3, [r0, #16];	ldr	r4, [r1, #16];	sub	r3, r4;	orr	r2, r3 
	ldr	r3, [r0, #20];	ldr	r4, [r1, #20];	sub	r3, r4;	orr	r2, r3 
	ldr	r3, [r0, #24];	ldr	r4, [r1, #24];	sub	r3, r4;	orr	r2, r3 

	@ r4 is no longer needed as scratch: restore the caller's value.
	ldr	r4, [sp]

	@ Last word reuses r0/r1 (the pointers are dead after these loads);
	@ ORRS sets Z = 1 only if every difference was zero.
	ldr	r0, [r0, #28];	ldr	r1, [r1, #28];	sub	r0, r1;	orrs	r0, r2 

	ite	eq
	moveq	r0, #1
	movne	r0, #0

	add	sp, #4

	bx	lr

.size fq_eq, . - fq_eq
117 | 
118 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
119 | @
120 | @ fq_neg: Negate an Fq value
121 | @
122 | @ x = (y != 0) ? (q - y) : 0
123 | @
124 | @  x  = (y == 0) ? q : y
125 | @  x  = -x
126 | @  x += q
127 | @
128 | @  Note: -x == ~x + 1
129 | @
130 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
131 | 
132 | .align 3
133 | .global fq_neg
134 | .syntax unified
135 | .thumb
136 | .thumb_func
137 | .type fq_neg,	%function
138 | 
fq_neg:
	@ In:   r0 = destination pointer, r1 = source pointer (8 words each)
	@ Uses: r1-r8 hold the value (word 0 in r1 ... word 7 in r8),
	@       lr is scratch; r4-r8 and lr are saved on the stack
	@
	@ r1 is part of the load list, so the source pointer is replaced by
	@ word 0 of the value.

	push	{ r4-r8, lr }
	ldm	r1, { r1-r8 }

	@ lr = (y == 0)

	orr	lr, r1, r2
	orr	lr, r3
	orr	lr, r4
	orr	lr, r5
	orr	lr, r6
	orr	lr, r7
	orrs	lr, r8	@ Z = (lr == 0) ? 1 : 0

	@ x = Z ? q : y
	@
	@ The words of q are built from 16-bit halves (mov = low half,
	@ movt = high half); the IT blocks make every write conditional on Z.
	@ None of these instructions changes the flags.

	itttt	eq
	moveq	r1, #0x00000001
	moveq	r2, #0x8000
	movteq	r2, #0x0A11
	moveq	r3, #0x0001

	itttt	eq
	movteq	r3, #0xD000
	moveq	r4, #0x76FE
	movteq	r4, #0x59AA
	moveq	r5, #0xB001

	itttt	eq
	movteq	r5, #0x5C37
	moveq	r6, #0x4D1E
	movteq	r6, #0x60B4
	moveq	r7, #0xA556

	ittt	eq
	movteq	r7, #0x9A2C
	moveq	r8, #0x655E
	movteq	r8, #0x12AB

	@ x = ~x

	mvn	 r1,  r1
	mvn	 r2,  r2
	mvn	 r3,  r3
	mvn	 r4,  r4
	mvn	 r5,  r5
	mvn	 r6,  r6
	mvn	 r7,  r7
	mvn	 r8,  r8

	@ x += 1

	adds	 r1, #1
	adcs	 r2, #0
	adcs	 r3, #0
	adcs	 r4, #0
	adcs	 r5, #0
	adcs	 r6, #0
	adcs	 r7, #0
	adc	 r8, #0

	@ x += q
	@
	@ The carry out of the top word is not used afterwards.

	adds	r1, #0x00000001

	mov	lr, #0x8000;	movt	lr, #0x0A11;	adcs	 r2, lr
	mov	lr, #0x0001;	movt	lr, #0xD000;	adcs	 r3, lr
	mov	lr, #0x76FE;	movt	lr, #0x59AA;	adcs	 r4, lr
	mov	lr, #0xB001;	movt	lr, #0x5C37;	adcs	 r5, lr
	mov	lr, #0x4D1E;	movt	lr, #0x60B4;	adcs	 r6, lr
	mov	lr, #0xA556;	movt	lr, #0x9A2C;	adcs	 r7, lr
	mov	lr, #0x655E;	movt	lr, #0x12AB;	adcs	 r8, lr

	@ Store the result and return by popping the saved lr into pc.

	stm	r0, { r1-r8 }
	pop	    { r4-r8, pc }

.size fq_neg, . - fq_neg
216 | 
217 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
218 | @
219 | @ fq_diff: x = y - z (mod q)
220 | @
221 | @  x = y - z
222 | @  store x
223 | @
224 | @  C = (x < 0); x += q
225 | @  if (C)	/* carry, because x was < 0 */
226 | @    store x
227 | @
228 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
229 | 
230 | .align 3
231 | .global fq_diff
232 | .syntax unified
233 | .thumb
234 | .thumb_func
235 | .type fq_diff,	%function
236 | 
fq_diff:
	@ In:   r0 = destination x, r1 = minuend y, r2 = subtrahend z
	@       (pointers to 8 words each)
	@ Uses: r14, r12, r11 ... r6 hold words 0 ... 7 of the result;
	@       r1 and r3-r5 are scratch; r4-r11 and lr are saved on the stack

	push	{ r4-r11, lr }

	@ x = y - z

	@ Words 0..4 of y go to r14, r12, r11, r10, r9; words 0..4 of z to r8..r4.

	ldr	r14, [r1,  #0]
	ldr	r12, [r1,  #4]
	ldr	r11, [r1,  #8]
	ldr	r10, [r1, #12]
	ldr	 r9, [r1, #16]

	ldr	 r8, [r2,  #0]
	ldr	 r7, [r2,  #4]
	ldr	 r6, [r2,  #8]
	ldr	 r5, [r2, #12]
	ldr	 r4, [r2, #16]

	subs	r14,  r8
	sbcs	r12,  r7
	sbcs	r11,  r6
	sbcs	r10,  r5
	sbcs	 r9,  r4

	@ Words 5..7: r8-r6 are reused for y, r5-r3 for z.  The loads and the
	@ store below leave the flags alone, so the borrow chain continues.

	ldr	 r8, [r1, #20]
	ldr	 r7, [r1, #24]
	ldr	 r6, [r1, #28]

	ldr	 r5, [r2, #20]
	ldr	 r4, [r2, #24]
	ldr	 r3, [r2, #28]

	str	r14, [r0,  #0]	@ store early (0 cycles)

	sbcs	 r8,  r5
	sbcs	 r7,  r4
	sbcs	 r6,  r3

	@ Store the raw difference; it is overwritten below if it wrapped.

	str	r12, [r0,  #4]
	str	r11, [r0,  #8]
	str	r10, [r0, #12]
	str	 r9, [r0, #16]
	str	 r8, [r0, #20]
	str	 r7, [r0, #24]
	str	 r6, [r0, #28]

	@ x += q, using r1 as temporary, x in { r14, r12-r6 }

	adds	r14, #0x00000001

	mov	r1, #0x8000;	movt	r1, #0x0A11;	adcs	r12, r1
	mov	r1, #0x0001;	movt	r1, #0xD000;	adcs	r11, r1
	mov	r1, #0x76FE;	movt	r1, #0x59AA;	adcs	r10, r1
	mov	r1, #0xB001;	movt	r1, #0x5C37;	adcs	 r9, r1
	mov	r1, #0x4D1E;	movt	r1, #0x60B4;	adcs	 r8, r1
	mov	r1, #0xA556;	movt	r1, #0x9A2C;	adcs	 r7, r1
	mov	r1, #0x655E;	movt	r1, #0x12AB;	adcs	 r6, r1

	@ The decision uses the carry out of this addition, not the borrow
	@ of the subtraction above (those flags were overwritten by ADDS).

	itttt	cs	@ carry set => x was negative => we need to store x
	strcs	r14, [r0,  #0]
	strcs	r12, [r0,  #4]
	strcs	r11, [r0,  #8]
	strcs	r10, [r0, #12]
	itttt	cs
	strcs	 r9, [r0, #16]
	strcs	 r8, [r0, #20]
	strcs	 r7, [r0, #24]
	strcs	 r6, [r0, #28]

	pop	{ r4-r11, pc }

.size fq_diff, . - fq_diff
308 | 
309 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
310 | @
311 | @ fq_sum: x = y + z (mod q)
312 | @
313 | @  x = y + z
314 | @  store x
315 | @
316 | @  C = (x >= q); x -= q
317 | @  if (C)
318 | @    store x
319 | @
320 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
321 | 
322 | .align 3
323 | .global fq_sum
324 | .syntax unified
325 | .thumb
326 | .thumb_func
327 | .type fq_sum,	%function
328 | 
fq_sum:
	@ In:   r0 = destination x, r1 = first addend y, r2 = second addend z
	@       (pointers to 8 words each)
	@ Uses: r14, r12, r11 ... r6 hold words 0 ... 7 of the result;
	@       r1 and r3-r5 are scratch; r4-r11 and lr are saved on the stack

	push	    { r4-r11, lr }

	@ x = y + z

	@ Words 0..4 of y go to r14, r12, r11, r10, r9; words 0..4 of z to r8..r4.

	ldr	r14, [r1,  #0]
	ldr	r12, [r1,  #4]
	ldr	r11, [r1,  #8]
	ldr	r10, [r1, #12]
	ldr	 r9, [r1, #16]

	ldr	 r8, [r2,  #0]
	ldr	 r7, [r2,  #4]
	ldr	 r6, [r2,  #8]
	ldr	 r5, [r2, #12]
	ldr	 r4, [r2, #16]

	adds	r14,  r8
	adcs	r12,  r7
	adcs	r11,  r6
	adcs	r10,  r5
	adcs	 r9,  r4

	@ Words 5..7: r8-r6 are reused for y, r5-r3 for z.  The loads and the
	@ store below leave the flags alone, so the carry chain continues.

	ldr	 r8, [r1, #20]
	ldr	 r7, [r1, #24]
	ldr	 r6, [r1, #28]

	ldr	 r5, [r2, #20]
	ldr	 r4, [r2, #24]
	ldr	 r3, [r2, #28]

	str	r14, [r0,  #0]	@ store early (0 cycles)

	adcs	 r8,  r5
	adcs	 r7,  r4
	adcs	 r6,  r3

	@ Store the raw sum; it is overwritten below if it was >= q.

	str	r12, [r0,  #4]
	str	r11, [r0,  #8]
	str	r10, [r0, #12]
	str	 r9, [r0, #16]
	str	 r8, [r0, #20]
	str	 r7, [r0, #24]
	str	 r6, [r0, #28]

	@ x -= q, using r1 as temporary, x in { r14, r12-r6 }

	subs	r14, #0x00000001

	mov	r1, #0x8000;	movt	r1, #0x0A11;	sbcs	r12, r1
	mov	r1, #0x0001;	movt	r1, #0xD000;	sbcs	r11, r1
	mov	r1, #0x76FE;	movt	r1, #0x59AA;	sbcs	r10, r1
	mov	r1, #0xB001;	movt	r1, #0x5C37;	sbcs	 r9, r1
	mov	r1, #0x4D1E;	movt	r1, #0x60B4;	sbcs	 r8, r1
	mov	r1, #0xA556;	movt	r1, #0x9A2C;	sbcs	 r7, r1
	mov	r1, #0x655E;	movt	r1, #0x12AB;	sbcs	 r6, r1

	@ The carry out of the addition above is not consulted; only the
	@ borrow of this subtraction decides which value is kept.

	itttt	cs	@ carry set == no borrow => x was >= q => we need to store x
	strcs	r14, [r0,  #0]
	strcs	r12, [r0,  #4]
	strcs	r11, [r0,  #8]
	strcs	r10, [r0, #12]
	itttt	cs
	strcs	 r9, [r0, #16]
	strcs	 r8, [r0, #20]
	strcs	 r7, [r0, #24]
	strcs	 r6, [r0, #28]

	pop	    { r4-r11, pc }

.size fq_sum, . - fq_sum
400 | 
401 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
402 | @
403 | @ fq_cset: x = c ? y : x
404 | @
405 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
406 | 
407 | .align 3
408 | .global fq_cset
409 | .syntax unified
410 | .thumb
411 | .thumb_func
412 | .type fq_cset,	%function
413 | 
fq_cset:
	@ In:   r0 = destination x, r1 = source y (8 words each), r2 = condition c
	@ Uses: r2/r3 as scratch once the condition has been captured in the flags
	@
	@ MOVS sets Z from c.  The loads that follow reuse r2 but do not touch
	@ the flags, so every "ne" store below still tests the original c.
	@ All eight source words are loaded whether or not c is set; only the
	@ stores are conditional.

	movs	r2, r2

	ldr	r2, [r1,  #0]
	ldr	r3, [r1,  #4]
	itt	ne
	strne	r2, [r0,  #0]
	strne	r3, [r0,  #4]

	ldr	r2, [r1,  #8]
	ldr	r3, [r1, #12]
	itt	ne
	strne	r2, [r0,  #8]
	strne	r3, [r0, #12]

	ldr	r2, [r1, #16]
	ldr	r3, [r1, #20]
	itt	ne
	strne	r2, [r0, #16]
	strne	r3, [r0, #20]

	ldr	r2, [r1, #24]
	ldr	r3, [r1, #28]
	itt	ne
	strne	r2, [r0, #24]
	strne	r3, [r0, #28]

	bx	lr

.size fq_cset, . - fq_cset
444 | 
445 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
446 | @
447 | @ fq_to_bytes
448 | @
449 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
450 | 
451 | .align 3
452 | .global fq_to_bytes
453 | .syntax unified
454 | .thumb
455 | .thumb_func
456 | .type fq_to_bytes,	%function
457 | 
fq_to_bytes:
	@ In:   r0 = destination (32 bytes), r1 = source Fq value (8 words)
	@ Uses: r2 as scratch; LSRS also updates the flags
	@
	@ Words are taken from offset 28 down to offset 0, and each word is
	@ written low byte last, so byte 0 of the output is the top byte of
	@ word 7 and byte 31 is the low byte of word 0.
	@
	@ NOTE(review): word 0 is read only after output bytes 0..27 have been
	@ written, so this looks unsafe when destination and source are the
	@ same buffer -- confirm callers never do that.

				ldr	r2, [r1, #28]
	strb	r2, [r0,  #3];	lsrs	r2, #8
	strb	r2, [r0,  #2];	lsrs	r2, #8
	strb	r2, [r0,  #1];	lsrs	r2, #8
	strb	r2, [r0,  #0];	ldr	r2, [r1, #24]

	strb	r2, [r0,  #7];	lsrs	r2, #8
	strb	r2, [r0,  #6];	lsrs	r2, #8
	strb	r2, [r0,  #5];	lsrs	r2, #8
	strb	r2, [r0,  #4];	ldr	r2, [r1, #20]

	strb	r2, [r0, #11];	lsrs	r2, #8
	strb	r2, [r0, #10];	lsrs	r2, #8
	strb	r2, [r0,  #9];	lsrs	r2, #8
	strb	r2, [r0,  #8];	ldr	r2, [r1, #16]

	strb	r2, [r0, #15];	lsrs	r2, #8
	strb	r2, [r0, #14];	lsrs	r2, #8
	strb	r2, [r0, #13];	lsrs	r2, #8
	strb	r2, [r0, #12];	ldr	r2, [r1, #12]

	strb	r2, [r0, #19];	lsrs	r2, #8
	strb	r2, [r0, #18];	lsrs	r2, #8
	strb	r2, [r0, #17];	lsrs	r2, #8
	strb	r2, [r0, #16];	ldr	r2, [r1,  #8]

	strb	r2, [r0, #23];	lsrs	r2, #8
	strb	r2, [r0, #22];	lsrs	r2, #8
	strb	r2, [r0, #21];	lsrs	r2, #8
	strb	r2, [r0, #20];	ldr	r2, [r1,  #4]

	strb	r2, [r0, #27];	lsrs	r2, #8
	strb	r2, [r0, #26];	lsrs	r2, #8
	strb	r2, [r0, #25];	lsrs	r2, #8
	strb	r2, [r0, #24];	ldr	r2, [r1,  #0]

	strb	r2, [r0, #31];	lsrs	r2, #8
	strb	r2, [r0, #30];	lsrs	r2, #8
	strb	r2, [r0, #29];	lsrs	r2, #8
	strb	r2, [r0, #28]

	bx	lr

.size fq_to_bytes, . - fq_to_bytes
503 | 
504 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
505 | @
506 | @ fq_from_bytes
507 | @
508 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
509 | 
510 | .align 3
511 | .global fq_from_bytes
512 | .syntax unified
513 | .thumb
514 | .thumb_func
515 | .type fq_from_bytes,	%function
516 | 
fq_from_bytes:
	@ In:   r0 = destination Fq value (32 bytes of storage),
	@       r1 = source (32 bytes)
	@ Uses: r2 as scratch
	@
	@ Reverses the byte order of the whole buffer: source byte i is stored
	@ at destination byte 31 - i, one byte at a time.
	@
	@ NOTE(review): source bytes 16..31 are read after destination bytes
	@ 16..31 have been written, so this looks unsafe when destination and
	@ source are the same buffer -- confirm callers never do that.

	ldrb	r2, [r1,  #0];	strb	r2, [r0, #31]
	ldrb	r2, [r1,  #1];	strb	r2, [r0, #30]
	ldrb	r2, [r1,  #2];	strb	r2, [r0, #29]
	ldrb	r2, [r1,  #3];	strb	r2, [r0, #28]

	ldrb	r2, [r1,  #4];	strb	r2, [r0, #27]
	ldrb	r2, [r1,  #5];	strb	r2, [r0, #26]
	ldrb	r2, [r1,  #6];	strb	r2, [r0, #25]
	ldrb	r2, [r1,  #7];	strb	r2, [r0, #24]

	ldrb	r2, [r1,  #8];	strb	r2, [r0, #23]
	ldrb	r2, [r1,  #9];	strb	r2, [r0, #22]
	ldrb	r2, [r1, #10];	strb	r2, [r0, #21]
	ldrb	r2, [r1, #11];	strb	r2, [r0, #20]

	ldrb	r2, [r1, #12];	strb	r2, [r0, #19]
	ldrb	r2, [r1, #13];	strb	r2, [r0, #18]
	ldrb	r2, [r1, #14];	strb	r2, [r0, #17]
	ldrb	r2, [r1, #15];	strb	r2, [r0, #16]

	ldrb	r2, [r1, #16];	strb	r2, [r0, #15]
	ldrb	r2, [r1, #17];	strb	r2, [r0, #14]
	ldrb	r2, [r1, #18];	strb	r2, [r0, #13]
	ldrb	r2, [r1, #19];	strb	r2, [r0, #12]

	ldrb	r2, [r1, #20];	strb	r2, [r0, #11]
	ldrb	r2, [r1, #21];	strb	r2, [r0, #10]
	ldrb	r2, [r1, #22];	strb	r2, [r0,  #9]
	ldrb	r2, [r1, #23];	strb	r2, [r0,  #8]

	ldrb	r2, [r1, #24];	strb	r2, [r0,  #7]
	ldrb	r2, [r1, #25];	strb	r2, [r0,  #6]
	ldrb	r2, [r1, #26];	strb	r2, [r0,  #5]
	ldrb	r2, [r1, #27];	strb	r2, [r0,  #4]

	ldrb	r2, [r1, #28];	strb	r2, [r0,  #3]
	ldrb	r2, [r1, #29];	strb	r2, [r0,  #2]
	ldrb	r2, [r1, #30];	strb	r2, [r0,  #1]
	ldrb	r2, [r1, #31];	strb	r2, [r0,  #0]

	bx	lr

.size fq_from_bytes, . - fq_from_bytes
561 | 
562 | 


--------------------------------------------------------------------------------
/bls12_377/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! # `bls12_377`
 2 | //!
 3 | //! This crate provides an implementation of the BLS12-377 pairing-friendly elliptic
 4 | //! curve construction.
 5 | //!
 6 | //! * **This implementation has not been reviewed or audited. Use at your own risk.**
 7 | //! * This implementation targets Rust `1.36` or later.
 8 | //! * This implementation does not require the Rust standard library.
 9 | //! * All operations are constant time unless explicitly noted.
10 | 
11 | #![no_std]
12 | #![deny(missing_debug_implementations)]
13 | #![allow(clippy::too_many_arguments)]
14 | #![allow(clippy::unreadable_literal)]
15 | #![allow(clippy::many_single_char_names)]
16 | // This lint is described at
17 | // https://rust-lang.github.io/rust-clippy/master/index.html#suspicious_arithmetic_impl
18 | // In our library, some of the arithmetic involving extension fields will necessarily
19 | // involve various binary operators, and so this lint is triggered unnecessarily.
20 | #![allow(clippy::suspicious_arithmetic_impl)]
21 | 
22 | #[cfg(test)]
23 | #[macro_use]
24 | extern crate std;
25 | 
26 | #[macro_use]
27 | pub mod util;
28 | 
29 | mod scalar;
30 | 
31 | pub use scalar::Scalar;
32 | 
33 | #[cfg(feature = "groups")]
34 | pub mod fp;
35 | #[cfg(feature = "groups")]
36 | mod fp2;
37 | #[cfg(feature = "groups")]
38 | mod g1;
39 | #[cfg(feature = "groups")]
40 | mod g2;
41 | 
42 | #[cfg(feature = "groups")]
43 | pub use g1::{G1Affine, G1Projective};
44 | #[cfg(feature = "groups")]
45 | pub use g2::{G2Affine, G2Projective};
46 | 
47 | // TODO: This should be upstreamed to subtle.
48 | // See https://github.com/dalek-cryptography/subtle/pull/48
49 | trait CtOptionExt<T> {
50 |     /// Calls f() and either returns self if it contains a value,
51 |     /// or returns the output of f() otherwise.
52 |     fn or_else<F: FnOnce() -> subtle::CtOption<T>>(self, f: F) -> subtle::CtOption<T>;
53 | }
54 | 
55 | impl<T: subtle::ConditionallySelectable> CtOptionExt<T> for subtle::CtOption<T> {
56 |     fn or_else<F: FnOnce() -> subtle::CtOption<T>>(self, f: F) -> subtle::CtOption<T> {
57 |         let is_none = self.is_none();
58 |         let f = f();
59 | 
60 |         subtle::ConditionallySelectable::conditional_select(&self, &f, is_none)
61 |     }
62 | }
63 | 


--------------------------------------------------------------------------------
/bls12_377/src/scalar.rs:
--------------------------------------------------------------------------------
  1 | //! This module provides an implementation of the BLS12-377 scalar field $\mathbb{F}_q$
  2 | //! where `q = 8444461749428370424248824938781546531375899335154063827935233455917409239041`
  3 | 
  4 | use core::fmt;
  5 | use core::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign};
  6 | 
  7 | use byteorder::{ByteOrder, LittleEndian};
  8 | use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};
  9 | 
 10 | use crate::util::{adc, mac, sbb};
 11 | 
/// Represents an element of the scalar field $\mathbb{F}_q$ of the BLS12-377 elliptic
/// curve construction.
// The internal representation of this type is four 64-bit unsigned
// integers in little-endian order. `Scalar` values are always in
// Montgomery form; i.e., Scalar(a) = aR mod q, with R = 2^256.
//
// Only the marker trait `Eq` is derived here; `PartialEq` is implemented by
// hand in this file on top of the limb-wise `ct_eq` comparison.
#[derive(Clone, Copy, Eq)]
pub struct Scalar(pub(crate) [u64; 4]);
 19 | 
 20 | impl fmt::Debug for Scalar {
 21 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 22 |         let tmp = self.to_bytes();
 23 |         write!(f, "0x")?;
 24 |         for &b in tmp.iter().rev() {
 25 |             write!(f, "{:02x}", b)?;
 26 |         }
 27 |         Ok(())
 28 |     }
 29 | }
 30 | 
 31 | impl From<u64> for Scalar {
 32 |     fn from(val: u64) -> Scalar {
 33 |         Scalar([val, 0, 0, 0]) * r_squared()
 34 |     }
 35 | }
 36 | 
 37 | impl ConstantTimeEq for Scalar {
 38 |     fn ct_eq(&self, other: &Self) -> Choice {
 39 |         self.0[0].ct_eq(&other.0[0])
 40 |             & self.0[1].ct_eq(&other.0[1])
 41 |             & self.0[2].ct_eq(&other.0[2])
 42 |             & self.0[3].ct_eq(&other.0[3])
 43 |     }
 44 | }
 45 | 
 46 | impl PartialEq for Scalar {
 47 |     fn eq(&self, other: &Self) -> bool {
 48 |         self.ct_eq(other).unwrap_u8() == 1
 49 |     }
 50 | }
 51 | 
 52 | impl ConditionallySelectable for Scalar {
 53 |     fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self {
 54 |         Scalar([
 55 |             u64::conditional_select(&a.0[0], &b.0[0], choice),
 56 |             u64::conditional_select(&a.0[1], &b.0[1], choice),
 57 |             u64::conditional_select(&a.0[2], &b.0[2], choice),
 58 |             u64::conditional_select(&a.0[3], &b.0[3], choice),
 59 |         ])
 60 |     }
 61 | }
 62 | 
 63 | /// Constant representing the modulus
 64 | /// q = 8444461749428370424248824938781546531375899335154063827935233455917409239041
 65 | const fn modulus() -> Scalar {
 66 |     Scalar([
 67 |         725501752471715841u64,
 68 |         6461107452199829505u64,
 69 |         6968279316240510977u64,
 70 |         1345280370688173398u64,
 71 |     ])
 72 | }
 73 | 
 74 | impl<'a> Neg for &'a Scalar {
 75 |     type Output = Scalar;
 76 | 
 77 |     fn neg(self) -> Scalar {
 78 |         self.neg()
 79 |     }
 80 | }
 81 | 
 82 | impl Neg for Scalar {
 83 |     type Output = Scalar;
 84 | 
 85 |     fn neg(self) -> Scalar {
 86 |         -&self
 87 |     }
 88 | }
 89 | 
 90 | impl<'a, 'b> Sub<&'b Scalar> for &'a Scalar {
 91 |     type Output = Scalar;
 92 | 
 93 |     fn sub(self, rhs: &'b Scalar) -> Scalar {
 94 |         self.sub(rhs)
 95 |     }
 96 | }
 97 | 
 98 | impl<'a, 'b> Add<&'b Scalar> for &'a Scalar {
 99 |     type Output = Scalar;
100 | 
101 |     fn add(self, rhs: &'b Scalar) -> Scalar {
102 |         self.add(rhs)
103 |     }
104 | }
105 | 
106 | impl<'a, 'b> Mul<&'b Scalar> for &'a Scalar {
107 |     type Output = Scalar;
108 | 
109 |     fn mul(self, rhs: &'b Scalar) -> Scalar {
110 |         self.mul(rhs)
111 |     }
112 | }
113 | 
// Macro-generated operator impls for `Scalar`.
// NOTE(review): these two macros are defined outside this file; presumably they
// derive the owned/borrowed operand combinations and the `*Assign` operators
// (both used throughout this file) from the reference-based impls above — confirm.
impl_binops_additive!(Scalar, Scalar);
impl_binops_multiplicative!(Scalar, Scalar);
116 | 
/// INV = -(q^{-1} mod 2^64) mod 2^64
///
/// This is the per-limb constant used by the Montgomery reduction.
const fn inv() -> u64 {
    const NEG_Q_INV: u64 = 0x0a11_7fff_ffff_ffff;
    NEG_Q_INV
}
121 | 
122 | /// R = 2^256 mod q
123 | const fn r() -> Scalar {
124 |    Scalar([
125 |        0x7D1C7FFFFFFFFFF3,
126 |        0x7257F50F6FFFFFF2,
127 |        0x16D81575512C0FEE,
128 |        0xD4BDA322BBB9A9D,
129 |    ]) 
130 | }
131 | 
132 | /// R^2 = 2^512 mod q
133 | #[inline]
134 | const fn r_squared() -> Scalar {
135 |     Scalar([
136 |         0x25D577BAB861857B,
137 |         0xCC2C27B58860591F,
138 |         0xA7CC008FE5DC8593,
139 |         0x11FDAE7EFF1C939,
140 |     ]) 
141 | }
142 | 
143 | /// R^3 = 2^768 mod q
144 | const fn r_cubed() -> Scalar {
145 |     Scalar([
146 |         0x6A4295C90F65454C, 
147 |         0x624D23FFAE271699,
148 |         0xB1E55EF6F1C9D713,
149 |         0x601DFA555C48DDA,
150 |     ])
151 | }
152 | 
/// The exponent `s` in the decomposition `q - 1 = t * 2^s` with `t` odd.
const fn s() -> u32 {
    const TWO_ADICITY: u32 = 47;
    TWO_ADICITY
}
156 | 
157 | /// GENERATOR^t where t * 2^s + 1 = q
158 | /// with t odd. In other words, this
159 | /// is a 2^s root of unity.
160 | ///
161 | /// `GENERATOR = 7 mod q` is a generator
162 | /// of the q - 1 order multiplicative
163 | /// subgroup.
164 | const fn root_of_unity() -> Scalar {
165 |     Scalar([
166 |         0x3c3d3ca739381fb2,
167 |         0x9a14cda3ec99772b,
168 |         0xd7aacc7c59724826,
169 |         0xd1ba211c5cc349c,
170 |     ])
171 | }
172 | 
173 | impl Default for Scalar {
174 |     fn default() -> Self {
175 |         Self::zero()
176 |     }
177 | }
178 | 
179 | impl Scalar {
180 |     /// Returns zero, the additive identity.
181 |     pub const fn zero() -> Scalar {
182 |         Scalar([0, 0, 0, 0])
183 |     }
184 | 
185 |     /// Returns one, the multiplicative identity.
186 |     pub const fn one() -> Scalar {
187 |         r()
188 |     }
189 | 
190 |     /// Doubles this field element.
191 |     pub fn double(&self) -> Scalar {
192 |         // TODO: This can be achieved more efficiently with a bitshift.
193 |         self.add(self)
194 |     }
195 | 
196 |     /// Attempts to convert a little-endian byte representation of
197 |     /// a scalar into a `Scalar`, failing if the input is not canonical.
198 |     pub fn from_bytes(bytes: &[u8; 32]) -> CtOption<Scalar> {
199 |         let mut tmp = Scalar([0, 0, 0, 0]);
200 |         let modulus = modulus();
201 | 
202 |         tmp.0[0] = LittleEndian::read_u64(&bytes[0..8]);
203 |         tmp.0[1] = LittleEndian::read_u64(&bytes[8..16]);
204 |         tmp.0[2] = LittleEndian::read_u64(&bytes[16..24]);
205 |         tmp.0[3] = LittleEndian::read_u64(&bytes[24..32]);
206 | 
207 |         // Try to subtract the modulus
208 |         let (_, borrow) = sbb(tmp.0[0], modulus.0[0], 0);
209 |         let (_, borrow) = sbb(tmp.0[1], modulus.0[1], borrow);
210 |         let (_, borrow) = sbb(tmp.0[2], modulus.0[2], borrow);
211 |         let (_, borrow) = sbb(tmp.0[3], modulus.0[3], borrow);
212 | 
213 |         // If the element is smaller than MODULUS then the
214 |         // subtraction will underflow, producing a borrow value
215 |         // of 0xffff...ffff. Otherwise, it'll be zero.
216 |         let is_some = (borrow as u8) & 1;
217 | 
218 |         // Convert to Montgomery form by computing
219 |         // (a.R^0 * R^2) / R = a.R
220 |         tmp *= &r_squared();
221 | 
222 |         CtOption::new(tmp, Choice::from(is_some))
223 |     }
224 | 
225 |     /// Converts an element of `Scalar` into a byte representation in
226 |     /// little-endian byte order.
227 |     pub fn to_bytes(&self) -> [u8; 32] {
228 |         // Turn into canonical form by computing
229 |         // (a.R) / R = a
230 |         let tmp = Scalar::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);
231 | 
232 |         let mut res = [0; 32];
233 |         LittleEndian::write_u64(&mut res[0..8], tmp.0[0]);
234 |         LittleEndian::write_u64(&mut res[8..16], tmp.0[1]);
235 |         LittleEndian::write_u64(&mut res[16..24], tmp.0[2]);
236 |         LittleEndian::write_u64(&mut res[24..32], tmp.0[3]);
237 | 
238 |         res
239 |     }
240 | 
241 |     /// Converts a 512-bit little endian integer into
242 |     /// a `Scalar` by reducing by the modulus.
243 |     pub fn from_bytes_wide(bytes: &[u8; 64]) -> Scalar {
244 |         Scalar::from_u512([
245 |             LittleEndian::read_u64(&bytes[0..8]),
246 |             LittleEndian::read_u64(&bytes[8..16]),
247 |             LittleEndian::read_u64(&bytes[16..24]),
248 |             LittleEndian::read_u64(&bytes[24..32]),
249 |             LittleEndian::read_u64(&bytes[32..40]),
250 |             LittleEndian::read_u64(&bytes[40..48]),
251 |             LittleEndian::read_u64(&bytes[48..56]),
252 |             LittleEndian::read_u64(&bytes[56..64]),
253 |         ])
254 |     }
255 | 
256 |     fn from_u512(limbs: [u64; 8]) -> Scalar {
257 |         // We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits
258 |         // with the higher bits multiplied by 2^256. Thus, we perform two reductions
259 |         //
260 |         // 1. the lower bits are multiplied by R^2, as normal
261 |         // 2. the upper bits are multiplied by R^2 * 2^256 = R^3
262 |         //
263 |         // and computing their sum in the field. It remains to see that arbitrary 256-bit
264 |         // numbers can be placed into Montgomery form safely using the reduction. The
265 |         // reduction works so long as the product is less than R=2^256 multipled by
266 |         // the modulus. This holds because for any `c` smaller than the modulus, we have
267 |         // that (2^256 - 1)*c is an acceptable product for the reduction. Therefore, the
268 |         // reduction always works so long as `c` is in the field; in this case it is either the
269 |         // constant `R2` or `R3`.
270 |         let d0 = Scalar([limbs[0], limbs[1], limbs[2], limbs[3]]);
271 |         let d1 = Scalar([limbs[4], limbs[5], limbs[6], limbs[7]]);
272 |         // Convert to Montgomery form
273 |         d0 * r_squared() + d1 * r_cubed()
274 |     }
275 | 
276 |     /// Converts from an integer represented in little endian
277 |     /// into its (congruent) `Scalar` representation.
278 |     #[inline]
279 |     pub fn from_raw(val: [u64; 4]) -> Self {
280 |         (&Scalar(val)).mul(&r_squared())
281 |     }
282 | 
    /// Squares this element.
    ///
    /// Builds the full 512-bit square of the four limbs and hands the eight
    /// result limbs to `montgomery_reduce`.
    pub fn square(&self) -> Scalar {
        // Cross products a_i * a_j for i < j, accumulated column-wise into r1..r6.
        let (r1, carry) = mac(0, self.0[0], self.0[1], 0);
        let (r2, carry) = mac(0, self.0[0], self.0[2], carry);
        let (r3, r4) = mac(0, self.0[0], self.0[3], carry);

        let (r3, carry) = mac(r3, self.0[1], self.0[2], 0);
        let (r4, r5) = mac(r4, self.0[1], self.0[3], carry);

        let (r5, r6) = mac(r5, self.0[2], self.0[3], 0);

        // Each cross product occurs twice in the square, so shift the whole
        // r1..r6 value left by one bit; the bit leaving r6 becomes r7.
        let r7 = r6 >> 63;
        let r6 = (r6 << 1) | (r5 >> 63);
        let r5 = (r5 << 1) | (r4 >> 63);
        let r4 = (r4 << 1) | (r3 >> 63);
        let r3 = (r3 << 1) | (r2 >> 63);
        let r2 = (r2 << 1) | (r1 >> 63);
        let r1 = r1 << 1;

        // Add the diagonal terms a_i^2 into the even limbs, rippling the
        // carries through the odd limbs.
        let (r0, carry) = mac(0, self.0[0], self.0[0], 0);
        let (r1, carry) = adc(0, r1, carry);
        let (r2, carry) = mac(r2, self.0[1], self.0[1], carry);
        let (r3, carry) = adc(0, r3, carry);
        let (r4, carry) = mac(r4, self.0[2], self.0[2], carry);
        let (r5, carry) = adc(0, r5, carry);
        let (r6, carry) = mac(r6, self.0[3], self.0[3], carry);
        let (r7, _) = adc(0, r7, carry);

        Scalar::montgomery_reduce(r0, r1, r2, r3, r4, r5, r6, r7)
    }
313 | 
314 |     /// Computes the square root of this element, if it exists.
315 |     pub fn sqrt(&self) -> CtOption<Self> {
316 |         // Tonelli-Shank's algorithm for q mod 16 = 1
317 |         // https://eprint.iacr.org/2012/685.pdf (page 12, algorithm 5)
318 | 
319 |         // w = self^((t - 1) // 2)
320 |         //   = self^6104339283789297388802252303364915521546564123189034618274734669823
321 |         let w = self.pow_vartime(&[
322 |             0x7fff2dff7fffffff,
323 |             0x04d0ec02a9ded201,
324 |             0x94cebea4199cec04,
325 |             0x0000000039f6d3a9,
326 |         ]);
327 | 
328 |         let s = s();
329 | 
330 |         let mut v = s;
331 |         let mut x = self * w;
332 |         let mut b = x * w;
333 | 
334 |         // Initialize z as the 2^S root of unity.
335 |         let mut z = root_of_unity();
336 | 
337 |         for max_v in (1..=s).rev() {
338 |             let mut k = 1;
339 |             let mut tmp = b.square();
340 |             let mut j_less_than_v: Choice = 1.into();
341 | 
342 |             for j in 2..max_v {
343 |                 let tmp_is_one = tmp.ct_eq(&Scalar::one());
344 |                 let squared = Scalar::conditional_select(&tmp, &z, tmp_is_one).square();
345 |                 tmp = Scalar::conditional_select(&squared, &tmp, tmp_is_one);
346 |                 let new_z = Scalar::conditional_select(&z, &squared, tmp_is_one);
347 |                 j_less_than_v &= !j.ct_eq(&v);
348 |                 k = u32::conditional_select(&j, &k, tmp_is_one);
349 |                 z = Scalar::conditional_select(&z, &new_z, j_less_than_v);
350 |             }
351 | 
352 |             let result = x * z;
353 |             x = Scalar::conditional_select(&result, &x, b.ct_eq(&Scalar::one()));
354 |             z = z.square();
355 |             b *= z;
356 |             v = k;
357 |         }
358 | 
359 |         CtOption::new(
360 |             x,
361 |             (x * x).ct_eq(self), // Only return Some if it's the square root.
362 |         )
363 |     }
364 | 
365 |     /// Exponentiates `self` by `by`, where `by` is a
366 |     /// little-endian order integer exponent.
367 |     pub fn pow(&self, by: &[u64; 4]) -> Self {
368 |         let mut res = Self::one();
369 |         for e in by.iter().rev() {
370 |             for i in (0..64).rev() {
371 |                 res = res.square();
372 |                 let mut tmp = res;
373 |                 tmp *= self;
374 |                 res.conditional_assign(&tmp, (((*e >> i) & 0x1) as u8).into());
375 |             }
376 |         }
377 |         res
378 |     }
379 | 
380 |     /// Exponentiates `self` by `by`, where `by` is a
381 |     /// little-endian order integer exponent.
382 |     ///
383 |     /// **This operation is variable time with respect
384 |     /// to the exponent.** If the exponent is fixed,
385 |     /// this operation is effectively constant time.
386 |     pub fn pow_vartime(&self, by: &[u64; 4]) -> Self {
387 |         let mut res = Self::one();
388 |         for e in by.iter().rev() {
389 |             for i in (0..64).rev() {
390 |                 res = res.square();
391 | 
392 |                 if ((*e >> i) & 1) == 1 {
393 |                     res.mul_assign(self);
394 |                 }
395 |             }
396 |         }
397 |         res
398 |     }
399 | 
    /// Montgomery-reduces the 512-bit value given by the little-endian limbs
    /// `r0..r7`, i.e. divides it by R = 2^256 modulo q.
    ///
    /// Callers in this file pass either a full 512-bit product (`mul`,
    /// `square`) or a 256-bit value with the upper limbs zeroed (`to_bytes`).
    #[inline]
    fn montgomery_reduce(
        r0: u64,
        r1: u64,
        r2: u64,
        r3: u64,
        r4: u64,
        r5: u64,
        r6: u64,
        r7: u64,
    ) -> Self {
        // The Montgomery reduction here is based on Algorithm 14.32 in
        // Handbook of Applied Cryptography
        // <http://cacr.uwaterloo.ca/hac/about/chap14.pdf>.

        let modulus = modulus();
        let inv = inv();
        // Round 1: choose k so that r0 + k * q has a zero low limb (that limb
        // is discarded below), then fold k * q into r1..r4.
        let k = r0.wrapping_mul(inv);
        let (_, carry) = mac(r0, k, modulus.0[0], 0);
        let (r1, carry) = mac(r1, k, modulus.0[1], carry);
        let (r2, carry) = mac(r2, k, modulus.0[2], carry);
        let (r3, carry) = mac(r3, k, modulus.0[3], carry);
        // `carry2` carries the overflow of this round into the next one.
        let (r4, carry2) = adc(r4, 0, carry);

        // Round 2: same step, one limb higher.
        let k = r1.wrapping_mul(inv);
        let (_, carry) = mac(r1, k, modulus.0[0], 0);
        let (r2, carry) = mac(r2, k, modulus.0[1], carry);
        let (r3, carry) = mac(r3, k, modulus.0[2], carry);
        let (r4, carry) = mac(r4, k, modulus.0[3], carry);
        let (r5, carry2) = adc(r5, carry2, carry);

        // Round 3.
        let k = r2.wrapping_mul(inv);
        let (_, carry) = mac(r2, k, modulus.0[0], 0);
        let (r3, carry) = mac(r3, k, modulus.0[1], carry);
        let (r4, carry) = mac(r4, k, modulus.0[2], carry);
        let (r5, carry) = mac(r5, k, modulus.0[3], carry);
        let (r6, carry2) = adc(r6, carry2, carry);

        // Round 4: after this the low four limbs have been eliminated and the
        // result lives in r4..r7.
        let k = r3.wrapping_mul(inv);
        let (_, carry) = mac(r3, k, modulus.0[0], 0);
        let (r4, carry) = mac(r4, k, modulus.0[1], carry);
        let (r5, carry) = mac(r5, k, modulus.0[2], carry);
        let (r6, carry) = mac(r6, k, modulus.0[3], carry);
        let (r7, _) = adc(r7, carry2, carry);
        // Result may be within MODULUS of the correct value
        // `sub` adds the modulus back when the subtraction underflows, so this
        // acts as a conditional final subtraction.
        (&Scalar([r4, r5, r6, r7])).sub(&modulus)
    }
447 | 
    /// Multiplies `rhs` by `self`, returning the result.
    ///
    /// Computes the full 512-bit product of the two limb arrays and reduces
    /// it with `montgomery_reduce`.
    #[inline]
    pub fn mul(&self, rhs: &Self) -> Self {
        // Schoolbook multiplication

        // Row 0: self.0[0] * rhs, written to r0..r4.
        let (r0, carry) = mac(0, self.0[0], rhs.0[0], 0);
        let (r1, carry) = mac(0, self.0[0], rhs.0[1], carry);
        let (r2, carry) = mac(0, self.0[0], rhs.0[2], carry);
        let (r3, r4) = mac(0, self.0[0], rhs.0[3], carry);

        // Row 1: self.0[1] * rhs, accumulated one limb higher (r1..r5).
        let (r1, carry) = mac(r1, self.0[1], rhs.0[0], 0);
        let (r2, carry) = mac(r2, self.0[1], rhs.0[1], carry);
        let (r3, carry) = mac(r3, self.0[1], rhs.0[2], carry);
        let (r4, r5) = mac(r4, self.0[1], rhs.0[3], carry);

        // Row 2: accumulated into r2..r6.
        let (r2, carry) = mac(r2, self.0[2], rhs.0[0], 0);
        let (r3, carry) = mac(r3, self.0[2], rhs.0[1], carry);
        let (r4, carry) = mac(r4, self.0[2], rhs.0[2], carry);
        let (r5, r6) = mac(r5, self.0[2], rhs.0[3], carry);

        // Row 3: accumulated into r3..r7.
        let (r3, carry) = mac(r3, self.0[3], rhs.0[0], 0);
        let (r4, carry) = mac(r4, self.0[3], rhs.0[1], carry);
        let (r5, carry) = mac(r5, self.0[3], rhs.0[2], carry);
        let (r6, r7) = mac(r6, self.0[3], rhs.0[3], carry);

        Scalar::montgomery_reduce(r0, r1, r2, r3, r4, r5, r6, r7)
    }
475 | 
476 |     /// Subtracts `rhs` from `self`, returning the result.
477 |     pub fn sub(&self, rhs: &Self) -> Self {
478 |         let modulus = modulus();
479 |         let (d0, borrow) = sbb(self.0[0], rhs.0[0], 0);
480 |         let (d1, borrow) = sbb(self.0[1], rhs.0[1], borrow);
481 |         let (d2, borrow) = sbb(self.0[2], rhs.0[2], borrow);
482 |         let (d3, borrow) = sbb(self.0[3], rhs.0[3], borrow);
483 | 
484 |         // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise
485 |         // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the modulus.
486 |         let (d0, carry) = adc(d0, modulus.0[0] & borrow, 0);
487 |         let (d1, carry) = adc(d1, modulus.0[1] & borrow, carry);
488 |         let (d2, carry) = adc(d2, modulus.0[2] & borrow, carry);
489 |         let (d3, _) = adc(d3, modulus.0[3] & borrow, carry);
490 | 
491 |         Scalar([d0, d1, d2, d3])
492 |     }
493 | 
494 |     /// Adds `rhs` to `self`, returning the result.
495 |     pub fn add(&self, rhs: &Self) -> Self {
496 |         let (d0, carry) = adc(self.0[0], rhs.0[0], 0);
497 |         let (d1, carry) = adc(self.0[1], rhs.0[1], carry);
498 |         let (d2, carry) = adc(self.0[2], rhs.0[2], carry);
499 |         let (d3, _) = adc(self.0[3], rhs.0[3], carry);
500 | 
501 |         // Attempt to subtract the modulus, to ensure the value
502 |         // is smaller than the modulus.
503 |         (&Scalar([d0, d1, d2, d3])).sub(&modulus())
504 |     }
505 | 
506 |     /// Negates `self`.
507 |     pub fn neg(&self) -> Self {
508 |         // Subtract `self` from `MODULUS` to negate. Ignore the final
509 |         // borrow because it cannot underflow; self is guaranteed to
510 |         // be in the field.
511 |         let modulus = modulus();
512 |         let (d0, borrow) = sbb(modulus.0[0], self.0[0], 0);
513 |         let (d1, borrow) = sbb(modulus.0[1], self.0[1], borrow);
514 |         let (d2, borrow) = sbb(modulus.0[2], self.0[2], borrow);
515 |         let (d3, _) = sbb(modulus.0[3], self.0[3], borrow);
516 | 
517 |         // `tmp` could be `MODULUS` if `self` was zero. Create a mask that is
518 |         // zero if `self` was zero, and `u64::max_value()` if self was nonzero.
519 |         let mask = (((self.0[0] | self.0[1] | self.0[2] | self.0[3]) == 0) as u64).wrapping_sub(1);
520 | 
521 |         Scalar([d0 & mask, d1 & mask, d2 & mask, d3 & mask])
522 |     }
523 | }
524 | 
525 | impl<'a> From<&'a Scalar> for [u8; 32] {
526 |     fn from(value: &'a Scalar) -> [u8; 32] {
527 |         value.to_bytes()
528 |     }
529 | }
530 | 
#[test]
fn test_inv() {
    // Compute -(q^{-1} mod 2^64) mod 2^64 by exponentiating
    // by totient(2**64) - 1
    //
    // Each round squares the accumulator and multiplies by q's lowest limb,
    // so after k rounds it holds q^(2^k - 1); 63 rounds give q^(2^63 - 1).
    let q_low = modulus().0[0];
    let mut acc = 1u64;
    for _ in 0..63 {
        acc = acc.wrapping_mul(acc).wrapping_mul(q_low);
    }
    let computed = acc.wrapping_neg();

    assert_eq!(computed, inv());
}
546 | 
#[cfg(feature = "std")]
#[test]
fn test_debug() {
    // `Debug` prints the canonical value, most significant byte first.
    assert_eq!(
        format!("{:?}", Scalar::zero()),
        "0x0000000000000000000000000000000000000000000000000000000000000000"
    );
    assert_eq!(
        format!("{:?}", Scalar::one()),
        "0x0000000000000000000000000000000000000000000000000000000000000001"
    );
    // The limbs of `r_squared()` are R^2, so the canonical value they
    // represent is R = 2^256 mod q, i.e. the limbs returned by `r()`.
    assert_eq!(
        format!("{:?}", r_squared()),
        "0x0d4bda322bbb9a9d16d81575512c0fee7257f50f6ffffff27d1c7ffffffffff3"
    );
}
563 | 
#[test]
fn test_equality() {
    let zero = Scalar::zero();
    let one = Scalar::one();
    let r2 = r_squared();

    // Every value equals a freshly constructed copy of itself...
    assert_eq!(zero, Scalar::zero());
    assert_eq!(one, Scalar::one());
    assert_eq!(r2, r_squared());

    // ...and distinct values compare unequal.
    assert!(zero != one);
    assert!(one != r2);
}
573 | 
#[test]
fn test_to_bytes() {
    // Zero serialises to 32 zero bytes.
    assert_eq!(
        Scalar::zero().to_bytes(),
        [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0
        ]
    );

    // One serialises to a single low byte (little-endian).
    assert_eq!(
        Scalar::one().to_bytes(),
        [
            1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0
        ]
    );

    // The limbs of `r_squared()` are R^2, so its canonical value is
    // R = 2^256 mod q; these are the little-endian bytes of the `r()` limbs.
    assert_eq!(
        r_squared().to_bytes(),
        [
            243, 255, 255, 255, 255, 127, 28, 125, 242, 255, 255, 111, 15, 245, 87, 114, 238, 15,
            44, 81, 117, 21, 216, 22, 157, 154, 187, 43, 50, 218, 75, 13
        ]
    );

    // -1 is q - 1: the modulus bytes with the lowest byte decremented.
    assert_eq!(
        (-&Scalar::one()).to_bytes(),
        [
        0, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180,
        96, 86, 165, 44, 154, 94, 101, 171, 18
        ]
    );
}
608 | 
#[test]
fn test_from_bytes() {
    // Canonical encodings of 0, 1 and R decode to the expected scalars.
    assert_eq!(
        Scalar::from_bytes(&[
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0
        ])
        .unwrap(),
        Scalar::zero()
    );

    assert_eq!(
        Scalar::from_bytes(&[
            1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0
        ])
        .unwrap(),
        Scalar::one()
    );

    // Decoding the integer R yields R in Montgomery form, whose limbs are R^2.
    assert_eq!(
        Scalar::from_bytes(&[
            243, 255, 255, 255, 255, 127, 28, 125, 242, 255, 255, 111, 15, 245, 87, 114, 238, 15,
            44, 81, 117, 21, 216, 22, 157, 154, 187, 43, 50, 218, 75, 13
        ])
        .unwrap(),
        r_squared()
    );

    // -1 should work
    assert!(
        Scalar::from_bytes(&[
            0, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180,
            96, 86, 165, 44, 154, 94, 101, 171, 18
        ])
        .is_some()
        .unwrap_u8()
            == 1
    );

    // modulus is invalid
    assert!(
        Scalar::from_bytes(&[
            1, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180,
            96, 86, 165, 44, 154, 94, 101, 171, 18
        ])
        .is_none()
        .unwrap_u8()
            == 1
    );

    // Anything larger than the modulus is invalid
    // (modulus + 1)
    assert!(
        Scalar::from_bytes(&[
            2, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180,
            96, 86, 165, 44, 154, 94, 101, 171, 18
        ])
        .is_none()
        .unwrap_u8()
            == 1
    );
    // (modulus with byte 17 incremented: 176 -> 177)
    assert!(
        Scalar::from_bytes(&[
            1, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 177, 55, 92, 30, 77, 180,
            96, 86, 165, 44, 154, 94, 101, 171, 18
        ])
        .is_none()
        .unwrap_u8()
            == 1
    );
    // (top byte 117 is larger than the modulus' top byte 18, so this value is
    // far above the modulus)
    assert!(
        Scalar::from_bytes(&[
            1, 0, 0, 0, 255, 255, 255, 255, 254, 91, 254, 255, 2, 164, 189, 83, 5, 216, 161, 9, 8,
            216, 57, 51, 72, 125, 157, 41, 83, 167, 237, 117
        ])
        .is_none()
        .unwrap_u8()
            == 1
    );
}
689 | 
#[test]
fn test_from_u512_zero() {
    // The modulus itself, zero-extended to 512 bits, must reduce to zero.
    let [q0, q1, q2, q3] = modulus().0;
    let reduced = Scalar::from_u512([q0, q1, q2, q3, 0, 0, 0, 0]);

    assert_eq!(Scalar::zero(), reduced);
}
707 | 
#[test]
fn test_from_u512_r() {
    // The integer 1 maps to the Montgomery representation of one, i.e. R.
    let wide_one = [1, 0, 0, 0, 0, 0, 0, 0];
    assert_eq!(r(), Scalar::from_u512(wide_one));
}
712 | 
#[test]
fn test_from_u512_r_squared() {
    // The integer 2^256 (limb 4 set) is R, whose Montgomery limbs are R^2.
    let two_pow_256 = [0, 0, 0, 0, 1, 0, 0, 0];
    assert_eq!(r_squared(), Scalar::from_u512(two_pow_256));
}
717 | 
#[test]
fn test_from_u512_max() {
    // 2^512 - 1 in Montgomery form is (R^2 - 1) * R = R^3 - R.
    let all_ones = [u64::max_value(); 8];
    let expected = r_cubed() - r();

    assert_eq!(expected, Scalar::from_u512(all_ones));
}
726 | 
#[test]
fn test_from_bytes_wide_r_squared() {
    // The low 32 bytes encode the integer R (the same vector as in
    // `test_to_bytes`) and the high 32 bytes are zero, so the result is R in
    // Montgomery form, i.e. the limbs of `r_squared()`.
    assert_eq!(
        r_squared(),
        Scalar::from_bytes_wide(&[
            243, 255, 255, 255, 255, 127, 28, 125, 242, 255, 255, 111, 15, 245, 87, 114, 238, 15,
            44, 81, 117, 21, 216, 22, 157, 154, 187, 43, 50, 218, 75, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ])
    );
}
738 | 
#[test]
fn test_from_bytes_wide_negative_one() {
    // The low 32 bytes encode q - 1 (the same vector as in `test_to_bytes`)
    // and the high 32 bytes are zero, so the result must equal -1.
    assert_eq!(
        -&Scalar::one(),
        Scalar::from_bytes_wide(&[
            0, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180, 96, 86,
            165, 44, 154, 94, 101, 171, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ])
    );
}
750 | 
#[test]
fn test_zero() {
    // Zero is a fixed point of negation, addition, subtraction and
    // multiplication with itself.
    let zero = Scalar::zero();

    assert_eq!(zero, -&zero);
    assert_eq!(zero, zero + zero);
    assert_eq!(zero, zero - zero);
    assert_eq!(zero, zero * zero);
}
758 | 
/// Raw limbs of `q - 1`, the largest value a reduced limb array can hold.
#[cfg(test)]
const LARGEST: Scalar = Scalar([
    0x0a11_8000_0000_0000,
    0x59aa_76fe_d000_0001,
    0x60b4_4d1e_5c37_b001,
    0x12ab_655e_9a2c_a556,
]);
766 | 
#[test]
fn test_addition() {
    // (q - 1) + (q - 1) wraps around the modulus once and leaves q - 2.
    let mut tmp = LARGEST;
    tmp += &LARGEST;

    assert_eq!(
        tmp,
        Scalar([
            0xa117fffffffffff,
            0x59aa76fed0000001,
            0x60b44d1e5c37b001,
            0x12ab655e9a2ca556,
        ])
    );

    // (q - 1) + 1 is exactly the modulus and must reduce to zero.
    let mut tmp = LARGEST;
    tmp += &Scalar([1, 0, 0, 0]);

    assert_eq!(tmp, Scalar::zero());
}
788 | 
#[test]
fn test_negation() {
    let raw_one = Scalar([1, 0, 0, 0]);

    // -(q - 1) has raw limbs equal to 1.
    let negated_largest = -&LARGEST;
    assert_eq!(negated_largest, raw_one);

    // Zero is its own negation.
    let negated_zero = -&Scalar::zero();
    assert_eq!(negated_zero, Scalar::zero());

    // And the reverse direction: negating raw limbs 1 gives q - 1.
    let negated_one = -&raw_one;
    assert_eq!(negated_one, LARGEST);
}
800 | 
#[test]
fn test_subtraction() {
    // x - x is zero.
    let mut self_difference = LARGEST;
    self_difference -= &LARGEST;
    assert_eq!(self_difference, Scalar::zero());

    // 0 - x must agree with q - x, i.e. an underflow wraps around the modulus.
    let mut from_zero = Scalar::zero();
    from_zero -= &LARGEST;

    let mut from_modulus = modulus();
    from_modulus -= &LARGEST;

    assert_eq!(from_zero, from_modulus);
}
816 | 
#[test]
fn test_multiplication() {
    let mut cur = LARGEST;

    for _ in 0..100 {
        // Value under test: cur * cur through the multiplication operator.
        let mut product = cur;
        product *= &cur;

        // Reference: double-and-add over the canonical bits of `cur`, most
        // significant bit first, using field additions only.
        let mut expected = Scalar::zero();
        let canonical = cur.to_bytes();
        for byte in canonical.iter().rev() {
            for shift in (0..8).rev() {
                let doubled = expected;
                expected.add_assign(&doubled);

                if ((byte >> shift) & 1u8) == 1u8 {
                    expected.add_assign(&cur);
                }
            }
        }

        assert_eq!(product, expected);

        cur.add_assign(&LARGEST);
    }
}
845 | 
#[test]
fn test_squaring() {
    let mut cur = LARGEST;

    for _ in 0..100 {
        // Value under test: the dedicated squaring routine.
        let squared = cur.square();

        // Reference: double-and-add over the canonical bits of `cur`, most
        // significant bit first, using field additions only.
        let mut expected = Scalar::zero();
        let canonical = cur.to_bytes();
        for byte in canonical.iter().rev() {
            for shift in (0..8).rev() {
                let doubled = expected;
                expected.add_assign(&doubled);

                if ((byte >> shift) & 1u8) == 1u8 {
                    expected.add_assign(&cur);
                }
            }
        }

        assert_eq!(squared, expected);

        cur.add_assign(&LARGEST);
    }
}
874 | 
#[test]
fn test_from_raw() {
    // The modulus is congruent to zero.
    let from_modulus = Scalar::from_raw(modulus().0);
    assert_eq!(from_modulus, Scalar::zero());

    // The integer 1 becomes R in Montgomery form.
    let from_one = Scalar::from_raw([1, 0, 0, 0]);
    assert_eq!(from_one, r());
}
881 | 
#[test]
fn test_double() {
    // `double` must agree with adding a value to itself.
    let a = Scalar::from_raw([
        0x1fff3231233ffffd,
        0x4884b7fa00034802,
        0x998c4fefecbc4ff3,
        0x1824b159acc50562,
    ]);

    let doubled = a.double();
    let summed = a + a;

    assert_eq!(doubled, summed);
}
893 | 


--------------------------------------------------------------------------------
/bls12_377/src/util.rs:
--------------------------------------------------------------------------------
/// The three possible values of a Legendre symbol, with the conventional
/// numeric discriminants 0, 1 and -1.
#[derive(Debug, PartialEq)]
pub enum LegendreSymbol {
    // NOTE(review): presumably the symbol of the zero element — confirm against users.
    Zero = 0,
    // NOTE(review): presumably a nonzero element that has a square root.
    QuadraticResidue = 1,
    // NOTE(review): presumably a nonzero element without a square root.
    QuadraticNonResidue = -1,
}
  7 | 
  8 | /// Compute a + b + carry, returning the result and the new carry over.
  9 | #[inline(always)]
 10 | pub fn adc(a: u64, b: u64, carry: u64) -> (u64, u64) {
 11 |     let ret = (a as u128) + (b as u128) + (carry as u128);
 12 |     (ret as u64, (ret >> 64) as u64)
 13 | }
 14 | 
 15 | /// Compute a - (b + borrow), returning the result and the new borrow.
 16 | #[inline(always)]
 17 | pub fn sbb(a: u64, b: u64, borrow: u64) -> (u64, u64) {
 18 |     let ret = (a as u128).wrapping_sub((b as u128) + ((borrow >> 63) as u128));
 19 |     (ret as u64, (ret >> 64) as u64)
 20 | }
 21 | 
 22 | /// Compute a + (b * c) + carry, returning the result and the new carry over.
 23 | pub fn mac(a: u64, b: u64, c: u64, carry: u64) -> (u64, u64) {
 24 |     let ret = (a as u128) + ((b as u128) * (c as u128)) + (carry as u128);
 25 |     (ret as u64, (ret >> 64) as u64)
 26 | }
 27 | 
 28 | macro_rules! impl_add_binop_specify_output {
 29 |     ($lhs:ident, $rhs:ident, $output:ident) => {
 30 |         impl<'b> Add<&'b $rhs> for $lhs {
 31 |             type Output = $output;
 32 | 
 33 |             #[inline]
 34 |             fn add(self, rhs: &'b $rhs) -> $output {
 35 |                 &self + rhs
 36 |             }
 37 |         }
 38 | 
 39 |         impl<'a> Add<$rhs> for &'a $lhs {
 40 |             type Output = $output;
 41 | 
 42 |             #[inline]
 43 |             fn add(self, rhs: $rhs) -> $output {
 44 |                 self + &rhs
 45 |             }
 46 |         }
 47 | 
 48 |         impl Add<$rhs> for $lhs {
 49 |             type Output = $output;
 50 | 
 51 |             #[inline]
 52 |             fn add(self, rhs: $rhs) -> $output {
 53 |                 &self + &rhs
 54 |             }
 55 |         }
 56 |     };
 57 | }
 58 | 
 59 | macro_rules! impl_sub_binop_specify_output {
 60 |     ($lhs:ident, $rhs:ident, $output:ident) => {
 61 |         impl<'b> Sub<&'b $rhs> for $lhs {
 62 |             type Output = $output;
 63 | 
 64 |             #[inline]
 65 |             fn sub(self, rhs: &'b $rhs) -> $output {
 66 |                 &self - rhs
 67 |             }
 68 |         }
 69 | 
 70 |         impl<'a> Sub<$rhs> for &'a $lhs {
 71 |             type Output = $output;
 72 | 
 73 |             #[inline]
 74 |             fn sub(self, rhs: $rhs) -> $output {
 75 |                 self - &rhs
 76 |             }
 77 |         }
 78 | 
 79 |         impl Sub<$rhs> for $lhs {
 80 |             type Output = $output;
 81 | 
 82 |             #[inline]
 83 |             fn sub(self, rhs: $rhs) -> $output {
 84 |                 &self - &rhs
 85 |             }
 86 |         }
 87 |     };
 88 | }
 89 | 
 90 | macro_rules! impl_binops_additive_specify_output {
 91 |     ($lhs:ident, $rhs:ident, $output:ident) => {
 92 |         impl_add_binop_specify_output!($lhs, $rhs, $output);
 93 |         impl_sub_binop_specify_output!($lhs, $rhs, $output);
 94 |     };
 95 | }
 96 | 
 97 | macro_rules! impl_binops_multiplicative_mixed {
 98 |     ($lhs:ident, $rhs:ident, $output:ident) => {
 99 |         impl<'b> Mul<&'b $rhs> for $lhs {
100 |             type Output = $output;
101 | 
102 |             #[inline]
103 |             fn mul(self, rhs: &'b $rhs) -> $output {
104 |                 &self * rhs
105 |             }
106 |         }
107 | 
108 |         impl<'a> Mul<$rhs> for &'a $lhs {
109 |             type Output = $output;
110 | 
111 |             #[inline]
112 |             fn mul(self, rhs: $rhs) -> $output {
113 |                 self * &rhs
114 |             }
115 |         }
116 | 
117 |         impl Mul<$rhs> for $lhs {
118 |             type Output = $output;
119 | 
120 |             #[inline]
121 |             fn mul(self, rhs: $rhs) -> $output {
122 |                 &self * &rhs
123 |             }
124 |         }
125 |     };
126 | }
127 | 
/// Generates the full set of additive operators for `$lhs` with `$rhs`,
/// where the result type is `$lhs` itself.
///
/// Expands `impl_binops_additive_specify_output!` for the `Add`/`Sub`
/// forwarding impls, then adds `AddAssign` and `SubAssign` for both `$rhs`
/// and `&$rhs`. All of them delegate to `&$lhs + &$rhs` / `&$lhs - &$rhs`,
/// which must already exist. `AddAssign` and `SubAssign` are referenced
/// unqualified, so they must be in scope at the invocation site.
macro_rules! impl_binops_additive {
    ($lhs:ident, $rhs:ident) => {
        impl_binops_additive_specify_output!($lhs, $rhs, $lhs);

        // In-place addition, by value and by reference.
        impl AddAssign<$rhs> for $lhs {
            #[inline]
            fn add_assign(&mut self, other: $rhs) {
                *self = &*self + &other;
            }
        }

        impl<'r> AddAssign<&'r $rhs> for $lhs {
            #[inline]
            fn add_assign(&mut self, other: &'r $rhs) {
                *self = &*self + other;
            }
        }

        // In-place subtraction, by value and by reference.
        impl SubAssign<$rhs> for $lhs {
            #[inline]
            fn sub_assign(&mut self, other: $rhs) {
                *self = &*self - &other;
            }
        }

        impl<'r> SubAssign<&'r $rhs> for $lhs {
            #[inline]
            fn sub_assign(&mut self, other: &'r $rhs) {
                *self = &*self - other;
            }
        }
    };
}
161 | 
/// Generates the full set of multiplicative operators for `$lhs` with
/// `$rhs`, where the result type is `$lhs` itself.
///
/// Expands `impl_binops_multiplicative_mixed!` for the `Mul` forwarding
/// impls, then adds `MulAssign` for both `$rhs` and `&$rhs`. All of them
/// delegate to `&$lhs * &$rhs`, which must already exist. `MulAssign` is
/// referenced unqualified, so it must be in scope at the invocation site.
macro_rules! impl_binops_multiplicative {
    ($lhs:ident, $rhs:ident) => {
        impl_binops_multiplicative_mixed!($lhs, $rhs, $lhs);

        // In-place multiplication by a borrowed right-hand side.
        impl<'r> MulAssign<&'r $rhs> for $lhs {
            #[inline]
            fn mul_assign(&mut self, other: &'r $rhs) {
                *self = &*self * other;
            }
        }

        // In-place multiplication by an owned right-hand side.
        impl MulAssign<$rhs> for $lhs {
            #[inline]
            fn mul_assign(&mut self, other: $rhs) {
                *self = &*self * &other;
            }
        }
    };
}
181 | 


--------------------------------------------------------------------------------