├── .DS_Store ├── .gitignore ├── .gitmodules ├── README.md ├── bls ├── Cargo.lock ├── Cargo.toml ├── bls-embedded.h ├── cbindgen.toml └── src │ ├── bls │ ├── keys.rs │ └── mod.rs │ ├── build.rs │ ├── error.rs │ └── lib.rs └── bls12_377 ├── .DS_Store ├── .gitignore ├── COPYRIGHT ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── Makefile ├── benches └── groups.rs ├── rust-toolchain └── src ├── fp.rs ├── fp2.rs ├── fp_asm.S ├── fp_mont.cpp ├── fp_mont.h ├── fpc.cpp ├── fq_asm.s ├── g1.rs ├── g2.rs ├── lib.rs ├── scalar.rs └── util.rs /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/celo-org/bls-embedded/931ab609987daa4b2d6ca68edf7aab077039f553/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.rs.bk 2 | *.swp 3 | go/cmd/example/example 4 | Cargo.lock 5 | target 6 | testproj 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/celo-org/bls-embedded/931ab609987daa4b2d6ca68edf7aab077039f553/.gitmodules -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bls-embedded 2 | 3 | Install Rust: 4 | 5 | `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh` 6 | 7 | Run with: 8 | ``` 9 | cd bls12_377 10 | cargo build 11 | cargo test 12 | cargo bench 13 | cargo bench -- Fp_m 14 | cargo bench -- G2Projective_s 15 | ``` 16 | -------------------------------------------------------------------------------- /bls/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 
2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "bls12_377" 5 | version = "0.0.0" 6 | dependencies = [ 7 | "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 8 | "subtle 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 9 | ] 10 | 11 | [[package]] 12 | name = "bls_embedded" 13 | version = "0.1.0" 14 | dependencies = [ 15 | "bls12_377 0.0.0", 16 | "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", 17 | "subtle 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 18 | ] 19 | 20 | [[package]] 21 | name = "byteorder" 22 | version = "1.3.2" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | 25 | [[package]] 26 | name = "libc" 27 | version = "0.2.65" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | 30 | [[package]] 31 | name = "subtle" 32 | version = "2.1.1" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | 35 | [metadata] 36 | "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" 37 | "checksum libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)" = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8" 38 | "checksum subtle 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "01f40907d9ffc762709e4ff3eb4a6f6b41b650375a3f09ac92b641942b7fb082" 39 | -------------------------------------------------------------------------------- /bls/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bls_embedded" 3 | version = "0.1.0" 4 | authors = ["Michael Straka "] 5 | edition = "2018" 6 | build = "src/build.rs" 7 | 8 | [dependencies] 9 | bls12_377 = { path = "../bls12_377" } 10 | libc = "0.2" 11 | 12 | [dependencies.subtle] 13 | version = "2.1" 14 | default-features = false 15 | 16 | [lib] 17 | name = "bls_embedded" 
18 | path = "src/lib.rs" 19 | crate-type = ["staticlib"] 20 | opt-level = "s" 21 | 22 | [features] 23 | gen_header = [] 24 | 25 | [profile.dev] 26 | panic= "abort" 27 | 28 | [profile.release] 29 | opt-level = 3 30 | panic= "abort" 31 | -------------------------------------------------------------------------------- /bls/bls-embedded.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | typedef struct PrivateKey PrivateKey; 7 | 8 | typedef struct PublicKey PublicKey; 9 | 10 | typedef struct Signature Signature; 11 | 12 | bool aggregate_public_keys(const PublicKey *const *_in_public_keys, 13 | int32_t _in_public_keys_len, 14 | PublicKey **_out_public_key); 15 | 16 | bool aggregate_public_keys_subtract(const PublicKey *_in_aggregated_public_key, 17 | const PublicKey *const *_in_public_keys, 18 | int32_t _in_public_keys_len, 19 | PublicKey **_out_public_key); 20 | 21 | bool aggregate_signatures(const Signature *const *_in_signatures, 22 | int32_t _in_signatures_len, 23 | Signature **_out_signature); 24 | 25 | bool deserialize_private_key(const uint8_t *_in_private_key_bytes, 26 | int32_t _in_private_key_bytes_len, 27 | PrivateKey **_out_private_key); 28 | 29 | bool deserialize_public_key(const uint8_t *_in_public_key_bytes, 30 | int32_t _in_public_key_bytes_len, 31 | PublicKey **_out_public_key); 32 | 33 | bool deserialize_signature(const uint8_t *_in_signature_bytes, 34 | int32_t _in_signature_bytes_len, 35 | Signature **_out_signature); 36 | 37 | void destroy_private_key(PrivateKey *_private_key); 38 | 39 | void destroy_public_key(PublicKey *_public_key); 40 | 41 | void destroy_signature(Signature *_signature); 42 | 43 | void free_vec(uint8_t *_bytes, int32_t _len); 44 | 45 | bool generate_private_key(PrivateKey **out_private_key); 46 | 47 | bool generate_signature(Signature **out_signature); 48 | 49 | bool get_pubkey(uint64_t *in_private_key, uint8_t *out_public_key); 50 | 51 | 
bool is_valid_key(const uint8_t *in_private_key); 52 | 53 | bool private_key_to_public_key(const PrivateKey *in_private_key, PublicKey **out_public_key); 54 | 55 | bool serialize_private_key(const PrivateKey *_in_private_key, 56 | uint8_t **_out_bytes, 57 | int32_t *_out_len); 58 | 59 | bool serialize_public_key(const PublicKey *_in_public_key, uint8_t **_out_bytes, int32_t *_out_len); 60 | 61 | bool serialize_signature(const Signature *_in_signature, uint8_t **_out_bytes, int32_t *_out_len); 62 | 63 | bool sign_hash(uint64_t *in_private_key, uint8_t *in_hash, uint8_t *out_signature); 64 | 65 | bool sign_message(uint64_t *in_private_key, 66 | const uint8_t *in_message, 67 | int32_t in_message_len, 68 | const uint8_t *in_extra_data, 69 | int32_t in_extra_data_len, 70 | bool should_use_composite); 71 | 72 | bool sign_pop(const PrivateKey *_in_private_key, Signature **_out_signature); 73 | 74 | bool verify_pop(const PublicKey *_in_public_key, 75 | const Signature *_in_signature, 76 | bool *_out_verified); 77 | 78 | bool verify_signature(const PublicKey *_in_public_key, 79 | const uint8_t *_in_message, 80 | int32_t _in_message_len, 81 | const uint8_t *_in_extra_data, 82 | int32_t _in_extra_data_len, 83 | const Signature *_in_signature, 84 | bool _should_use_composite, 85 | bool *_out_verified); 86 | -------------------------------------------------------------------------------- /bls/cbindgen.toml: -------------------------------------------------------------------------------- 1 | language = "C" 2 | -------------------------------------------------------------------------------- /bls/src/bls/keys.rs: -------------------------------------------------------------------------------- 1 | use bls12_377::{Scalar, G1Affine, G2Affine, G1Projective, G2Projective}; 2 | use crate::error::ErrorCode; 3 | use core::ops::Mul; 4 | 5 | pub struct PrivateKey { 6 | sk: Scalar, 7 | } 8 | 9 | impl PrivateKey { 10 | pub fn from_scalar(s: &Scalar) -> Self { 11 | Self { sk: s.clone() } 12 | 
} 13 | 14 | pub fn to_public(&self) -> PublicKey { 15 | PublicKey::from_pk(&(G2Projective::generator() * &self.sk)) 16 | } 17 | 18 | #[inline(always)] 19 | pub fn sign_hash(&self, hash: &[u8; 96]) -> Result { 20 | let hash_elem = G1Affine::from_uncompressed_unchecked_vartime(hash).unwrap(); 21 | Ok(Signature::from_sig(&hash_elem.mul(&self.sk))) 22 | } 23 | } 24 | 25 | #[derive(Copy, Clone, Debug)] 26 | pub struct PublicKey { 27 | pk: G2Projective, 28 | } 29 | 30 | impl PublicKey { 31 | pub fn from_pk(pk: &G2Projective) -> PublicKey { 32 | PublicKey { pk: pk.clone() } 33 | } 34 | 35 | #[inline(always)] 36 | pub fn serialize(&self) -> [u8; 192] { 37 | G2Affine::from(&self.pk).to_uncompressed_littleendian() 38 | } 39 | } 40 | impl Eq for PublicKey {} 41 | impl PartialEq for PublicKey { 42 | fn eq(&self, other: &Self) -> bool { 43 | self.pk == other.pk 44 | } 45 | } 46 | 47 | #[derive(Copy, Clone, Debug)] 48 | pub struct Signature { 49 | sig: G1Projective, 50 | } 51 | 52 | impl Signature { 53 | #[inline(always)] 54 | pub fn from_sig(sig: &G1Projective) -> Signature { 55 | Signature { sig: sig.clone() } 56 | } 57 | 58 | #[inline(always)] 59 | pub fn serialize(&self) -> [u8; 96] { 60 | G1Affine::from(self.sig).to_uncompressed_littleendian() 61 | } 62 | } 63 | impl Eq for Signature {} impl PartialEq for Signature { 64 | fn eq(&self, other: &Self) -> bool { 65 | self.sig == other.sig 66 | } 67 | } 68 | 69 | #[test] 70 | fn test_signature_serialization() { 71 | let elem = [0, 23, 5, 45, 78, 62, 182, 66, 211, 46, 244, 152, 154, 242, 83, 204, 42, 48, 173, 55, 108, 232, 240, 178, 60, 146, 185, 135, 233, 92, 199, 24, 208, 32, 114, 187, 120, 211, 124, 9, 253, 118, 247, 1, 78, 236, 247, 151, 1, 108, 32, 107, 231, 56, 191, 70, 68, 250, 255, 16, 187, 130, 177, 159, 111, 7, 119, 153, 3, 166, 173, 37, 36, 128, 156, 226, 159, 148, 104, 59, 227, 43, 189, 208, 114, 236, 11, 230, 106, 224, 237, 13, 135, 129, 242, 119]; 72 | let elem_result = Signature { sig: (G1Projective::generator() * 
&Scalar::from(5)) }.serialize(); 73 | assert_eq!(&elem[..], &elem_result[..]); 74 | } 75 | 76 | #[test] 77 | fn test_publickey_serialization() { 78 | let elem = [0, 31, 183, 170, 199, 212, 167, 3, 66, 81, 201, 4, 241, 48, 79, 223, 24, 52, 101, 225, 116, 36, 166, 246, 213, 127, 77, 200, 154, 183, 73, 53, 249, 207, 6, 102, 170, 157, 11, 128, 177, 20, 254, 185, 15, 142, 231, 68, 0, 222, 228, 89, 156, 13, 254, 199, 91, 133, 241, 129, 173, 74, 215, 198, 210, 32, 83, 154, 161, 153, 255, 92, 239, 64, 69, 147, 39, 48, 118, 242, 26, 126, 220, 109, 229, 226, 101, 150, 25, 228, 38, 133, 96, 89, 73, 238, 0, 105, 186, 188, 162, 17, 191, 123, 4, 159, 165, 161, 68, 105, 85, 121, 63, 19, 169, 22, 165, 195, 165, 66, 206, 1, 108, 166, 186, 198, 49, 232, 110, 212, 243, 6, 4, 6, 2, 95, 165, 241, 12, 160, 98, 34, 217, 143, 1, 42, 244, 0, 161, 173, 241, 170, 146, 11, 183, 159, 9, 30, 138, 40, 3, 30, 231, 111, 97, 118, 217, 229, 221, 205, 106, 218, 224, 24, 116, 233, 237, 223, 225, 180, 55, 239, 219, 248, 119, 10, 49, 96, 145, 22, 219, 26]; 79 | let elem_result = PublicKey { pk: (G2Projective::generator() * &Scalar::from(5)) }.serialize(); 80 | assert_eq!(&elem[..], &elem_result[..]); 81 | } 82 | 83 | #[test] 84 | fn test_pubkey_derivation() { 85 | let priv_key = PrivateKey { sk: Scalar::from_bytes(&[52, 163, 121, 115, 149, 19, 242, 110, 13, 231, 110, 40, 146, 248, 62, 119, 87, 214, 200, 159, 51, 41, 164, 239, 155, 241, 173, 219, 230, 185, 133, 3]).unwrap() }; 86 | let pub_key = PublicKey { pk: G2Projective::from(G2Affine::from_uncompressed(&[1, 65, 146, 224, 231, 36, 217, 8, 154, 9, 197, 85, 87, 10, 60, 10, 116, 199, 107, 77, 65, 110, 195, 241, 61, 149, 135, 254, 254, 231, 193, 180, 204, 158, 62, 152, 255, 162, 62, 57, 242, 63, 232, 173, 205, 118, 153, 74, 0, 33, 97, 106, 240, 49, 100, 155, 187, 111, 209, 35, 149, 158, 19, 5, 53, 161, 255, 29, 150, 27, 180, 76, 35, 128, 168, 52, 28, 185, 165, 29, 3, 171, 74, 204, 98, 167, 76, 26, 163, 61, 205, 9, 165, 185, 175, 92, 0, 255, 19, 80, 75, 
234, 65, 82, 108, 145, 163, 112, 232, 187, 181, 136, 5, 148, 204, 65, 187, 54, 121, 249, 199, 164, 107, 239, 193, 46, 94, 130, 16, 4, 237, 46, 67, 32, 180, 185, 63, 12, 189, 114, 59, 70, 32, 214, 1, 103, 254, 116, 159, 104, 88, 88, 209, 241, 131, 173, 192, 119, 152, 28, 214, 52, 212, 168, 14, 233, 120, 89, 97, 233, 93, 236, 94, 172, 27, 173, 64, 49, 117, 213, 228, 168, 212, 232, 114, 121, 204, 16, 246, 121, 184, 81]).unwrap()) }; 87 | let pub_key_result = priv_key.to_public(); 88 | assert_eq!(G2Affine::from(pub_key.pk), G2Affine::from(pub_key_result.pk)); 89 | } 90 | 91 | #[test] 92 | fn test_sign_hash() { 93 | let pk = PrivateKey { sk: Scalar::from_bytes(&[10, 145, 220, 128, 41, 236, 187, 134, 47, 34, 61, 132, 196, 20, 201, 239, 33, 80, 184, 182, 49, 79, 15, 212, 4, 73, 201, 248, 74, 226, 158, 12]).unwrap() }; 94 | let hash = [1, 95, 34, 213, 221, 202, 70, 0, 221, 118, 193, 93, 225, 200, 19, 73, 208, 8, 176, 53, 150, 73, 22, 154, 1, 71, 181, 38, 9, 102, 191, 35, 227, 112, 10, 208, 171, 43, 191, 43, 110, 164, 130, 8, 57, 101, 243, 19, 1, 47, 253, 198, 50, 95, 79, 61, 237, 164, 140, 88, 176, 124, 187, 181, 163, 22, 62, 109, 184, 189, 146, 112, 115, 9, 160, 33, 102, 163, 4, 181, 208, 41, 88, 149, 177, 103, 137, 99, 174, 49, 132, 6, 227, 20, 225, 203]; 95 | let sig = Signature { sig: G1Projective::from(G1Affine::from_uncompressed(&[0, 197, 168, 175, 148, 226, 242, 59, 146, 38, 132, 5, 184, 97, 42, 143, 165, 173, 21, 4, 175, 57, 168, 90, 6, 88, 106, 216, 57, 126, 148, 208, 236, 146, 120, 249, 251, 21, 170, 84, 108, 46, 219, 72, 123, 118, 141, 23, 0, 137, 235, 28, 241, 199, 129, 202, 64, 124, 156, 28, 68, 75, 151, 18, 63, 110, 16, 210, 132, 222, 210, 134, 75, 135, 25, 6, 230, 9, 243, 11, 153, 183, 8, 154, 242, 128, 46, 134, 60, 59, 123, 187, 193, 124, 30, 238]).unwrap()) }; 96 | 97 | let sig_result = pk.sign_hash(&hash).unwrap(); 98 | assert_eq!(G1Affine::from(sig.sig), G1Affine::from(sig_result.sig)); 99 | } 100 | 
-------------------------------------------------------------------------------- /bls/src/bls/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod keys; 2 | -------------------------------------------------------------------------------- /bls/src/build.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature="gen_header")] 2 | extern crate cbindgen; 3 | 4 | #[cfg(feature="gen_header")] 5 | fn main() { 6 | let crate_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); 7 | 8 | cbindgen::generate(crate_dir) 9 | .expect("Unable to generate C bindings.") 10 | .write_to_file("./bls-embedded.h"); 11 | } 12 | 13 | #[cfg(not(feature="gen_header"))] 14 | fn main() { 15 | } 16 | -------------------------------------------------------------------------------- /bls/src/error.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug)] 2 | #[repr(u16)] 3 | pub enum ErrorCode { 4 | Error = 1, 5 | } 6 | -------------------------------------------------------------------------------- /bls/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(not(gen_header), no_std)] 2 | #![no_std] 3 | extern crate libc; 4 | 5 | pub mod bls; 6 | pub mod error; 7 | 8 | use bls12_377::Scalar; 9 | use crate::bls::keys::PrivateKey; 10 | use subtle::CtOption; 11 | use core::ptr::copy; 12 | 13 | use core::slice; 14 | 15 | #[cfg(not(gen_header))] 16 | #[panic_handler] 17 | fn my_panic(_info: &core::panic::PanicInfo) -> ! 
{ 18 | loop {} 19 | } 20 | 21 | fn convert_result_to_bool Result>(f: F) -> bool { 22 | match f() { 23 | Err(e) => { 24 | false 25 | } 26 | _ => true, 27 | } 28 | } 29 | 30 | #[no_mangle] 31 | pub extern "C" fn is_valid_key(in_private_key: *const u8) -> bool { 32 | let pk_array = in_private_key as *const [u8; 32]; 33 | let priv_key = unsafe { Scalar::from_bytes(&*pk_array) } ; 34 | bool::from(CtOption::is_some(&priv_key)) 35 | } 36 | 37 | #[no_mangle] 38 | pub extern "C" fn sign_hash( 39 | in_private_key: *mut u64, 40 | in_hash: *mut u8, 41 | out_signature: *mut u8, 42 | ) -> bool { 43 | let pk_array = in_private_key as *mut [u64; 4]; 44 | let private_key = unsafe { PrivateKey::from_scalar(&Scalar::from_raw(*pk_array)) }; 45 | let hash = unsafe { slice::from_raw_parts(in_hash, 96) }; 46 | let mut hash_arr: [u8; 96] = [0; 96]; 47 | hash_arr.copy_from_slice(&hash[0..96]); 48 | let sig = private_key.sign_hash(&hash_arr).unwrap(); 49 | let sig_arr = sig.serialize(); 50 | unsafe { copy(sig_arr.as_ptr(), out_signature, 96); }; 51 | true 52 | } 53 | 54 | #[no_mangle] 55 | pub extern "C" fn get_pubkey( 56 | in_private_key: *mut u64, 57 | out_public_key: *mut u8, 58 | ) -> bool { 59 | let private_key = unsafe { PrivateKey::from_scalar(&Scalar::from_raw(*(in_private_key as *mut [u64; 4]))) }; 60 | let pub_arr = private_key.to_public().serialize(); 61 | unsafe { copy(pub_arr.as_ptr(), out_public_key, 192) }; 62 | true 63 | } 64 | -------------------------------------------------------------------------------- /bls12_377/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/celo-org/bls-embedded/931ab609987daa4b2d6ca68edf7aab077039f553/bls12_377/.DS_Store -------------------------------------------------------------------------------- /bls12_377/.gitignore: -------------------------------------------------------------------------------- 1 | libfpc.a 2 | 
-------------------------------------------------------------------------------- /bls12_377/COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyrights in the "bls12_377" library are retained by their contributors. No 2 | copyright assignment is required to contribute to the "bls12_377" library. 3 | 4 | The "bls12_377" library is licensed under either of 5 | 6 | * Apache License, Version 2.0, (see ./LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) 7 | * MIT license (see ./LICENSE-MIT or http://opensource.org/licenses/MIT) 8 | 9 | at your option. 10 | 11 | Unless you explicitly state otherwise, any contribution intentionally 12 | submitted for inclusion in the work by you, as defined in the Apache-2.0 13 | license, shall be dual licensed as above, without any additional terms or 14 | conditions. 15 | -------------------------------------------------------------------------------- /bls12_377/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Sean Bowe ", "Michael Straka "] 3 | description = "Implementation of the BLS12-377 pairing-friendly elliptic curve construction" 4 | license = "MIT/Apache-2.0" 5 | name = "bls12_377" 6 | repository = "https://github.com/zkcrypto/bls12_381" 7 | version = "0.0.0" 8 | edition = "2018" 9 | 10 | [package.metadata.docs.rs] 11 | rustdoc-args = [ "--html-in-header", "katex-header.html" ] 12 | 13 | [dev-dependencies] 14 | criterion = "0.2.11" 15 | 16 | [[bench]] 17 | name = "groups" 18 | harness = false 19 | required-features = ["groups"] 20 | 21 | [dependencies.subtle] 22 | version = "2.1" 23 | default-features = false 24 | 25 | [dependencies.byteorder] 26 | version = "1" 27 | default-features = false 28 | 29 | [features] 30 | default = ["groups", "pairings"] 31 | groups = [] 32 | pairings = ["groups"] 33 | nightly = ["subtle/nightly"] 34 | 
-------------------------------------------------------------------------------- /bls12_377/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /bls12_377/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /bls12_377/Makefile: -------------------------------------------------------------------------------- 1 | AR=ar 2 | CC=gcc 3 | CXX=g++ 4 | CPPFLAGS=-march=armv7-m -mcpu=cortex-m3 -mthumb -O3 -funroll-all-loops -fconserve-stack 5 | #CPPFLAGS=-march=native -O2 -funroll-all-loops 6 | 7 | all: libfpc.a 8 | touch src/fp.rs 9 | 10 | libfpc.a: libfpc.a(fpc.o fp_mont.o fp_asm.o fq_asm.o) 11 | 12 | %.o: src/%.cpp 13 | $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@ 14 | 15 | %.o: src/%.c 16 | $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@ 17 | 18 | %.o: src/%.s 19 | $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@ 20 | 21 | %.o: src/%.S 22 | $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@ 23 | 24 | test: libfpc.a 25 | RUSTFLAGS="-L `pwd`" cargo test 26 | 27 | bench: libfpc.a 28 | RUSTFLAGS="-L `pwd`" cargo bench -- Fp_m 29 | 30 | bench2: libfpc.a 31 | RUSTFLAGS="-L `pwd`" cargo bench -- G2Projective_s 32 | 33 | bench1: libfpc.a 34 | RUSTFLAGS="-L `pwd`" cargo bench -- G1Projective_s 35 | 36 | 37 | 38 | rsync: 39 | rsync -ar --progress --exclude=target --exclude='*.a' . 
tinkerboard:bls-embedded/bls12_377 40 | -------------------------------------------------------------------------------- /bls12_377/benches/groups.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | extern crate bls12_377; 5 | use bls12_377::*; 6 | use bls12_377::fp::Fp; 7 | 8 | use criterion::{black_box, Criterion}; 9 | 10 | fn criterion_benchmark(c: &mut Criterion) { 11 | // G1Affine 12 | { 13 | let name = "G1Affine"; 14 | let a = G1Affine::generator(); 15 | let s = Scalar::from_raw([1, 2, 3, 4]); 16 | let compressed = [0u8; 48]; 17 | let uncompressed = [0u8; 96]; 18 | c.bench_function(&format!("{}_check_on_curve", name), move |b| { 19 | b.iter(|| black_box(a).is_on_curve()) 20 | }); 21 | c.bench_function(&format!("{}_check_equality", name), move |b| { 22 | b.iter(|| black_box(a) == black_box(a)) 23 | }); 24 | c.bench_function(&format!("{}_scalar_multiplication", name), move |b| { 25 | b.iter(|| black_box(a) * black_box(s)) 26 | }); 27 | c.bench_function(&format!("{}_subgroup_check", name), move |b| { 28 | b.iter(|| black_box(a).is_torsion_free()) 29 | }); 30 | c.bench_function( 31 | &format!("{} deserialize uncompressed point", name), 32 | move |b| b.iter(|| G1Affine::from_uncompressed(black_box(&uncompressed))), 33 | ); 34 | } 35 | 36 | // G1Projective 37 | { 38 | let name = "G1Projective"; 39 | let a = G1Projective::generator(); 40 | let a_affine = G1Affine::generator(); 41 | let s = Scalar::from_raw([1, 2, 3, 4]); 42 | 43 | const N: usize = 10000; 44 | let v = vec![G1Projective::generator(); N]; 45 | let mut q = vec![G1Affine::identity(); N]; 46 | 47 | c.bench_function(&format!("{}_check_on_curve", name), move |b| { 48 | b.iter(|| black_box(a).is_on_curve()) 49 | }); 50 | c.bench_function(&format!("{}_check_equality", name), move |b| { 51 | b.iter(|| black_box(a) == black_box(a)) 52 | }); 53 | c.bench_function(&format!("{}_to_affine", name), move |b| { 54 | b.iter(|| 
G1Affine::from(black_box(a))) 55 | }); 56 | c.bench_function(&format!("{}_doubling", name), move |b| { 57 | b.iter(|| black_box(a).double()) 58 | }); 59 | c.bench_function(&format!("{}_addition", name), move |b| { 60 | b.iter(|| black_box(a).add(&a)) 61 | }); 62 | c.bench_function(&format!("{}_mixed_addition", name), move |b| { 63 | b.iter(|| black_box(a).add_mixed(&a_affine)) 64 | }); 65 | c.bench_function(&format!("{}_scalar_multiplication", name), move |b| { 66 | b.iter(|| black_box(a) * black_box(s)) 67 | }); 68 | c.bench_function(&format!("{}_batch_to_affine_n={}", name, N), move |b| { 69 | b.iter(|| { 70 | G1Projective::batch_normalize(black_box(&v), black_box(&mut q)); 71 | black_box(&q)[0] 72 | }) 73 | }); 74 | } 75 | 76 | // G2Affine 77 | { 78 | let name = "G2Affine"; 79 | let a = G2Affine::generator(); 80 | let s = Scalar::from_raw([1, 2, 3, 4]); 81 | let compressed = [0u8; 96]; 82 | let uncompressed = [0u8; 192]; 83 | c.bench_function(&format!("{}_check_on_curve", name), move |b| { 84 | b.iter(|| black_box(a).is_on_curve()) 85 | }); 86 | c.bench_function(&format!("{}_check_equality", name), move |b| { 87 | b.iter(|| black_box(a) == black_box(a)) 88 | }); 89 | c.bench_function(&format!("{}_scalar_multiplication", name), move |b| { 90 | b.iter(|| black_box(a) * black_box(s)) 91 | }); 92 | c.bench_function(&format!("{}_subgroup_check", name), move |b| { 93 | b.iter(|| black_box(a).is_torsion_free()) 94 | }); 95 | c.bench_function( 96 | &format!("{} deserialize uncompressed point", name), 97 | move |b| b.iter(|| G2Affine::from_uncompressed(black_box(&uncompressed))), 98 | ); 99 | } 100 | 101 | // G2Projective 102 | { 103 | let name = "G2Projective"; 104 | let a = G2Projective::generator(); 105 | let a_affine = G2Affine::generator(); 106 | let s = Scalar::from_raw([1, 2, 3, 4]); 107 | 108 | const N: usize = 10000; 109 | let v = vec![G2Projective::generator(); N]; 110 | let mut q = vec![G2Affine::identity(); N]; 111 | 112 | 
c.bench_function(&format!("{}_check_on_curve", name), move |b| { 113 | b.iter(|| black_box(a).is_on_curve()) 114 | }); 115 | c.bench_function(&format!("{}_check_equality", name), move |b| { 116 | b.iter(|| black_box(a) == black_box(a)) 117 | }); 118 | c.bench_function(&format!("{}_to_affine", name), move |b| { 119 | b.iter(|| G2Affine::from(black_box(a))) 120 | }); 121 | c.bench_function(&format!("{}_doubling", name), move |b| { 122 | b.iter(|| black_box(a).double()) 123 | }); 124 | c.bench_function(&format!("{}_addition", name), move |b| { 125 | b.iter(|| black_box(a).add(&a)) 126 | }); 127 | c.bench_function(&format!("{}_mixed_addition", name), move |b| { 128 | b.iter(|| black_box(a).add_mixed(&a_affine)) 129 | }); 130 | c.bench_function(&format!("{}_scalar_multiplication", name), move |b| { 131 | b.iter(|| black_box(a) * black_box(s)) 132 | }); 133 | c.bench_function(&format!("{}_batch_to_affine_n={}", name, N), move |b| { 134 | b.iter(|| { 135 | G2Projective::batch_normalize(black_box(&v), black_box(&mut q)); 136 | black_box(&q)[0] 137 | }) 138 | }); 139 | } 140 | // Fp Arithmetic 141 | { 142 | let x = Fp::one(); 143 | let y = Fp::one(); 144 | c.bench_function("Fp_multiplication_new", 145 | move |b| { 146 | b.iter(|| black_box(x).mul(&black_box(y))) 147 | }); 148 | c.bench_function("Fp_multiplication_old", 149 | move |b| { 150 | b.iter(|| black_box(x).mul_old(&black_box(y))) 151 | }); 152 | c.bench_function("Fp_inverse", 153 | move |b| { 154 | b.iter(|| black_box(x).invert()) 155 | }); 156 | } 157 | } 158 | 159 | criterion_group!(benches, criterion_benchmark); 160 | criterion_main!(benches); 161 | -------------------------------------------------------------------------------- /bls12_377/rust-toolchain: -------------------------------------------------------------------------------- 1 | 1.36.0 -------------------------------------------------------------------------------- /bls12_377/src/fp.rs: 
-------------------------------------------------------------------------------- 1 | //! This module provides an implementation of the BLS12-377 base field `GF(p)` where `p = 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177` 2 | 3 | use core::mem; 4 | use core::fmt; 5 | use core::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}; 6 | 7 | use byteorder::{BigEndian, ByteOrder, LittleEndian}; 8 | use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption}; 9 | 10 | use crate::util::{adc, mac, sbb, LegendreSymbol}; 11 | 12 | #[link(name="fpc", kind="static")] 13 | extern { 14 | fn c_mul( 15 | output: *mut u64, 16 | left: *const u64, 17 | right: *const u64 18 | ) -> (); 19 | 20 | fn c_montgomry( 21 | output: *mut u64, 22 | tmp: *const u64, 23 | ) -> (); 24 | } 25 | 26 | // The internal representation of this type is six 64-bit unsigned 27 | // integers in little-endian order. `Fp` values are always in 28 | // Montgomery form; i.e., Fp(a) = aR mod p, with R = 2^384.
29 | #[derive(Copy, Clone)] 30 | pub struct Fp([u64; 6]); 31 | 32 | impl fmt::Debug for Fp { 33 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 34 | let tmp = self.to_bytes(); 35 | write!(f, "0x")?; 36 | for &b in tmp.iter() { 37 | write!(f, "{:02x}", b)?; 38 | } 39 | Ok(()) 40 | } 41 | } 42 | 43 | impl Default for Fp { 44 | fn default() -> Self { 45 | Fp::zero() 46 | } 47 | } 48 | 49 | impl ConstantTimeEq for Fp { 50 | fn ct_eq(&self, other: &Self) -> Choice { 51 | self.0[0].ct_eq(&other.0[0]) 52 | & self.0[1].ct_eq(&other.0[1]) 53 | & self.0[2].ct_eq(&other.0[2]) 54 | & self.0[3].ct_eq(&other.0[3]) 55 | & self.0[4].ct_eq(&other.0[4]) 56 | & self.0[5].ct_eq(&other.0[5]) 57 | } 58 | } 59 | 60 | impl Eq for Fp {} 61 | impl PartialEq for Fp { 62 | fn eq(&self, other: &Self) -> bool { 63 | self.ct_eq(other).unwrap_u8() == 1 64 | } 65 | } 66 | 67 | impl ConditionallySelectable for Fp { 68 | fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self { 69 | Fp([ 70 | u64::conditional_select(&a.0[0], &b.0[0], choice), 71 | u64::conditional_select(&a.0[1], &b.0[1], choice), 72 | u64::conditional_select(&a.0[2], &b.0[2], choice), 73 | u64::conditional_select(&a.0[3], &b.0[3], choice), 74 | u64::conditional_select(&a.0[4], &b.0[4], choice), 75 | u64::conditional_select(&a.0[5], &b.0[5], choice), 76 | ]) 77 | } 78 | } 79 | 80 | /// p = 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177 81 | #[inline(always)] 82 | pub const fn modulus() -> [u64; 6] { 83 | [ 84 | 0x8508c00000000001, 85 | 0x170b5d4430000000, 86 | 0x1ef3622fba094800, 87 | 0x1a22d9f300f5138f, 88 | 0xc63b05c06ca1493b, 89 | 0x1ae3a4617c510ea, 90 | ] 91 | } 92 | 93 | 94 | /// INV = -(p^{-1} mod 2^64) mod 2^64 95 | #[inline] 96 | const fn inv() -> u64 { 97 | 9586122913090633727u64 98 | } 99 | 100 | const fn two_adicity() -> u32 { 101 | 46u32 102 | } 103 | 104 | /// R = 2^384 mod p 105 | const fn r1() -> Fp { 106 | Fp([ 107 | 
0x2cdffffffffff68, 108 | 0x51409f837fffffb1, 109 | 0x9f7db3a98a7d3ff2, 110 | 0x7b4e97b76e7c6305, 111 | 0x4cf495bf803c84e8, 112 | 0x8d6661e2fdf49a, 113 | ]) 114 | } 115 | 116 | /// R2 = 2^(384*2) mod p 117 | const fn r_squared() -> Fp { 118 | Fp([ 119 | 0xb786686c9400cd22, 120 | 0x329fcaab00431b1, 121 | 0x22a5f11162d6b46d, 122 | 0xbfdf7d03827dc3ac, 123 | 0x837e92f041790bf9, 124 | 0x6dfccb1e914b88, 125 | ]) 126 | } 127 | 128 | /// c^t, where p - 1 = 2^s*t and t odd 129 | const fn root_of_unity() -> Fp { 130 | Fp([ 131 | 0x1c104955744e6e0f, 132 | 0xf1bd15c3898dd1af, 133 | 0x76da78169a7f3950, 134 | 0xee086c1fe367c337, 135 | 0xf95564f4cbc1b61f, 136 | 0xf3c1414ef58c54, 137 | ]) 138 | } 139 | 140 | const fn t_minus_one_div_two() -> [u64; 6] { 141 | [ 142 | 0xba88600000010a11, 143 | 0xc45f741290002e16, 144 | 0xb3e601ea271e3de6, 145 | 0xb80d94292763445, 146 | 0x748c2f8a21d58c76, 147 | 0x35c, 148 | ] 149 | } 150 | 151 | const fn modulus_minus_one_div_two() -> [u64; 6] { 152 | [ 153 | 0x4284600000000000, 154 | 0xb85aea218000000, 155 | 0x8f79b117dd04a400, 156 | 0x8d116cf9807a89c7, 157 | 0x631d82e03650a49d, 158 | 0xd71d230be28875, 159 | ] 160 | } 161 | 162 | impl<'a> Neg for &'a Fp { 163 | type Output = Fp; 164 | 165 | #[inline(always)] 166 | fn neg(self) -> Fp { 167 | self.neg() 168 | } 169 | } 170 | 171 | impl Neg for Fp { 172 | type Output = Fp; 173 | 174 | #[inline(always)] 175 | fn neg(self) -> Fp { 176 | -&self 177 | } 178 | } 179 | 180 | impl<'a, 'b> Sub<&'b Fp> for &'a Fp { 181 | type Output = Fp; 182 | 183 | #[inline] 184 | fn sub(self, rhs: &'b Fp) -> Fp { 185 | self.sub(rhs) 186 | } 187 | } 188 | 189 | impl<'a, 'b> Add<&'b Fp> for &'a Fp { 190 | type Output = Fp; 191 | 192 | #[inline(always)] 193 | fn add(self, rhs: &'b Fp) -> Fp { 194 | self.add(rhs) 195 | } 196 | } 197 | 198 | impl<'a, 'b> Mul<&'b Fp> for &'a Fp { 199 | type Output = Fp; 200 | 201 | #[inline] 202 | fn mul(self, rhs: &'b Fp) -> Fp { 203 | self.mul(rhs) 204 | } 205 | } 206 | 207 | 
impl_binops_additive!(Fp, Fp); 208 | impl_binops_multiplicative!(Fp, Fp); 209 | 210 | impl Fp { 211 | /// Returns zero, the additive identity. 212 | #[inline] 213 | pub const fn zero() -> Fp { 214 | Fp([0, 0, 0, 0, 0, 0]) 215 | } 216 | 217 | /// Returns one, the multiplicative identity. 218 | #[inline] 219 | pub const fn one() -> Fp { 220 | r1() 221 | } 222 | 223 | #[inline] 224 | pub fn is_zero(&self) -> Choice { 225 | self.ct_eq(&Fp::zero()) 226 | } 227 | 228 | #[inline] 229 | pub fn is_one(&self) -> Choice { 230 | self.ct_eq(&Fp::one()) 231 | } 232 | 233 | /// Attempts to convert a big-endian byte representation of 234 | /// a field element into an `Fp`, failing if the input is not canonical. 235 | #[inline(always)] 236 | pub fn from_bytes(bytes: &[u8; 48]) -> CtOption<Fp> { 237 | let mut tmp = Fp([0, 0, 0, 0, 0, 0]); 238 | let modulus = modulus(); 239 | 240 | tmp.0[5] = BigEndian::read_u64(&bytes[0..8]); 241 | tmp.0[4] = BigEndian::read_u64(&bytes[8..16]); 242 | tmp.0[3] = BigEndian::read_u64(&bytes[16..24]); 243 | tmp.0[2] = BigEndian::read_u64(&bytes[24..32]); 244 | tmp.0[1] = BigEndian::read_u64(&bytes[32..40]); 245 | tmp.0[0] = BigEndian::read_u64(&bytes[40..48]); 246 | 247 | // Try to subtract the modulus 248 | let (_, borrow) = sbb(tmp.0[0], modulus[0], 0); 249 | let (_, borrow) = sbb(tmp.0[1], modulus[1], borrow); 250 | let (_, borrow) = sbb(tmp.0[2], modulus[2], borrow); 251 | let (_, borrow) = sbb(tmp.0[3], modulus[3], borrow); 252 | let (_, borrow) = sbb(tmp.0[4], modulus[4], borrow); 253 | let (_, borrow) = sbb(tmp.0[5], modulus[5], borrow); 254 | 255 | // If the element is smaller than MODULUS then the 256 | // subtraction will underflow, producing a borrow value 257 | // of 0xffff...ffff. Otherwise, it'll be zero.
258 | let is_some = (borrow as u8) & 1; 259 | 260 | // Convert to Montgomery form by computing 261 | // (a.R^0 * R^2) / R = a.R 262 | tmp *= &r_squared(); 263 | 264 | CtOption::new(tmp, Choice::from(is_some)) 265 | } 266 | 267 | /// Attempts to convert a little-endian byte representation of 268 | /// a field element into an `Fp`, failing if the input is not canonical. 269 | /// This is not constant time. 270 | #[inline(always)] 271 | pub fn from_bytes_little_endian_vartime(bytes: &[u8; 48]) -> Option<Fp> { 272 | let mut tmp = Fp([0, 0, 0, 0, 0, 0]); 273 | let modulus = modulus(); 274 | 275 | tmp.0[0] = LittleEndian::read_u64(&bytes[0..8]); 276 | tmp.0[1] = LittleEndian::read_u64(&bytes[8..16]); 277 | tmp.0[2] = LittleEndian::read_u64(&bytes[16..24]); 278 | tmp.0[3] = LittleEndian::read_u64(&bytes[24..32]); 279 | tmp.0[4] = LittleEndian::read_u64(&bytes[32..40]); 280 | tmp.0[5] = LittleEndian::read_u64(&bytes[40..48]); 281 | 282 | // Try to subtract the modulus 283 | let (_, borrow) = sbb(tmp.0[0], modulus[0], 0); 284 | let (_, borrow) = sbb(tmp.0[1], modulus[1], borrow); 285 | let (_, borrow) = sbb(tmp.0[2], modulus[2], borrow); 286 | let (_, borrow) = sbb(tmp.0[3], modulus[3], borrow); 287 | let (_, borrow) = sbb(tmp.0[4], modulus[4], borrow); 288 | let (_, borrow) = sbb(tmp.0[5], modulus[5], borrow); 289 | 290 | // If the element is smaller than MODULUS then the 291 | // subtraction will underflow, producing a borrow value 292 | // of 0xffff...ffff. Otherwise, it'll be zero. 293 | let is_some = (borrow as u8) & 1; 294 | 295 | // Convert to Montgomery form by computing 296 | // (a.R^0 * R^2) / R = a.R 297 | tmp *= &r_squared(); 298 | 299 | if is_some == 0 { 300 | return None; 301 | } 302 | Some(tmp) 303 | } 304 | 305 | /// Converts an element of `Fp` into a byte representation in 306 | /// big-endian byte order.
307 | pub fn to_bytes(&self) -> [u8; 48] { 308 | // Turn into canonical form by computing 309 | // (a.R) / R = a 310 | let tmp = Fp::montgomery_reduce( 311 | self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], 0, 0, 0, 0, 0, 0, 312 | ); 313 | 314 | let mut res = [0; 48]; 315 | BigEndian::write_u64(&mut res[0..8], tmp.0[5]); 316 | BigEndian::write_u64(&mut res[8..16], tmp.0[4]); 317 | BigEndian::write_u64(&mut res[16..24], tmp.0[3]); 318 | BigEndian::write_u64(&mut res[24..32], tmp.0[2]); 319 | BigEndian::write_u64(&mut res[32..40], tmp.0[1]); 320 | BigEndian::write_u64(&mut res[40..48], tmp.0[0]); 321 | 322 | res 323 | } 324 | 325 | /// Converts an element of `Fp` into a byte representation in 326 | /// little-endian byte order. 327 | pub fn to_bytes_littleendian(&self) -> [u8; 48] { 328 | // Turn into canonical form by computing 329 | // (a.R) / R = a 330 | let tmp = Fp::montgomery_reduce( 331 | self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], 0, 0, 0, 0, 0, 0, 332 | ); 333 | 334 | let mut res = [0; 48]; 335 | LittleEndian::write_u64(&mut res[0..8], tmp.0[0]); 336 | LittleEndian::write_u64(&mut res[8..16], tmp.0[1]); 337 | LittleEndian::write_u64(&mut res[16..24], tmp.0[2]); 338 | LittleEndian::write_u64(&mut res[24..32], tmp.0[3]); 339 | LittleEndian::write_u64(&mut res[32..40], tmp.0[4]); 340 | LittleEndian::write_u64(&mut res[40..48], tmp.0[5]); 341 | 342 | res 343 | } 344 | 345 | /// Returns whether or not this element is strictly lexicographically 346 | /// larger than its negation. 347 | pub fn lexicographically_largest(&self) -> Choice { 348 | // This can be determined by checking to see if the element is 349 | // larger than (p - 1) // 2. 
If we subtract by ((p - 1) // 2) + 1 350 | // and there is no underflow, then the element must be larger than 351 | // (p - 1) // 2 352 | 353 | // First, because self is in Montgomery form we need to reduce it 354 | let tmp = Fp::montgomery_reduce( 355 | self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], 0, 0, 0, 0, 0, 0, 356 | ); 357 | 358 | let (_, borrow) = sbb(tmp.0[0], 0x4284600000000001, 0); 359 | let (_, borrow) = sbb(tmp.0[1], 0x0b85aea218000000, borrow); 360 | let (_, borrow) = sbb(tmp.0[2], 0x8f79b117dd04a400, borrow); 361 | let (_, borrow) = sbb(tmp.0[3], 0x8d116cf9807a89c7, borrow); 362 | let (_, borrow) = sbb(tmp.0[4], 0x631d82e03650a49d, borrow); 363 | let (_, borrow) = sbb(tmp.0[5], 0xd71d230be28875, borrow); 364 | 365 | // If the element was smaller, the subtraction will underflow 366 | // producing a borrow value of 0xffff...ffff, otherwise it will 367 | // be zero. We create a Choice representing true if there was 368 | // underflow (and so this element is not lexicographically larger 369 | // than its negation) and then negate it. 370 | 371 | !Choice::from((borrow as u8) & 1) 372 | } 373 | 374 | /// Constructs an element of `Fp` without checking that it is 375 | /// canonical. The limbs are stored exactly as given, with no conversion. 376 | #[inline(always)] 377 | pub const fn from_raw_unchecked(v: [u64; 6]) -> Fp { 378 | Fp(v) 379 | } 380 | 381 | /// Although this is labeled "vartime", it is only 382 | /// variable time with respect to the exponent. It 383 | /// is declared `pub`, so do not pass it a secret exponent.
384 | #[inline(always)] 385 | pub fn pow_vartime(&self, by: &[u64; 6]) -> Self { 386 | let mut res = Self::one(); 387 | for e in by.iter().rev() { 388 | for i in (0..64).rev() { 389 | res = res.square(); 390 | 391 | if ((*e >> i) & 1) == 1 { 392 | res = res.mul(self); 393 | } 394 | } 395 | } 396 | res 397 | } 398 | 399 | pub fn legendre(&self) -> LegendreSymbol { 400 | let s = self.pow_vartime(&modulus_minus_one_div_two()); 401 | if s == Self::zero() { 402 | LegendreSymbol::Zero 403 | } else if s == Self::one() { 404 | LegendreSymbol::QuadraticResidue 405 | } else { 406 | LegendreSymbol::QuadraticNonResidue 407 | } 408 | } 409 | 410 | pub fn sqrt_vartime(&self) -> Option { 411 | match self.legendre() { 412 | LegendreSymbol::Zero => Some(*self), 413 | LegendreSymbol::QuadraticNonResidue => None, 414 | LegendreSymbol::QuadraticResidue => { 415 | let mut z = root_of_unity(); 416 | let mut w = self.pow_vartime(&t_minus_one_div_two()); 417 | let mut x = w * self; 418 | let mut b = x * &w; 419 | let mut v = two_adicity() as usize; 420 | 421 | // t = self^t 422 | { 423 | let mut check = b; 424 | for _ in 0..(v-1) { 425 | check = check.square(); 426 | } 427 | if check != Fp::one() { 428 | panic!("Input is not a square root, but passed the QR test") 429 | } 430 | } 431 | 432 | while b != Fp::one() { 433 | let mut k = 0usize; 434 | 435 | let mut b2k = b; 436 | while b2k != Fp::one() { 437 | // invariant: b2k = b^(2^k) after entering this loop 438 | b2k = b2k.square(); 439 | k += 1; 440 | } 441 | 442 | let j = v - k - 1; 443 | w = z; 444 | for _ in 0..j { 445 | w = w.square(); 446 | } 447 | 448 | z = w.square(); 449 | b *= &z; 450 | x *= &w; 451 | v = k; 452 | } 453 | Some(x) 454 | }, 455 | } 456 | } 457 | 458 | #[inline(always)] 459 | fn pow_acc(&self, acc: Fp, by: u64) -> Self { 460 | let mut acc = acc.clone(); 461 | for i in (0..64).rev() { 462 | acc = acc.square(); 463 | 464 | if ((by >> i) & 1) == 1 { 465 | acc = acc.mul(self); 466 | } 467 | } 468 | acc 469 | } 470 | 471 
| /// Computes the multiplicative inverse of this field 472 | /// element, returning None in the case that this element 473 | /// is zero. 474 | #[inline(always)] 475 | pub fn invert(&self) -> CtOption { 476 | let mut acc = Self::one(); 477 | acc = self.pow_acc(acc, 0x1ae3a4617c510ea); 478 | acc = self.pow_acc(acc, 0xc63b05c06ca1493b); 479 | acc = self.pow_acc(acc, 0x1a22d9f300f5138f); 480 | acc = self.pow_acc(acc, 0x1ef3622fba094800); 481 | acc = self.pow_acc(acc, 0x170b5d4430000000); 482 | acc = self.pow_acc(acc, 0x8508bfffffffffff); 483 | CtOption::new(acc, !self.is_zero()) 484 | } 485 | 486 | #[inline(always)] 487 | fn subtract_p(&self) -> Fp { 488 | let modulus = modulus(); 489 | let (r0, borrow) = sbb(self.0[0], modulus[0], 0); 490 | let (r1, borrow) = sbb(self.0[1], modulus[1], borrow); 491 | let (r2, borrow) = sbb(self.0[2], modulus[2], borrow); 492 | let (r3, borrow) = sbb(self.0[3], modulus[3], borrow); 493 | let (r4, borrow) = sbb(self.0[4], modulus[4], borrow); 494 | let (r5, borrow) = sbb(self.0[5], modulus[5], borrow); 495 | 496 | // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise 497 | // borrow = 0x000...000. Thus, we use it as a mask! 
498 | let r0 = (self.0[0] & borrow) | (r0 & !borrow); 499 | let r1 = (self.0[1] & borrow) | (r1 & !borrow); 500 | let r2 = (self.0[2] & borrow) | (r2 & !borrow); 501 | let r3 = (self.0[3] & borrow) | (r3 & !borrow); 502 | let r4 = (self.0[4] & borrow) | (r4 & !borrow); 503 | let r5 = (self.0[5] & borrow) | (r5 & !borrow); 504 | Fp([r0, r1, r2, r3, r4, r5]) 505 | } 506 | 507 | #[inline(always)] 508 | pub fn add(&self, rhs: &Fp) -> Fp { 509 | let (d0, carry) = adc(self.0[0], rhs.0[0], 0); 510 | let (d1, carry) = adc(self.0[1], rhs.0[1], carry); 511 | let (d2, carry) = adc(self.0[2], rhs.0[2], carry); 512 | let (d3, carry) = adc(self.0[3], rhs.0[3], carry); 513 | let (d4, carry) = adc(self.0[4], rhs.0[4], carry); 514 | let (d5, _) = adc(self.0[5], rhs.0[5], carry); 515 | 516 | // Attempt to subtract the modulus, to ensure the value 517 | // is smaller than the modulus. 518 | (&Fp([d0, d1, d2, d3, d4, d5])).subtract_p() 519 | } 520 | 521 | #[inline(always)] 522 | pub fn neg(&self) -> Fp { 523 | let modulus = modulus(); 524 | let (d0, borrow) = sbb(modulus[0], self.0[0], 0); 525 | let (d1, borrow) = sbb(modulus[1], self.0[1], borrow); 526 | let (d2, borrow) = sbb(modulus[2], self.0[2], borrow); 527 | let (d3, borrow) = sbb(modulus[3], self.0[3], borrow); 528 | let (d4, borrow) = sbb(modulus[4], self.0[4], borrow); 529 | let (d5, _) = sbb(modulus[5], self.0[5], borrow); 530 | 531 | // Let's use a mask if `self` was zero, which would mean 532 | // the result of the subtraction is p. 
533 | let mask = (((self.0[0] | self.0[1] | self.0[2] | self.0[3] | self.0[4] | self.0[5]) == 0) 534 | as u64) 535 | .wrapping_sub(1); 536 | 537 | Fp([ 538 | d0 & mask, 539 | d1 & mask, 540 | d2 & mask, 541 | d3 & mask, 542 | d4 & mask, 543 | d5 & mask, 544 | ]) 545 | } 546 | 547 | #[inline(always)] 548 | pub fn sub(&self, rhs: &Fp) -> Fp { 549 | (&rhs.neg()).add(self) 550 | } 551 | 552 | #[inline(always)] 553 | pub fn square(&self) -> Fp { 554 | self * self 555 | } 556 | 557 | #[inline(always)] 558 | fn montgomery_reduce_old( 559 | t0: u64, 560 | t1: u64, 561 | t2: u64, 562 | t3: u64, 563 | t4: u64, 564 | t5: u64, 565 | t6: u64, 566 | t7: u64, 567 | t8: u64, 568 | t9: u64, 569 | t10: u64, 570 | t11: u64, 571 | ) -> Self { 572 | // The Montgomery reduction here is based on Algorithm 14.32 in 573 | // Handbook of Applied Cryptography 574 | // . 575 | let inv = inv(); 576 | let modulus = modulus(); 577 | 578 | let k = t0.wrapping_mul(inv); 579 | let (_, carry) = mac(t0, k, modulus[0], 0); 580 | let (r1, carry) = mac(t1, k, modulus[1], carry); 581 | let (r2, carry) = mac(t2, k, modulus[2], carry); 582 | let (r3, carry) = mac(t3, k, modulus[3], carry); 583 | let (r4, carry) = mac(t4, k, modulus[4], carry); 584 | let (r5, carry) = mac(t5, k, modulus[5], carry); 585 | let (r6, r7) = adc(t6, 0, carry); 586 | 587 | let k = r1.wrapping_mul(inv); 588 | let (_, carry) = mac(r1, k, modulus[0], 0); 589 | let (r2, carry) = mac(r2, k, modulus[1], carry); 590 | let (r3, carry) = mac(r3, k, modulus[2], carry); 591 | let (r4, carry) = mac(r4, k, modulus[3], carry); 592 | let (r5, carry) = mac(r5, k, modulus[4], carry); 593 | let (r6, carry) = mac(r6, k, modulus[5], carry); 594 | let (r7, r8) = adc(t7, r7, carry); 595 | 596 | let k = r2.wrapping_mul(inv); 597 | let (_, carry) = mac(r2, k, modulus[0], 0); 598 | let (r3, carry) = mac(r3, k, modulus[1], carry); 599 | let (r4, carry) = mac(r4, k, modulus[2], carry); 600 | let (r5, carry) = mac(r5, k, modulus[3], carry); 601 | let (r6, 
carry) = mac(r6, k, modulus[4], carry); 602 | let (r7, carry) = mac(r7, k, modulus[5], carry); 603 | let (r8, r9) = adc(t8, r8, carry); 604 | 605 | let k = r3.wrapping_mul(inv); 606 | let (_, carry) = mac(r3, k, modulus[0], 0); 607 | let (r4, carry) = mac(r4, k, modulus[1], carry); 608 | let (r5, carry) = mac(r5, k, modulus[2], carry); 609 | let (r6, carry) = mac(r6, k, modulus[3], carry); 610 | let (r7, carry) = mac(r7, k, modulus[4], carry); 611 | let (r8, carry) = mac(r8, k, modulus[5], carry); 612 | let (r9, r10) = adc(t9, r9, carry); 613 | 614 | let k = r4.wrapping_mul(inv); 615 | let (_, carry) = mac(r4, k, modulus[0], 0); 616 | let (r5, carry) = mac(r5, k, modulus[1], carry); 617 | let (r6, carry) = mac(r6, k, modulus[2], carry); 618 | let (r7, carry) = mac(r7, k, modulus[3], carry); 619 | let (r8, carry) = mac(r8, k, modulus[4], carry); 620 | let (r9, carry) = mac(r9, k, modulus[5], carry); 621 | let (r10, r11) = adc(t10, r10, carry); 622 | 623 | let k = r5.wrapping_mul(inv); 624 | let (_, carry) = mac(r5, k, modulus[0], 0); 625 | let (r6, carry) = mac(r6, k, modulus[1], carry); 626 | let (r7, carry) = mac(r7, k, modulus[2], carry); 627 | let (r8, carry) = mac(r8, k, modulus[3], carry); 628 | let (r9, carry) = mac(r9, k, modulus[4], carry); 629 | let (r10, carry) = mac(r10, k, modulus[5], carry); 630 | let (r11, _) = adc(t11, r11, carry); 631 | 632 | // Attempt to subtract the modulus, to ensure the value 633 | // is smaller than the modulus. 
634 | (&Fp([r6, r7, r8, r9, r10, r11])).subtract_p() 635 | } 636 | 637 | #[inline(always)] 638 | pub fn mul_old(&self, rhs: &Fp) -> Fp { 639 | let (t0, carry) = mac(0, self.0[0], rhs.0[0], 0); 640 | let (t1, carry) = mac(0, self.0[0], rhs.0[1], carry); 641 | let (t2, carry) = mac(0, self.0[0], rhs.0[2], carry); 642 | let (t3, carry) = mac(0, self.0[0], rhs.0[3], carry); 643 | let (t4, carry) = mac(0, self.0[0], rhs.0[4], carry); 644 | let (t5, t6) = mac(0, self.0[0], rhs.0[5], carry); 645 | 646 | let (t1, carry) = mac(t1, self.0[1], rhs.0[0], 0); 647 | let (t2, carry) = mac(t2, self.0[1], rhs.0[1], carry); 648 | let (t3, carry) = mac(t3, self.0[1], rhs.0[2], carry); 649 | let (t4, carry) = mac(t4, self.0[1], rhs.0[3], carry); 650 | let (t5, carry) = mac(t5, self.0[1], rhs.0[4], carry); 651 | let (t6, t7) = mac(t6, self.0[1], rhs.0[5], carry); 652 | 653 | let (t2, carry) = mac(t2, self.0[2], rhs.0[0], 0); 654 | let (t3, carry) = mac(t3, self.0[2], rhs.0[1], carry); 655 | let (t4, carry) = mac(t4, self.0[2], rhs.0[2], carry); 656 | let (t5, carry) = mac(t5, self.0[2], rhs.0[3], carry); 657 | let (t6, carry) = mac(t6, self.0[2], rhs.0[4], carry); 658 | let (t7, t8) = mac(t7, self.0[2], rhs.0[5], carry); 659 | 660 | let (t3, carry) = mac(t3, self.0[3], rhs.0[0], 0); 661 | let (t4, carry) = mac(t4, self.0[3], rhs.0[1], carry); 662 | let (t5, carry) = mac(t5, self.0[3], rhs.0[2], carry); 663 | let (t6, carry) = mac(t6, self.0[3], rhs.0[3], carry); 664 | let (t7, carry) = mac(t7, self.0[3], rhs.0[4], carry); 665 | let (t8, t9) = mac(t8, self.0[3], rhs.0[5], carry); 666 | 667 | let (t4, carry) = mac(t4, self.0[4], rhs.0[0], 0); 668 | let (t5, carry) = mac(t5, self.0[4], rhs.0[1], carry); 669 | let (t6, carry) = mac(t6, self.0[4], rhs.0[2], carry); 670 | let (t7, carry) = mac(t7, self.0[4], rhs.0[3], carry); 671 | let (t8, carry) = mac(t8, self.0[4], rhs.0[4], carry); 672 | let (t9, t10) = mac(t9, self.0[4], rhs.0[5], carry); 673 | 674 | let (t5, carry) = mac(t5, self.0[5], 
rhs.0[0], 0); 675 | let (t6, carry) = mac(t6, self.0[5], rhs.0[1], carry); 676 | let (t7, carry) = mac(t7, self.0[5], rhs.0[2], carry); 677 | let (t8, carry) = mac(t8, self.0[5], rhs.0[3], carry); 678 | let (t9, carry) = mac(t9, self.0[5], rhs.0[4], carry); 679 | let (t10, t11) = mac(t10, self.0[5], rhs.0[5], carry); 680 | 681 | Self::montgomery_reduce_old(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) 682 | } 683 | /// Montgomery-reduces the 12-limb value `t0..t11` through the external `c_montgomry` routine, then conditionally subtracts `p`. 684 | fn montgomery_reduce( 685 | t0: u64, 686 | t1: u64, 687 | t2: u64, 688 | t3: u64, 689 | t4: u64, 690 | t5: u64, 691 | t6: u64, 692 | t7: u64, 693 | t8: u64, 694 | t9: u64, 695 | t10: u64, 696 | t11: u64, 697 | ) -> Self { 698 | unsafe { // SAFETY: assumes `c_montgomry` writes all six limbs of `res` before `assume_init` (confirm against the fpc sources) 699 | let mut res = mem::MaybeUninit::<[u64; 6]>::uninit(); 700 | let mut tmp: [u64; 12] = [t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11]; 701 | c_montgomry(res.as_mut_ptr() as *mut u64, tmp.as_mut_ptr()); 702 | Fp(res.assume_init()).subtract_p() 703 | } 704 | } 705 | /// Multiplies through `c_mul` and reduces through `c_montgomry`, returning the limbs before the final conditional subtraction of `p`. 706 | #[inline(always)] 707 | fn mul_helper(&self, rhs: &Fp) -> [u64; 6] { 708 | unsafe { // SAFETY: assumes `c_mul` fills all 12 limbs of `tmp` and `c_montgomry` all six limbs of `res` (confirm against the fpc sources) 709 | let mut res = mem::MaybeUninit::<[u64; 6]>::uninit(); 710 | let mut tmp = mem::MaybeUninit::<[u64; 12]>::uninit(); 711 | c_mul(tmp.as_mut_ptr() as *mut u64, self.0.as_ptr(), rhs.0.as_ptr()); 712 | c_montgomry(res.as_mut_ptr() as *mut u64, tmp.as_mut_ptr() as *const u64); 713 | res.assume_init() 714 | } 715 | } 716 | /// Field multiplication: `mul_helper` followed by a conditional subtraction of `p`. 717 | #[inline(always)] 718 | pub fn mul(&self, rhs: &Fp) -> Fp { 719 | let res = self.mul_helper(rhs); 720 | Fp(res).subtract_p() 721 | } 722 | } 723 | 724 | #[test] 725 | fn test_conditional_selection() { 726 | let a = Fp([1, 2, 3, 4, 5, 6]); 727 | let b = Fp([7, 8, 9, 10, 11, 12]); 728 | 729 | assert_eq!( 730 | ConditionallySelectable::conditional_select(&a, &b, Choice::from(0u8)), 731 | a 732 | ); 733 | assert_eq!( 734 | ConditionallySelectable::conditional_select(&a, &b, Choice::from(1u8)), 735 | b 736 | ); 737 | } 738 | 739 | #[test] 740 | fn test_legendre() { 741 | let a = Fp::from_raw_unchecked([ 742 | 0xf8397a163b69bed0, 743 | 0xf175823c7236735c, 744 | 0x5569469835f84b92, 745 | 0x714deebc8c061c3c, 746 | 0x7adcc0994eb519c8, 747 |
0x230d716ceafd4b, 748 | ]); 749 | assert_eq!(a.legendre(), LegendreSymbol::QuadraticResidue); 750 | } 751 | 752 | #[test] 753 | fn test_equality() { 754 | fn is_equal(a: &Fp, b: &Fp) -> bool { 755 | let eq = a == b; 756 | let ct_eq = a.ct_eq(&b); 757 | 758 | assert_eq!(eq, ct_eq.unwrap_u8() == 1); 759 | 760 | eq 761 | } 762 | 763 | assert_eq!(&Fp([1, 2, 3, 4, 5, 6]), &Fp([1, 2, 3, 4, 5, 6])); 764 | 765 | let a = Fp([7, 2, 3, 4, 5, 6]); 766 | let b = Fp([1, 2, 3, 4, 5, 6]); 767 | assert_ne!(&a, &b); 768 | assert!(!is_equal(&Fp([1, 7, 3, 4, 5, 6]), &Fp([1, 2, 3, 4, 5, 6]))); 769 | assert!(!is_equal(&Fp([1, 2, 7, 4, 5, 6]), &Fp([1, 2, 3, 4, 5, 6]))); 770 | assert!(!is_equal(&Fp([1, 2, 3, 7, 5, 6]), &Fp([1, 2, 3, 4, 5, 6]))); 771 | assert!(!is_equal(&Fp([1, 2, 3, 4, 7, 6]), &Fp([1, 2, 3, 4, 5, 6]))); 772 | assert!(!is_equal(&Fp([1, 2, 3, 4, 5, 7]), &Fp([1, 2, 3, 4, 5, 6]))); 773 | } 774 | 775 | #[test] 776 | fn test_squaring() { 777 | let a = Fp([ 778 | 0xd215d2768e83191b, 779 | 0x5085d80f8fb28261, 780 | 0xce9a032ddf393a56, 781 | 0x3e9c4fff2ca0c4bb, 782 | 0x6436b6f7f4d95dfb, 783 | 0x10606628ad4a4d90, 784 | ]); 785 | let b = Fp([ 786 | 0xc27f4faf338e6e7, 787 | 0xb9363389626f355, 788 | 0x2677a23d5ff9b701, 789 | 0xaa7da7ecaa317421, 790 | 0xd813d973bd2c6c51, 791 | 0x1363906dc99b15d, 792 | ]); 793 | 794 | assert_eq!(a.square(), b); 795 | } 796 | 797 | #[test] 798 | fn test_multiplication() { 799 | let a = Fp([ 800 | 0x397a38320170cd4, 801 | 0x734c1b2c9e761d30, 802 | 0x5ed255ad9a48beb5, 803 | 0x95a3c6b22a7fcfc, 804 | 0x2294ce75d4e26a27, 805 | 0x13338bd870011ebb, 806 | ]); 807 | let b = Fp([ 808 | 0xb9c3c7c5b1196af7, 809 | 0x2580e2086ce335c1, 810 | 0xf49aed3d8a57ef42, 811 | 0x41f281e49846e878, 812 | 0xe0762346c38452ce, 813 | 0x652e89326e57dc0, 814 | ]); 815 | let c = Fp([ 816 | 0x797a886e0e8e8d85, 817 | 0x518df0f1d1732800, 818 | 0xb7098a12c4a10c5, 819 | 0x6338f6a9ec896084, 820 | 0xec6b4921810a39fc, 821 | 0x1751097d914d4be 822 | ]); 823 | 824 | assert_eq!(a * b, c); 825 | } 
826 | 827 | #[test] 828 | fn test_addition() { 829 | let a = Fp([ 830 | 0x5360bb5978678032, 831 | 0x7dd275ae799e128e, 832 | 0x5c5b5071ce4f4dcf, 833 | 0xcdb21f93078dbb3e, 834 | 0xc32365c5e73f474a, 835 | 0x115a2a5489babe5b, 836 | ]); 837 | let b = Fp([ 838 | 0x9fd287733d23dda0, 839 | 0xb16bf2af738b3554, 840 | 0x3e57a75bd3cc6d1d, 841 | 0x900bc0bd627fd6d6, 842 | 0xd319a080efb245fe, 843 | 0x15fdcaa4e4bb2091, 844 | ]); 845 | let c = Fp([ 846 | 0x6e2a82ccb58b5dd1, 847 | 0x18330b19bd2947e2, 848 | 0x7bbf959de81272ed, 849 | 0x439b065d69187e85, 850 | 0xd00200866a50440e, 851 | 0x25a9bab356b0ce02, 852 | ]); 853 | 854 | assert_eq!(a + b, c); 855 | } 856 | 857 | #[test] 858 | fn test_subtraction() { 859 | let a = Fp([ 860 | 0xaa270000000cfff3, 861 | 0x53cc0032fc34000a, 862 | 0x478fe97a6b0a807f, 863 | 0xb1d37ebee6ba24d7, 864 | 0x8ec9733bbf78ab2f, 865 | 0x9d645513d83de7e, 866 | ]); 867 | let b = Fp([ 868 | 0x7d828664baf4f566, 869 | 0xd17e663996ec7339, 870 | 0x679ead55cb4078d0, 871 | 0xfe3b2260e001ec28, 872 | 0x305993d043d91b68, 873 | 0x626f03c0489b72d, 874 | ]); 875 | let c = Fp([ 876 | 0x2ca4799b45180a8d, 877 | 0x824d99f965478cd1, 878 | 0xdff13c249fca07ae, 879 | 0xb3985c5e06b838ae, 880 | 0x5e6fdf6b7b9f8fc6, 881 | 0x3af551538fa2751, 882 | ]); 883 | 884 | assert_eq!(a - b, c); 885 | } 886 | 887 | #[test] 888 | fn test_negation() { 889 | let a = Fp([ 890 | 0x5360bb5978678032, 891 | 0x7dd275ae799e128e, 892 | 0x5c5b5071ce4f4dcf, 893 | 0xcdb21f93078dbb3e, 894 | 0xc32365c5e73f474a, 895 | 0x115a2a5489babe5b, 896 | ]); 897 | let b = Fp([ 898 | 0x31a804a687987fcf, 899 | 0x9938e795b661ed72, 900 | 0xc29811bdebb9fa30, 901 | 0x4c70ba5ff9675850, 902 | 0x3179ffa856201f0, 903 | 0xf0540ff18e0a528f, 904 | ]); 905 | 906 | assert_eq!(-a, b); 907 | } 908 | 909 | #[test] 910 | fn test_debug() { 911 | assert_eq!( 912 | format!( 913 | "{:?}", 914 | Fp([0x5360bb5978678032, 0x7dd275ae799e128e, 0x5c5b5071ce4f4dcf, 0xcdb21f93078dbb3e, 0xc32365c5e73f474a, 0x115a2a5489babe5b]) 915 | ), 916 | 
"0x01649f72ed7210935e96e9afd102e59eb0043d3eccd7606e797520db60fc0d2c5f8ec5dde3c6df9ddc6db87323948bdc" 917 | ); 918 | } 919 | 920 | #[test] 921 | fn test_from_bytes() { 922 | let mut a = Fp([ 923 | 0xdc906d9be3f95dc8, 924 | 0x8755caf7459691a1, 925 | 0xcff1a7f4e9583ab3, 926 | 0x9b43821f849e2284, 927 | 0xf57554f3a2974f3f, 928 | 0x85dbea84ed47f79, 929 | ]); 930 | 931 | for _ in 0..100 { 932 | a = a.square(); 933 | let tmp = a.to_bytes(); 934 | let b = Fp::from_bytes(&tmp).unwrap(); 935 | 936 | assert_eq!(a, b); 937 | } 938 | 939 | assert_eq!( 940 | -Fp::one(), 941 | Fp::from_bytes(&[1, 174, 58, 70, 23, 197, 16, 234, 198, 59, 5, 192, 108, 161, 73, 59, 26, 34, 217, 243, 0, 245, 19, 143, 30, 243, 98, 47, 186, 9, 72, 0, 23, 11, 93, 68, 48, 0, 0, 0, 133, 8, 192, 0, 0, 0, 0, 0]).unwrap() 942 | ); 943 | 944 | assert!( 945 | Fp::from_bytes(&[ 946 | 27, 1, 17, 234, 57, 127, 230, 154, 75, 27, 167, 182, 67, 75, 172, 215, 100, 119, 75, 947 | 132, 243, 133, 18, 191, 103, 48, 210, 160, 246, 176, 246, 36, 30, 171, 255, 254, 177, 948 | 83, 255, 255, 185, 254, 255, 255, 255, 255, 170, 170 949 | ]) 950 | .is_none() 951 | .unwrap_u8() 952 | == 1 953 | ); 954 | 955 | assert!(Fp::from_bytes(&[0xff; 48]).is_none().unwrap_u8() == 1); 956 | } 957 | 958 | #[test] 959 | fn test_sqrt_vartime() { 960 | let a = Fp::from_raw_unchecked([ 961 | 0xaa270000000cfff3, 962 | 0x53cc0032fc34000a, 963 | 0x478fe97a6b0a807f, 964 | 0xb1d37ebee6ba24d7, 965 | 0x8ec9733bbf78ab2f, 966 | 0x9d645513d83de7e, 967 | ]); 968 | 969 | assert_eq!( 970 | a.sqrt_vartime().unwrap(), 971 | Fp::from_raw_unchecked([ 972 | 0xb7365bc1527cc225, 973 | 0x80c4410c13dad980, 974 | 0x405a608866ec9af9, 975 | 0xbae77f06775d9e86, 976 | 0x631d7a2378887188, 977 | 0x24475d61e565d7, 978 | ]) 979 | ); 980 | } 981 | 982 | #[test] 983 | fn test_inversion() { 984 | let a = Fp([ 985 | 0x43b43a5078ac2076, 986 | 0x1ce0763046f8962b, 987 | 0x724a5276486d735c, 988 | 0x6f05c2a6282d48fd, 989 | 0x2095bd5bb4ca9331, 990 | 0x3b35b3894b0f7da, 991 | ]); 992 | let 
b = Fp([ 993 | 0x46e62daa07fc3fba, 994 | 0x7a3ba1598ea4f941, 995 | 0x675f586198cad5e3, 996 | 0xd3c06c64199ca906, 997 | 0x61617cc7f1012816, 998 | 0xefb2f069ef448e, 999 | ]); 1000 | 1001 | assert_eq!(a.invert().unwrap(), b); 1002 | assert!(Fp::zero().invert().is_none().unwrap_u8() == 1); 1003 | } 1004 | 1005 | #[test] 1006 | fn test_multiply() { 1007 | let a = Fp([ 1008 | 0x43b43a5078ac2076, 1009 | 0x1ce0763046f8962b, 1010 | 0x724a5276486d735c, 1011 | 0x6f05c2a6282d48fd, 1012 | 0x2095bd5bb4ca9331, 1013 | 0x3b35b3894b0f7da, 1014 | ]); 1015 | let b = Fp([ 1016 | 0x46e62daa07fc3fba, 1017 | 0x7a3ba1598ea4f941, 1018 | 0x675f586198cad5e3, 1019 | 0xd3c06c64199ca906, 1020 | 0x61617cc7f1012816, 1021 | 0xefb2f069ef448e, 1022 | ]); 1023 | let c = Fp([ 1024 | 0x46e62daa07fc3fba, 1025 | 0x7a3ba1598ea4f941, 1026 | 0x675f586198cad5e3, 1027 | 0xd3c06c64199ca906, 1028 | 0x61617cc7f1012816, 1029 | 0xefb2f069ef448e, 1030 | ]); 1031 | assert_eq!(a.mul(&b), a.mul_old(&b)); 1032 | assert_eq!(a.mul(&c), a.mul_old(&c)); 1033 | assert_eq!(b.mul(&c), b.mul_old(&c)); 1034 | } 1035 | 1036 | #[test] 1037 | fn test_lexicographic_largest() { 1038 | assert!(!bool::from(Fp::zero().lexicographically_largest())); 1039 | assert!(!bool::from(Fp::one().lexicographically_largest())); 1040 | assert!(!bool::from( 1041 | Fp::from_raw_unchecked([ 1042 | 0xa1fafffffffe5557, 1043 | 0x995bfff976a3fffe, 1044 | 0x3f41d24d174ceb4, 1045 | 0xf6547998c1995dbd, 1046 | 0x778a468f507a6034, 1047 | 0x20559931f7f8103 1048 | ]) 1049 | .lexicographically_largest() 1050 | )); 1051 | assert!(!bool::from( 1052 | Fp::from_raw_unchecked([ 1053 | 0x1804000000015554, 1054 | 0x855000053ab00001, 1055 | 0x633cb57c253c276f, 1056 | 0x6e22d1ec31ebb502, 1057 | 0xd3916126f2d14ca2, 1058 | 0x17fbb8571a006596 1059 | ]) 1060 | .lexicographically_largest() 1061 | )); 1062 | assert!(bool::from( 1063 | Fp::from_raw_unchecked([ 1064 | 0x43f5fffffffcaaae, 1065 | 0x32b7fff2ed47fffd, 1066 | 0x7e83a49a2e99d69, 1067 | 0xeca8f3318332bb7a, 1068 | 
0xef148d1ea0f4c069, 1069 | 0x40ab3263eff0206 1070 | ]) 1071 | .lexicographically_largest() 1072 | )); 1073 | } 1074 | -------------------------------------------------------------------------------- /bls12_377/src/fp2.rs: -------------------------------------------------------------------------------- 1 | //! This module implements arithmetic over the quadratic extension field Fp2. 2 | 3 | use core::fmt; 4 | use core::ops::{Add, AddAssign, Mul, MulAssign, Neg, Not, Sub, SubAssign}; 5 | 6 | use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption}; 7 | 8 | use crate::fp::{Fp}; 9 | use crate::util::LegendreSymbol; 10 | 11 | 12 | /// beta = -5 13 | #[inline(always)] 14 | const fn nonresidue() -> Fp { 15 | Fp::from_raw_unchecked([ 16 | 0xfc0b8000000002fa, 17 | 0x97d39cf6e000018b, 18 | 0x2072420fbfa05044, 19 | 0xcbbcbd50d97c3802, 20 | 0xbaf1ec35813f9eb, 21 | 0x9974a2c0945ad2, 22 | ]) 23 | } 24 | 25 | #[derive(Copy, Clone)] 26 | pub struct Fp2 { 27 | pub c0: Fp, 28 | pub c1: Fp, 29 | } 30 | 31 | impl fmt::Debug for Fp2 { 32 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 33 | write!(f, "{:?} + {:?}*u", self.c0, self.c1) 34 | } 35 | } 36 | 37 | impl Default for Fp2 { 38 | fn default() -> Self { 39 | Fp2::zero() 40 | } 41 | } 42 | 43 | impl From for Fp2 { 44 | fn from(f: Fp) -> Fp2 { 45 | Fp2 { 46 | c0: f, 47 | c1: Fp::zero(), 48 | } 49 | } 50 | } 51 | 52 | impl ConstantTimeEq for Fp2 { 53 | #[inline] 54 | fn ct_eq(&self, other: &Self) -> Choice { 55 | self.c0.ct_eq(&other.c0) & self.c1.ct_eq(&other.c1) 56 | } 57 | } 58 | 59 | impl Eq for Fp2 {} 60 | impl PartialEq for Fp2 { 61 | fn eq(&self, other: &Self) -> bool { 62 | self.ct_eq(other).unwrap_u8() == 1 63 | } 64 | } 65 | 66 | impl ConditionallySelectable for Fp2 { 67 | #[inline] 68 | fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self { 69 | Fp2 { 70 | c0: Fp::conditional_select(&a.c0, &b.c0, choice), 71 | c1: Fp::conditional_select(&a.c1, &b.c1, choice), 72 | } 73 | } 74 | } 75 | 76 
/// Negation of a borrowed `Fp2`; forwards to the inherent `Fp2::neg`.
impl<'lhs> Neg for &'lhs Fp2 {
    type Output = Fp2;

    #[inline(always)]
    fn neg(self) -> Fp2 {
        // Spelled as a path so it is obvious that the inherent method is
        // called and not this trait method again.
        Fp2::neg(self)
    }
}

/// Negation of an owned `Fp2`; forwards to the inherent `Fp2::neg`.
impl Neg for Fp2 {
    type Output = Fp2;

    #[inline(always)]
    fn neg(self) -> Fp2 {
        Fp2::neg(&self)
    }
}

/// `&Fp2 - &Fp2`; forwards to the inherent `Fp2::sub`.
impl<'lhs, 'rhs> Sub<&'rhs Fp2> for &'lhs Fp2 {
    type Output = Fp2;

    #[inline(always)]
    fn sub(self, rhs: &'rhs Fp2) -> Fp2 {
        Fp2::sub(self, rhs)
    }
}

/// `&Fp2 + &Fp2`; forwards to the inherent `Fp2::add`.
impl<'lhs, 'rhs> Add<&'rhs Fp2> for &'lhs Fp2 {
    type Output = Fp2;

    #[inline(always)]
    fn add(self, rhs: &'rhs Fp2) -> Fp2 {
        Fp2::add(self, rhs)
    }
}

/// `&Fp2 * &Fp2`; forwards to the inherent `Fp2::mul`.
impl<'lhs, 'rhs> Mul<&'rhs Fp2> for &'lhs Fp2 {
    type Output = Fp2;

    #[inline(always)]
    fn mul(self, rhs: &'rhs Fp2) -> Fp2 {
        Fp2::mul(self, rhs)
    }
}

impl_binops_additive!(Fp2, Fp2);
impl_binops_multiplicative!(Fp2, Fp2);

impl Fp2 {
    /// The additive identity: both coefficients are `Fp::zero()`.
    #[inline(always)]
    pub const fn zero() -> Fp2 {
        Self {
            c0: Fp::zero(),
            c1: Fp::zero(),
        }
    }

    /// The multiplicative identity: `c0` is `Fp::one()`, `c1` is `Fp::zero()`.
    #[inline(always)]
    pub const fn one() -> Fp2 {
        Self {
            c0: Fp::one(),
            c1: Fp::zero(),
        }
    }

    /// Returns a set `Choice` exactly when both coefficients are zero.
    #[inline(always)]
    pub fn is_zero(&self) -> Choice {
        let c0_is_zero = self.c0.is_zero();
        let c1_is_zero = self.c1.is_zero();
        c0_is_zero & c1_is_zero
    }

    /// Raises this element to p.
147 | pub fn frobenius_map(&self) -> Self { 148 | // This is always just a conjugation 149 | self.conjugate() 150 | } 151 | 152 | pub fn conjugate(&self) -> Self { 153 | Fp2 { 154 | c0: self.c0, 155 | c1: -self.c1, 156 | } 157 | } 158 | 159 | pub fn mul_by_nonresidue(&self) -> Fp2 { 160 | // Multiply a + bu by u + 1, getting 161 | // au + a + bu^2 + bu 162 | // and because u^2 = -1, we get 163 | // (a - b) + (a + b)u 164 | 165 | Fp2 { 166 | c0: self.c0 + (nonresidue() * self.c1), 167 | c1: self.c0 + self.c1, 168 | } 169 | } 170 | 171 | /// Returns whether or not this element is strictly lexicographically 172 | /// larger than its negation. 173 | pub fn lexicographically_largest(&self) -> Choice { 174 | // If this element's c1 coefficient is lexicographically largest 175 | // then it is lexicographically largest. Otherwise, in the event 176 | // the c1 coefficient is zero and the c0 coefficient is 177 | // lexicographically largest, then this element is lexicographically 178 | // largest. 
179 | 180 | self.c1.lexicographically_largest() 181 | | (self.c1.is_zero() & self.c0.lexicographically_largest()) 182 | } 183 | 184 | #[inline] 185 | pub fn square(&self) -> Fp2 { 186 | // Complex squaring: 187 | // 188 | // v0 = c0 * c1 189 | // c0' = (c0 + c1) * (c0 + \beta*c1) - v0 - \beta * v0 190 | // c1' = 2 * v0 191 | // 192 | let mut v0 = (&self.c0).sub(&self.c1); 193 | let v3 = (&self.c0).sub(&(&self.c1).mul(&nonresidue())); 194 | let v2 = (&self.c0).mul(&self.c1); 195 | v0 = (&v0).mul(&v3); 196 | v0 = (&v0).add(&v2); 197 | 198 | Fp2 { 199 | c0: (&v0).add(&((&v2).mul(&nonresidue()))), 200 | c1: (&v2).add(&v2), 201 | } 202 | } 203 | 204 | #[inline(always)] 205 | pub fn mul(&self, rhs: &Fp2) -> Fp2 { 206 | // Karatsuba multiplication: 207 | // 208 | // v0 = a0 * b0 209 | // v1 = a1 * b1 210 | // c0 = v0 + \beta * v1 211 | // c1 = (a0 + a1) * (b0 + b1) - v0 - v1 212 | 213 | let v0 = (&self.c0).mul(&rhs.c0); 214 | let v1 = (&self.c1).mul(&rhs.c1); 215 | let c0 = (&v0).add(&(&nonresidue()).mul(&v1)); 216 | let mut c1 = (&(&self.c0).add(&self.c1)).mul(&(&rhs.c0).add(&rhs.c1)); 217 | c1 = (&c1).sub(&v0); 218 | c1 = (&c1).sub(&v1); 219 | 220 | Fp2 { c0, c1 } 221 | } 222 | 223 | #[inline(always)] 224 | pub fn add(&self, rhs: &Fp2) -> Fp2 { 225 | Fp2 { 226 | c0: (&self.c0).add(&rhs.c0), 227 | c1: (&self.c1).add(&rhs.c1), 228 | } 229 | } 230 | 231 | #[inline(always)] 232 | pub fn sub(&self, rhs: &Fp2) -> Fp2 { 233 | Fp2 { 234 | c0: (&self.c0).sub(&rhs.c0), 235 | c1: (&self.c1).sub(&rhs.c1), 236 | } 237 | } 238 | 239 | #[inline(always)] 240 | pub fn neg(&self) -> Fp2 { 241 | Fp2 { 242 | c0: (&self.c0).neg(), 243 | c1: (&self.c1).neg(), 244 | } 245 | } 246 | 247 | fn norm(&self) -> Fp { 248 | let t0 = self.c0.square(); 249 | let mut t1 = self.c1.square(); 250 | t1 = -(&t1).mul(&nonresidue()); 251 | t1.add_assign(&t0); 252 | t1 253 | } 254 | 255 | fn legendre(&self) -> LegendreSymbol { 256 | self.norm().legendre() 257 | } 258 | 259 | 260 | /// Algorithm 8, 
https://eprint.iacr.org/2012/685.pdf 261 | // TODO: Investigate switching to algo 10 262 | // TODO: Add sqrt test coverage 263 | pub fn sqrt_vartime(&self) -> Option { 264 | if self.c1 == Fp::zero() { 265 | return self.c0.sqrt_vartime().map(|c0| Self { c0, c1: Fp::zero() } ) 266 | } 267 | 268 | match self.legendre() { 269 | LegendreSymbol::Zero => Some(*self), 270 | LegendreSymbol::QuadraticNonResidue => None, 271 | LegendreSymbol::QuadraticResidue => { 272 | let two_inv = Fp::one() 273 | .add(Fp::one()) 274 | .invert() 275 | .unwrap(); 276 | let alpha = self 277 | .norm() 278 | .sqrt_vartime() 279 | .unwrap(); 280 | let mut delta = (alpha + self.c0) * two_inv; 281 | if delta.legendre() == LegendreSymbol::QuadraticNonResidue { 282 | delta -= alpha; 283 | } 284 | let c0 = delta.sqrt_vartime().unwrap(); 285 | let c0_inv = c0.invert().unwrap(); 286 | Some(Self { c0: c0, c1: self.c1 * two_inv *c0_inv }) 287 | }, 288 | } 289 | } 290 | 291 | /// Computes the multiplicative inverse of this field 292 | /// element, returning None in the case that this element 293 | /// is zero. 294 | #[inline(always)] 295 | pub fn invert(&self) -> CtOption { 296 | // We wish to find the multiplicative inverse of a nonzero 297 | // element a + bu in Fp2. Algorithm 5.19 298 | // from Guide to Pairing Based Cryptography 299 | 300 | let mut v0 = self.c0.square(); 301 | v0 = v0 - nonresidue() * self.c1.square(); 302 | v0 = v0.invert().unwrap_or(Fp::zero()); 303 | CtOption::new(Fp2 { 304 | c0: self.c0 * v0, 305 | c1: -(self.c1 * v0), 306 | }, Choice::not(self.is_zero())) 307 | } 308 | 309 | /// Although this is labeled "vartime", it is only 310 | /// variable time with respect to the exponent. It 311 | /// is also not exposed in the public API. 
312 | pub fn pow_vartime(&self, by: &[u64; 6]) -> Self { 313 | let mut res = Self::one(); 314 | for e in by.iter().rev() { 315 | for i in (0..64).rev() { 316 | res = res.square(); 317 | 318 | if ((*e >> i) & 1) == 1 { 319 | res *= self; 320 | } 321 | } 322 | } 323 | res 324 | } 325 | } 326 | 327 | #[test] 328 | fn test_conditional_selection() { 329 | let a = Fp2 { 330 | c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]), 331 | c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]), 332 | }; 333 | let b = Fp2 { 334 | c0: Fp::from_raw_unchecked([13, 14, 15, 16, 17, 18]), 335 | c1: Fp::from_raw_unchecked([19, 20, 21, 22, 23, 24]), 336 | }; 337 | 338 | assert_eq!( 339 | ConditionallySelectable::conditional_select(&a, &b, Choice::from(0u8)), 340 | a 341 | ); 342 | assert_eq!( 343 | ConditionallySelectable::conditional_select(&a, &b, Choice::from(1u8)), 344 | b 345 | ); 346 | } 347 | 348 | #[test] 349 | fn test_norm() { 350 | let a = Fp2 { 351 | c0: Fp::from_raw_unchecked([ 352 | 0x2beed14627d7f9e9, 353 | 0xb6617e06660e5dce, 354 | 0x6c4cc7c2f91d42c, 355 | 0x996dc8474b7a63cc, 356 | 0xebaebc4c820d574e, 357 | 0x18865e12d93fd845, 358 | ]), 359 | c1: Fp::from_raw_unchecked([ 360 | 0x7d828664baf4f566, 361 | 0xd17e663996ec7339, 362 | 0x679ead55cb4078d0, 363 | 0xfe3b2260e001ec28, 364 | 0x305993d043d91b68, 365 | 0x626f03c0489b72d, 366 | ]), 367 | }; 368 | let b = Fp::from_raw_unchecked([ 369 | 0xf8397a163b69bed0, 370 | 0xf175823c7236735c, 371 | 0x5569469835f84b92, 372 | 0x714deebc8c061c3c, 373 | 0x7adcc0994eb519c8, 374 | 0x230d716ceafd4b, 375 | ]); 376 | assert_eq!(a.norm(), b); 377 | } 378 | 379 | #[test] 380 | fn test_equality() { 381 | fn is_equal(a: &Fp2, b: &Fp2) -> bool { 382 | let eq = a == b; 383 | let ct_eq = a.ct_eq(&b); 384 | 385 | assert_eq!(eq, ct_eq.unwrap_u8() == 1); 386 | 387 | eq 388 | } 389 | 390 | assert!(is_equal( 391 | &Fp2 { 392 | c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]), 393 | c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]), 394 | }, 395 | &Fp2 { 396 | c0: 
Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]), 397 | c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]), 398 | } 399 | )); 400 | 401 | assert!(!is_equal( 402 | &Fp2 { 403 | c0: Fp::from_raw_unchecked([2, 2, 3, 4, 5, 6]), 404 | c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]), 405 | }, 406 | &Fp2 { 407 | c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]), 408 | c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]), 409 | } 410 | )); 411 | 412 | assert!(!is_equal( 413 | &Fp2 { 414 | c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]), 415 | c1: Fp::from_raw_unchecked([2, 8, 9, 10, 11, 12]), 416 | }, 417 | &Fp2 { 418 | c0: Fp::from_raw_unchecked([1, 2, 3, 4, 5, 6]), 419 | c1: Fp::from_raw_unchecked([7, 8, 9, 10, 11, 12]), 420 | } 421 | )); 422 | } 423 | 424 | #[test] 425 | fn test_squaring() { 426 | let a = Fp2 { 427 | c0: Fp::from_raw_unchecked([ 428 | 0xc9a2183163ee70d4, 429 | 0xbc3770a7196b5c91, 430 | 0xa247f8c1304c5f44, 431 | 0xb01fc2a3726c80b5, 432 | 0xe1d293e5bbd919c9, 433 | 0x4b78e80020ef2ca, 434 | ]), 435 | c1: Fp::from_raw_unchecked([ 436 | 0x952ea4460462618f, 437 | 0x238d5eddf025c62f, 438 | 0xf6c94b012ea92e72, 439 | 0x3ce24eac1c93808, 440 | 0x55950f945da483c, 441 | 0x10a768d0df4eabc, 442 | ]), 443 | }; 444 | let b = Fp2 { 445 | c0: Fp::from_raw_unchecked([ 446 | 0x9180cfbd5231eb92, 447 | 0x80ba5cc15826ee06, 448 | 0x6e4810398ff8110a, 449 | 0x17b1565c3b5de972, 450 | 0xadfa03c911c9f3d, 451 | 0x45616e22b1a459, 452 | ]), 453 | c1: Fp::from_raw_unchecked([ 454 | 0xde372dea33981b66, 455 | 0x235f7eb8baf88c85, 456 | 0x3837e2636f0d07bc, 457 | 0xba39294a74709e4b, 458 | 0x274cb0edb1fdd1e2, 459 | 0x11abe141195cea6, 460 | ]), 461 | }; 462 | 463 | assert_eq!(a.square(), b); 464 | } 465 | 466 | #[test] 467 | fn test_multiplication() { 468 | let a = Fp2 { 469 | c0: Fp::from_raw_unchecked([ 470 | 0xc9a2183163ee70d4, 471 | 0xbc3770a7196b5c91, 472 | 0xa247f8c1304c5f44, 473 | 0xb01fc2a3726c80b5, 474 | 0xe1d293e5bbd919c9, 475 | 0x4b78e80020ef2ca, 476 | ]), 477 | c1: Fp::from_raw_unchecked([ 478 
| 0x952ea4460462618f, 479 | 0x238d5eddf025c62f, 480 | 0xf6c94b012ea92e72, 481 | 0x3ce24eac1c93808, 482 | 0x55950f945da483c, 483 | 0x10a768d0df4eabc, 484 | ]), 485 | }; 486 | let b = Fp2 { 487 | c0: Fp::from_raw_unchecked([ 488 | 0xa1e09175a4d2c1fe, 489 | 0x8b33acfc204eff12, 490 | 0xe24415a11b456e42, 491 | 0x61d996b1b6ee1936, 492 | 0x1164dbe8667c853c, 493 | 0x788557acc7d9c79, 494 | ]), 495 | c1: Fp::from_raw_unchecked([ 496 | 0xda6a87cc6f48fa36, 497 | 0xfc7b488277c1903, 498 | 0x9445ac4adc448187, 499 | 0x2616d5bc9099209, 500 | 0xdbed46772db58d48, 501 | 0x11b94d5076c7b7b1, 502 | ]), 503 | }; 504 | let c = Fp2 { 505 | c0: Fp::from_raw_unchecked([ 506 | 0xa2332499367dd291, 507 | 0x41882f1e421e6c04, 508 | 0xbc6a01cea4131ffb, 509 | 0xd5ccc0ffed5730d8, 510 | 0x28c08d93d3196725, 511 | 0x113a0b1f3ec936b, 512 | ]), 513 | c1: Fp::from_raw_unchecked([ 514 | 0xc00e498bee3a3b12, 515 | 0x3ac6975d105a3631, 516 | 0x99d635ebdedee2ca, 517 | 0xbc815bde58a6ecc8, 518 | 0x26382035f22c7652, 519 | 0x54f5a96fa8aef8, 520 | ]), 521 | }; 522 | 523 | assert_eq!(a * b, c); 524 | } 525 | 526 | #[test] 527 | fn test_addition() { 528 | let a = Fp2 { 529 | c0: Fp::from_raw_unchecked([ 530 | 0xc9a2183163ee70d4, 531 | 0xbc3770a7196b5c91, 532 | 0xa247f8c1304c5f44, 533 | 0xb01fc2a3726c80b5, 534 | 0xe1d293e5bbd919c9, 535 | 0x4b78e80020ef2ca, 536 | ]), 537 | c1: Fp::from_raw_unchecked([ 538 | 0x952ea4460462618f, 539 | 0x238d5eddf025c62f, 540 | 0xf6c94b012ea92e72, 541 | 0x3ce24eac1c93808, 542 | 0x55950f945da483c, 543 | 0x10a768d0df4eabc, 544 | ]), 545 | }; 546 | let b = Fp2 { 547 | c0: Fp::from_raw_unchecked([ 548 | 0xa1e09175a4d2c1fe, 549 | 0x8b33acfc204eff12, 550 | 0xe24415a11b456e42, 551 | 0x61d996b1b6ee1936, 552 | 0x1164dbe8667c853c, 553 | 0x788557acc7d9c79, 554 | ]), 555 | c1: Fp::from_raw_unchecked([ 556 | 0xda6a87cc6f48fa36, 557 | 0xfc7b488277c1903, 558 | 0x9445ac4adc448187, 559 | 0x2616d5bc9099209, 560 | 0xdbed46772db58d48, 561 | 0x11b94d5076c7b7b1, 562 | ]), 563 | }; 564 | let c = Fp2 { 565 | c0: 
Fp::from_raw_unchecked([ 566 | 0xe679e9a708c132d1, 567 | 0x305fc05f09ba5ba3, 568 | 0x6598ac3291888587, 569 | 0xf7d67f622865865d, 570 | 0x2cfc6a0db5b455ca, 571 | 0xa91a9b4b6c77e59, 572 | ]), 573 | c1: Fp::from_raw_unchecked([ 574 | 0xea906c1273ab5bc4, 575 | 0x1c49b621e7a1df32, 576 | 0x6c1b951c50e467f9, 577 | 0xec0cb85389ddb683, 578 | 0x1b0b91b006ee8c48, 579 | 0x111589976cf79183, 580 | ]), 581 | }; 582 | 583 | assert_eq!(a + b, c); 584 | } 585 | 586 | #[test] 587 | fn test_subtraction() { 588 | let a = Fp2 { 589 | c0: Fp::from_raw_unchecked([ 590 | 0xc9a2183163ee70d4, 591 | 0xbc3770a7196b5c91, 592 | 0xa247f8c1304c5f44, 593 | 0xb01fc2a3726c80b5, 594 | 0xe1d293e5bbd919c9, 595 | 0x4b78e80020ef2ca, 596 | ]), 597 | c1: Fp::from_raw_unchecked([ 598 | 0x952ea4460462618f, 599 | 0x238d5eddf025c62f, 600 | 0xf6c94b012ea92e72, 601 | 0x3ce24eac1c93808, 602 | 0x55950f945da483c, 603 | 0x10a768d0df4eabc, 604 | ]), 605 | }; 606 | let b = Fp2 { 607 | c0: Fp::from_raw_unchecked([ 608 | 0xa1e09175a4d2c1fe, 609 | 0x8b33acfc204eff12, 610 | 0xe24415a11b456e42, 611 | 0x61d996b1b6ee1936, 612 | 0x1164dbe8667c853c, 613 | 0x788557acc7d9c79, 614 | ]), 615 | c1: Fp::from_raw_unchecked([ 616 | 0xda6a87cc6f48fa36, 617 | 0xfc7b488277c1903, 618 | 0x9445ac4adc448187, 619 | 0x2616d5bc9099209, 620 | 0xdbed46772db58d48, 621 | 0x11b94d5076c7b7b1, 622 | ]), 623 | }; 624 | let c = Fp2 { 625 | c0: Fp::from_raw_unchecked([ 626 | 0xd83e794440e4512a, 627 | 0xcefc3c5506e3a280, 628 | 0x3ffc1cdfeaf90efd, 629 | 0xb1b9d40e44819881, 630 | 0x2f924802aaa36b72, 631 | 0x2d0c6faca6ea9ae, 632 | ]), 633 | c1: Fp::from_raw_unchecked([ 634 | 0x453be3866ae698a7, 635 | 0xec3a55aa375652d4, 636 | 0x9d7c6149ad9b5314, 637 | 0xfe93487107405a00, 638 | 0xd693f57de7db450b, 639 | 0x10aed6c368d2ccf5, 640 | ]), 641 | }; 642 | 643 | assert_eq!(b - a, c); 644 | } 645 | 646 | #[test] 647 | fn test_negation() { 648 | let a = Fp2 { 649 | c0: Fp::from_raw_unchecked([ 650 | 0xc9a2183163ee70d4, 651 | 0xbc3770a7196b5c91, 652 | 0xa247f8c1304c5f44, 
653 | 0xb01fc2a3726c80b5, 654 | 0xe1d293e5bbd919c9, 655 | 0x4b78e80020ef2ca, 656 | ]), 657 | c1: Fp::from_raw_unchecked([ 658 | 0x952ea4460462618f, 659 | 0x238d5eddf025c62f, 660 | 0xf6c94b012ea92e72, 661 | 0x3ce24eac1c93808, 662 | 0x55950f945da483c, 663 | 0x10a768d0df4eabc, 664 | ]), 665 | }; 666 | let b = Fp2 { 667 | c0: Fp::from_raw_unchecked([ 668 | 0xbb66a7ce9c118f2d, 669 | 0x5ad3ec9d1694a36e, 670 | 0x7cab696e89bce8bb, 671 | 0x6a03174f8e8892d9, 672 | 0xe46871dab0c82f71, 673 | 0xfcf6abc615b61e1f, 674 | ]), 675 | c1: Fp::from_raw_unchecked([ 676 | 0xefda1bb9fb9d9e72, 677 | 0xf37dfe663fda39d0, 678 | 0x282a172e8b60198d, 679 | 0x1654b5083f2bdb86, 680 | 0xc0e1b4c726c700ff, 681 | 0xa3c3b909d0262e, 682 | ]), 683 | }; 684 | 685 | assert_eq!(-a, b); 686 | } 687 | 688 | #[test] 689 | fn test_inversion() { 690 | let a = Fp2 { 691 | c0: Fp::from_raw_unchecked([ 692 | 0x1128ecad67549455, 693 | 0x9e7a1cff3a4ea1a8, 694 | 0xeb208d51e08bcf27, 695 | 0xe98ad40811f5fc2b, 696 | 0x736c3a59232d511d, 697 | 0x10acd42d29cfcbb6, 698 | ]), 699 | c1: Fp::from_raw_unchecked([ 700 | 0xd328e37cc2f58d41, 701 | 0x948df0858a605869, 702 | 0x6032f9d56f93a573, 703 | 0x2be483ef3fffdc87, 704 | 0x30ef61f88f483c2a, 705 | 0x1333f55a35725be0, 706 | ]), 707 | }; 708 | 709 | let b = Fp2 { 710 | c0: Fp::from_raw_unchecked([ 711 | 0xa972fe45912ab0b0, 712 | 0x2fad422c707d2a7a, 713 | 0x1e0c99ca54b14292, 714 | 0x12b35bad27bfbb4b, 715 | 0xaac12849e9ca08be, 716 | 0x9ca440f7d792c1, 717 | ]), 718 | c1: Fp::from_raw_unchecked([ 719 | 0x93f803dee0c6aee, 720 | 0x85be5ff1bf7a8b20, 721 | 0x9343d05ec64f00b6, 722 | 0x91a1db9f810ce2ac, 723 | 0xc7a4b33169335bd, 724 | 0xa9202f9769f137, 725 | ]), 726 | }; 727 | 728 | assert_eq!(a.invert().unwrap(), b); 729 | 730 | assert!(Fp2::zero().invert().is_none().unwrap_u8() == 1); 731 | } 732 | 733 | #[test] 734 | fn test_lexicographic_largest() { 735 | assert!(!bool::from(Fp2::zero().lexicographically_largest())); 736 | assert!(!bool::from(Fp2::one().lexicographically_largest())); 737 
| assert!(bool::from( 738 | Fp2 { 739 | c0: Fp::from_raw_unchecked([ 740 | 0x1128ecad67549455, 741 | 0x9e7a1cff3a4ea1a8, 742 | 0xeb208d51e08bcf27, 743 | 0xe98ad40811f5fc2b, 744 | 0x736c3a59232d511d, 745 | 0x10acd42d29cfcbb6, 746 | ]), 747 | c1: Fp::from_raw_unchecked([ 748 | 0xd328e37cc2f58d41, 749 | 0x948df0858a605869, 750 | 0x6032f9d56f93a573, 751 | 0x2be483ef3fffdc87, 752 | 0x30ef61f88f483c2a, 753 | 0x1333f55a35725be0, 754 | ]), 755 | } 756 | .lexicographically_largest() 757 | )); 758 | assert!(!bool::from( 759 | Fp2 { 760 | c0: -Fp::from_raw_unchecked([ 761 | 0x1128ecad67549455, 762 | 0x9e7a1cff3a4ea1a8, 763 | 0xeb208d51e08bcf27, 764 | 0xe98ad40811f5fc2b, 765 | 0x736c3a59232d511d, 766 | 0x10acd42d29cfcbb6, 767 | ]), 768 | c1: -Fp::from_raw_unchecked([ 769 | 0xd328e37cc2f58d41, 770 | 0x948df0858a605869, 771 | 0x6032f9d56f93a573, 772 | 0x2be483ef3fffdc87, 773 | 0x30ef61f88f483c2a, 774 | 0x1333f55a35725be0, 775 | ]), 776 | } 777 | .lexicographically_largest() 778 | )); 779 | assert!(!bool::from( 780 | Fp2 { 781 | c0: Fp::from_raw_unchecked([ 782 | 0x1128ecad67549455, 783 | 0x9e7a1cff3a4ea1a8, 784 | 0xeb208d51e08bcf27, 785 | 0xe98ad40811f5fc2b, 786 | 0x736c3a59232d511d, 787 | 0x10acd42d29cfcbb6, 788 | ]), 789 | c1: Fp::zero(), 790 | } 791 | .lexicographically_largest() 792 | )); 793 | assert!(bool::from( 794 | Fp2 { 795 | c0: -Fp::from_raw_unchecked([ 796 | 0x1128ecad67549455, 797 | 0x9e7a1cff3a4ea1a8, 798 | 0xeb208d51e08bcf27, 799 | 0xe98ad40811f5fc2b, 800 | 0x736c3a59232d511d, 801 | 0x10acd42d29cfcbb6, 802 | ]), 803 | c1: Fp::zero(), 804 | } 805 | .lexicographically_largest() 806 | )); 807 | } 808 | -------------------------------------------------------------------------------- /bls12_377/src/fp_asm.S: -------------------------------------------------------------------------------- 1 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2 | @ 3 | @ Low-level operations on Fp values 4 | @ 5 | @ Each Fp value is stored as a word-aligned 12-word array 6 | 
@ 7 | @ All functions work correctly with repeated arguments, 8 | @ like e.g. fp_sum(x, x, x) 9 | @ 10 | @ All functions should take constant time on ARM SC300 11 | @ 12 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 13 | 14 | .text 15 | 16 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 17 | @ 18 | @ fp_is_zero 19 | @ 20 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 21 | 22 | .align 3 23 | .global fp_is_zero 24 | .syntax unified 25 | .thumb 26 | .thumb_func 27 | .type fp_is_zero, %function 28 | 29 | fp_is_zero: 30 | ldr r1, [r0, #0] 31 | 32 | ldr r2, [r0, #4]; ldr r3, [r0, #8]; orr r1, r2; orr r1, r3 33 | ldr r2, [r0, #12]; ldr r3, [r0, #16]; orr r1, r2; orr r1, r3 34 | ldr r2, [r0, #20]; ldr r3, [r0, #24]; orr r1, r2; orr r1, r3 35 | ldr r2, [r0, #28]; ldr r3, [r0, #32]; orr r1, r2; orr r1, r3 36 | ldr r2, [r0, #36]; ldr r3, [r0, #40]; orr r1, r2; orr r1, r3 37 | 38 | ldr r2, [r0, #44]; orrs r1, r2 39 | 40 | ite eq 41 | moveq r0, #1 42 | movne r0, #0 43 | 44 | bx lr 45 | 46 | .size fp_is_zero, . - fp_is_zero 47 | 48 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 49 | @ 50 | @ fp_cpy: Copy 51 | @ 52 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 53 | 54 | .align 3 55 | .global fp_cpy 56 | .syntax unified 57 | .thumb 58 | .thumb_func 59 | .type fp_cpy, %function 60 | 61 | fp_cpy: 62 | ldr r2, [r1, #0]; ldr r3, [r1, #4] 63 | str r2, [r0, #0]; str r3, [r0, #4] 64 | 65 | ldr r2, [r1, #8]; ldr r3, [r1, #12] 66 | str r2, [r0, #8]; str r3, [r0, #12] 67 | 68 | ldr r2, [r1, #16]; ldr r3, [r1, #20] 69 | str r2, [r0, #16]; str r3, [r0, #20] 70 | 71 | ldr r2, [r1, #24]; ldr r3, [r1, #28] 72 | str r2, [r0, #24]; str r3, [r0, #28] 73 | 74 | ldr r2, [r1, #32]; ldr r3, [r1, #36] 75 | str r2, [r0, #32]; str r3, [r0, #36] 76 | 77 | ldr r2, [r1, #40]; ldr r3, [r1, #44] 78 | str r2, [r0, #40]; str r3, [r0, #44] 79 | 80 | bx lr 81 | 82 | .size fp_cpy, . 
- fp_cpy 83 | 84 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 85 | @ 86 | @ fp_eq: Check two Fp values for equality 87 | @ 88 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 89 | 90 | .align 3 91 | .global fp_eq 92 | .syntax unified 93 | .thumb 94 | .thumb_func 95 | .type fp_eq, %function 96 | 97 | fp_eq: 98 | sub sp, #4 99 | 100 | ldr r2, [r0, #0] 101 | ldr r3, [r1, #0] 102 | str r4, [sp] 103 | sub r2, r3 104 | 105 | ldr r3, [r0, #4]; ldr r4, [r1, #4]; sub r3, r4; orr r2, r3 106 | ldr r3, [r0, #8]; ldr r4, [r1, #8]; sub r3, r4; orr r2, r3 107 | ldr r3, [r0, #12]; ldr r4, [r1, #12]; sub r3, r4; orr r2, r3 108 | ldr r3, [r0, #16]; ldr r4, [r1, #16]; sub r3, r4; orr r2, r3 109 | ldr r3, [r0, #20]; ldr r4, [r1, #20]; sub r3, r4; orr r2, r3 110 | ldr r3, [r0, #24]; ldr r4, [r1, #24]; sub r3, r4; orr r2, r3 111 | ldr r3, [r0, #28]; ldr r4, [r1, #28]; sub r3, r4; orr r2, r3 112 | ldr r3, [r0, #32]; ldr r4, [r1, #32]; sub r3, r4; orr r2, r3 113 | ldr r3, [r0, #36]; ldr r4, [r1, #36]; sub r3, r4; orr r2, r3 114 | ldr r3, [r0, #40]; ldr r4, [r1, #40]; sub r3, r4; orr r2, r3 115 | 116 | ldr r4, [sp] 117 | 118 | ldr r0, [r0, #44]; ldr r1, [r1, #44]; sub r0, r1; orrs r0, r2 119 | 120 | ite eq 121 | moveq r0, #1 122 | movne r0, #0 123 | 124 | add sp, #4 125 | 126 | bx lr 127 | 128 | .size fp_eq, . - fp_eq 129 | 130 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 131 | @ 132 | @ fp_neg: Negate an Fp value 133 | @ 134 | @ x = (y != 0) ? (p - y) : 0 135 | @ 136 | @ x = (y == 0) ? 
p : y 137 | @ x = -x 138 | @ x += p 139 | @ 140 | @ Note: -x == ~x + 1 141 | @ 142 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 143 | 144 | .align 3 145 | .global fp_neg 146 | .syntax unified 147 | .thumb 148 | .thumb_func 149 | .type fp_neg, %function 150 | 151 | fp_neg: 152 | push { r4-r11, lr } 153 | ldm r1, { r1-r12 } 154 | 155 | @ lr = (y == 0) 156 | 157 | orr lr, r1, r2 158 | orr lr, r3 159 | orr lr, r4 160 | orr lr, r5 161 | orr lr, r6 162 | orr lr, r7 163 | orr lr, r8 164 | orr lr, r9 165 | orr lr, r10 166 | orr lr, r11 167 | orrs lr, r12 @ Z = (lr == 0) ? 1 : 0 168 | 169 | @ x = Z ? p : y 170 | 171 | itttt eq 172 | moveq r1, #0x00000001 173 | moveq r2, #0xc000 174 | movteq r2, #0x8508 175 | moveq r3, #0x30000000 176 | 177 | itttt eq 178 | moveq r4, #0x5D44 179 | movteq r4, #0x170B 180 | moveq r5, #0x4800 181 | movteq r5, #0xBA09 182 | 183 | itttt eq 184 | moveq r6, #0x622F 185 | movteq r6, #0x1EF3 186 | moveq r7, #0x138F 187 | movteq r7, #0x00F5 188 | 189 | itttt eq 190 | moveq r8, #0xD9F3 191 | movteq r8, #0x1A22 192 | moveq r9, #0x493B 193 | movteq r9, #0x6CA1 194 | 195 | itttt eq 196 | moveq r10, #0x05C0 197 | movteq r10, #0xC63B 198 | moveq r11, #0x10EA 199 | movteq r11, #0x17C5 200 | 201 | itt eq 202 | moveq r12, #0x3A46 203 | movteq r12, #0x01AE 204 | 205 | @ x = ~x 206 | 207 | mvn r1, r1 208 | mvn r2, r2 209 | mvn r3, r3 210 | mvn r4, r4 211 | mvn r5, r5 212 | mvn r6, r6 213 | mvn r7, r7 214 | mvn r8, r8 215 | mvn r9, r9 216 | mvn r10, r10 217 | mvn r11, r11 218 | mvn r12, r12 219 | 220 | @ x += 1 221 | 222 | adds r1, #1 223 | adcs r2, #0 224 | adcs r3, #0 225 | adcs r4, #0 226 | adcs r5, #0 227 | adcs r6, #0 228 | adcs r7, #0 229 | adcs r8, #0 230 | adcs r9, #0 231 | adcs r10, #0 232 | adcs r11, #0 233 | adc r12, #0 234 | 235 | @ x += p 236 | 237 | adds r1, #0x00000001 238 | 239 | mov lr, #0xc000; movt lr, #0x8508; adcs r2, lr 240 | 241 | adcs r3, #0x30000000 242 | 243 | mov lr, #0x5D44; movt lr, #0x170B; adcs r4, lr 244 | mov lr, 
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fp_diff: x = y - z (mod p)
@
@ x = y - z
@ store x
@
@ C = (x < 0); x += p
@ if (C) /* carry, because x was < 0 */
@ store x
@
@ Register interface, as used by the code below:
@   r0  pointer to x (12 words written)
@   r1  pointer to y (12 words read)
@   r2  pointer to z (12 words read)
@ Words are handled least-significant first. The raw difference is always
@ stored; it is overwritten by (difference + p) only when that addition
@ carries out of the top word, using conditional stores instead of a branch.
@ Early stores only touch words of x whose y/z inputs were already loaded.
@ NOTE(review): that ordering suggests x may alias y or z - confirm before
@ relying on it.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fp_diff
    .syntax unified
    .thumb
    .thumb_func
    .type fp_diff, %function

fp_diff:
    push { r4-r11, lr }

    @ x = y - z
    @ Loads and stores are interleaved with the subs/sbcs chain; none of them
    @ alter the flags, so the borrow propagates through all twelve words.

    ldr r14, [r1, #0]
    ldr r12, [r1, #4]
    ldr r11, [r1, #8]
    ldr r10, [r1, #12]
    ldr r9, [r1, #16]

    ldr r8, [r2, #0]
    ldr r7, [r2, #4]
    ldr r6, [r2, #8]
    ldr r5, [r2, #12]
    ldr r4, [r2, #16]

    subs r14, r8
    sbcs r12, r7
    sbcs r11, r6
    sbcs r10, r5
    sbcs r9, r4

    ldr r8, [r1, #20]
    ldr r7, [r1, #24]
    ldr r6, [r1, #28]

    ldr r5, [r2, #20]
    ldr r4, [r2, #24]
    ldr r3, [r2, #28]

    str r14, [r0, #0] @ store early (0 cycles)

    sbcs r8, r5
    sbcs r7, r4
    sbcs r6, r3

    ldr r5, [r1, #32]
    ldr r4, [r1, #36]

    ldr r3, [r2, #32]
    ldr r14, [r2, #36] @ use r14 as temporary

    str r12, [r0, #4] @ store early (0 cycles)

    sbcs r5, r3
    sbcs r4, r14

    ldr r3, [r1, #40]
    ldr r1, [r1, #44]           @ last use of the y pointer; r1 now holds y[11]

    ldr r14, [r2, #40]
    ldr r2, [r2, #44]           @ last use of the z pointer; r2 now holds z[11]

    str r11, [r0, #8] @ store early (0 cycles)

    sbcs r3, r14
    sbcs r2, r1, r2             @ top word: r2 = y[11] - z[11] - borrow

    ldr r14, [r0, #0] @ restore r14

    str r10, [r0, #12]
    str r9, [r0, #16]
    str r8, [r0, #20]
    str r7, [r0, #24]
    str r6, [r0, #28]
    str r5, [r0, #32]
    str r4, [r0, #36]
    str r3, [r0, #40]
    str r2, [r0, #44]

    @ x += p, using r1 as temporary, x in { r14, r12-r2 }

    adds r14, #0x00000001

    mov r1, #0xc000; movt r1, #0x8508; adcs r12, r1

    adcs r11, #0x30000000

    mov r1, #0x5D44; movt r1, #0x170B; adcs r10, r1
    mov r1, #0x4800; movt r1, #0xBA09; adcs r9, r1
    mov r1, #0x622F; movt r1, #0x1EF3; adcs r8, r1
    mov r1, #0x138F; movt r1, #0x00F5; adcs r7, r1
    mov r1, #0xD9F3; movt r1, #0x1A22; adcs r6, r1
    mov r1, #0x493B; movt r1, #0x6CA1; adcs r5, r1
    mov r1, #0x05C0; movt r1, #0xC63B; adcs r4, r1
    mov r1, #0x10EA; movt r1, #0x17C5; adcs r3, r1
    mov r1, #0x3A46; movt r1, #0x01AE; adcs r2, r1

    itttt cs @ carry set => x was negative => we need to store x
    strcs r14, [r0, #0]
    strcs r12, [r0, #4]
    strcs r11, [r0, #8]
    strcs r10, [r0, #12]
    itttt cs
    strcs r9, [r0, #16]
    strcs r8, [r0, #20]
    strcs r7, [r0, #24]
    strcs r6, [r0, #28]
    itttt cs
    strcs r5, [r0, #32]
    strcs r4, [r0, #36]
    strcs r3, [r0, #40]
    strcs r2, [r0, #44]

    pop { r4-r11, pc }

    .size fp_diff, . - fp_diff
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fp_sum: x = y + z (mod p)
@
@ x = y + z
@ store x
@
@ C = (x >= p); x -= p
@ if (C)
@ store x
@
@ Register interface, as used by the code below:
@   r0  pointer to x (12 words written)
@   r1  pointer to y (12 words read)
@   r2  pointer to z (12 words read)
@ Words are handled least-significant first. The raw sum is always stored;
@ it is overwritten by (sum - p) only when that subtraction does not borrow,
@ using conditional stores instead of a branch. Any carry out of the top
@ word of y + z is not examined.
@ NOTE(review): presumably callers guarantee y, z < p so the sum fits in
@ 12 words - confirm.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fp_sum
    .syntax unified
    .thumb
    .thumb_func
    .type fp_sum, %function

fp_sum:
    push { r4-r11, lr }

    @ x = y + z
    @ Loads and stores are interleaved with the adds/adcs chain; none of them
    @ alter the flags, so the carry propagates through all twelve words.

    ldr r14, [r1, #0]
    ldr r12, [r1, #4]
    ldr r11, [r1, #8]
    ldr r10, [r1, #12]
    ldr r9, [r1, #16]

    ldr r8, [r2, #0]
    ldr r7, [r2, #4]
    ldr r6, [r2, #8]
    ldr r5, [r2, #12]
    ldr r4, [r2, #16]

    adds r14, r8
    adcs r12, r7
    adcs r11, r6
    adcs r10, r5
    adcs r9, r4

    ldr r8, [r1, #20]
    ldr r7, [r1, #24]
    ldr r6, [r1, #28]

    ldr r5, [r2, #20]
    ldr r4, [r2, #24]
    ldr r3, [r2, #28]

    str r14, [r0, #0] @ store early (0 cycles)

    adcs r8, r5
    adcs r7, r4
    adcs r6, r3

    ldr r5, [r1, #32]
    ldr r4, [r1, #36]

    ldr r3, [r2, #32]
    ldr r14, [r2, #36] @ use r14 as temporary

    str r12, [r0, #4] @ store early (0 cycles)

    adcs r5, r3
    adcs r4, r14

    ldr r3, [r1, #40]
    ldr r1, [r1, #44]           @ last use of the y pointer; r1 now holds y[11]

    ldr r14, [r2, #40]
    ldr r2, [r2, #44]           @ last use of the z pointer; r2 now holds z[11]

    str r11, [r0, #8] @ store early (0 cycles)

    adcs r3, r14
    adcs r2, r1, r2             @ top word: r2 = y[11] + z[11] + carry

    ldr r14, [r0, #0] @ restore r14

    str r10, [r0, #12]
    str r9, [r0, #16]
    str r8, [r0, #20]
    str r7, [r0, #24]
    str r6, [r0, #28]
    str r5, [r0, #32]
    str r4, [r0, #36]
    str r3, [r0, #40]
    str r2, [r0, #44]

    @ x -= p, using r1 as temporary, x in { r14, r12-r2 }

    subs r14, #0x00000001

    mov r1, #0xc000; movt r1, #0x8508; sbcs r12, r1

    sbcs r11, #0x30000000

    mov r1, #0x5D44; movt r1, #0x170B; sbcs r10, r1
    mov r1, #0x4800; movt r1, #0xBA09; sbcs r9, r1
    mov r1, #0x622F; movt r1, #0x1EF3; sbcs r8, r1
    mov r1, #0x138F; movt r1, #0x00F5; sbcs r7, r1
    mov r1, #0xD9F3; movt r1, #0x1A22; sbcs r6, r1
    mov r1, #0x493B; movt r1, #0x6CA1; sbcs r5, r1
    mov r1, #0x05C0; movt r1, #0xC63B; sbcs r4, r1
    mov r1, #0x10EA; movt r1, #0x17C5; sbcs r3, r1
    mov r1, #0x3A46; movt r1, #0x01AE; sbcs r2, r1

    itttt cs @ carry set == no borrow => x was >= p => we need to store x
    strcs r14, [r0, #0]
    strcs r12, [r0, #4]
    strcs r11, [r0, #8]
    strcs r10, [r0, #12]
    itttt cs
    strcs r9, [r0, #16]
    strcs r8, [r0, #20]
    strcs r7, [r0, #24]
    strcs r6, [r0, #28]
    itttt cs
    strcs r5, [r0, #32]
    strcs r4, [r0, #36]
    strcs r3, [r0, #40]
    strcs r2, [r0, #44]

    pop { r4-r11, pc }

    .size fp_sum, . - fp_sum
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fp_cset: x = c ? y : x
@
@ Register interface, as used by the code below:
@   r0  pointer to x (12 words, conditionally overwritten)
@   r1  pointer to y (12 words, always read)
@   r2  c; any non-zero value selects the copy
@ The condition is evaluated once (movs) and then consumed by six IT blocks;
@ the loads from y happen regardless of c and only the stores are
@ conditional. Uses only r0-r3 and no stack.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fp_cset
    .syntax unified
    .thumb
    .thumb_func
    .type fp_cset, %function

fp_cset:
    movs r2, r2                 @ Z = (c == 0); r2 is free for reuse as scratch afterwards

    @ ldr/str leave the flags alone, so every "ne" below still tests c.

    ldr r2, [r1, #0]
    ldr r3, [r1, #4]
    itt ne
    strne r2, [r0, #0]
    strne r3, [r0, #4]

    ldr r2, [r1, #8]
    ldr r3, [r1, #12]
    itt ne
    strne r2, [r0, #8]
    strne r3, [r0, #12]

    ldr r2, [r1, #16]
    ldr r3, [r1, #20]
    itt ne
    strne r2, [r0, #16]
    strne r3, [r0, #20]

    ldr r2, [r1, #24]
    ldr r3, [r1, #28]
    itt ne
    strne r2, [r0, #24]
    strne r3, [r0, #28]

    ldr r2, [r1, #32]
    ldr r3, [r1, #36]
    itt ne
    strne r2, [r0, #32]
    strne r3, [r0, #36]

    ldr r2, [r1, #40]
    ldr r3, [r1, #44]
    itt ne
    strne r2, [r0, #40]
    strne r3, [r0, #44]

    bx lr

    .size fp_cset, . - fp_cset
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fp_to_bytes
@
@ Serialise 12 words into 48 bytes, most significant byte first.
@
@ Register interface, as used by the code below:
@   r0  pointer to the 48-byte output
@   r1  pointer to the 12-word input (word at offset 0 is least significant)
@ The word at offset 44 lands in bytes 0..3, the word at offset 0 in bytes
@ 44..47, and within each word the top byte is written to the lowest address.
@ Only byte stores are used on the output. r2 is the only scratch register;
@ the flags set by lsrs are never consumed.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fp_to_bytes
    .syntax unified
    .thumb
    .thumb_func
    .type fp_to_bytes, %function

fp_to_bytes:
    @ Each group: store the low byte, shift right by 8, repeat; the last line
    @ of a group also fetches the next (less significant) word.

    ldr r2, [r1, #44]
    strb r2, [r0, #3]; lsrs r2, #8
    strb r2, [r0, #2]; lsrs r2, #8
    strb r2, [r0, #1]; lsrs r2, #8
    strb r2, [r0, #0]; ldr r2, [r1, #40]

    strb r2, [r0, #7]; lsrs r2, #8
    strb r2, [r0, #6]; lsrs r2, #8
    strb r2, [r0, #5]; lsrs r2, #8
    strb r2, [r0, #4]; ldr r2, [r1, #36]

    strb r2, [r0, #11]; lsrs r2, #8
    strb r2, [r0, #10]; lsrs r2, #8
    strb r2, [r0, #9]; lsrs r2, #8
    strb r2, [r0, #8]; ldr r2, [r1, #32]

    strb r2, [r0, #15]; lsrs r2, #8
    strb r2, [r0, #14]; lsrs r2, #8
    strb r2, [r0, #13]; lsrs r2, #8
    strb r2, [r0, #12]; ldr r2, [r1, #28]

    strb r2, [r0, #19]; lsrs r2, #8
    strb r2, [r0, #18]; lsrs r2, #8
    strb r2, [r0, #17]; lsrs r2, #8
    strb r2, [r0, #16]; ldr r2, [r1, #24]

    strb r2, [r0, #23]; lsrs r2, #8
    strb r2, [r0, #22]; lsrs r2, #8
    strb r2, [r0, #21]; lsrs r2, #8
    strb r2, [r0, #20]; ldr r2, [r1, #20]

    strb r2, [r0, #27]; lsrs r2, #8
    strb r2, [r0, #26]; lsrs r2, #8
    strb r2, [r0, #25]; lsrs r2, #8
    strb r2, [r0, #24]; ldr r2, [r1, #16]

    strb r2, [r0, #31]; lsrs r2, #8
    strb r2, [r0, #30]; lsrs r2, #8
    strb r2, [r0, #29]; lsrs r2, #8
    strb r2, [r0, #28]; ldr r2, [r1, #12]

    strb r2, [r0, #35]; lsrs r2, #8
    strb r2, [r0, #34]; lsrs r2, #8
    strb r2, [r0, #33]; lsrs r2, #8
    strb r2, [r0, #32]; ldr r2, [r1, #8]

    strb r2, [r0, #39]; lsrs r2, #8
    strb r2, [r0, #38]; lsrs r2, #8
    strb r2, [r0, #37]; lsrs r2, #8
    strb r2, [r0, #36]; ldr r2, [r1, #4]

    strb r2, [r0, #43]; lsrs r2, #8
    strb r2, [r0, #42]; lsrs r2, #8
    strb r2, [r0, #41]; lsrs r2, #8
    strb r2, [r0, #40]; ldr r2, [r1, #0]

    strb r2, [r0, #47]; lsrs r2, #8
    strb r2, [r0, #46]; lsrs r2, #8
    strb r2, [r0, #45]; lsrs r2, #8
    strb r2, [r0, #44]

    bx lr

    .size fp_to_bytes, . - fp_to_bytes
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fp_from_bytes
@
@ Deserialise 48 bytes (most significant byte first) into 12 words, then
@ zero the result if it is >= p.
@
@ Register interface, as used by the code below:
@   r0  pointer to the 12-word output
@   r1  pointer to the 48-byte input
@ Step 1 reverses the byte order: input byte i is written to output byte
@ 47-i. Step 2 performs a trial subtraction of p purely for its borrow; the
@ word differences themselves are discarded. Uses only r0-r3 and no stack.
@
@ Not safe in place: output byte 47 is written before input byte 47 is read.
@ NOTE(review): a rejected (>= p) input is reported only by the all-zero
@ result, which is indistinguishable from a genuine zero - confirm callers
@ expect that.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fp_from_bytes
    .syntax unified
    .thumb
    .thumb_func
    .type fp_from_bytes, %function

fp_from_bytes:
    ldrb r2, [r1, #0]; strb r2, [r0, #47]
    ldrb r2, [r1, #1]; strb r2, [r0, #46]
    ldrb r2, [r1, #2]; strb r2, [r0, #45]
    ldrb r2, [r1, #3]; strb r2, [r0, #44]

    ldrb r2, [r1, #4]; strb r2, [r0, #43]
    ldrb r2, [r1, #5]; strb r2, [r0, #42]
    ldrb r2, [r1, #6]; strb r2, [r0, #41]
    ldrb r2, [r1, #7]; strb r2, [r0, #40]

    ldrb r2, [r1, #8]; strb r2, [r0, #39]
    ldrb r2, [r1, #9]; strb r2, [r0, #38]
    ldrb r2, [r1, #10]; strb r2, [r0, #37]
    ldrb r2, [r1, #11]; strb r2, [r0, #36]

    ldrb r2, [r1, #12]; strb r2, [r0, #35]
    ldrb r2, [r1, #13]; strb r2, [r0, #34]
    ldrb r2, [r1, #14]; strb r2, [r0, #33]
    ldrb r2, [r1, #15]; strb r2, [r0, #32]

    ldrb r2, [r1, #16]; strb r2, [r0, #31]
    ldrb r2, [r1, #17]; strb r2, [r0, #30]
    ldrb r2, [r1, #18]; strb r2, [r0, #29]
    ldrb r2, [r1, #19]; strb r2, [r0, #28]

    ldrb r2, [r1, #20]; strb r2, [r0, #27]
    ldrb r2, [r1, #21]; strb r2, [r0, #26]
    ldrb r2, [r1, #22]; strb r2, [r0, #25]
    ldrb r2, [r1, #23]; strb r2, [r0, #24]

    ldrb r2, [r1, #24]; strb r2, [r0, #23]
    ldrb r2, [r1, #25]; strb r2, [r0, #22]
    ldrb r2, [r1, #26]; strb r2, [r0, #21]
    ldrb r2, [r1, #27]; strb r2, [r0, #20]

    ldrb r2, [r1, #28]; strb r2, [r0, #19]
    ldrb r2, [r1, #29]; strb r2, [r0, #18]
    ldrb r2, [r1, #30]; strb r2, [r0, #17]
    ldrb r2, [r1, #31]; strb r2, [r0, #16]

    ldrb r2, [r1, #32]; strb r2, [r0, #15]
    ldrb r2, [r1, #33]; strb r2, [r0, #14]
    ldrb r2, [r1, #34]; strb r2, [r0, #13]
    ldrb r2, [r1, #35]; strb r2, [r0, #12]

    ldrb r2, [r1, #36]; strb r2, [r0, #11]
    ldrb r2, [r1, #37]; strb r2, [r0, #10]
    ldrb r2, [r1, #38]; strb r2, [r0, #9]
    ldrb r2, [r1, #39]; strb r2, [r0, #8]

    ldrb r2, [r1, #40]; strb r2, [r0, #7]
    ldrb r2, [r1, #41]; strb r2, [r0, #6]
    ldrb r2, [r1, #42]; strb r2, [r0, #5]
    ldrb r2, [r1, #43]; strb r2, [r0, #4]

    ldrb r2, [r1, #44]; strb r2, [r0, #3]
    ldrb r2, [r1, #45]; strb r2, [r0, #2]
    ldrb r2, [r1, #46]; strb r2, [r0, #1]
    ldrb r2, [r1, #47]; strb r2, [r0, #0]

    @ Subtract modulus
    @ Word-by-word x - p, least significant word first. Each difference in r2
    @ is thrown away by the next ldr; only the borrow (carry flag) matters.
    @ mov/movt/ldr do not alter the flags between the subs/sbcs steps.

    ldr r2, [r0, #0]
    mov r3, #0x00000001
    subs r2, r3

    ldr r2, [r0, #4]
    mov r3, #0xC000
    movt r3, #0x8508
    sbcs r2, r3

    ldr r2, [r0, #8]
    mov r3, #0x30000000
    sbcs r2, r3

    ldr r2, [r0, #12]
    mov r3, #0x5D44
    movt r3, #0x170B
    sbcs r2, r3

    ldr r2, [r0, #16]
    mov r3, #0x4800
    movt r3, #0xBA09
    sbcs r2, r3

    ldr r2, [r0, #20]
    mov r3, #0x622F
    movt r3, #0x1EF3
    sbcs r2, r3

    ldr r2, [r0, #24]
    mov r3, #0x138F
    movt r3, #0x00F5
    sbcs r2, r3

    ldr r2, [r0, #28]
    mov r3, #0xD9F3
    movt r3, #0x1A22
    sbcs r2, r3

    ldr r2, [r0, #32]
    mov r3, #0x493B
    movt r3, #0x6CA1
    sbcs r2, r3

    ldr r2, [r0, #36]
    mov r3, #0x05C0
    movt r3, #0xC63B
    sbcs r2, r3

    ldr r2, [r0, #40]
    mov r3, #0x10EA
    movt r3, #0x17C5
    sbcs r2, r3

    ldr r2, [r0, #44]
    mov r3, #0x3A46
    movt r3, #0x01AE
    sbcs r2, r3

    @ Malformed input (x>=m) => no borrow

    mov r2, #0                  @ non-flag-setting mov: the carry from the sbcs chain is kept

    itttt cs @ carry set == no borrow => x was >= p => we need to zero x
    strcs r2, [r0, #0]
    strcs r2, [r0, #4]
    strcs r2, [r0, #8]
    strcs r2, [r0, #12]

    itttt cs
    strcs r2, [r0, #16]
    strcs r2, [r0, #20]
    strcs r2, [r0, #24]
    strcs r2, [r0, #28]

    itttt cs
    strcs r2, [r0, #32]
    strcs r2, [r0, #36]
    strcs r2, [r0, #40]
    strcs r2, [r0, #44]

    bx lr

    .size fp_from_bytes, . - fp_from_bytes
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ mul378(uint32_t *z, const uint32_t *x, const uint32_t *y)
@
@ Multiply 378-bit numbers, producing a 756-bit result
@
@ Uses Karatsuba, delegating 189/190-bit multiply to m190
@
@ ~1641 clock cycles
@
@ Register interface: r0 = z (24 words written), r1 = x, r2 = y (12 words
@ each, least significant word first). Input bits at position 378 and above
@ are discarded by the 29-bit mask on each high half.
@
@ Stack frame once both operands have been pushed (byte offsets from sp):
@     0..23   xl   (later overwritten with xh+xl)
@    24..47   xh   (later overwritten with yh+yl)
@    48..71   yl  \ later overwritten with (xh+xl)*(yh+yl), then with the
@    72..95   yh  / Karatsuba middle term
@    96..107  saved r0 (z), r1, r2
@   108..     saved r4-r11, lr
@
@ The bare "@ <number>" comments look like a running cycle count (the last
@ one, 1641, matches the figure above).
@
@ Throughout the recombination, lsl/lsr/orr/ldr/str are the non-flag-setting
@ forms, so the carry or borrow of an adcs/sbcs chain survives across them.
@
@ NOTE(review): unlike the fp_* entry points, this exported symbol has no
@ ".type mul378, %function" directive - consider adding one for consistency.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global mul378
    .syntax unified
    .thumb
    .thumb_func
mul378:
    push { r0-r2, r4-r11, lr }
    @ 13
    @@ Split x and y into 189-bit halves
    @ Save them on the stack and forget input pointers

    @ Load y

    ldm r2, { r2-r12, r14 }
    @ 26
    @ Split bits into two right-aligned halves of 189 bits each
    @ Low half stays in r2-r7 (bit 189 is bit 29 of r7); the high half is
    @ shifted left by 3 so it is right-aligned in r8-r12, r14.

    lsl r14, #3; orr r14, r14, r12, lsr #29
    lsl r12, #3; orr r12, r12, r11, lsr #29
    lsl r11, #3; orr r11, r11, r10, lsr #29
    lsl r10, #3; orr r10, r10, r9, lsr #29
    lsl r9, #3; orr r9, r9, r8, lsr #29
    lsl r8, #3; orr r8, r8, r7, lsr #29

    @ Limit to 189 bits each

    and r14, #0x1fffffff
    and r7, #0x1fffffff
    @ 40
    @ Store y on the stack

    push { r2-r12, r14 }
    @53
    @ Load x

    ldm r1, { r2-r12, r14 }

    @ Split bits into two right-aligned halves of 189 bits each

    lsl r14, #3; orr r14, r14, r12, lsr #29
    lsl r12, #3; orr r12, r12, r11, lsr #29
    lsl r11, #3; orr r11, r11, r10, lsr #29
    lsl r10, #3; orr r10, r10, r9, lsr #29
    lsl r9, #3; orr r9, r9, r8, lsr #29
    lsl r8, #3; orr r8, r8, r7, lsr #29

    @ Limit to 189 bits each

    and r14, #0x1fffffff
    and r7, #0x1fffffff

    @ Store x on the stack

    push { r2-r12, r14 }
    @ 93
    @@ Compute xl*yl, place in zl

    mov r1, sp
    add r2, sp, #48

    bl m190
    @ 517
    @@ Compute xh*yh, place in zh

    @ ldr r0, [sp, #96] @ Skipped because m190 preserves r0
    add r1, sp, #24
    add r2, sp, #72
    add r0, r0, #48

    bl m190

    @ 942
    @@ Compute xh+xl and yh+yl, store on the stack

    @ Load x from the stack

    ldm sp, { r2-r12, r14 }

    @ Compute xh+xl

    adds r2, r8
    adcs r3, r9
    adcs r4, r10
    adcs r5, r11
    adcs r6, r12
    adc r7, r14

    add r1, sp, #48 @ point at y on the stack
    @ 962
    stm sp, { r2-r7 } @ overwrite xl with xh+xl

    @ Load y from the stack

    ldm r1, { r2-r12, r14 }
    @ 982
    sub r1, r1, #24 @ point at xh on the stack

    @ Compute yh+yl

    adds r2, r8
    adcs r3, r9
    adcs r4, r10
    adcs r5, r11
    adcs r6, r12
    adc r7, r14

    stm r1, { r2-r7 } @ overwrite xh with yh+yl

    @@ Compute (xh+xl)*(yh+yl), overwrite y on the stack

    add r0, sp, #48
    add r2, sp, #0 @ xh+xl
    @ 998
    bl m190
    @ 1420
    @@ Subtract xl*yl and xh*yh from (xh+xl)*(yh+yl)
    @ Done in three 4-word chunks. r14 collects the borrows of a chunk as a
    @ negative count (0, -1 or -2); "rsb r14, #0" turns it into the amount to
    @ subtract from the next chunk.

    ldr r0, [sp, #96] @ point to z (xl*yl)

    ldr r1, [sp, #48]
    ldr r2, [sp, #52]
    ldr r3, [sp, #56]
    ldr r4, [sp, #60]

    ldr r5, [r0, #0]
    ldr r6, [r0, #4]
    ldr r7, [r0, #8]
    ldr r8, [r0, #12]

    ldr r9, [r0, #48]
    ldr r10, [r0, #52]
    ldr r11, [r0, #56]
    ldr r12, [r0, #60]

    mov r14, #0

    subs r1, r5
    sbcs r2, r6
    sbcs r3, r7
    sbcs r4, r8
    sbc r14, #0

    subs r1, r9
    sbcs r2, r10
    sbcs r3, r11
    sbcs r4, r12
    sbc r14, #0

    str r1, [sp, #48]
    str r2, [sp, #52]
    str r3, [sp, #56]
    str r4, [sp, #60]


    ldr r1, [sp, #64]
    ldr r2, [sp, #68]
    ldr r3, [sp, #72]
    ldr r4, [sp, #76]

    ldr r5, [r0, #16]
    ldr r6, [r0, #20]
    ldr r7, [r0, #24]
    ldr r8, [r0, #28]

    ldr r9, [r0, #64]
    ldr r10, [r0, #68]
    ldr r11, [r0, #72]
    ldr r12, [r0, #76]
    @ 1462
    @ Propagate borrow

    rsb r14, #0
    subs r1, r14; mov r14, #0
    sbcs r2, #0
    sbcs r3, #0
    sbcs r4, #0
    sbc r14, #0

    subs r1, r5
    sbcs r2, r6
    sbcs r3, r7
    sbcs r4, r8
    sbc r14, #0

    subs r1, r9
    sbcs r2, r10
    sbcs r3, r11
    sbcs r4, r12
    sbc r14, #0

    str r1, [sp, #64]
    str r2, [sp, #68]
    str r3, [sp, #72]
    str r4, [sp, #76]
    @ 1482

    ldr r1, [sp, #80]
    ldr r2, [sp, #84]
    ldr r3, [sp, #88]
    ldr r4, [sp, #92]

    ldr r5, [r0, #32]
    ldr r6, [r0, #36]
    ldr r7, [r0, #40]
    ldr r8, [r0, #44]

    ldr r9, [r0, #80]
    ldr r10, [r0, #84]
    ldr r11, [r0, #88]
    ldr r12, [r0, #92]
    @ 1495
    @ Propagate borrow
    @ Ignore borrow out

    rsb r14, #0
    subs r1, r14;
    sbcs r2, #0
    sbcs r3, #0
    sbc r4, #0

    subs r1, r5
    sbcs r2, r6
    sbcs r3, r7
    sbc r4, r8

    subs r1, r9
    sbcs r2, r10
    sbcs r3, r11
    sbc r4, r12

    str r1, [sp, #80]
    str r2, [sp, #84]
    str r3, [sp, #88]
    str r4, [sp, #92]

    @@ Add (((xh+xl)*(yh+yl) - xh*yh - xl*yl) << 189)
    @@ to ((xh*yh << 378) + xl*yl)
    @ Bit 189 is bit 29 of z word 5, so the middle term enters at [r0, #20]
    @ shifted left by 29 (equivalently: each following word is the term
    @ shifted right by 3). xh*yh was written at word 12 (bit 384) but belongs
    @ at bit 378, so from word 11 upwards the stored high product is repacked
    @ 6 bits lower (lsl #26 / lsr #6) while the same carry chain runs on.

    ldr r1, [r0, #20]
    ldr r2, [r0, #24]
    ldr r3, [r0, #28]
    ldr r4, [r0, #32]
    ldr r5, [r0, #36]
    ldr r6, [r0, #40]

    ldr r7, [sp, #0+48]
    ldr r8, [sp, #4+48]
    ldr r9, [sp, #8+48]
    ldr r10, [sp, #12+48]
    ldr r11, [sp, #16+48]
    ldr r12, [sp, #20+48]
    @ 1525
    adds r1, r1, r7, lsl #29
    lsr r7, #3; orr r7, r7, r8, lsl #29; adcs r2, r7
    lsr r8, #3; orr r8, r8, r9, lsl #29; adcs r3, r8
    lsr r9, #3; orr r9, r9, r10, lsl #29; adcs r4, r9
    lsr r10, #3; orr r10, r10, r11, lsl #29; adcs r5, r10
    lsr r11, #3; orr r11, r11, r12, lsl #29; adcs r6, r11
    @ 1541
    str r1, [r0, #20]; ldr r1, [r0, #44]; ldr r7, [sp, #24+48]
    str r2, [r0, #24]; ldr r2, [r0, #48]; ldr r8, [sp, #28+48]
    str r3, [r0, #28]; ldr r3, [r0, #52]; ldr r9, [sp, #32+48]
    str r4, [r0, #32]; ldr r4, [r0, #56]; ldr r10, [sp, #36+48]
    str r5, [r0, #36]; ldr r5, [r0, #60]; ldr r11, [sp, #40+48]
    str r6, [r0, #40];
    @ 1557
    orr r1, r1, r2, lsl #26; lsr r2, #6
    orr r2, r2, r3, lsl #26; lsr r3, #6
    orr r3, r3, r4, lsl #26; lsr r4, #6
    orr r4, r4, r5, lsl #26; lsr r5, #6
    @ 1565
    lsr r12, #3; orr r12, r12, r7, lsl #29; adcs r1, r12
    lsr r7, #3; orr r7, r7, r8, lsl #29; adcs r2, r7
    lsr r8, #3; orr r8, r8, r9, lsl #29; adcs r3, r8
    lsr r9, #3; orr r9, r9, r10, lsl #29; adcs r4, r9
    lsr r10, #3; orr r10, r10, r11, lsl #29
    @ 1579
    str r1, [r0, #44]; ldr r6, [r0, #64]; ldr r12, [sp, #44+48]
    str r2, [r0, #48]; ldr r1, [r0, #68]
    str r3, [r0, #52]; ldr r2, [r0, #72]
    str r4, [r0, #56]
    @ 1587
    orr r5, r5, r6, lsl #26; lsr r6, #6
    orr r6, r6, r1, lsl #26; lsr r1, #6
    orr r1, r1, r2, lsl #26; lsr r2, #6
    @ 1593
    lsr r11, #3; orr r11, r11, r12, lsl #29; adcs r5, r10
    lsr r12, #3; adcs r6, r11
    adcs r1, r12
    @ 1599
    str r5, [r0, #60]
    ldr r3, [r0, #76]
    ldr r4, [r0, #80]
    str r6, [r0, #64]
    @ 1603
    @ The middle term is exhausted; from here only the carry is propagated.
    orr r2, r2, r3, lsl #26; lsr r3, #6; adcs r2, #0
    orr r3, r3, r4, lsl #26; lsr r4, #6; adcs r3, #0
    @ 1609
    str r1, [r0, #68]
    ldr r5, [r0, #84]
    ldr r6, [r0, #88]
    ldr r1, [r0, #92]
    str r2, [r0, #72]
    @ 1614
    orr r4, r4, r5, lsl #26; lsr r5, #6; adcs r4, #0
    orr r5, r5, r6, lsl #26; lsr r6, #6; adcs r5, #0
    orr r6, r6, r1, lsl #26; lsr r1, #6; adcs r6, #0
    adc r1, #0
    @ 1624
    @ Deallocate stack

    add sp, #108 @ Discard 3+12+12 words
    @ 1625
    @ Store remaining outputs

    str r3, [r0, #76]
    str r4, [r0, #80]
    str r5, [r0, #84]
    str r6, [r0, #88]
    str r1, [r0, #92]
    @ 1630
    pop { r4-r11, pc }
    @ 1641

    .size mul378, .-mul378
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ m190
@
@ Multiply 190-bit numbers, producing 380-bit result
@
@ Uses Karatsuba, delegating 95/96-bit multiply to m96
@
@ Output pointer in r0
@ Input pointers in r1, r2
@
@ Preserves r0
@
@ ~422 clock cycles according to the running count annotated below (and to
@ the ~424-cycle gaps around each "bl m190" in mul378). This header
@ previously said ~680 - NOTE(review): confirm the figure on target.
@
@ Internal helper (no .global). Only r0 and lr are saved here; r1-r12 are
@ clobbered, so the caller must have saved any callee-saved registers it
@ needs (mul378 does).
@
@ Stack frame after "push { r1-r12 }" (byte offsets from sp):
@     0..11   xl   (later overwritten with xh+xl)
@    12..23   xh   (later overwritten with yh+yl)
@    24..35   yl  \ later overwritten with (xh+xl)*(yh+yl)
@    36..47   yh  /
@    48       saved r0 (z)
@    52       saved lr
@ Both inputs are read into registers before anything is written to z.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .syntax unified
    .thumb
    .thumb_func
m190:
    push { r0, lr }

    @ Load y

    ldr r12, [r2, #20]
    ldr r11, [r2, #16]
    ldr r10, [r2, #12]
    ldr r9, [r2, #8]
    ldr r8, [r2, #4]
    ldr r7, [r2, #0]

    @ Load x

    ldr r6, [r1, #20]
    ldr r5, [r1, #16]
    ldr r4, [r1, #12]
    ldr r3, [r1, #8]
    ldr r2, [r1, #4]
    ldr r1, [r1, #0]
    @ 16
    @ Split bits into two right-aligned halves of 95 bits each
    @ Bit 95 is bit 31 of word 2, so the high half is shifted left by 1 and
    @ both halves are masked to 31 bits in their top word.

    lsl r12, #1; orr r12, r12, r11, lsr #31
    lsl r11, #1; orr r11, r11, r10, lsr #31
    lsl r10, #1; orr r10, r10, r9, lsr #31

    and r12, #0x7fffffff
    and r9, #0x7fffffff

    lsl r6, #1; orr r6, r6, r5, lsr #31
    lsl r5, #1; orr r5, r5, r4, lsr #31
    lsl r4, #1; orr r4, r4, r3, lsr #31

    and r6, #0x7fffffff
    and r3, #0x7fffffff
    @ 32
    @ Store x and y on the stack

    push { r1-r12 }

    @@ Compute xl*yl, place in zl

    mov r1, sp
    add r2, sp, #24
    @ 47
    bl m96

    @@ Compute xh*yh, place in zh

    add r1, sp, #12
    add r2, sp, #36
    add r0, r0, #24
    @ 131
    bl m96
    @ 215
    @@ Compute xh+xl and yh+yl, store on the stack

    @ Load x and y from the stack

    ldm sp, { r2-r12, r14 }

    @ Compute xh+xl

    adds r2, r5
    adcs r3, r6
    adc r4, r7

    @ Compute yh+yl

    adds r8, r11
    adcs r9, r12
    adc r10, r14
    @ 234
    stm sp, { r2-r4, r8-r10 } @ overwrite x with xh+xl and yh+yl

    @@ Compute (xh+xl)*(yh+yl), overwrite y on the stack

    add r0, sp, #24 @ y
    add r1, sp, #12 @ yh+yl
    add r2, sp, #0 @ xh+xl
    @ 244
    bl m96
    @ 328
    @@ Subtract xl*yl and xh*yh from (xh+xl)*(yh+yl)

    ldr r0, [sp, #48] @ point to z (xl*yl)

    @ Load (xh+xl)*(yh+yl)

    ldr r1, [sp, #24]
    ldr r2, [sp, #28]
    ldr r3, [sp, #32]
    ldr r4, [sp, #36]
    ldr r5, [sp, #40]
    ldr r6, [sp, #44]

    @ Load and subtract xl*yl

    ldr r7, [r0, #0]
    ldr r8, [r0, #4]
    ldr r9, [r0, #8]
    ldr r10, [r0, #12]
    ldr r11, [r0, #16]
    ldr r12, [r0, #20]
    @ 342
    subs r1, r7
    sbcs r2, r8
    sbcs r3, r9
    sbcs r4, r10
    sbcs r5, r11
    sbc r6, r12 @ There is no output borrow

    @ Load and subtract xh*yh

    ldr r7, [r0, #24]
    ldr r8, [r0, #28]
    ldr r9, [r0, #32]
    ldr r10, [r0, #36]
    ldr r11, [r0, #40]
    ldr r12, [r0, #44]

    subs r1, r7; ldr r7, [r0, #8]    @ r7 is reloaded with z word 2 for the add below
    sbcs r2, r8
    sbcs r3, r9
    sbcs r4, r10
    sbcs r5, r11
    sbc r6, r12

    @@ Add (xh+xl)*(yh+yl)-xl*yl-xh*yh to the middle of z
    @ Bit 95 is bit 31 of z word 2: only the lowest bit of the middle term
    @ lands in that word. The carry from this adds is consumed by the adcs
    @ chain further down; nothing in between sets the flags.

    adds r7, r7, r1, lsl #31
    @ 364
    @ Shift down by 1 bit

    lsr r1, #1; orr r1, r1, r2, lsl #31
    lsr r2, #1; orr r2, r2, r3, lsl #31
    lsr r3, #1; orr r3, r3, r4, lsl #31
    lsr r4, #1; orr r4, r4, r5, lsl #31
    lsr r5, #1; orr r5, r5, r6, lsl #31
    lsr r6, #1
    @ 375
    @ Add to middle of z
    @ Note: operations on r7 already done above to save one cycle

    ldr r8, [r0, #12]
    ldr r9, [r0, #16]

    adcs r8, r1
    adcs r9, r2

    str r7, [r0, #8]
    str r8, [r0, #12]
    str r9, [r0, #16]

    ldr r7, [r0, #20]
    ldr r8, [r0, #24]
    ldr r9, [r0, #28]
    ldr r10, [r0, #32]
    ldr r11, [r0, #36]
    ldr r12, [r0, #40]
    ldr r14, [r0, #44]
    @ 391
    @ Shift down top half of z by 2 bits before adding
    @ xh*yh was written at word 6 (bit 192) but belongs at bit 190.

    orr r7, r7, r8, lsl #30; lsr r8, #2
    orr r8, r8, r9, lsl #30; lsr r9, #2
    orr r9, r9, r10, lsl #30; lsr r10, #2
    orr r10, r10, r11, lsl #30; lsr r11, #2
    orr r11, r11, r12, lsl #30; lsr r12, #2
    orr r12, r12, r14, lsl #30; lsr r14, #2

    adcs r7, r3
    adcs r8, r4
    adcs r9, r5
    adcs r10, r6
    @ 407
    @ Propagate carry

    adcs r11, #0
    adcs r12, #0
    adc r14, #0

    @ Deallocate stack

    add sp, #48

    @ Save high three quarters of z

    str r7, [r0, #20]
    str r8, [r0, #24]
    str r9, [r0, #28]
    str r10, [r0, #32]
    str r11, [r0, #36]
    str r12, [r0, #40]
    str r14, [r0, #44]
    @ 418
    pop { r0, pc }
    @ 422

    .size m190, .-m190
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ m96: Multiply 96x96->192
@
@ asm-only version using umull
@
@ Output pointer in r0
@ Input pointers in r1, r2
@
@ Preserves r0
@
@ Register assignments
@
@ r12 ro zero
@ r11 ro y2
@ r10 ro y1
@ r9 ro y0
@ r8 ro x2
@ r7 ro x1
@ r6 ro x0
@ r5 rw ah accumulator, high word
@ r4 rw am accumulator, middle word
@ r3 rw al accumulator, low word
@ r2 rw ph partial product, high word
@ r1 rw pl partial product, low word
@ r0 ro &z
@
@ Internal helper (no .global). Nothing is saved: r1-r12 are clobbered and
@ the stack and lr are not touched, so callers must preserve whatever they
@ need (m190 keeps its operands on the stack and reloads them).
@
@ The nine partial products are accumulated column by column into al/am/ah.
@ After each output word is stored the accumulator is shifted down one word
@ by the three-operand "adds al, am, pl" / "adc(s) am, ah, ph" pair that
@ starts the next column. Both inputs are loaded before the first store.
@
@ The register names below are preprocessor macros, so this file has to go
@ through the C preprocessor before assembly.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

#define zero r12
#define y2 r11
#define y1 r10
#define y0 r9
#define x2 r8
#define x1 r7
#define x0 r6
#define ah r5
#define am r4
#define al r3
#define ph r2
#define pl r1

    .align 3
    .syntax unified
    .thumb
    .thumb_func
m96:
    mov zero, #0

    ldr x0, [r1, #0]
    ldr x1, [r1, #4]
    ldr x2, [r1, #8]

    ldr y0, [r2, #0]
    ldr y1, [r2, #4]
    ldr y2, [r2, #8]
    @ 8
    @ Column 0: x0y0
    umull pl, ph, x0, y0 @ x0y0

    str pl, [r0, #0]
    @ 14
    @ Column 1: x0y1 + x1y0 (+ high word of column 0)
    umull al, am, x0, y1 @ x0y1

    adds al, ph
    adc am, zero @ no overflow
    @ 21
    umull pl, ph, x1, y0 @ x1y0

    adds al, pl
    adcs am, ph
    adc ah, zero, zero          @ ah = carry out (0 or 1)

    str al, [r0, #4]
    @ 30

    @ Column 2: x0y2 + x1y1 + x2y0
    umull pl, ph, x0, y2 @ x0y2

    adds al, am, pl
    adc am, ah, ph @ no overflow
    @ 37
    umull pl, ph, x1, y1 @ x1y1

    adds al, pl
    adcs am, ph
    adc ah, zero, zero
    @ 45
    umull pl, ph, x2, y0 @ x2y0

    adds al, pl
    adcs am, ph
    adc ah, zero                @ ah += carry

    str al, [r0, #8]
    @ 54

    @ Column 3: x1y2 + x2y1
    umull pl, ph, x1, y2 @ x1y2

    adds al, am, pl
    adcs am, ah, ph
    adc ah, zero, zero
    @ 62
    umull pl, ph, x2, y1 @ x2y1

    adds al, pl
    adcs am, ph
    adc ah, zero

    str al, [r0, #12]
    @ 71

    @ Column 4: x2y2; the remaining two words are stored directly
    umull pl, ph, x2, y2 @ x2y2

    adds al, am, pl
    adc am, ah, ph
    @ 78
    str al, [r0, #16]
    str am, [r0, #20]
    @ 80
    bx lr
    @ 82

#undef zero
#undef y2
#undef y1
#undef y0
#undef x2
#undef x1
#undef x0
#undef ah
#undef am
#undef al
#undef ph
#undef pl

    .size m96, .-m96
// Montgomery reduction for the 12-limb (32-bit limbs, least significant
// first) base-field modulus p, plus the small multiply-accumulate helpers it
// is built from. On ARM the helpers are inline assembly; elsewhere they fall
// back to portable 128-bit arithmetic.

#include <stdint.h>
// NOTE(review): the original file has three #include lines whose header
// names were lost; <stdint.h> is the only one this code visibly needs.

#define uint128_t __uint128_t

#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#error This code depends on little-endian word order
#endif

// multiply a32 * b64 and accumulate in the 96 bit value stored in [o0, o1, o2]
// o0 is the existing 32 bit value
// o1,o2 is the 64 bit carry
//
// Precisely: computes  a*b + o0 + o1 + (o2 << 32)  and writes the low 32 bits
// to o0 and the remaining 64 bits to o1 (low) and o2 (high).
inline
void umaal96(
    uint32_t& o0,
    uint32_t& o1,
    uint32_t& o2,
    uint32_t a,
    uint64_t b
) {
#if __arm__
#ifdef HAVE_UMAAL
    const uint32_t b0 = (uint32_t) b;
    const uint32_t b1 = (uint32_t) (b>>32);

    // UMAAL does not touch the condition flags, so no "cc" clobber is needed.
    asm (
        "UMAAL %[o0], %[o1], %[a0], %[b0]"
        : [o0] "+r" (o0),
          [o1] "+r" (o1)
        : [a0] "r" (a),
          [b0] "r" (b0)
    );

    asm (
        "UMAAL %[o1], %[o2], %[a0], %[b1]"
        : [o1] "+r" (o1),
          [o2] "+r" (o2)
        : [a0] "r" (a),
          [b1] "r" (b1)
    );
#else
    // "register" dropped: it is deprecated in C++11 and removed in C++17.
    uint32_t b0, b1, p0, p1;

    b0 = (uint32_t) b;
    b1 = (uint32_t) (b >> 32);

    asm (
        // b0:o0 = o0 + a*b0
        "UMULL %[p0], %[p1], %[a], %[b0]\n\t"
        "ADDS %[o0], %[p0]\n\t"
        "ADC %[b0], %[p1], #0\n\t"
        : [p0] "=&r" (p0)
        , [p1] "=&r" (p1)
        , [o0] "+r" (o0)
        , [b0] "+r" (b0)

        : [a] "r" (a)

        : "cc"
    );

    asm (
        // p1:p0 = o2 + a*b1
        "UMULL %[p0], %[p1], %[a], %[b1]\n\t"
        "ADDS %[p0], %[o2]\n\t"
        "ADC %[p1], #0\n\t"
        : [p0] "=&r" (p0)
        , [p1] "=&r" (p1)

        : [a] "r" (a)
        , [b1] "r" (b1)
        , [o2] "r" (o2)

        : "cc"
    );

    asm (
        // o2:o1:o0 = b0:o0 + p1:p0:o1
        "ADDS %[o0], %[o1]\n\t"
        "ADCS %[o1], %[b0], %[p0]\n\t"
        "ADC %[o2], %[p1], #0\n\t"

        : [o0] "+r" (o0)
        , [o1] "+r" (o1)
        , [o2] "+r" (o2)

        : [p0] "r" (p0)
        , [p1] "r" (p1)
        , [b0] "r" (b0)

        : "cc"
    );
#endif
#else
    uint128_t ret = ((uint128_t)a * (uint128_t)b) + (uint128_t)o0 + (uint128_t)o1 + (((uint128_t)o2)<<32);
    o0 = (uint32_t)ret;
    o1 = (uint32_t)(ret >> 32);
    o2 = (uint32_t)(ret >> 64);
#endif
}

// Same as above, but the 32-bit addend is supplied in c instead of being
// read from o0; o0 is output-only.
inline
void umaal96(uint32_t& o0,
             uint32_t& o1,
             uint32_t& o2,
             uint32_t a,
             uint64_t b,
             uint32_t c)
{
    o0 = c;
    umaal96(o0, o1, o2, a, b);
}

#if __arm__
// not needed
#else

// output[0..n) = left + right, limb by limb; returns the carry out.
inline
uint64_t add32(uint32_t* output, const uint32_t* left, const uint32_t* right, int n) {
    uint64_t carry = 0;
    for(int i=0; i<n; ++i) {
        carry += (uint64_t)left[i] + (uint64_t)right[i];
        output[i] = (uint32_t)carry;
        carry = carry >> 32;
    }
    return carry;
}

// output[0..n) = a + b + c, limb by limb; returns the carry out (0..2).
inline
uint64_t add32(uint32_t* output, const uint32_t* a, const uint32_t* b, const uint32_t* c, int n) {
    uint64_t carry = 0;
    for(int i=0; i<n; ++i) {
        carry += (uint64_t)a[i] + (uint64_t)b[i] + (uint64_t)c[i];
        output[i] = (uint32_t)carry;
        carry = carry >> 32;
    }
    return carry;
}
#endif

// output[0..1] += b[0..1] + c0; returns the carry out of the two limbs (0..2).
inline
uint32_t acc_2_2_1(uint32_t* output, const uint32_t* b, uint32_t c0) {
#if __arm__
    uint32_t carry = 0;
    uint32_t t0 = output[0];
    uint32_t t1 = output[1];
    uint32_t b0 = b[0];
    uint32_t b1 = b[1];
    // ADDS/ADCS modify the condition flags; GCC does not assume that on ARM
    // unless "cc" is listed as a clobber.
    asm (
        "ADDS %[t0], %[b0]\n\t"
        "ADCS %[t1], %[b1]\n\t"
        "ADC %[carry], #0"
        : [carry] "+r" (carry),
          [t0] "+r" (t0),
          [t1] "+r" (t1)
        : [b0] "r" (b0),
          [b1] "r" (b1)
        : "cc"
    );
    asm (
        "ADDS %[t0], %[c0]\n\t"
        "ADCS %[t1], #0\n\t"
        "ADC %[carry], #0"
        : [carry] "+r" (carry),
          [t0] "+r" (t0),
          [t1] "+r" (t1)
        : [c0] "r" (c0)
        : "cc"
    );
    output[0] = t0;
    output[1] = t1;
    return carry;
#else
    uint32_t _a[] = {output[0], output[1]};
    uint32_t _b[] = {b[0], b[1]};
    uint32_t _c[] = {c0, 0};
    return (uint32_t)add32(output, _a, _b, _c, 2);
#endif
}

// output[0..1] = a[0..1] + b[0..1] + c0; the carry out is discarded.
inline
void add_2_2_1(uint32_t* output, uint32_t* a, const uint32_t* b, uint32_t c0) {
#if __arm__
    uint32_t t0 = 0;
    uint32_t t1 = 0;
    uint32_t a0 = a[0];
    uint32_t a1 = a[1];
    uint32_t b0 = b[0];
    uint32_t b1 = b[1];
    asm (
        "ADDS %[t0], %[a0], %[b0]\n\t"
        "ADC %[t1], %[a1], %[b1]\n\t"
        : [t0] "+r" (t0),
          [t1] "+r" (t1)
        : [a0] "r" (a0),
          [a1] "r" (a1),
          [b0] "r" (b0),
          [b1] "r" (b1)
        : "cc"
    );
    asm (
        "ADDS %[t0], %[c0]\n\t"
        "ADC %[t1], #0 \n\t"
        : [t0] "+r" (t0),
          [t1] "+r" (t1)
        : [c0] "r" (c0)
        : "cc"
    );
    output[0] = t0;
    output[1] = t1;
#else
    uint32_t _a[] = {a[0], a[1]};
    uint32_t _b[] = {b[0], b[1]};
    uint32_t _c[] = {c0, 0};
    add32(output, _a, _b, _c, 2);
#endif
}

// Reads two adjacent 32-bit limbs as one 64-bit value (low limb first).
// Replaces the former *(uint64_t*)r, which violated strict aliasing and
// assumed 8-byte alignment of a uint32_t buffer.
static inline
uint64_t fp_redc_load64(const uint32_t* w) {
    return ((uint64_t)w[1] << 32) | (uint64_t)w[0];
}

// Montgomery reduction with R = 2^384, processed 64 bits per round.
//
//   output  receives 12 limbs.
//   t       24-limb input (t[0] least significant); used as scratch and
//           modified in place.
//
// Each of the six rounds picks k so that the lowest 64 bits of the current
// 12-limb window plus k*p are zero (inv is -p^-1 mod 2^64), adds k*p, and
// moves the window up by two limbs. The last round writes straight into
// output. No final conditional subtraction of p happens here.
// NOTE(review): so the result may still need one reduction by p - confirm
// where callers do that.
extern "C"
void fp_redc(uint32_t* output, uint32_t* t) {
    static const uint64_t inv = 9586122913090633727ull;
    // 32-bit limbs of p, least significant first. Was declared uint64_t,
    // doubling the table size for values consumed as uint32_t.
    static const uint32_t modulus32[12] = {
        0x00000001, 0x8508c000,
        0x30000000, 0x170b5d44,
        0xba094800, 0x1ef3622f,
        0x00f5138f, 0x1a22d9f3,
        0x6ca1493b, 0xc63b05c0,
        0x17c510ea, 0x01ae3a46,
    };
    // Carry out of the two limbs just above the window, fed into the next round.
    uint32_t altcarry = 0;

    for(int i=0; i<5; ++i){
        uint32_t* r = t + 2*i;
        const uint64_t k = fp_redc_load64(r) * inv;
        uint32_t carry[2] = {0};
        uint32_t _ = 0;  // sink for the two low limbs, which are zero by construction

        umaal96(_, carry[0], carry[1], modulus32[0], k, r[0]);
        umaal96(_, carry[0], carry[1], modulus32[1], k, r[1]);
        umaal96(r[2], carry[0], carry[1], modulus32[2], k);
        umaal96(r[3], carry[0], carry[1], modulus32[3], k);
        umaal96(r[4], carry[0], carry[1], modulus32[4], k);
        umaal96(r[5], carry[0], carry[1], modulus32[5], k);
        umaal96(r[6], carry[0], carry[1], modulus32[6], k);
        umaal96(r[7], carry[0], carry[1], modulus32[7], k);
        umaal96(r[8], carry[0], carry[1], modulus32[8], k);
        umaal96(r[9], carry[0], carry[1], modulus32[9], k);
        umaal96(r[10], carry[0], carry[1], modulus32[10], k);
        umaal96(r[11], carry[0], carry[1], modulus32[11], k);
        altcarry = acc_2_2_1(&r[12], carry, altcarry);
    }

    {
        uint32_t* r = t + 10;
        const uint64_t k = fp_redc_load64(r) * inv;
        uint32_t carry[2] = {0};
        uint32_t _ = 0;

        umaal96(_, carry[0], carry[1], modulus32[0], k, r[0]);
        umaal96(_, carry[0], carry[1], modulus32[1], k, r[1]);
        umaal96(output[0], carry[0], carry[1], modulus32[2], k, r[2]);
        umaal96(output[1], carry[0], carry[1], modulus32[3], k, r[3]);
        umaal96(output[2], carry[0], carry[1], modulus32[4], k, r[4]);
        umaal96(output[3], carry[0], carry[1], modulus32[5], k, r[5]);
        umaal96(output[4], carry[0], carry[1], modulus32[6], k, r[6]);
        umaal96(output[5], carry[0], carry[1], modulus32[7], k, r[7]);
        umaal96(output[6], carry[0], carry[1], modulus32[8], k, r[8]);
        umaal96(output[7], carry[0], carry[1], modulus32[9], k, r[9]);
        umaal96(output[8], carry[0], carry[1], modulus32[10], k, r[10]);
        umaal96(output[9], carry[0], carry[1], modulus32[11], k, r[11]);
        add_2_2_1(&output[10], &r[12], carry, altcarry);
    }
}
272 | -------------------------------------------------------------------------------- /bls12_377/src/fp_mont.h: -------------------------------------------------------------------------------- 1 | #ifndef FPC_H 2 | #define FPC_H 3 | 4 | #include 5 | 6 | #if defined(__cplusplus) || defined(c_plusplus) 7 | extern "C" { 8 | #endif 9 | 10 | // Montgomery reduction function 11 | 12 | void fp_redc(uint32_t *, uint32_t *); 13 | 14 | #if defined(__cplusplus) || defined(c_plusplus) 15 | } 16 | #endif 17 | 18 | #endif // FPC_H 19 | -------------------------------------------------------------------------------- /bls12_377/src/fpc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define uint128_t __uint128_t 6 | 7 | #define restrict 8 | 9 | inline 10 | void umaal96(uint32_t& restrict o0, 11 | uint32_t& restrict o1, 12 | uint32_t& restrict o2, 13 | uint32_t a, 14 | uint64_t b) { 15 | // multiply a32 * b64 and accumulate in the 96 bit value stored in [o0, o1, o2] 16 | // o0 is the existing 32 bit value 17 | // o1,o2 is the 64 bit carry 18 | #if __arm__ 19 | const uint32_t b0 = (uint32_t) b; 20 | const uint32_t b1 = (uint32_t) (b>>32); 21 | #ifdef HAVE_UMAAL 22 | asm ( 23 | "UMAAL %[o0], %[o1], %[a0], %[b0]" 24 | : [o0] "+r" (o0), 25 | [o1] "+r" (o1) 26 | : [a0] "r" (a), 27 | [b0] "r" (b0) 28 | ); 29 | 30 | asm ( 31 | "UMAAL %[o1], %[o2], %[a0], %[b1]" 32 | : [o1] "+r" (o1), 33 | [o2] "+r" (o2) 34 | : [a0] "r" (a), 35 | [b1] "r" (b1) 36 | ); 37 | #else 38 | asm ( 39 | "ADDS %[o0], %[o1]\n\t" 40 | "MOV %[o1], #0\n\t" 41 | "UMLAL %[o0], %[o1], %[a0], %[b0]\n\t" 42 | "ADCS %[o1], %[o2]\n\t" 43 | "MOV %[o2], #0\n\t" 44 | "ADC %[o2], #0\n\t" 45 | "UMLAL %[o1], %[o2], %[a0], %[b1]\n\t" 46 | : [o0] "+r" (o0), 47 | [o1] "+r" (o1), 48 | [o2] "+r" (o2) 49 | : [a0] "r" (a), 50 | [b0] "r" (b0), 51 | [b1] "r" (b1) 52 | ); 53 | #endif 54 | #else 55 | uint128_t ret = ((uint128_t)a * (uint128_t)b) + 
(uint128_t)o0 + (uint128_t)o1 + + (((uint128_t)o2)<<32); 56 | o0 = (uint32_t)ret; 57 | o1 = (uint32_t)(ret >> 32); 58 | o2 = (uint32_t)(ret >> 64); 59 | #endif 60 | } 61 | 62 | inline 63 | void umaal96(uint32_t& restrict o0, 64 | uint32_t& restrict o1, 65 | uint32_t& restrict o2, 66 | uint32_t a, 67 | uint64_t b, 68 | uint32_t c) 69 | { 70 | o0 = c; 71 | umaal96(o0, o1, o2, a, b); 72 | } 73 | 74 | 75 | inline 76 | void umaal96(uint32_t& restrict o0, 77 | uint32_t& restrict o1, 78 | uint32_t& restrict o2, 79 | uint32_t a, 80 | uint64_t b, 81 | uint32_t c, 82 | uint32_t d) 83 | { 84 | o0 = c; 85 | o1 = d; 86 | o2 = 0; 87 | umaal96(o0, o1, o2, a, b); 88 | } 89 | 90 | inline 91 | void umull96(uint32_t& restrict o0, 92 | uint32_t& restrict o1, 93 | uint32_t& restrict o2, 94 | uint32_t a, 95 | uint64_t b) { 96 | o0 = 0; 97 | o1 = 0; 98 | o2 = 0; 99 | umaal96(o0, o1, o2, a, b); 100 | } 101 | 102 | 103 | inline 104 | void umlal96(uint32_t& restrict o0, 105 | uint32_t& restrict o1, 106 | uint32_t& restrict o2, 107 | uint32_t a, 108 | uint64_t b) { 109 | o1 = 0; 110 | o2 = 0; 111 | umaal96(o0, o1, o2, a, b); 112 | } 113 | 114 | #if __arm__ 115 | // not needed 116 | #else 117 | 118 | inline 119 | uint64_t add32(uint32_t* output, const uint32_t* left, const uint32_t* right, int n) { 120 | uint64_t carry = 0; 121 | for(int i=0; i> 32; 125 | } 126 | return carry; 127 | } 128 | 129 | inline 130 | uint64_t add32(uint32_t* output, const uint32_t* a, const uint32_t* b, const uint32_t* c, int n) { 131 | uint64_t carry = 0; 132 | for(int i=0; i> 32; 136 | } 137 | return carry; 138 | } 139 | 140 | #endif 141 | 142 | inline 143 | uint32_t acc_2_2_1(uint32_t* restrict output, const uint32_t* restrict b, uint32_t c0) { 144 | #if __arm__ 145 | uint32_t carry = 0; 146 | uint32_t t0 = output[0]; 147 | uint32_t t1 = output[1]; 148 | uint32_t b0 = b[0]; 149 | uint32_t b1 = b[1]; 150 | asm ( 151 | "ADDS %[t0], %[b0]\n\t" 152 | "ADCS %[t1], %[b1]\n\t" 153 | "ADC %[carry], #0" 154 | : [carry] 
"+r" (carry), 155 | [t0] "+r" (t0), 156 | [t1] "+r" (t1) 157 | : [b0] "r" (b0), 158 | [b1] "r" (b1) 159 | ); 160 | asm ( 161 | "ADDS %[t0], %[c0]\n\t" 162 | "ADCS %[t1], #0\n\t" 163 | "ADC %[carry], #0" 164 | : [carry] "+r" (carry), 165 | [t0] "+r" (t0), 166 | [t1] "+r" (t1) 167 | : [c0] "r" (c0) 168 | ); 169 | output[0] = t0; 170 | output[1] = t1; 171 | return carry; 172 | #else 173 | uint32_t _a[] = {output[0], output[1]}; 174 | uint32_t _b[] = {b[0], b[1]}; 175 | uint32_t _c[] = {c0, 0}; 176 | return add32(output, _a, _b, _c, 2); 177 | #endif 178 | } 179 | 180 | inline 181 | void add_2_2_1(uint32_t* restrict output, uint32_t* restrict a, const uint32_t* restrict b, uint32_t c0) { 182 | #if __arm__ 183 | uint32_t t0 = 0; 184 | uint32_t t1 = 0; 185 | uint32_t a0 = a[0]; 186 | uint32_t a1 = a[1]; 187 | uint32_t b0 = b[0]; 188 | uint32_t b1 = b[1]; 189 | asm ( 190 | "ADDS %[t0], %[a0], %[b0]\n\t" 191 | "ADC %[t1], %[a1], %[b1]\n\t" 192 | : [t0] "+r" (t0), 193 | [t1] "+r" (t1) 194 | : [a0] "r" (a0), 195 | [a1] "r" (a1), 196 | [b0] "r" (b0), 197 | [b1] "r" (b1) 198 | ); 199 | asm ( 200 | "ADDS %[t0], %[c0]\n\t" 201 | "ADC %[t1], #0 \n\t" 202 | : [t0] "+r" (t0), 203 | [t1] "+r" (t1) 204 | : [c0] "r" (c0) 205 | ); 206 | output[0] = t0; 207 | output[1] = t1; 208 | #else 209 | uint32_t _a[] = {a[0], a[1]}; 210 | uint32_t _b[] = {b[0], b[1]}; 211 | uint32_t _c[] = {c0, 0}; 212 | add32(output, _a, _b, _c, 2); 213 | #endif 214 | } 215 | 216 | inline 217 | void mul_hybrid(uint32_t* restrict output, const uint64_t* restrict left, const uint32_t* restrict right) { 218 | register uint32_t carry0; 219 | register uint32_t carry1; 220 | uint32_t o0; 221 | uint32_t o1; 222 | uint32_t o2; 223 | uint32_t o3; 224 | uint32_t o4; 225 | uint32_t o5; 226 | uint32_t o6; 227 | uint32_t o7; 228 | uint32_t o8; 229 | uint32_t o9; 230 | uint32_t o10; 231 | uint32_t o11; 232 | 233 | register uint64_t val = left[0]; 234 | umull96(o0, carry0, carry1, right[0], val); 235 | output[0] = o0; 236 | 
umaal96(o1, carry0, carry1, right[1], val, 0); 237 | output[1] = o1; 238 | umaal96(o2, carry0, carry1, right[2], val, 0); 239 | umaal96(o3, carry0, carry1, right[3], val, 0); 240 | umaal96(o4, carry0, carry1, right[4], val, 0); 241 | umaal96(o5, carry0, carry1, right[5], val, 0); 242 | umaal96(o6, carry0, carry1, right[6], val, 0); 243 | umaal96(o7, carry0, carry1, right[7], val, 0); 244 | umaal96(o8, carry0, carry1, right[8], val, 0); 245 | umaal96(o9, carry0, carry1, right[9], val, 0); 246 | umaal96(o10, carry0, carry1, right[10], val, 0); 247 | umaal96(o11, carry0, carry1, right[11], val, 0); 248 | uint32_t o12 = carry0; 249 | uint32_t o13 = carry1; 250 | 251 | val = left[1]; 252 | umlal96(o2, carry0, carry1, right[0], val); 253 | output[2] = o2; 254 | umaal96(o3, carry0, carry1, right[1], val); 255 | output[3] = o3; 256 | umaal96(o4, carry0, carry1, right[2], val); 257 | umaal96(o5, carry0, carry1, right[3], val); 258 | umaal96(o6, carry0, carry1, right[4], val); 259 | umaal96(o7, carry0, carry1, right[5], val); 260 | umaal96(o8, carry0, carry1, right[6], val); 261 | umaal96(o9, carry0, carry1, right[7], val); 262 | umaal96(o10, carry0, carry1, right[8], val); 263 | umaal96(o11, carry0, carry1, right[9], val); 264 | umaal96(o12, carry0, carry1, right[10], val); 265 | umaal96(o13, carry0, carry1, right[11], val); 266 | uint32_t o14 = carry0; 267 | uint32_t o15 = carry1; 268 | 269 | val = left[2]; 270 | umlal96(o4, carry0, carry1, right[0], val); 271 | output[4] = o4; 272 | umaal96(o5, carry0, carry1, right[1], val); 273 | output[5] = o5; 274 | umaal96(o6, carry0, carry1, right[2], val); 275 | umaal96(o7, carry0, carry1, right[3], val); 276 | umaal96(o8, carry0, carry1, right[4], val); 277 | umaal96(o9, carry0, carry1, right[5], val); 278 | umaal96(o10, carry0, carry1, right[6], val); 279 | umaal96(o11, carry0, carry1, right[7], val); 280 | umaal96(o12, carry0, carry1, right[8], val); 281 | umaal96(o13, carry0, carry1, right[9], val); 282 | umaal96(o14, carry0, 
carry1, right[10], val); 283 | umaal96(o15, carry0, carry1, right[11], val); 284 | uint32_t o16 = carry0; 285 | uint32_t o17 = carry1; 286 | 287 | 288 | val = left[3]; 289 | umlal96(o6, carry0, carry1, right[0], val); 290 | output[6] = o6; 291 | umaal96(o7, carry0, carry1, right[1], val); 292 | output[7] = o7; 293 | umaal96(o8, carry0, carry1, right[2], val); 294 | umaal96(o9, carry0, carry1, right[3], val); 295 | umaal96(o10, carry0, carry1, right[4], val); 296 | umaal96(o11, carry0, carry1, right[5], val); 297 | umaal96(o12, carry0, carry1, right[6], val); 298 | umaal96(o13, carry0, carry1, right[7], val); 299 | umaal96(o14, carry0, carry1, right[8], val); 300 | umaal96(o15, carry0, carry1, right[9], val); 301 | umaal96(o16, carry0, carry1, right[10], val); 302 | umaal96(o17, carry0, carry1, right[11], val); 303 | uint32_t o18 = carry0; 304 | uint32_t o19 = carry1; 305 | 306 | val = left[4]; 307 | umlal96(o8, carry0, carry1, right[0], val); 308 | output[8] = o8; 309 | umaal96(o9, carry0, carry1, right[1], val); 310 | output[9] = o9; 311 | umaal96(o10, carry0, carry1, right[2], val); 312 | umaal96(o11, carry0, carry1, right[3], val); 313 | umaal96(o12, carry0, carry1, right[4], val); 314 | umaal96(o13, carry0, carry1, right[5], val); 315 | umaal96(o14, carry0, carry1, right[6], val); 316 | umaal96(o15, carry0, carry1, right[7], val); 317 | umaal96(o16, carry0, carry1, right[8], val); 318 | umaal96(o17, carry0, carry1, right[9], val); 319 | umaal96(o18, carry0, carry1, right[10], val); 320 | umaal96(o19, carry0, carry1, right[11], val); 321 | uint32_t o20 = carry0; 322 | uint32_t o21 = carry1; 323 | 324 | val = left[5]; 325 | umlal96(o10, carry0, carry1, right[0], val); 326 | output[10] = o10; 327 | umaal96(o11, carry0, carry1, right[1], val); 328 | output[11] = o11; 329 | umaal96(o12, carry0, carry1, right[2], val); 330 | output[12] = o12; 331 | umaal96(o13, carry0, carry1, right[3], val); 332 | output[13] = o13; 333 | umaal96(o14, carry0, carry1, right[4], val); 
334 | output[14] = o14; 335 | umaal96(o15, carry0, carry1, right[5], val); 336 | output[15] = o15; 337 | umaal96(o16, carry0, carry1, right[6], val); 338 | output[16] = o16; 339 | umaal96(o17, carry0, carry1, right[7], val); 340 | output[17] = o17; 341 | umaal96(o18, carry0, carry1, right[8], val); 342 | output[18] = o18; 343 | umaal96(o19, carry0, carry1, right[9], val); 344 | output[19] = o19; 345 | umaal96(o20, carry0, carry1, right[10], val); 346 | output[20] = o20; 347 | umaal96(o21, carry0, carry1, right[11], val); 348 | output[21] = o21; 349 | output[22] = carry0; 350 | output[23] = carry1; 351 | } 352 | 353 | void montgomery_reduce(uint32_t* restrict output, uint32_t* t) { 354 | const static uint64_t inv = 9586122913090633727ull; 355 | const static uint64_t modulus32[12] = { 356 | 0x00000001, 0x8508c000, 357 | 0x30000000, 0x170b5d44, 358 | 0xba094800, 0x1ef3622f, 359 | 0x00f5138f, 0x1a22d9f3, 360 | 0x6ca1493b, 0xc63b05c0, 361 | 0x17c510ea, 0x01ae3a46, 362 | }; 363 | register uint32_t altcarry = 0; 364 | 365 | for(int i=0; i<5; ++i){ 366 | uint32_t* r = t + 2*i; 367 | register uint64_t k = *(uint64_t*)r * inv; 368 | register uint32_t carry[2] = {0}; 369 | uint32_t _; 370 | 371 | umaal96(_, carry[0], carry[1], modulus32[0], k, r[0]); 372 | umaal96(_, carry[0], carry[1], modulus32[1], k, r[1]); 373 | umaal96(r[2], carry[0], carry[1], modulus32[2], k); 374 | umaal96(r[3], carry[0], carry[1], modulus32[3], k); 375 | umaal96(r[4], carry[0], carry[1], modulus32[4], k); 376 | umaal96(r[5], carry[0], carry[1], modulus32[5], k); 377 | umaal96(r[6], carry[0], carry[1], modulus32[6], k); 378 | umaal96(r[7], carry[0], carry[1], modulus32[7], k); 379 | umaal96(r[8], carry[0], carry[1], modulus32[8], k); 380 | umaal96(r[9], carry[0], carry[1], modulus32[9], k); 381 | umaal96(r[10], carry[0], carry[1], modulus32[10], k); 382 | umaal96(r[11], carry[0], carry[1], modulus32[11], k); 383 | altcarry = acc_2_2_1(&r[12], carry, altcarry); 384 | } 385 | 386 | { 387 | uint32_t* r = 
t + 10; 388 | uint64_t k = *(uint64_t*)r * inv; 389 | uint32_t carry[2] = {0}; 390 | uint32_t _; 391 | 392 | umaal96(_, carry[0], carry[1], modulus32[0], k, r[0]); 393 | umaal96(_, carry[0], carry[1], modulus32[1], k, r[1]); 394 | umaal96(output[0], carry[0], carry[1], modulus32[2], k, r[2]); 395 | umaal96(output[1], carry[0], carry[1], modulus32[3], k, r[3]); 396 | umaal96(output[2], carry[0], carry[1], modulus32[4], k, r[4]); 397 | umaal96(output[3], carry[0], carry[1], modulus32[5], k, r[5]); 398 | umaal96(output[4], carry[0], carry[1], modulus32[6], k, r[6]); 399 | umaal96(output[5], carry[0], carry[1], modulus32[7], k, r[7]); 400 | umaal96(output[6], carry[0], carry[1], modulus32[8], k, r[8]); 401 | umaal96(output[7], carry[0], carry[1], modulus32[9], k, r[9]); 402 | umaal96(output[8], carry[0], carry[1], modulus32[10], k, r[10]); 403 | umaal96(output[9], carry[0], carry[1], modulus32[11], k, r[11]); 404 | add_2_2_1(&output[10], &r[12], carry, altcarry); 405 | } 406 | } 407 | 408 | extern "C" void c_mul(uint64_t* restrict output, const uint64_t* restrict left, const uint64_t* restrict right) { 409 | mul_hybrid((uint32_t*)output, left, (const uint32_t*)right); 410 | } 411 | 412 | extern "C" void c_montgomry(uint64_t* restrict output, uint64_t* restrict tmp) { 413 | montgomery_reduce((uint32_t*)output, (uint32_t*)tmp); 414 | } 415 | -------------------------------------------------------------------------------- /bls12_377/src/fq_asm.s: -------------------------------------------------------------------------------- 1 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2 | @ 3 | @ Low-level operations on Fq values 4 | @ 5 | @ Each Fq value is stored as a word-aligned 8-word array 6 | @ 7 | @ All functions work correctly with repeated arguments, 8 | @ like e.g. 
@ fq_sum(x, x, x)
@
@ All functions should take constant time on ARM SC300
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .text

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_is_zero: test an Fq value for zero
@
@ In:  r0 = pointer to the 8-word value
@ Out: r0 = 1 if every word is zero, 0 otherwise
@
@ All eight words (offsets 0..28) are ORed together without
@ branching; the final ORRS sets Z for the result selection.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_is_zero
    .syntax unified
    .thumb
    .thumb_func
    .type fq_is_zero, %function

fq_is_zero:
    ldr r1, [r0, #0]

    ldr r2, [r0, #4];  ldr r3, [r0, #8];  orr r1, r2; orr r1, r3
    ldr r2, [r0, #12]; ldr r3, [r0, #16]; orr r1, r2; orr r1, r3
    ldr r2, [r0, #20]; ldr r3, [r0, #24]; orr r1, r2; orr r1, r3

    @ last (eighth) word lives at offset 28
    ldr r2, [r0, #28]; orrs r1, r2

    ite eq
    moveq r0, #1
    movne r0, #0

    bx lr

    .size fq_is_zero, . - fq_is_zero

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_cpy: Copy
@
@ In:  r0 = destination pointer, r1 = source pointer
@
@ Copies the eight words two at a time through r2/r3.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_cpy
    .syntax unified
    .thumb
    .thumb_func
    .type fq_cpy, %function

fq_cpy:
    ldr r2, [r1, #0]; ldr r3, [r1, #4]
    str r2, [r0, #0]; str r3, [r0, #4]

    ldr r2, [r1, #8]; ldr r3, [r1, #12]
    str r2, [r0, #8]; str r3, [r0, #12]

    ldr r2, [r1, #16]; ldr r3, [r1, #20]
    str r2, [r0, #16]; str r3, [r0, #20]

    ldr r2, [r1, #24]; ldr r3, [r1, #28]
    str r2, [r0, #24]; str r3, [r0, #28]

    bx lr

    .size fq_cpy, .
- fq_cpy

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_eq: Check two Fq values for equality
@
@ In:  r0, r1 = pointers to the two 8-word values
@ Out: r0 = 1 if all eight word pairs are equal, 0 otherwise
@
@ The word-wise differences are ORed into r2, so the result is
@ computed without data-dependent branches. r4 is used as a
@ scratch register and is saved/restored in a 4-byte stack slot.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_eq
    .syntax unified
    .thumb
    .thumb_func
    .type fq_eq, %function

fq_eq:
    sub sp, #4              @ reserve a slot for r4

    ldr r2, [r0, #0]
    ldr r3, [r1, #0]
    str r4, [sp]            @ save r4 (interleaved with the loads)
    sub r2, r3              @ r2 = difference of word 0

    @ r2 |= difference of words 1..6
    ldr r3, [r0, #4]; ldr r4, [r1, #4]; sub r3, r4; orr r2, r3
    ldr r3, [r0, #8]; ldr r4, [r1, #8]; sub r3, r4; orr r2, r3
    ldr r3, [r0, #12]; ldr r4, [r1, #12]; sub r3, r4; orr r2, r3
    ldr r3, [r0, #16]; ldr r4, [r1, #16]; sub r3, r4; orr r2, r3
    ldr r3, [r0, #20]; ldr r4, [r1, #20]; sub r3, r4; orr r2, r3
    ldr r3, [r0, #24]; ldr r4, [r1, #24]; sub r3, r4; orr r2, r3

    ldr r4, [sp]            @ restore r4

    @ word 7 reuses r0/r1 (the pointers are no longer needed); ORRS sets Z
    ldr r0, [r0, #28]; ldr r1, [r1, #28]; sub r0, r1; orrs r0, r2

    ite eq
    moveq r0, #1
    movne r0, #0

    add sp, #4

    bx lr

    .size fq_eq, . - fq_eq

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_neg: Negate an Fq value
@
@ In:  r0 = destination pointer, r1 = source pointer (y)
@
@ x = (y != 0) ? (q - y) : 0
@
@ x = (y == 0) ? q : y
@ x = -x
@ x += q
@
@ Note: -x == ~x + 1
@
@ q is materialised from immediates; its words, least
@ significant first, are
@   0x00000001 0x0A118000 0xD0000001 0x59AA76FE
@   0x5C37B001 0x60B44D1E 0x9A2CA556 0x12AB655E
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_neg
    .syntax unified
    .thumb
    .thumb_func
    .type fq_neg, %function

fq_neg:
    push { r4-r8, lr }
    ldm r1, { r1-r8 }       @ y -> r1..r8 (word 0 in r1)

    @ lr = OR of all words of y, i.e. zero exactly when y == 0

    orr lr, r1, r2
    orr lr, r3
    orr lr, r4
    orr lr, r5
    orr lr, r6
    orr lr, r7
    orrs lr, r8 @ Z = (lr == 0) ? 1 : 0

    @ x = Z ? q : y
    @ (the MOV/MOVT instructions below do not alter the flags, so Z
    @  stays valid across the consecutive IT blocks)

    itttt eq
    moveq r1, #0x00000001
    moveq r2, #0x8000
    movteq r2, #0x0A11
    moveq r3, #0x0001

    itttt eq
    movteq r3, #0xD000
    moveq r4, #0x76FE
    movteq r4, #0x59AA
    moveq r5, #0xB001

    itttt eq
    movteq r5, #0x5C37
    moveq r6, #0x4D1E
    movteq r6, #0x60B4
    moveq r7, #0xA556

    ittt eq
    movteq r7, #0x9A2C
    moveq r8, #0x655E
    movteq r8, #0x12AB

    @ x = ~x

    mvn r1, r1
    mvn r2, r2
    mvn r3, r3
    mvn r4, r4
    mvn r5, r5
    mvn r6, r6
    mvn r7, r7
    mvn r8, r8

    @ x += 1

    adds r1, #1
    adcs r2, #0
    adcs r3, #0
    adcs r4, #0
    adcs r5, #0
    adcs r6, #0
    adcs r7, #0
    adc r8, #0

    @ x += q (lr is the scratch register for each word of q)

    adds r1, #0x00000001

    mov lr, #0x8000; movt lr, #0x0A11; adcs r2, lr
    mov lr, #0x0001; movt lr, #0xD000; adcs r3, lr
    mov lr, #0x76FE; movt lr, #0x59AA; adcs r4, lr
    mov lr, #0xB001; movt lr, #0x5C37; adcs r5, lr
    mov lr, #0x4D1E; movt lr, #0x60B4; adcs r6, lr
    mov lr, #0xA556; movt lr, #0x9A2C; adcs r7, lr
    mov lr, #0x655E; movt lr, #0x12AB; adcs r8, lr

    stm r0, { r1-r8 }
    pop { r4-r8, pc }       @ return by popping the saved lr into pc

    .size fq_neg, .
- fq_neg

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_diff: x = y - z (mod q)
@
@ In:  r0 = pointer to x (result), r1 = pointer to y,
@      r2 = pointer to z
@
@ x = y - z
@ store x
@
@ C = (x < 0); x += q
@ if (C) /* carry, because x was < 0 */
@ store x
@
@ Both candidates are always computed; only the second set of
@ stores is conditional.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_diff
    .syntax unified
    .thumb
    .thumb_func
    .type fq_diff, %function

fq_diff:
    push { r4-r11, lr }

    @ x = y - z
    @ words 0..4 of y -> r14, r12, r11, r10, r9; words 0..4 of z -> r8..r4

    ldr r14, [r1, #0]
    ldr r12, [r1, #4]
    ldr r11, [r1, #8]
    ldr r10, [r1, #12]
    ldr r9, [r1, #16]

    ldr r8, [r2, #0]
    ldr r7, [r2, #4]
    ldr r6, [r2, #8]
    ldr r5, [r2, #12]
    ldr r4, [r2, #16]

    subs r14, r8
    sbcs r12, r7
    sbcs r11, r6
    sbcs r10, r5
    sbcs r9, r4

    @ words 5..7 of y -> r8, r7, r6; words 5..7 of z -> r5, r4, r3
    @ (LDR leaves the borrow chain in the flags untouched)

    ldr r8, [r1, #20]
    ldr r7, [r1, #24]
    ldr r6, [r1, #28]

    ldr r5, [r2, #20]
    ldr r4, [r2, #24]
    ldr r3, [r2, #28]

    str r14, [r0, #0] @ store early (0 cycles)

    sbcs r8, r5
    sbcs r7, r4
    sbcs r6, r3

    str r12, [r0, #4]
    str r11, [r0, #8]
    str r10, [r0, #12]
    str r9, [r0, #16]
    str r8, [r0, #20]
    str r7, [r0, #24]
    str r6, [r0, #28]

    @ x += q, using r1 as temporary, x in { r14, r12-r6 }

    adds r14, #0x00000001

    mov r1, #0x8000; movt r1, #0x0A11; adcs r12, r1
    mov r1, #0x0001; movt r1, #0xD000; adcs r11, r1
    mov r1, #0x76FE; movt r1, #0x59AA; adcs r10, r1
    mov r1, #0xB001; movt r1, #0x5C37; adcs r9, r1
    mov r1, #0x4D1E; movt r1, #0x60B4; adcs r8, r1
    mov r1, #0xA556; movt r1, #0x9A2C; adcs r7, r1
    mov r1, #0x655E; movt r1, #0x12AB; adcs r6, r1

    itttt cs @ carry set => x was negative => we need to store x
    strcs r14, [r0, #0]
    strcs r12, [r0, #4]
    strcs r11, [r0, #8]
    strcs r10, [r0, #12]
    itttt cs
    strcs r9, [r0, #16]
    strcs r8, [r0, #20]
    strcs r7, [r0, #24]
    strcs r6, [r0, #28]

    pop { r4-r11, pc }

    .size fq_diff, . - fq_diff

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_sum: x = y + z (mod q)
@
@ In:  r0 = pointer to x (result), r1 = pointer to y,
@      r2 = pointer to z
@
@ x = y + z
@ store x
@
@ C = (x >= q); x -= q
@ if (C)
@ store x
@
@ Same structure and register allocation as fq_diff, with the
@ additions and subtractions swapped.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_sum
    .syntax unified
    .thumb
    .thumb_func
    .type fq_sum, %function

fq_sum:
    push { r4-r11, lr }

    @ x = y + z

    ldr r14, [r1, #0]
    ldr r12, [r1, #4]
    ldr r11, [r1, #8]
    ldr r10, [r1, #12]
    ldr r9, [r1, #16]

    ldr r8, [r2, #0]
    ldr r7, [r2, #4]
    ldr r6, [r2, #8]
    ldr r5, [r2, #12]
    ldr r4, [r2, #16]

    adds r14, r8
    adcs r12, r7
    adcs r11, r6
    adcs r10, r5
    adcs r9, r4

    ldr r8, [r1, #20]
    ldr r7, [r1, #24]
    ldr r6, [r1, #28]

    ldr r5, [r2, #20]
    ldr r4, [r2, #24]
    ldr r3, [r2, #28]

    str r14, [r0, #0] @ store early (0 cycles)

    adcs r8, r5
    adcs r7, r4
    adcs r6, r3

    str r12, [r0, #4]
    str r11, [r0, #8]
    str r10, [r0, #12]
    str r9, [r0, #16]
    str r8, [r0, #20]
    str r7, [r0, #24]
    str r6, [r0, #28]

    @ x -= q, using r1 as temporary, x in { r14, r12-r6 }

    subs r14, #0x00000001

    mov r1, #0x8000; movt r1, #0x0A11; sbcs r12, r1
    mov r1, #0x0001; movt r1, #0xD000; sbcs r11, r1
    mov r1, #0x76FE; movt r1, #0x59AA; sbcs r10, r1
    mov r1, #0xB001; movt r1, #0x5C37; sbcs r9, r1
    mov r1, #0x4D1E; movt r1, #0x60B4; sbcs r8, r1
    mov r1, #0xA556; movt r1, #0x9A2C; sbcs r7, r1
    mov r1, #0x655E; movt r1, #0x12AB; sbcs r6, r1

    itttt cs @ carry set == no borrow => x was >= q => we need to store x
    strcs r14, [r0, #0]
    strcs r12, [r0, #4]
    strcs r11, [r0, #8]
    strcs r10, [r0, #12]
    itttt cs
    strcs r9, [r0, #16]
    strcs r8, [r0, #20]
    strcs r7, [r0, #24]
    strcs r6, [r0, #28]

    pop { r4-r11, pc }

    .size fq_sum, . - fq_sum

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_cset: x = c ? y : x
@
@ In:  r0 = pointer to x, r1 = pointer to y, r2 = c
@
@ y is always loaded; the stores into x are executed only
@ when c is non-zero.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_cset
    .syntax unified
    .thumb
    .thumb_func
    .type fq_cset, %function

fq_cset:
    movs r2, r2             @ Z = (c == 0); r2 is then free as a temporary

    ldr r2, [r1, #0]
    ldr r3, [r1, #4]
    itt ne
    strne r2, [r0, #0]
    strne r3, [r0, #4]

    ldr r2, [r1, #8]
    ldr r3, [r1, #12]
    itt ne
    strne r2, [r0, #8]
    strne r3, [r0, #12]

    ldr r2, [r1, #16]
    ldr r3, [r1, #20]
    itt ne
    strne r2, [r0, #16]
    strne r3, [r0, #20]

    ldr r2, [r1, #24]
    ldr r3, [r1, #28]
    itt ne
    strne r2, [r0, #24]
    strne r3, [r0, #28]

    bx lr

    .size fq_cset, .
- fq_cset

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_to_bytes
@
@ In:  r0 = pointer to the 32-byte output buffer,
@      r1 = pointer to the 8-word value
@
@ Writes the value most-significant byte first: the word at
@ offset 28 fills bytes 0..3, the word at offset 0 fills
@ bytes 28..31. Each word is emitted low byte first into the
@ highest address of its group and shifted right by 8 between
@ stores.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_to_bytes
    .syntax unified
    .thumb
    .thumb_func
    .type fq_to_bytes, %function

fq_to_bytes:
    ldr r2, [r1, #28]
    strb r2, [r0, #3]; lsrs r2, #8
    strb r2, [r0, #2]; lsrs r2, #8
    strb r2, [r0, #1]; lsrs r2, #8
    strb r2, [r0, #0]; ldr r2, [r1, #24]

    strb r2, [r0, #7]; lsrs r2, #8
    strb r2, [r0, #6]; lsrs r2, #8
    strb r2, [r0, #5]; lsrs r2, #8
    strb r2, [r0, #4]; ldr r2, [r1, #20]

    strb r2, [r0, #11]; lsrs r2, #8
    strb r2, [r0, #10]; lsrs r2, #8
    strb r2, [r0, #9]; lsrs r2, #8
    strb r2, [r0, #8]; ldr r2, [r1, #16]

    strb r2, [r0, #15]; lsrs r2, #8
    strb r2, [r0, #14]; lsrs r2, #8
    strb r2, [r0, #13]; lsrs r2, #8
    strb r2, [r0, #12]; ldr r2, [r1, #12]

    strb r2, [r0, #19]; lsrs r2, #8
    strb r2, [r0, #18]; lsrs r2, #8
    strb r2, [r0, #17]; lsrs r2, #8
    strb r2, [r0, #16]; ldr r2, [r1, #8]

    strb r2, [r0, #23]; lsrs r2, #8
    strb r2, [r0, #22]; lsrs r2, #8
    strb r2, [r0, #21]; lsrs r2, #8
    strb r2, [r0, #20]; ldr r2, [r1, #4]

    strb r2, [r0, #27]; lsrs r2, #8
    strb r2, [r0, #26]; lsrs r2, #8
    strb r2, [r0, #25]; lsrs r2, #8
    strb r2, [r0, #24]; ldr r2, [r1, #0]

    strb r2, [r0, #31]; lsrs r2, #8
    strb r2, [r0, #30]; lsrs r2, #8
    strb r2, [r0, #29]; lsrs r2, #8
    strb r2, [r0, #28]

    bx lr

    .size fq_to_bytes, . - fq_to_bytes

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@ fq_from_bytes
@
@ In:  r0 = pointer to the destination value,
@      r1 = pointer to the 32-byte input buffer
@
@ Copies input byte i to destination byte 31 - i, i.e. a plain
@ 32-byte reversal done one byte at a time.
@ NOTE(review): this is the inverse of fq_to_bytes only when
@ words are stored little-endian in memory - confirm for the
@ target configuration.
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    .align 3
    .global fq_from_bytes
    .syntax unified
    .thumb
    .thumb_func
    .type fq_from_bytes, %function

fq_from_bytes:
    ldrb r2, [r1, #0]; strb r2, [r0, #31]
    ldrb r2, [r1, #1]; strb r2, [r0, #30]
    ldrb r2, [r1, #2]; strb r2, [r0, #29]
    ldrb r2, [r1, #3]; strb r2, [r0, #28]

    ldrb r2, [r1, #4]; strb r2, [r0, #27]
    ldrb r2, [r1, #5]; strb r2, [r0, #26]
    ldrb r2, [r1, #6]; strb r2, [r0, #25]
    ldrb r2, [r1, #7]; strb r2, [r0, #24]

    ldrb r2, [r1, #8]; strb r2, [r0, #23]
    ldrb r2, [r1, #9]; strb r2, [r0, #22]
    ldrb r2, [r1, #10]; strb r2, [r0, #21]
    ldrb r2, [r1, #11]; strb r2, [r0, #20]

    ldrb r2, [r1, #12]; strb r2, [r0, #19]
    ldrb r2, [r1, #13]; strb r2, [r0, #18]
    ldrb r2, [r1, #14]; strb r2, [r0, #17]
    ldrb r2, [r1, #15]; strb r2, [r0, #16]

    ldrb r2, [r1, #16]; strb r2, [r0, #15]
    ldrb r2, [r1, #17]; strb r2, [r0, #14]
    ldrb r2, [r1, #18]; strb r2, [r0, #13]
    ldrb r2, [r1, #19]; strb r2, [r0, #12]

    ldrb r2, [r1, #20]; strb r2, [r0, #11]
    ldrb r2, [r1, #21]; strb r2, [r0, #10]
    ldrb r2, [r1, #22]; strb r2, [r0, #9]
    ldrb r2, [r1, #23]; strb r2, [r0, #8]

    ldrb r2, [r1, #24]; strb r2, [r0, #7]
    ldrb r2, [r1, #25]; strb r2, [r0, #6]
    ldrb r2, [r1, #26]; strb r2, [r0, #5]
    ldrb r2, [r1, #27]; strb r2, [r0, #4]

    ldrb r2, [r1, #28]; strb r2, [r0, #3]
    ldrb r2, [r1, #29]; strb r2, [r0, #2]
    ldrb r2, [r1, #30]; strb r2, [r0, #1]
    ldrb r2, [r1, #31]; strb r2, [r0, #0]

    bx lr

    .size fq_from_bytes, .
- fq_from_bytes 561 | 562 | -------------------------------------------------------------------------------- /bls12_377/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # `bls12_377` 2 | //! 3 | //! This crate provides an implementation of the BLS12-377 pairing-friendly elliptic 4 | //! curve construction. 5 | //! 6 | //! * **This implementation has not been reviewed or audited. Use at your own risk.** 7 | //! * This implementation targets Rust `1.36` or later. 8 | //! * This implementation does not require the Rust standard library. 9 | //! * All operations are constant time unless explicitly noted. 10 | 11 | #![no_std] 12 | #![deny(missing_debug_implementations)] 13 | #![allow(clippy::too_many_arguments)] 14 | #![allow(clippy::unreadable_literal)] 15 | #![allow(clippy::many_single_char_names)] 16 | // This lint is described at 17 | // https://rust-lang.github.io/rust-clippy/master/index.html#suspicious_arithmetic_impl 18 | // In our library, some of the arithmetic involving extension fields will necessarily 19 | // involve various binary operators, and so this lint is triggered unnecessarily. 20 | #![allow(clippy::suspicious_arithmetic_impl)] 21 | 22 | #[cfg(test)] 23 | #[macro_use] 24 | extern crate std; 25 | 26 | #[macro_use] 27 | pub mod util; 28 | 29 | mod scalar; 30 | 31 | pub use scalar::Scalar; 32 | 33 | #[cfg(feature = "groups")] 34 | pub mod fp; 35 | #[cfg(feature = "groups")] 36 | mod fp2; 37 | #[cfg(feature = "groups")] 38 | mod g1; 39 | #[cfg(feature = "groups")] 40 | mod g2; 41 | 42 | #[cfg(feature = "groups")] 43 | pub use g1::{G1Affine, G1Projective}; 44 | #[cfg(feature = "groups")] 45 | pub use g2::{G2Affine, G2Projective}; 46 | 47 | // TODO: This should be upstreamed to subtle. 48 | // See https://github.com/dalek-cryptography/subtle/pull/48 49 | trait CtOptionExt { 50 | /// Calls f() and either returns self if it contains a value, 51 | /// or returns the output of f() otherwise. 
52 | fn or_else subtle::CtOption>(self, f: F) -> subtle::CtOption; 53 | } 54 | 55 | impl CtOptionExt for subtle::CtOption { 56 | fn or_else subtle::CtOption>(self, f: F) -> subtle::CtOption { 57 | let is_none = self.is_none(); 58 | let f = f(); 59 | 60 | subtle::ConditionallySelectable::conditional_select(&self, &f, is_none) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /bls12_377/src/scalar.rs: -------------------------------------------------------------------------------- 1 | //! This module provides an implementation of the BLS12-377 scalar field $\mathbb{F}_q$ 2 | //! where `q = 8444461749428370424248824938781546531375899335154063827935233455917409239041` 3 | 4 | use core::fmt; 5 | use core::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}; 6 | 7 | use byteorder::{ByteOrder, LittleEndian}; 8 | use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption}; 9 | 10 | use crate::util::{adc, mac, sbb}; 11 | 12 | /// Represents an element of the scalar field $\mathbb{F}_q$ of the BLS12-377 elliptic 13 | /// curve construction. 14 | // The internal representation of this type is four 64-bit unsigned 15 | // integers in little-endian order. `Scalar` values are always in 16 | // Montgomery form; i.e., Scalar(a) = aR mod q, with R = 2^256. 
17 | #[derive(Clone, Copy, Eq)] 18 | pub struct Scalar(pub(crate) [u64; 4]); 19 | 20 | impl fmt::Debug for Scalar { 21 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 22 | let tmp = self.to_bytes(); 23 | write!(f, "0x")?; 24 | for &b in tmp.iter().rev() { 25 | write!(f, "{:02x}", b)?; 26 | } 27 | Ok(()) 28 | } 29 | } 30 | 31 | impl From for Scalar { 32 | fn from(val: u64) -> Scalar { 33 | Scalar([val, 0, 0, 0]) * r_squared() 34 | } 35 | } 36 | 37 | impl ConstantTimeEq for Scalar { 38 | fn ct_eq(&self, other: &Self) -> Choice { 39 | self.0[0].ct_eq(&other.0[0]) 40 | & self.0[1].ct_eq(&other.0[1]) 41 | & self.0[2].ct_eq(&other.0[2]) 42 | & self.0[3].ct_eq(&other.0[3]) 43 | } 44 | } 45 | 46 | impl PartialEq for Scalar { 47 | fn eq(&self, other: &Self) -> bool { 48 | self.ct_eq(other).unwrap_u8() == 1 49 | } 50 | } 51 | 52 | impl ConditionallySelectable for Scalar { 53 | fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self { 54 | Scalar([ 55 | u64::conditional_select(&a.0[0], &b.0[0], choice), 56 | u64::conditional_select(&a.0[1], &b.0[1], choice), 57 | u64::conditional_select(&a.0[2], &b.0[2], choice), 58 | u64::conditional_select(&a.0[3], &b.0[3], choice), 59 | ]) 60 | } 61 | } 62 | 63 | /// Constant representing the modulus 64 | /// q = 8444461749428370424248824938781546531375899335154063827935233455917409239041 65 | const fn modulus() -> Scalar { 66 | Scalar([ 67 | 725501752471715841u64, 68 | 6461107452199829505u64, 69 | 6968279316240510977u64, 70 | 1345280370688173398u64, 71 | ]) 72 | } 73 | 74 | impl<'a> Neg for &'a Scalar { 75 | type Output = Scalar; 76 | 77 | fn neg(self) -> Scalar { 78 | self.neg() 79 | } 80 | } 81 | 82 | impl Neg for Scalar { 83 | type Output = Scalar; 84 | 85 | fn neg(self) -> Scalar { 86 | -&self 87 | } 88 | } 89 | 90 | impl<'a, 'b> Sub<&'b Scalar> for &'a Scalar { 91 | type Output = Scalar; 92 | 93 | fn sub(self, rhs: &'b Scalar) -> Scalar { 94 | self.sub(rhs) 95 | } 96 | } 97 | 98 | impl<'a, 'b> Add<&'b Scalar> for 
&'a Scalar { 99 | type Output = Scalar; 100 | 101 | fn add(self, rhs: &'b Scalar) -> Scalar { 102 | self.add(rhs) 103 | } 104 | } 105 | 106 | impl<'a, 'b> Mul<&'b Scalar> for &'a Scalar { 107 | type Output = Scalar; 108 | 109 | fn mul(self, rhs: &'b Scalar) -> Scalar { 110 | self.mul(rhs) 111 | } 112 | } 113 | 114 | impl_binops_additive!(Scalar, Scalar); 115 | impl_binops_multiplicative!(Scalar, Scalar); 116 | 117 | /// INV = -(q^{-1} mod 2^64) mod 2^64 118 | const fn inv() -> u64 { 119 | 725501752471715839u64 120 | } 121 | 122 | /// R = 2^256 mod q 123 | const fn r() -> Scalar { 124 | Scalar([ 125 | 0x7D1C7FFFFFFFFFF3, 126 | 0x7257F50F6FFFFFF2, 127 | 0x16D81575512C0FEE, 128 | 0xD4BDA322BBB9A9D, 129 | ]) 130 | } 131 | 132 | /// R^2 = 2^512 mod q 133 | #[inline] 134 | const fn r_squared() -> Scalar { 135 | Scalar([ 136 | 0x25D577BAB861857B, 137 | 0xCC2C27B58860591F, 138 | 0xA7CC008FE5DC8593, 139 | 0x11FDAE7EFF1C939, 140 | ]) 141 | } 142 | 143 | /// R^3 = 2^768 mod q 144 | const fn r_cubed() -> Scalar { 145 | Scalar([ 146 | 0x6A4295C90F65454C, 147 | 0x624D23FFAE271699, 148 | 0xB1E55EF6F1C9D713, 149 | 0x601DFA555C48DDA, 150 | ]) 151 | } 152 | 153 | const fn s() -> u32 { 154 | 47 155 | } 156 | 157 | /// GENERATOR^t where t * 2^s + 1 = q 158 | /// with t odd. In other words, this 159 | /// is a 2^s root of unity. 160 | /// 161 | /// `GENERATOR = 7 mod q` is a generator 162 | /// of the q - 1 order multiplicative 163 | /// subgroup. 164 | const fn root_of_unity() -> Scalar { 165 | Scalar([ 166 | 0x3c3d3ca739381fb2, 167 | 0x9a14cda3ec99772b, 168 | 0xd7aacc7c59724826, 169 | 0xd1ba211c5cc349c, 170 | ]) 171 | } 172 | 173 | impl Default for Scalar { 174 | fn default() -> Self { 175 | Self::zero() 176 | } 177 | } 178 | 179 | impl Scalar { 180 | /// Returns zero, the additive identity. 181 | pub const fn zero() -> Scalar { 182 | Scalar([0, 0, 0, 0]) 183 | } 184 | 185 | /// Returns one, the multiplicative identity. 
186 | pub const fn one() -> Scalar { 187 | r() 188 | } 189 | 190 | /// Doubles this field element. 191 | pub fn double(&self) -> Scalar { 192 | // TODO: This can be achieved more efficiently with a bitshift. 193 | self.add(self) 194 | } 195 | 196 | /// Attempts to convert a little-endian byte representation of 197 | /// a scalar into a `Scalar`, failing if the input is not canonical. 198 | pub fn from_bytes(bytes: &[u8; 32]) -> CtOption { 199 | let mut tmp = Scalar([0, 0, 0, 0]); 200 | let modulus = modulus(); 201 | 202 | tmp.0[0] = LittleEndian::read_u64(&bytes[0..8]); 203 | tmp.0[1] = LittleEndian::read_u64(&bytes[8..16]); 204 | tmp.0[2] = LittleEndian::read_u64(&bytes[16..24]); 205 | tmp.0[3] = LittleEndian::read_u64(&bytes[24..32]); 206 | 207 | // Try to subtract the modulus 208 | let (_, borrow) = sbb(tmp.0[0], modulus.0[0], 0); 209 | let (_, borrow) = sbb(tmp.0[1], modulus.0[1], borrow); 210 | let (_, borrow) = sbb(tmp.0[2], modulus.0[2], borrow); 211 | let (_, borrow) = sbb(tmp.0[3], modulus.0[3], borrow); 212 | 213 | // If the element is smaller than MODULUS then the 214 | // subtraction will underflow, producing a borrow value 215 | // of 0xffff...ffff. Otherwise, it'll be zero. 216 | let is_some = (borrow as u8) & 1; 217 | 218 | // Convert to Montgomery form by computing 219 | // (a.R^0 * R^2) / R = a.R 220 | tmp *= &r_squared(); 221 | 222 | CtOption::new(tmp, Choice::from(is_some)) 223 | } 224 | 225 | /// Converts an element of `Scalar` into a byte representation in 226 | /// little-endian byte order. 
227 | pub fn to_bytes(&self) -> [u8; 32] { 228 | // Turn into canonical form by computing 229 | // (a.R) / R = a 230 | let tmp = Scalar::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0); 231 | 232 | let mut res = [0; 32]; 233 | LittleEndian::write_u64(&mut res[0..8], tmp.0[0]); 234 | LittleEndian::write_u64(&mut res[8..16], tmp.0[1]); 235 | LittleEndian::write_u64(&mut res[16..24], tmp.0[2]); 236 | LittleEndian::write_u64(&mut res[24..32], tmp.0[3]); 237 | 238 | res 239 | } 240 | 241 | /// Converts a 512-bit little endian integer into 242 | /// a `Scalar` by reducing by the modulus. 243 | pub fn from_bytes_wide(bytes: &[u8; 64]) -> Scalar { 244 | Scalar::from_u512([ 245 | LittleEndian::read_u64(&bytes[0..8]), 246 | LittleEndian::read_u64(&bytes[8..16]), 247 | LittleEndian::read_u64(&bytes[16..24]), 248 | LittleEndian::read_u64(&bytes[24..32]), 249 | LittleEndian::read_u64(&bytes[32..40]), 250 | LittleEndian::read_u64(&bytes[40..48]), 251 | LittleEndian::read_u64(&bytes[48..56]), 252 | LittleEndian::read_u64(&bytes[56..64]), 253 | ]) 254 | } 255 | 256 | fn from_u512(limbs: [u64; 8]) -> Scalar { 257 | // We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits 258 | // with the higher bits multiplied by 2^256. Thus, we perform two reductions 259 | // 260 | // 1. the lower bits are multiplied by R^2, as normal 261 | // 2. the upper bits are multiplied by R^2 * 2^256 = R^3 262 | // 263 | // and computing their sum in the field. It remains to see that arbitrary 256-bit 264 | // numbers can be placed into Montgomery form safely using the reduction. The 265 | // reduction works so long as the product is less than R=2^256 multipled by 266 | // the modulus. This holds because for any `c` smaller than the modulus, we have 267 | // that (2^256 - 1)*c is an acceptable product for the reduction. 
Therefore, the 268 | // reduction always works so long as `c` is in the field; in this case it is either the 269 | // constant `R2` or `R3`. 270 | let d0 = Scalar([limbs[0], limbs[1], limbs[2], limbs[3]]); 271 | let d1 = Scalar([limbs[4], limbs[5], limbs[6], limbs[7]]); 272 | // Convert to Montgomery form 273 | d0 * r_squared() + d1 * r_cubed() 274 | } 275 | 276 | /// Converts from an integer represented in little endian 277 | /// into its (congruent) `Scalar` representation. 278 | #[inline] 279 | pub fn from_raw(val: [u64; 4]) -> Self { 280 | (&Scalar(val)).mul(&r_squared()) 281 | } 282 | 283 | /// Squares this element. 284 | pub fn square(&self) -> Scalar { 285 | let (r1, carry) = mac(0, self.0[0], self.0[1], 0); 286 | let (r2, carry) = mac(0, self.0[0], self.0[2], carry); 287 | let (r3, r4) = mac(0, self.0[0], self.0[3], carry); 288 | 289 | let (r3, carry) = mac(r3, self.0[1], self.0[2], 0); 290 | let (r4, r5) = mac(r4, self.0[1], self.0[3], carry); 291 | 292 | let (r5, r6) = mac(r5, self.0[2], self.0[3], 0); 293 | 294 | let r7 = r6 >> 63; 295 | let r6 = (r6 << 1) | (r5 >> 63); 296 | let r5 = (r5 << 1) | (r4 >> 63); 297 | let r4 = (r4 << 1) | (r3 >> 63); 298 | let r3 = (r3 << 1) | (r2 >> 63); 299 | let r2 = (r2 << 1) | (r1 >> 63); 300 | let r1 = r1 << 1; 301 | 302 | let (r0, carry) = mac(0, self.0[0], self.0[0], 0); 303 | let (r1, carry) = adc(0, r1, carry); 304 | let (r2, carry) = mac(r2, self.0[1], self.0[1], carry); 305 | let (r3, carry) = adc(0, r3, carry); 306 | let (r4, carry) = mac(r4, self.0[2], self.0[2], carry); 307 | let (r5, carry) = adc(0, r5, carry); 308 | let (r6, carry) = mac(r6, self.0[3], self.0[3], carry); 309 | let (r7, _) = adc(0, r7, carry); 310 | 311 | Scalar::montgomery_reduce(r0, r1, r2, r3, r4, r5, r6, r7) 312 | } 313 | 314 | /// Computes the square root of this element, if it exists. 
315 | pub fn sqrt(&self) -> CtOption { 316 | // Tonelli-Shank's algorithm for q mod 16 = 1 317 | // https://eprint.iacr.org/2012/685.pdf (page 12, algorithm 5) 318 | 319 | // w = self^((t - 1) // 2) 320 | // = self^6104339283789297388802252303364915521546564123189034618274734669823 321 | let w = self.pow_vartime(&[ 322 | 0x7fff2dff7fffffff, 323 | 0x04d0ec02a9ded201, 324 | 0x94cebea4199cec04, 325 | 0x0000000039f6d3a9, 326 | ]); 327 | 328 | let s = s(); 329 | 330 | let mut v = s; 331 | let mut x = self * w; 332 | let mut b = x * w; 333 | 334 | // Initialize z as the 2^S root of unity. 335 | let mut z = root_of_unity(); 336 | 337 | for max_v in (1..=s).rev() { 338 | let mut k = 1; 339 | let mut tmp = b.square(); 340 | let mut j_less_than_v: Choice = 1.into(); 341 | 342 | for j in 2..max_v { 343 | let tmp_is_one = tmp.ct_eq(&Scalar::one()); 344 | let squared = Scalar::conditional_select(&tmp, &z, tmp_is_one).square(); 345 | tmp = Scalar::conditional_select(&squared, &tmp, tmp_is_one); 346 | let new_z = Scalar::conditional_select(&z, &squared, tmp_is_one); 347 | j_less_than_v &= !j.ct_eq(&v); 348 | k = u32::conditional_select(&j, &k, tmp_is_one); 349 | z = Scalar::conditional_select(&z, &new_z, j_less_than_v); 350 | } 351 | 352 | let result = x * z; 353 | x = Scalar::conditional_select(&result, &x, b.ct_eq(&Scalar::one())); 354 | z = z.square(); 355 | b *= z; 356 | v = k; 357 | } 358 | 359 | CtOption::new( 360 | x, 361 | (x * x).ct_eq(self), // Only return Some if it's the square root. 362 | ) 363 | } 364 | 365 | /// Exponentiates `self` by `by`, where `by` is a 366 | /// little-endian order integer exponent. 
367 | pub fn pow(&self, by: &[u64; 4]) -> Self { 368 | let mut res = Self::one(); 369 | for e in by.iter().rev() { 370 | for i in (0..64).rev() { 371 | res = res.square(); 372 | let mut tmp = res; 373 | tmp *= self; 374 | res.conditional_assign(&tmp, (((*e >> i) & 0x1) as u8).into()); 375 | } 376 | } 377 | res 378 | } 379 | 380 | /// Exponentiates `self` by `by`, where `by` is a 381 | /// little-endian order integer exponent. 382 | /// 383 | /// **This operation is variable time with respect 384 | /// to the exponent.** If the exponent is fixed, 385 | /// this operation is effectively constant time. 386 | pub fn pow_vartime(&self, by: &[u64; 4]) -> Self { 387 | let mut res = Self::one(); 388 | for e in by.iter().rev() { 389 | for i in (0..64).rev() { 390 | res = res.square(); 391 | 392 | if ((*e >> i) & 1) == 1 { 393 | res.mul_assign(self); 394 | } 395 | } 396 | } 397 | res 398 | } 399 | 400 | #[inline] 401 | fn montgomery_reduce( 402 | r0: u64, 403 | r1: u64, 404 | r2: u64, 405 | r3: u64, 406 | r4: u64, 407 | r5: u64, 408 | r6: u64, 409 | r7: u64, 410 | ) -> Self { 411 | // The Montgomery reduction here is based on Algorithm 14.32 in 412 | // Handbook of Applied Cryptography 413 | // . 
414 | 415 | let modulus = modulus(); 416 | let inv = inv(); 417 | let k = r0.wrapping_mul(inv); 418 | let (_, carry) = mac(r0, k, modulus.0[0], 0); 419 | let (r1, carry) = mac(r1, k, modulus.0[1], carry); 420 | let (r2, carry) = mac(r2, k, modulus.0[2], carry); 421 | let (r3, carry) = mac(r3, k, modulus.0[3], carry); 422 | let (r4, carry2) = adc(r4, 0, carry); 423 | 424 | let k = r1.wrapping_mul(inv); 425 | let (_, carry) = mac(r1, k, modulus.0[0], 0); 426 | let (r2, carry) = mac(r2, k, modulus.0[1], carry); 427 | let (r3, carry) = mac(r3, k, modulus.0[2], carry); 428 | let (r4, carry) = mac(r4, k, modulus.0[3], carry); 429 | let (r5, carry2) = adc(r5, carry2, carry); 430 | 431 | let k = r2.wrapping_mul(inv); 432 | let (_, carry) = mac(r2, k, modulus.0[0], 0); 433 | let (r3, carry) = mac(r3, k, modulus.0[1], carry); 434 | let (r4, carry) = mac(r4, k, modulus.0[2], carry); 435 | let (r5, carry) = mac(r5, k, modulus.0[3], carry); 436 | let (r6, carry2) = adc(r6, carry2, carry); 437 | 438 | let k = r3.wrapping_mul(inv); 439 | let (_, carry) = mac(r3, k, modulus.0[0], 0); 440 | let (r4, carry) = mac(r4, k, modulus.0[1], carry); 441 | let (r5, carry) = mac(r5, k, modulus.0[2], carry); 442 | let (r6, carry) = mac(r6, k, modulus.0[3], carry); 443 | let (r7, _) = adc(r7, carry2, carry); 444 | // Result may be within MODULUS of the correct value 445 | (&Scalar([r4, r5, r6, r7])).sub(&modulus) 446 | } 447 | 448 | /// Multiplies `rhs` by `self`, returning the result. 
449 | #[inline] 450 | pub fn mul(&self, rhs: &Self) -> Self { 451 | // Schoolbook multiplication 452 | 453 | let (r0, carry) = mac(0, self.0[0], rhs.0[0], 0); 454 | let (r1, carry) = mac(0, self.0[0], rhs.0[1], carry); 455 | let (r2, carry) = mac(0, self.0[0], rhs.0[2], carry); 456 | let (r3, r4) = mac(0, self.0[0], rhs.0[3], carry); 457 | 458 | let (r1, carry) = mac(r1, self.0[1], rhs.0[0], 0); 459 | let (r2, carry) = mac(r2, self.0[1], rhs.0[1], carry); 460 | let (r3, carry) = mac(r3, self.0[1], rhs.0[2], carry); 461 | let (r4, r5) = mac(r4, self.0[1], rhs.0[3], carry); 462 | 463 | let (r2, carry) = mac(r2, self.0[2], rhs.0[0], 0); 464 | let (r3, carry) = mac(r3, self.0[2], rhs.0[1], carry); 465 | let (r4, carry) = mac(r4, self.0[2], rhs.0[2], carry); 466 | let (r5, r6) = mac(r5, self.0[2], rhs.0[3], carry); 467 | 468 | let (r3, carry) = mac(r3, self.0[3], rhs.0[0], 0); 469 | let (r4, carry) = mac(r4, self.0[3], rhs.0[1], carry); 470 | let (r5, carry) = mac(r5, self.0[3], rhs.0[2], carry); 471 | let (r6, r7) = mac(r6, self.0[3], rhs.0[3], carry); 472 | 473 | Scalar::montgomery_reduce(r0, r1, r2, r3, r4, r5, r6, r7) 474 | } 475 | 476 | /// Subtracts `rhs` from `self`, returning the result. 477 | pub fn sub(&self, rhs: &Self) -> Self { 478 | let modulus = modulus(); 479 | let (d0, borrow) = sbb(self.0[0], rhs.0[0], 0); 480 | let (d1, borrow) = sbb(self.0[1], rhs.0[1], borrow); 481 | let (d2, borrow) = sbb(self.0[2], rhs.0[2], borrow); 482 | let (d3, borrow) = sbb(self.0[3], rhs.0[3], borrow); 483 | 484 | // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise 485 | // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the modulus. 
486 | let (d0, carry) = adc(d0, modulus.0[0] & borrow, 0); 487 | let (d1, carry) = adc(d1, modulus.0[1] & borrow, carry); 488 | let (d2, carry) = adc(d2, modulus.0[2] & borrow, carry); 489 | let (d3, _) = adc(d3, modulus.0[3] & borrow, carry); 490 | 491 | Scalar([d0, d1, d2, d3]) 492 | } 493 | 494 | /// Adds `rhs` to `self`, returning the result. 495 | pub fn add(&self, rhs: &Self) -> Self { 496 | let (d0, carry) = adc(self.0[0], rhs.0[0], 0); 497 | let (d1, carry) = adc(self.0[1], rhs.0[1], carry); 498 | let (d2, carry) = adc(self.0[2], rhs.0[2], carry); 499 | let (d3, _) = adc(self.0[3], rhs.0[3], carry); 500 | 501 | // Attempt to subtract the modulus, to ensure the value 502 | // is smaller than the modulus. 503 | (&Scalar([d0, d1, d2, d3])).sub(&modulus()) 504 | } 505 | 506 | /// Negates `self`. 507 | pub fn neg(&self) -> Self { 508 | // Subtract `self` from `MODULUS` to negate. Ignore the final 509 | // borrow because it cannot underflow; self is guaranteed to 510 | // be in the field. 511 | let modulus = modulus(); 512 | let (d0, borrow) = sbb(modulus.0[0], self.0[0], 0); 513 | let (d1, borrow) = sbb(modulus.0[1], self.0[1], borrow); 514 | let (d2, borrow) = sbb(modulus.0[2], self.0[2], borrow); 515 | let (d3, _) = sbb(modulus.0[3], self.0[3], borrow); 516 | 517 | // `tmp` could be `MODULUS` if `self` was zero. Create a mask that is 518 | // zero if `self` was zero, and `u64::max_value()` if self was nonzero. 
519 | let mask = (((self.0[0] | self.0[1] | self.0[2] | self.0[3]) == 0) as u64).wrapping_sub(1); 520 | 521 | Scalar([d0 & mask, d1 & mask, d2 & mask, d3 & mask]) 522 | } 523 | } 524 | 525 | impl<'a> From<&'a Scalar> for [u8; 32] { 526 | fn from(value: &'a Scalar) -> [u8; 32] { 527 | value.to_bytes() 528 | } 529 | } 530 | 531 | #[test] 532 | fn test_inv() { 533 | // Compute -(q^{-1} mod 2^64) mod 2^64 by exponentiating 534 | // by totient(2**64) - 1 535 | 536 | let true_inv = inv(); 537 | let mut inv = 1u64; 538 | for _ in 0..63 { 539 | inv = inv.wrapping_mul(inv); 540 | inv = inv.wrapping_mul(modulus().0[0]); 541 | } 542 | inv = inv.wrapping_neg(); 543 | 544 | assert_eq!(inv, true_inv); 545 | } 546 | 547 | #[cfg(feature = "std")] 548 | #[test] 549 | fn test_debug() { 550 | assert_eq!( 551 | format!("{:?}", Scalar::zero()), 552 | "0x0000000000000000000000000000000000000000000000000000000000000000" 553 | ); 554 | assert_eq!( 555 | format!("{:?}", Scalar::one()), 556 | "0x0000000000000000000000000000000000000000000000000000000000000001" 557 | ); 558 | assert_eq!( 559 | format!("{:?}", r_squared()), 560 | "0x1824b159acc5056f998c4fefecbc4ff55884b7fa0003480200000001fffffffe" 561 | ); 562 | } 563 | 564 | #[test] 565 | fn test_equality() { 566 | assert_eq!(Scalar::zero(), Scalar::zero()); 567 | assert_eq!(Scalar::one(), Scalar::one()); 568 | assert_eq!(r_squared(), r_squared()); 569 | 570 | assert!(Scalar::zero() != Scalar::one()); 571 | assert!(Scalar::one() != r_squared()); 572 | } 573 | 574 | #[test] 575 | fn test_to_bytes() { 576 | assert_eq!( 577 | Scalar::zero().to_bytes(), 578 | [ 579 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 580 | 0, 0, 0 581 | ] 582 | ); 583 | 584 | assert_eq!( 585 | Scalar::one().to_bytes(), 586 | [ 587 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 588 | 0, 0, 0 589 | ] 590 | ); 591 | 592 | assert_eq!( 593 | r_squared().to_bytes(), 594 | [ 595 | 243, 255, 
255, 255, 255, 127, 28, 125, 242, 255, 255, 111, 15, 245, 87, 114, 238, 15, 596 | 44, 81, 117, 21, 216, 22, 157, 154, 187, 43, 50, 218, 75, 13 597 | ] 598 | ); 599 | 600 | assert_eq!( 601 | (-&Scalar::one()).to_bytes(), 602 | [ 603 | 0, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180, 604 | 96, 86, 165, 44, 154, 94, 101, 171, 18 605 | ] 606 | ); 607 | } 608 | 609 | #[test] 610 | fn test_from_bytes() { 611 | assert_eq!( 612 | Scalar::from_bytes(&[ 613 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 614 | 0, 0, 0 615 | ]) 616 | .unwrap(), 617 | Scalar::zero() 618 | ); 619 | 620 | assert_eq!( 621 | Scalar::from_bytes(&[ 622 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 623 | 0, 0, 0 624 | ]) 625 | .unwrap(), 626 | Scalar::one() 627 | ); 628 | 629 | assert_eq!( 630 | Scalar::from_bytes(&[ 631 | 243, 255, 255, 255, 255, 127, 28, 125, 242, 255, 255, 111, 15, 245, 87, 114, 238, 15, 632 | 44, 81, 117, 21, 216, 22, 157, 154, 187, 43, 50, 218, 75, 13 633 | ]) 634 | .unwrap(), 635 | r_squared() 636 | ); 637 | 638 | // -1 should work 639 | assert!( 640 | Scalar::from_bytes(&[ 641 | 0, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180, 642 | 96, 86, 165, 44, 154, 94, 101, 171, 18 643 | ]) 644 | .is_some() 645 | .unwrap_u8() 646 | == 1 647 | ); 648 | 649 | // modulus is invalid 650 | assert!( 651 | Scalar::from_bytes(&[ 652 | 1, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180, 653 | 96, 86, 165, 44, 154, 94, 101, 171, 18 654 | ]) 655 | .is_none() 656 | .unwrap_u8() 657 | == 1 658 | ); 659 | 660 | // Anything larger than the modulus is invalid 661 | assert!( 662 | Scalar::from_bytes(&[ 663 | 2, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180, 664 | 96, 86, 165, 44, 154, 94, 101, 171, 18 665 | ]) 666 | .is_none() 667 | .unwrap_u8() 668 | == 1 669 | ); 
670 | assert!( 671 | Scalar::from_bytes(&[ 672 | 1, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 177, 55, 92, 30, 77, 180, 673 | 96, 86, 165, 44, 154, 94, 101, 171, 18 674 | ]) 675 | .is_none() 676 | .unwrap_u8() 677 | == 1 678 | ); 679 | assert!( 680 | Scalar::from_bytes(&[ 681 | 1, 0, 0, 0, 255, 255, 255, 255, 254, 91, 254, 255, 2, 164, 189, 83, 5, 216, 161, 9, 8, 682 | 216, 57, 51, 72, 125, 157, 41, 83, 167, 237, 117 683 | ]) 684 | .is_none() 685 | .unwrap_u8() 686 | == 1 687 | ); 688 | } 689 | 690 | #[test] 691 | fn test_from_u512_zero() { 692 | let modulus = modulus(); 693 | assert_eq!( 694 | Scalar::zero(), 695 | Scalar::from_u512([ 696 | modulus.0[0], 697 | modulus.0[1], 698 | modulus.0[2], 699 | modulus.0[3], 700 | 0, 701 | 0, 702 | 0, 703 | 0 704 | ]) 705 | ); 706 | } 707 | 708 | #[test] 709 | fn test_from_u512_r() { 710 | assert_eq!(r(), Scalar::from_u512([1, 0, 0, 0, 0, 0, 0, 0])); 711 | } 712 | 713 | #[test] 714 | fn test_from_u512_r_squared() { 715 | assert_eq!(r_squared(), Scalar::from_u512([0, 0, 0, 0, 1, 0, 0, 0])); 716 | } 717 | 718 | #[test] 719 | fn test_from_u512_max() { 720 | let max_u64 = 0xffffffffffffffff; 721 | assert_eq!( 722 | r_cubed() - r(), 723 | Scalar::from_u512([max_u64, max_u64, max_u64, max_u64, max_u64, max_u64, max_u64, max_u64]) 724 | ); 725 | } 726 | 727 | #[test] 728 | fn test_from_bytes_wide_r_squared() { 729 | assert_eq!( 730 | r_squared(), 731 | Scalar::from_bytes_wide(&[ 732 | 243, 255, 255, 255, 255, 127, 28, 125, 242, 255, 255, 111, 15, 245, 87, 114, 238, 15, 733 | 44, 81, 117, 21, 216, 22, 157, 154, 187, 43, 50, 218, 75, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 734 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 735 | ]) 736 | ); 737 | } 738 | 739 | #[test] 740 | fn test_from_bytes_wide_negative_one() { 741 | assert_eq!( 742 | -&Scalar::one(), 743 | Scalar::from_bytes_wide(&[ 744 | 0, 0, 0, 0, 0, 128, 17, 10, 1, 0, 0, 208, 254, 118, 170, 89, 1, 176, 55, 92, 30, 77, 180, 96, 86, 745 | 165, 
44, 154, 94, 101, 171, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 746 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 747 | ]) 748 | ); 749 | } 750 | 751 | #[test] 752 | fn test_zero() { 753 | assert_eq!(Scalar::zero(), -&Scalar::zero()); 754 | assert_eq!(Scalar::zero(), Scalar::zero() + Scalar::zero()); 755 | assert_eq!(Scalar::zero(), Scalar::zero() - Scalar::zero()); 756 | assert_eq!(Scalar::zero(), Scalar::zero() * Scalar::zero()); 757 | } 758 | 759 | #[cfg(test)] 760 | const LARGEST: Scalar = Scalar([ 761 | 725501752471715840u64, 762 | 6461107452199829505u64, 763 | 6968279316240510977u64, 764 | 1345280370688173398u64, 765 | ]); 766 | 767 | #[test] 768 | fn test_addition() { 769 | let mut tmp = LARGEST; 770 | tmp += &LARGEST; 771 | println!("{:x?}", tmp); 772 | 773 | assert_eq!( 774 | tmp, 775 | Scalar([ 776 | 0xa117fffffffffff, 777 | 0x59aa76fed0000001, 778 | 0x60b44d1e5c37b001, 779 | 0x12ab655e9a2ca556, 780 | ]) 781 | ); 782 | 783 | let mut tmp = LARGEST; 784 | tmp += &Scalar([1, 0, 0, 0]); 785 | 786 | assert_eq!(tmp, Scalar::zero()); 787 | } 788 | 789 | #[test] 790 | fn test_negation() { 791 | let tmp = -&LARGEST; 792 | 793 | assert_eq!(tmp, Scalar([1, 0, 0, 0])); 794 | 795 | let tmp = -&Scalar::zero(); 796 | assert_eq!(tmp, Scalar::zero()); 797 | let tmp = -&Scalar([1, 0, 0, 0]); 798 | assert_eq!(tmp, LARGEST); 799 | } 800 | 801 | #[test] 802 | fn test_subtraction() { 803 | let mut tmp = LARGEST; 804 | tmp -= &LARGEST; 805 | 806 | assert_eq!(tmp, Scalar::zero()); 807 | 808 | let mut tmp = Scalar::zero(); 809 | tmp -= &LARGEST; 810 | 811 | let mut tmp2 = modulus(); 812 | tmp2 -= &LARGEST; 813 | 814 | assert_eq!(tmp, tmp2); 815 | } 816 | 817 | #[test] 818 | fn test_multiplication() { 819 | let mut cur = LARGEST; 820 | 821 | for _ in 0..100 { 822 | let mut tmp = cur; 823 | tmp *= &cur; 824 | 825 | let mut tmp2 = Scalar::zero(); 826 | for b in cur 827 | .to_bytes() 828 | .iter() 829 | .rev() 830 | .flat_map(|byte| (0..8).rev().map(move |i| ((byte >> 
i) & 1u8) == 1u8)) 831 | { 832 | let tmp3 = tmp2; 833 | tmp2.add_assign(&tmp3); 834 | 835 | if b { 836 | tmp2.add_assign(&cur); 837 | } 838 | } 839 | 840 | assert_eq!(tmp, tmp2); 841 | 842 | cur.add_assign(&LARGEST); 843 | } 844 | } 845 | 846 | #[test] 847 | fn test_squaring() { 848 | let mut cur = LARGEST; 849 | 850 | for _ in 0..100 { 851 | let mut tmp = cur; 852 | tmp = tmp.square(); 853 | 854 | let mut tmp2 = Scalar::zero(); 855 | for b in cur 856 | .to_bytes() 857 | .iter() 858 | .rev() 859 | .flat_map(|byte| (0..8).rev().map(move |i| ((byte >> i) & 1u8) == 1u8)) 860 | { 861 | let tmp3 = tmp2; 862 | tmp2.add_assign(&tmp3); 863 | 864 | if b { 865 | tmp2.add_assign(&cur); 866 | } 867 | } 868 | 869 | assert_eq!(tmp, tmp2); 870 | 871 | cur.add_assign(&LARGEST); 872 | } 873 | } 874 | 875 | #[test] 876 | fn test_from_raw() { 877 | assert_eq!(Scalar::from_raw(modulus().0), Scalar::zero()); 878 | 879 | assert_eq!(Scalar::from_raw([1, 0, 0, 0]), r()); 880 | } 881 | 882 | #[test] 883 | fn test_double() { 884 | let a = Scalar::from_raw([ 885 | 0x1fff3231233ffffd, 886 | 0x4884b7fa00034802, 887 | 0x998c4fefecbc4ff3, 888 | 0x1824b159acc50562, 889 | ]); 890 | 891 | assert_eq!(a.double(), a + a); 892 | } 893 | -------------------------------------------------------------------------------- /bls12_377/src/util.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, PartialEq)] 2 | pub enum LegendreSymbol { 3 | Zero = 0, 4 | QuadraticResidue = 1, 5 | QuadraticNonResidue = -1, 6 | } 7 | 8 | /// Compute a + b + carry, returning the result and the new carry over. 9 | #[inline(always)] 10 | pub fn adc(a: u64, b: u64, carry: u64) -> (u64, u64) { 11 | let ret = (a as u128) + (b as u128) + (carry as u128); 12 | (ret as u64, (ret >> 64) as u64) 13 | } 14 | 15 | /// Compute a - (b + borrow), returning the result and the new borrow. 
#[inline(always)]
pub fn sbb(a: u64, b: u64, borrow: u64) -> (u64, u64) {
    // The incoming borrow is a mask (all ones or zero); only its top bit is
    // consulted, so it contributes either 1 or 0 to the subtrahend.
    let borrow_in = u128::from(borrow >> 63);
    let diff = u128::from(a).wrapping_sub(u128::from(b) + borrow_in);
    // On underflow the upper 64 bits of the wrapped difference are all ones,
    // which becomes the outgoing borrow mask.
    (diff as u64, (diff >> 64) as u64)
}

/// Compute a + (b * c) + carry, returning the result and the new carry over.
pub fn mac(a: u64, b: u64, c: u64, carry: u64) -> (u64, u64) {
    let product = u128::from(b) * u128::from(c);
    let total = u128::from(a) + product + u128::from(carry);
    (total as u64, (total >> 64) as u64)
}

/// Given `&$lhs + &$rhs -> $output`, derives the three `Add` impls that take
/// at least one operand by value by forwarding to the by-reference impl.
macro_rules! impl_add_binop_specify_output {
    ($lhs:ident, $rhs:ident, $output:ident) => {
        impl Add<$rhs> for $lhs {
            type Output = $output;

            #[inline]
            fn add(self, rhs: $rhs) -> $output {
                &self + &rhs
            }
        }

        impl<'a> Add<$rhs> for &'a $lhs {
            type Output = $output;

            #[inline]
            fn add(self, rhs: $rhs) -> $output {
                self + &rhs
            }
        }

        impl<'b> Add<&'b $rhs> for $lhs {
            type Output = $output;

            #[inline]
            fn add(self, rhs: &'b $rhs) -> $output {
                &self + rhs
            }
        }
    };
}

/// Given `&$lhs - &$rhs -> $output`, derives the three `Sub` impls that take
/// at least one operand by value by forwarding to the by-reference impl.
macro_rules! impl_sub_binop_specify_output {
    ($lhs:ident, $rhs:ident, $output:ident) => {
        impl Sub<$rhs> for $lhs {
            type Output = $output;

            #[inline]
            fn sub(self, rhs: $rhs) -> $output {
                &self - &rhs
            }
        }

        impl<'a> Sub<$rhs> for &'a $lhs {
            type Output = $output;

            #[inline]
            fn sub(self, rhs: $rhs) -> $output {
                self - &rhs
            }
        }

        impl<'b> Sub<&'b $rhs> for $lhs {
            type Output = $output;

            #[inline]
            fn sub(self, rhs: &'b $rhs) -> $output {
                &self - rhs
            }
        }
    };
}

/// Derives the by-value `Add` and `Sub` impls with an explicit output type.
macro_rules! impl_binops_additive_specify_output {
    ($lhs:ident, $rhs:ident, $output:ident) => {
        impl_add_binop_specify_output!($lhs, $rhs, $output);
        impl_sub_binop_specify_output!($lhs, $rhs, $output);
    };
}

/// Given `&$lhs * &$rhs -> $output`, derives the three `Mul` impls that take
/// at least one operand by value by forwarding to the by-reference impl.
macro_rules! impl_binops_multiplicative_mixed {
    ($lhs:ident, $rhs:ident, $output:ident) => {
        impl Mul<$rhs> for $lhs {
            type Output = $output;

            #[inline]
            fn mul(self, rhs: $rhs) -> $output {
                &self * &rhs
            }
        }

        impl<'a> Mul<$rhs> for &'a $lhs {
            type Output = $output;

            #[inline]
            fn mul(self, rhs: $rhs) -> $output {
                self * &rhs
            }
        }

        impl<'b> Mul<&'b $rhs> for $lhs {
            type Output = $output;

            #[inline]
            fn mul(self, rhs: &'b $rhs) -> $output {
                &self * rhs
            }
        }
    };
}

/// Derives the by-value `Add`/`Sub` impls (output type `$lhs`) together with
/// `AddAssign`/`SubAssign` for both owned and borrowed right-hand sides.
macro_rules! impl_binops_additive {
    ($lhs:ident, $rhs:ident) => {
        impl_binops_additive_specify_output!($lhs, $rhs, $lhs);

        impl AddAssign<$rhs> for $lhs {
            #[inline]
            fn add_assign(&mut self, rhs: $rhs) {
                *self = &*self + &rhs;
            }
        }

        impl<'b> AddAssign<&'b $rhs> for $lhs {
            #[inline]
            fn add_assign(&mut self, rhs: &'b $rhs) {
                *self = &*self + rhs;
            }
        }

        impl SubAssign<$rhs> for $lhs {
            #[inline]
            fn sub_assign(&mut self, rhs: $rhs) {
                *self = &*self - &rhs;
            }
        }

        impl<'b> SubAssign<&'b $rhs> for $lhs {
            #[inline]
            fn sub_assign(&mut self, rhs: &'b $rhs) {
                *self = &*self - rhs;
            }
        }
    };
}

/// Derives the by-value `Mul` impls (output type `$lhs`) together with
/// `MulAssign` for both owned and borrowed right-hand sides.
macro_rules! impl_binops_multiplicative {
    ($lhs:ident, $rhs:ident) => {
        impl_binops_multiplicative_mixed!($lhs, $rhs, $lhs);

        impl MulAssign<$rhs> for $lhs {
            #[inline]
            fn mul_assign(&mut self, rhs: $rhs) {
                *self = &*self * &rhs;
            }
        }

        impl<'b> MulAssign<&'b $rhs> for $lhs {
            #[inline]
            fn mul_assign(&mut self, rhs: &'b $rhs) {
                *self = &*self * rhs;
            }
        }
    };
}

// --------------------------------------------------------------------------------