├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .rspec ├── .rubocop.yml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── Gemfile ├── LICENSE ├── README.md ├── Rakefile ├── ext ├── x25519_precomputed │ ├── cputest.c │ ├── extconf.rb │ ├── fp25519_x64.c │ ├── fp25519_x64.h │ ├── table_ladder_x25519.h │ ├── x25519_precomputed.c │ ├── x25519_precomputed.h │ └── x25519_x64.c └── x25519_ref10 │ ├── api.h │ ├── base.c │ ├── extconf.rb │ ├── fe.c │ ├── fe.h │ ├── montgomery.h │ ├── pow225521.h │ ├── scalarmult.c │ ├── x25519_ref10.c │ └── x25519_ref10.h ├── lib ├── x25519.rb └── x25519 │ ├── montgomery_u.rb │ ├── precomputed_not_available.rb │ ├── scalar.rb │ ├── test_vectors.rb │ └── version.rb ├── spec ├── spec_helper.rb ├── support │ └── provider_examples.rb ├── x25519 │ ├── montgomery_u_spec.rb │ ├── provider │ │ ├── precomputed_spec.rb │ │ └── ref10_spec.rb │ └── scalar_spec.rb └── x25519_spec.rb └── x25519.gemspec /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | paths-ignore: 6 | - README.md 7 | push: 8 | paths-ignore: 9 | - README.md 10 | 11 | env: 12 | BUNDLE_WITHOUT: "development" 13 | 14 | jobs: 15 | test: 16 | runs-on: ubuntu-latest 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | ruby: 21 | - ruby-2.7 22 | - ruby-3.0 23 | - ruby-3.1 24 | - ruby-3.2 25 | - ruby-3.3 26 | - ruby-3.4 27 | - ruby-head 28 | steps: 29 | - uses: actions/checkout@v4 30 | - uses: ruby/setup-ruby@v1 31 | with: 32 | ruby-version: ${{ matrix.ruby }} 33 | bundler-cache: true 34 | - run: bundle exec rake compile 35 | - run: bundle exec rake spec 36 | 37 | rubocop: 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: ruby/setup-ruby@v1 42 | with: 43 | ruby-version: 2.7 44 | bundler-cache: true 45 | - run: bundle exec rubocop --format progress --color 46 | -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | /Gemfile.lock 2 | /.bundle/ 3 | /.yardoc 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | *.o 11 | *.so 12 | *.bundle 13 | 14 | # rspec failure tracking 15 | .rspec_status 16 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format documentation 3 | --order random 4 | --warnings 5 | --require spec_helper 6 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | require: 2 | - rubocop-performance 3 | - rubocop-rake 4 | - rubocop-rspec 5 | 6 | AllCops: 7 | TargetRubyVersion: 2.7 8 | DisplayCopNames: true 9 | NewCops: enable 10 | 11 | # 12 | # Gemspec 13 | # 14 | 15 | Gemspec/RequireMFA: 16 | Enabled: false 17 | 18 | # 19 | # Metrics 20 | # 21 | 22 | Metrics/AbcSize: 23 | Enabled: false 24 | 25 | Metrics/CyclomaticComplexity: 26 | Enabled: false 27 | 28 | Metrics/PerceivedComplexity: 29 | Enabled: false 30 | 31 | Metrics/BlockLength: 32 | Max: 100 33 | 34 | Metrics/ClassLength: 35 | Max: 100 36 | 37 | Metrics/LineLength: 38 | Max: 128 39 | 40 | Metrics/MethodLength: 41 | Max: 25 42 | 43 | # 44 | # Style 45 | # 46 | 47 | Style/FrozenStringLiteralComment: 48 | Enabled: true 49 | 50 | Style/StringLiterals: 51 | EnforcedStyle: double_quotes 52 | 53 | # 54 | # RSpec 55 | # 56 | 57 | RSpec/MultipleExpectations: 58 | Max: 3 59 | 60 | RSpec/ExampleLength: 61 | Max: 6 62 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [1.0.10] (2022-10-06) 2 | 3 | [1.0.10]: https://github.com/RubyCrypto/x25519/compare/v1.0.9...v1.0.10 4 | 5 | - 
[#32](https://github.com/RubyCrypto/x25519/pull/32) 6 | Avoid building precomputed on `aarch64-linux` 7 | - [#29](https://github.com/RubyCrypto/x25519/pull/29), [#31](https://github.com/RubyCrypto/x25519/pull/31) 8 | Allow usage on Apple silicon 9 | 10 | ## [1.0.9] (2021-08-04) 11 | 12 | [1.0.9]: https://github.com/RubyCrypto/x25519/compare/v1.0.8...v1.0.9 13 | 14 | - [#23](https://github.com/RubyCrypto/x25519/pull/23) 15 | Bump `required_ruby_version` to 2.5 16 | - [#25](https://github.com/RubyCrypto/x25519/pull/25) 17 | Fix SIGILL caused by use of `-march=native` 18 | 19 | ## [1.0.8] (2018-11-08) 20 | 21 | [1.0.8]: https://github.com/RubyCrypto/x25519/compare/v1.0.7...v1.0.8 22 | 23 | - [#20](https://github.com/RubyCrypto/x25519/pull/20) 24 | Change license to BSD-3-clause. 25 | 26 | ## [1.0.7] (2018-02-26) 27 | 28 | [1.0.7]: https://github.com/RubyCrypto/x25519/compare/v1.0.6...v1.0.7 29 | 30 | - [#19](https://github.com/RubyCrypto/x25519/pull/19) 31 | Incorporate upstream carry propagation bugfix and LICENSE changes. 32 | 33 | ## [1.0.6] (2018-01-04) 34 | 35 | [1.0.6]: https://github.com/RubyCrypto/x25519/compare/v1.0.5...v1.0.6 36 | 37 | - Use correct (LGPLv3) license in gemspec 38 | 39 | ## [1.0.5] (2017-12-31) 40 | 41 | [1.0.5]: https://github.com/RubyCrypto/x25519/compare/v1.0.4...v1.0.5 42 | 43 | - [#15](https://github.com/RubyCrypto/x25519/pull/15) 44 | RuboCop 0.52.1 45 | 46 | - [#14](https://github.com/RubyCrypto/x25519/pull/14) 47 | `ext/x25519_ref10`: Consolidate all field element code into `fe.c`. 
48 | 49 | ## [1.0.4] (2017-12-31) 50 | 51 | [1.0.4]: https://github.com/RubyCrypto/x25519/compare/v1.0.3...v1.0.4 52 | 53 | - [#13](https://github.com/RubyCrypto/x25519/pull/13) 54 | Test against Ruby 2.5.0 55 | 56 | - [#12](https://github.com/RubyCrypto/x25519/pull/12) 57 | Move project to the RubyCrypto GitHub organization 58 | 59 | ## [1.0.3] (2017-12-13) 60 | 61 | [1.0.3]: https://github.com/RubyCrypto/x25519/compare/v1.0.2...v1.0.3 62 | 63 | - [#10](https://github.com/RubyCrypto/x25519/pull/10) 64 | Detect degenerate (i.e. all-zero) public keys (fixes #6) 65 | 66 | ## [1.0.2] (2017-12-13) 67 | 68 | [1.0.2]: https://github.com/RubyCrypto/x25519/compare/v1.0.1...v1.0.2 69 | 70 | - [#9](https://github.com/RubyCrypto/x25519/pull/9) 71 | Make `X25519.provider` an `attr_accessor` 72 | - Raise `X25519::SelfTestFailure` when self-test fails 73 | 74 | ## [1.0.1] (2017-12-12) 75 | 76 | [1.0.1]: https://github.com/RubyCrypto/x25519/compare/v1.0.0...v1.0.1 77 | 78 | - Have `X25519.self_test` return true on success 79 | 80 | ## [1.0.0] (2017-12-12) 81 | 82 | [1.0.0]: https://github.com/RubyCrypto/x25519/compare/v0.2.0...v1.0.0 83 | 84 | - [#8](https://github.com/RubyCrypto/x25519/pull/8) 85 | Add self-test 86 | 87 | - [#7](https://github.com/RubyCrypto/x25519/pull/7) 88 | Factor providers into the `X25519::Provider` namespace 89 | 90 | ## [0.2.0] (2017-12-12) 91 | 92 | [0.2.0]: https://github.com/RubyCrypto/x25519/compare/v0.1.0...v0.2.0 93 | 94 | - [#5](https://github.com/RubyCrypto/x25519/pull/5) 95 | Rewrite gem in Ruby with minimal native extensions 96 | 97 | ## 0.1.0 (2017-12-11) 98 | 99 | - Initial release 100 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers 
pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 
45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at bascule@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source "https://rubygems.org" 4 | 5 | gemspec 6 | 7 | group :development, :test do 8 | gem "rake", require: false 9 | gem "rake-compiler", "~> 1.0", require: false 10 | gem "rspec", "~> 3.10", require: false 11 | gem "rubocop", "1.68", require: false 12 | gem "rubocop-performance", "1.23.0", require: false 13 | gem "rubocop-rake", "0.6.0", require: false 14 | gem "rubocop-rspec", "3.2.0", require: false 15 | end 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD-3 License. 2 | 3 | Copyright (c) 2017, Armando Faz . All rights reserved. 4 | Institute of Computing. 5 | University of Campinas, Brazil. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions 9 | are met: 10 | 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following 15 | disclaimer in the documentation and/or other materials provided 16 | with the distribution. 
17 | * Neither the name of University of Campinas nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # x25519.rb [![Latest Version][gem-shield]][gem-link] [![Yard Docs][docs-image]][docs-link] [![License: BSD 3-Clause][license-image]][license-link] [![Build Status][build-image]][build-link] 2 | 3 | [gem-shield]: https://img.shields.io/gem/v/x25519?logo=ruby 4 | [gem-link]: https://rubygems.org/gems/x25519 5 | [docs-image]: https://img.shields.io/badge/yard-docs-blue.svg 6 | [docs-link]: http://www.rubydoc.info/gems/x25519/1.0.6 7 | [license-image]: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg 8 | [license-link]: https://spdx.org/licenses/BSD-3-Clause.html 9 | [build-image]: https://github.com/RubyCrypto/x25519/actions/workflows/ci.yml/badge.svg 10 | [build-link]: https://github.com/RubyCrypto/x25519/actions/workflows/ci.yml 11 | 12 | An efficient public key cryptography library for Ruby providing key 13 | exchange/agreement. 14 | 15 | This gem implements X25519 (a.k.a. Curve25519) Elliptic Curve Diffie-Hellman 16 | function as described in [RFC7748] as a C extension using the 17 | high performance [rfc7748_precomputed] implementation based on the paper 18 | [How to (pre-)compute a ladder] 19 | (with fallback to the ref10 C implementation). 20 | 21 | X25519 is one of two notable algorithms implemented atop the Curve25519 22 | elliptic curve. The [ed25519 gem] is a related project of this one, 23 | and implements the Ed25519 signature scheme on the twisted Edwards form of 24 | Curve25519. 25 | 26 | [RFC7748]: https://tools.ietf.org/html/rfc7748 27 | [How to (pre-)compute a ladder]: https://eprint.iacr.org/2017/264 28 | [rfc7748_precomputed]: https://github.com/armfazh/rfc7748_precomputed 29 | [ed25519 gem]: https://github.com/RubyCrypto/ed25519 30 | 31 | ### Is it any good? 
32 | 33 | [Yes.](http://news.ycombinator.com/item?id=3067434) 34 | 35 | ### What is it useful for? 36 | 37 | X25519 is a key exchange/agreement algorithm generally used as a low-level 38 | building block in cryptographic protocols. 39 | 40 | ### Can I use X25519 to encrypt things? 41 | 42 | Please use [RbNaCl::Box] if you would like a high-level construction which uses 43 | X25519 for public-key encryption. Otherwise, the X25519 algorithm is not directly 44 | useful for encryption without a higher-level encryption protocol built on top of it. 45 | 46 | [RbNaCl::Box]: https://github.com/RubyCrypto/rbnacl/wiki/Public-Key-Encryption 47 | 48 | ## Requirements 49 | 50 | **x25519.rb** is supported on and tested against the following platforms: 51 | 52 | - MRI 2.7, 3.0, 3.1, 3.2, 3.3, 3.4 53 | 54 | ## Installation 55 | 56 | Add this line to your application's Gemfile: 57 | 58 | ```ruby 59 | gem "x25519" 60 | ``` 61 | 62 | And then execute: 63 | 64 | $ bundle 65 | 66 | Or install it yourself as: 67 | 68 | $ gem install x25519 69 | 70 | ## Usage 71 | 72 | The example below shows how to perform a full Diffie-Hellman key exchange: 73 | 74 | ```ruby 75 | require "x25519" 76 | 77 | # Alice generates random scalar (private key) 78 | alice_sk = X25519::Scalar.generate 79 | 80 | # Alice obtains public key for her private key/scalar 81 | alice_pk = alice_sk.public_key 82 | 83 | # Bob generates random scalar (private key) 84 | # Ostensibly this would be on a different computer somewhere 85 | bob_sk = X25519::Scalar.generate 86 | bob_pk = bob_sk.public_key 87 | 88 | # Alice can perform Diffie-Hellman with Bob's public key 89 | alice_secret = alice_sk.diffie_hellman(bob_pk).to_bytes 90 | 91 | # Bob can perform Diffie-Hellman with Alice's public key 92 | bob_secret = bob_sk.diffie_hellman(alice_pk).to_bytes 93 | 94 | # The resulting secrets should be the same 95 | alice_secret == bob_secret # true 96 | ``` 97 | 98 | ## X25519::Scalar: private keys 99 | 100 | The `X25519::Scalar` class 
represents secret integers used as X25519 private 101 | keys. These secret integers are multiplied by a well-known base point to 102 | obtain X25519 public keys (`X25519::MontgomeryU`). 103 | 104 | ### `X25519::Scalar.generate()`: make a random private key 105 | 106 | Generate a random private scalar (using `SecureRandom`) 107 | 108 | **Example:** 109 | 110 | ```ruby 111 | secret_key = X25519::Scalar.generate 112 | ``` 113 | 114 | ### `X25519::Scalar.new(bytes)`: load existing private key 115 | 116 | * `bytes`: a 32-byte `String` value containing the private key 117 | 118 | **Example:** 119 | 120 | ```ruby 121 | secret_key = X25519::Scalar.new(File.read("alice.key")) 122 | ``` 123 | 124 | ### `X25519::Scalar#public_key()`: obtain public key for this scalar 125 | 126 | NOTE: The `#multiply_base` method is an alias of this one. 127 | 128 | Performs fixed-base scalar multiplication (i.e. calculates public key) 129 | 130 | **Return Value:** 131 | 132 | Returns a `X25519::MontgomeryU` object which represents the public key for this private key/scalar. 133 | 134 | **Example:** 135 | 136 | ```ruby 137 | secret_key = X25519::Scalar.generate 138 | public_key = secret_key.public_key 139 | ``` 140 | 141 | ### `X25519::Scalar#diffie_hellman(other_public_key)`: compute shared secret with another public key 142 | 143 | NOTE: The `#multiply` method is an alias of this one. 144 | 145 | Performs variable-base scalar multiplication, computing a shared secret between 146 | our private scalar and someone else's public key/point. 147 | 148 | **Arguments:** 149 | 150 | * `other_public_key`: a `X25519::MontgomeryU` object containing the public key 151 | with which we'd like to compute a shared secret. 152 | 153 | **Return Value:** 154 | 155 | Returns a `X25519::MontgomeryU` object which represents the shared secret. 
156 | 157 | **Example:** 158 | 159 | ```ruby 160 | secret_key = X25519::Scalar.generate 161 | public_key = X25519::MontgomeryU.new(File.read("bob.pub")) 162 | 163 | # Returns an X25519::MontgomeryU 164 | shared_secret = secret_key.multiply(public_key) 165 | 166 | # Obtain the shared secret as a serialized byte representation 167 | shared_secret_bytes = shared_secret.to_bytes 168 | ``` 169 | 170 | ### `X25519::Scalar#to_bytes`: serialize a scalar as a `String` 171 | 172 | **Return Value:** 173 | 174 | Returns a `String` containing a byte representation of this scalar: 175 | 176 | **Example:** 177 | 178 | ```ruby 179 | secret_key = X25519::Scalar.new(...) 180 | File.write("alice.key", secret_key.to_bytes) 181 | ``` 182 | 183 | ## X25519::MontgomeryU: public keys and shared secrets 184 | 185 | The `X25519::MontgomeryU` class represents a coordinate (specifically a 186 | Montgomery-u coordinate) on the elliptic curve. In the X25519 Diffie-Hellman 187 | function, these serve both as public keys and as shared secrets. 188 | 189 | ### `X25519::MontgomeryU.new(bytes)`: load existing public key 190 | 191 | **Arguments:** 192 | 193 | * `bytes`: a 32-byte `String` value containing the public key 194 | 195 | **Example:** 196 | 197 | ```ruby 198 | public_key = X25519::MontgomeryU.new(File.read("bob.pub")) 199 | ``` 200 | 201 | ### `X25519::MontgomeryU#to_bytes`: serialize a Montgomery-u coordinate as a `String` 202 | 203 | **Return Value:** 204 | 205 | Returns a `String` containing a byte representation of a compressed Montgomery-u coordinate: 206 | 207 | **Example:** 208 | 209 | ```ruby 210 | public_key = X25519::MontgomeryU.new(...) 211 | File.write("bob.pub", public_key.to_bytes) 212 | ``` 213 | 214 | ## X25519: module-level functionality 215 | 216 | ### `X25519.diffie_hellman(secret_key, public_key)`: shorthand `String`-oriented API 217 | 218 | If you'd like to avoid the object-oriented API, you can use a simplified API which 219 | acts entirely on bytestrings. 
220 | 221 | **Arguments:** 222 | 223 | * `secret_key`: a 32-byte `String` containing a private scalar 224 | * `public_key`: a 32-byte `String` containing a compressed Montgomery-u coordinate 225 | 226 | **Return Value:** 227 | 228 | Returns a `String` containing a 32-byte compressed Montgomery-u coordinate 229 | 230 | ## Contributing 231 | 232 | Bug reports and pull requests are welcome on GitHub at https://github.com/RubyCrypto/x25519. 233 | This project is intended to be a safe, welcoming space for collaboration, 234 | and contributors are expected to adhere to the [Contributor Covenant](https://contributor-covenant.org) 235 | code of conduct. 236 | 237 | ## Implementation Details 238 | 239 | This gem contains two implementations of X25519: an optimized assembly 240 | implementation and a portable C implementation. Implementations are selected 241 | based on available CPU features. 242 | 243 | ### [rfc7748_precomputed]: optimized assembly implementation 244 | 245 | * Prime field arithmetic is optimized for the 4th and 6th generation of Intel Core processors 246 | (Haswell and Skylake micro-architectures). 247 | * Efficient integer multiplication using MULX instruction. 248 | * Integer additions accelerated with ADCX/ADOX instructions. 249 | * Key generation uses a read-only table of 8 KB for X25519. 250 | 251 | ### ref10: portable C implementation 252 | 253 | * Taken from the [SUPERCOP] cryptographic benchmarking suite (supercop-20171020) 254 | * Portable C code which should compile on any architecture 255 | 256 | [SUPERCOP]: https://bench.cr.yp.to/supercop.html 257 | 258 | ## Designers 259 | 260 | The X25519 Diffie-Hellman function was originally designed by Dan Bernstein: 261 | 262 | https://cr.yp.to/ecdh.html 263 | 264 | The optimized [rfc7748_precomputed] implementation was designed by: 265 | 266 | * Thomaz Oliveira, Computer Science Department, Cinvestav-IPN, Mexico. 267 | * Julio López, University of Campinas, Brazil. 
268 | * Hüseyin Hisil, Yasar University, Turkey. 269 | * Armando Faz-Hernández, University of Campinas, Brazil. 270 | * Francisco Rodríguez-Henríquez, Computer Science Department, Cinvestav-IPN, Mexico. 271 | 272 | ## License 273 | 274 | * Copyright (c) 2017-2018 Armando Faz 275 | * Copyright (c) 2017-2025 Tony Arcieri 276 | 277 | This gem is available as open source under the terms of the 278 | BSD-3 Clause License ([LICENSE](./LICENSE)) 279 | 280 | ## Code of Conduct 281 | 282 | Everyone interacting in the x25519.rb project’s codebases, issue trackers, chat 283 | rooms and mailing lists is expected to follow the [code of conduct]. 284 | 285 | [code of conduct]: https://github.com/RubyCrypto/x25519/blob/main/CODE_OF_CONDUCT.md 286 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/gem_tasks" 4 | 5 | require "rake/clean" 6 | CLEAN.include("**/*.o", "**/*.so", "**/*.bundle", "pkg", "tmp") 7 | 8 | require "rake/extensiontask" 9 | %w[precomputed ref10].each do |provider| 10 | next if provider == "precomputed" && RUBY_PLATFORM !~ /x86_64|x64/ 11 | 12 | Rake::ExtensionTask.new("x25519_#{provider}") do |ext| 13 | ext.ext_dir = "ext/x25519_#{provider}" 14 | end 15 | end 16 | 17 | require "rspec/core/rake_task" 18 | RSpec::Core::RakeTask.new 19 | 20 | require "rubocop/rake_task" 21 | RuboCop::RakeTask.new 22 | 23 | task default: %w[compile spec rubocop] 24 | -------------------------------------------------------------------------------- /ext/x25519_precomputed/cputest.c: -------------------------------------------------------------------------------- 1 | /* 2 | Test for 4th generation Intel Core processor family features (e.g. 
Haswell) 3 | From https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family 4 | */ 5 | 6 | #include 7 | #if defined(_MSC_VER) 8 | # include 9 | #endif 10 | 11 | static void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) 12 | { 13 | #if defined(_MSC_VER) 14 | __cpuidex(abcd, eax, ecx); 15 | #else 16 | uint32_t ebx = 0, edx; 17 | # if defined( __i386__ ) && defined ( __PIC__ ) 18 | /* in case of PIC under 32-bit EBX cannot be clobbered */ 19 | __asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx), 20 | # else 21 | __asm__ ( "cpuid" : "+b" (ebx), 22 | # endif 23 | "+a" (eax), "+c" (ecx), "=d" (edx) ); 24 | abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx; 25 | #endif 26 | } 27 | 28 | static int check_xcr0_ymm() 29 | { 30 | uint32_t xcr0; 31 | #if defined(_MSC_VER) 32 | xcr0 = (uint32_t)_xgetbv(0); /* min VS2010 SP1 compiler is required */ 33 | #else 34 | __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); 35 | #endif 36 | return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */ 37 | } 38 | 39 | int check_4th_gen_intel_core_features() 40 | { 41 | uint32_t abcd[4]; 42 | uint32_t fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27)); 43 | uint32_t avx2_bmi12_mask = (1 << 5) | (1 << 3) | (1 << 8); 44 | 45 | /* CPUID.(EAX=01H, ECX=0H):ECX.FMA[bit 12]==1 && 46 | CPUID.(EAX=01H, ECX=0H):ECX.MOVBE[bit 22]==1 && 47 | CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1 */ 48 | run_cpuid( 1, 0, abcd ); 49 | if ( (abcd[2] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask ) 50 | return 0; 51 | 52 | if ( ! 
check_xcr0_ymm() ) 53 | return 0; 54 | 55 | /* CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 && 56 | CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]==1 && 57 | CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1 */ 58 | run_cpuid( 7, 0, abcd ); 59 | if ( (abcd[1] & avx2_bmi12_mask) != avx2_bmi12_mask ) 60 | return 0; 61 | 62 | /* CPUID.(EAX=80000001H):ECX.LZCNT[bit 5]==1 */ 63 | run_cpuid( 0x80000001, 0, abcd ); 64 | if ( (abcd[2] & (1 << 5)) == 0) 65 | return 0; 66 | 67 | return 1; 68 | } 69 | -------------------------------------------------------------------------------- /ext/x25519_precomputed/extconf.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # rubocop:disable Style/GlobalVars 4 | 5 | require "mkmf" 6 | 7 | if RUBY_PLATFORM.match?(/x86_64|x64/) 8 | $CFLAGS << " -Wall -O3 -pedantic -std=c99 -mbmi -mbmi2 -march=haswell" 9 | 10 | create_makefile "x25519_precomputed" 11 | else 12 | File.write("Makefile", "install clean: ;") 13 | end 14 | 15 | # rubocop:enable Style/GlobalVars 16 | -------------------------------------------------------------------------------- /ext/x25519_precomputed/fp25519_x64.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2017, Armando Faz . All rights reserved. 3 | * Institute of Computing. 4 | * University of Campinas, Brazil. 5 | * 6 | * Copyright (C) 2018 Jason A. Donenfeld . All Rights Reserved. 7 | * Copyright (C) 2018 Samuel Neves . All Rights Reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions 11 | * are met: 12 | * 13 | * * Redistributions of source code must retain the above copyright 14 | * notice, this list of conditions and the following disclaimer. 
15 | * * Redistributions in binary form must reproduce the above 16 | * copyright notice, this list of conditions and the following 17 | * disclaimer in the documentation and/or other materials provided 18 | * with the distribution. 19 | * * Neither the name of University of Campinas nor the names of its 20 | * contributors may be used to endorse or promote products derived 21 | * from this software without specific prior written permission. 22 | * 23 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 34 | * OF THE POSSIBILITY OF SUCH DAMAGE. 
35 | */ 36 | 37 | #include "fp25519_x64.h" 38 | 39 | /** 40 | * 41 | * @param c Two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] 42 | * @param a Two 256-bit integers: a0[0:3] and a1[4:7] 43 | * @param b Two 256-bit integers: b0[0:3] and b1[4:7] 44 | */ 45 | void mul2_256x256_integer_x64(uint64_t *const c, uint64_t *const a, 46 | uint64_t *const b) { 47 | #ifdef __BMI2__ 48 | #ifdef __ADX__ 49 | __asm__ __volatile__( 50 | "xorl %%r14d, %%r14d ;" 51 | "movq (%1), %%rdx; " /* A[0] */ 52 | "mulx (%2), %%r8, %%r12; " /* A[0]*B[0] */ "xorl %%r10d, %%r10d ;" "movq %%r8, (%0) ;" 53 | "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ "adox %%r10, %%r12 ;" 54 | "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ "adox %%r8, %%rax ;" 55 | "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ "adox %%r10, %%rbx ;" 56 | /*******************************************/ "adox %%r14, %%rcx ;" 57 | 58 | "movq 8(%1), %%rdx; " /* A[1] */ 59 | "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ "adox %%r12, %%r8 ;" "movq %%r8, 8(%0) ;" 60 | "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ "adox %%r10, %%r9 ;" "adcx %%r9, %%rax ;" 61 | "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ "adox %%r8, %%r11 ;" "adcx %%r11, %%rbx ;" 62 | "mulx 24(%2), %%r10, %%r12; " /* A[1]*B[3] */ "adox %%r10, %%r13 ;" "adcx %%r13, %%rcx ;" 63 | /*******************************************/ "adox %%r14, %%r12 ;" "adcx %%r14, %%r12 ;" 64 | 65 | "movq 16(%1), %%rdx; " /* A[2] */ "xorl %%r10d, %%r10d ;" 66 | "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ "adox %%rax, %%r8 ;" "movq %%r8, 16(%0) ;" 67 | "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ "adox %%r10, %%r9 ;" "adcx %%r9, %%rbx ;" 68 | "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ "adox %%r8, %%r11 ;" "adcx %%r11, %%rcx ;" 69 | "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ "adox %%r10, %%r13 ;" "adcx %%r13, %%r12 ;" 70 | /*******************************************/ "adox %%r14, %%rax ;" "adcx %%r14, %%rax ;" 71 | 72 | "movq 24(%1), %%rdx; " /* A[3] */ "xorl %%r10d, %%r10d 
;" 73 | "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ "adox %%rbx, %%r8 ;" "movq %%r8, 24(%0) ;" 74 | "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ "adox %%r10, %%r9 ;" "adcx %%r9, %%rcx ;" "movq %%rcx, 32(%0) ;" 75 | "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ "adox %%r8, %%r11 ;" "adcx %%r11, %%r12 ;" "movq %%r12, 40(%0) ;" 76 | "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ "adox %%r10, %%r13 ;" "adcx %%r13, %%rax ;" "movq %%rax, 48(%0) ;" 77 | /*******************************************/ "adox %%r14, %%rbx ;" "adcx %%r14, %%rbx ;" "movq %%rbx, 56(%0) ;" 78 | 79 | "movq 32(%1), %%rdx; " /* C[0] */ 80 | "mulx 32(%2), %%r8, %%r12; " /* C[0]*D[0] */ "xorl %%r10d, %%r10d ;" "movq %%r8, 64(%0);" 81 | "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ "adox %%r10, %%r12 ;" 82 | "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ "adox %%r8, %%rax ;" 83 | "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ "adox %%r10, %%rbx ;" 84 | /*******************************************/ "adox %%r14, %%rcx ;" 85 | 86 | "movq 40(%1), %%rdx; " /* C[1] */ "xorl %%r10d, %%r10d ;" 87 | "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ "adox %%r12, %%r8 ;" "movq %%r8, 72(%0);" 88 | "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ "adox %%r10, %%r9 ;" "adcx %%r9, %%rax ;" 89 | "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ "adox %%r8, %%r11 ;" "adcx %%r11, %%rbx ;" 90 | "mulx 56(%2), %%r10, %%r12; " /* C[1]*D[3] */ "adox %%r10, %%r13 ;" "adcx %%r13, %%rcx ;" 91 | /*******************************************/ "adox %%r14, %%r12 ;" "adcx %%r14, %%r12 ;" 92 | 93 | "movq 48(%1), %%rdx; " /* C[2] */ "xorl %%r10d, %%r10d ;" 94 | "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ "adox %%rax, %%r8 ;" "movq %%r8, 80(%0);" 95 | "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ "adox %%r10, %%r9 ;" "adcx %%r9, %%rbx ;" 96 | "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ "adox %%r8, %%r11 ;" "adcx %%r11, %%rcx ;" 97 | "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ "adox %%r10, %%r13 ;" "adcx %%r13, %%r12 ;" 98 | 
/*******************************************/ "adox %%r14, %%rax ;" "adcx %%r14, %%rax ;" 99 | 100 | "movq 56(%1), %%rdx; " /* C[3] */ "xorl %%r10d, %%r10d ;" 101 | "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ "adox %%rbx, %%r8 ;" "movq %%r8, 88(%0);" 102 | "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ "adox %%r10, %%r9 ;" "adcx %%r9, %%rcx ;" "movq %%rcx, 96(%0) ;" 103 | "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ "adox %%r8, %%r11 ;" "adcx %%r11, %%r12 ;" "movq %%r12, 104(%0) ;" 104 | "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ "adox %%r10, %%r13 ;" "adcx %%r13, %%rax ;" "movq %%rax, 112(%0) ;" 105 | /*******************************************/ "adox %%r14, %%rbx ;" "adcx %%r14, %%rbx ;" "movq %%rbx, 120(%0) ;" 106 | : 107 | : "r" (c), "r" (a), "r" (b) 108 | : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", 109 | "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14" 110 | ); 111 | #else 112 | __asm__ __volatile__( 113 | "movq (%1), %%rdx; " /* A[0] */ 114 | "mulx (%2), %%r8, %%r12; " /* A[0]*B[0] */ "movq %%r8, (%0) ;" 115 | "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ "addq %%r10, %%r12 ;" 116 | "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ "adcq %%r8, %%rax ;" 117 | "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ "adcq %%r10, %%rbx ;" 118 | /*******************************************/ "adcq $0, %%rcx ;" 119 | 120 | "movq 8(%1), %%rdx; " /* A[1] */ 121 | "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ "addq %%r12, %%r8 ;" "movq %%r8, 8(%0) ;" 122 | "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ "adcq %%r10, %%r9 ;" 123 | "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ "adcq %%r8, %%r11 ;" 124 | "mulx 24(%2), %%r10, %%r12; " /* A[1]*B[3] */ "adcq %%r10, %%r13 ;" 125 | /*******************************************/ "adcq $0, %%r12 ;" 126 | 127 | "addq %%r9, %%rax ;" 128 | "adcq %%r11, %%rbx ;" 129 | "adcq %%r13, %%rcx ;" 130 | "adcq $0, %%r12 ;" 131 | 132 | "movq 16(%1), %%rdx; " /* A[2] */ 133 | "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ "addq %%rax, %%r8 ;" "movq %%r8, 
16(%0) ;" 134 | "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ "adcq %%r10, %%r9 ;" 135 | "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ "adcq %%r8, %%r11 ;" 136 | "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ "adcq %%r10, %%r13 ;" 137 | /*******************************************/ "adcq $0, %%rax ;" 138 | 139 | "addq %%r9, %%rbx ;" 140 | "adcq %%r11, %%rcx ;" 141 | "adcq %%r13, %%r12 ;" 142 | "adcq $0, %%rax ;" 143 | 144 | "movq 24(%1), %%rdx; " /* A[3] */ 145 | "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ "addq %%rbx, %%r8 ;" "movq %%r8, 24(%0) ;" 146 | "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ "adcq %%r10, %%r9 ;" 147 | "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ "adcq %%r8, %%r11 ;" 148 | "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ "adcq %%r10, %%r13 ;" 149 | /*******************************************/ "adcq $0, %%rbx ;" 150 | 151 | "addq %%r9, %%rcx ;" "movq %%rcx, 32(%0) ;" 152 | "adcq %%r11, %%r12 ;" "movq %%r12, 40(%0) ;" 153 | "adcq %%r13, %%rax ;" "movq %%rax, 48(%0) ;" 154 | "adcq $0, %%rbx ;" "movq %%rbx, 56(%0) ;" 155 | 156 | "movq 32(%1), %%rdx; " /* C[0] */ 157 | "mulx 32(%2), %%r8, %%r12; " /* C[0]*D[0] */ "movq %%r8, 64(%0) ;" 158 | "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ "addq %%r10, %%r12 ;" 159 | "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ "adcq %%r8, %%rax ;" 160 | "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ "adcq %%r10, %%rbx ;" 161 | /*******************************************/ "adcq $0, %%rcx ;" 162 | 163 | "movq 40(%1), %%rdx; " /* C[1] */ 164 | "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ "addq %%r12, %%r8 ;" "movq %%r8, 72(%0) ;" 165 | "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ "adcq %%r10, %%r9 ;" 166 | "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ "adcq %%r8, %%r11 ;" 167 | "mulx 56(%2), %%r10, %%r12; " /* C[1]*D[3] */ "adcq %%r10, %%r13 ;" 168 | /*******************************************/ "adcq $0, %%r12 ;" 169 | 170 | "addq %%r9, %%rax ;" 171 | "adcq %%r11, %%rbx ;" 172 | "adcq %%r13, %%rcx ;" 173 | "adcq $0, %%r12 ;" 
174 | 175 | "movq 48(%1), %%rdx; " /* C[2] */ 176 | "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ "addq %%rax, %%r8 ;" "movq %%r8, 80(%0) ;" 177 | "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ "adcq %%r10, %%r9 ;" 178 | "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ "adcq %%r8, %%r11 ;" 179 | "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ "adcq %%r10, %%r13 ;" 180 | /*******************************************/ "adcq $0, %%rax ;" 181 | 182 | "addq %%r9, %%rbx ;" 183 | "adcq %%r11, %%rcx ;" 184 | "adcq %%r13, %%r12 ;" 185 | "adcq $0, %%rax ;" 186 | 187 | "movq 56(%1), %%rdx; " /* C[3] */ 188 | "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ "addq %%rbx, %%r8 ;" "movq %%r8, 88(%0) ;" 189 | "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ "adcq %%r10, %%r9 ;" 190 | "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ "adcq %%r8, %%r11 ;" 191 | "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ "adcq %%r10, %%r13 ;" 192 | /*******************************************/ "adcq $0, %%rbx ;" 193 | 194 | "addq %%r9, %%rcx ;" "movq %%rcx, 96(%0) ;" 195 | "adcq %%r11, %%r12 ;" "movq %%r12, 104(%0) ;" 196 | "adcq %%r13, %%rax ;" "movq %%rax, 112(%0) ;" 197 | "adcq $0, %%rbx ;" "movq %%rbx, 120(%0) ;" 198 | : 199 | : "r" (c), "r" (a), "r" (b) 200 | : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", 201 | "%r8", "%r9", "%r10", "%r11", "%r12", "%r13" 202 | ); 203 | #endif 204 | #else /* Without BMI2 */ 205 | /** 206 | * TODO: Multiplications using MULQ instruction. 
207 | **/ 208 | #endif 209 | } 210 | 211 | /** 212 | * 213 | * @param c 214 | * @param a 215 | */ 216 | void sqr2_256x256_integer_x64(uint64_t *const c, uint64_t *const a) { 217 | #ifdef __BMI2__ 218 | #ifdef __ADX__ 219 | __asm__ __volatile__( 220 | "movq (%1), %%rdx ;" /* A[0] */ 221 | "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ "xorl %%r15d, %%r15d;" 222 | "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ "adcx %%r14, %%r9 ;" 223 | "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ "adcx %%rax, %%r10 ;" 224 | "movq 24(%1), %%rdx ;" /* A[3] */ 225 | "mulx 8(%1), %%r11, %%r12 ;" /* A[1]*A[3] */ "adcx %%rcx, %%r11 ;" 226 | "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ "adcx %%rax, %%r12 ;" 227 | "movq 8(%1), %%rdx ;" /* A[1] */ "adcx %%r15, %%r13 ;" 228 | "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ "movq $0, %%r14 ;" 229 | /*******************************************/ "adcx %%r15, %%r14 ;" 230 | 231 | "xorl %%r15d, %%r15d;" 232 | "adox %%rax, %%r10 ;" "adcx %%r8, %%r8 ;" 233 | "adox %%rcx, %%r11 ;" "adcx %%r9, %%r9 ;" 234 | "adox %%r15, %%r12 ;" "adcx %%r10, %%r10 ;" 235 | "adox %%r15, %%r13 ;" "adcx %%r11, %%r11 ;" 236 | "adox %%r15, %%r14 ;" "adcx %%r12, %%r12 ;" 237 | "adcx %%r13, %%r13 ;" 238 | "adcx %%r14, %%r14 ;" 239 | 240 | "movq (%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ 241 | /********************/ "movq %%rax, 0(%0) ;" 242 | "addq %%rcx, %%r8 ;" "movq %%r8, 8(%0) ;" 243 | "movq 8(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ 244 | "adcq %%rax, %%r9 ;" "movq %%r9, 16(%0) ;" 245 | "adcq %%rcx, %%r10 ;" "movq %%r10, 24(%0) ;" 246 | "movq 16(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ 247 | "adcq %%rax, %%r11 ;" "movq %%r11, 32(%0) ;" 248 | "adcq %%rcx, %%r12 ;" "movq %%r12, 40(%0) ;" 249 | "movq 24(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ 250 | "adcq %%rax, %%r13 ;" "movq %%r13, 48(%0) ;" 251 | "adcq %%rcx, %%r14 ;" "movq %%r14, 56(%0) ;" 252 | 253 | 254 | "movq 32(%1), %%rdx ;" /* B[0] */ 255 | "mulx 40(%1), %%r8, 
%%r14 ;" /* B[1]*B[0] */ "xorl %%r15d, %%r15d;" 256 | "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ "adcx %%r14, %%r9 ;" 257 | "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ "adcx %%rax, %%r10 ;" 258 | "movq 56(%1), %%rdx ;" /* B[3] */ 259 | "mulx 40(%1), %%r11, %%r12 ;" /* B[1]*B[3] */ "adcx %%rcx, %%r11 ;" 260 | "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ "adcx %%rax, %%r12 ;" 261 | "movq 40(%1), %%rdx ;" /* B[1] */ "adcx %%r15, %%r13 ;" 262 | "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ "movq $0, %%r14 ;" 263 | /*******************************************/ "adcx %%r15, %%r14 ;" 264 | 265 | "xorl %%r15d, %%r15d;" 266 | "adox %%rax, %%r10 ;" "adcx %%r8, %%r8 ;" 267 | "adox %%rcx, %%r11 ;" "adcx %%r9, %%r9 ;" 268 | "adox %%r15, %%r12 ;" "adcx %%r10, %%r10 ;" 269 | "adox %%r15, %%r13 ;" "adcx %%r11, %%r11 ;" 270 | "adox %%r15, %%r14 ;" "adcx %%r12, %%r12 ;" 271 | "adcx %%r13, %%r13 ;" 272 | "adcx %%r14, %%r14 ;" 273 | 274 | "movq 32(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ 275 | /********************/ "movq %%rax, 64(%0) ;" 276 | "addq %%rcx, %%r8 ;" "movq %%r8, 72(%0) ;" 277 | "movq 40(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ 278 | "adcq %%rax, %%r9 ;" "movq %%r9, 80(%0) ;" 279 | "adcq %%rcx, %%r10 ;" "movq %%r10, 88(%0) ;" 280 | "movq 48(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */ 281 | "adcq %%rax, %%r11 ;" "movq %%r11, 96(%0) ;" 282 | "adcq %%rcx, %%r12 ;" "movq %%r12, 104(%0) ;" 283 | "movq 56(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ 284 | "adcq %%rax, %%r13 ;" "movq %%r13, 112(%0) ;" 285 | "adcq %%rcx, %%r14 ;" "movq %%r14, 120(%0) ;" 286 | : 287 | : "r" (c), "r" (a) 288 | : "memory", "cc", "%rax", "%rcx", "%rdx", 289 | "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" 290 | ); 291 | #else /* Without ADX */ 292 | __asm__ __volatile__( 293 | "movq 8(%1), %%rdx ;" /* A[1] */ 294 | "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ 295 | "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ 296 | "mulx 24(%1), 
%%rcx, %%r14 ;" /* A[3]*A[1] */ 297 | 298 | "movq 16(%1), %%rdx ;" /* A[2] */ 299 | "mulx 24(%1), %%r12, %%r13 ;" /* A[3]*A[2] */ 300 | "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ 301 | 302 | "addq %%rax, %%r9 ;" 303 | "adcq %%rdx, %%r10 ;" 304 | "adcq %%rcx, %%r11 ;" 305 | "adcq %%r14, %%r12 ;" 306 | "adcq $0, %%r13 ;" 307 | "movq $0, %%r14 ;" 308 | "adcq $0, %%r14 ;" 309 | 310 | "movq (%1), %%rdx ;" /* A[0] */ 311 | "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ 312 | 313 | "addq %%rax, %%r10 ;" 314 | "adcq %%rcx, %%r11 ;" 315 | "adcq $0, %%r12 ;" 316 | "adcq $0, %%r13 ;" 317 | "adcq $0, %%r14 ;" 318 | 319 | "shldq $1, %%r13, %%r14 ;" 320 | "shldq $1, %%r12, %%r13 ;" 321 | "shldq $1, %%r11, %%r12 ;" 322 | "shldq $1, %%r10, %%r11 ;" 323 | "shldq $1, %%r9, %%r10 ;" 324 | "shldq $1, %%r8, %%r9 ;" 325 | "shlq $1, %%r8 ;" 326 | 327 | /********************/ "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */ 328 | /********************/ "movq %%rax, 0(%0) ;" 329 | "addq %%rcx, %%r8 ;" "movq %%r8, 8(%0) ;" 330 | "movq 8(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ 331 | "adcq %%rax, %%r9 ;" "movq %%r9, 16(%0) ;" 332 | "adcq %%rcx, %%r10 ;" "movq %%r10, 24(%0) ;" 333 | "movq 16(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ 334 | "adcq %%rax, %%r11 ;" "movq %%r11, 32(%0) ;" 335 | "adcq %%rcx, %%r12 ;" "movq %%r12, 40(%0) ;" 336 | "movq 24(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ 337 | "adcq %%rax, %%r13 ;" "movq %%r13, 48(%0) ;" 338 | "adcq %%rcx, %%r14 ;" "movq %%r14, 56(%0) ;" 339 | 340 | "movq 40(%1), %%rdx ;" /* B[1] */ 341 | "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ 342 | "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ 343 | "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */ 344 | 345 | "movq 48(%1), %%rdx ;" /* B[2] */ 346 | "mulx 56(%1), %%r12, %%r13 ;" /* B[3]*B[2] */ 347 | "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ 348 | 349 | "addq %%rax, %%r9 ;" 350 | "adcq %%rdx, %%r10 ;" 351 | "adcq %%rcx, %%r11 ;" 352 | "adcq %%r14, %%r12 ;" 353 | 
"adcq $0, %%r13 ;" 354 | "movq $0, %%r14 ;" 355 | "adcq $0, %%r14 ;" 356 | 357 | "movq 32(%1), %%rdx ;" /* B[0] */ 358 | "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ 359 | 360 | "addq %%rax, %%r10 ;" 361 | "adcq %%rcx, %%r11 ;" 362 | "adcq $0, %%r12 ;" 363 | "adcq $0, %%r13 ;" 364 | "adcq $0, %%r14 ;" 365 | 366 | "shldq $1, %%r13, %%r14 ;" 367 | "shldq $1, %%r12, %%r13 ;" 368 | "shldq $1, %%r11, %%r12 ;" 369 | "shldq $1, %%r10, %%r11 ;" 370 | "shldq $1, %%r9, %%r10 ;" 371 | "shldq $1, %%r8, %%r9 ;" 372 | "shlq $1, %%r8 ;" 373 | 374 | /********************/ "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */ 375 | /********************/ "movq %%rax, 64(%0) ;" 376 | "addq %%rcx, %%r8 ;" "movq %%r8, 72(%0) ;" 377 | "movq 40(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ 378 | "adcq %%rax, %%r9 ;" "movq %%r9, 80(%0) ;" 379 | "adcq %%rcx, %%r10 ;" "movq %%r10, 88(%0) ;" 380 | "movq 48(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ 381 | "adcq %%rax, %%r11 ;" "movq %%r11, 96(%0) ;" 382 | "adcq %%rcx, %%r12 ;" "movq %%r12, 104(%0) ;" 383 | "movq 56(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ 384 | "adcq %%rax, %%r13 ;" "movq %%r13, 112(%0) ;" 385 | "adcq %%rcx, %%r14 ;" "movq %%r14, 120(%0) ;" 386 | : 387 | : "r" (c), "r" (a) 388 | : "memory", "cc", "%rax", "%rcx", "%rdx", 389 | "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14" 390 | ); 391 | #endif 392 | #else /* Without BMI2 */ 393 | /** 394 | * TODO: Multiplications using MULQ instruction. 
395 | **/ 396 | #endif 397 | } 398 | 399 | /** 400 | * Reduces two 512-bit integers to 256 bits each using 2^256 = 38 (mod 2^255-19): each result is the low 256 bits plus 38 times the high 256 bits, with the overflow word folded back in the same way. Each result is congruent to its input modulo 2^255-19 but is not necessarily smaller than 2^255-19. 401 | * @param c Two 256-bit results: c0[0:3] and c1[4:7] 402 | * @param a Two 512-bit integers: a0[0:7] and a1[8:15] 403 | */ 404 | void red_EltFp25519_2w_x64(uint64_t *const c, uint64_t *const a) { 405 | #ifdef __BMI2__ 406 | #ifdef __ADX__ 407 | __asm__ __volatile__( 408 | "movl $38, %%edx; " /* 2*c = 38 = 2^256 mod 2^255-19 */ 409 | "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ "xorl %%ebx, %%ebx ;" "adox (%1), %%r8 ;" 410 | "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ "adcx %%r10, %%r9 ;" "adox 8(%1), %%r9 ;" 411 | "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ "adcx %%r11, %%r10 ;" "adox 16(%1), %%r10 ;" 412 | "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ "adcx %%rax, %%r11 ;" "adox 24(%1), %%r11 ;" 413 | /****************************************/ "adcx %%rbx, %%rcx ;" "adox %%rbx, %%rcx ;" 414 | "clc ;" 415 | "mulx %%rcx, %%rax, %%rcx ; " /* c*carry: fold the word above bit 255 back in */ 416 | "adcx %%rax, %%r8 ;" 417 | "adcx %%rcx, %%r9 ;" "movq %%r9, 8(%0) ;" 418 | "adcx %%rbx, %%r10 ;" "movq %%r10, 16(%0) ;" 419 | "adcx %%rbx, %%r11 ;" "movq %%r11, 24(%0) ;" 420 | "mov $0, %%ecx ;" 421 | "cmovc %%edx, %%ecx ;" /* rcx = 38 if the fold carried out of 256 bits, else 0 */ 422 | "addq %%rcx, %%r8 ;" "movq %%r8, (%0) ;" 423 | /* Second operand: same reduction on a[8:15], stored to c[4:7]. */ 424 | "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */ "xorl %%ebx, %%ebx ;" "adox 64(%1), %%r8 ;" 425 | "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */ "adcx %%r10, %%r9 ;" "adox 72(%1), %%r9 ;" 426 | "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ "adcx %%r11, %%r10 ;" "adox 80(%1), %%r10 ;" 427 | "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */ "adcx %%rax, %%r11 ;" "adox 88(%1), %%r11 ;" 428 | /*****************************************/ "adcx %%rbx, %%rcx ;" "adox %%rbx, %%rcx ;" 429 | "clc ;" 430 | "mulx %%rcx, %%rax, %%rcx ; " /* c*carry: fold the word above bit 255 back in */ 431 | "adcx %%rax, %%r8 ;" 432 | "adcx %%rcx, %%r9 ;" "movq %%r9, 40(%0) ;" 433 | "adcx %%rbx, %%r10 ;" "movq %%r10, 48(%0) ;" 434 | "adcx %%rbx, %%r11 ;" "movq %%r11, 56(%0) ;" 435 | "mov $0, %%ecx ;" 436 | "cmovc %%edx, %%ecx ;" /* rcx = 38 if the fold carried out of 256 bits, else 0 */ 437 | "addq %%rcx, %%r8 ;" "movq %%r8, 32(%0) ;" 438 | : 439 | : "r" (c), "r" (a) 440 | : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", 
"%r8", "%r9", "%r10", "%r11" 441 | ); 442 | #else 443 | __asm__ __volatile__( 444 | "movl $38, %%edx ; " /* 2*c = 38 = 2^256 mod 2^255-19 */ 445 | "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ 446 | "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ "addq %%r10, %%r9 ;" 447 | "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ "adcq %%r11, %%r10 ;" 448 | "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ "adcq %%rax, %%r11 ;" 449 | /****************************************/ "adcq $0, %%rcx ;" 450 | "addq (%1), %%r8 ;" 451 | "adcq 8(%1), %%r9 ;" 452 | "adcq 16(%1), %%r10 ;" 453 | "adcq 24(%1), %%r11 ;" 454 | "adcq $0, %%rcx ;" 455 | "mulx %%rcx, %%rax, %%rcx ;" /* c*carry: fold the word above bit 255 back in */ 456 | "addq %%rax, %%r8 ;" 457 | "adcq %%rcx, %%r9 ;" "movq %%r9, 8(%0) ;" 458 | "adcq $0, %%r10 ;" "movq %%r10, 16(%0) ;" 459 | "adcq $0, %%r11 ;" "movq %%r11, 24(%0) ;" 460 | "mov $0, %%ecx ;" 461 | "cmovc %%edx, %%ecx ;" /* rcx = 38 if the fold carried out of 256 bits, else 0 */ 462 | "addq %%rcx, %%r8 ;" "movq %%r8, (%0) ;" 463 | /* Second operand: same reduction on a[8:15], stored to c[4:7]. */ 464 | "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */ 465 | "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ "addq %%r10, %%r9 ;" 466 | "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ "adcq %%r11, %%r10 ;" 467 | "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ "adcq %%rax, %%r11 ;" 468 | /*****************************************/ "adcq $0, %%rcx ;" 469 | "addq 64(%1), %%r8 ;" 470 | "adcq 72(%1), %%r9 ;" 471 | "adcq 80(%1), %%r10 ;" 472 | "adcq 88(%1), %%r11 ;" 473 | "adcq $0, %%rcx ;" 474 | "mulx %%rcx, %%rax, %%rcx ;" /* c*carry: fold the word above bit 255 back in */ 475 | "addq %%rax, %%r8 ;" 476 | "adcq %%rcx, %%r9 ;" "movq %%r9, 40(%0) ;" 477 | "adcq $0, %%r10 ;" "movq %%r10, 48(%0) ;" 478 | "adcq $0, %%r11 ;" "movq %%r11, 56(%0) ;" 479 | "mov $0, %%ecx ;" 480 | "cmovc %%edx, %%ecx ;" /* rcx = 38 if the fold carried out of 256 bits, else 0 */ 481 | "addq %%rcx, %%r8 ;" "movq %%r8, 32(%0) ;" 482 | : 483 | : "r" (c), "r" (a) 484 | : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11" 485 | ); 486 | #endif 487 | #else /* Without BMI2 */ 488 | /* [TODO] No fallback yet: without __BMI2__ this function body is empty and c is left unmodified. */ 489 | #endif 490 | } 491 | /* Computes the 512-bit product c[0:7] = a[0:3] * b[0:3]. */ 492 | void mul_256x256_integer_x64(uint64_t *const c, uint64_t *const a, uint64_t 
*const b) { 493 | #ifdef __BMI2__ 494 | #ifdef __ADX__ 495 | __asm__ __volatile__( 496 | "movq (%1), %%rdx; " /* A[0] */ 497 | "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ "xorl %%r10d, %%r10d ;" "movq %%r8, (%0) ;" 498 | "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ "adox %%r9, %%r10 ;" "movq %%r10, 8(%0) ;" 499 | "mulx 16(%2), %%r12, %%r13; " /* A[0]*B[2] */ "adox %%r11, %%r12 ;" 500 | "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ "adox %%r13, %%r14 ;" "movq $0, %%rax ;" 501 | /*******************************************/ "adox %%rdx, %%rax ;" 502 | 503 | "movq 8(%1), %%rdx; " /* A[1] */ 504 | "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ "xorl %%r10d, %%r10d ;" "adcx 8(%0), %%r8 ;" "movq %%r8, 8(%0) ;" 505 | "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ "adox %%r9, %%r10 ;" "adcx %%r12, %%r10 ;" "movq %%r10, 16(%0) ;" 506 | "mulx 16(%2), %%r12, %%r13; " /* A[1]*B[2] */ "adox %%r11, %%r12 ;" "adcx %%r14, %%r12 ;" "movq $0, %%r8 ;" 507 | "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ "adox %%r13, %%r14 ;" "adcx %%rax, %%r14 ;" "movq $0, %%rax ;" 508 | /*******************************************/ "adox %%rdx, %%rax ;" "adcx %%r8, %%rax ;" 509 | 510 | "movq 16(%1), %%rdx; " /* A[2] */ 511 | "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ "xorl %%r10d, %%r10d ;" "adcx 16(%0), %%r8 ;" "movq %%r8, 16(%0) ;" 512 | "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ "adox %%r9, %%r10 ;" "adcx %%r12, %%r10 ;" "movq %%r10, 24(%0) ;" 513 | "mulx 16(%2), %%r12, %%r13; " /* A[2]*B[2] */ "adox %%r11, %%r12 ;" "adcx %%r14, %%r12 ;" "movq $0, %%r8 ;" 514 | "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ "adox %%r13, %%r14 ;" "adcx %%rax, %%r14 ;" "movq $0, %%rax ;" 515 | /*******************************************/ "adox %%rdx, %%rax ;" "adcx %%r8, %%rax ;" 516 | 517 | "movq 24(%1), %%rdx; " /* A[3] */ 518 | "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ "xorl %%r10d, %%r10d ;" "adcx 24(%0), %%r8 ;" "movq %%r8, 24(%0) ;" 519 | "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ "adox %%r9, %%r10 ;" "adcx %%r12, 
%%r10 ;" "movq %%r10, 32(%0) ;" 520 | "mulx 16(%2), %%r12, %%r13; " /* A[3]*B[2] */ "adox %%r11, %%r12 ;" "adcx %%r14, %%r12 ;" "movq %%r12, 40(%0) ;" "movq $0, %%r8 ;" 521 | "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ "adox %%r13, %%r14 ;" "adcx %%rax, %%r14 ;" "movq %%r14, 48(%0) ;" "movq $0, %%rax ;" 522 | /*******************************************/ "adox %%rdx, %%rax ;" "adcx %%r8, %%rax ;" "movq %%rax, 56(%0) ;" 523 | : 524 | : "r" (c), "r" (a), "r" (b) 525 | : "memory", "cc", "%rax", "%rdx", "%r8", 526 | "%r9", "%r10", "%r11", "%r12", "%r13", "%r14" 527 | ); 528 | #else 529 | __asm__ __volatile__( 530 | "movq (%1), %%rdx; " /* A[0] */ 531 | "mulx (%2), %%r8, %%r12; " /* A[0]*B[0] */ "movq %%r8, (%0) ;" 532 | "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ "addq %%r10, %%r12 ;" 533 | "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ "adcq %%r8, %%rax ;" 534 | "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ "adcq %%r10, %%rbx ;" 535 | /*******************************************/ "adcq $0, %%rcx ;" 536 | 537 | "movq 8(%1), %%rdx; " /* A[1] */ 538 | "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ "addq %%r12, %%r8 ;" "movq %%r8, 8(%0) ;" 539 | "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ "adcq %%r10, %%r9 ;" 540 | "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ "adcq %%r8, %%r11 ;" 541 | "mulx 24(%2), %%r10, %%r12; " /* A[1]*B[3] */ "adcq %%r10, %%r13 ;" 542 | /*******************************************/ "adcq $0, %%r12 ;" 543 | 544 | "addq %%r9, %%rax ;" 545 | "adcq %%r11, %%rbx ;" 546 | "adcq %%r13, %%rcx ;" 547 | "adcq $0, %%r12 ;" 548 | 549 | "movq 16(%1), %%rdx; " /* A[2] */ 550 | "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ "addq %%rax, %%r8 ;" "movq %%r8, 16(%0) ;" 551 | "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ "adcq %%r10, %%r9 ;" 552 | "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ "adcq %%r8, %%r11 ;" 553 | "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ "adcq %%r10, %%r13 ;" 554 | /*******************************************/ "adcq $0, %%rax ;" 555 | 556 | "addq %%r9, 
%%rbx ;" 557 | "adcq %%r11, %%rcx ;" 558 | "adcq %%r13, %%r12 ;" 559 | "adcq $0, %%rax ;" 560 | 561 | "movq 24(%1), %%rdx; " /* A[3] */ 562 | "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ "addq %%rbx, %%r8 ;" "movq %%r8, 24(%0) ;" 563 | "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ "adcq %%r10, %%r9 ;" 564 | "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ "adcq %%r8, %%r11 ;" 565 | "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ "adcq %%r10, %%r13 ;" 566 | /*******************************************/ "adcq $0, %%rbx ;" 567 | 568 | "addq %%r9, %%rcx ;" "movq %%rcx, 32(%0) ;" 569 | "adcq %%r11, %%r12 ;" "movq %%r12, 40(%0) ;" 570 | "adcq %%r13, %%rax ;" "movq %%rax, 48(%0) ;" 571 | "adcq $0, %%rbx ;" "movq %%rbx, 56(%0) ;" 572 | : 573 | : "r" (c), "r" (a), "r" (b) 574 | : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", 575 | "%r8", "%r9", "%r10", "%r11", "%r12", "%r13" 576 | ); 577 | #endif 578 | #else /* Without BMI2 */ 579 | /** 580 | * TODO: Multiplications using MULQ instruction. 581 | **/ 582 | #endif 583 | } 584 | 585 | void sqr_256x256_integer_x64(uint64_t *const c, uint64_t *const a) { 586 | #ifdef __BMI2__ 587 | #ifdef __ADX__ 588 | __asm__ __volatile__( 589 | "movq (%1), %%rdx ;" /* A[0] */ 590 | "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ "xorl %%r15d, %%r15d;" 591 | "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ "adcx %%r14, %%r9 ;" 592 | "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ "adcx %%rax, %%r10 ;" 593 | "movq 24(%1), %%rdx ;" /* A[3] */ 594 | "mulx 8(%1), %%r11, %%r12 ;" /* A[1]*A[3] */ "adcx %%rcx, %%r11 ;" 595 | "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ "adcx %%rax, %%r12 ;" 596 | "movq 8(%1), %%rdx ;" /* A[1] */ "adcx %%r15, %%r13 ;" 597 | "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ "movq $0, %%r14 ;" 598 | /*******************************************/ "adcx %%r15, %%r14 ;" 599 | 600 | "xorl %%r15d, %%r15d;" 601 | "adox %%rax, %%r10 ;" "adcx %%r8, %%r8 ;" 602 | "adox %%rcx, %%r11 ;" "adcx %%r9, %%r9 ;" 603 | "adox %%r15, %%r12 ;" "adcx %%r10, %%r10 
;" 604 | "adox %%r15, %%r13 ;" "adcx %%r11, %%r11 ;" 605 | "adox %%r15, %%r14 ;" "adcx %%r12, %%r12 ;" 606 | "adcx %%r13, %%r13 ;" 607 | "adcx %%r14, %%r14 ;" 608 | 609 | "movq (%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ 610 | /********************/ "movq %%rax, 0(%0) ;" 611 | "addq %%rcx, %%r8 ;" "movq %%r8, 8(%0) ;" 612 | "movq 8(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ 613 | "adcq %%rax, %%r9 ;" "movq %%r9, 16(%0) ;" 614 | "adcq %%rcx, %%r10 ;" "movq %%r10, 24(%0) ;" 615 | "movq 16(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ 616 | "adcq %%rax, %%r11 ;" "movq %%r11, 32(%0) ;" 617 | "adcq %%rcx, %%r12 ;" "movq %%r12, 40(%0) ;" 618 | "movq 24(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ 619 | "adcq %%rax, %%r13 ;" "movq %%r13, 48(%0) ;" 620 | "adcq %%rcx, %%r14 ;" "movq %%r14, 56(%0) ;" 621 | : 622 | : "r" (c), "r" (a) 623 | : "memory", "cc", "%rax", "%rcx", "%rdx", 624 | "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" 625 | ); 626 | #else /* Without ADX */ 627 | __asm__ __volatile__( 628 | "movq 8(%1), %%rdx ;" /* A[1] */ 629 | "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ 630 | "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ 631 | "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ 632 | 633 | "movq 16(%1), %%rdx ;" /* A[2] */ 634 | "mulx 24(%1), %%r12, %%r13 ;" /* A[3]*A[2] */ 635 | "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ 636 | 637 | "addq %%rax, %%r9 ;" 638 | "adcq %%rdx, %%r10 ;" 639 | "adcq %%rcx, %%r11 ;" 640 | "adcq %%r14, %%r12 ;" 641 | "adcq $0, %%r13 ;" 642 | "movq $0, %%r14 ;" 643 | "adcq $0, %%r14 ;" 644 | 645 | "movq (%1), %%rdx ;" /* A[0] */ 646 | "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ 647 | 648 | "addq %%rax, %%r10 ;" 649 | "adcq %%rcx, %%r11 ;" 650 | "adcq $0, %%r12 ;" 651 | "adcq $0, %%r13 ;" 652 | "adcq $0, %%r14 ;" 653 | 654 | "shldq $1, %%r13, %%r14 ;" 655 | "shldq $1, %%r12, %%r13 ;" 656 | "shldq $1, %%r11, %%r12 ;" 657 | "shldq $1, %%r10, %%r11 ;" 658 | "shldq $1, %%r9, %%r10 ;" 
659 | "shldq $1, %%r8, %%r9 ;" 660 | "shlq $1, %%r8 ;" 661 | 662 | /********************/ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ 663 | /********************/ "movq %%rax, 0(%0) ;" 664 | "addq %%rcx, %%r8 ;" "movq %%r8, 8(%0) ;" 665 | "movq 8(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ 666 | "adcq %%rax, %%r9 ;" "movq %%r9, 16(%0) ;" 667 | "adcq %%rcx, %%r10 ;" "movq %%r10, 24(%0) ;" 668 | "movq 16(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ 669 | "adcq %%rax, %%r11 ;" "movq %%r11, 32(%0) ;" 670 | "adcq %%rcx, %%r12 ;" "movq %%r12, 40(%0) ;" 671 | "movq 24(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ 672 | "adcq %%rax, %%r13 ;" "movq %%r13, 48(%0) ;" 673 | "adcq %%rcx, %%r14 ;" "movq %%r14, 56(%0) ;" 674 | : 675 | : "r" (c), "r" (a) 676 | : "memory", "cc", "%rax", "%rcx", "%rdx", 677 | "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14" 678 | ); 679 | #endif 680 | #else /* Without BMI2 */ 681 | /** 682 | * TODO: Multiplications using MULQ instruction. 
683 | **/ 684 | #endif 685 | } 686 | /** Reduces one 512-bit integer a[0:7] to a 256-bit value c[0:3] using 2^256 = 38 (mod 2^255-19): c = a[0:3] + 38*a[4:7], with the overflow word folded back in the same way. The result is congruent to a modulo 2^255-19 but is not necessarily smaller than 2^255-19. */ 687 | void red_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a) { 688 | #ifdef __BMI2__ 689 | #ifdef __ADX__ 690 | __asm__ __volatile__( 691 | "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19 */ 692 | "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ "xorl %%ebx, %%ebx ;" "adox (%1), %%r8 ;" 693 | "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ "adcx %%r10, %%r9 ;" "adox 8(%1), %%r9 ;" 694 | "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ "adcx %%r11, %%r10 ;" "adox 16(%1), %%r10 ;" 695 | "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ "adcx %%rax, %%r11 ;" "adox 24(%1), %%r11 ;" 696 | /****************************************/ "adcx %%rbx, %%rcx ;" "adox %%rbx, %%rcx ;" 697 | "clc ;" 698 | "mulx %%rcx, %%rax, %%rcx ;" /* c*carry: fold the word above bit 255 back in */ 699 | "adcx %%rax, %%r8 ;" 700 | "adcx %%rcx, %%r9 ;" "movq %%r9, 8(%0) ;" 701 | "adcx %%rbx, %%r10 ;" "movq %%r10, 16(%0) ;" 702 | "adcx %%rbx, %%r11 ;" "movq %%r11, 24(%0) ;" 703 | "mov $0, %%ecx ;" 704 | "cmovc %%edx, %%ecx ;" /* rcx = 38 if the fold carried out of 256 bits, else 0 */ 705 | "addq %%rcx, %%r8 ;" "movq %%r8, (%0) ;" 706 | : 707 | : "r" (c), "r" (a) 708 | : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11" 709 | ); 710 | #else 711 | __asm__ __volatile__( 712 | "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19 */ 713 | "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ 714 | "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ "addq %%r10, %%r9 ;" 715 | "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ "adcq %%r11, %%r10 ;" 716 | "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ "adcq %%rax, %%r11 ;" 717 | /****************************************/ "adcq $0, %%rcx ;" 718 | "addq (%1), %%r8 ;" 719 | "adcq 8(%1), %%r9 ;" 720 | "adcq 16(%1), %%r10 ;" 721 | "adcq 24(%1), %%r11 ;" 722 | "adcq $0, %%rcx ;" 723 | "mulx %%rcx, %%rax, %%rcx ;" /* c*carry: fold the word above bit 255 back in */ 724 | "addq %%rax, %%r8 ;" 725 | "adcq %%rcx, %%r9 ;" "movq %%r9, 8(%0) ;" 726 | "adcq $0, %%r10 ;" "movq %%r10, 16(%0) ;" 727 | "adcq $0, %%r11 ;" "movq %%r11, 24(%0) ;" 728 | "mov $0, %%ecx ;" 729 | "cmovc %%edx, %%ecx ;" /* rcx = 38 if the fold carried out of 256 bits, else 0 */ 730 | "addq %%rcx, 
%%r8 ;" "movq %%r8, (%0) ;" 731 | : 732 | : "r" (c), "r" (a) 733 | : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11" 734 | ); 735 | #endif 736 | #else /* Without BMI2 */ 737 | /** 738 | * TODO: Multiplications using MULQ instruction. Until that exists, a build without __BMI2__ gets an empty function body here and c is left unmodified. 739 | **/ 740 | #endif 741 | } 742 | /** Computes c[0:3] = a[0:3] + b[0:3] on 256-bit operands, adding 38 (= 2^256 mod 2^255-19) whenever the sum carries out of 256 bits. The result is congruent to a + b modulo 2^255-19 but is not necessarily smaller than 2^255-19. */ 743 | inline void add_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a, uint64_t *const b) { 744 | #ifdef __ADX__ 745 | __asm__ __volatile__( 746 | "mov $38, %%eax ;" 747 | "xorl %%ecx, %%ecx ;" /* rcx = 0 and CF cleared before the adcx chain */ 748 | "movq (%2), %%r8 ;" "adcx (%1), %%r8 ;" 749 | "movq 8(%2), %%r9 ;" "adcx 8(%1), %%r9 ;" 750 | "movq 16(%2), %%r10 ;" "adcx 16(%1), %%r10 ;" 751 | "movq 24(%2), %%r11 ;" "adcx 24(%1), %%r11 ;" 752 | "cmovc %%eax, %%ecx ;" /* rcx = 38 if a + b carried out of 256 bits, else 0 */ 753 | "xorl %%eax, %%eax ;" /* rax = 0 and CF cleared for the second chain */ 754 | "adcx %%rcx, %%r8 ;" 755 | "adcx %%rax, %%r9 ;" "movq %%r9, 8(%0) ;" 756 | "adcx %%rax, %%r10 ;" "movq %%r10, 16(%0) ;" 757 | "adcx %%rax, %%r11 ;" "movq %%r11, 24(%0) ;" 758 | "mov $38, %%ecx ;" 759 | "cmovc %%ecx, %%eax ;" /* rax = 38 if adding the correction carried out again, else 0 */ 760 | "addq %%rax, %%r8 ;" "movq %%r8, (%0) ;" 761 | : 762 | : "r" (c), "r" (a), "r" (b) 763 | : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11" 764 | ); 765 | #else 766 | __asm__ __volatile__( 767 | "mov $38, %%eax ;" 768 | "movq (%2), %%r8 ;" "addq (%1), %%r8 ;" 769 | "movq 8(%2), %%r9 ;" "adcq 8(%1), %%r9 ;" 770 | "movq 16(%2), %%r10 ;" "adcq 16(%1), %%r10 ;" 771 | "movq 24(%2), %%r11 ;" "adcq 24(%1), %%r11 ;" 772 | "mov $0, %%ecx ;" 773 | "cmovc %%eax, %%ecx ;" /* rcx = 38 if a + b carried out of 256 bits, else 0 */ 774 | "addq %%rcx, %%r8 ;" 775 | "adcq $0, %%r9 ;" "movq %%r9, 8(%0) ;" 776 | "adcq $0, %%r10 ;" "movq %%r10, 16(%0) ;" 777 | "adcq $0, %%r11 ;" "movq %%r11, 24(%0) ;" 778 | "mov $0, %%ecx ;" 779 | "cmovc %%eax, %%ecx ;" /* rcx = 38 if adding the correction carried out again, else 0 */ 780 | "addq %%rcx, %%r8 ;" "movq %%r8, (%0) ;" 781 | : 782 | : "r" (c), "r" (a), "r" (b) 783 | : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11" 784 | ); 785 | #endif 786 | } 787 | /** Computes c[0:3] = a[0:3] - b[0:3] on 256-bit operands, subtracting 38 (= 2^256 mod 2^255-19) whenever the subtraction borrows out of 256 bits. */ 788 | inline void sub_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a, uint64_t *const b) { 789 | __asm__ 
__volatile__( 790 | "mov $38, %%eax ;" 791 | "movq (%1), %%r8 ;" "subq (%2), %%r8 ;" 792 | "movq 8(%1), %%r9 ;" "sbbq 8(%2), %%r9 ;" 793 | "movq 16(%1), %%r10 ;" "sbbq 16(%2), %%r10 ;" 794 | "movq 24(%1), %%r11 ;" "sbbq 24(%2), %%r11 ;" 795 | "mov $0, %%ecx ;" 796 | "cmovc %%eax, %%ecx ;" /* rcx = 38 if the subtraction borrowed out of 256 bits, else 0 */ 797 | "subq %%rcx, %%r8 ;" 798 | "sbbq $0, %%r9 ;" "movq %%r9, 8(%0) ;" 799 | "sbbq $0, %%r10 ;" "movq %%r10, 16(%0) ;" 800 | "sbbq $0, %%r11 ;" "movq %%r11, 24(%0) ;" 801 | "mov $0, %%ecx ;" 802 | "cmovc %%eax, %%ecx ;" /* rcx = 38 if subtracting the correction borrowed again, else 0 */ 803 | "subq %%rcx, %%r8 ;" "movq %%r8, (%0) ;" 804 | : 805 | : "r" (c), "r" (a), "r" (b) 806 | : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11" 807 | ); 808 | } 809 | 810 | /** 811 | * Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666: computes c[0:3] = a24 * a[0:3], then folds the overflow word back in using 2^256 = 38 (mod 2^255-19). The result is a 256-bit value that is not necessarily smaller than 2^255-19. 812 | **/ 813 | inline void mul_a24_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a) { 814 | #ifdef __BMI2__ 815 | const uint64_t a24 = 121666; 816 | __asm__ __volatile__( 817 | "movq %2, %%rdx ;" /* rdx = a24, the implicit mulx multiplicand */ 818 | "mulx (%1), %%r8, %%r10 ;" 819 | "mulx 8(%1), %%r9, %%r11 ;" "addq %%r10, %%r9 ;" 820 | "mulx 16(%1), %%r10, %%rax ;" "adcq %%r11, %%r10 ;" 821 | "mulx 24(%1), %%r11, %%rcx ;" "adcq %%rax, %%r11 ;" 822 | /***************************/ "adcq $0, %%rcx ;" /* rcx = overflow word above bit 255 */ 823 | "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19 */ 824 | "mulx %%rcx, %%rax, %%rcx ;" /* 38 * overflow word */ 825 | "addq %%rax, %%r8 ;" 826 | "adcq %%rcx, %%r9 ;" "movq %%r9, 8(%0) ;" 827 | "adcq $0, %%r10 ;" "movq %%r10, 16(%0) ;" 828 | "adcq $0, %%r11 ;" "movq %%r11, 24(%0) ;" 829 | "mov $0, %%ecx ;" 830 | "cmovc %%edx, %%ecx ;" /* rcx = 38 if the fold carried out of 256 bits, else 0 */ 831 | "addq %%rcx, %%r8 ;" "movq %%r8, (%0) ;" 832 | : 833 | : "r" (c), "r" (a), "r" (a24) 834 | : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11" 835 | ); 836 | #else /* Without BMI2 */ 837 | /** 838 | * TODO: Multiplications using MULQ instruction. Until that exists, a build without __BMI2__ gets an empty function body here and c is left unmodified.
839 | **/ 840 | #endif 841 | } 842 | /** Computes c = a^(2^255-21) through a fixed sequence of squarings and multiplications; 2^255-21 = (2^255-19) - 2, so this is the modular inverse of a, assuming sqr_EltFp25519_1w_x64 / mul_EltFp25519_1w_x64 / copy_EltFp25519_1w_x64 (defined outside this chunk) are squaring, multiplication and copy modulo 2^255-19. The exponent comments below write x for the input a. NOTE(review): c is written by the first copy and squaring before a is read again by the first multiplication, so c and a presumably must not alias - confirm against callers. */ 843 | void inv_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a) { 844 | #define sqrn_EltFp25519_1w_x64(A, times)\ 845 | counter = times;\ 846 | while ( counter-- > 0) {\ 847 | sqr_EltFp25519_1w_x64(A);\ 848 | } 849 | 850 | EltFp25519_1w_Buffer_x64 buffer_1w; /* not named directly below; presumably scratch space used by the mul_/sqr_ macros - confirm in the header */ 851 | EltFp25519_1w_x64 x0, x1, x2; 852 | uint64_t * T[5]; 853 | uint64_t counter; 854 | 855 | T[0] = x0; 856 | T[1] = c; /* x^(-1) */ 857 | T[2] = x1; 858 | T[3] = x2; 859 | T[4] = a; /* x */ /* T[4] is not referenced again in this function; a is used directly */ 860 | 861 | copy_EltFp25519_1w_x64(T[1], a); 862 | sqrn_EltFp25519_1w_x64(T[1], 1); /* T[1] = x^2 */ 863 | copy_EltFp25519_1w_x64(T[2], T[1]); 864 | sqrn_EltFp25519_1w_x64(T[2], 2); /* T[2] = x^8 */ 865 | mul_EltFp25519_1w_x64(T[0], a, T[2]); /* T[0] = x^9 */ 866 | mul_EltFp25519_1w_x64(T[1], T[1], T[0]); /* T[1] = x^11 */ 867 | copy_EltFp25519_1w_x64(T[2], T[1]); 868 | sqrn_EltFp25519_1w_x64(T[2], 1); /* T[2] = x^22 */ 869 | mul_EltFp25519_1w_x64(T[0], T[0], T[2]); /* T[0] = x^31 = x^(2^5-1) */ 870 | copy_EltFp25519_1w_x64(T[2], T[0]); 871 | sqrn_EltFp25519_1w_x64(T[2], 5); 872 | mul_EltFp25519_1w_x64(T[0], T[0], T[2]); /* T[0] = x^(2^10-1) */ 873 | copy_EltFp25519_1w_x64(T[2], T[0]); 874 | sqrn_EltFp25519_1w_x64(T[2], 10); 875 | mul_EltFp25519_1w_x64(T[2], T[2], T[0]); /* T[2] = x^(2^20-1) */ 876 | copy_EltFp25519_1w_x64(T[3], T[2]); 877 | sqrn_EltFp25519_1w_x64(T[3], 20); 878 | mul_EltFp25519_1w_x64(T[3], T[3], T[2]); /* T[3] = x^(2^40-1) */ 879 | sqrn_EltFp25519_1w_x64(T[3], 10); 880 | mul_EltFp25519_1w_x64(T[3], T[3], T[0]); /* T[3] = x^(2^50-1) */ 881 | copy_EltFp25519_1w_x64(T[0], T[3]); 882 | sqrn_EltFp25519_1w_x64(T[0], 50); 883 | mul_EltFp25519_1w_x64(T[0], T[0], T[3]); /* T[0] = x^(2^100-1) */ 884 | copy_EltFp25519_1w_x64(T[2], T[0]); 885 | sqrn_EltFp25519_1w_x64(T[2], 100); 886 | mul_EltFp25519_1w_x64(T[2], T[2], T[0]); /* T[2] = x^(2^200-1) */ 887 | sqrn_EltFp25519_1w_x64(T[2], 50); 888 | mul_EltFp25519_1w_x64(T[2], T[2], T[3]); /* T[2] = x^(2^250-1) */ 889 | sqrn_EltFp25519_1w_x64(T[2], 5); /* T[2] = x^(2^255-2^5) */ 890 | mul_EltFp25519_1w_x64(T[1], T[1], T[2]); /* T[1] = c = x^(2^255-32+11) = x^(2^255-21) */ 891 | #undef sqrn_EltFp25519_1w_x64 892 | } 893 | 894 | /** 895 | * Given C, a 256-bit number, fred_EltFp25519_1w_x64 updates C 896 | * with a number such that 0 <= C < 2**255-19.
897 | * Contributed by: Samuel Neves. 898 | **/ 899 | inline void fred_EltFp25519_1w_x64(uint64_t *const c) { /* c: four 64-bit limbs, limb 0 least significant; reduced in place without branches. */ 900 | __asm__ __volatile__ ( 901 | /* First, obtains a number less than 2^255. */ 902 | "btrq $63, 24(%0) ;" /* CF = bit 255 of c, then clear that bit */ 903 | "sbbl %%ecx, %%ecx ;" /* ecx = CF ? all-ones : 0 */ 904 | "andq $19, %%rcx ;" /* rcx = CF ? 19 : 0, since 2^255 = 19 (mod 2^255-19) */ 905 | "addq %%rcx, (%0) ;" 906 | "adcq $0, 8(%0) ;" 907 | "adcq $0, 16(%0) ;" 908 | "adcq $0, 24(%0) ;" /* carry ripples through all four limbs */ 909 | /* Same fold a second time: the addition above can set bit 255 again. */ 910 | "btrq $63, 24(%0) ;" 911 | "sbbl %%ecx, %%ecx ;" 912 | "andq $19, %%rcx ;" 913 | "addq %%rcx, (%0) ;" 914 | "adcq $0, 8(%0) ;" 915 | "adcq $0, 16(%0) ;" 916 | "adcq $0, 24(%0) ;" 917 | 918 | /* Then, in case the number fall into [2^255-19, 2^255-1] */ 919 | "cmpq $-19, (%0) ;" 920 | "setaeb %%al ;" /* al = (limb0 >= 2^64-19), unsigned compare */ 921 | "cmpq $-1, 8(%0) ;" 922 | "setzb %%bl ;" /* bl = (limb1 is all ones) */ 923 | "cmpq $-1, 16(%0) ;" 924 | "setzb %%cl ;" /* cl = (limb2 is all ones) */ 925 | "movq 24(%0), %%rdx ;" 926 | "addq $1, %%rdx ;" 927 | "shrq $63, %%rdx ;" /* rdx = 1 iff limb3 == 2^63-1 (its bit 63 was cleared above) */ 928 | "andb %%bl, %%al ;" 929 | "andb %%dl, %%cl ;" 930 | "test %%cl, %%al ;" /* ZF = 0 iff all four conditions hold, i.e. c >= 2^255-19 */ 931 | "movl $0, %%eax ;" 932 | "movl $19, %%ecx ;" 933 | "cmovnz %%rcx, %%rax ;" /* rax = 19 if c >= 2^255-19, else 0; the movs leave flags intact */ 934 | "addq %%rax, (%0) ;" 935 | "adcq $0, 8(%0) ;" 936 | "adcq $0, 16(%0) ;" 937 | "adcq $0, 24(%0) ;" 938 | "btrq $63, 24(%0) ;" /* drop bit 255 set by adding 19, completing c - (2^255-19); no-op when 0 was added */ 939 | : 940 | : "r"(c) 941 | : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx" 942 | ); 943 | } 944 | -------------------------------------------------------------------------------- /ext/x25519_precomputed/fp25519_x64.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2017, Armando Faz . All rights reserved. 3 | * Institute of Computing. 4 | * University of Campinas, Brazil. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions 8 | * are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 
12 | * * Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * * Neither the name of University of Campinas nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #ifndef FP25519_X64_H 35 | #define FP25519_X64_H 36 | 37 | #include 38 | 39 | #ifndef ALIGN_BYTES 40 | #define ALIGN_BYTES 32 41 | #endif 42 | 43 | #ifndef ALIGN 44 | #ifdef __INTEL_COMPILER 45 | #define ALIGN __declspec(align(ALIGN_BYTES)) 46 | #else 47 | #define ALIGN __attribute__((aligned(ALIGN_BYTES))) 48 | #endif 49 | #endif 50 | 51 | #define SIZE_BYTES_FP25519 32 52 | #define NUM_WORDS_ELTFP25519_X64 4 53 | typedef ALIGN uint64_t EltFp25519_1w_x64[NUM_WORDS_ELTFP25519_X64]; 54 | typedef ALIGN uint64_t EltFp25519_1w_Buffer_x64[2 * NUM_WORDS_ELTFP25519_X64]; 55 | typedef ALIGN uint64_t EltFp25519_2w_x64[2 * NUM_WORDS_ELTFP25519_X64]; 56 | typedef ALIGN uint64_t EltFp25519_2w_Buffer_x64[4 * NUM_WORDS_ELTFP25519_X64]; 57 | 58 | #ifdef __cplusplus 59 | extern "C" { 60 | #endif 61 | 62 | /* Integer Arithmetic */ 63 | void mul2_256x256_integer_x64(uint64_t *const c, uint64_t *const a, 64 | uint64_t *const b); 65 | 66 | void sqr2_256x256_integer_x64(uint64_t *const c, uint64_t *const a); 67 | 68 | void red_EltFp25519_2w_x64(uint64_t *const c, uint64_t *const a); 69 | 70 | void mul_256x256_integer_x64(uint64_t *const c, uint64_t *const a, 71 | uint64_t *const b); 72 | 73 | void sqr_256x256_integer_x64(uint64_t *const c, uint64_t *const a); 74 | 75 | void red_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a); 76 | 77 | /* Prime Field Arithmetic */ 78 | void add_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a, 79 | uint64_t *const b); 80 | 81 | void sub_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a, 82 | uint64_t *const b); 83 | 84 | void mul_a24_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a); 85 | 86 | void inv_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a); 87 | 88 | void fred_EltFp25519_1w_x64(uint64_t *const c); 89 | 90 | #ifdef __cplusplus 91 | } 92 | #endif 93 | 94 | #define mul_EltFp25519_1w_x64(c, a, b) \ 95 | mul_256x256_integer_x64(buffer_1w, a, b); \ 96 | red_EltFp25519_1w_x64(c, buffer_1w); 97 | 98 | 
#define sqr_EltFp25519_1w_x64(a) \ 99 | sqr_256x256_integer_x64(buffer_1w, a); \ 100 | red_EltFp25519_1w_x64(a, buffer_1w); 101 | 102 | #define mul_EltFp25519_2w_x64(c, a, b) \ 103 | mul2_256x256_integer_x64(buffer_2w, a, b); \ 104 | red_EltFp25519_2w_x64(c, buffer_2w); 105 | 106 | #define sqr_EltFp25519_2w_x64(a) \ 107 | sqr2_256x256_integer_x64(buffer_2w, a); \ 108 | red_EltFp25519_2w_x64(a, buffer_2w); 109 | 110 | #define copy_EltFp25519_1w_x64(C, A) \ 111 | (C)[0] = (A)[0]; \ 112 | (C)[1] = (A)[1]; \ 113 | (C)[2] = (A)[2]; \ 114 | (C)[3] = (A)[3]; 115 | 116 | #define setzero_EltFp25519_1w_x64(C) \ 117 | (C)[0] = 0; \ 118 | (C)[1] = 0; \ 119 | (C)[2] = 0; \ 120 | (C)[3] = 0; 121 | 122 | #endif /* FP25519_X64_H */ 123 | -------------------------------------------------------------------------------- /ext/x25519_precomputed/table_ladder_x25519.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2017, Armando Faz . All rights reserved. 3 | * Institute of Computing. 4 | * University of Campinas, Brazil. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions 8 | * are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * * Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * * Neither the name of University of Campinas nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef TABLE_LADDER_8K_H 35 | #define TABLE_LADDER_8K_H 36 | #include 37 | 38 | ALIGN static const uint64_t Table_Ladder_8k[252 * NUM_WORDS_ELTFP25519_X64] = { 39 | /* 1 */ 0xfffffffffffffff3, 0xffffffffffffffff, 40 | 0xffffffffffffffff, 0x5fffffffffffffff, 41 | /* 2 */ 0x6b8220f416aafe96, 0x82ebeb2b4f566a34, 42 | 0xd5a9a5b075a5950f, 0x5142b2cf4b2488f4, 43 | /* 3 */ 0x6aaebc750069680c, 0x89cf7820a0f99c41, 44 | 0x2a58d9183b56d0f4, 0x4b5aca80e36011a4, 45 | /* 4 */ 0x329132348c29745d, 0xf4a2e616e1642fd7, 46 | 0x1e45bb03ff67bc34, 0x306912d0f42a9b4a, 47 | /* 5 */ 0xff886507e6af7154, 0x04f50e13dfeec82f, 48 | 0xaa512fe82abab5ce, 0x174e251a68d5f222, 49 | /* 6 */ 0xcf96700d82028898, 0x1743e3370a2c02c5, 50 | 0x379eec98b4e86eaa, 0x0c59888a51e0482e, 51 | /* 7 */ 0xfbcbf1d699b5d189, 0xacaef0d58e9fdc84, 52 | 0xc1c20d06231f7614, 0x2938218da274f972, 53 | /* 8 */ 0xf6af49beff1d7f18, 0xcc541c22387ac9c2, 54 | 0x96fcc9ef4015c56b, 0x69c1627c690913a9, 55 | /* 9 */ 0x7a86fd2f4733db0e, 0xfdb8c4f29e087de9, 56 | 0x095e4b1a8ea2a229, 0x1ad7a7c829b37a79, 57 | /* 10 */ 0x342d89cad17ea0c0, 0x67bedda6cced2051, 58 | 0x19ca31bf2bb42f74, 
0x3df7b4c84980acbb, 59 | /* 11 */ 0xa8c6444dc80ad883, 0xb91e440366e3ab85, 60 | 0xc215cda00164f6d8, 0x3d867c6ef247e668, 61 | /* 12 */ 0xc7dd582bcc3e658c, 0xfd2c4748ee0e5528, 62 | 0xa0fd9b95cc9f4f71, 0x7529d871b0675ddf, 63 | /* 13 */ 0xb8f568b42d3cbd78, 0x1233011b91f3da82, 64 | 0x2dce6ccd4a7c3b62, 0x75e7fc8e9e498603, 65 | /* 14 */ 0x2f4f13f1fcd0b6ec, 0xf1a8ca1f29ff7a45, 66 | 0xc249c1a72981e29b, 0x6ebe0dbb8c83b56a, 67 | /* 15 */ 0x7114fa8d170bb222, 0x65a2dcd5bf93935f, 68 | 0xbdc41f68b59c979a, 0x2f0eef79a2ce9289, 69 | /* 16 */ 0x42ecbf0c083c37ce, 0x2930bc09ec496322, 70 | 0xf294b0c19cfeac0d, 0x3780aa4bedfabb80, 71 | /* 17 */ 0x56c17d3e7cead929, 0xe7cb4beb2e5722c5, 72 | 0x0ce931732dbfe15a, 0x41b883c7621052f8, 73 | /* 18 */ 0xdbf75ca0c3d25350, 0x2936be086eb1e351, 74 | 0xc936e03cb4a9b212, 0x1d45bf82322225aa, 75 | /* 19 */ 0xe81ab1036a024cc5, 0xe212201c304c9a72, 76 | 0xc5d73fba6832b1fc, 0x20ffdb5a4d839581, 77 | /* 20 */ 0xa283d367be5d0fad, 0x6c2b25ca8b164475, 78 | 0x9d4935467caaf22e, 0x5166408eee85ff49, 79 | /* 21 */ 0x3c67baa2fab4e361, 0xb3e433c67ef35cef, 80 | 0x5259729241159b1c, 0x6a621892d5b0ab33, 81 | /* 22 */ 0x20b74a387555cdcb, 0x532aa10e1208923f, 82 | 0xeaa17b7762281dd1, 0x61ab3443f05c44bf, 83 | /* 23 */ 0x257a6c422324def8, 0x131c6c1017e3cf7f, 84 | 0x23758739f630a257, 0x295a407a01a78580, 85 | /* 24 */ 0xf8c443246d5da8d9, 0x19d775450c52fa5d, 86 | 0x2afcfc92731bf83d, 0x7d10c8e81b2b4700, 87 | /* 25 */ 0xc8e0271f70baa20b, 0x993748867ca63957, 88 | 0x5412efb3cb7ed4bb, 0x3196d36173e62975, 89 | /* 26 */ 0xde5bcad141c7dffc, 0x47cc8cd2b395c848, 90 | 0xa34cd942e11af3cb, 0x0256dbf2d04ecec2, 91 | /* 27 */ 0x875ab7e94b0e667f, 0xcad4dd83c0850d10, 92 | 0x47f12e8f4e72c79f, 0x5f1a87bb8c85b19b, 93 | /* 28 */ 0x7ae9d0b6437f51b8, 0x12c7ce5518879065, 94 | 0x2ade09fe5cf77aee, 0x23a05a2f7d2c5627, 95 | /* 29 */ 0x5908e128f17c169a, 0xf77498dd8ad0852d, 96 | 0x74b4c4ceab102f64, 0x183abadd10139845, 97 | /* 30 */ 0xb165ba8daa92aaac, 0xd5c5ef9599386705, 98 | 0xbe2f8f0cf8fc40d1, 0x2701e635ee204514, 
99 | /* 31 */ 0x629fa80020156514, 0xf223868764a8c1ce, 100 | 0x5b894fff0b3f060e, 0x60d9944cf708a3fa, 101 | /* 32 */ 0xaeea001a1c7a201f, 0xebf16a633ee2ce63, 102 | 0x6f7709594c7a07e1, 0x79b958150d0208cb, 103 | /* 33 */ 0x24b55e5301d410e7, 0xe3a34edff3fdc84d, 104 | 0xd88768e4904032d8, 0x131384427b3aaeec, 105 | /* 34 */ 0x8405e51286234f14, 0x14dc4739adb4c529, 106 | 0xb8a2b5b250634ffd, 0x2fe2a94ad8a7ff93, 107 | /* 35 */ 0xec5c57efe843fadd, 0x2843ce40f0bb9918, 108 | 0xa4b561d6cf3d6305, 0x743629bde8fb777e, 109 | /* 36 */ 0x343edd46bbaf738f, 0xed981828b101a651, 110 | 0xa401760b882c797a, 0x1fc223e28dc88730, 111 | /* 37 */ 0x48604e91fc0fba0e, 0xb637f78f052c6fa4, 112 | 0x91ccac3d09e9239c, 0x23f7eed4437a687c, 113 | /* 38 */ 0x5173b1118d9bd800, 0x29d641b63189d4a7, 114 | 0xfdbf177988bbc586, 0x2959894fcad81df5, 115 | /* 39 */ 0xaebc8ef3b4bbc899, 0x4148995ab26992b9, 116 | 0x24e20b0134f92cfb, 0x40d158894a05dee8, 117 | /* 40 */ 0x46b00b1185af76f6, 0x26bac77873187a79, 118 | 0x3dc0bf95ab8fff5f, 0x2a608bd8945524d7, 119 | /* 41 */ 0x26449588bd446302, 0x7c4bc21c0388439c, 120 | 0x8e98a4f383bd11b2, 0x26218d7bc9d876b9, 121 | /* 42 */ 0xe3081542997c178a, 0x3c2d29a86fb6606f, 122 | 0x5c217736fa279374, 0x7dde05734afeb1fa, 123 | /* 43 */ 0x3bf10e3906d42bab, 0xe4f7803e1980649c, 124 | 0xe6053bf89595bf7a, 0x394faf38da245530, 125 | /* 44 */ 0x7a8efb58896928f4, 0xfbc778e9cc6a113c, 126 | 0x72670ce330af596f, 0x48f222a81d3d6cf7, 127 | /* 45 */ 0xf01fce410d72caa7, 0x5a20ecc7213b5595, 128 | 0x7bc21165c1fa1483, 0x07f89ae31da8a741, 129 | /* 46 */ 0x05d2c2b4c6830ff9, 0xd43e330fc6316293, 130 | 0xa5a5590a96d3a904, 0x705edb91a65333b6, 131 | /* 47 */ 0x048ee15e0bb9a5f7, 0x3240cfca9e0aaf5d, 132 | 0x8f4b71ceedc4a40b, 0x621c0da3de544a6d, 133 | /* 48 */ 0x92872836a08c4091, 0xce8375b010c91445, 134 | 0x8a72eb524f276394, 0x2667fcfa7ec83635, 135 | /* 49 */ 0x7f4c173345e8752a, 0x061b47feee7079a5, 136 | 0x25dd9afa9f86ff34, 0x3780cef5425dc89c, 137 | /* 50 */ 0x1a46035a513bb4e9, 0x3e1ef379ac575ada, 138 | 0xc78c5f1c5fa24b50, 
0x321a967634fd9f22, 139 | /* 51 */ 0x946707b8826e27fa, 0x3dca84d64c506fd0, 140 | 0xc189218075e91436, 0x6d9284169b3b8484, 141 | /* 52 */ 0x3a67e840383f2ddf, 0x33eec9a30c4f9b75, 142 | 0x3ec7c86fa783ef47, 0x26ec449fbac9fbc4, 143 | /* 53 */ 0x5c0f38cba09b9e7d, 0x81168cc762a3478c, 144 | 0x3e23b0d306fc121c, 0x5a238aa0a5efdcdd, 145 | /* 54 */ 0x1ba26121c4ea43ff, 0x36f8c77f7c8832b5, 146 | 0x88fbea0b0adcf99a, 0x5ca9938ec25bebf9, 147 | /* 55 */ 0xd5436a5e51fccda0, 0x1dbc4797c2cd893b, 148 | 0x19346a65d3224a08, 0x0f5034e49b9af466, 149 | /* 56 */ 0xf23c3967a1e0b96e, 0xe58b08fa867a4d88, 150 | 0xfb2fabc6a7341679, 0x2a75381eb6026946, 151 | /* 57 */ 0xc80a3be4c19420ac, 0x66b1f6c681f2b6dc, 152 | 0x7cf7036761e93388, 0x25abbbd8a660a4c4, 153 | /* 58 */ 0x91ea12ba14fd5198, 0x684950fc4a3cffa9, 154 | 0xf826842130f5ad28, 0x3ea988f75301a441, 155 | /* 59 */ 0xc978109a695f8c6f, 0x1746eb4a0530c3f3, 156 | 0x444d6d77b4459995, 0x75952b8c054e5cc7, 157 | /* 60 */ 0xa3703f7915f4d6aa, 0x66c346202f2647d8, 158 | 0xd01469df811d644b, 0x77fea47d81a5d71f, 159 | /* 61 */ 0xc5e9529ef57ca381, 0x6eeeb4b9ce2f881a, 160 | 0xb6e91a28e8009bd6, 0x4b80be3e9afc3fec, 161 | /* 62 */ 0x7e3773c526aed2c5, 0x1b4afcb453c9a49d, 162 | 0xa920bdd7baffb24d, 0x7c54699f122d400e, 163 | /* 63 */ 0xef46c8e14fa94bc8, 0xe0b074ce2952ed5e, 164 | 0xbea450e1dbd885d5, 0x61b68649320f712c, 165 | /* 64 */ 0x8a485f7309ccbdd1, 0xbd06320d7d4d1a2d, 166 | 0x25232973322dbef4, 0x445dc4758c17f770, 167 | /* 65 */ 0xdb0434177cc8933c, 0xed6fe82175ea059f, 168 | 0x1efebefdc053db34, 0x4adbe867c65daf99, 169 | /* 66 */ 0x3acd71a2a90609df, 0xe5e991856dd04050, 170 | 0x1ec69b688157c23c, 0x697427f6885cfe4d, 171 | /* 67 */ 0xd7be7b9b65e1a851, 0xa03d28d522c536dd, 172 | 0x28399d658fd2b645, 0x49e5b7e17c2641e1, 173 | /* 68 */ 0x6f8c3a98700457a4, 0x5078f0a25ebb6778, 174 | 0xd13c3ccbc382960f, 0x2e003258a7df84b1, 175 | /* 69 */ 0x8ad1f39be6296a1c, 0xc1eeaa652a5fbfb2, 176 | 0x33ee0673fd26f3cb, 0x59256173a69d2ccc, 177 | /* 70 */ 0x41ea07aa4e18fc41, 0xd9fc19527c87a51e, 178 | 
0xbdaacb805831ca6f, 0x445b652dc916694f, 179 | /* 71 */ 0xce92a3a7f2172315, 0x1edc282de11b9964, 180 | 0xa1823aafe04c314a, 0x790a2d94437cf586, 181 | /* 72 */ 0x71c447fb93f6e009, 0x8922a56722845276, 182 | 0xbf70903b204f5169, 0x2f7a89891ba319fe, 183 | /* 73 */ 0x02a08eb577e2140c, 0xed9a4ed4427bdcf4, 184 | 0x5253ec44e4323cd1, 0x3e88363c14e9355b, 185 | /* 74 */ 0xaa66c14277110b8c, 0x1ae0391610a23390, 186 | 0x2030bd12c93fc2a2, 0x3ee141579555c7ab, 187 | /* 75 */ 0x9214de3a6d6e7d41, 0x3ccdd88607f17efe, 188 | 0x674f1288f8e11217, 0x5682250f329f93d0, 189 | /* 76 */ 0x6cf00b136d2e396e, 0x6e4cf86f1014debf, 190 | 0x5930b1b5bfcc4e83, 0x047069b48aba16b6, 191 | /* 77 */ 0x0d4ce4ab69b20793, 0xb24db91a97d0fb9e, 192 | 0xcdfa50f54e00d01d, 0x221b1085368bddb5, 193 | /* 78 */ 0xe7e59468b1e3d8d2, 0x53c56563bd122f93, 194 | 0xeee8a903e0663f09, 0x61efa662cbbe3d42, 195 | /* 79 */ 0x2cf8ddddde6eab2a, 0x9bf80ad51435f231, 196 | 0x5deadacec9f04973, 0x29275b5d41d29b27, 197 | /* 80 */ 0xcfde0f0895ebf14f, 0xb9aab96b054905a7, 198 | 0xcae80dd9a1c420fd, 0x0a63bf2f1673bbc7, 199 | /* 81 */ 0x092f6e11958fbc8c, 0x672a81e804822fad, 200 | 0xcac8351560d52517, 0x6f3f7722c8f192f8, 201 | /* 82 */ 0xf8ba90ccc2e894b7, 0x2c7557a438ff9f0d, 202 | 0x894d1d855ae52359, 0x68e122157b743d69, 203 | /* 83 */ 0xd87e5570cfb919f3, 0x3f2cdecd95798db9, 204 | 0x2121154710c0a2ce, 0x3c66a115246dc5b2, 205 | /* 84 */ 0xcbedc562294ecb72, 0xba7143c36a280b16, 206 | 0x9610c2efd4078b67, 0x6144735d946a4b1e, 207 | /* 85 */ 0x536f111ed75b3350, 0x0211db8c2041d81b, 208 | 0xf93cb1000e10413c, 0x149dfd3c039e8876, 209 | /* 86 */ 0xd479dde46b63155b, 0xb66e15e93c837976, 210 | 0xdafde43b1f13e038, 0x5fafda1a2e4b0b35, 211 | /* 87 */ 0x3600bbdf17197581, 0x3972050bbe3cd2c2, 212 | 0x5938906dbdd5be86, 0x34fce5e43f9b860f, 213 | /* 88 */ 0x75a8a4cd42d14d02, 0x828dabc53441df65, 214 | 0x33dcabedd2e131d3, 0x3ebad76fb814d25f, 215 | /* 89 */ 0xd4906f566f70e10f, 0x5d12f7aa51690f5a, 216 | 0x45adb16e76cefcf2, 0x01f768aead232999, 217 | /* 90 */ 0x2b6cc77b6248febd, 
0x3cd30628ec3aaffd, 218 | 0xce1c0b80d4ef486a, 0x4c3bff2ea6f66c23, 219 | /* 91 */ 0x3f2ec4094aeaeb5f, 0x61b19b286e372ca7, 220 | 0x5eefa966de2a701d, 0x23b20565de55e3ef, 221 | /* 92 */ 0xe301ca5279d58557, 0x07b2d4ce27c2874f, 222 | 0xa532cd8a9dcf1d67, 0x2a52fee23f2bff56, 223 | /* 93 */ 0x8624efb37cd8663d, 0xbbc7ac20ffbd7594, 224 | 0x57b85e9c82d37445, 0x7b3052cb86a6ec66, 225 | /* 94 */ 0x3482f0ad2525e91e, 0x2cb68043d28edca0, 226 | 0xaf4f6d052e1b003a, 0x185f8c2529781b0a, 227 | /* 95 */ 0xaa41de5bd80ce0d6, 0x9407b2416853e9d6, 228 | 0x563ec36e357f4c3a, 0x4cc4b8dd0e297bce, 229 | /* 96 */ 0xa2fc1a52ffb8730e, 0x1811f16e67058e37, 230 | 0x10f9a366cddf4ee1, 0x72f4a0c4a0b9f099, 231 | /* 97 */ 0x8c16c06f663f4ea7, 0x693b3af74e970fba, 232 | 0x2102e7f1d69ec345, 0x0ba53cbc968a8089, 233 | /* 98 */ 0xca3d9dc7fea15537, 0x4c6824bb51536493, 234 | 0xb9886314844006b1, 0x40d2a72ab454cc60, 235 | /* 99 */ 0x5936a1b712570975, 0x91b9d648debda657, 236 | 0x3344094bb64330ea, 0x006ba10d12ee51d0, 237 | /* 100 */ 0x19228468f5de5d58, 0x0eb12f4c38cc05b0, 238 | 0xa1039f9dd5601990, 0x4502d4ce4fff0e0b, 239 | /* 101 */ 0xeb2054106837c189, 0xd0f6544c6dd3b93c, 240 | 0x40727064c416d74f, 0x6e15c6114b502ef0, 241 | /* 102 */ 0x4df2a398cfb1a76b, 0x11256c7419f2f6b1, 242 | 0x4a497962066e6043, 0x705b3aab41355b44, 243 | /* 103 */ 0x365ef536d797b1d8, 0x00076bd622ddf0db, 244 | 0x3bbf33b0e0575a88, 0x3777aa05c8e4ca4d, 245 | /* 104 */ 0x392745c85578db5f, 0x6fda4149dbae5ae2, 246 | 0xb1f0b00b8adc9867, 0x09963437d36f1da3, 247 | /* 105 */ 0x7e824e90a5dc3853, 0xccb5f6641f135cbd, 248 | 0x6736d86c87ce8fcc, 0x625f3ce26604249f, 249 | /* 106 */ 0xaf8ac8059502f63f, 0x0c05e70a2e351469, 250 | 0x35292e9c764b6305, 0x1a394360c7e23ac3, 251 | /* 107 */ 0xd5c6d53251183264, 0x62065abd43c2b74f, 252 | 0xb5fbf5d03b973f9b, 0x13a3da3661206e5e, 253 | /* 108 */ 0xc6bd5837725d94e5, 0x18e30912205016c5, 254 | 0x2088ce1570033c68, 0x7fba1f495c837987, 255 | /* 109 */ 0x5a8c7423f2f9079d, 0x1735157b34023fc5, 256 | 0xe4f9b49ad2fab351, 0x6691ff72c878e33c, 257 
| /* 110 */ 0x122c2adedc5eff3e, 0xf8dd4bf1d8956cf4, 258 | 0xeb86205d9e9e5bda, 0x049b92b9d975c743, 259 | /* 111 */ 0xa5379730b0f6c05a, 0x72a0ffacc6f3a553, 260 | 0xb0032c34b20dcd6d, 0x470e9dbc88d5164a, 261 | /* 112 */ 0xb19cf10ca237c047, 0xb65466711f6c81a2, 262 | 0xb3321bd16dd80b43, 0x48c14f600c5fbe8e, 263 | /* 113 */ 0x66451c264aa6c803, 0xb66e3904a4fa7da6, 264 | 0xd45f19b0b3128395, 0x31602627c3c9bc10, 265 | /* 114 */ 0x3120dc4832e4e10d, 0xeb20c46756c717f7, 266 | 0x00f52e3f67280294, 0x566d4fc14730c509, 267 | /* 115 */ 0x7e3a5d40fd837206, 0xc1e926dc7159547a, 268 | 0x216730fba68d6095, 0x22e8c3843f69cea7, 269 | /* 116 */ 0x33d074e8930e4b2b, 0xb6e4350e84d15816, 270 | 0x5534c26ad6ba2365, 0x7773c12f89f1f3f3, 271 | /* 117 */ 0x8cba404da57962aa, 0x5b9897a81999ce56, 272 | 0x508e862f121692fc, 0x3a81907fa093c291, 273 | /* 118 */ 0x0dded0ff4725a510, 0x10d8cc10673fc503, 274 | 0x5b9d151c9f1f4e89, 0x32a5c1d5cb09a44c, 275 | /* 119 */ 0x1e0aa442b90541fb, 0x5f85eb7cc1b485db, 276 | 0xbee595ce8a9df2e5, 0x25e496c722422236, 277 | /* 120 */ 0x5edf3c46cd0fe5b9, 0x34e75a7ed2a43388, 278 | 0xe488de11d761e352, 0x0e878a01a085545c, 279 | /* 121 */ 0xba493c77e021bb04, 0x2b4d1843c7df899a, 280 | 0x9ea37a487ae80d67, 0x67a9958011e41794, 281 | /* 122 */ 0x4b58051a6697b065, 0x47e33f7d8d6ba6d4, 282 | 0xbb4da8d483ca46c1, 0x68becaa181c2db0d, 283 | /* 123 */ 0x8d8980e90b989aa5, 0xf95eb14a2c93c99b, 284 | 0x51c6c7c4796e73a2, 0x6e228363b5efb569, 285 | /* 124 */ 0xc6bbc0b02dd624c8, 0x777eb47dec8170ee, 286 | 0x3cde15a004cfafa9, 0x1dc6bc087160bf9b, 287 | /* 125 */ 0x2e07e043eec34002, 0x18e9fc677a68dc7f, 288 | 0xd8da03188bd15b9a, 0x48fbc3bb00568253, 289 | /* 126 */ 0x57547d4cfb654ce1, 0xd3565b82a058e2ad, 290 | 0xf63eaf0bbf154478, 0x47531ef114dfbb18, 291 | /* 127 */ 0xe1ec630a4278c587, 0x5507d546ca8e83f3, 292 | 0x85e135c63adc0c2b, 0x0aa7efa85682844e, 293 | /* 128 */ 0x72691ba8b3e1f615, 0x32b4e9701fbe3ffa, 294 | 0x97b6d92e39bb7868, 0x2cfe53dea02e39e8, 295 | /* 129 */ 0x687392cd85cd52b0, 0x27ff66c910e29831, 296 | 
0x97134556a9832d06, 0x269bb0360a84f8a0, 297 | /* 130 */ 0x706e55457643f85c, 0x3734a48c9b597d1b, 298 | 0x7aee91e8c6efa472, 0x5cd6abc198a9d9e0, 299 | /* 131 */ 0x0e04de06cb3ce41a, 0xd8c6eb893402e138, 300 | 0x904659bb686e3772, 0x7215c371746ba8c8, 301 | /* 132 */ 0xfd12a97eeae4a2d9, 0x9514b7516394f2c5, 302 | 0x266fd5809208f294, 0x5c847085619a26b9, 303 | /* 133 */ 0x52985410fed694ea, 0x3c905b934a2ed254, 304 | 0x10bb47692d3be467, 0x063b3d2d69e5e9e1, 305 | /* 134 */ 0x472726eedda57deb, 0xefb6c4ae10f41891, 306 | 0x2b1641917b307614, 0x117c554fc4f45b7c, 307 | /* 135 */ 0xc07cf3118f9d8812, 0x01dbd82050017939, 308 | 0xd7e803f4171b2827, 0x1015e87487d225ea, 309 | /* 136 */ 0xc58de3fed23acc4d, 0x50db91c294a7be2d, 310 | 0x0b94d43d1c9cf457, 0x6b1640fa6e37524a, 311 | /* 137 */ 0x692f346c5fda0d09, 0x200b1c59fa4d3151, 312 | 0xb8c46f760777a296, 0x4b38395f3ffdfbcf, 313 | /* 138 */ 0x18d25e00be54d671, 0x60d50582bec8aba6, 314 | 0x87ad8f263b78b982, 0x50fdf64e9cda0432, 315 | /* 139 */ 0x90f567aac578dcf0, 0xef1e9b0ef2a3133b, 316 | 0x0eebba9242d9de71, 0x15473c9bf03101c7, 317 | /* 140 */ 0x7c77e8ae56b78095, 0xb678e7666e6f078e, 318 | 0x2da0b9615348ba1f, 0x7cf931c1ff733f0b, 319 | /* 141 */ 0x26b357f50a0a366c, 0xe9708cf42b87d732, 320 | 0xc13aeea5f91cb2c0, 0x35d90c991143bb4c, 321 | /* 142 */ 0x47c1c404a9a0d9dc, 0x659e58451972d251, 322 | 0x3875a8c473b38c31, 0x1fbd9ed379561f24, 323 | /* 143 */ 0x11fabc6fd41ec28d, 0x7ef8dfe3cd2a2dca, 324 | 0x72e73b5d8c404595, 0x6135fa4954b72f27, 325 | /* 144 */ 0xccfc32a2de24b69c, 0x3f55698c1f095d88, 326 | 0xbe3350ed5ac3f929, 0x5e9bf806ca477eeb, 327 | /* 145 */ 0xe9ce8fb63c309f68, 0x5376f63565e1f9f4, 328 | 0xd1afcfb35a6393f1, 0x6632a1ede5623506, 329 | /* 146 */ 0x0b7d6c390c2ded4c, 0x56cb3281df04cb1f, 330 | 0x66305a1249ecc3c7, 0x5d588b60a38ca72a, 331 | /* 147 */ 0xa6ecbf78e8e5f42d, 0x86eeb44b3c8a3eec, 332 | 0xec219c48fbd21604, 0x1aaf1af517c36731, 333 | /* 148 */ 0xc306a2836769bde7, 0x208280622b1e2adb, 334 | 0x8027f51ffbff94a6, 0x76cfa1ce1124f26b, 335 | /* 149 */ 
0x18eb00562422abb6, 0xf377c4d58f8c29c3, 336 | 0x4dbbc207f531561a, 0x0253b7f082128a27, 337 | /* 150 */ 0x3d1f091cb62c17e0, 0x4860e1abd64628a9, 338 | 0x52d17436309d4253, 0x356f97e13efae576, 339 | /* 151 */ 0xd351e11aa150535b, 0x3e6b45bb1dd878cc, 340 | 0x0c776128bed92c98, 0x1d34ae93032885b8, 341 | /* 152 */ 0x4ba0488ca85ba4c3, 0x985348c33c9ce6ce, 342 | 0x66124c6f97bda770, 0x0f81a0290654124a, 343 | /* 153 */ 0x9ed09ca6569b86fd, 0x811009fd18af9a2d, 344 | 0xff08d03f93d8c20a, 0x52a148199faef26b, 345 | /* 154 */ 0x3e03f9dc2d8d1b73, 0x4205801873961a70, 346 | 0xc0d987f041a35970, 0x07aa1f15a1c0d549, 347 | /* 155 */ 0xdfd46ce08cd27224, 0x6d0a024f934e4239, 348 | 0x808a7a6399897b59, 0x0a4556e9e13d95a2, 349 | /* 156 */ 0xd21a991fe9c13045, 0x9b0e8548fe7751b8, 350 | 0x5da643cb4bf30035, 0x77db28d63940f721, 351 | /* 157 */ 0xfc5eeb614adc9011, 0x5229419ae8c411eb, 352 | 0x9ec3e7787d1dcf74, 0x340d053e216e4cb5, 353 | /* 158 */ 0xcac7af39b48df2b4, 0xc0faec2871a10a94, 354 | 0x140a69245ca575ed, 0x0cf1c37134273a4c, 355 | /* 159 */ 0xc8ee306ac224b8a5, 0x57eaee7ccb4930b0, 356 | 0xa1e806bdaacbe74f, 0x7d9a62742eeb657d, 357 | /* 160 */ 0x9eb6b6ef546c4830, 0x885cca1fddb36e2e, 358 | 0xe6b9f383ef0d7105, 0x58654fef9d2e0412, 359 | /* 161 */ 0xa905c4ffbe0e8e26, 0x942de5df9b31816e, 360 | 0x497d723f802e88e1, 0x30684dea602f408d, 361 | /* 162 */ 0x21e5a278a3e6cb34, 0xaefb6e6f5b151dc4, 362 | 0xb30b8e049d77ca15, 0x28c3c9cf53b98981, 363 | /* 163 */ 0x287fb721556cdd2a, 0x0d317ca897022274, 364 | 0x7468c7423a543258, 0x4a7f11464eb5642f, 365 | /* 164 */ 0xa237a4774d193aa6, 0xd865986ea92129a1, 366 | 0x24c515ecf87c1a88, 0x604003575f39f5eb, 367 | /* 165 */ 0x47b9f189570a9b27, 0x2b98cede465e4b78, 368 | 0x026df551dbb85c20, 0x74fcd91047e21901, 369 | /* 166 */ 0x13e2a90a23c1bfa3, 0x0cb0074e478519f6, 370 | 0x5ff1cbbe3af6cf44, 0x67fe5438be812dbe, 371 | /* 167 */ 0xd13cf64fa40f05b0, 0x054dfb2f32283787, 372 | 0x4173915b7f0d2aea, 0x482f144f1f610d4e, 373 | /* 168 */ 0xf6210201b47f8234, 0x5d0ae1929e70b990, 374 | 
0xdcd7f455b049567c, 0x7e93d0f1f0916f01, 375 | /* 169 */ 0xdd79cbf18a7db4fa, 0xbe8391bf6f74c62f, 376 | 0x027145d14b8291bd, 0x585a73ea2cbf1705, 377 | /* 170 */ 0x485ca03e928a0db2, 0x10fc01a5742857e7, 378 | 0x2f482edbd6d551a7, 0x0f0433b5048fdb8a, 379 | /* 171 */ 0x60da2e8dd7dc6247, 0x88b4c9d38cd4819a, 380 | 0x13033ac001f66697, 0x273b24fe3b367d75, 381 | /* 172 */ 0xc6e8f66a31b3b9d4, 0x281514a494df49d5, 382 | 0xd1726fdfc8b23da7, 0x4b3ae7d103dee548, 383 | /* 173 */ 0xc6256e19ce4b9d7e, 0xff5c5cf186e3c61c, 384 | 0xacc63ca34b8ec145, 0x74621888fee66574, 385 | /* 174 */ 0x956f409645290a1e, 0xef0bf8e3263a962e, 386 | 0xed6a50eb5ec2647b, 0x0694283a9dca7502, 387 | /* 175 */ 0x769b963643a2dcd1, 0x42b7c8ea09fc5353, 388 | 0x4f002aee13397eab, 0x63005e2c19b7d63a, 389 | /* 176 */ 0xca6736da63023bea, 0x966c7f6db12a99b7, 390 | 0xace09390c537c5e1, 0x0b696063a1aa89ee, 391 | /* 177 */ 0xebb03e97288c56e5, 0x432a9f9f938c8be8, 392 | 0xa6a5a93d5b717f71, 0x1a5fb4c3e18f9d97, 393 | /* 178 */ 0x1c94e7ad1c60cdce, 0xee202a43fc02c4a0, 394 | 0x8dafe4d867c46a20, 0x0a10263c8ac27b58, 395 | /* 179 */ 0xd0dea9dfe4432a4a, 0x856af87bbe9277c5, 396 | 0xce8472acc212c71a, 0x6f151b6d9bbb1e91, 397 | /* 180 */ 0x26776c527ceed56a, 0x7d211cb7fbf8faec, 398 | 0x37ae66a6fd4609cc, 0x1f81b702d2770c42, 399 | /* 181 */ 0x2fb0b057eac58392, 0xe1dd89fe29744e9d, 400 | 0xc964f8eb17beb4f8, 0x29571073c9a2d41e, 401 | /* 182 */ 0xa948a18981c0e254, 0x2df6369b65b22830, 402 | 0xa33eb2d75fcfd3c6, 0x078cd6ec4199a01f, 403 | /* 183 */ 0x4a584a41ad900d2f, 0x32142b78e2c74c52, 404 | 0x68c4e8338431c978, 0x7f69ea9008689fc2, 405 | /* 184 */ 0x52f2c81e46a38265, 0xfd78072d04a832fd, 406 | 0x8cd7d5fa25359e94, 0x4de71b7454cc29d2, 407 | /* 185 */ 0x42eb60ad1eda6ac9, 0x0aad37dfdbc09c3a, 408 | 0x81004b71e33cc191, 0x44e6be345122803c, 409 | /* 186 */ 0x03fe8388ba1920db, 0xf5d57c32150db008, 410 | 0x49c8c4281af60c29, 0x21edb518de701aee, 411 | /* 187 */ 0x7fb63e418f06dc99, 0xa4460d99c166d7b8, 412 | 0x24dd5248ce520a83, 0x5ec3ad712b928358, 413 | /* 188 */ 
0x15022a5fbd17930f, 0xa4f64a77d82570e3, 414 | 0x12bc8d6915783712, 0x498194c0fc620abb, 415 | /* 189 */ 0x38a2d9d255686c82, 0x785c6bd9193e21f0, 416 | 0xe4d5c81ab24a5484, 0x56307860b2e20989, 417 | /* 190 */ 0x429d55f78b4d74c4, 0x22f1834643350131, 418 | 0x1e60c24598c71fff, 0x59f2f014979983ef, 419 | /* 191 */ 0x46a47d56eb494a44, 0x3e22a854d636a18e, 420 | 0xb346e15274491c3b, 0x2ceafd4e5390cde7, 421 | /* 192 */ 0xba8a8538be0d6675, 0x4b9074bb50818e23, 422 | 0xcbdab89085d304c3, 0x61a24fe0e56192c4, 423 | /* 193 */ 0xcb7615e6db525bcb, 0xdd7d8c35a567e4ca, 424 | 0xe6b4153acafcdd69, 0x2d668e097f3c9766, 425 | /* 194 */ 0xa57e7e265ce55ef0, 0x5d9f4e527cd4b967, 426 | 0xfbc83606492fd1e5, 0x090d52beb7c3f7ae, 427 | /* 195 */ 0x09b9515a1e7b4d7c, 0x1f266a2599da44c0, 428 | 0xa1c49548e2c55504, 0x7ef04287126f15cc, 429 | /* 196 */ 0xfed1659dbd30ef15, 0x8b4ab9eec4e0277b, 430 | 0x884d6236a5df3291, 0x1fd96ea6bf5cf788, 431 | /* 197 */ 0x42a161981f190d9a, 0x61d849507e6052c1, 432 | 0x9fe113bf285a2cd5, 0x7c22d676dbad85d8, 433 | /* 198 */ 0x82e770ed2bfbd27d, 0x4c05b2ece996f5a5, 434 | 0xcd40a9c2b0900150, 0x5895319213d9bf64, 435 | /* 199 */ 0xe7cc5d703fea2e08, 0xb50c491258e2188c, 436 | 0xcce30baa48205bf0, 0x537c659ccfa32d62, 437 | /* 200 */ 0x37b6623a98cfc088, 0xfe9bed1fa4d6aca4, 438 | 0x04d29b8e56a8d1b0, 0x725f71c40b519575, 439 | /* 201 */ 0x28c7f89cd0339ce6, 0x8367b14469ddc18b, 440 | 0x883ada83a6a1652c, 0x585f1974034d6c17, 441 | /* 202 */ 0x89cfb266f1b19188, 0xe63b4863e7c35217, 442 | 0xd88c9da6b4c0526a, 0x3e035c9df0954635, 443 | /* 203 */ 0xdd9d5412fb45de9d, 0xdd684532e4cff40d, 444 | 0x4b5c999b151d671c, 0x2d8c2cc811e7f690, 445 | /* 204 */ 0x7f54be1d90055d40, 0xa464c5df464aaf40, 446 | 0x33979624f0e917be, 0x2c018dc527356b30, 447 | /* 205 */ 0xa5415024e330b3d4, 0x73ff3d96691652d3, 448 | 0x94ec42c4ef9b59f1, 0x0747201618d08e5a, 449 | /* 206 */ 0x4d6ca48aca411c53, 0x66415f2fcfa66119, 450 | 0x9c4dd40051e227ff, 0x59810bc09a02f7eb, 451 | /* 207 */ 0x2a7eb171b3dc101d, 0x441c5ab99ffef68e, 452 | 
0x32025c9b93b359ea, 0x5e8ce0a71e9d112f, 453 | /* 208 */ 0xbfcccb92429503fd, 0xd271ba752f095d55, 454 | 0x345ead5e972d091e, 0x18c8df11a83103ba, 455 | /* 209 */ 0x90cd949a9aed0f4c, 0xc5d1f4cb6660e37e, 456 | 0xb8cac52d56c52e0b, 0x6e42e400c5808e0d, 457 | /* 210 */ 0xa3b46966eeaefd23, 0x0c4f1f0be39ecdca, 458 | 0x189dc8c9d683a51d, 0x51f27f054c09351b, 459 | /* 211 */ 0x4c487ccd2a320682, 0x587ea95bb3df1c96, 460 | 0xc8ccf79e555cb8e8, 0x547dc829a206d73d, 461 | /* 212 */ 0xb822a6cd80c39b06, 0xe96d54732000d4c6, 462 | 0x28535b6f91463b4d, 0x228f4660e2486e1d, 463 | /* 213 */ 0x98799538de8d3abf, 0x8cd8330045ebca6e, 464 | 0x79952a008221e738, 0x4322e1a7535cd2bb, 465 | /* 214 */ 0xb114c11819d1801c, 0x2016e4d84f3f5ec7, 466 | 0xdd0e2df409260f4c, 0x5ec362c0ae5f7266, 467 | /* 215 */ 0xc0462b18b8b2b4ee, 0x7cc8d950274d1afb, 468 | 0xf25f7105436b02d2, 0x43bbf8dcbff9ccd3, 469 | /* 216 */ 0xb6ad1767a039e9df, 0xb0714da8f69d3583, 470 | 0x5e55fa18b42931f5, 0x4ed5558f33c60961, 471 | /* 217 */ 0x1fe37901c647a5dd, 0x593ddf1f8081d357, 472 | 0x0249a4fd813fd7a6, 0x69acca274e9caf61, 473 | /* 218 */ 0x047ba3ea330721c9, 0x83423fc20e7e1ea0, 474 | 0x1df4c0af01314a60, 0x09a62dab89289527, 475 | /* 219 */ 0xa5b325a49cc6cb00, 0xe94b5dc654b56cb6, 476 | 0x3be28779adc994a0, 0x4296e8f8ba3a4aad, 477 | /* 220 */ 0x328689761e451eab, 0x2e4d598bff59594a, 478 | 0x49b96853d7a7084a, 0x4980a319601420a8, 479 | /* 221 */ 0x9565b9e12f552c42, 0x8a5318db7100fe96, 480 | 0x05c90b4d43add0d7, 0x538b4cd66a5d4eda, 481 | /* 222 */ 0xf4e94fc3e89f039f, 0x592c9af26f618045, 482 | 0x08a36eb5fd4b9550, 0x25fffaf6c2ed1419, 483 | /* 223 */ 0x34434459cc79d354, 0xeeecbfb4b1d5476b, 484 | 0xddeb34a061615d99, 0x5129cecceb64b773, 485 | /* 224 */ 0xee43215894993520, 0x772f9c7cf14c0b3b, 486 | 0xd2e2fce306bedad5, 0x715f42b546f06a97, 487 | /* 225 */ 0x434ecdceda5b5f1a, 0x0da17115a49741a9, 488 | 0x680bd77c73edad2e, 0x487c02354edd9041, 489 | /* 226 */ 0xb8efeff3a70ed9c4, 0x56a32aa3e857e302, 490 | 0xdf3a68bd48a2a5a0, 0x07f650b73176c444, 491 | /* 227 */ 
0xe38b9b1626e0ccb1, 0x79e053c18b09fb36, 492 | 0x56d90319c9f94964, 0x1ca941e7ac9ff5c4, 493 | /* 228 */ 0x49c4df29162fa0bb, 0x8488cf3282b33305, 494 | 0x95dfda14cabb437d, 0x3391f78264d5ad86, 495 | /* 229 */ 0x729ae06ae2b5095d, 0xd58a58d73259a946, 496 | 0xe9834262d13921ed, 0x27fedafaa54bb592, 497 | /* 230 */ 0xa99dc5b829ad48bb, 0x5f025742499ee260, 498 | 0x802c8ecd5d7513fd, 0x78ceb3ef3f6dd938, 499 | /* 231 */ 0xc342f44f8a135d94, 0x7b9edb44828cdda3, 500 | 0x9436d11a0537cfe7, 0x5064b164ec1ab4c8, 501 | /* 232 */ 0x7020eccfd37eb2fc, 0x1f31ea3ed90d25fc, 502 | 0x1b930d7bdfa1bb34, 0x5344467a48113044, 503 | /* 233 */ 0x70073170f25e6dfb, 0xe385dc1a50114cc8, 504 | 0x2348698ac8fc4f00, 0x2a77a55284dd40d8, 505 | /* 234 */ 0xfe06afe0c98c6ce4, 0xc235df96dddfd6e4, 506 | 0x1428d01e33bf1ed3, 0x785768ec9300bdaf, 507 | /* 235 */ 0x9702e57a91deb63b, 0x61bdb8bfe5ce8b80, 508 | 0x645b426f3d1d58ac, 0x4804a82227a557bc, 509 | /* 236 */ 0x8e57048ab44d2601, 0x68d6501a4b3a6935, 510 | 0xc39c9ec3f9e1c293, 0x4172f257d4de63e2, 511 | /* 237 */ 0xd368b450330c6401, 0x040d3017418f2391, 512 | 0x2c34bb6090b7d90d, 0x16f649228fdfd51f, 513 | /* 238 */ 0xbea6818e2b928ef5, 0xe28ccf91cdc11e72, 514 | 0x594aaa68e77a36cd, 0x313034806c7ffd0f, 515 | /* 239 */ 0x8a9d27ac2249bd65, 0x19a3b464018e9512, 516 | 0xc26ccff352b37ec7, 0x056f68341d797b21, 517 | /* 240 */ 0x5e79d6757efd2327, 0xfabdbcb6553afe15, 518 | 0xd3e7222c6eaf5a60, 0x7046c76d4dae743b, 519 | /* 241 */ 0x660be872b18d4a55, 0x19992518574e1496, 520 | 0xc103053a302bdcbb, 0x3ed8e9800b218e8e, 521 | /* 242 */ 0x7b0b9239fa75e03e, 0xefe9fb684633c083, 522 | 0x98a35fbe391a7793, 0x6065510fe2d0fe34, 523 | /* 243 */ 0x55cb668548abad0c, 0xb4584548da87e527, 524 | 0x2c43ecea0107c1dd, 0x526028809372de35, 525 | /* 244 */ 0x3415c56af9213b1f, 0x5bee1a4d017e98db, 526 | 0x13f6b105b5cf709b, 0x5ff20e3482b29ab6, 527 | /* 245 */ 0x0aa29c75cc2e6c90, 0xfc7d73ca3a70e206, 528 | 0x899fc38fc4b5c515, 0x250386b124ffc207, 529 | /* 246 */ 0x54ea28d5ae3d2b56, 0x9913149dd6de60ce, 530 | 
0x16694fc58f06d6c1, 0x46b23975eb018fc7, 531 | /* 247 */ 0x470a6a0fb4b7b4e2, 0x5d92475a8f7253de, 532 | 0xabeee5b52fbd3adb, 0x7fa20801a0806968, 533 | /* 248 */ 0x76f3faf19f7714d2, 0xb3e840c12f4660c3, 534 | 0x0fb4cd8df212744e, 0x4b065a251d3a2dd2, 535 | /* 249 */ 0x5cebde383d77cd4a, 0x6adf39df882c9cb1, 536 | 0xa2dd242eb09af759, 0x3147c0e50e5f6422, 537 | /* 250 */ 0x164ca5101d1350db, 0xf8d13479c33fc962, 538 | 0xe640ce4d13e5da08, 0x4bdee0c45061f8ba, 539 | /* 251 */ 0xd7c46dc1a4edb1c9, 0x5514d7b6437fd98a, 540 | 0x58942f6bb2a1c00b, 0x2dffb2ab1d70710e, 541 | /* 252 */ 0xccdfcf2fc18b6d68, 0xa8ebcba8b7806167, 542 | 0x980697f95e2937e3, 0x02fbba1cd0126e8c}; 543 | 544 | #endif /* TABLE_LADDER_8K_H */ 545 | -------------------------------------------------------------------------------- /ext/x25519_precomputed/x25519_precomputed.c: -------------------------------------------------------------------------------- 1 | /* 2 | Ruby C extension providing bindings to the rfc7748_precomputed implementation of 3 | the X25519 Diffie-Hellman algorithm 4 | */ 5 | 6 | #include "ruby.h" 7 | #include "x25519_precomputed.h" 8 | 9 | static VALUE mX25519 = Qnil; 10 | static VALUE mX25519_Provider = Qnil; 11 | static VALUE mX25519_Provider_Precomputed = Qnil; 12 | 13 | static VALUE mX25519_Provider_Precomputed_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u); 14 | static VALUE mX25519_Provider_Precomputed_scalarmult_base(VALUE self, VALUE scalar); 15 | static VALUE mX25519_is_available(VALUE self); 16 | 17 | /* Initialize the x25519_precomputed C extension */ 18 | void Init_x25519_precomputed() 19 | { 20 | mX25519 = rb_define_module("X25519"); 21 | mX25519_Provider = rb_define_module_under(mX25519, "Provider"); 22 | mX25519_Provider_Precomputed = rb_define_module_under(mX25519_Provider, "Precomputed"); 23 | 24 | rb_define_singleton_method(mX25519_Provider_Precomputed, "scalarmult", mX25519_Provider_Precomputed_scalarmult, 2); 25 | rb_define_singleton_method(mX25519_Provider_Precomputed, 
"scalarmult_base", mX25519_Provider_Precomputed_scalarmult_base, 1); 26 | rb_define_singleton_method(mX25519_Provider_Precomputed, "available?", mX25519_is_available, 0); 27 | } 28 | 29 | /* Variable-base scalar multiplication */ 30 | static VALUE mX25519_Provider_Precomputed_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u) 31 | { 32 | /* X25519_KEY ensures inputs are aligned at 32-bytes */ 33 | X25519_KEY raw_scalar, raw_montgomery_u, product; 34 | 35 | StringValue(scalar); 36 | if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) { 37 | rb_raise( 38 | rb_eArgError, 39 | "expected %d-byte scalar, got %ld", 40 | X25519_KEYSIZE_BYTES, 41 | RSTRING_LEN(scalar) 42 | ); 43 | } 44 | 45 | StringValue(montgomery_u); 46 | if(RSTRING_LEN(montgomery_u) != X25519_KEYSIZE_BYTES) { 47 | rb_raise( 48 | rb_eArgError, 49 | "expected %d-byte Montgomery-u coordinate, got %ld", 50 | X25519_KEYSIZE_BYTES, 51 | RSTRING_LEN(montgomery_u) 52 | ); 53 | } 54 | 55 | memcpy(raw_scalar, RSTRING_PTR(scalar), X25519_KEYSIZE_BYTES); 56 | memcpy(raw_montgomery_u, RSTRING_PTR(montgomery_u), X25519_KEYSIZE_BYTES); 57 | x25519_precomputed_scalarmult(product, raw_scalar, raw_montgomery_u); 58 | 59 | return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES); 60 | } 61 | 62 | /* Fixed-base scalar multiplication */ 63 | static VALUE mX25519_Provider_Precomputed_scalarmult_base(VALUE self, VALUE scalar) 64 | { 65 | /* X25519_KEY ensures inputs are aligned at 32-bytes */ 66 | X25519_KEY raw_scalar, product; 67 | 68 | StringValue(scalar); 69 | if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) { 70 | rb_raise( 71 | rb_eArgError, 72 | "expected %d-byte scalar, got %ld", 73 | X25519_KEYSIZE_BYTES, 74 | RSTRING_LEN(scalar) 75 | ); 76 | } 77 | 78 | memcpy(raw_scalar, RSTRING_PTR(scalar), X25519_KEYSIZE_BYTES); 79 | x25519_precomputed_scalarmult_base(product, raw_scalar); 80 | 81 | return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES); 82 | } 83 | 84 | /* Is the x25519_precomputed backend 
supported on this CPU? */ 85 | static VALUE mX25519_is_available(VALUE self) 86 | { 87 | return check_4th_gen_intel_core_features() ? Qtrue : Qfalse; 88 | } 89 | -------------------------------------------------------------------------------- /ext/x25519_precomputed/x25519_precomputed.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2017, Armando Faz . All rights reserved. 3 | * Institute of Computing. 4 | * University of Campinas, Brazil. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions 8 | * are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * * Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * * Neither the name of University of Campinas nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | * OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef X25519_PRECOMPUTED_H 35 | #define X25519_PRECOMPUTED_H 36 | 37 | #include 38 | 39 | #ifndef ALIGN_BYTES 40 | #define ALIGN_BYTES 32 41 | #endif 42 | 43 | #ifndef ALIGN 44 | #ifdef __INTEL_COMPILER 45 | #define ALIGN __declspec(align(ALIGN_BYTES)) 46 | #else 47 | #define ALIGN __attribute__((aligned(ALIGN_BYTES))) 48 | #endif 49 | #endif 50 | 51 | #define X25519_KEYSIZE_BYTES 32 52 | typedef ALIGN uint8_t X25519_KEY[X25519_KEYSIZE_BYTES]; 53 | 54 | void x25519_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key, uint8_t *session_key); 55 | void x25519_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key); 56 | int check_4th_gen_intel_core_features(); 57 | 58 | #endif /* X25519_PRECOMPUTED_H */ 59 | -------------------------------------------------------------------------------- /ext/x25519_precomputed/x25519_x64.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2017, Armando Faz . All rights reserved. 3 | * Institute of Computing. 4 | * University of Campinas, Brazil. 5 | * 6 | * Copyright (C) 2018 Jason A. Donenfeld . All Rights Reserved. 
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * * Redistributions in binary form must reproduce the above 15 | * copyright notice, this list of conditions and the following 16 | * disclaimer in the documentation and/or other materials provided 17 | * with the distribution. 18 | * * Neither the name of University of Campinas nor the names of its 19 | * contributors may be used to endorse or promote products derived 20 | * from this software without specific prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 33 | * OF THE POSSIBILITY OF SUCH DAMAGE. 
34 | */ 35 | 36 | #include 37 | #include "fp25519_x64.h" 38 | #include "x25519_precomputed.h" 39 | #include "table_ladder_x25519.h" 40 | 41 | static inline void cswap_x64(uint64_t bit, uint64_t *const px, 42 | uint64_t *const py) { 43 | int i = 0; 44 | uint64_t mask = (uint64_t)0 - bit; 45 | for (i = 0; i < NUM_WORDS_ELTFP25519_X64; i++) { 46 | uint64_t t = mask & (px[i] ^ py[i]); 47 | px[i] = px[i] ^ t; 48 | py[i] = py[i] ^ t; 49 | } 50 | } 51 | 52 | 53 | /** Original rfc7748_precomputed name: 'x25519_shared_secret_x64' */ 54 | void x25519_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key, 55 | uint8_t *session_key) { 56 | ALIGN uint64_t buffer[4 * NUM_WORDS_ELTFP25519_X64]; 57 | ALIGN uint64_t coordinates[4 * NUM_WORDS_ELTFP25519_X64]; 58 | ALIGN uint64_t workspace[6 * NUM_WORDS_ELTFP25519_X64]; 59 | ALIGN uint8_t session[X25519_KEYSIZE_BYTES]; 60 | ALIGN uint8_t private[X25519_KEYSIZE_BYTES]; 61 | 62 | int i = 0, j = 0; 63 | uint64_t prev = 0; 64 | uint64_t *const X1 = (uint64_t *)session; 65 | uint64_t *const key = (uint64_t *)private; 66 | uint64_t *const Px = coordinates + 0; 67 | uint64_t *const Pz = coordinates + 4; 68 | uint64_t *const Qx = coordinates + 8; 69 | uint64_t *const Qz = coordinates + 12; 70 | uint64_t *const X2 = Qx; 71 | uint64_t *const Z2 = Qz; 72 | uint64_t *const X3 = Px; 73 | uint64_t *const Z3 = Pz; 74 | uint64_t *const X2Z2 = Qx; 75 | uint64_t *const X3Z3 = Px; 76 | 77 | uint64_t *const A = workspace + 0; 78 | uint64_t *const B = workspace + 4; 79 | uint64_t *const D = workspace + 8; 80 | uint64_t *const C = workspace + 12; 81 | uint64_t *const DA = workspace + 16; 82 | uint64_t *const CB = workspace + 20; 83 | uint64_t *const AB = A; 84 | uint64_t *const DC = D; 85 | uint64_t *const DACB = DA; 86 | uint64_t *const buffer_1w = buffer; 87 | uint64_t *const buffer_2w = buffer; 88 | 89 | memcpy(private, private_key, sizeof(private)); 90 | memcpy(session, session_key, sizeof(session)); 91 | 92 | /* clampC function */ 93 | private 
94 | [0] = private[0] & (~(uint8_t)0x7); 95 | private 96 | [X25519_KEYSIZE_BYTES - 1] = 97 | (uint8_t)64 | (private[X25519_KEYSIZE_BYTES - 1] & (uint8_t)0x7F); 98 | 99 | /** 100 | * As in the draft: 101 | * When receiving such an array, implementations of curve25519 102 | * MUST mask the most-significant bit in the final byte. This 103 | * is done to preserve compatibility with point formats which 104 | * reserve the sign bit for use in other protocols and to 105 | * increase resistance to implementation fingerprinting 106 | **/ 107 | session[X25519_KEYSIZE_BYTES - 1] &= (1 << (255 % 8)) - 1; 108 | 109 | copy_EltFp25519_1w_x64(Px, X1); 110 | setzero_EltFp25519_1w_x64(Pz); 111 | setzero_EltFp25519_1w_x64(Qx); 112 | setzero_EltFp25519_1w_x64(Qz); 113 | 114 | Pz[0] = 1; 115 | Qx[0] = 1; 116 | 117 | /* main-loop */ 118 | prev = 0; 119 | j = 62; 120 | for (i = 3; i >= 0; i--) { 121 | while (j >= 0) { 122 | uint64_t bit = (key[i] >> j) & 0x1; 123 | uint64_t swap = bit ^ prev; 124 | prev = bit; 125 | 126 | add_EltFp25519_1w_x64(A, X2, Z2); /* A = (X2+Z2) */ 127 | sub_EltFp25519_1w_x64(B, X2, Z2); /* B = (X2-Z2) */ 128 | add_EltFp25519_1w_x64(C, X3, Z3); /* C = (X3+Z3) */ 129 | sub_EltFp25519_1w_x64(D, X3, Z3); /* D = (X3-Z3) */ 130 | mul_EltFp25519_2w_x64(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ 131 | 132 | cswap_x64(swap, A, C); 133 | cswap_x64(swap, B, D); 134 | 135 | sqr_EltFp25519_2w_x64(AB); /* [AA|BB] = [A^2|B^2] */ 136 | add_EltFp25519_1w_x64(X3, DA, CB); /* X3 = (DA+CB) */ 137 | sub_EltFp25519_1w_x64(Z3, DA, CB); /* Z3 = (DA-CB) */ 138 | sqr_EltFp25519_2w_x64(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ 139 | 140 | copy_EltFp25519_1w_x64(X2, B); /* X2 = B^2 */ 141 | sub_EltFp25519_1w_x64(Z2, A, B); /* Z2 = E = AA-BB */ 142 | 143 | mul_a24_EltFp25519_1w_x64(B, Z2); /* B = a24*E */ 144 | add_EltFp25519_1w_x64(B, B, X2); /* B = a24*E+B */ 145 | mul_EltFp25519_2w_x64(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ 146 | mul_EltFp25519_1w_x64(Z3, Z3, X1); /* Z3 = 
Z3*X1 */ 147 | j--; 148 | } 149 | j = 63; 150 | } 151 | 152 | inv_EltFp25519_1w_x64(A, Qz); 153 | mul_EltFp25519_1w_x64((uint64_t *)shared, Qx, A); 154 | fred_EltFp25519_1w_x64((uint64_t *)shared); 155 | } 156 | 157 | /* Original rfc7748_precomputed name: 'x25519_keygen_precmp_x64' */ 158 | void x25519_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key) { 159 | ALIGN uint64_t buffer[4 * NUM_WORDS_ELTFP25519_X64]; 160 | ALIGN uint64_t coordinates[4 * NUM_WORDS_ELTFP25519_X64]; 161 | ALIGN uint64_t workspace[4 * NUM_WORDS_ELTFP25519_X64]; 162 | ALIGN uint8_t private[X25519_KEYSIZE_BYTES]; 163 | 164 | int i = 0, j = 0, k = 0; 165 | uint64_t *const key = (uint64_t *)private; 166 | uint64_t *const Ur1 = coordinates + 0; 167 | uint64_t *const Zr1 = coordinates + 4; 168 | uint64_t *const Ur2 = coordinates + 8; 169 | uint64_t *const Zr2 = coordinates + 12; 170 | 171 | uint64_t *const UZr1 = coordinates + 0; 172 | uint64_t *const ZUr2 = coordinates + 8; 173 | 174 | uint64_t *const A = workspace + 0; 175 | uint64_t *const B = workspace + 4; 176 | uint64_t *const C = workspace + 8; 177 | uint64_t *const D = workspace + 12; 178 | 179 | uint64_t *const AB = workspace + 0; 180 | uint64_t *const CD = workspace + 8; 181 | 182 | uint64_t *const buffer_1w = buffer; 183 | uint64_t *const buffer_2w = buffer; 184 | uint64_t *P = (uint64_t *)Table_Ladder_8k; 185 | 186 | memcpy(private, private_key, sizeof(private)); 187 | 188 | /* clampC function */ 189 | private 190 | [0] = private[0] & (~(uint8_t)0x7); 191 | private 192 | [X25519_KEYSIZE_BYTES - 1] = 193 | (uint8_t)64 | (private[X25519_KEYSIZE_BYTES - 1] & (uint8_t)0x7F); 194 | 195 | setzero_EltFp25519_1w_x64(Ur1); 196 | setzero_EltFp25519_1w_x64(Zr1); 197 | setzero_EltFp25519_1w_x64(Zr2); 198 | Ur1[0] = 1; 199 | Zr1[0] = 1; 200 | Zr2[0] = 1; 201 | 202 | /* G-S */ 203 | Ur2[3] = 0x1eaecdeee27cab34; 204 | Ur2[2] = 0xadc7a0b9235d48e2; 205 | Ur2[1] = 0xbbf095ae14b2edf8; 206 | Ur2[0] = 0x7e94e1fec82faabd; 207 | 208 | 
/* main-loop */ 209 | const int ite[4] = {64, 64, 64, 63}; 210 | const int q = 3; 211 | uint64_t swap = 1; 212 | 213 | j = q; 214 | for (i = 0; i < NUM_WORDS_ELTFP25519_X64; i++) { 215 | while (j < ite[i]) { 216 | k = (64 * i + j - q); 217 | uint64_t bit = (key[i] >> j) & 0x1; 218 | swap = swap ^ bit; 219 | cswap_x64(swap, Ur1, Ur2); 220 | cswap_x64(swap, Zr1, Zr2); 221 | swap = bit; 222 | /** Addition */ 223 | sub_EltFp25519_1w_x64(B, Ur1, Zr1); /* B = Ur1-Zr1 */ 224 | add_EltFp25519_1w_x64(A, Ur1, Zr1); /* A = Ur1+Zr1 */ 225 | mul_EltFp25519_1w_x64(C, &P[4 * k], B); /* C = M0-B */ 226 | sub_EltFp25519_1w_x64(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ 227 | add_EltFp25519_1w_x64(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ 228 | sqr_EltFp25519_2w_x64(AB); /* A = A^2 | B = B^2 */ 229 | mul_EltFp25519_2w_x64(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ 230 | j++; 231 | } 232 | j = 0; 233 | } 234 | 235 | /** Doubling */ 236 | for (i = 0; i < q; i++) { 237 | add_EltFp25519_1w_x64(A, Ur1, Zr1); /* A = Ur1+Zr1 */ 238 | sub_EltFp25519_1w_x64(B, Ur1, Zr1); /* B = Ur1-Zr1 */ 239 | sqr_EltFp25519_2w_x64(AB); /* A = A**2 B = B**2 */ 240 | copy_EltFp25519_1w_x64(C, B); /* C = B */ 241 | sub_EltFp25519_1w_x64(B, A, B); /* B = A-B */ 242 | mul_a24_EltFp25519_1w_x64(D, B); /* D = my_a24*B */ 243 | add_EltFp25519_1w_x64(D, D, C); /* D = D+C */ 244 | mul_EltFp25519_2w_x64(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ 245 | } 246 | 247 | /* Convert to affine coordinates */ 248 | inv_EltFp25519_1w_x64(A, Zr1); 249 | mul_EltFp25519_1w_x64((uint64_t *)session_key, Ur1, A); 250 | fred_EltFp25519_1w_x64((uint64_t *)session_key); 251 | } 252 | -------------------------------------------------------------------------------- /ext/x25519_ref10/api.h: -------------------------------------------------------------------------------- 1 | #define CRYPTO_BYTES 32 2 | #define CRYPTO_SCALARBYTES 32 3 | -------------------------------------------------------------------------------- 
/ext/x25519_ref10/base.c: -------------------------------------------------------------------------------- 1 | #include "fe.h" 2 | #include "x25519_ref10.h" 3 | 4 | static const uint8_t x25519_basepoint[32] = {9}; 5 | 6 | int x25519_ref10_scalarmult_base(uint8_t *q, const uint8_t *n) 7 | { 8 | return x25519_ref10_scalarmult(q,n,x25519_basepoint); 9 | } 10 | -------------------------------------------------------------------------------- /ext/x25519_ref10/extconf.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # rubocop:disable Style/GlobalVars 4 | 5 | require "mkmf" 6 | 7 | $CFLAGS << " -Wall -O3 -pedantic -std=c99" 8 | 9 | create_makefile "x25519_ref10" 10 | 11 | # rubocop:enable Style/GlobalVars 12 | -------------------------------------------------------------------------------- /ext/x25519_ref10/fe.c: --------------------------------------------------------------------------------
#include "fe.h"

/*
h = 0

Writes zero to all ten limbs h[0]..h[9].
*/

void fe_0(fe h)
{
  int limb;

  for (limb = 0; limb < 10; ++limb) {
    h[limb] = 0;
  }
}

/*
h = 1

Limb 0 becomes 1, limbs 1..9 become 0.
*/

void fe_1(fe h)
{
  int limb;

  for (limb = 1; limb < 10; ++limb) {
    h[limb] = 0;
  }
  h[0] = 1;
}

/*
h = f + g
Can overlap h with f or g.

Preconditions:
   |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.

Postconditions:
   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
49 | */ 50 | 51 | void fe_add(fe h,fe f,fe g) 52 | { 53 | int32_t f0 = f[0]; 54 | int32_t f1 = f[1]; 55 | int32_t f2 = f[2]; 56 | int32_t f3 = f[3]; 57 | int32_t f4 = f[4]; 58 | int32_t f5 = f[5]; 59 | int32_t f6 = f[6]; 60 | int32_t f7 = f[7]; 61 | int32_t f8 = f[8]; 62 | int32_t f9 = f[9]; 63 | int32_t g0 = g[0]; 64 | int32_t g1 = g[1]; 65 | int32_t g2 = g[2]; 66 | int32_t g3 = g[3]; 67 | int32_t g4 = g[4]; 68 | int32_t g5 = g[5]; 69 | int32_t g6 = g[6]; 70 | int32_t g7 = g[7]; 71 | int32_t g8 = g[8]; 72 | int32_t g9 = g[9]; 73 | int32_t h0 = f0 + g0; 74 | int32_t h1 = f1 + g1; 75 | int32_t h2 = f2 + g2; 76 | int32_t h3 = f3 + g3; 77 | int32_t h4 = f4 + g4; 78 | int32_t h5 = f5 + g5; 79 | int32_t h6 = f6 + g6; 80 | int32_t h7 = f7 + g7; 81 | int32_t h8 = f8 + g8; 82 | int32_t h9 = f9 + g9; 83 | h[0] = h0; 84 | h[1] = h1; 85 | h[2] = h2; 86 | h[3] = h3; 87 | h[4] = h4; 88 | h[5] = h5; 89 | h[6] = h6; 90 | h[7] = h7; 91 | h[8] = h8; 92 | h[9] = h9; 93 | } 94 | 95 | /* 96 | h = f 97 | */ 98 | 99 | void fe_copy(fe h,fe f) 100 | { 101 | int32_t f0 = f[0]; 102 | int32_t f1 = f[1]; 103 | int32_t f2 = f[2]; 104 | int32_t f3 = f[3]; 105 | int32_t f4 = f[4]; 106 | int32_t f5 = f[5]; 107 | int32_t f6 = f[6]; 108 | int32_t f7 = f[7]; 109 | int32_t f8 = f[8]; 110 | int32_t f9 = f[9]; 111 | h[0] = f0; 112 | h[1] = f1; 113 | h[2] = f2; 114 | h[3] = f3; 115 | h[4] = f4; 116 | h[5] = f5; 117 | h[6] = f6; 118 | h[7] = f7; 119 | h[8] = f8; 120 | h[9] = f9; 121 | } 122 | 123 | /* 124 | Replace (f,g) with (g,f) if b == 1; 125 | replace (f,g) with (f,g) if b == 0. 126 | 127 | Preconditions: b in {0,1}. 
128 | */ 129 | 130 | void fe_cswap(fe f,fe g,unsigned int b) 131 | { 132 | int32_t f0 = f[0]; 133 | int32_t f1 = f[1]; 134 | int32_t f2 = f[2]; 135 | int32_t f3 = f[3]; 136 | int32_t f4 = f[4]; 137 | int32_t f5 = f[5]; 138 | int32_t f6 = f[6]; 139 | int32_t f7 = f[7]; 140 | int32_t f8 = f[8]; 141 | int32_t f9 = f[9]; 142 | int32_t g0 = g[0]; 143 | int32_t g1 = g[1]; 144 | int32_t g2 = g[2]; 145 | int32_t g3 = g[3]; 146 | int32_t g4 = g[4]; 147 | int32_t g5 = g[5]; 148 | int32_t g6 = g[6]; 149 | int32_t g7 = g[7]; 150 | int32_t g8 = g[8]; 151 | int32_t g9 = g[9]; 152 | int32_t x0 = f0 ^ g0; 153 | int32_t x1 = f1 ^ g1; 154 | int32_t x2 = f2 ^ g2; 155 | int32_t x3 = f3 ^ g3; 156 | int32_t x4 = f4 ^ g4; 157 | int32_t x5 = f5 ^ g5; 158 | int32_t x6 = f6 ^ g6; 159 | int32_t x7 = f7 ^ g7; 160 | int32_t x8 = f8 ^ g8; 161 | int32_t x9 = f9 ^ g9; 162 | b = -b; 163 | x0 &= b; 164 | x1 &= b; 165 | x2 &= b; 166 | x3 &= b; 167 | x4 &= b; 168 | x5 &= b; 169 | x6 &= b; 170 | x7 &= b; 171 | x8 &= b; 172 | x9 &= b; 173 | f[0] = f0 ^ x0; 174 | f[1] = f1 ^ x1; 175 | f[2] = f2 ^ x2; 176 | f[3] = f3 ^ x3; 177 | f[4] = f4 ^ x4; 178 | f[5] = f5 ^ x5; 179 | f[6] = f6 ^ x6; 180 | f[7] = f7 ^ x7; 181 | f[8] = f8 ^ x8; 182 | f[9] = f9 ^ x9; 183 | g[0] = g0 ^ x0; 184 | g[1] = g1 ^ x1; 185 | g[2] = g2 ^ x2; 186 | g[3] = g3 ^ x3; 187 | g[4] = g4 ^ x4; 188 | g[5] = g5 ^ x5; 189 | g[6] = g6 ^ x6; 190 | g[7] = g7 ^ x7; 191 | g[8] = g8 ^ x8; 192 | g[9] = g9 ^ x9; 193 | } 194 | 195 | static uint64_t load_3(const unsigned char *in) 196 | { 197 | uint64_t result; 198 | result = (uint64_t) in[0]; 199 | result |= ((uint64_t) in[1]) << 8; 200 | result |= ((uint64_t) in[2]) << 16; 201 | return result; 202 | } 203 | 204 | static uint64_t load_4(const unsigned char *in) 205 | { 206 | uint64_t result; 207 | result = (uint64_t) in[0]; 208 | result |= ((uint64_t) in[1]) << 8; 209 | result |= ((uint64_t) in[2]) << 16; 210 | result |= ((uint64_t) in[3]) << 24; 211 | return result; 212 | } 213 | 214 | void 
fe_frombytes(fe h,const unsigned char *s) 215 | { /* Unpacks the 32 bytes s[0]..s[31] (read little-endian through load_3/load_4) into the ten limbs h[0]..h[9]. */ 216 | int64_t h0 = load_4(s); 217 | int64_t h1 = load_3(s + 4) << 6; 218 | int64_t h2 = load_3(s + 7) << 5; 219 | int64_t h3 = load_3(s + 10) << 3; 220 | int64_t h4 = load_3(s + 13) << 2; 221 | int64_t h5 = load_4(s + 16); 222 | int64_t h6 = load_3(s + 20) << 7; 223 | int64_t h7 = load_3(s + 23) << 5; 224 | int64_t h8 = load_3(s + 26) << 4; 225 | /* 8388607 = 2^23-1: drops the top bit of the last 3-byte load, i.e. the high bit of s[31] */ int64_t h9 = (load_3(s + 29) & 8388607) << 2; 226 | int64_t carry0; 227 | int64_t carry1; 228 | int64_t carry2; 229 | int64_t carry3; 230 | int64_t carry4; 231 | int64_t carry5; 232 | int64_t carry6; 233 | int64_t carry7; 234 | int64_t carry8; 235 | int64_t carry9; 236 | /* Carry pass 1: limbs h9,h1,h3,h5,h7 are reduced modulo 2^25 with rounding (add 2^24, shift by 25); the carry out of h9 is added to h0 multiplied by 19. */ 237 | carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 238 | carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 239 | carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 240 | carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 241 | carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 242 | /* Carry pass 2: limbs h0,h2,h4,h6,h8 are reduced modulo 2^26 with rounding (add 2^25, shift by 26); each carry goes into the next limb. */ 243 | carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 244 | carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 245 | carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 246 | carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 247 | carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 248 | /* Store the 64-bit working limbs back as 32-bit limbs. */ 249 | h[0] = (int32_t)h0; 250 | h[1] = (int32_t)h1; 251 | h[2] = (int32_t)h2; 252 | h[3] = (int32_t)h3; 253 | h[4] = (int32_t)h4; 254 | h[5] = (int32_t)h5; 255 | h[6] = (int32_t)h6; 256 | h[7] = (int32_t)h7; 257 | h[8] = (int32_t)h8; 258 | h[9] = (int32_t)h9; 259 | } 260 | /* fe_invert: the statements of this function are textually included from pow225521.h, which uses the locals t0..t3 and i declared here. NOTE(review): presumably computes out = z^(2^255-21), i.e. the inverse of z - confirm against pow225521.h. */ 261 | void fe_invert(fe out,fe z) 262 | { 263 | fe t0; 264 | fe t1; 265 | fe t2; 266 | fe t3; 267 | int i; 268 | 269 | #include "pow225521.h" 270 | 271 | return; 272 | } 273 | 274 | /* 275 | h = f * g 276 | Can overlap h with f
or g. 277 | 278 | Preconditions: 279 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 280 | |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 281 | 282 | Postconditions: 283 | |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 284 | */ 285 | 286 | /* 287 | Notes on implementation strategy: 288 | 289 | Using schoolbook multiplication. 290 | Karatsuba would save a little in some cost models. 291 | 292 | Most multiplications by 2 and 19 are 32-bit precomputations; 293 | cheaper than 64-bit postcomputations. 294 | 295 | There is one remaining multiplication by 19 in the carry chain; 296 | one *19 precomputation can be merged into this, 297 | but the resulting data flow is considerably less clean. 298 | 299 | There are 12 carries below. 300 | 10 of them are 2-way parallelizable and vectorizable. 301 | Can get away with 11 carries, but then data flow is much deeper. 302 | 303 | With tighter constraints on inputs can squeeze carries into int32. 304 | */ 305 | 306 | void fe_mul(fe h,fe f,fe g) 307 | { 308 | int32_t f0 = f[0]; 309 | int32_t f1 = f[1]; 310 | int32_t f2 = f[2]; 311 | int32_t f3 = f[3]; 312 | int32_t f4 = f[4]; 313 | int32_t f5 = f[5]; 314 | int32_t f6 = f[6]; 315 | int32_t f7 = f[7]; 316 | int32_t f8 = f[8]; 317 | int32_t f9 = f[9]; 318 | int32_t g0 = g[0]; 319 | int32_t g1 = g[1]; 320 | int32_t g2 = g[2]; 321 | int32_t g3 = g[3]; 322 | int32_t g4 = g[4]; 323 | int32_t g5 = g[5]; 324 | int32_t g6 = g[6]; 325 | int32_t g7 = g[7]; 326 | int32_t g8 = g[8]; 327 | int32_t g9 = g[9]; 328 | int32_t g1_19 = 19 * g1; /* 1.4*2^29 */ 329 | int32_t g2_19 = 19 * g2; /* 1.4*2^30; still ok */ 330 | int32_t g3_19 = 19 * g3; 331 | int32_t g4_19 = 19 * g4; 332 | int32_t g5_19 = 19 * g5; 333 | int32_t g6_19 = 19 * g6; 334 | int32_t g7_19 = 19 * g7; 335 | int32_t g8_19 = 19 * g8; 336 | int32_t g9_19 = 19 * g9; 337 | int32_t f1_2 = 2 * f1; 338 | int32_t f3_2 = 2 * f3; 339 | int32_t f5_2 = 2 * f5; 340 | int32_t f7_2 = 2 * f7; 341 | int32_t f9_2 = 2 * f9; 342 | 
int64_t f0g0 = f0 * (int64_t) g0; 343 | int64_t f0g1 = f0 * (int64_t) g1; 344 | int64_t f0g2 = f0 * (int64_t) g2; 345 | int64_t f0g3 = f0 * (int64_t) g3; 346 | int64_t f0g4 = f0 * (int64_t) g4; 347 | int64_t f0g5 = f0 * (int64_t) g5; 348 | int64_t f0g6 = f0 * (int64_t) g6; 349 | int64_t f0g7 = f0 * (int64_t) g7; 350 | int64_t f0g8 = f0 * (int64_t) g8; 351 | int64_t f0g9 = f0 * (int64_t) g9; 352 | int64_t f1g0 = f1 * (int64_t) g0; 353 | int64_t f1g1_2 = f1_2 * (int64_t) g1; 354 | int64_t f1g2 = f1 * (int64_t) g2; 355 | int64_t f1g3_2 = f1_2 * (int64_t) g3; 356 | int64_t f1g4 = f1 * (int64_t) g4; 357 | int64_t f1g5_2 = f1_2 * (int64_t) g5; 358 | int64_t f1g6 = f1 * (int64_t) g6; 359 | int64_t f1g7_2 = f1_2 * (int64_t) g7; 360 | int64_t f1g8 = f1 * (int64_t) g8; 361 | int64_t f1g9_38 = f1_2 * (int64_t) g9_19; 362 | int64_t f2g0 = f2 * (int64_t) g0; 363 | int64_t f2g1 = f2 * (int64_t) g1; 364 | int64_t f2g2 = f2 * (int64_t) g2; 365 | int64_t f2g3 = f2 * (int64_t) g3; 366 | int64_t f2g4 = f2 * (int64_t) g4; 367 | int64_t f2g5 = f2 * (int64_t) g5; 368 | int64_t f2g6 = f2 * (int64_t) g6; 369 | int64_t f2g7 = f2 * (int64_t) g7; 370 | int64_t f2g8_19 = f2 * (int64_t) g8_19; 371 | int64_t f2g9_19 = f2 * (int64_t) g9_19; 372 | int64_t f3g0 = f3 * (int64_t) g0; 373 | int64_t f3g1_2 = f3_2 * (int64_t) g1; 374 | int64_t f3g2 = f3 * (int64_t) g2; 375 | int64_t f3g3_2 = f3_2 * (int64_t) g3; 376 | int64_t f3g4 = f3 * (int64_t) g4; 377 | int64_t f3g5_2 = f3_2 * (int64_t) g5; 378 | int64_t f3g6 = f3 * (int64_t) g6; 379 | int64_t f3g7_38 = f3_2 * (int64_t) g7_19; 380 | int64_t f3g8_19 = f3 * (int64_t) g8_19; 381 | int64_t f3g9_38 = f3_2 * (int64_t) g9_19; 382 | int64_t f4g0 = f4 * (int64_t) g0; 383 | int64_t f4g1 = f4 * (int64_t) g1; 384 | int64_t f4g2 = f4 * (int64_t) g2; 385 | int64_t f4g3 = f4 * (int64_t) g3; 386 | int64_t f4g4 = f4 * (int64_t) g4; 387 | int64_t f4g5 = f4 * (int64_t) g5; 388 | int64_t f4g6_19 = f4 * (int64_t) g6_19; 389 | int64_t f4g7_19 = f4 * (int64_t) g7_19; 390 
| int64_t f4g8_19 = f4 * (int64_t) g8_19; 391 | int64_t f4g9_19 = f4 * (int64_t) g9_19; 392 | int64_t f5g0 = f5 * (int64_t) g0; 393 | int64_t f5g1_2 = f5_2 * (int64_t) g1; 394 | int64_t f5g2 = f5 * (int64_t) g2; 395 | int64_t f5g3_2 = f5_2 * (int64_t) g3; 396 | int64_t f5g4 = f5 * (int64_t) g4; 397 | int64_t f5g5_38 = f5_2 * (int64_t) g5_19; 398 | int64_t f5g6_19 = f5 * (int64_t) g6_19; 399 | int64_t f5g7_38 = f5_2 * (int64_t) g7_19; 400 | int64_t f5g8_19 = f5 * (int64_t) g8_19; 401 | int64_t f5g9_38 = f5_2 * (int64_t) g9_19; 402 | int64_t f6g0 = f6 * (int64_t) g0; 403 | int64_t f6g1 = f6 * (int64_t) g1; 404 | int64_t f6g2 = f6 * (int64_t) g2; 405 | int64_t f6g3 = f6 * (int64_t) g3; 406 | int64_t f6g4_19 = f6 * (int64_t) g4_19; 407 | int64_t f6g5_19 = f6 * (int64_t) g5_19; 408 | int64_t f6g6_19 = f6 * (int64_t) g6_19; 409 | int64_t f6g7_19 = f6 * (int64_t) g7_19; 410 | int64_t f6g8_19 = f6 * (int64_t) g8_19; 411 | int64_t f6g9_19 = f6 * (int64_t) g9_19; 412 | int64_t f7g0 = f7 * (int64_t) g0; 413 | int64_t f7g1_2 = f7_2 * (int64_t) g1; 414 | int64_t f7g2 = f7 * (int64_t) g2; 415 | int64_t f7g3_38 = f7_2 * (int64_t) g3_19; 416 | int64_t f7g4_19 = f7 * (int64_t) g4_19; 417 | int64_t f7g5_38 = f7_2 * (int64_t) g5_19; 418 | int64_t f7g6_19 = f7 * (int64_t) g6_19; 419 | int64_t f7g7_38 = f7_2 * (int64_t) g7_19; 420 | int64_t f7g8_19 = f7 * (int64_t) g8_19; 421 | int64_t f7g9_38 = f7_2 * (int64_t) g9_19; 422 | int64_t f8g0 = f8 * (int64_t) g0; 423 | int64_t f8g1 = f8 * (int64_t) g1; 424 | int64_t f8g2_19 = f8 * (int64_t) g2_19; 425 | int64_t f8g3_19 = f8 * (int64_t) g3_19; 426 | int64_t f8g4_19 = f8 * (int64_t) g4_19; 427 | int64_t f8g5_19 = f8 * (int64_t) g5_19; 428 | int64_t f8g6_19 = f8 * (int64_t) g6_19; 429 | int64_t f8g7_19 = f8 * (int64_t) g7_19; 430 | int64_t f8g8_19 = f8 * (int64_t) g8_19; 431 | int64_t f8g9_19 = f8 * (int64_t) g9_19; 432 | int64_t f9g0 = f9 * (int64_t) g0; 433 | int64_t f9g1_38 = f9_2 * (int64_t) g1_19; 434 | int64_t f9g2_19 = f9 * (int64_t) 
g2_19; 435 | int64_t f9g3_38 = f9_2 * (int64_t) g3_19; 436 | int64_t f9g4_19 = f9 * (int64_t) g4_19; 437 | int64_t f9g5_38 = f9_2 * (int64_t) g5_19; 438 | int64_t f9g6_19 = f9 * (int64_t) g6_19; 439 | int64_t f9g7_38 = f9_2 * (int64_t) g7_19; 440 | int64_t f9g8_19 = f9 * (int64_t) g8_19; 441 | int64_t f9g9_38 = f9_2 * (int64_t) g9_19; 442 | int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; 443 | int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; 444 | int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; 445 | int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; 446 | int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; 447 | int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; 448 | int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; 449 | int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; 450 | int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; 451 | int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; 452 | int64_t carry0; 453 | int64_t carry1; 454 | int64_t carry2; 455 | int64_t carry3; 456 | int64_t carry4; 457 | int64_t carry5; 458 | int64_t carry6; 459 | int64_t carry7; 460 | int64_t carry8; 461 | int64_t carry9; 462 | 463 | /* 464 | |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) 465 | i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 466 | |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) 467 | i.e. 
|h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 468 | */ 469 | 470 | carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 471 | carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 472 | /* |h0| <= 2^25 */ 473 | /* |h4| <= 2^25 */ 474 | /* |h1| <= 1.51*2^58 */ 475 | /* |h5| <= 1.51*2^58 */ 476 | 477 | carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 478 | carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 479 | /* |h1| <= 2^24; from now on fits into int32 */ 480 | /* |h5| <= 2^24; from now on fits into int32 */ 481 | /* |h2| <= 1.21*2^59 */ 482 | /* |h6| <= 1.21*2^59 */ 483 | 484 | carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 485 | carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 486 | /* |h2| <= 2^25; from now on fits into int32 unchanged */ 487 | /* |h6| <= 2^25; from now on fits into int32 unchanged */ 488 | /* |h3| <= 1.51*2^58 */ 489 | /* |h7| <= 1.51*2^58 */ 490 | 491 | carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 492 | carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 493 | /* |h3| <= 2^24; from now on fits into int32 unchanged */ 494 | /* |h7| <= 2^24; from now on fits into int32 unchanged */ 495 | /* |h4| <= 1.52*2^33 */ 496 | /* |h8| <= 1.52*2^33 */ 497 | 498 | carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 499 | carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 500 | /* |h4| <= 2^25; from now on fits into int32 unchanged */ 501 | /* |h8| <= 2^25; from now on fits into int32 unchanged */ 502 | /* |h5| <= 1.01*2^24 */ 503 | /* |h9| <= 1.51*2^58 */ 504 | 505 | carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 506 | /* |h9| <= 2^24; from now on fits into int32 unchanged */ 507 | /* |h0| <= 1.8*2^37 */ 508 | 509 | carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 
-= carry0 << 26; 510 | /* |h0| <= 2^25; from now on fits into int32 unchanged */ 511 | /* |h1| <= 1.01*2^24 */ 512 | 513 | h[0] = (int32_t)h0; 514 | h[1] = (int32_t)h1; 515 | h[2] = (int32_t)h2; 516 | h[3] = (int32_t)h3; 517 | h[4] = (int32_t)h4; 518 | h[5] = (int32_t)h5; 519 | h[6] = (int32_t)h6; 520 | h[7] = (int32_t)h7; 521 | h[8] = (int32_t)h8; 522 | h[9] = (int32_t)h9; 523 | } 524 | 525 | /* 526 | h = f * 121666 527 | Can overlap h with f. 528 | 529 | Preconditions: 530 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 531 | 532 | Postconditions: 533 | |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 534 | */ 535 | 536 | void fe_mul121666(fe h,fe f) 537 | { 538 | int32_t f0 = f[0]; 539 | int32_t f1 = f[1]; 540 | int32_t f2 = f[2]; 541 | int32_t f3 = f[3]; 542 | int32_t f4 = f[4]; 543 | int32_t f5 = f[5]; 544 | int32_t f6 = f[6]; 545 | int32_t f7 = f[7]; 546 | int32_t f8 = f[8]; 547 | int32_t f9 = f[9]; 548 | int64_t h0 = f0 * (int64_t) 121666; 549 | int64_t h1 = f1 * (int64_t) 121666; 550 | int64_t h2 = f2 * (int64_t) 121666; 551 | int64_t h3 = f3 * (int64_t) 121666; 552 | int64_t h4 = f4 * (int64_t) 121666; 553 | int64_t h5 = f5 * (int64_t) 121666; 554 | int64_t h6 = f6 * (int64_t) 121666; 555 | int64_t h7 = f7 * (int64_t) 121666; 556 | int64_t h8 = f8 * (int64_t) 121666; 557 | int64_t h9 = f9 * (int64_t) 121666; 558 | int64_t carry0; 559 | int64_t carry1; 560 | int64_t carry2; 561 | int64_t carry3; 562 | int64_t carry4; 563 | int64_t carry5; 564 | int64_t carry6; 565 | int64_t carry7; 566 | int64_t carry8; 567 | int64_t carry9; 568 | 569 | carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 570 | carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 571 | carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 572 | carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 573 | carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 574 | 
575 | carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 576 | carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 577 | carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 578 | carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 579 | carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 580 | 581 | h[0] = (int32_t)h0; 582 | h[1] = (int32_t)h1; 583 | h[2] = (int32_t)h2; 584 | h[3] = (int32_t)h3; 585 | h[4] = (int32_t)h4; 586 | h[5] = (int32_t)h5; 587 | h[6] = (int32_t)h6; 588 | h[7] = (int32_t)h7; 589 | h[8] = (int32_t)h8; 590 | h[9] = (int32_t)h9; 591 | } 592 | 593 | /* 594 | h = f * f 595 | Can overlap h with f. 596 | 597 | Preconditions: 598 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 599 | 600 | Postconditions: 601 | |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 602 | */ 603 | 604 | /* 605 | See fe_mul.c for discussion of implementation strategy. 
606 | */ 607 | 608 | void fe_sq(fe h,fe f) 609 | { 610 | int32_t f0 = f[0]; 611 | int32_t f1 = f[1]; 612 | int32_t f2 = f[2]; 613 | int32_t f3 = f[3]; 614 | int32_t f4 = f[4]; 615 | int32_t f5 = f[5]; 616 | int32_t f6 = f[6]; 617 | int32_t f7 = f[7]; 618 | int32_t f8 = f[8]; 619 | int32_t f9 = f[9]; 620 | int32_t f0_2 = 2 * f0; 621 | int32_t f1_2 = 2 * f1; 622 | int32_t f2_2 = 2 * f2; 623 | int32_t f3_2 = 2 * f3; 624 | int32_t f4_2 = 2 * f4; 625 | int32_t f5_2 = 2 * f5; 626 | int32_t f6_2 = 2 * f6; 627 | int32_t f7_2 = 2 * f7; 628 | int32_t f5_38 = 38 * f5; /* 1.31*2^30 */ 629 | int32_t f6_19 = 19 * f6; /* 1.31*2^30 */ 630 | int32_t f7_38 = 38 * f7; /* 1.31*2^30 */ 631 | int32_t f8_19 = 19 * f8; /* 1.31*2^30 */ 632 | int32_t f9_38 = 38 * f9; /* 1.31*2^30 */ 633 | int64_t f0f0 = f0 * (int64_t) f0; 634 | int64_t f0f1_2 = f0_2 * (int64_t) f1; 635 | int64_t f0f2_2 = f0_2 * (int64_t) f2; 636 | int64_t f0f3_2 = f0_2 * (int64_t) f3; 637 | int64_t f0f4_2 = f0_2 * (int64_t) f4; 638 | int64_t f0f5_2 = f0_2 * (int64_t) f5; 639 | int64_t f0f6_2 = f0_2 * (int64_t) f6; 640 | int64_t f0f7_2 = f0_2 * (int64_t) f7; 641 | int64_t f0f8_2 = f0_2 * (int64_t) f8; 642 | int64_t f0f9_2 = f0_2 * (int64_t) f9; 643 | int64_t f1f1_2 = f1_2 * (int64_t) f1; 644 | int64_t f1f2_2 = f1_2 * (int64_t) f2; 645 | int64_t f1f3_4 = f1_2 * (int64_t) f3_2; 646 | int64_t f1f4_2 = f1_2 * (int64_t) f4; 647 | int64_t f1f5_4 = f1_2 * (int64_t) f5_2; 648 | int64_t f1f6_2 = f1_2 * (int64_t) f6; 649 | int64_t f1f7_4 = f1_2 * (int64_t) f7_2; 650 | int64_t f1f8_2 = f1_2 * (int64_t) f8; 651 | int64_t f1f9_76 = f1_2 * (int64_t) f9_38; 652 | int64_t f2f2 = f2 * (int64_t) f2; 653 | int64_t f2f3_2 = f2_2 * (int64_t) f3; 654 | int64_t f2f4_2 = f2_2 * (int64_t) f4; 655 | int64_t f2f5_2 = f2_2 * (int64_t) f5; 656 | int64_t f2f6_2 = f2_2 * (int64_t) f6; 657 | int64_t f2f7_2 = f2_2 * (int64_t) f7; 658 | int64_t f2f8_38 = f2_2 * (int64_t) f8_19; 659 | int64_t f2f9_38 = f2 * (int64_t) f9_38; 660 | int64_t f3f3_2 = f3_2 * 
(int64_t) f3; 661 | int64_t f3f4_2 = f3_2 * (int64_t) f4; 662 | int64_t f3f5_4 = f3_2 * (int64_t) f5_2; 663 | int64_t f3f6_2 = f3_2 * (int64_t) f6; 664 | int64_t f3f7_76 = f3_2 * (int64_t) f7_38; 665 | int64_t f3f8_38 = f3_2 * (int64_t) f8_19; 666 | int64_t f3f9_76 = f3_2 * (int64_t) f9_38; 667 | int64_t f4f4 = f4 * (int64_t) f4; 668 | int64_t f4f5_2 = f4_2 * (int64_t) f5; 669 | int64_t f4f6_38 = f4_2 * (int64_t) f6_19; 670 | int64_t f4f7_38 = f4 * (int64_t) f7_38; 671 | int64_t f4f8_38 = f4_2 * (int64_t) f8_19; 672 | int64_t f4f9_38 = f4 * (int64_t) f9_38; 673 | int64_t f5f5_38 = f5 * (int64_t) f5_38; 674 | int64_t f5f6_38 = f5_2 * (int64_t) f6_19; 675 | int64_t f5f7_76 = f5_2 * (int64_t) f7_38; 676 | int64_t f5f8_38 = f5_2 * (int64_t) f8_19; 677 | int64_t f5f9_76 = f5_2 * (int64_t) f9_38; 678 | int64_t f6f6_19 = f6 * (int64_t) f6_19; 679 | int64_t f6f7_38 = f6 * (int64_t) f7_38; 680 | int64_t f6f8_38 = f6_2 * (int64_t) f8_19; 681 | int64_t f6f9_38 = f6 * (int64_t) f9_38; 682 | int64_t f7f7_38 = f7 * (int64_t) f7_38; 683 | int64_t f7f8_38 = f7_2 * (int64_t) f8_19; 684 | int64_t f7f9_76 = f7_2 * (int64_t) f9_38; 685 | int64_t f8f8_19 = f8 * (int64_t) f8_19; 686 | int64_t f8f9_38 = f8 * (int64_t) f9_38; 687 | int64_t f9f9_38 = f9 * (int64_t) f9_38; 688 | int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 689 | int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 690 | int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 691 | int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 692 | int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 693 | int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 694 | int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 695 | int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 696 | int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 697 | int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 698 | int64_t carry0; 699 | int64_t carry1; 700 | int64_t carry2; 701 | int64_t carry3; 702 | int64_t 
carry4; 703 | int64_t carry5; 704 | int64_t carry6; 705 | int64_t carry7; 706 | int64_t carry8; 707 | int64_t carry9; 708 | 709 | carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 710 | carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 711 | 712 | carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 713 | carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 714 | 715 | carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 716 | carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 717 | 718 | carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 719 | carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 720 | 721 | carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 722 | carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 723 | 724 | carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 725 | 726 | carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 727 | 728 | h[0] = (int32_t)h0; 729 | h[1] = (int32_t)h1; 730 | h[2] = (int32_t)h2; 731 | h[3] = (int32_t)h3; 732 | h[4] = (int32_t)h4; 733 | h[5] = (int32_t)h5; 734 | h[6] = (int32_t)h6; 735 | h[7] = (int32_t)h7; 736 | h[8] = (int32_t)h8; 737 | h[9] = (int32_t)h9; 738 | } 739 | 740 | /* 741 | h = f - g 742 | Can overlap h with f or g. 743 | 744 | Preconditions: 745 | |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 746 | |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 747 | 748 | Postconditions: 749 | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 
750 | */ 751 | 752 | void fe_sub(fe h,fe f,fe g) 753 | { 754 | int32_t f0 = f[0]; 755 | int32_t f1 = f[1]; 756 | int32_t f2 = f[2]; 757 | int32_t f3 = f[3]; 758 | int32_t f4 = f[4]; 759 | int32_t f5 = f[5]; 760 | int32_t f6 = f[6]; 761 | int32_t f7 = f[7]; 762 | int32_t f8 = f[8]; 763 | int32_t f9 = f[9]; 764 | int32_t g0 = g[0]; 765 | int32_t g1 = g[1]; 766 | int32_t g2 = g[2]; 767 | int32_t g3 = g[3]; 768 | int32_t g4 = g[4]; 769 | int32_t g5 = g[5]; 770 | int32_t g6 = g[6]; 771 | int32_t g7 = g[7]; 772 | int32_t g8 = g[8]; 773 | int32_t g9 = g[9]; 774 | int32_t h0 = f0 - g0; 775 | int32_t h1 = f1 - g1; 776 | int32_t h2 = f2 - g2; 777 | int32_t h3 = f3 - g3; 778 | int32_t h4 = f4 - g4; 779 | int32_t h5 = f5 - g5; 780 | int32_t h6 = f6 - g6; 781 | int32_t h7 = f7 - g7; 782 | int32_t h8 = f8 - g8; 783 | int32_t h9 = f9 - g9; 784 | h[0] = h0; 785 | h[1] = h1; 786 | h[2] = h2; 787 | h[3] = h3; 788 | h[4] = h4; 789 | h[5] = h5; 790 | h[6] = h6; 791 | h[7] = h7; 792 | h[8] = h8; 793 | h[9] = h9; 794 | } 795 | 796 | /* 797 | Preconditions: 798 | |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 799 | 800 | Write p=2^255-19; q=floor(h/p). 801 | Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). 802 | 803 | Proof: 804 | Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. 805 | Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. 806 | 807 | Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). 808 | Then 0> 25; 846 | q = (h0 + q) >> 26; 847 | q = (h1 + q) >> 25; 848 | q = (h2 + q) >> 26; 849 | q = (h3 + q) >> 25; 850 | q = (h4 + q) >> 26; 851 | q = (h5 + q) >> 25; 852 | q = (h6 + q) >> 26; 853 | q = (h7 + q) >> 25; 854 | q = (h8 + q) >> 26; 855 | q = (h9 + q) >> 25; 856 | 857 | /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ 858 | h0 += 19 * q; 859 | /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. 
*/ 860 | 861 | carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; 862 | carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25; 863 | carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26; 864 | carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25; 865 | carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26; 866 | carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25; 867 | carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26; 868 | carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25; 869 | carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26; 870 | carry9 = h9 >> 25; h9 -= carry9 << 25; 871 | /* h10 = carry9 */ 872 | 873 | /* 874 | Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. 875 | Have h0+...+2^230 h9 between 0 and 2^255-1; 876 | evidently 2^255 h10-2^255 q = 0. 877 | Goal: Output h0+...+2^230 h9. 878 | */ 879 | 880 | s[0] = h0 >> 0; 881 | s[1] = h0 >> 8; 882 | s[2] = h0 >> 16; 883 | s[3] = (h0 >> 24) | (h1 << 2); 884 | s[4] = h1 >> 6; 885 | s[5] = h1 >> 14; 886 | s[6] = (h1 >> 22) | (h2 << 3); 887 | s[7] = h2 >> 5; 888 | s[8] = h2 >> 13; 889 | s[9] = (h2 >> 21) | (h3 << 5); 890 | s[10] = h3 >> 3; 891 | s[11] = h3 >> 11; 892 | s[12] = (h3 >> 19) | (h4 << 6); 893 | s[13] = h4 >> 2; 894 | s[14] = h4 >> 10; 895 | s[15] = h4 >> 18; 896 | s[16] = h5 >> 0; 897 | s[17] = h5 >> 8; 898 | s[18] = h5 >> 16; 899 | s[19] = (h5 >> 24) | (h6 << 1); 900 | s[20] = h6 >> 7; 901 | s[21] = h6 >> 15; 902 | s[22] = (h6 >> 23) | (h7 << 3); 903 | s[23] = h7 >> 5; 904 | s[24] = h7 >> 13; 905 | s[25] = (h7 >> 21) | (h8 << 4); 906 | s[26] = h8 >> 4; 907 | s[27] = h8 >> 12; 908 | s[28] = (h8 >> 20) | (h9 << 6); 909 | s[29] = h9 >> 2; 910 | s[30] = h9 >> 10; 911 | s[31] = h9 >> 18; 912 | } 913 | -------------------------------------------------------------------------------- /ext/x25519_ref10/fe.h: -------------------------------------------------------------------------------- 1 | #ifndef FE_H 2 | #define FE_H 3 | 4 | #include 5 | 6 | typedef int32_t fe[10]; 7 | 8 | /* 9 | 
fe means field element. 10 | Here the field is \Z/(2^255-19). 11 | An element t, entries t[0]...t[9], represents the integer 12 | t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 13 | Bounds on each t[i] vary depending on context. 14 | */ 15 | 16 | #define fe_frombytes crypto_scalarmult_curve25519_ref10_fe_frombytes 17 | #define fe_tobytes crypto_scalarmult_curve25519_ref10_fe_tobytes 18 | #define fe_copy crypto_scalarmult_curve25519_ref10_fe_copy 19 | #define fe_0 crypto_scalarmult_curve25519_ref10_fe_0 20 | #define fe_1 crypto_scalarmult_curve25519_ref10_fe_1 21 | #define fe_cswap crypto_scalarmult_curve25519_ref10_fe_cswap 22 | #define fe_add crypto_scalarmult_curve25519_ref10_fe_add 23 | #define fe_sub crypto_scalarmult_curve25519_ref10_fe_sub 24 | #define fe_mul crypto_scalarmult_curve25519_ref10_fe_mul 25 | #define fe_sq crypto_scalarmult_curve25519_ref10_fe_sq 26 | #define fe_mul121666 crypto_scalarmult_curve25519_ref10_fe_mul121666 27 | #define fe_invert crypto_scalarmult_curve25519_ref10_fe_invert 28 | 29 | extern void fe_frombytes(fe,const unsigned char *); 30 | extern void fe_tobytes(unsigned char *,fe); 31 | 32 | extern void fe_copy(fe,fe); 33 | extern void fe_0(fe); 34 | extern void fe_1(fe); 35 | extern void fe_cswap(fe,fe,unsigned int); 36 | 37 | extern void fe_add(fe,fe,fe); 38 | extern void fe_sub(fe,fe,fe); 39 | extern void fe_mul(fe,fe,fe); 40 | extern void fe_sq(fe,fe); 41 | extern void fe_mul121666(fe,fe); 42 | extern void fe_invert(fe,fe); 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /ext/x25519_ref10/montgomery.h: -------------------------------------------------------------------------------- 1 | 2 | /* qhasm: fe X2 */ 3 | 4 | /* qhasm: fe Z2 */ 5 | 6 | /* qhasm: fe X3 */ 7 | 8 | /* qhasm: fe Z3 */ 9 | 10 | /* qhasm: fe X4 */ 11 | 12 | /* qhasm: fe Z4 */ 13 | 14 | /* qhasm: fe X5 */ 15 | 16 | /* qhasm: fe Z5 */ 17 | 18 | /* qhasm: fe A */ 19 | 20 | /* qhasm: fe B */ 21 | 
22 | /* qhasm: fe C */ 23 | 24 | /* qhasm: fe D */ 25 | 26 | /* qhasm: fe E */ 27 | 28 | /* qhasm: fe AA */ 29 | 30 | /* qhasm: fe BB */ 31 | 32 | /* qhasm: fe DA */ 33 | 34 | /* qhasm: fe CB */ 35 | 36 | /* qhasm: fe t0 */ 37 | 38 | /* qhasm: fe t1 */ 39 | 40 | /* qhasm: fe t2 */ 41 | 42 | /* qhasm: fe t3 */ 43 | 44 | /* qhasm: fe t4 */ 45 | 46 | /* qhasm: enter ladder */ 47 | 48 | /* qhasm: D = X3-Z3 */ 49 | /* asm 1: fe_sub(>D=fe#5,D=tmp0,B=fe#6,B=tmp1,A=fe#1,A=x2,C=fe#2,C=z2,DA=fe#4,DA=z3,CB=fe#2,CB=z2,BB=fe#5,BB=tmp0,AA=fe#6,AA=tmp1,t0=fe#3,t0=x3,t1=fe#2,t1=z2,X4=fe#1,X4=x2,E=fe#6,E=tmp1,t2=fe#2,t2=z2,t3=fe#4,t3=z3,X5=fe#3,X5=x3,t4=fe#5,t4=tmp0,Z5=fe#4,x1,Z5=z3,x1,Z4=fe#2,Z4=z2,z2=fe#1,z2=fe#1,>z2=fe#1); */ 52 | /* asm 2: fe_sq(>z2=t0,z2=t0,>z2=t0); */ 53 | fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); 54 | 55 | /* qhasm: z8 = z2^2^2 */ 56 | /* asm 1: fe_sq(>z8=fe#2,z8=fe#2,>z8=fe#2); */ 57 | /* asm 2: fe_sq(>z8=t1,z8=t1,>z8=t1); */ 58 | fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); 59 | 60 | /* qhasm: z9 = z1*z8 */ 61 | /* asm 1: fe_mul(>z9=fe#2,z9=t1,z11=fe#1,z11=t0,z22=fe#3,z22=fe#3,>z22=fe#3); */ 72 | /* asm 2: fe_sq(>z22=t2,z22=t2,>z22=t2); */ 73 | fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2); 74 | 75 | /* qhasm: z_5_0 = z9*z22 */ 76 | /* asm 1: fe_mul(>z_5_0=fe#2,z_5_0=t1,z_10_5=fe#3,z_10_5=fe#3,>z_10_5=fe#3); */ 82 | /* asm 2: fe_sq(>z_10_5=t2,z_10_5=t2,>z_10_5=t2); */ 83 | fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2); 84 | 85 | /* qhasm: z_10_0 = z_10_5*z_5_0 */ 86 | /* asm 1: fe_mul(>z_10_0=fe#2,z_10_0=t1,z_20_10=fe#3,z_20_10=fe#3,>z_20_10=fe#3); */ 92 | /* asm 2: fe_sq(>z_20_10=t2,z_20_10=t2,>z_20_10=t2); */ 93 | fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2); 94 | 95 | /* qhasm: z_20_0 = z_20_10*z_10_0 */ 96 | /* asm 1: fe_mul(>z_20_0=fe#3,z_20_0=t2,z_40_20=fe#4,z_40_20=fe#4,>z_40_20=fe#4); */ 102 | /* asm 2: fe_sq(>z_40_20=t3,z_40_20=t3,>z_40_20=t3); */ 103 | fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3); 104 | 105 | /* qhasm: 
z_40_0 = z_40_20*z_20_0 */ 106 | /* asm 1: fe_mul(>z_40_0=fe#3,z_40_0=t2,z_50_10=fe#3,z_50_10=fe#3,>z_50_10=fe#3); */ 112 | /* asm 2: fe_sq(>z_50_10=t2,z_50_10=t2,>z_50_10=t2); */ 113 | fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2); 114 | 115 | /* qhasm: z_50_0 = z_50_10*z_10_0 */ 116 | /* asm 1: fe_mul(>z_50_0=fe#2,z_50_0=t1,z_100_50=fe#3,z_100_50=fe#3,>z_100_50=fe#3); */ 122 | /* asm 2: fe_sq(>z_100_50=t2,z_100_50=t2,>z_100_50=t2); */ 123 | fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2); 124 | 125 | /* qhasm: z_100_0 = z_100_50*z_50_0 */ 126 | /* asm 1: fe_mul(>z_100_0=fe#3,z_100_0=t2,z_200_100=fe#4,z_200_100=fe#4,>z_200_100=fe#4); */ 132 | /* asm 2: fe_sq(>z_200_100=t3,z_200_100=t3,>z_200_100=t3); */ 133 | fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3); 134 | 135 | /* qhasm: z_200_0 = z_200_100*z_100_0 */ 136 | /* asm 1: fe_mul(>z_200_0=fe#3,z_200_0=t2,z_250_50=fe#3,z_250_50=fe#3,>z_250_50=fe#3); */ 142 | /* asm 2: fe_sq(>z_250_50=t2,z_250_50=t2,>z_250_50=t2); */ 143 | fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2); 144 | 145 | /* qhasm: z_250_0 = z_250_50*z_50_0 */ 146 | /* asm 1: fe_mul(>z_250_0=fe#2,z_250_0=t1,z_255_5=fe#2,z_255_5=fe#2,>z_255_5=fe#2); */ 152 | /* asm 2: fe_sq(>z_255_5=t1,z_255_5=t1,>z_255_5=t1); */ 153 | fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1); 154 | 155 | /* qhasm: z_255_21 = z_255_5*z11 */ 156 | /* asm 1: fe_mul(>z_255_21=fe#12,z_255_21=out,= 0;--pos) { 32 | b = e[pos / 8] >> (pos & 7); 33 | b &= 1; 34 | swap ^= b; 35 | fe_cswap(x2,x3,swap); 36 | fe_cswap(z2,z3,swap); 37 | swap = b; 38 | #include "montgomery.h" 39 | } 40 | fe_cswap(x2,x3,swap); 41 | fe_cswap(z2,z3,swap); 42 | 43 | fe_invert(z2,z2); 44 | fe_mul(x2,x2,z2); 45 | fe_tobytes(q,x2); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /ext/x25519_ref10/x25519_ref10.c: -------------------------------------------------------------------------------- 1 | /* 2 | Ruby C extension providing bindings to the 
ref10 implementation of the 3 | X25519 Diffie-Hellman algorithm 4 | */ 5 | 6 | #include "ruby.h" 7 | #include "x25519_ref10.h" 8 | 9 | static VALUE mX25519 = Qnil; 10 | static VALUE mX25519_Provider = Qnil; 11 | static VALUE mX25519_Provider_Ref10 = Qnil; 12 | 13 | static VALUE mX25519_Provider_Ref10_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u); 14 | static VALUE mX25519_Provider_Ref10_scalarmult_base(VALUE self, VALUE scalar); 15 | 16 | /* Initialize the x25519_ref10 C extension */ 17 | void Init_x25519_ref10() 18 | { 19 | mX25519 = rb_define_module("X25519"); 20 | mX25519_Provider = rb_define_module_under(mX25519, "Provider"); 21 | mX25519_Provider_Ref10 = rb_define_module_under(mX25519_Provider, "Ref10"); 22 | 23 | rb_define_singleton_method(mX25519_Provider_Ref10, "scalarmult", mX25519_Provider_Ref10_scalarmult, 2); 24 | rb_define_singleton_method(mX25519_Provider_Ref10, "scalarmult_base", mX25519_Provider_Ref10_scalarmult_base, 1); 25 | } 26 | 27 | /* Variable-base scalar multiplication */ 28 | static VALUE mX25519_Provider_Ref10_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u) 29 | { 30 | X25519_KEY product; 31 | 32 | StringValue(scalar); 33 | if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) { 34 | rb_raise( 35 | rb_eArgError, 36 | "expected %d-byte scalar, got %ld", 37 | X25519_KEYSIZE_BYTES, 38 | RSTRING_LEN(scalar) 39 | ); 40 | } 41 | 42 | StringValue(montgomery_u); 43 | if(RSTRING_LEN(montgomery_u) != X25519_KEYSIZE_BYTES) { 44 | rb_raise( 45 | rb_eArgError, 46 | "expected %d-byte Montgomery-u coordinate, got %ld", 47 | X25519_KEYSIZE_BYTES, 48 | RSTRING_LEN(montgomery_u) 49 | ); 50 | } 51 | 52 | x25519_ref10_scalarmult( 53 | product, 54 | (const uint8_t *)RSTRING_PTR(scalar), 55 | (const uint8_t *)RSTRING_PTR(montgomery_u) 56 | ); 57 | 58 | return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES); 59 | } 60 | 61 | /* Fixed-base scalar multiplication */ 62 | static VALUE mX25519_Provider_Ref10_scalarmult_base(VALUE self, VALUE 
scalar) 63 | { 64 | X25519_KEY product; 65 | 66 | StringValue(scalar); 67 | if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) { 68 | rb_raise( 69 | rb_eArgError, 70 | "expected %d-byte scalar, got %ld", 71 | X25519_KEYSIZE_BYTES, 72 | RSTRING_LEN(scalar) 73 | ); 74 | } 75 | 76 | x25519_ref10_scalarmult_base( 77 | product, 78 | (const uint8_t *)RSTRING_PTR(scalar) 79 | ); 80 | 81 | return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES); 82 | } 83 | -------------------------------------------------------------------------------- /ext/x25519_ref10/x25519_ref10.h: -------------------------------------------------------------------------------- 1 | #ifndef X25519_REF10_H 2 | #define X25519_REF10_H 3 | 4 | #include 5 | 6 | #define X25519_KEYSIZE_BYTES 32 7 | typedef uint8_t X25519_KEY[X25519_KEYSIZE_BYTES]; 8 | 9 | /* Variable-base scalar multiplication */ 10 | int x25519_ref10_scalarmult(uint8_t *q, const uint8_t *n, const uint8_t *p); 11 | 12 | /* Fixed-base scalar multiplication */ 13 | int x25519_ref10_scalarmult_base(uint8_t *q, const uint8_t *n); 14 | 15 | #endif /* X25519_REF10_H */ 16 | -------------------------------------------------------------------------------- /lib/x25519.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "securerandom" 4 | 5 | require "x25519/version" 6 | 7 | require "x25519/montgomery_u" 8 | require "x25519/scalar" 9 | require "x25519/test_vectors" 10 | 11 | # Native extension backends 12 | require "x25519_ref10" 13 | begin 14 | require "x25519_precomputed" 15 | rescue LoadError 16 | require "x25519/precomputed_not_available" 17 | end 18 | 19 | # The X25519 elliptic curve Diffie-Hellman algorithm 20 | module X25519 21 | module_function 22 | 23 | # Size of an X25519 key (public or private) in bytes 24 | KEY_SIZE = 32 25 | 26 | # Raised when we detect a degenerate (i.e.
all-zero) public key 27 | InvalidKeyError = Class.new(StandardError) 28 | 29 | # Raised when the built-in self-test fails 30 | SelfTestFailure = Class.new(StandardError) 31 | 32 | class << self 33 | # Obtain the backend provider module 34 | attr_accessor :provider 35 | end 36 | 37 | # ref10 is the default provider 38 | self.provider = X25519::Provider::Ref10 39 | 40 | # X25519::Precomputed requires a 4th generation Intel Core CPU or newer, 41 | # so only enable it if we detect we're on a supported platform. Otherwise, 42 | # fall back to the ref10 portable C implementation. 43 | self.provider = X25519::Provider::Precomputed if X25519::Provider::Precomputed.available? 44 | 45 | # Raw fixed-base scalar multiplication function that acts directly on 46 | # bytestrings. Calculates the coordinate of the elliptic curve point that 47 | # represents the public key for a given scalar. 48 | # 49 | # @param scalar_bytes [String] a serialized private scalar 50 | # 51 | # @return [String] compressed Montgomery-u coordinate of the resulting point 52 | def calculate_public_key(scalar_bytes) 53 | validate_key_bytes(scalar_bytes) 54 | provider.scalarmult_base(scalar_bytes) 55 | end 56 | 57 | # Raw Diffie-Hellman function that acts directly on bytestrings. 
An 58 | # alternative to the object-oriented API 59 | # 60 | # @param scalar_bytes [String] a serialized private scalar 61 | # @param montgomery_u_bytes [String] a point we wish to multiply by the scalar 62 | # 63 | # @return [String] resulting point, serialized as bytes 64 | def diffie_hellman(scalar_bytes, montgomery_u_bytes) 65 | validate_key_bytes(scalar_bytes) 66 | validate_key_bytes(montgomery_u_bytes) 67 | 68 | # The point located at a Montgomery-u coordinate of zero always returns 69 | # the point at zero regardless of which scalar it's multiplied with 70 | raise InvalidKeyError, "degenerate public key" if montgomery_u_bytes == ("\0" * KEY_SIZE) 71 | 72 | provider.scalarmult(scalar_bytes, montgomery_u_bytes) 73 | end 74 | 75 | # Ensure a serialized key meets the requirements 76 | def validate_key_bytes(key_bytes) 77 | raise TypeError, "expected String, got #{key_bytes.class}" unless key_bytes.is_a?(String) 78 | return true if key_bytes.bytesize == KEY_SIZE 79 | 80 | raise ArgumentError, "expected #{KEY_SIZE}-byte String, got #{key_bytes.bytesize}" 81 | end 82 | 83 | # Perform a self-test to ensure the selected provider is working 84 | def self_test 85 | X25519::TestVectors::VARIABLE_BASE.each do |v| 86 | shared_secret = provider.scalarmult([v.scalar].pack("H*"), [v.input_coord].pack("H*")) 87 | raise SelfTestFailure, "self test failed!" unless shared_secret.unpack1("H*") == v.output_coord 88 | end 89 | 90 | X25519::TestVectors::FIXED_BASE.each do |v| 91 | public_key = provider.scalarmult_base([v.scalar].pack("H*")) 92 | raise SelfTestFailure, "self test failed!" 
module X25519
  # X25519 public keys and shared secrets
  #
  # Wraps the serialized Montgomery-u coordinate of a point on the elliptic
  # curve used by X25519 (a.k.a. Curve25519).
  class MontgomeryU
    # Wrap a serialized Montgomery-u coordinate.
    #
    # @param bytes [String] 32-byte compressed Montgomery-u coordinate
    #
    # @raise [X25519::InvalidKeyError] if the coordinate is all zero bytes
    def initialize(bytes)
      X25519.validate_key_bytes(bytes)

      # Multiplying the all-zero coordinate by any scalar gives zero again,
      # so it is never a usable public key or shared secret.
      all_zero = "\0" * KEY_SIZE
      raise InvalidKeyError, "degenerate public key" if bytes == all_zero

      @bytes = bytes
    end

    # Serialized form of the coordinate (the same String given to #initialize)
    #
    # @return [String] bytestring serialization of a Montgomery-u coordinate
    def to_bytes
      @bytes
    end

    # Inspection string containing the class name and the coordinate in hex
    #
    # @return [String]
    def inspect
      hex = @bytes.unpack1("H*")
      format("#<%s:%s>", self.class, hex)
    end
  end
end
# frozen_string_literal: true

module X25519
  # X25519 private keys
  #
  # Scalars are the integer component of scalar multiplication, multiplied
  # against an elliptic curve point.
  class Scalar
    # Securely generate a random scalar
    #
    # @return [X25519::Scalar] scalar built from X25519::KEY_SIZE random bytes
    def self.generate
      new(SecureRandom.random_bytes(X25519::KEY_SIZE))
    end

    # Create an X25519 scalar object from a bytestring
    #
    # The input is checked by X25519.validate_key_bytes (defined outside this
    # file) before being stored.
    #
    # @param bytes [String] 32-byte random secret scalar
    def initialize(bytes)
      X25519.validate_key_bytes(bytes)
      @scalar_bytes = bytes
    end

    # Variable-base scalar multiplication a.k.a. Diffie-Hellman
    #
    # This can be used to obtain a shared secret from a public key
    #
    # @param montgomery_u [X25519::MontgomeryU] coordinate of the public key/point to perform D-H with
    #
    # @raise [TypeError] if montgomery_u is not an X25519::MontgomeryU
    #
    # @return [X25519::MontgomeryU] resulting point (i.e. D-H shared secret)
    def diffie_hellman(montgomery_u)
      # Report only the class of the rejected argument. Interpolating the value
      # itself yields an empty string for nil and would copy the raw contents
      # of a mistakenly passed String into the exception message.
      raise TypeError, "expected X25519::MontgomeryU, got #{montgomery_u.class}" unless montgomery_u.is_a?(MontgomeryU)

      MontgomeryU.new(X25519.diffie_hellman(@scalar_bytes, montgomery_u.to_bytes))
    end
    alias multiply diffie_hellman

    # Fixed-base scalar multiplication. Calculates a public key from a
    # private scalar
    #
    # @return [X25519::MontgomeryU] resulting point (i.e. public key)
    def public_key
      MontgomeryU.new(X25519.calculate_public_key(@scalar_bytes))
    end
    alias multiply_base public_key

    # Return a bytestring representation of this scalar
    #
    # @return [String] scalar converted to a bytestring
    def to_bytes
      @scalar_bytes
    end

    # String inspection that does not leak the private scalar
    #
    # Delegates to #to_s instead of the default #inspect, which would print
    # the instance variable holding the secret bytes.
    def inspect
      to_s
    end
  end
end
# frozen_string_literal: true

require "bundler/setup"
require "x25519"
require "support/provider_examples"

RSpec.configure do |rspec|
  # Only the `expect(...)` syntax is enabled
  rspec.expect_with(:rspec) { |expectations| expectations.syntax = :expect }

  # Keep RSpec's DSL off `Module` and `main`
  rspec.disable_monkey_patching!

  # Persist example status so --only-failures and --next-failure work
  rspec.example_status_persistence_file_path = ".rspec_status"
end

# Hex-encode a binary string
#
# @param string [String] raw bytes
# @return [String] hex digits, high nibble first
def hex(string)
  string.unpack1("H*")
end

# Decode a hex string into its binary form
#
# @param string [String] hex digits, high nibble first
# @return [String] raw bytes
def unhex(string)
  [string].pack("H*")
end
# frozen_string_literal: true

# Serialization round-trip check for X25519::MontgomeryU
RSpec.describe X25519::MontgomeryU do
  subject(:point) { described_class.new(unhex(coordinate_hex)) }

  let(:coordinate_hex) { "e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c" }

  describe "#to_bytes" do
    it "serializes #{described_class} as a Encoding::BINARY String" do
      serialized = point.to_bytes

      # Type and encoding are checked together, then the exact byte content
      expect(serialized).to be_a(String).and have_attributes(encoding: Encoding::BINARY)
      expect(hex(serialized)).to eq(coordinate_hex)
    end
  end
end
# frozen_string_literal: true

# Behavioural checks for X25519::Scalar: key generation, variable-base and
# fixed-base multiplication against known vectors, and serialization.
RSpec.describe X25519::Scalar do
  subject(:scalar) { described_class.new(unhex(scalar_hex)) }

  let(:scalar_hex) { "a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4" }

  describe ".generate" do
    it "generates random keys" do
      # Not great, but better than nothing. The serialized bytes are compared
      # because two separately built Scalar objects never compare equal with
      # the default ==, so comparing the objects would pass for any bytes.
      key1 = described_class.generate
      key2 = described_class.generate

      expect(key1.to_bytes).not_to eq key2.to_bytes
    end
  end

  describe "#multiply" do
    it "raises TypeError if given a non-X25519::MontgomeryU type" do
      expect { scalar.multiply(nil) }.to raise_error(TypeError)
      expect { scalar.multiply("derp") }.to raise_error(TypeError)
    end

    context "with RFC 7748 test vectors" do
      it "passes the test vectors" do
        X25519::TestVectors::VARIABLE_BASE.each do |v|
          scalar = described_class.new(unhex(v.scalar))
          point = X25519::MontgomeryU.new(unhex(v.input_coord))

          shared_secret = scalar.multiply(point)
          expect(hex(shared_secret.to_bytes)).to eq v.output_coord
        end
      end
    end
  end

  describe "#multiply_base" do
    # FIXED_BASE is documented in test_vectors.rb as generated via
    # RbNaCl/libsodium, so this context is not labelled as RFC 7748.
    context "with fixed-base test vectors" do
      it "passes the test vectors" do
        X25519::TestVectors::FIXED_BASE.each do |v|
          scalar = described_class.new(unhex(v.scalar))
          expect(hex(scalar.multiply_base.to_bytes)).to eq v.output_coord
        end
      end
    end
  end

  describe "#to_bytes" do
    it "serializes #{described_class} as a Encoding::BINARY String" do
      bytes = scalar.to_bytes
      expect(bytes).to be_a String
      expect(bytes.encoding).to eq Encoding::BINARY
      expect(hex(bytes)).to eq scalar_hex
    end
  end
end
# frozen_string_literal: true

require_relative "lib/x25519/version"

Gem::Specification.new do |gem|
  # Identity and contact details
  gem.name = "x25519"
  gem.version = X25519::VERSION
  gem.authors = ["Tony Arcieri"]
  gem.email = ["bascule@gmail.com"]
  gem.homepage = "https://github.com/RubyCrypto/x25519"
  gem.license = "BSD-3-Clause" # https://spdx.org/licenses/BSD-3-Clause.html

  # Human-readable blurbs; the description is flattened to a single line
  gem.summary = "Public key cryptography library providing the X25519 Elliptic Curve Diffie-Hellman function"
  gem.description = <<~DESCRIPTION.split.join(" ")
    An efficient public key cryptography library for Ruby providing key
    exchange/agreement via the X25519 (a.k.a. Curve25519) Elliptic Curve
    Diffie-Hellman function as described in RFC 7748.
  DESCRIPTION

  # Package every git-tracked file except those under test/, spec/ or features/
  excluded_dirs = %r{^(test|spec|features)/}
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match?(excluded_dirs) }

  # Plain-Ruby platform gem with two C extensions, each built by its extconf.rb
  gem.platform = Gem::Platform::RUBY
  gem.extensions = ["ext/x25519_precomputed/extconf.rb", "ext/x25519_ref10/extconf.rb"]

  gem.required_ruby_version = ">= 2.7"
end