├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .rspec
├── .rubocop.yml
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── Gemfile
├── LICENSE
├── README.md
├── Rakefile
├── ext
    ├── x25519_precomputed
    │   ├── cputest.c
    │   ├── extconf.rb
    │   ├── fp25519_x64.c
    │   ├── fp25519_x64.h
    │   ├── table_ladder_x25519.h
    │   ├── x25519_precomputed.c
    │   ├── x25519_precomputed.h
    │   └── x25519_x64.c
    └── x25519_ref10
    │   ├── api.h
    │   ├── base.c
    │   ├── extconf.rb
    │   ├── fe.c
    │   ├── fe.h
    │   ├── montgomery.h
    │   ├── pow225521.h
    │   ├── scalarmult.c
    │   ├── x25519_ref10.c
    │   └── x25519_ref10.h
├── lib
    ├── x25519.rb
    └── x25519
    │   ├── montgomery_u.rb
    │   ├── precomputed_not_available.rb
    │   ├── scalar.rb
    │   ├── test_vectors.rb
    │   └── version.rb
├── spec
    ├── spec_helper.rb
    ├── support
    │   └── provider_examples.rb
    ├── x25519
    │   ├── montgomery_u_spec.rb
    │   ├── provider
    │   │   ├── precomputed_spec.rb
    │   │   └── ref10_spec.rb
    │   └── scalar_spec.rb
    └── x25519_spec.rb
└── x25519.gemspec


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     paths-ignore:
 6 |       - README.md
 7 |   push:
 8 |     paths-ignore:
 9 |       - README.md
10 | 
11 | env:
12 |   BUNDLE_WITHOUT: "development"
13 | 
14 | jobs:
15 |   test:
16 |     runs-on: ubuntu-latest
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         ruby:
21 |           - ruby-2.7
22 |           - ruby-3.0
23 |           - ruby-3.1
24 |           - ruby-3.2
25 |           - ruby-3.3
26 |           - ruby-3.4
27 |           - ruby-head
28 |     steps:
29 |       - uses: actions/checkout@v4
30 |       - uses: ruby/setup-ruby@v1
31 |         with:
32 |           ruby-version: ${{ matrix.ruby }}
33 |           bundler-cache: true
34 |       - run: bundle exec rake compile
35 |       - run: bundle exec rake spec
36 | 
37 |   rubocop:
38 |     runs-on: ubuntu-latest
39 |     steps:
40 |       - uses: actions/checkout@v4
41 |       - uses: ruby/setup-ruby@v1
42 |         with:
43 |           ruby-version: "2.7"  # quoted: a bare 2.7 is a YAML float, and e.g. 3.0 would collapse to 3
44 |           bundler-cache: true
45 |       - run: bundle exec rubocop --format progress --color
46 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /Gemfile.lock
 2 | /.bundle/
 3 | /.yardoc
 4 | /_yardoc/
 5 | /coverage/
 6 | /doc/
 7 | /pkg/
 8 | /spec/reports/
 9 | /tmp/
10 | *.o
11 | *.so
12 | *.bundle
13 | 
14 | # rspec failure tracking
15 | .rspec_status
16 | 


--------------------------------------------------------------------------------
/.rspec:
--------------------------------------------------------------------------------
1 | --color
2 | --format documentation
3 | --order random
4 | --warnings
5 | --require spec_helper
6 | 


--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
 1 | require:
 2 |   - rubocop-performance
 3 |   - rubocop-rake
 4 |   - rubocop-rspec
 5 | 
 6 | AllCops:
 7 |   TargetRubyVersion: 2.7
 8 |   DisplayCopNames: true
 9 |   NewCops: enable
10 | 
11 | #
12 | # Gemspec
13 | #
14 | 
15 | Gemspec/RequireMFA:
16 |   Enabled: false
17 | 
18 | #
19 | # Metrics
20 | #
21 | 
22 | Metrics/AbcSize:
23 |   Enabled: false
24 | 
25 | Metrics/CyclomaticComplexity:
26 |   Enabled: false
27 | 
28 | Metrics/PerceivedComplexity:
29 |   Enabled: false
30 | 
31 | Metrics/BlockLength:
32 |   Max: 100
33 | 
34 | Metrics/ClassLength:
35 |   Max: 100
36 | 
37 | Layout/LineLength:  # formerly Metrics/LineLength; RuboCop 0.78+ reports the old name as obsolete configuration
38 |   Max: 128
39 | 
40 | Metrics/MethodLength:
41 |   Max: 25
42 | 
43 | #
44 | # Style
45 | #
46 | 
47 | Style/FrozenStringLiteralComment:
48 |   Enabled: true
49 | 
50 | Style/StringLiterals:
51 |   EnforcedStyle: double_quotes
52 | 
53 | #
54 | # RSpec
55 | #
56 | 
57 | RSpec/MultipleExpectations:
58 |   Max: 3
59 | 
60 | RSpec/ExampleLength:
61 |   Max: 6
62 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | ## [1.0.10] (2022-10-06)
  2 | 
  3 | [1.0.10]: https://github.com/RubyCrypto/x25519/compare/v1.0.9...v1.0.10
  4 | 
  5 | - [#32](https://github.com/RubyCrypto/x25519/pull/32)
  6 |   Avoid building precomputed on `aarch64-linux`
  7 | - [#29](https://github.com/RubyCrypto/x25519/pull/29), [#31](https://github.com/RubyCrypto/x25519/pull/31)
  8 |   Allow usage on Apple silicon
  9 | 
 10 | ## [1.0.9] (2021-08-04)
 11 | 
 12 | [1.0.9]: https://github.com/RubyCrypto/x25519/compare/v1.0.8...v1.0.9
 13 | 
 14 | - [#23](https://github.com/RubyCrypto/x25519/pull/23)
 15 |   Bump `required_ruby_version` to 2.5
 16 | - [#25](https://github.com/RubyCrypto/x25519/pull/25)
 17 |   Fix SIGILL caused by use of `-march=native` 
 18 | 
 19 | ## [1.0.8] (2018-11-08)
 20 | 
 21 | [1.0.8]: https://github.com/RubyCrypto/x25519/compare/v1.0.7...v1.0.8
 22 | 
 23 | - [#20](https://github.com/RubyCrypto/x25519/pull/20)
 24 |   Change license to BSD-3-clause.
 25 | 
 26 | ## [1.0.7] (2018-02-26)
 27 | 
 28 | [1.0.7]: https://github.com/RubyCrypto/x25519/compare/v1.0.6...v1.0.7
 29 | 
 30 | - [#19](https://github.com/RubyCrypto/x25519/pull/19)
 31 |   Incorporate upstream carry propagation bugfix and LICENSE changes.
 32 | 
 33 | ## [1.0.6] (2018-01-04)
 34 | 
 35 | [1.0.6]: https://github.com/RubyCrypto/x25519/compare/v1.0.5...v1.0.6
 36 | 
 37 | - Use correct (LGPLv3) license in gemspec
 38 | 
 39 | ## [1.0.5] (2017-12-31)
 40 | 
 41 | [1.0.5]: https://github.com/RubyCrypto/x25519/compare/v1.0.4...v1.0.5
 42 | 
 43 | - [#15](https://github.com/RubyCrypto/x25519/pull/15)
 44 |   RuboCop 0.52.1
 45 | 
 46 | - [#14](https://github.com/RubyCrypto/x25519/pull/14)
 47 |   `ext/x25519_ref10`: Consolidate all field element code into `fe.c`.
 48 | 
 49 | ## [1.0.4] (2017-12-31)
 50 | 
 51 | [1.0.4]: https://github.com/RubyCrypto/x25519/compare/v1.0.3...v1.0.4
 52 | 
 53 | - [#13](https://github.com/RubyCrypto/x25519/pull/13)
 54 |   Test against Ruby 2.5.0
 55 | 
 56 | - [#12](https://github.com/RubyCrypto/x25519/pull/12)
 57 |   Move project to the RubyCrypto GitHub organization
 58 | 
 59 | ## [1.0.3] (2017-12-13)
 60 | 
 61 | [1.0.3]: https://github.com/RubyCrypto/x25519/compare/v1.0.2...v1.0.3
 62 | 
 63 | - [#10](https://github.com/RubyCrypto/x25519/pull/10)
 64 |   Detect degenerate (i.e. all-zero) public keys (fixes #6)
 65 | 
 66 | ## [1.0.2] (2017-12-13)
 67 | 
 68 | [1.0.2]: https://github.com/RubyCrypto/x25519/compare/v1.0.1...v1.0.2
 69 | 
 70 | - [#9](https://github.com/RubyCrypto/x25519/pull/9)
 71 |   Make `X25519.provider` an `attr_accessor`
 72 | - Raise `X25519::SelfTestFailure` when self-test fails
 73 | 
 74 | ## [1.0.1] (2017-12-12)
 75 | 
 76 | [1.0.1]: https://github.com/RubyCrypto/x25519/compare/v1.0.0...v1.0.1
 77 | 
 78 | - Have `X25519.self_test` return true on success
 79 | 
 80 | ## [1.0.0] (2017-12-12)
 81 | 
 82 | [1.0.0]: https://github.com/RubyCrypto/x25519/compare/v0.2.0...v1.0.0
 83 | 
 84 | - [#8](https://github.com/RubyCrypto/x25519/pull/8)
 85 |   Add self-test
 86 | 
 87 | - [#7](https://github.com/RubyCrypto/x25519/pull/7)
 88 |   Factor providers into the `X25519::Provider` namespace
 89 | 
 90 | ## [0.2.0] (2017-12-12)
 91 | 
 92 | [0.2.0]: https://github.com/RubyCrypto/x25519/compare/v0.1.0...v0.2.0
 93 | 
 94 | - [#5](https://github.com/RubyCrypto/x25519/pull/5)
 95 |   Rewrite gem in Ruby with minimal native extensions
 96 | 
 97 | ## 0.1.0 (2017-12-11)
 98 | 
 99 | - Initial release
100 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, gender identity and expression, level of experience,
 9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at bascule@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at [http://contributor-covenant.org/version/1/4][version]
72 | 
73 | [homepage]: http://contributor-covenant.org
74 | [version]: http://contributor-covenant.org/version/1/4/
75 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | source "https://rubygems.org"
 4 | 
 5 | gemspec
 6 | 
 7 | group :development, :test do
 8 |   gem "rake", require: false
 9 |   gem "rake-compiler", "~> 1.0", require: false
10 |   gem "rspec", "~> 3.10", require: false
11 |   gem "rubocop", "1.68", require: false
12 |   gem "rubocop-performance", "1.23.0", require: false
13 |   gem "rubocop-rake", "0.6.0", require: false
14 |   gem "rubocop-rspec", "3.2.0", require: false
15 | end
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD-3 License.
 2 | 
 3 | Copyright (c) 2017, Armando Faz <armfazh@ic.unicamp.br>. All rights reserved.
 4 | Institute of Computing.
 5 | University of Campinas, Brazil.
 6 | 
 7 | Redistribution and use in source and binary forms, with or without
 8 | modification, are permitted provided that the following conditions
 9 | are met:
10 | 
11 |  * Redistributions of source code must retain the above copyright
12 |    notice, this list of conditions and the following disclaimer.
13 |  * Redistributions in binary form must reproduce the above
14 |    copyright notice, this list of conditions and the following
15 |    disclaimer in the documentation and/or other materials provided
16 |    with the distribution.
17 |  * Neither the name of University of Campinas nor the names of its
18 |    contributors may be used to endorse or promote products derived
19 |    from this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
30 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
32 | OF THE POSSIBILITY OF SUCH DAMAGE.
33 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # x25519.rb [![Latest Version][gem-shield]][gem-link] [![Yard Docs][docs-image]][docs-link] [![License: BSD 3-Clause][license-image]][license-link] [![Build Status][build-image]][build-link]
  2 | 
  3 | [gem-shield]: https://img.shields.io/gem/v/x25519?logo=ruby
  4 | [gem-link]: https://rubygems.org/gems/x25519
  5 | [docs-image]: https://img.shields.io/badge/yard-docs-blue.svg
  6 | [docs-link]: https://www.rubydoc.info/gems/x25519
  7 | [license-image]: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg
  8 | [license-link]: https://spdx.org/licenses/BSD-3-Clause.html
  9 | [build-image]: https://github.com/RubyCrypto/x25519/actions/workflows/ci.yml/badge.svg
 10 | [build-link]: https://github.com/RubyCrypto/x25519/actions/workflows/ci.yml
 11 | 
 12 | An efficient public key cryptography library for Ruby providing key
 13 | exchange/agreement.
 14 | 
 15 | This gem implements X25519 (a.k.a. Curve25519) Elliptic Curve Diffie-Hellman
 16 | function as described in [RFC7748] as a C extension using the
 17 | high performance [rfc7748_precomputed] implementation based on the paper
 18 | [How to (pre-)compute a ladder]
 19 | (with fallback to the ref10 C implementation).
 20 | 
 21 | X25519 is one of two notable algorithms implemented atop the Curve25519
 22 | elliptic curve. The [ed25519 gem] is a related project of this one,
 23 | and implements the Ed25519 signature scheme on the twisted Edwards form of
 24 | Curve25519.
 25 | 
 26 | [RFC7748]: https://tools.ietf.org/html/rfc7748
 27 | [How to (pre-)compute a ladder]: https://eprint.iacr.org/2017/264
 28 | [rfc7748_precomputed]: https://github.com/armfazh/rfc7748_precomputed
 29 | [ed25519 gem]: https://github.com/RubyCrypto/ed25519
 30 | 
 31 | ### Is it any good?
 32 | 
 33 | [Yes.](http://news.ycombinator.com/item?id=3067434)
 34 | 
 35 | ### What is it useful for?
 36 | 
 37 | X25519 is a key exchange/agreement algorithm generally used as a low-level
 38 | building block in cryptographic protocols.
 39 | 
 40 | ### Can I use X25519 to encrypt things?
 41 | 
 42 | Please use [RbNaCl::Box] if you would like a high-level construction which uses
 43 | X25519 for public-key encryption. Otherwise, the X25519 algorithm is not directly
 44 | useful for encryption without a higher-level encryption protocol built on top of it.
 45 | 
 46 | [RbNaCl::Box]: https://github.com/RubyCrypto/rbnacl/wiki/Public-Key-Encryption
 47 | 
 48 | ## Requirements
 49 | 
 50 | **x25519.rb** is supported on and tested against the following platforms:
 51 | 
 52 | - MRI 2.7, 3.0, 3.1, 3.2, 3.3, 3.4
 53 | 
 54 | ## Installation
 55 | 
 56 | Add this line to your application's Gemfile:
 57 | 
 58 | ```ruby
 59 | gem "x25519"
 60 | ```
 61 | 
 62 | And then execute:
 63 | 
 64 |     $ bundle
 65 | 
 66 | Or install it yourself as:
 67 | 
 68 |     $ gem install x25519
 69 | 
 70 | ## Usage
 71 | 
 72 | The example below shows how to perform a full Diffie-Hellman key exchange:
 73 | 
 74 | ```ruby
 75 | require "x25519"
 76 | 
 77 | # Alice generates random scalar (private key)
 78 | alice_sk = X25519::Scalar.generate
 79 | 
 80 | # Alice obtains public key for her private key/scalar
 81 | alice_pk = alice_sk.public_key
 82 | 
 83 | # Bob generates random scalar (private key)
 84 | # Ostensibly this would be on a different computer somewhere
 85 | bob_sk = X25519::Scalar.generate
 86 | bob_pk = bob_sk.public_key
 87 | 
 88 | # Alice can perform Diffie-Hellman with Bob's public key
 89 | alice_secret = alice_sk.diffie_hellman(bob_pk).to_bytes
 90 | 
 91 | # Bob can perform Diffie-Hellman with Alice's public key
 92 | bob_secret = bob_sk.diffie_hellman(alice_pk).to_bytes
 93 | 
 94 | # The resulting secrets should be the same
 95 | alice_secret == bob_secret # true
 96 | ```
 97 | 
 98 | ## X25519::Scalar: private keys
 99 | 
100 | The `X25519::Scalar` class represents secret integers used as X25519 private
101 | keys. These secret integers are multiplied by a well-known base point to
102 | obtain X25519 public keys (`X25519::MontgomeryU`).
103 | 
104 | ### `X25519::Scalar.generate()`: make a random private key
105 | 
106 | Generate a random private scalar (using `SecureRandom`)
107 | 
108 | **Example:**
109 | 
110 | ```ruby
111 | secret_key = X25519::Scalar.generate
112 | ```
113 | 
114 | ### `X25519::Scalar.new(bytes)`: load existing private key
115 | 
116 | * `bytes`: a 32-byte `String` value containing the private key
117 | 
118 | **Example:**
119 | 
120 | ```ruby
121 | secret_key = X25519::Scalar.new(File.read("alice.key"))
122 | ```
123 | 
124 | ### `X25519::Scalar#public_key()`: obtain public key for this scalar
125 | 
126 | NOTE: The `#multiply_base` method is an alias of this one.
127 | 
128 | Performs fixed-base scalar multiplication (i.e. calculates public key)
129 | 
130 | **Return Value:**
131 | 
132 | Returns a `X25519::MontgomeryU` object which represents the public key for this private key/scalar.
133 | 
134 | **Example:**
135 | 
136 | ```ruby
137 | secret_key = X25519::Scalar.generate
138 | public_key = secret_key.public_key
139 | ```
140 | 
141 | ### `X25519::Scalar#diffie_hellman(other_public_key)`: compute a shared secret
142 | 
143 | NOTE: The `#multiply` method is an alias of this one.
144 | 
145 | Performs variable-base scalar multiplication, computing a shared secret between
146 | our private scalar and someone else's public key/point.
147 | 
148 | **Arguments:**
149 | 
150 | * `other_public_key`: a `X25519::MontgomeryU` object containing the public key
151 |   with which we'd like to compute a shared secret.
152 | 
153 | **Return Value:**
154 | 
155 | Returns a `X25519::MontgomeryU` object which represents the shared secret.
156 | 
157 | **Example:**
158 | 
159 | ```ruby
160 | secret_key = X25519::Scalar.generate
161 | public_key = X25519::MontgomeryU.new(File.read("bob.pub"))
162 | 
163 | # Returns an X25519::MontgomeryU
164 | shared_secret = secret_key.multiply(public_key)
165 | 
166 | # Obtain the shared secret as a serialized byte representation
167 | shared_secret_bytes = shared_secret.to_bytes
168 | ```
169 | 
170 | ### `X25519::Scalar#to_bytes`: serialize a scalar as a `String`
171 | 
172 | **Return Value:**
173 | 
174 | Returns a `String` containing a byte representation of this scalar:
175 | 
176 | **Example:**
177 | 
178 | ```ruby
179 | secret_key = X25519::Scalar.new(...)
180 | File.write("alice.key", secret_key.to_bytes)
181 | ```
182 | 
183 | ## X25519::MontgomeryU: public keys and shared secrets
184 | 
185 | The `X25519::MontgomeryU` class represents a coordinate (specifically a
186 | Montgomery-u coordinate) on the elliptic curve. In the X25519 Diffie-Hellman
187 | function, these serve both as public keys and as shared secrets.
188 | 
189 | ### `X25519::MontgomeryU.new(bytes)`: load existing public key
190 | 
191 | **Arguments:**
192 | 
193 | * `bytes`: a 32-byte `String` value containing the public key
194 | 
195 | **Example:**
196 | 
197 | ```ruby
198 | public_key = X25519::MontgomeryU.new(File.read("bob.pub"))
199 | ```
200 | 
201 | ### `X25519::MontgomeryU#to_bytes`: serialize a Montgomery-u coordinate as a `String`
202 | 
203 | **Return Value:**
204 | 
205 | Returns a `String` containing a byte representation of a compressed Montgomery-u coordinate:
206 | 
207 | **Example:**
208 | 
209 | ```ruby
210 | public_key = X25519::MontgomeryU.new(...)
211 | File.write("bob.pub", public_key.to_bytes)
212 | ```
213 | 
214 | ## X25519: module-level functionality
215 | 
216 | ### `X25519.diffie_hellman(secret_key, public_key)`: shorthand `String`-oriented API
217 | 
218 | If you'd like to avoid the object-oriented API, you can use a simplified API which
219 | acts entirely on bytestrings.
220 | 
221 | **Arguments:**
222 | 
223 | * `secret_key`: a 32-byte `String` containing a private scalar
224 | * `public_key`: a 32-byte `String` containing a compressed Montgomery-u coordinate
225 | 
226 | **Return Value:**
227 | 
228 | Returns a `String` containing a 32-byte compressed Montgomery-u coordinate
229 | 
230 | ## Contributing
231 | 
232 | Bug reports and pull requests are welcome on GitHub at https://github.com/RubyCrypto/x25519.
233 | This project is intended to be a safe, welcoming space for collaboration,
234 | and contributors are expected to adhere to the [Contributor Covenant](https://contributor-covenant.org)
235 | code of conduct.
236 | 
237 | ## Implementation Details
238 | 
239 | This gem contains two implementations of X25519: an optimized assembly
240 | implementation and a portable C implementation. Implementations are selected
241 | based on available CPU features.
242 | 
243 | ### [rfc7748_precomputed]: optimized assembly implementation
244 | 
245 | * Prime field arithmetic is optimized for the 4th and 6th generation of Intel Core processors
246 |   (Haswell and Skylake micro-architectures).
247 | * Efficient integer multiplication using MULX instruction.
248 | * Integer additions accelerated with ADCX/ADOX instructions.
249 | * Key generation uses a read-only table of 8 KB for X25519.
250 | 
251 | ### ref10: portable C implementation
252 | 
253 | * Taken from the [SUPERCOP] cryptographic benchmarking suite (supercop-20171020)
254 | * Portable C code which should compile on any architecture
255 | 
256 | [SUPERCOP]: https://bench.cr.yp.to/supercop.html
257 | 
258 | ## Designers
259 | 
260 | The X25519 Diffie-Hellman function was originally designed by Dan Bernstein:
261 | 
262 | https://cr.yp.to/ecdh.html
263 | 
264 | The optimized [rfc7748_precomputed] implementation was designed by:
265 | 
266 | * Thomaz Oliveira, Computer Science Department, Cinvestav-IPN, Mexico.
267 | * Julio López, University of Campinas, Brazil.
268 | * Hüseyin Hisil, Yasar University, Turkey.
269 | * Armando Faz-Hernández, University of Campinas, Brazil.
270 | * Francisco Rodríguez-Henríquez, Computer Science Department, Cinvestav-IPN, Mexico.
271 | 
272 | ## License
273 | 
274 | * Copyright (c) 2017-2018 Armando Faz
275 | * Copyright (c) 2017-2025 Tony Arcieri
276 | 
277 | This gem is available as open source under the terms of the
278 | BSD-3 Clause License ([LICENSE](./LICENSE))
279 | 
280 | ## Code of Conduct
281 | 
282 | Everyone interacting in the x25519.rb project’s codebases, issue trackers, chat
283 | rooms and mailing lists is expected to follow the [code of conduct].
284 | 
285 | [code of conduct]: https://github.com/RubyCrypto/x25519/blob/main/CODE_OF_CONDUCT.md
286 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "bundler/gem_tasks"
 4 | 
 5 | require "rake/clean"
 6 | CLEAN.include("**/*.o", "**/*.so", "**/*.bundle", "pkg", "tmp")
 7 | 
 8 | require "rake/extensiontask"
 9 | %w[precomputed ref10].each do |provider| # define one compile task per provider extension
10 |   next if provider == "precomputed" && RUBY_PLATFORM !~ /x86_64|x64/ # no precomputed task unless RUBY_PLATFORM mentions x86_64/x64
11 | 
12 |   Rake::ExtensionTask.new("x25519_#{provider}") do |ext|
13 |     ext.ext_dir = "ext/x25519_#{provider}"
14 |   end
15 | end
16 | 
17 | require "rspec/core/rake_task"
18 | RSpec::Core::RakeTask.new
19 | 
20 | require "rubocop/rake_task"
21 | RuboCop::RakeTask.new
22 | 
23 | task default: %w[compile spec rubocop]
24 | 


--------------------------------------------------------------------------------
/ext/x25519_precomputed/cputest.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Test for 4th generation Intel Core processor family features (e.g. Haswell)
 3 | From https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
 4 | */
 5 | 
 6 | #include <stdint.h>
 7 | #if defined(_MSC_VER)
 8 | # include <intrin.h>
 9 | #endif
10 | /* Run CPUID for leaf `eax` / sub-leaf `ecx`; store the resulting EAX, EBX, ECX, EDX in abcd[0..3]. */
11 | static void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd)
12 | {
13 | #if defined(_MSC_VER)
14 |     __cpuidex(abcd, eax, ecx); /* MSVC intrinsic from <intrin.h>; presumably fills abcd in the same EAX..EDX order - confirm */
15 | #else
16 |     uint32_t ebx = 0, edx;
17 | # if defined( __i386__ ) && defined ( __PIC__ )
18 |      /* in case of PIC under 32-bit EBX cannot be clobbered: stash it in EDI, run CPUID, then swap so EDI carries CPUID's EBX out */
19 |     __asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
20 | # else
21 |     __asm__ ( "cpuid" : "+b" (ebx),
22 | # endif
23 |               "+a" (eax), "+c" (ecx), "=d" (edx) ); /* operand list shared by both asm variants above */
24 |     abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
25 | #endif
26 | }
27 | /* Return nonzero when XCR0 (read via XGETBV with ECX=0) has both bit 1 and bit 2 set, zero otherwise. */
28 | static int check_xcr0_ymm()
29 | {
30 |     uint32_t xcr0;
31 | #if defined(_MSC_VER)
32 |     xcr0 = (uint32_t)_xgetbv(0);  /* min VS2010 SP1 compiler is required */
33 | #else
34 |     __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); /* value is taken from EAX; EDX is declared clobbered and ignored */
35 | #endif
36 |     return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */
37 | }
38 | /* Return 1 only if every CPU/OS feature probed below is present; return 0 as soon as one is missing. */
39 | int check_4th_gen_intel_core_features()
40 | {
41 |     uint32_t abcd[4]; /* EAX, EBX, ECX, EDX in that order, as filled by run_cpuid() */
42 |     uint32_t fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27));
43 |     uint32_t avx2_bmi12_mask = (1 << 5) | (1 << 3) | (1 << 8);
44 | 
45 |     /* CPUID.(EAX=01H, ECX=0H):ECX.FMA[bit 12]==1   &&
46 |        CPUID.(EAX=01H, ECX=0H):ECX.MOVBE[bit 22]==1 &&
47 |        CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1 */
48 |     run_cpuid( 1, 0, abcd );
49 |     if ( (abcd[2] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask ) /* abcd[2] is ECX; all three bits must be set */
50 |         return 0;
51 | 
52 |     if ( ! check_xcr0_ymm() ) /* reached only after the OSXSAVE bit was confirmed above */
53 |         return 0;
54 | 
55 |     /*  CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1  &&
56 |         CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]==1  &&
57 |         CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1  */
58 |     run_cpuid( 7, 0, abcd );
59 |     if ( (abcd[1] & avx2_bmi12_mask) != avx2_bmi12_mask ) /* abcd[1] is EBX; all three bits must be set */
60 |         return 0;
61 | 
62 |     /* CPUID.(EAX=80000001H):ECX.LZCNT[bit 5]==1 */
63 |     run_cpuid( 0x80000001, 0, abcd );
64 |     if ( (abcd[2] & (1 << 5)) == 0)
65 |         return 0;
66 | 
67 |     return 1; /* every probe above passed */
68 | }
69 | 


--------------------------------------------------------------------------------
/ext/x25519_precomputed/extconf.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | # rubocop:disable Style/GlobalVars
 4 | 
 5 | require "mkmf"
 6 | 
 7 | if RUBY_PLATFORM.match?(/x86_64|x64/) # generate a real Makefile only when the platform string mentions x86_64/x64
 8 |   $CFLAGS << " -Wall -O3 -pedantic -std=c99 -mbmi -mbmi2 -march=haswell"
 9 | 
10 |   create_makefile "x25519_precomputed"
11 | else
12 |   File.write("Makefile", "install clean: ;") # otherwise write a stub Makefile whose install/clean targets do nothing
13 | end
14 | 
15 | # rubocop:enable Style/GlobalVars
16 | 


--------------------------------------------------------------------------------
/ext/x25519_precomputed/fp25519_x64.c:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) 2017, Armando Faz <armfazh@ic.unicamp.br>. All rights reserved.
  3 |  * Institute of Computing.
  4 |  * University of Campinas, Brazil.
  5 |  *
  6 |  * Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  7 |  * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
  8 |  *
  9 |  * Redistribution and use in source and binary forms, with or without
 10 |  * modification, are permitted provided that the following conditions
 11 |  * are met:
 12 |  *
 13 |  *  * Redistributions of source code must retain the above copyright
 14 |  *    notice, this list of conditions and the following disclaimer.
 15 |  *  * Redistributions in binary form must reproduce the above
 16 |  *    copyright notice, this list of conditions and the following
 17 |  *    disclaimer in the documentation and/or other materials provided
 18 |  *    with the distribution.
 19 |  *  * Neither the name of University of Campinas nor the names of its
 20 |  *    contributors may be used to endorse or promote products derived
 21 |  *    from this software without specific prior written permission.
 22 |  *
 23 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 24 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 25 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 26 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 27 |  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 28 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 29 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 30 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 31 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 32 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 33 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 34 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 35 |  */
 36 | 
 37 | #include "fp25519_x64.h"
 38 | 
 39 | /**
 40 |  * Computes two independent 256x256-bit products, one row of partial products per word of a.
 41 |  * @param c Two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]. Words of c are stored while a and b are still being read, so c must not overlap a or b.
 42 |  * @param a Two 256-bit integers: a0[0:3] and a1[4:7] (64-bit words, least significant first)
 43 |  * @param b Two 256-bit integers: b0[0:3] and b1[4:7] (64-bit words, least significant first)
 44 |  */
 45 | void mul2_256x256_integer_x64(uint64_t *const c, uint64_t *const a,
 46 |                               uint64_t *const b) {
 47 | #ifdef __BMI2__
 48 | #ifdef __ADX__
 49 |   __asm__ __volatile__(
 50 |     "xorl %%r14d, %%r14d ;" /* r14 = 0 for the whole block: addend of the carry-only adox/adcx steps */
 51 |     "movq   (%1), %%rdx; " /* A[0] */
 52 |     "mulx   (%2),  %%r8, %%r12; " /* A[0]*B[0] */  "xorl %%r10d, %%r10d ;"  "movq %%r8, (%0) ;" /* xorl clears CF and OF before the chains start */
 53 |     "mulx  8(%2), %%r10, %%rax; " /* A[0]*B[1] */  "adox %%r10, %%r12 ;"
 54 |     "mulx 16(%2),  %%r8, %%rbx; " /* A[0]*B[2] */  "adox  %%r8, %%rax ;"
 55 |     "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */  "adox %%r10, %%rbx ;"
 56 |     /*******************************************/  "adox %%r14, %%rcx ;"
 57 | 
 58 |     "movq  8(%1), %%rdx; " /* A[1]; no flag reset here: CF is untouched since the xorl above and the last adox cannot carry out */
 59 |     "mulx   (%2),  %%r8,  %%r9; " /* A[1]*B[0] */  "adox %%r12,  %%r8 ;"  "movq  %%r8, 8(%0) ;"
 60 |     "mulx  8(%2), %%r10, %%r11; " /* A[1]*B[1] */  "adox %%r10,  %%r9 ;"  "adcx  %%r9, %%rax ;"
 61 |     "mulx 16(%2),  %%r8, %%r13; " /* A[1]*B[2] */  "adox  %%r8, %%r11 ;"  "adcx %%r11, %%rbx ;"
 62 |     "mulx 24(%2), %%r10, %%r12; " /* A[1]*B[3] */  "adox %%r10, %%r13 ;"  "adcx %%r13, %%rcx ;"
 63 |     /*******************************************/  "adox %%r14, %%r12 ;"  "adcx %%r14, %%r12 ;" /* fold both pending carries (OF and CF) into the new top word */
 64 | 
 65 |     "movq 16(%1), %%rdx; " /* A[2] */              "xorl %%r10d, %%r10d ;"
 66 |     "mulx   (%2),  %%r8,  %%r9; " /* A[2]*B[0] */  "adox %%rax,  %%r8 ;"  "movq %%r8, 16(%0) ;"
 67 |     "mulx  8(%2), %%r10, %%r11; " /* A[2]*B[1] */  "adox %%r10,  %%r9 ;"  "adcx  %%r9, %%rbx ;"
 68 |     "mulx 16(%2),  %%r8, %%r13; " /* A[2]*B[2] */  "adox  %%r8, %%r11 ;"  "adcx %%r11, %%rcx ;"
 69 |     "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */  "adox %%r10, %%r13 ;"  "adcx %%r13, %%r12 ;"
 70 |     /*******************************************/  "adox %%r14, %%rax ;"  "adcx %%r14, %%rax ;"
 71 | 
 72 |     "movq 24(%1), %%rdx; " /* A[3] */              "xorl %%r10d, %%r10d ;"
 73 |     "mulx   (%2),  %%r8,  %%r9; " /* A[3]*B[0] */  "adox %%rbx,  %%r8 ;"  "movq %%r8, 24(%0) ;"
 74 |     "mulx  8(%2), %%r10, %%r11; " /* A[3]*B[1] */  "adox %%r10,  %%r9 ;"  "adcx  %%r9, %%rcx ;"  "movq %%rcx, 32(%0) ;"
 75 |     "mulx 16(%2),  %%r8, %%r13; " /* A[3]*B[2] */  "adox  %%r8, %%r11 ;"  "adcx %%r11, %%r12 ;"  "movq %%r12, 40(%0) ;"
 76 |     "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */  "adox %%r10, %%r13 ;"  "adcx %%r13, %%rax ;"  "movq %%rax, 48(%0) ;"
 77 |     /*******************************************/  "adox %%r14, %%rbx ;"  "adcx %%r14, %%rbx ;"  "movq %%rbx, 56(%0) ;"
 78 | 
 79 |     "movq 32(%1), %%rdx; " /* C[0]: second pair, C = a[4:7] and D = b[4:7], results go to c[8:15] */
 80 |     "mulx 32(%2),  %%r8, %%r12; " /* C[0]*D[0] */  "xorl %%r10d, %%r10d ;" "movq %%r8, 64(%0);"
 81 |     "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */  "adox %%r10, %%r12 ;"
 82 |     "mulx 48(%2),  %%r8, %%rbx; " /* C[0]*D[2] */  "adox  %%r8, %%rax ;"
 83 |     "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */  "adox %%r10, %%rbx ;"
 84 |     /*******************************************/  "adox %%r14, %%rcx ;"
 85 | 
 86 |     "movq 40(%1), %%rdx; " /* C[1] */              "xorl %%r10d, %%r10d ;"
 87 |     "mulx 32(%2),  %%r8,  %%r9; " /* C[1]*D[0] */  "adox %%r12,  %%r8 ;"  "movq  %%r8, 72(%0);"
 88 |     "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */  "adox %%r10,  %%r9 ;"  "adcx  %%r9, %%rax ;"
 89 |     "mulx 48(%2),  %%r8, %%r13; " /* C[1]*D[2] */  "adox  %%r8, %%r11 ;"  "adcx %%r11, %%rbx ;"
 90 |     "mulx 56(%2), %%r10, %%r12; " /* C[1]*D[3] */  "adox %%r10, %%r13 ;"  "adcx %%r13, %%rcx ;"
 91 |     /*******************************************/  "adox %%r14, %%r12 ;"  "adcx %%r14, %%r12 ;"
 92 | 
 93 |     "movq 48(%1), %%rdx; " /* C[2] */              "xorl %%r10d, %%r10d ;"
 94 |     "mulx 32(%2),  %%r8,  %%r9; " /* C[2]*D[0] */  "adox %%rax,  %%r8 ;"  "movq  %%r8, 80(%0);"
 95 |     "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */  "adox %%r10,  %%r9 ;"  "adcx  %%r9, %%rbx ;"
 96 |     "mulx 48(%2),  %%r8, %%r13; " /* C[2]*D[2] */  "adox  %%r8, %%r11 ;"  "adcx %%r11, %%rcx ;"
 97 |     "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */  "adox %%r10, %%r13 ;"  "adcx %%r13, %%r12 ;"
 98 |     /*******************************************/  "adox %%r14, %%rax ;"  "adcx %%r14, %%rax ;"
 99 | 
100 |     "movq 56(%1), %%rdx; " /* C[3] */              "xorl %%r10d, %%r10d ;"
101 |     "mulx 32(%2),  %%r8,  %%r9; " /* C[3]*D[0] */  "adox %%rbx,  %%r8 ;"  "movq  %%r8, 88(%0);"
102 |     "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */  "adox %%r10,  %%r9 ;"  "adcx  %%r9, %%rcx ;"  "movq %%rcx,  96(%0) ;"
103 |     "mulx 48(%2),  %%r8, %%r13; " /* C[3]*D[2] */  "adox  %%r8, %%r11 ;"  "adcx %%r11, %%r12 ;"  "movq %%r12, 104(%0) ;"
104 |     "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */  "adox %%r10, %%r13 ;"  "adcx %%r13, %%rax ;"  "movq %%rax, 112(%0) ;"
105 |     /*******************************************/  "adox %%r14, %%rbx ;"  "adcx %%r14, %%rbx ;"  "movq %%rbx, 120(%0) ;"
106 |   :
107 |   : "r" (c), "r" (a), "r" (b)
108 |   : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx",
109 |     "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14"
110 |   );
111 | #else   /* Without ADX: a single carry chain per row using ADD/ADC */
112 |   __asm__ __volatile__(
113 |     "movq   (%1), %%rdx; " /* A[0] */
114 |     "mulx   (%2),  %%r8, %%r12; " /* A[0]*B[0] */                           "movq %%r8,  (%0) ;"
115 |     "mulx  8(%2), %%r10, %%rax; " /* A[0]*B[1] */    "addq %%r10, %%r12 ;"
116 |     "mulx 16(%2),  %%r8, %%rbx; " /* A[0]*B[2] */    "adcq  %%r8, %%rax ;"
117 |     "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */    "adcq %%r10, %%rbx ;"
118 |     /*******************************************/    "adcq    $0, %%rcx ;"
119 | 
120 |     "movq  8(%1), %%rdx; " /* A[1] */
121 |     "mulx   (%2),  %%r8,  %%r9; " /* A[1]*B[0] */    "addq %%r12,  %%r8 ;"  "movq %%r8, 8(%0) ;"
122 |     "mulx  8(%2), %%r10, %%r11; " /* A[1]*B[1] */    "adcq %%r10,  %%r9 ;"
123 |     "mulx 16(%2),  %%r8, %%r13; " /* A[1]*B[2] */    "adcq  %%r8, %%r11 ;"
124 |     "mulx 24(%2), %%r10, %%r12; " /* A[1]*B[3] */    "adcq %%r10, %%r13 ;"
125 |     /*******************************************/    "adcq    $0, %%r12 ;"
126 | 
127 |     "addq  %%r9, %%rax ;" /* add the row just computed into the running sum */
128 |     "adcq %%r11, %%rbx ;"
129 |     "adcq %%r13, %%rcx ;"
130 |     "adcq    $0, %%r12 ;"
131 | 
132 |     "movq 16(%1), %%rdx; " /* A[2] */
133 |     "mulx   (%2),  %%r8,  %%r9; " /* A[2]*B[0] */    "addq %%rax,  %%r8 ;"  "movq %%r8, 16(%0) ;"
134 |     "mulx  8(%2), %%r10, %%r11; " /* A[2]*B[1] */    "adcq %%r10,  %%r9 ;"
135 |     "mulx 16(%2),  %%r8, %%r13; " /* A[2]*B[2] */    "adcq  %%r8, %%r11 ;"
136 |     "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */    "adcq %%r10, %%r13 ;"
137 |     /*******************************************/    "adcq    $0, %%rax ;"
138 | 
139 |     "addq  %%r9, %%rbx ;"
140 |     "adcq %%r11, %%rcx ;"
141 |     "adcq %%r13, %%r12 ;"
142 |     "adcq    $0, %%rax ;"
143 | 
144 |     "movq 24(%1), %%rdx; " /* A[3] */
145 |     "mulx   (%2),  %%r8,  %%r9; " /* A[3]*B[0] */    "addq %%rbx,  %%r8 ;"  "movq %%r8, 24(%0) ;"
146 |     "mulx  8(%2), %%r10, %%r11; " /* A[3]*B[1] */    "adcq %%r10,  %%r9 ;"
147 |     "mulx 16(%2),  %%r8, %%r13; " /* A[3]*B[2] */    "adcq  %%r8, %%r11 ;"
148 |     "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */    "adcq %%r10, %%r13 ;"
149 |     /*******************************************/    "adcq    $0, %%rbx ;"
150 | 
151 |     "addq  %%r9, %%rcx ;"  "movq %%rcx, 32(%0) ;"
152 |     "adcq %%r11, %%r12 ;"  "movq %%r12, 40(%0) ;"
153 |     "adcq %%r13, %%rax ;"  "movq %%rax, 48(%0) ;"
154 |     "adcq    $0, %%rbx ;"  "movq %%rbx, 56(%0) ;"
155 | 
156 |     "movq 32(%1), %%rdx; " /* C[0]: second pair, C = a[4:7] and D = b[4:7], results go to c[8:15] */
157 |     "mulx 32(%2),  %%r8, %%r12; " /* C[0]*D[0] */                           "movq %%r8, 64(%0) ;"
158 |     "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */    "addq %%r10, %%r12 ;"
159 |     "mulx 48(%2),  %%r8, %%rbx; " /* C[0]*D[2] */    "adcq  %%r8, %%rax ;"
160 |     "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */    "adcq %%r10, %%rbx ;"
161 |     /*******************************************/    "adcq    $0, %%rcx ;"
162 | 
163 |     "movq 40(%1), %%rdx; " /* C[1] */
164 |     "mulx 32(%2),  %%r8,  %%r9; " /* C[1]*D[0] */    "addq %%r12,  %%r8 ;"  "movq %%r8, 72(%0) ;"
165 |     "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */    "adcq %%r10,  %%r9 ;"
166 |     "mulx 48(%2),  %%r8, %%r13; " /* C[1]*D[2] */    "adcq  %%r8, %%r11 ;"
167 |     "mulx 56(%2), %%r10, %%r12; " /* C[1]*D[3] */    "adcq %%r10, %%r13 ;"
168 |     /*******************************************/    "adcq    $0, %%r12 ;"
169 | 
170 |     "addq  %%r9, %%rax ;"
171 |     "adcq %%r11, %%rbx ;"
172 |     "adcq %%r13, %%rcx ;"
173 |     "adcq    $0, %%r12 ;"
174 | 
175 |     "movq 48(%1), %%rdx; " /* C[2] */
176 |     "mulx 32(%2),  %%r8,  %%r9; " /* C[2]*D[0] */    "addq %%rax,  %%r8 ;"  "movq %%r8, 80(%0) ;"
177 |     "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */    "adcq %%r10,  %%r9 ;"
178 |     "mulx 48(%2),  %%r8, %%r13; " /* C[2]*D[2] */    "adcq  %%r8, %%r11 ;"
179 |     "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */    "adcq %%r10, %%r13 ;"
180 |     /*******************************************/    "adcq    $0, %%rax ;"
181 | 
182 |     "addq  %%r9, %%rbx ;"
183 |     "adcq %%r11, %%rcx ;"
184 |     "adcq %%r13, %%r12 ;"
185 |     "adcq    $0, %%rax ;"
186 | 
187 |     "movq 56(%1), %%rdx; " /* C[3] */
188 |     "mulx 32(%2),  %%r8,  %%r9; " /* C[3]*D[0] */    "addq %%rbx,  %%r8 ;"  "movq %%r8, 88(%0) ;"
189 |     "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */    "adcq %%r10,  %%r9 ;"
190 |     "mulx 48(%2),  %%r8, %%r13; " /* C[3]*D[2] */    "adcq  %%r8, %%r11 ;"
191 |     "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */    "adcq %%r10, %%r13 ;"
192 |     /*******************************************/    "adcq    $0, %%rbx ;"
193 | 
194 |     "addq  %%r9, %%rcx ;"  "movq %%rcx,  96(%0) ;"
195 |     "adcq %%r11, %%r12 ;"  "movq %%r12, 104(%0) ;"
196 |     "adcq %%r13, %%rax ;"  "movq %%rax, 112(%0) ;"
197 |     "adcq    $0, %%rbx ;"  "movq %%rbx, 120(%0) ;"
198 |   :
199 |   : "r" (c), "r" (a), "r" (b)
200 |   : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx",
201 |     "%r8", "%r9", "%r10", "%r11", "%r12", "%r13"
202 |   );
203 | #endif
204 | #else   /* Without BMI2 */
205 |   /**
206 |   * TODO: Multiplications using MULQ instruction. Until then, a build without BMI2 leaves c unwritten.
207 |   **/
208 | #endif
209 | }
210 | 
211 | /**
212 |  * Computes two independent 256-bit squarings: the cross products A[i]*A[j] (i != j) are summed once, doubled, and the squares A[i]^2 are then added.
213 |  * @param c Two 512-bit squares: c[0:7] = a[0:3]^2 and c[8:15] = a[4:7]^2. Words of c are stored while a is still being read, so c must not overlap a.
214 |  * @param a Two 256-bit integers: a[0:3] and a[4:7] (64-bit words, least significant first)
215 |  */
216 | void sqr2_256x256_integer_x64(uint64_t *const c, uint64_t *const a) {
217 | #ifdef __BMI2__
218 | #ifdef __ADX__
219 |   __asm__ __volatile__(
220 |     "movq   (%1), %%rdx        ;" /* A[0]      */
221 |     "mulx  8(%1),  %%r8, %%r14 ;" /* A[1]*A[0] */  "xorl %%r15d, %%r15d;" /* r15 = 0; also clears CF and OF */
222 |     "mulx 16(%1),  %%r9, %%r10 ;" /* A[2]*A[0] */  "adcx %%r14,  %%r9 ;"
223 |     "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */  "adcx %%rax, %%r10 ;"
224 |     "movq 24(%1), %%rdx        ;" /* A[3]      */
225 |     "mulx  8(%1), %%r11, %%r12 ;" /* A[1]*A[3] */  "adcx %%rcx, %%r11 ;"
226 |     "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */  "adcx %%rax, %%r12 ;"
227 |     "movq  8(%1), %%rdx        ;" /* A[1]      */  "adcx %%r15, %%r13 ;"
228 |     "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */  "movq    $0, %%r14 ;" /* mov instead of xor: CF must survive for the adcx below */
229 |     /*******************************************/  "adcx %%r15, %%r14 ;"
230 | 
231 |     "xorl %%r15d, %%r15d;" /* clear both flags: the adox chain adds A[2]*A[1], the adcx chain doubles each word */
232 |     "adox %%rax, %%r10 ;"  "adcx  %%r8,  %%r8 ;"
233 |     "adox %%rcx, %%r11 ;"  "adcx  %%r9,  %%r9 ;"
234 |     "adox %%r15, %%r12 ;"  "adcx %%r10, %%r10 ;"
235 |     "adox %%r15, %%r13 ;"  "adcx %%r11, %%r11 ;"
236 |     "adox %%r15, %%r14 ;"  "adcx %%r12, %%r12 ;"
237 |                            "adcx %%r13, %%r13 ;"
238 |                            "adcx %%r14, %%r14 ;"
239 | 
240 |     "movq   (%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
241 |     /********************/  "movq %%rax,  0(%0) ;"
242 |     "addq %%rcx,  %%r8 ;"   "movq  %%r8,  8(%0) ;"
243 |     "movq  8(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
244 |     "adcq %%rax,  %%r9 ;"   "movq  %%r9, 16(%0) ;"
245 |     "adcq %%rcx, %%r10 ;"   "movq %%r10, 24(%0) ;"
246 |     "movq 16(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
247 |     "adcq %%rax, %%r11 ;"   "movq %%r11, 32(%0) ;"
248 |     "adcq %%rcx, %%r12 ;"   "movq %%r12, 40(%0) ;"
249 |     "movq 24(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
250 |     "adcq %%rax, %%r13 ;"   "movq %%r13, 48(%0) ;"
251 |     "adcq %%rcx, %%r14 ;"   "movq %%r14, 56(%0) ;"
252 | 
253 | 
254 |     "movq 32(%1), %%rdx        ;" /* B[0] = a[4]; B denotes the second integer a[4:7], squared into c[8:15] */
255 |     "mulx 40(%1),  %%r8, %%r14 ;" /* B[1]*B[0] */  "xorl %%r15d, %%r15d;"
256 |     "mulx 48(%1),  %%r9, %%r10 ;" /* B[2]*B[0] */  "adcx %%r14,  %%r9 ;"
257 |     "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */  "adcx %%rax, %%r10 ;"
258 |     "movq 56(%1), %%rdx        ;" /* B[3]      */
259 |     "mulx 40(%1), %%r11, %%r12 ;" /* B[1]*B[3] */  "adcx %%rcx, %%r11 ;"
260 |     "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */  "adcx %%rax, %%r12 ;"
261 |     "movq 40(%1), %%rdx        ;" /* B[1]      */  "adcx %%r15, %%r13 ;"
262 |     "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */  "movq    $0, %%r14 ;"
263 |     /*******************************************/  "adcx %%r15, %%r14 ;"
264 | 
265 |     "xorl %%r15d, %%r15d;"
266 |     "adox %%rax, %%r10 ;"  "adcx  %%r8,  %%r8 ;"
267 |     "adox %%rcx, %%r11 ;"  "adcx  %%r9,  %%r9 ;"
268 |     "adox %%r15, %%r12 ;"  "adcx %%r10, %%r10 ;"
269 |     "adox %%r15, %%r13 ;"  "adcx %%r11, %%r11 ;"
270 |     "adox %%r15, %%r14 ;"  "adcx %%r12, %%r12 ;"
271 |                            "adcx %%r13, %%r13 ;"
272 |                            "adcx %%r14, %%r14 ;"
273 | 
274 |     "movq 32(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
275 |     /********************/  "movq %%rax,  64(%0) ;"
276 |     "addq %%rcx,  %%r8 ;"   "movq  %%r8,  72(%0) ;"
277 |     "movq 40(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
278 |     "adcq %%rax,  %%r9 ;"   "movq  %%r9,  80(%0) ;"
279 |     "adcq %%rcx, %%r10 ;"   "movq %%r10,  88(%0) ;"
280 |     "movq 48(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
281 |     "adcq %%rax, %%r11 ;"   "movq %%r11,  96(%0) ;"
282 |     "adcq %%rcx, %%r12 ;"   "movq %%r12, 104(%0) ;"
283 |     "movq 56(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
284 |     "adcq %%rax, %%r13 ;"   "movq %%r13, 112(%0) ;"
285 |     "adcq %%rcx, %%r14 ;"   "movq %%r14, 120(%0) ;"
286 |   :
287 |   : "r" (c), "r" (a)
288 |   : "memory", "cc", "%rax", "%rcx", "%rdx",
289 |     "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
290 |   );
291 | #else    /* Without ADX */
292 |   __asm__ __volatile__(
293 |     "movq  8(%1), %%rdx        ;" /* A[1]      */
294 |     "mulx   (%1),  %%r8,  %%r9 ;" /* A[0]*A[1] */
295 |     "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
296 |     "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
297 | 
298 |     "movq 16(%1), %%rdx        ;" /* A[2]      */
299 |     "mulx 24(%1), %%r12, %%r13 ;" /* A[3]*A[2] */
300 |     "mulx   (%1), %%rax, %%rdx ;" /* A[0]*A[2]; the high word overwrites rdx */
301 | 
302 |     "addq %%rax,  %%r9 ;"
303 |     "adcq %%rdx, %%r10 ;"
304 |     "adcq %%rcx, %%r11 ;"
305 |     "adcq %%r14, %%r12 ;"
306 |     "adcq    $0, %%r13 ;"
307 |     "movq    $0, %%r14 ;" /* mov leaves CF intact for the adcq below */
308 |     "adcq    $0, %%r14 ;"
309 | 
310 |     "movq   (%1), %%rdx        ;" /* A[0]      */
311 |     "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
312 | 
313 |     "addq %%rax, %%r10 ;"
314 |     "adcq %%rcx, %%r11 ;"
315 |     "adcq    $0, %%r12 ;"
316 |     "adcq    $0, %%r13 ;"
317 |     "adcq    $0, %%r14 ;"
318 | 
319 |     "shldq $1, %%r13, %%r14 ;" /* double the 7-word cross-product sum r14:...:r8, top word first */
320 |     "shldq $1, %%r12, %%r13 ;"
321 |     "shldq $1, %%r11, %%r12 ;"
322 |     "shldq $1, %%r10, %%r11 ;"
323 |     "shldq $1,  %%r9, %%r10 ;"
324 |     "shldq $1,  %%r8,  %%r9 ;"
325 |     "shlq  $1,  %%r8        ;"
326 | 
327 |     /********************/ "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 (rdx still holds A[0] from the load above) */
328 |     /********************/ "movq %%rax,  0(%0) ;"
329 |     "addq %%rcx,  %%r8 ;"  "movq  %%r8,  8(%0) ;"
330 |     "movq  8(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
331 |     "adcq %%rax,  %%r9 ;"  "movq  %%r9, 16(%0) ;"
332 |     "adcq %%rcx, %%r10 ;"  "movq %%r10, 24(%0) ;"
333 |     "movq 16(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
334 |     "adcq %%rax, %%r11 ;"  "movq %%r11, 32(%0) ;"
335 |     "adcq %%rcx, %%r12 ;"  "movq %%r12, 40(%0) ;"
336 |     "movq 24(%1), %%rdx ;" "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
337 |     "adcq %%rax, %%r13 ;"  "movq %%r13, 48(%0) ;"
338 |     "adcq %%rcx, %%r14 ;"  "movq %%r14, 56(%0) ;"
339 | 
340 |     "movq 40(%1), %%rdx        ;" /* B[1] = a[5]; B denotes the second integer a[4:7], squared into c[8:15] */
341 |     "mulx 32(%1),  %%r8,  %%r9 ;" /* B[0]*B[1] */
342 |     "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
343 |     "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
344 | 
345 |     "movq 48(%1), %%rdx        ;" /* B[2]      */
346 |     "mulx 56(%1), %%r12, %%r13 ;" /* B[3]*B[2] */
347 |     "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2]; the high word overwrites rdx */
348 | 
349 |     "addq %%rax,  %%r9 ;"
350 |     "adcq %%rdx, %%r10 ;"
351 |     "adcq %%rcx, %%r11 ;"
352 |     "adcq %%r14, %%r12 ;"
353 |     "adcq    $0, %%r13 ;"
354 |     "movq    $0, %%r14 ;"
355 |     "adcq    $0, %%r14 ;"
356 | 
357 |     "movq 32(%1), %%rdx        ;" /* B[0]      */
358 |     "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
359 | 
360 |     "addq %%rax, %%r10 ;"
361 |     "adcq %%rcx, %%r11 ;"
362 |     "adcq    $0, %%r12 ;"
363 |     "adcq    $0, %%r13 ;"
364 |     "adcq    $0, %%r14 ;"
365 | 
366 |     "shldq $1, %%r13, %%r14 ;"
367 |     "shldq $1, %%r12, %%r13 ;"
368 |     "shldq $1, %%r11, %%r12 ;"
369 |     "shldq $1, %%r10, %%r11 ;"
370 |     "shldq $1,  %%r9, %%r10 ;"
371 |     "shldq $1,  %%r8,  %%r9 ;"
372 |     "shlq  $1,  %%r8        ;"
373 | 
374 |     /********************/  "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 (rdx still holds B[0] from the load above) */
375 |     /********************/  "movq %%rax,  64(%0) ;"
376 |     "addq %%rcx,  %%r8 ;"   "movq  %%r8,  72(%0) ;"
377 |     "movq 40(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
378 |     "adcq %%rax,  %%r9 ;"   "movq  %%r9,  80(%0) ;"
379 |     "adcq %%rcx, %%r10 ;"   "movq %%r10,  88(%0) ;"
380 |     "movq 48(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
381 |     "adcq %%rax, %%r11 ;"   "movq %%r11,  96(%0) ;"
382 |     "adcq %%rcx, %%r12 ;"   "movq %%r12, 104(%0) ;"
383 |     "movq 56(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
384 |     "adcq %%rax, %%r13 ;"   "movq %%r13, 112(%0) ;"
385 |     "adcq %%rcx, %%r14 ;"   "movq %%r14, 120(%0) ;"
386 |   :
387 |   : "r" (c), "r" (a)
388 |   : "memory", "cc", "%rax", "%rcx", "%rdx",
389 |     "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14"
390 |   );
391 | #endif
392 | #else    /* Without BMI2 */
393 |   /**
394 |   * TODO: Multiplications using MULQ instruction. Until then, a build without BMI2 leaves c unwritten.
395 |   **/
396 | #endif
397 | }
398 | 
399 | /**
400 |  * Folds two 512-bit integers down to four words each: low half + 38*high half, then the carry words are folded in again with the same factor 38.
401 |  * @param c Output, 8 words: c[0:3] is computed from a[0:7] and c[4:7] from a[8:15]
402 |  * @param a Two 512-bit integers: a[0:7] and a[8:15] (64-bit words, least significant first)
403 |  */
404 | void red_EltFp25519_2w_x64(uint64_t *const c, uint64_t *const a) {
405 | #ifdef __BMI2__
406 | #ifdef __ADX__
407 |   __asm__ __volatile__(
408 |     "movl    $38, %%edx; " /* 2*c = 38 == 2^256 (mod 2^255-19), with c = 19 */
409 |     "mulx 32(%1),  %%r8, %%r10; " /* c*C[4] */   "xorl %%ebx, %%ebx ;"  "adox   (%1),  %%r8 ;" /* rbx = 0, flags cleared; adcx chains the products, adox adds C[0:3] */
410 |     "mulx 40(%1),  %%r9, %%r11; " /* c*C[5] */   "adcx %%r10,  %%r9 ;"  "adox  8(%1),  %%r9 ;"
411 |     "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */   "adcx %%r11, %%r10 ;"  "adox 16(%1), %%r10 ;"
412 |     "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */   "adcx %%rax, %%r11 ;"  "adox 24(%1), %%r11 ;"
413 |     /****************************************/   "adcx %%rbx, %%rcx ;"  "adox  %%rbx, %%rcx ;"
414 |     "clc ;" /* start a fresh CF chain for the second fold */
415 |     "mulx %%rcx, %%rax, %%rcx ; " /* 38 * carry word held in rcx */
416 |     "adcx %%rax,  %%r8 ;"
417 |     "adcx %%rcx,  %%r9 ;"  "movq  %%r9,  8(%0) ;"
418 |     "adcx %%rbx, %%r10 ;"  "movq %%r10, 16(%0) ;"
419 |     "adcx %%rbx, %%r11 ;"  "movq %%r11, 24(%0) ;"
420 |     "mov     $0, %%ecx ;" /* mov keeps CF for the cmovc below */
421 |     "cmovc %%edx, %%ecx ;" /* rcx = 38 if the previous chain carried out of word 3, else 0 */
422 |     "addq %%rcx,  %%r8 ;"  "movq  %%r8,   (%0) ;"
423 | 
424 |     "mulx  96(%1),  %%r8, %%r10; " /* c*C[4] */  "xorl %%ebx, %%ebx ;"  "adox 64(%1),  %%r8 ;" /* second integer: C[0..7] here are a[8..15]; edx is still 38 */
425 |     "mulx 104(%1),  %%r9, %%r11; " /* c*C[5] */  "adcx %%r10,  %%r9 ;"  "adox 72(%1),  %%r9 ;"
426 |     "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */  "adcx %%r11, %%r10 ;"  "adox 80(%1), %%r10 ;"
427 |     "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */  "adcx %%rax, %%r11 ;"  "adox 88(%1), %%r11 ;"
428 |     /*****************************************/  "adcx %%rbx, %%rcx ;"  "adox  %%rbx, %%rcx ;"
429 |     "clc ;"
430 |     "mulx %%rcx, %%rax, %%rcx ; " /* 38 * carry word held in rcx */
431 |     "adcx %%rax,  %%r8 ;"
432 |     "adcx %%rcx,  %%r9 ;"  "movq  %%r9, 40(%0) ;"
433 |     "adcx %%rbx, %%r10 ;"  "movq %%r10, 48(%0) ;"
434 |     "adcx %%rbx, %%r11 ;"  "movq %%r11, 56(%0) ;"
435 |     "mov     $0, %%ecx ;"
436 |     "cmovc %%edx, %%ecx ;"
437 |     "addq %%rcx,  %%r8 ;"  "movq  %%r8, 32(%0) ;"
438 |   :
439 |   : "r" (c), "r" (a)
440 |   : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11"
441 |   );
442 | #else   /* Without ADX: the same two folds using ADD/ADC */
443 |   __asm__ __volatile__(
444 |     "movl    $38, %%edx ; " /* 2*c = 38 == 2^256 (mod 2^255-19), with c = 19 */
445 |     "mulx 32(%1),  %%r8, %%r10 ;" /* c*C[4] */
446 |     "mulx 40(%1),  %%r9, %%r11 ;" /* c*C[5] */  "addq %%r10,  %%r9 ;"
447 |     "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */  "adcq %%r11, %%r10 ;"
448 |     "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */  "adcq %%rax, %%r11 ;"
449 |     /****************************************/  "adcq    $0, %%rcx ;"
450 |     "addq   (%1),  %%r8 ;" /* add the low half C[0:3]; the carry out lands in rcx */
451 |     "adcq  8(%1),  %%r9 ;"
452 |     "adcq 16(%1), %%r10 ;"
453 |     "adcq 24(%1), %%r11 ;"
454 |     "adcq     $0, %%rcx ;"
455 |     "mulx %%rcx, %%rax, %%rcx ;" /* 38 * carry word held in rcx */
456 |     "addq %%rax,  %%r8 ;"
457 |     "adcq %%rcx,  %%r9 ;"  "movq  %%r9,  8(%0) ;"
458 |     "adcq    $0, %%r10 ;"  "movq %%r10, 16(%0) ;"
459 |     "adcq    $0, %%r11 ;"  "movq %%r11, 24(%0) ;"
460 |     "mov     $0, %%ecx ;" /* mov keeps CF for the cmovc below */
461 |     "cmovc %%edx, %%ecx ;" /* rcx = 38 if the previous chain carried out of word 3, else 0 */
462 |     "addq %%rcx,  %%r8 ;"  "movq  %%r8,   (%0) ;"
463 | 
464 |     "mulx  96(%1),  %%r8, %%r10 ;" /* c*C[4]; second integer: C[0..7] here are a[8..15], edx is still 38 */
465 |     "mulx 104(%1),  %%r9, %%r11 ;" /* c*C[5] */  "addq %%r10,  %%r9 ;"
466 |     "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */  "adcq %%r11, %%r10 ;"
467 |     "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */  "adcq %%rax, %%r11 ;"
468 |     /*****************************************/  "adcq    $0, %%rcx ;"
469 |     "addq 64(%1),  %%r8 ;"
470 |     "adcq 72(%1),  %%r9 ;"
471 |     "adcq 80(%1), %%r10 ;"
472 |     "adcq 88(%1), %%r11 ;"
473 |     "adcq     $0, %%rcx ;"
474 |     "mulx %%rcx, %%rax, %%rcx ;"  /* 38 * carry word held in rcx */
475 |     "addq %%rax,  %%r8 ;"
476 |     "adcq %%rcx,  %%r9 ;"  "movq  %%r9, 40(%0) ;"
477 |     "adcq    $0, %%r10 ;"  "movq %%r10, 48(%0) ;"
478 |     "adcq    $0, %%r11 ;"  "movq %%r11, 56(%0) ;"
479 |     "mov     $0, %%ecx ;"
480 |     "cmovc %%edx, %%ecx ;"
481 |     "addq %%rcx,  %%r8 ;"  "movq  %%r8, 32(%0) ;"
482 |   :
483 |   : "r" (c), "r" (a)
484 |   : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11"
485 |   );
486 | #endif
487 | #else    /* Without BMI2 */
488 |   /* [TODO] No reduction is implemented without BMI2: c is left unwritten. */
489 | #endif
490 | }
491 | /** Computes the 512-bit product c[0:7] = a[0:3]*b[0:3] (64-bit words, least significant first). c is stored while a and b are still being read, so it must not overlap them. */
492 | void mul_256x256_integer_x64(uint64_t *const c, uint64_t *const a, uint64_t *const b) {
493 | #ifdef __BMI2__
494 | #ifdef __ADX__
495 |   __asm__ __volatile__(
496 |     "movq   (%1), %%rdx; " /* A[0] */
497 |     "mulx   (%2),  %%r8,  %%r9; " /* A[0]*B[0] */    "xorl %%r10d, %%r10d ;"                           "movq  %%r8,  (%0) ;"
498 |     "mulx  8(%2), %%r10, %%r11; " /* A[0]*B[1] */    "adox  %%r9, %%r10 ;"                             "movq %%r10, 8(%0) ;"
499 |     "mulx 16(%2), %%r12, %%r13; " /* A[0]*B[2] */    "adox %%r11, %%r12 ;"
500 |     "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */    "adox %%r13, %%r14 ;"                                                       "movq $0, %%rax ;"
501 |     /*******************************************/    "adox %%rdx, %%rax ;"
502 | 
503 |     "movq  8(%1), %%rdx; " /* A[1]; each later row re-reads from c the word the previous row stored (c is used as scratch) */
504 |     "mulx   (%2),  %%r8,  %%r9; " /* A[1]*B[0] */    "xorl %%r10d, %%r10d ;"  "adcx 8(%0),  %%r8 ;"    "movq  %%r8,  8(%0) ;"
505 |     "mulx  8(%2), %%r10, %%r11; " /* A[1]*B[1] */    "adox  %%r9, %%r10 ;"    "adcx %%r12, %%r10 ;"    "movq %%r10, 16(%0) ;"
506 |     "mulx 16(%2), %%r12, %%r13; " /* A[1]*B[2] */    "adox %%r11, %%r12 ;"    "adcx %%r14, %%r12 ;"                              "movq $0, %%r8  ;"
507 |     "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */    "adox %%r13, %%r14 ;"    "adcx %%rax, %%r14 ;"                              "movq $0, %%rax ;"
508 |     /*******************************************/    "adox %%rdx, %%rax ;"    "adcx  %%r8, %%rax ;" /* rax = high word + OF + CF; the zeroing movq above leaves both flags intact */
509 | 
510 |     "movq 16(%1), %%rdx; " /* A[2] */
511 |     "mulx   (%2),  %%r8,  %%r9; " /* A[2]*B[0] */    "xorl %%r10d, %%r10d ;"  "adcx 16(%0), %%r8 ;"    "movq  %%r8, 16(%0) ;"
512 |     "mulx  8(%2), %%r10, %%r11; " /* A[2]*B[1] */    "adox  %%r9, %%r10 ;"    "adcx %%r12, %%r10 ;"    "movq %%r10, 24(%0) ;"
513 |     "mulx 16(%2), %%r12, %%r13; " /* A[2]*B[2] */    "adox %%r11, %%r12 ;"    "adcx %%r14, %%r12 ;"                              "movq $0, %%r8  ;"
514 |     "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */    "adox %%r13, %%r14 ;"    "adcx %%rax, %%r14 ;"                              "movq $0, %%rax ;"
515 |     /*******************************************/    "adox %%rdx, %%rax ;"    "adcx  %%r8, %%rax ;"
516 | 
517 |     "movq 24(%1), %%rdx; " /* A[3] */
518 |     "mulx   (%2),  %%r8,  %%r9; " /* A[3]*B[0] */    "xorl %%r10d, %%r10d ;"  "adcx 24(%0), %%r8 ;"    "movq  %%r8, 24(%0) ;"
519 |     "mulx  8(%2), %%r10, %%r11; " /* A[3]*B[1] */    "adox  %%r9, %%r10 ;"    "adcx %%r12, %%r10 ;"    "movq %%r10, 32(%0) ;"
520 |     "mulx 16(%2), %%r12, %%r13; " /* A[3]*B[2] */    "adox %%r11, %%r12 ;"    "adcx %%r14, %%r12 ;"    "movq %%r12, 40(%0) ;"    "movq $0, %%r8  ;"
521 |     "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */    "adox %%r13, %%r14 ;"    "adcx %%rax, %%r14 ;"    "movq %%r14, 48(%0) ;"    "movq $0, %%rax ;"
522 |     /*******************************************/    "adox %%rdx, %%rax ;"    "adcx  %%r8, %%rax ;"    "movq %%rax, 56(%0) ;"
523 |   :
524 |   : "r" (c), "r" (a), "r" (b)
525 |   : "memory", "cc", "%rax", "%rdx", "%r8",
526 |     "%r9", "%r10", "%r11", "%r12", "%r13", "%r14"
527 |   );
528 | #else   /* Without ADX: a single carry chain per row using ADD/ADC */
529 |   __asm__ __volatile__(
530 |     "movq   (%1), %%rdx; " /* A[0] */
531 |     "mulx   (%2),  %%r8, %%r12; " /* A[0]*B[0] */                           "movq %%r8,  (%0) ;"
532 |     "mulx  8(%2), %%r10, %%rax; " /* A[0]*B[1] */    "addq %%r10, %%r12 ;"
533 |     "mulx 16(%2),  %%r8, %%rbx; " /* A[0]*B[2] */    "adcq  %%r8, %%rax ;"
534 |     "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */    "adcq %%r10, %%rbx ;"
535 |     /*******************************************/    "adcq    $0, %%rcx ;"
536 | 
537 |     "movq  8(%1), %%rdx; " /* A[1] */
538 |     "mulx   (%2),  %%r8,  %%r9; " /* A[1]*B[0] */    "addq %%r12,  %%r8 ;"  "movq %%r8, 8(%0) ;"
539 |     "mulx  8(%2), %%r10, %%r11; " /* A[1]*B[1] */    "adcq %%r10,  %%r9 ;"
540 |     "mulx 16(%2),  %%r8, %%r13; " /* A[1]*B[2] */    "adcq  %%r8, %%r11 ;"
541 |     "mulx 24(%2), %%r10, %%r12; " /* A[1]*B[3] */    "adcq %%r10, %%r13 ;"
542 |     /*******************************************/    "adcq    $0, %%r12 ;"
543 | 
544 |     "addq  %%r9, %%rax ;" /* add the row just computed into the running sum */
545 |     "adcq %%r11, %%rbx ;"
546 |     "adcq %%r13, %%rcx ;"
547 |     "adcq    $0, %%r12 ;"
548 | 
549 |     "movq 16(%1), %%rdx; " /* A[2] */
550 |     "mulx   (%2),  %%r8,  %%r9; " /* A[2]*B[0] */    "addq %%rax,  %%r8 ;"  "movq %%r8, 16(%0) ;"
551 |     "mulx  8(%2), %%r10, %%r11; " /* A[2]*B[1] */    "adcq %%r10,  %%r9 ;"
552 |     "mulx 16(%2),  %%r8, %%r13; " /* A[2]*B[2] */    "adcq  %%r8, %%r11 ;"
553 |     "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */    "adcq %%r10, %%r13 ;"
554 |     /*******************************************/    "adcq    $0, %%rax ;"
555 | 
556 |     "addq  %%r9, %%rbx ;"
557 |     "adcq %%r11, %%rcx ;"
558 |     "adcq %%r13, %%r12 ;"
559 |     "adcq    $0, %%rax ;"
560 | 
561 |     "movq 24(%1), %%rdx; " /* A[3] */
562 |     "mulx   (%2),  %%r8,  %%r9; " /* A[3]*B[0] */    "addq %%rbx,  %%r8 ;"  "movq %%r8, 24(%0) ;"
563 |     "mulx  8(%2), %%r10, %%r11; " /* A[3]*B[1] */    "adcq %%r10,  %%r9 ;"
564 |     "mulx 16(%2),  %%r8, %%r13; " /* A[3]*B[2] */    "adcq  %%r8, %%r11 ;"
565 |     "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */    "adcq %%r10, %%r13 ;"
566 |     /*******************************************/    "adcq    $0, %%rbx ;"
567 | 
568 |     "addq  %%r9, %%rcx ;"  "movq %%rcx, 32(%0) ;"
569 |     "adcq %%r11, %%r12 ;"  "movq %%r12, 40(%0) ;"
570 |     "adcq %%r13, %%rax ;"  "movq %%rax, 48(%0) ;"
571 |     "adcq    $0, %%rbx ;"  "movq %%rbx, 56(%0) ;"
572 |   :
573 |   : "r" (c), "r" (a), "r" (b)
574 |   : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx",
575 |     "%r8", "%r9", "%r10", "%r11", "%r12", "%r13"
576 |   );
577 | #endif
578 | #else    /* Without BMI2 */
579 |   /**
580 |   * TODO: Multiplications using MULQ instruction. Until then, a build without BMI2 leaves c unwritten.
581 |   **/
582 | #endif
583 | }
584 | /** Computes the 512-bit square c[0:7] = a[0:3]^2 (64-bit words, least significant first). c is stored while a is still being read, so it must not overlap a. */
585 | void sqr_256x256_integer_x64(uint64_t *const c, uint64_t *const a) {
586 | #ifdef __BMI2__
587 | #ifdef __ADX__
588 |   __asm__ __volatile__(
589 |     "movq   (%1), %%rdx        ;" /* A[0]      */
590 |     "mulx  8(%1),  %%r8, %%r14 ;" /* A[1]*A[0] */  "xorl %%r15d, %%r15d;" /* r15 = 0; also clears CF and OF */
591 |     "mulx 16(%1),  %%r9, %%r10 ;" /* A[2]*A[0] */  "adcx %%r14,  %%r9 ;"
592 |     "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */  "adcx %%rax, %%r10 ;"
593 |     "movq 24(%1), %%rdx        ;" /* A[3]      */
594 |     "mulx  8(%1), %%r11, %%r12 ;" /* A[1]*A[3] */  "adcx %%rcx, %%r11 ;"
595 |     "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */  "adcx %%rax, %%r12 ;"
596 |     "movq  8(%1), %%rdx        ;" /* A[1]      */  "adcx %%r15, %%r13 ;"
597 |     "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */  "movq    $0, %%r14 ;" /* mov instead of xor: CF must survive for the adcx below */
598 |     /*******************************************/  "adcx %%r15, %%r14 ;"
599 | 
600 |     "xorl %%r15d, %%r15d;" /* clear both flags: the adox chain adds A[2]*A[1], the adcx chain doubles each word */
601 |     "adox %%rax, %%r10 ;"  "adcx  %%r8,  %%r8 ;"
602 |     "adox %%rcx, %%r11 ;"  "adcx  %%r9,  %%r9 ;"
603 |     "adox %%r15, %%r12 ;"  "adcx %%r10, %%r10 ;"
604 |     "adox %%r15, %%r13 ;"  "adcx %%r11, %%r11 ;"
605 |     "adox %%r15, %%r14 ;"  "adcx %%r12, %%r12 ;"
606 |                            "adcx %%r13, %%r13 ;"
607 |                            "adcx %%r14, %%r14 ;"
608 | 
609 |     "movq   (%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
610 |     /********************/  "movq %%rax,  0(%0) ;"
611 |     "addq %%rcx,  %%r8 ;"   "movq  %%r8,  8(%0) ;"
612 |     "movq  8(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
613 |     "adcq %%rax,  %%r9 ;"   "movq  %%r9, 16(%0) ;"
614 |     "adcq %%rcx, %%r10 ;"   "movq %%r10, 24(%0) ;"
615 |     "movq 16(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
616 |     "adcq %%rax, %%r11 ;"   "movq %%r11, 32(%0) ;"
617 |     "adcq %%rcx, %%r12 ;"   "movq %%r12, 40(%0) ;"
618 |     "movq 24(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
619 |     "adcq %%rax, %%r13 ;"   "movq %%r13, 48(%0) ;"
620 |     "adcq %%rcx, %%r14 ;"   "movq %%r14, 56(%0) ;"
621 |   :
622 |   : "r" (c), "r" (a)
623 |   : "memory", "cc", "%rax", "%rcx", "%rdx",
624 |     "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
625 |   );
626 | #else    /* Without ADX */
627 |    __asm__ __volatile__(
628 |     "movq  8(%1), %%rdx        ;" /* A[1]      */
629 |     "mulx   (%1),  %%r8,  %%r9 ;" /* A[0]*A[1] */
630 |     "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
631 |     "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
632 | 
633 |     "movq 16(%1), %%rdx        ;" /* A[2]      */
634 |     "mulx 24(%1), %%r12, %%r13 ;" /* A[3]*A[2] */
635 |     "mulx   (%1), %%rax, %%rdx ;" /* A[0]*A[2]; the high word overwrites rdx */
636 | 
637 |     "addq %%rax,  %%r9 ;"
638 |     "adcq %%rdx, %%r10 ;"
639 |     "adcq %%rcx, %%r11 ;"
640 |     "adcq %%r14, %%r12 ;"
641 |     "adcq    $0, %%r13 ;"
642 |     "movq    $0, %%r14 ;" /* mov leaves CF intact for the adcq below */
643 |     "adcq    $0, %%r14 ;"
644 | 
645 |     "movq   (%1), %%rdx        ;" /* A[0]      */
646 |     "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
647 | 
648 |     "addq %%rax, %%r10 ;"
649 |     "adcq %%rcx, %%r11 ;"
650 |     "adcq    $0, %%r12 ;"
651 |     "adcq    $0, %%r13 ;"
652 |     "adcq    $0, %%r14 ;"
653 | 
654 |     "shldq $1, %%r13, %%r14 ;" /* double the 7-word cross-product sum r14:...:r8, top word first */
655 |     "shldq $1, %%r12, %%r13 ;"
656 |     "shldq $1, %%r11, %%r12 ;"
657 |     "shldq $1, %%r10, %%r11 ;"
658 |     "shldq $1,  %%r9, %%r10 ;"
659 |     "shldq $1,  %%r8,  %%r9 ;"
660 |     "shlq  $1,  %%r8        ;"
661 | 
662 |     /********************/  "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 (rdx still holds A[0] from the load above) */
663 |     /********************/  "movq %%rax,  0(%0) ;"
664 |     "addq %%rcx,  %%r8 ;"   "movq  %%r8,  8(%0) ;"
665 |     "movq  8(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
666 |     "adcq %%rax,  %%r9 ;"   "movq  %%r9, 16(%0) ;"
667 |     "adcq %%rcx, %%r10 ;"   "movq %%r10, 24(%0) ;"
668 |     "movq 16(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
669 |     "adcq %%rax, %%r11 ;"   "movq %%r11, 32(%0) ;"
670 |     "adcq %%rcx, %%r12 ;"   "movq %%r12, 40(%0) ;"
671 |     "movq 24(%1), %%rdx ;"  "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
672 |     "adcq %%rax, %%r13 ;"   "movq %%r13, 48(%0) ;"
673 |     "adcq %%rcx, %%r14 ;"   "movq %%r14, 56(%0) ;"
674 |   :
675 |   : "r" (c), "r" (a)
676 |   : "memory", "cc", "%rax", "%rcx", "%rdx",
677 |     "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14"
678 |   );
679 | #endif
680 | #else    /* Without BMI2 */
681 |   /**
682 |   * TODO: Multiplications using MULQ instruction. Until then, a build without BMI2 leaves c unwritten.
683 |   **/
684 | #endif
685 | }
686 | /* red_EltFp25519_1w_x64: folds the 8-word integer at a (C[0..7]) into the 4 words at c, using 2^256 = 38 (mod 2^255-19). */
687 | void red_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a) {
688 | #ifdef __BMI2__
689 | #ifdef __ADX__ /* ADX variant: adcx carries through CF and adox through OF, so two carry chains interleave */
690 |   __asm__ __volatile__( /* %0 = c, %1 = a; "c*C[i]" in the comments below denotes 38 * a[i] */
691 |     "movl    $38, %%edx ;" /* rdx = 38 = 2*19: 2^256 = 38 (mod 2^255-19); mulx multiplies by rdx */
692 |     "mulx 32(%1),  %%r8, %%r10 ;" /* c*C[4] */  "xorl %%ebx, %%ebx ;"  "adox   (%1),  %%r8 ;"
693 |     "mulx 40(%1),  %%r9, %%r11 ;" /* c*C[5] */  "adcx %%r10,  %%r9 ;"  "adox  8(%1),  %%r9 ;"
694 |     "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */  "adcx %%r11, %%r10 ;"  "adox 16(%1), %%r10 ;"
695 |     "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */  "adcx %%rax, %%r11 ;"  "adox 24(%1), %%r11 ;"
696 |     /****************************************/  "adcx %%rbx, %%rcx ;"  "adox  %%rbx, %%rcx ;" /* rbx = 0 (the xorl above also cleared CF/OF): rcx absorbs both final carries */
697 |     "clc ;" /* clear CF before the second fold */
698 |     "mulx %%rcx, %%rax, %%rcx ;" /* 38 * overflow word rcx (not C[4]): low half in rax, high half in rcx */
699 |     "adcx %%rax,  %%r8 ;"
700 |     "adcx %%rcx,  %%r9 ;"  "movq  %%r9,  8(%0) ;"
701 |     "adcx %%rbx, %%r10 ;"  "movq %%r10, 16(%0) ;"
702 |     "adcx %%rbx, %%r11 ;"  "movq %%r11, 24(%0) ;"
703 |     "mov     $0, %%ecx ;" /* mov (unlike xor) leaves CF intact */
704 |     "cmovc %%edx, %%ecx ;" /* ecx = 38 if the second fold carried out of word 3, else 0 */
705 |     "addq %%rcx,  %%r8 ;"  "movq  %%r8,   (%0) ;" /* apply the last correction, then store word 0 */
706 |   :
707 |   : "r" (c), "r" (a)
708 |   : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11"
709 |   );
710 | #else /* BMI2 without ADX: same computation with a single add/adc carry chain */
711 |   __asm__ __volatile__( /* mulx does not modify the flags, so the adc chain can be interleaved with it */
712 |     "movl    $38, %%edx ;" /* rdx = 38 = 2*19: 2^256 = 38 (mod 2^255-19) */
713 |     "mulx 32(%1),  %%r8, %%r10 ;" /* c*C[4] */
714 |     "mulx 40(%1),  %%r9, %%r11 ;" /* c*C[5] */  "addq %%r10,  %%r9 ;"
715 |     "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */  "adcq %%r11, %%r10 ;"
716 |     "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */  "adcq %%rax, %%r11 ;"
717 |     /****************************************/  "adcq    $0, %%rcx ;" /* rcx = high word of 38*C[7] plus carry */
718 |     "addq   (%1),  %%r8 ;" /* add the low half C[0..3] */
719 |     "adcq  8(%1),  %%r9 ;"
720 |     "adcq 16(%1), %%r10 ;"
721 |     "adcq 24(%1), %%r11 ;"
722 |     "adcq     $0, %%rcx ;" /* rcx = overflow word above 2^256 */
723 |     "mulx %%rcx, %%rax, %%rcx ;" /* 38 * overflow word rcx (not C[4]): low half in rax, high half in rcx */
724 |     "addq %%rax,  %%r8 ;"
725 |     "adcq %%rcx,  %%r9 ;"  "movq  %%r9,  8(%0) ;"
726 |     "adcq    $0, %%r10 ;"  "movq %%r10, 16(%0) ;"
727 |     "adcq    $0, %%r11 ;"  "movq %%r11, 24(%0) ;"
728 |     "mov     $0, %%ecx ;" /* mov (unlike xor) leaves CF intact */
729 |     "cmovc %%edx, %%ecx ;" /* ecx = 38 if the second fold carried out of word 3, else 0 */
730 |     "addq %%rcx,  %%r8 ;"  "movq  %%r8,   (%0) ;" /* apply the last correction, then store word 0 */
731 |   :
732 |   : "r" (c), "r" (a)
733 |   : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11"
734 |   );
735 | #endif
736 | #else    /* Without BMI2 */
737 |   /**
738 |   * TODO: Multiplications using MULQ instruction.
739 |   * NOTE: until then, a build without __BMI2__ gets an empty function body and c is never written. **/
740 | #endif
741 | }
742 | /* add_EltFp25519_1w_x64: c = a + b over four 64-bit words; each carry out of 2^256 is folded back in as +38 (2^256 = 38 mod 2^255-19). */
743 | inline void add_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a, uint64_t *const b) {
744 | #ifdef __ADX__ /* same algorithm as the #else branch, with adcx driving the carry chain */
745 |   __asm__ __volatile__( /* %0 = c, %1 = a, %2 = b */
746 |     "mov     $38, %%eax ;"
747 |     "xorl  %%ecx, %%ecx ;" /* ecx = 0 and CF = 0, so the first adcx behaves as a plain add */
748 |     "movq   (%2),  %%r8 ;"  "adcx   (%1),  %%r8 ;"
749 |     "movq  8(%2),  %%r9 ;"  "adcx  8(%1),  %%r9 ;"
750 |     "movq 16(%2), %%r10 ;"  "adcx 16(%1), %%r10 ;"
751 |     "movq 24(%2), %%r11 ;"  "adcx 24(%1), %%r11 ;"
752 |     "cmovc %%eax, %%ecx ;" /* ecx = 38 if a + b carried out of word 3, else 0 */
753 |     "xorl %%eax, %%eax  ;" /* rax = 0 and CF = 0 for the correction chain */
754 |     "adcx %%rcx,  %%r8  ;" /* add the correction and ripple its carry upward */
755 |     "adcx %%rax,  %%r9  ;"  "movq  %%r9,  8(%0) ;"
756 |     "adcx %%rax, %%r10  ;"  "movq %%r10, 16(%0) ;"
757 |     "adcx %%rax, %%r11  ;"  "movq %%r11, 24(%0) ;"
758 |     "mov     $38, %%ecx ;" /* mov leaves the CF produced by the chain above intact */
759 |     "cmovc %%ecx, %%eax ;" /* eax = 38 if the correction itself carried out, else 0 */
760 |     "addq %%rax,  %%r8  ;"  "movq  %%r8,   (%0) ;" /* second correction, then store word 0 */
761 |   :
762 |   : "r" (c), "r" (a), "r" (b)
763 |   : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"
764 |   );
765 | #else
766 |   __asm__ __volatile__( /* %0 = c, %1 = a, %2 = b */
767 |     "mov     $38, %%eax ;"
768 |     "movq   (%2),  %%r8 ;"  "addq   (%1),  %%r8 ;"
769 |     "movq  8(%2),  %%r9 ;"  "adcq  8(%1),  %%r9 ;"
770 |     "movq 16(%2), %%r10 ;"  "adcq 16(%1), %%r10 ;"
771 |     "movq 24(%2), %%r11 ;"  "adcq 24(%1), %%r11 ;"
772 |     "mov      $0, %%ecx ;" /* mov (unlike xor) leaves CF intact */
773 |     "cmovc %%eax, %%ecx ;" /* ecx = 38 if a + b carried out of word 3, else 0 */
774 |     "addq %%rcx,  %%r8  ;" /* add the correction and ripple its carry upward */
775 |     "adcq    $0,  %%r9  ;"  "movq  %%r9,  8(%0) ;"
776 |     "adcq    $0, %%r10  ;"  "movq %%r10, 16(%0) ;"
777 |     "adcq    $0, %%r11  ;"  "movq %%r11, 24(%0) ;"
778 |     "mov     $0, %%ecx  ;" /* mov (unlike xor) leaves CF intact */
779 |     "cmovc %%eax, %%ecx ;" /* ecx = 38 if the correction itself carried out, else 0 */
780 |     "addq %%rcx,  %%r8  ;"  "movq  %%r8,   (%0) ;" /* second correction, then store word 0 */
781 |   :
782 |   : "r" (c), "r" (a), "r" (b)
783 |   : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"
784 |   );
785 | #endif
786 | }
787 | /* sub_EltFp25519_1w_x64: c = a - b over four 64-bit words; each borrow out of word 3 is compensated by subtracting 38 (2^256 = 38 mod 2^255-19). */
788 | inline void sub_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a, uint64_t *const b) {
789 |   __asm__ __volatile__( /* %0 = c, %1 = a, %2 = b */
790 |     "mov     $38, %%eax ;"
791 |     "movq   (%1),  %%r8 ;"  "subq   (%2),  %%r8 ;"
792 |     "movq  8(%1),  %%r9 ;"  "sbbq  8(%2),  %%r9 ;"
793 |     "movq 16(%1), %%r10 ;"  "sbbq 16(%2), %%r10 ;"
794 |     "movq 24(%1), %%r11 ;"  "sbbq 24(%2), %%r11 ;"
795 |     "mov      $0, %%ecx ;" /* mov (unlike xor) leaves CF intact */
796 |     "cmovc %%eax, %%ecx ;" /* ecx = 38 if a - b borrowed out of word 3, else 0 */
797 |     "subq %%rcx,  %%r8  ;" /* subtract the correction and ripple its borrow upward */
798 |     "sbbq    $0,  %%r9  ;"  "movq  %%r9,  8(%0) ;"
799 |     "sbbq    $0, %%r10  ;"  "movq %%r10, 16(%0) ;"
800 |     "sbbq    $0, %%r11  ;"  "movq %%r11, 24(%0) ;"
801 |     "mov     $0, %%ecx  ;" /* mov (unlike xor) leaves CF intact */
802 |     "cmovc %%eax, %%ecx ;" /* ecx = 38 if the correction itself borrowed, else 0 */
803 |     "subq %%rcx,  %%r8  ;"  "movq  %%r8,   (%0) ;" /* second correction, then store word 0 */
804 |   :
805 |   : "r" (c), "r" (a), "r" (b)
806 |   : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"
807 |   );
808 | }
809 | 
810 | /**
811 |  * Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666: c = a24 * a, with the fifth product word folded back in via 2^256 = 38 (mod 2^255-19).
812 |  **/
813 | inline void mul_a24_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a) {
814 | #ifdef __BMI2__
815 |   const uint64_t a24 = 121666;
816 |   __asm__ __volatile__( /* %0 = c, %1 = a, %2 = a24; mulx does not modify the flags, so the adc chain interleaves with it */
817 |     "movq     %2, %%rdx ;" /* mulx multiplies by rdx */
818 |     "mulx   (%1),  %%r8, %%r10 ;"
819 |     "mulx  8(%1),  %%r9, %%r11 ;"  "addq %%r10,  %%r9 ;"
820 |     "mulx 16(%1), %%r10, %%rax ;"  "adcq %%r11, %%r10 ;"
821 |     "mulx 24(%1), %%r11, %%rcx ;"  "adcq %%rax, %%r11 ;"
822 |     /***************************/  "adcq    $0, %%rcx ;" /* rcx = fifth (overflow) word of a24 * a */
823 |     "movl   $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19 (here c = 19) */
824 |     "mulx %%rcx, %%rax, %%rcx ;" /* 38 * overflow word: low half in rax, high half in rcx */
825 |     "addq %%rax,  %%r8 ;"
826 |     "adcq %%rcx,  %%r9 ;"  "movq  %%r9,  8(%0) ;"
827 |     "adcq    $0, %%r10 ;"  "movq %%r10, 16(%0) ;"
828 |     "adcq    $0, %%r11 ;"  "movq %%r11, 24(%0) ;"
829 |     "mov     $0, %%ecx ;" /* mov (unlike xor) leaves CF intact */
830 |     "cmovc %%edx, %%ecx ;" /* ecx = 38 if the fold carried out of word 3, else 0 */
831 |     "addq %%rcx,  %%r8 ;"  "movq  %%r8,   (%0) ;" /* last correction, then store word 0 */
832 |   :
833 |   : "r" (c), "r" (a), "r" (a24)
834 |   : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11"
835 |   );
836 | #else /* Without BMI2 */
837 |   /**
838 |   * TODO: Multiplications using MULQ instruction.
839 |   * NOTE: until then, a build without __BMI2__ gets an empty function body and c is never written. **/
840 | #endif
841 | }
842 | /* inv_EltFp25519_1w_x64: c = a^(2^255-21), i.e. a^(p-2) for p = 2^255-19, via a fixed square-and-multiply chain. c is written before a is last read, so c must not alias a. */
843 | void inv_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a) {
844 | #define sqrn_EltFp25519_1w_x64(A, times)\
845 |   counter = times;\
846 |   while ( counter-- > 0) {\
847 |       sqr_EltFp25519_1w_x64(A);\
848 |   }
849 |   /* sqrn(A, times) squares A in place `times` times; it relies on the local `counter` declared below. */
850 |   EltFp25519_1w_Buffer_x64 buffer_1w; /* scratch captured by name inside the mul_/sqr_EltFp25519_1w_x64 macros */
851 |   EltFp25519_1w_x64 x0, x1, x2;
852 |   uint64_t * T[5];
853 |   uint64_t counter;
854 | 
855 |   T[0] = x0;
856 |   T[1] = c; /* x^(-1): the result is accumulated directly in the output */
857 |   T[2] = x1;
858 |   T[3] = x2;
859 |   T[4] = a; /* x (T[4] is never read below; a is used directly) */
860 |   /* Exponent tracking below writes x for the input a and Tn for T[n]. */
861 |   copy_EltFp25519_1w_x64(T[1], a);
862 |   sqrn_EltFp25519_1w_x64(T[1], 1);          /* T1 = x^2 */
863 |   copy_EltFp25519_1w_x64(T[2], T[1]);
864 |   sqrn_EltFp25519_1w_x64(T[2], 2);          /* T2 = x^8 */
865 |   mul_EltFp25519_1w_x64(T[0], a, T[2]);     /* T0 = x^9 */
866 |   mul_EltFp25519_1w_x64(T[1], T[1], T[0]);  /* T1 = x^11 */
867 |   copy_EltFp25519_1w_x64(T[2], T[1]);
868 |   sqrn_EltFp25519_1w_x64(T[2], 1);          /* T2 = x^22 */
869 |   mul_EltFp25519_1w_x64(T[0], T[0], T[2]);  /* T0 = x^31 = x^(2^5-1) */
870 |   copy_EltFp25519_1w_x64(T[2], T[0]);
871 |   sqrn_EltFp25519_1w_x64(T[2], 5);          /* T2 = x^(2^10-2^5) */
872 |   mul_EltFp25519_1w_x64(T[0], T[0], T[2]);  /* T0 = x^(2^10-1) */
873 |   copy_EltFp25519_1w_x64(T[2], T[0]);
874 |   sqrn_EltFp25519_1w_x64(T[2], 10);         /* T2 = x^(2^20-2^10) */
875 |   mul_EltFp25519_1w_x64(T[2], T[2], T[0]);  /* T2 = x^(2^20-1) */
876 |   copy_EltFp25519_1w_x64(T[3], T[2]);
877 |   sqrn_EltFp25519_1w_x64(T[3], 20);         /* T3 = x^(2^40-2^20) */
878 |   mul_EltFp25519_1w_x64(T[3], T[3], T[2]);  /* T3 = x^(2^40-1) */
879 |   sqrn_EltFp25519_1w_x64(T[3], 10);         /* T3 = x^(2^50-2^10) */
880 |   mul_EltFp25519_1w_x64(T[3], T[3], T[0]);  /* T3 = x^(2^50-1) */
881 |   copy_EltFp25519_1w_x64(T[0], T[3]);
882 |   sqrn_EltFp25519_1w_x64(T[0], 50);         /* T0 = x^(2^100-2^50) */
883 |   mul_EltFp25519_1w_x64(T[0], T[0], T[3]);  /* T0 = x^(2^100-1) */
884 |   copy_EltFp25519_1w_x64(T[2], T[0]);
885 |   sqrn_EltFp25519_1w_x64(T[2], 100);        /* T2 = x^(2^200-2^100) */
886 |   mul_EltFp25519_1w_x64(T[2], T[2], T[0]);  /* T2 = x^(2^200-1) */
887 |   sqrn_EltFp25519_1w_x64(T[2], 50);         /* T2 = x^(2^250-2^50) */
888 |   mul_EltFp25519_1w_x64(T[2], T[2], T[3]);  /* T2 = x^(2^250-1) */
889 |   sqrn_EltFp25519_1w_x64(T[2], 5);          /* T2 = x^(2^255-2^5) */
890 |   mul_EltFp25519_1w_x64(T[1], T[1], T[2]);  /* T1 = c = x^(2^255-32+11) = x^(2^255-21) */
891 | #undef sqrn_EltFp25519_1w_x64
892 | }
893 | 
894 | /**
895 |  * Given C, a 256-bit number, fred_EltFp25519_1w_x64 updates C
896 |  * with a number such that 0 <= C < 2**255-19 (the four words at c are rewritten in place).
897 |  * Contributed by: Samuel Neves.
898 |  **/
899 | inline void fred_EltFp25519_1w_x64(uint64_t *const c) {
900 |   __asm__ __volatile__ (
901 |   /* First, obtain a number less than 2^255 (2^255 = 19 mod 2^255-19, so bit 255 is traded for +19). */
902 |     "btrq   $63, 24(%0) ;" /* clear bit 255; CF receives its previous value */
903 |     "sbbl %%ecx, %%ecx  ;" /* ecx = all ones if CF was set, else 0 */
904 |     "andq   $19, %%rcx  ;" /* rcx = 19 if bit 255 was set, else 0 */
905 |     "addq %%rcx,   (%0) ;"
906 |     "adcq    $0,  8(%0) ;"
907 |     "adcq    $0, 16(%0) ;"
908 |     "adcq    $0, 24(%0) ;"
909 |     /* Same step once more, in case the addition above set bit 255 again. */
910 |     "btrq   $63, 24(%0) ;"
911 |     "sbbl %%ecx, %%ecx  ;"
912 |     "andq   $19, %%rcx  ;"
913 |     "addq %%rcx,   (%0) ;"
914 |     "adcq    $0,  8(%0) ;"
915 |     "adcq    $0, 16(%0) ;"
916 |     "adcq    $0, 24(%0) ;"
917 | 
918 |   /* Then, in case the number falls into [2^255-19, 2^255-1], add 19 and drop bit 255. */
919 |     "cmpq $-19,   (%0)   ;"
920 |     "setaeb %%al         ;" /* al = 1 if word 0 >= 2^64-19 (unsigned) */
921 |     "cmpq  $-1,  8(%0)   ;"
922 |     "setzb %%bl          ;" /* bl = 1 if word 1 is all ones */
923 |     "cmpq  $-1, 16(%0)   ;"
924 |     "setzb %%cl          ;" /* cl = 1 if word 2 is all ones */
925 |     "movq 24(%0), %%rdx  ;"
926 |     "addq   $1, %%rdx    ;"
927 |     "shrq  $63, %%rdx    ;" /* dl = bit 63 of (word 3 + 1) */
928 |     "andb %%bl, %%al     ;"
929 |     "andb %%dl, %%cl     ;"
930 |     "test %%cl, %%al     ;" /* ZF is clear only when all four conditions hold */
931 |     "movl  $0, %%eax     ;" /* mov leaves the flags from test intact */
932 |     "movl $19, %%ecx     ;"
933 |     "cmovnz %%rcx, %%rax ;" /* rax = 19 if all four conditions hold, else 0 */
934 |     "addq %%rax,   (%0)  ;"
935 |     "adcq    $0,  8(%0)  ;"
936 |     "adcq    $0, 16(%0)  ;"
937 |     "adcq    $0, 24(%0)  ;"
938 |     "btrq   $63, 24(%0)  ;" /* clear bit 255 after the conditional +19 */
939 |   :
940 |   : "r"(c)
941 |   : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx"
942 |   );
943 | }
944 | 


--------------------------------------------------------------------------------
/ext/x25519_precomputed/fp25519_x64.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) 2017, Armando Faz <armfazh@ic.unicamp.br>. All rights reserved.
  3 |  * Institute of Computing.
  4 |  * University of Campinas, Brazil.
  5 |  *
  6 |  * Redistribution and use in source and binary forms, with or without
  7 |  * modification, are permitted provided that the following conditions
  8 |  * are met:
  9 |  *
 10 |  *  * Redistributions of source code must retain the above copyright
 11 |  *    notice, this list of conditions and the following disclaimer.
 12 |  *  * Redistributions in binary form must reproduce the above
 13 |  *    copyright notice, this list of conditions and the following
 14 |  *    disclaimer in the documentation and/or other materials provided
 15 |  *    with the distribution.
 16 |  *  * Neither the name of University of Campinas nor the names of its
 17 |  *    contributors may be used to endorse or promote products derived
 18 |  *    from this software without specific prior written permission.
 19 |  *
 20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 23 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 24 |  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 25 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 26 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 27 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 28 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 29 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 30 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 31 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 32 |  */
 33 | 
 34 | #ifndef FP25519_X64_H
 35 | #define FP25519_X64_H
 36 | 
 37 | #include <stdint.h>
 38 | /* ALIGN_BYTES: alignment requested for the element types below; defaults to 32 unless defined before this header is included. */
 39 | #ifndef ALIGN_BYTES
 40 | #define ALIGN_BYTES 32
 41 | #endif
 42 | /* ALIGN: alignment specifier, spelled __declspec(align()) for the Intel compiler and __attribute__((aligned())) otherwise. */
 43 | #ifndef ALIGN
 44 | #ifdef __INTEL_COMPILER
 45 | #define ALIGN __declspec(align(ALIGN_BYTES))
 46 | #else
 47 | #define ALIGN __attribute__((aligned(ALIGN_BYTES)))
 48 | #endif
 49 | #endif
 50 | /* Element types: "1w" is one 4-word element; the *_Buffer types are twice as wide and receive the unreduced product in the mul_/sqr_ macros below. */
 51 | #define SIZE_BYTES_FP25519 32 /* 4 words * 8 bytes */
 52 | #define NUM_WORDS_ELTFP25519_X64 4 /* 64-bit words per element */
 53 | typedef ALIGN uint64_t EltFp25519_1w_x64[NUM_WORDS_ELTFP25519_X64];
 54 | typedef ALIGN uint64_t EltFp25519_1w_Buffer_x64[2 * NUM_WORDS_ELTFP25519_X64];
 55 | typedef ALIGN uint64_t EltFp25519_2w_x64[2 * NUM_WORDS_ELTFP25519_X64];
 56 | typedef ALIGN uint64_t EltFp25519_2w_Buffer_x64[4 * NUM_WORDS_ELTFP25519_X64];
 57 | 
 58 | #ifdef __cplusplus
 59 | extern "C" {
 60 | #endif
 61 | /* NOTE(review): the mul2_/sqr2_/red_..._2w routines are presumably the two-element ("2w") forms of the 1w routines below — confirm against fp25519_x64.c. */
 62 | /* Integer Arithmetic */
 63 | void mul2_256x256_integer_x64(uint64_t *const c, uint64_t *const a,
 64 |                               uint64_t *const b);
 65 | 
 66 | void sqr2_256x256_integer_x64(uint64_t *const c, uint64_t *const a);
 67 | 
 68 | void red_EltFp25519_2w_x64(uint64_t *const c, uint64_t *const a);
 69 | /* The macros below pass an EltFp25519_1w_Buffer_x64 (8 words) as c to mul_256x256/sqr_256x256 and then reduce it with red_EltFp25519_1w_x64. */
 70 | void mul_256x256_integer_x64(uint64_t *const c, uint64_t *const a,
 71 |                              uint64_t *const b);
 72 | 
 73 | void sqr_256x256_integer_x64(uint64_t *const c, uint64_t *const a);
 74 | /* red: folds the 8-word integer a into the 4 words at c using 2^256 = 38 (mod 2^255-19). */
 75 | void red_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a);
 76 | 
 77 | /* Prime Field Arithmetic (4-word operands; c is the output) */
 78 | void add_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a,
 79 |                            uint64_t *const b);
 80 | 
 81 | void sub_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a,
 82 |                            uint64_t *const b);
 83 | /* mul_a24: c = 121666 * a. */
 84 | void mul_a24_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a);
 85 | /* inv: c = a^(2^255-21); c must not alias a. */
 86 | void inv_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a);
 87 | /* fred: reduces c in place to the range 0 <= c < 2^255-19. */
 88 | void fred_EltFp25519_1w_x64(uint64_t *const c);
 89 | 
 90 | #ifdef __cplusplus
 91 | }
 92 | #endif
 93 | /* mul: c = a * b, reduced. Expands to TWO statements and needs a variable named buffer_1w in the calling scope; do not use as an unbraced if/else/loop body. */
 94 | #define mul_EltFp25519_1w_x64(c, a, b)      \
 95 |   mul_256x256_integer_x64(buffer_1w, a, b); \
 96 |   red_EltFp25519_1w_x64(c, buffer_1w);
 97 | /* sqr: a = a * a, reduced, in place. Same caveats as mul (two statements, caller-provided buffer_1w). */
 98 | #define sqr_EltFp25519_1w_x64(a)         \
 99 |   sqr_256x256_integer_x64(buffer_1w, a); \
100 |   red_EltFp25519_1w_x64(a, buffer_1w);
101 | /* 2w mul: same shape as the 1w macro but through the 2w routines; needs a variable named buffer_2w in the calling scope. */
102 | #define mul_EltFp25519_2w_x64(c, a, b)       \
103 |   mul2_256x256_integer_x64(buffer_2w, a, b); \
104 |   red_EltFp25519_2w_x64(c, buffer_2w);
105 | /* 2w sqr: in-place counterpart of the macro above; also needs buffer_2w in scope. */
106 | #define sqr_EltFp25519_2w_x64(a)          \
107 |   sqr2_256x256_integer_x64(buffer_2w, a); \
108 |   red_EltFp25519_2w_x64(a, buffer_2w);
109 | /* copy: word-by-word copy of the four words of A into C. Expands to four statements and evaluates each argument four times. */
110 | #define copy_EltFp25519_1w_x64(C, A) \
111 |   (C)[0] = (A)[0];                   \
112 |   (C)[1] = (A)[1];                   \
113 |   (C)[2] = (A)[2];                   \
114 |   (C)[3] = (A)[3];
115 | /* setzero: clears the four words of C. Expands to four statements and evaluates C four times. */
116 | #define setzero_EltFp25519_1w_x64(C) \
117 |   (C)[0] = 0;                        \
118 |   (C)[1] = 0;                        \
119 |   (C)[2] = 0;                        \
120 |   (C)[3] = 0;
121 | 
122 | #endif /* FP25519_X64_H */
123 | 


--------------------------------------------------------------------------------
/ext/x25519_precomputed/table_ladder_x25519.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) 2017, Armando Faz <armfazh@ic.unicamp.br>. All rights reserved.
  3 |  * Institute of Computing.
  4 |  * University of Campinas, Brazil.
  5 |  *
  6 |  * Redistribution and use in source and binary forms, with or without
  7 |  * modification, are permitted provided that the following conditions
  8 |  * are met:
  9 |  *
 10 |  *  * Redistributions of source code must retain the above copyright
 11 |  *    notice, this list of conditions and the following disclaimer.
 12 |  *  * Redistributions in binary form must reproduce the above
 13 |  *    copyright notice, this list of conditions and the following
 14 |  *    disclaimer in the documentation and/or other materials provided
 15 |  *    with the distribution.
 16 |  *  * Neither the name of University of Campinas nor the names of its
 17 |  *    contributors may be used to endorse or promote products derived
 18 |  *    from this software without specific prior written permission.
 19 |  *
 20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 23 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 24 |  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 25 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 26 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 27 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 28 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 29 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 30 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 31 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 32 |  */
 33 | 
 34 | #ifndef TABLE_LADDER_8K_H
 35 | #define TABLE_LADDER_8K_H
 36 | #include <stdint.h>
 37 | 
 38 | ALIGN static const uint64_t Table_Ladder_8k[252 * NUM_WORDS_ELTFP25519_X64] = {
 39 |     /*   1 */ 0xfffffffffffffff3, 0xffffffffffffffff,
 40 |               0xffffffffffffffff, 0x5fffffffffffffff,
 41 |     /*   2 */ 0x6b8220f416aafe96, 0x82ebeb2b4f566a34,
 42 |               0xd5a9a5b075a5950f, 0x5142b2cf4b2488f4,
 43 |     /*   3 */ 0x6aaebc750069680c, 0x89cf7820a0f99c41,
 44 |               0x2a58d9183b56d0f4, 0x4b5aca80e36011a4,
 45 |     /*   4 */ 0x329132348c29745d, 0xf4a2e616e1642fd7,
 46 |               0x1e45bb03ff67bc34, 0x306912d0f42a9b4a,
 47 |     /*   5 */ 0xff886507e6af7154, 0x04f50e13dfeec82f,
 48 |               0xaa512fe82abab5ce, 0x174e251a68d5f222,
 49 |     /*   6 */ 0xcf96700d82028898, 0x1743e3370a2c02c5,
 50 |               0x379eec98b4e86eaa, 0x0c59888a51e0482e,
 51 |     /*   7 */ 0xfbcbf1d699b5d189, 0xacaef0d58e9fdc84,
 52 |               0xc1c20d06231f7614, 0x2938218da274f972,
 53 |     /*   8 */ 0xf6af49beff1d7f18, 0xcc541c22387ac9c2,
 54 |               0x96fcc9ef4015c56b, 0x69c1627c690913a9,
 55 |     /*   9 */ 0x7a86fd2f4733db0e, 0xfdb8c4f29e087de9,
 56 |               0x095e4b1a8ea2a229, 0x1ad7a7c829b37a79,
 57 |     /*  10 */ 0x342d89cad17ea0c0, 0x67bedda6cced2051,
 58 |               0x19ca31bf2bb42f74, 0x3df7b4c84980acbb,
 59 |     /*  11 */ 0xa8c6444dc80ad883, 0xb91e440366e3ab85,
 60 |               0xc215cda00164f6d8, 0x3d867c6ef247e668,
 61 |     /*  12 */ 0xc7dd582bcc3e658c, 0xfd2c4748ee0e5528,
 62 |               0xa0fd9b95cc9f4f71, 0x7529d871b0675ddf,
 63 |     /*  13 */ 0xb8f568b42d3cbd78, 0x1233011b91f3da82,
 64 |               0x2dce6ccd4a7c3b62, 0x75e7fc8e9e498603,
 65 |     /*  14 */ 0x2f4f13f1fcd0b6ec, 0xf1a8ca1f29ff7a45,
 66 |               0xc249c1a72981e29b, 0x6ebe0dbb8c83b56a,
 67 |     /*  15 */ 0x7114fa8d170bb222, 0x65a2dcd5bf93935f,
 68 |               0xbdc41f68b59c979a, 0x2f0eef79a2ce9289,
 69 |     /*  16 */ 0x42ecbf0c083c37ce, 0x2930bc09ec496322,
 70 |               0xf294b0c19cfeac0d, 0x3780aa4bedfabb80,
 71 |     /*  17 */ 0x56c17d3e7cead929, 0xe7cb4beb2e5722c5,
 72 |               0x0ce931732dbfe15a, 0x41b883c7621052f8,
 73 |     /*  18 */ 0xdbf75ca0c3d25350, 0x2936be086eb1e351,
 74 |               0xc936e03cb4a9b212, 0x1d45bf82322225aa,
 75 |     /*  19 */ 0xe81ab1036a024cc5, 0xe212201c304c9a72,
 76 |               0xc5d73fba6832b1fc, 0x20ffdb5a4d839581,
 77 |     /*  20 */ 0xa283d367be5d0fad, 0x6c2b25ca8b164475,
 78 |               0x9d4935467caaf22e, 0x5166408eee85ff49,
 79 |     /*  21 */ 0x3c67baa2fab4e361, 0xb3e433c67ef35cef,
 80 |               0x5259729241159b1c, 0x6a621892d5b0ab33,
 81 |     /*  22 */ 0x20b74a387555cdcb, 0x532aa10e1208923f,
 82 |               0xeaa17b7762281dd1, 0x61ab3443f05c44bf,
 83 |     /*  23 */ 0x257a6c422324def8, 0x131c6c1017e3cf7f,
 84 |               0x23758739f630a257, 0x295a407a01a78580,
 85 |     /*  24 */ 0xf8c443246d5da8d9, 0x19d775450c52fa5d,
 86 |               0x2afcfc92731bf83d, 0x7d10c8e81b2b4700,
 87 |     /*  25 */ 0xc8e0271f70baa20b, 0x993748867ca63957,
 88 |               0x5412efb3cb7ed4bb, 0x3196d36173e62975,
 89 |     /*  26 */ 0xde5bcad141c7dffc, 0x47cc8cd2b395c848,
 90 |               0xa34cd942e11af3cb, 0x0256dbf2d04ecec2,
 91 |     /*  27 */ 0x875ab7e94b0e667f, 0xcad4dd83c0850d10,
 92 |               0x47f12e8f4e72c79f, 0x5f1a87bb8c85b19b,
 93 |     /*  28 */ 0x7ae9d0b6437f51b8, 0x12c7ce5518879065,
 94 |               0x2ade09fe5cf77aee, 0x23a05a2f7d2c5627,
 95 |     /*  29 */ 0x5908e128f17c169a, 0xf77498dd8ad0852d,
 96 |               0x74b4c4ceab102f64, 0x183abadd10139845,
 97 |     /*  30 */ 0xb165ba8daa92aaac, 0xd5c5ef9599386705,
 98 |               0xbe2f8f0cf8fc40d1, 0x2701e635ee204514,
 99 |     /*  31 */ 0x629fa80020156514, 0xf223868764a8c1ce,
100 |               0x5b894fff0b3f060e, 0x60d9944cf708a3fa,
101 |     /*  32 */ 0xaeea001a1c7a201f, 0xebf16a633ee2ce63,
102 |               0x6f7709594c7a07e1, 0x79b958150d0208cb,
103 |     /*  33 */ 0x24b55e5301d410e7, 0xe3a34edff3fdc84d,
104 |               0xd88768e4904032d8, 0x131384427b3aaeec,
105 |     /*  34 */ 0x8405e51286234f14, 0x14dc4739adb4c529,
106 |               0xb8a2b5b250634ffd, 0x2fe2a94ad8a7ff93,
107 |     /*  35 */ 0xec5c57efe843fadd, 0x2843ce40f0bb9918,
108 |               0xa4b561d6cf3d6305, 0x743629bde8fb777e,
109 |     /*  36 */ 0x343edd46bbaf738f, 0xed981828b101a651,
110 |               0xa401760b882c797a, 0x1fc223e28dc88730,
111 |     /*  37 */ 0x48604e91fc0fba0e, 0xb637f78f052c6fa4,
112 |               0x91ccac3d09e9239c, 0x23f7eed4437a687c,
113 |     /*  38 */ 0x5173b1118d9bd800, 0x29d641b63189d4a7,
114 |               0xfdbf177988bbc586, 0x2959894fcad81df5,
115 |     /*  39 */ 0xaebc8ef3b4bbc899, 0x4148995ab26992b9,
116 |               0x24e20b0134f92cfb, 0x40d158894a05dee8,
117 |     /*  40 */ 0x46b00b1185af76f6, 0x26bac77873187a79,
118 |               0x3dc0bf95ab8fff5f, 0x2a608bd8945524d7,
119 |     /*  41 */ 0x26449588bd446302, 0x7c4bc21c0388439c,
120 |               0x8e98a4f383bd11b2, 0x26218d7bc9d876b9,
121 |     /*  42 */ 0xe3081542997c178a, 0x3c2d29a86fb6606f,
122 |               0x5c217736fa279374, 0x7dde05734afeb1fa,
123 |     /*  43 */ 0x3bf10e3906d42bab, 0xe4f7803e1980649c,
124 |               0xe6053bf89595bf7a, 0x394faf38da245530,
125 |     /*  44 */ 0x7a8efb58896928f4, 0xfbc778e9cc6a113c,
126 |               0x72670ce330af596f, 0x48f222a81d3d6cf7,
127 |     /*  45 */ 0xf01fce410d72caa7, 0x5a20ecc7213b5595,
128 |               0x7bc21165c1fa1483, 0x07f89ae31da8a741,
129 |     /*  46 */ 0x05d2c2b4c6830ff9, 0xd43e330fc6316293,
130 |               0xa5a5590a96d3a904, 0x705edb91a65333b6,
131 |     /*  47 */ 0x048ee15e0bb9a5f7, 0x3240cfca9e0aaf5d,
132 |               0x8f4b71ceedc4a40b, 0x621c0da3de544a6d,
133 |     /*  48 */ 0x92872836a08c4091, 0xce8375b010c91445,
134 |               0x8a72eb524f276394, 0x2667fcfa7ec83635,
135 |     /*  49 */ 0x7f4c173345e8752a, 0x061b47feee7079a5,
136 |               0x25dd9afa9f86ff34, 0x3780cef5425dc89c,
137 |     /*  50 */ 0x1a46035a513bb4e9, 0x3e1ef379ac575ada,
138 |               0xc78c5f1c5fa24b50, 0x321a967634fd9f22,
139 |     /*  51 */ 0x946707b8826e27fa, 0x3dca84d64c506fd0,
140 |               0xc189218075e91436, 0x6d9284169b3b8484,
141 |     /*  52 */ 0x3a67e840383f2ddf, 0x33eec9a30c4f9b75,
142 |               0x3ec7c86fa783ef47, 0x26ec449fbac9fbc4,
143 |     /*  53 */ 0x5c0f38cba09b9e7d, 0x81168cc762a3478c,
144 |               0x3e23b0d306fc121c, 0x5a238aa0a5efdcdd,
145 |     /*  54 */ 0x1ba26121c4ea43ff, 0x36f8c77f7c8832b5,
146 |               0x88fbea0b0adcf99a, 0x5ca9938ec25bebf9,
147 |     /*  55 */ 0xd5436a5e51fccda0, 0x1dbc4797c2cd893b,
148 |               0x19346a65d3224a08, 0x0f5034e49b9af466,
149 |     /*  56 */ 0xf23c3967a1e0b96e, 0xe58b08fa867a4d88,
150 |               0xfb2fabc6a7341679, 0x2a75381eb6026946,
151 |     /*  57 */ 0xc80a3be4c19420ac, 0x66b1f6c681f2b6dc,
152 |               0x7cf7036761e93388, 0x25abbbd8a660a4c4,
153 |     /*  58 */ 0x91ea12ba14fd5198, 0x684950fc4a3cffa9,
154 |               0xf826842130f5ad28, 0x3ea988f75301a441,
155 |     /*  59 */ 0xc978109a695f8c6f, 0x1746eb4a0530c3f3,
156 |               0x444d6d77b4459995, 0x75952b8c054e5cc7,
157 |     /*  60 */ 0xa3703f7915f4d6aa, 0x66c346202f2647d8,
158 |               0xd01469df811d644b, 0x77fea47d81a5d71f,
159 |     /*  61 */ 0xc5e9529ef57ca381, 0x6eeeb4b9ce2f881a,
160 |               0xb6e91a28e8009bd6, 0x4b80be3e9afc3fec,
161 |     /*  62 */ 0x7e3773c526aed2c5, 0x1b4afcb453c9a49d,
162 |               0xa920bdd7baffb24d, 0x7c54699f122d400e,
163 |     /*  63 */ 0xef46c8e14fa94bc8, 0xe0b074ce2952ed5e,
164 |               0xbea450e1dbd885d5, 0x61b68649320f712c,
165 |     /*  64 */ 0x8a485f7309ccbdd1, 0xbd06320d7d4d1a2d,
166 |               0x25232973322dbef4, 0x445dc4758c17f770,
167 |     /*  65 */ 0xdb0434177cc8933c, 0xed6fe82175ea059f,
168 |               0x1efebefdc053db34, 0x4adbe867c65daf99,
169 |     /*  66 */ 0x3acd71a2a90609df, 0xe5e991856dd04050,
170 |               0x1ec69b688157c23c, 0x697427f6885cfe4d,
171 |     /*  67 */ 0xd7be7b9b65e1a851, 0xa03d28d522c536dd,
172 |               0x28399d658fd2b645, 0x49e5b7e17c2641e1,
173 |     /*  68 */ 0x6f8c3a98700457a4, 0x5078f0a25ebb6778,
174 |               0xd13c3ccbc382960f, 0x2e003258a7df84b1,
175 |     /*  69 */ 0x8ad1f39be6296a1c, 0xc1eeaa652a5fbfb2,
176 |               0x33ee0673fd26f3cb, 0x59256173a69d2ccc,
177 |     /*  70 */ 0x41ea07aa4e18fc41, 0xd9fc19527c87a51e,
178 |               0xbdaacb805831ca6f, 0x445b652dc916694f,
179 |     /*  71 */ 0xce92a3a7f2172315, 0x1edc282de11b9964,
180 |               0xa1823aafe04c314a, 0x790a2d94437cf586,
181 |     /*  72 */ 0x71c447fb93f6e009, 0x8922a56722845276,
182 |               0xbf70903b204f5169, 0x2f7a89891ba319fe,
183 |     /*  73 */ 0x02a08eb577e2140c, 0xed9a4ed4427bdcf4,
184 |               0x5253ec44e4323cd1, 0x3e88363c14e9355b,
185 |     /*  74 */ 0xaa66c14277110b8c, 0x1ae0391610a23390,
186 |               0x2030bd12c93fc2a2, 0x3ee141579555c7ab,
187 |     /*  75 */ 0x9214de3a6d6e7d41, 0x3ccdd88607f17efe,
188 |               0x674f1288f8e11217, 0x5682250f329f93d0,
189 |     /*  76 */ 0x6cf00b136d2e396e, 0x6e4cf86f1014debf,
190 |               0x5930b1b5bfcc4e83, 0x047069b48aba16b6,
191 |     /*  77 */ 0x0d4ce4ab69b20793, 0xb24db91a97d0fb9e,
192 |               0xcdfa50f54e00d01d, 0x221b1085368bddb5,
193 |     /*  78 */ 0xe7e59468b1e3d8d2, 0x53c56563bd122f93,
194 |               0xeee8a903e0663f09, 0x61efa662cbbe3d42,
195 |     /*  79 */ 0x2cf8ddddde6eab2a, 0x9bf80ad51435f231,
196 |               0x5deadacec9f04973, 0x29275b5d41d29b27,
197 |     /*  80 */ 0xcfde0f0895ebf14f, 0xb9aab96b054905a7,
198 |               0xcae80dd9a1c420fd, 0x0a63bf2f1673bbc7,
199 |     /*  81 */ 0x092f6e11958fbc8c, 0x672a81e804822fad,
200 |               0xcac8351560d52517, 0x6f3f7722c8f192f8,
201 |     /*  82 */ 0xf8ba90ccc2e894b7, 0x2c7557a438ff9f0d,
202 |               0x894d1d855ae52359, 0x68e122157b743d69,
203 |     /*  83 */ 0xd87e5570cfb919f3, 0x3f2cdecd95798db9,
204 |               0x2121154710c0a2ce, 0x3c66a115246dc5b2,
205 |     /*  84 */ 0xcbedc562294ecb72, 0xba7143c36a280b16,
206 |               0x9610c2efd4078b67, 0x6144735d946a4b1e,
207 |     /*  85 */ 0x536f111ed75b3350, 0x0211db8c2041d81b,
208 |               0xf93cb1000e10413c, 0x149dfd3c039e8876,
209 |     /*  86 */ 0xd479dde46b63155b, 0xb66e15e93c837976,
210 |               0xdafde43b1f13e038, 0x5fafda1a2e4b0b35,
211 |     /*  87 */ 0x3600bbdf17197581, 0x3972050bbe3cd2c2,
212 |               0x5938906dbdd5be86, 0x34fce5e43f9b860f,
213 |     /*  88 */ 0x75a8a4cd42d14d02, 0x828dabc53441df65,
214 |               0x33dcabedd2e131d3, 0x3ebad76fb814d25f,
215 |     /*  89 */ 0xd4906f566f70e10f, 0x5d12f7aa51690f5a,
216 |               0x45adb16e76cefcf2, 0x01f768aead232999,
217 |     /*  90 */ 0x2b6cc77b6248febd, 0x3cd30628ec3aaffd,
218 |               0xce1c0b80d4ef486a, 0x4c3bff2ea6f66c23,
219 |     /*  91 */ 0x3f2ec4094aeaeb5f, 0x61b19b286e372ca7,
220 |               0x5eefa966de2a701d, 0x23b20565de55e3ef,
221 |     /*  92 */ 0xe301ca5279d58557, 0x07b2d4ce27c2874f,
222 |               0xa532cd8a9dcf1d67, 0x2a52fee23f2bff56,
223 |     /*  93 */ 0x8624efb37cd8663d, 0xbbc7ac20ffbd7594,
224 |               0x57b85e9c82d37445, 0x7b3052cb86a6ec66,
225 |     /*  94 */ 0x3482f0ad2525e91e, 0x2cb68043d28edca0,
226 |               0xaf4f6d052e1b003a, 0x185f8c2529781b0a,
227 |     /*  95 */ 0xaa41de5bd80ce0d6, 0x9407b2416853e9d6,
228 |               0x563ec36e357f4c3a, 0x4cc4b8dd0e297bce,
229 |     /*  96 */ 0xa2fc1a52ffb8730e, 0x1811f16e67058e37,
230 |               0x10f9a366cddf4ee1, 0x72f4a0c4a0b9f099,
231 |     /*  97 */ 0x8c16c06f663f4ea7, 0x693b3af74e970fba,
232 |               0x2102e7f1d69ec345, 0x0ba53cbc968a8089,
233 |     /*  98 */ 0xca3d9dc7fea15537, 0x4c6824bb51536493,
234 |               0xb9886314844006b1, 0x40d2a72ab454cc60,
235 |     /*  99 */ 0x5936a1b712570975, 0x91b9d648debda657,
236 |               0x3344094bb64330ea, 0x006ba10d12ee51d0,
237 |     /* 100 */ 0x19228468f5de5d58, 0x0eb12f4c38cc05b0,
238 |               0xa1039f9dd5601990, 0x4502d4ce4fff0e0b,
239 |     /* 101 */ 0xeb2054106837c189, 0xd0f6544c6dd3b93c,
240 |               0x40727064c416d74f, 0x6e15c6114b502ef0,
241 |     /* 102 */ 0x4df2a398cfb1a76b, 0x11256c7419f2f6b1,
242 |               0x4a497962066e6043, 0x705b3aab41355b44,
243 |     /* 103 */ 0x365ef536d797b1d8, 0x00076bd622ddf0db,
244 |               0x3bbf33b0e0575a88, 0x3777aa05c8e4ca4d,
245 |     /* 104 */ 0x392745c85578db5f, 0x6fda4149dbae5ae2,
246 |               0xb1f0b00b8adc9867, 0x09963437d36f1da3,
247 |     /* 105 */ 0x7e824e90a5dc3853, 0xccb5f6641f135cbd,
248 |               0x6736d86c87ce8fcc, 0x625f3ce26604249f,
249 |     /* 106 */ 0xaf8ac8059502f63f, 0x0c05e70a2e351469,
250 |               0x35292e9c764b6305, 0x1a394360c7e23ac3,
251 |     /* 107 */ 0xd5c6d53251183264, 0x62065abd43c2b74f,
252 |               0xb5fbf5d03b973f9b, 0x13a3da3661206e5e,
253 |     /* 108 */ 0xc6bd5837725d94e5, 0x18e30912205016c5,
254 |               0x2088ce1570033c68, 0x7fba1f495c837987,
255 |     /* 109 */ 0x5a8c7423f2f9079d, 0x1735157b34023fc5,
256 |               0xe4f9b49ad2fab351, 0x6691ff72c878e33c,
257 |     /* 110 */ 0x122c2adedc5eff3e, 0xf8dd4bf1d8956cf4,
258 |               0xeb86205d9e9e5bda, 0x049b92b9d975c743,
259 |     /* 111 */ 0xa5379730b0f6c05a, 0x72a0ffacc6f3a553,
260 |               0xb0032c34b20dcd6d, 0x470e9dbc88d5164a,
261 |     /* 112 */ 0xb19cf10ca237c047, 0xb65466711f6c81a2,
262 |               0xb3321bd16dd80b43, 0x48c14f600c5fbe8e,
263 |     /* 113 */ 0x66451c264aa6c803, 0xb66e3904a4fa7da6,
264 |               0xd45f19b0b3128395, 0x31602627c3c9bc10,
265 |     /* 114 */ 0x3120dc4832e4e10d, 0xeb20c46756c717f7,
266 |               0x00f52e3f67280294, 0x566d4fc14730c509,
267 |     /* 115 */ 0x7e3a5d40fd837206, 0xc1e926dc7159547a,
268 |               0x216730fba68d6095, 0x22e8c3843f69cea7,
269 |     /* 116 */ 0x33d074e8930e4b2b, 0xb6e4350e84d15816,
270 |               0x5534c26ad6ba2365, 0x7773c12f89f1f3f3,
271 |     /* 117 */ 0x8cba404da57962aa, 0x5b9897a81999ce56,
272 |               0x508e862f121692fc, 0x3a81907fa093c291,
273 |     /* 118 */ 0x0dded0ff4725a510, 0x10d8cc10673fc503,
274 |               0x5b9d151c9f1f4e89, 0x32a5c1d5cb09a44c,
275 |     /* 119 */ 0x1e0aa442b90541fb, 0x5f85eb7cc1b485db,
276 |               0xbee595ce8a9df2e5, 0x25e496c722422236,
277 |     /* 120 */ 0x5edf3c46cd0fe5b9, 0x34e75a7ed2a43388,
278 |               0xe488de11d761e352, 0x0e878a01a085545c,
279 |     /* 121 */ 0xba493c77e021bb04, 0x2b4d1843c7df899a,
280 |               0x9ea37a487ae80d67, 0x67a9958011e41794,
281 |     /* 122 */ 0x4b58051a6697b065, 0x47e33f7d8d6ba6d4,
282 |               0xbb4da8d483ca46c1, 0x68becaa181c2db0d,
283 |     /* 123 */ 0x8d8980e90b989aa5, 0xf95eb14a2c93c99b,
284 |               0x51c6c7c4796e73a2, 0x6e228363b5efb569,
285 |     /* 124 */ 0xc6bbc0b02dd624c8, 0x777eb47dec8170ee,
286 |               0x3cde15a004cfafa9, 0x1dc6bc087160bf9b,
287 |     /* 125 */ 0x2e07e043eec34002, 0x18e9fc677a68dc7f,
288 |               0xd8da03188bd15b9a, 0x48fbc3bb00568253,
289 |     /* 126 */ 0x57547d4cfb654ce1, 0xd3565b82a058e2ad,
290 |               0xf63eaf0bbf154478, 0x47531ef114dfbb18,
291 |     /* 127 */ 0xe1ec630a4278c587, 0x5507d546ca8e83f3,
292 |               0x85e135c63adc0c2b, 0x0aa7efa85682844e,
293 |     /* 128 */ 0x72691ba8b3e1f615, 0x32b4e9701fbe3ffa,
294 |               0x97b6d92e39bb7868, 0x2cfe53dea02e39e8,
295 |     /* 129 */ 0x687392cd85cd52b0, 0x27ff66c910e29831,
296 |               0x97134556a9832d06, 0x269bb0360a84f8a0,
297 |     /* 130 */ 0x706e55457643f85c, 0x3734a48c9b597d1b,
298 |               0x7aee91e8c6efa472, 0x5cd6abc198a9d9e0,
299 |     /* 131 */ 0x0e04de06cb3ce41a, 0xd8c6eb893402e138,
300 |               0x904659bb686e3772, 0x7215c371746ba8c8,
301 |     /* 132 */ 0xfd12a97eeae4a2d9, 0x9514b7516394f2c5,
302 |               0x266fd5809208f294, 0x5c847085619a26b9,
303 |     /* 133 */ 0x52985410fed694ea, 0x3c905b934a2ed254,
304 |               0x10bb47692d3be467, 0x063b3d2d69e5e9e1,
305 |     /* 134 */ 0x472726eedda57deb, 0xefb6c4ae10f41891,
306 |               0x2b1641917b307614, 0x117c554fc4f45b7c,
307 |     /* 135 */ 0xc07cf3118f9d8812, 0x01dbd82050017939,
308 |               0xd7e803f4171b2827, 0x1015e87487d225ea,
309 |     /* 136 */ 0xc58de3fed23acc4d, 0x50db91c294a7be2d,
310 |               0x0b94d43d1c9cf457, 0x6b1640fa6e37524a,
311 |     /* 137 */ 0x692f346c5fda0d09, 0x200b1c59fa4d3151,
312 |               0xb8c46f760777a296, 0x4b38395f3ffdfbcf,
313 |     /* 138 */ 0x18d25e00be54d671, 0x60d50582bec8aba6,
314 |               0x87ad8f263b78b982, 0x50fdf64e9cda0432,
315 |     /* 139 */ 0x90f567aac578dcf0, 0xef1e9b0ef2a3133b,
316 |               0x0eebba9242d9de71, 0x15473c9bf03101c7,
317 |     /* 140 */ 0x7c77e8ae56b78095, 0xb678e7666e6f078e,
318 |               0x2da0b9615348ba1f, 0x7cf931c1ff733f0b,
319 |     /* 141 */ 0x26b357f50a0a366c, 0xe9708cf42b87d732,
320 |               0xc13aeea5f91cb2c0, 0x35d90c991143bb4c,
321 |     /* 142 */ 0x47c1c404a9a0d9dc, 0x659e58451972d251,
322 |               0x3875a8c473b38c31, 0x1fbd9ed379561f24,
323 |     /* 143 */ 0x11fabc6fd41ec28d, 0x7ef8dfe3cd2a2dca,
324 |               0x72e73b5d8c404595, 0x6135fa4954b72f27,
325 |     /* 144 */ 0xccfc32a2de24b69c, 0x3f55698c1f095d88,
326 |               0xbe3350ed5ac3f929, 0x5e9bf806ca477eeb,
327 |     /* 145 */ 0xe9ce8fb63c309f68, 0x5376f63565e1f9f4,
328 |               0xd1afcfb35a6393f1, 0x6632a1ede5623506,
329 |     /* 146 */ 0x0b7d6c390c2ded4c, 0x56cb3281df04cb1f,
330 |               0x66305a1249ecc3c7, 0x5d588b60a38ca72a,
331 |     /* 147 */ 0xa6ecbf78e8e5f42d, 0x86eeb44b3c8a3eec,
332 |               0xec219c48fbd21604, 0x1aaf1af517c36731,
333 |     /* 148 */ 0xc306a2836769bde7, 0x208280622b1e2adb,
334 |               0x8027f51ffbff94a6, 0x76cfa1ce1124f26b,
335 |     /* 149 */ 0x18eb00562422abb6, 0xf377c4d58f8c29c3,
336 |               0x4dbbc207f531561a, 0x0253b7f082128a27,
337 |     /* 150 */ 0x3d1f091cb62c17e0, 0x4860e1abd64628a9,
338 |               0x52d17436309d4253, 0x356f97e13efae576,
339 |     /* 151 */ 0xd351e11aa150535b, 0x3e6b45bb1dd878cc,
340 |               0x0c776128bed92c98, 0x1d34ae93032885b8,
341 |     /* 152 */ 0x4ba0488ca85ba4c3, 0x985348c33c9ce6ce,
342 |               0x66124c6f97bda770, 0x0f81a0290654124a,
343 |     /* 153 */ 0x9ed09ca6569b86fd, 0x811009fd18af9a2d,
344 |               0xff08d03f93d8c20a, 0x52a148199faef26b,
345 |     /* 154 */ 0x3e03f9dc2d8d1b73, 0x4205801873961a70,
346 |               0xc0d987f041a35970, 0x07aa1f15a1c0d549,
347 |     /* 155 */ 0xdfd46ce08cd27224, 0x6d0a024f934e4239,
348 |               0x808a7a6399897b59, 0x0a4556e9e13d95a2,
349 |     /* 156 */ 0xd21a991fe9c13045, 0x9b0e8548fe7751b8,
350 |               0x5da643cb4bf30035, 0x77db28d63940f721,
351 |     /* 157 */ 0xfc5eeb614adc9011, 0x5229419ae8c411eb,
352 |               0x9ec3e7787d1dcf74, 0x340d053e216e4cb5,
353 |     /* 158 */ 0xcac7af39b48df2b4, 0xc0faec2871a10a94,
354 |               0x140a69245ca575ed, 0x0cf1c37134273a4c,
355 |     /* 159 */ 0xc8ee306ac224b8a5, 0x57eaee7ccb4930b0,
356 |               0xa1e806bdaacbe74f, 0x7d9a62742eeb657d,
357 |     /* 160 */ 0x9eb6b6ef546c4830, 0x885cca1fddb36e2e,
358 |               0xe6b9f383ef0d7105, 0x58654fef9d2e0412,
359 |     /* 161 */ 0xa905c4ffbe0e8e26, 0x942de5df9b31816e,
360 |               0x497d723f802e88e1, 0x30684dea602f408d,
361 |     /* 162 */ 0x21e5a278a3e6cb34, 0xaefb6e6f5b151dc4,
362 |               0xb30b8e049d77ca15, 0x28c3c9cf53b98981,
363 |     /* 163 */ 0x287fb721556cdd2a, 0x0d317ca897022274,
364 |               0x7468c7423a543258, 0x4a7f11464eb5642f,
365 |     /* 164 */ 0xa237a4774d193aa6, 0xd865986ea92129a1,
366 |               0x24c515ecf87c1a88, 0x604003575f39f5eb,
367 |     /* 165 */ 0x47b9f189570a9b27, 0x2b98cede465e4b78,
368 |               0x026df551dbb85c20, 0x74fcd91047e21901,
369 |     /* 166 */ 0x13e2a90a23c1bfa3, 0x0cb0074e478519f6,
370 |               0x5ff1cbbe3af6cf44, 0x67fe5438be812dbe,
371 |     /* 167 */ 0xd13cf64fa40f05b0, 0x054dfb2f32283787,
372 |               0x4173915b7f0d2aea, 0x482f144f1f610d4e,
373 |     /* 168 */ 0xf6210201b47f8234, 0x5d0ae1929e70b990,
374 |               0xdcd7f455b049567c, 0x7e93d0f1f0916f01,
375 |     /* 169 */ 0xdd79cbf18a7db4fa, 0xbe8391bf6f74c62f,
376 |               0x027145d14b8291bd, 0x585a73ea2cbf1705,
377 |     /* 170 */ 0x485ca03e928a0db2, 0x10fc01a5742857e7,
378 |               0x2f482edbd6d551a7, 0x0f0433b5048fdb8a,
379 |     /* 171 */ 0x60da2e8dd7dc6247, 0x88b4c9d38cd4819a,
380 |               0x13033ac001f66697, 0x273b24fe3b367d75,
381 |     /* 172 */ 0xc6e8f66a31b3b9d4, 0x281514a494df49d5,
382 |               0xd1726fdfc8b23da7, 0x4b3ae7d103dee548,
383 |     /* 173 */ 0xc6256e19ce4b9d7e, 0xff5c5cf186e3c61c,
384 |               0xacc63ca34b8ec145, 0x74621888fee66574,
385 |     /* 174 */ 0x956f409645290a1e, 0xef0bf8e3263a962e,
386 |               0xed6a50eb5ec2647b, 0x0694283a9dca7502,
387 |     /* 175 */ 0x769b963643a2dcd1, 0x42b7c8ea09fc5353,
388 |               0x4f002aee13397eab, 0x63005e2c19b7d63a,
389 |     /* 176 */ 0xca6736da63023bea, 0x966c7f6db12a99b7,
390 |               0xace09390c537c5e1, 0x0b696063a1aa89ee,
391 |     /* 177 */ 0xebb03e97288c56e5, 0x432a9f9f938c8be8,
392 |               0xa6a5a93d5b717f71, 0x1a5fb4c3e18f9d97,
393 |     /* 178 */ 0x1c94e7ad1c60cdce, 0xee202a43fc02c4a0,
394 |               0x8dafe4d867c46a20, 0x0a10263c8ac27b58,
395 |     /* 179 */ 0xd0dea9dfe4432a4a, 0x856af87bbe9277c5,
396 |               0xce8472acc212c71a, 0x6f151b6d9bbb1e91,
397 |     /* 180 */ 0x26776c527ceed56a, 0x7d211cb7fbf8faec,
398 |               0x37ae66a6fd4609cc, 0x1f81b702d2770c42,
399 |     /* 181 */ 0x2fb0b057eac58392, 0xe1dd89fe29744e9d,
400 |               0xc964f8eb17beb4f8, 0x29571073c9a2d41e,
401 |     /* 182 */ 0xa948a18981c0e254, 0x2df6369b65b22830,
402 |               0xa33eb2d75fcfd3c6, 0x078cd6ec4199a01f,
403 |     /* 183 */ 0x4a584a41ad900d2f, 0x32142b78e2c74c52,
404 |               0x68c4e8338431c978, 0x7f69ea9008689fc2,
405 |     /* 184 */ 0x52f2c81e46a38265, 0xfd78072d04a832fd,
406 |               0x8cd7d5fa25359e94, 0x4de71b7454cc29d2,
407 |     /* 185 */ 0x42eb60ad1eda6ac9, 0x0aad37dfdbc09c3a,
408 |               0x81004b71e33cc191, 0x44e6be345122803c,
409 |     /* 186 */ 0x03fe8388ba1920db, 0xf5d57c32150db008,
410 |               0x49c8c4281af60c29, 0x21edb518de701aee,
411 |     /* 187 */ 0x7fb63e418f06dc99, 0xa4460d99c166d7b8,
412 |               0x24dd5248ce520a83, 0x5ec3ad712b928358,
413 |     /* 188 */ 0x15022a5fbd17930f, 0xa4f64a77d82570e3,
414 |               0x12bc8d6915783712, 0x498194c0fc620abb,
415 |     /* 189 */ 0x38a2d9d255686c82, 0x785c6bd9193e21f0,
416 |               0xe4d5c81ab24a5484, 0x56307860b2e20989,
417 |     /* 190 */ 0x429d55f78b4d74c4, 0x22f1834643350131,
418 |               0x1e60c24598c71fff, 0x59f2f014979983ef,
419 |     /* 191 */ 0x46a47d56eb494a44, 0x3e22a854d636a18e,
420 |               0xb346e15274491c3b, 0x2ceafd4e5390cde7,
421 |     /* 192 */ 0xba8a8538be0d6675, 0x4b9074bb50818e23,
422 |               0xcbdab89085d304c3, 0x61a24fe0e56192c4,
423 |     /* 193 */ 0xcb7615e6db525bcb, 0xdd7d8c35a567e4ca,
424 |               0xe6b4153acafcdd69, 0x2d668e097f3c9766,
425 |     /* 194 */ 0xa57e7e265ce55ef0, 0x5d9f4e527cd4b967,
426 |               0xfbc83606492fd1e5, 0x090d52beb7c3f7ae,
427 |     /* 195 */ 0x09b9515a1e7b4d7c, 0x1f266a2599da44c0,
428 |               0xa1c49548e2c55504, 0x7ef04287126f15cc,
429 |     /* 196 */ 0xfed1659dbd30ef15, 0x8b4ab9eec4e0277b,
430 |               0x884d6236a5df3291, 0x1fd96ea6bf5cf788,
431 |     /* 197 */ 0x42a161981f190d9a, 0x61d849507e6052c1,
432 |               0x9fe113bf285a2cd5, 0x7c22d676dbad85d8,
433 |     /* 198 */ 0x82e770ed2bfbd27d, 0x4c05b2ece996f5a5,
434 |               0xcd40a9c2b0900150, 0x5895319213d9bf64,
435 |     /* 199 */ 0xe7cc5d703fea2e08, 0xb50c491258e2188c,
436 |               0xcce30baa48205bf0, 0x537c659ccfa32d62,
437 |     /* 200 */ 0x37b6623a98cfc088, 0xfe9bed1fa4d6aca4,
438 |               0x04d29b8e56a8d1b0, 0x725f71c40b519575,
439 |     /* 201 */ 0x28c7f89cd0339ce6, 0x8367b14469ddc18b,
440 |               0x883ada83a6a1652c, 0x585f1974034d6c17,
441 |     /* 202 */ 0x89cfb266f1b19188, 0xe63b4863e7c35217,
442 |               0xd88c9da6b4c0526a, 0x3e035c9df0954635,
443 |     /* 203 */ 0xdd9d5412fb45de9d, 0xdd684532e4cff40d,
444 |               0x4b5c999b151d671c, 0x2d8c2cc811e7f690,
445 |     /* 204 */ 0x7f54be1d90055d40, 0xa464c5df464aaf40,
446 |               0x33979624f0e917be, 0x2c018dc527356b30,
447 |     /* 205 */ 0xa5415024e330b3d4, 0x73ff3d96691652d3,
448 |               0x94ec42c4ef9b59f1, 0x0747201618d08e5a,
449 |     /* 206 */ 0x4d6ca48aca411c53, 0x66415f2fcfa66119,
450 |               0x9c4dd40051e227ff, 0x59810bc09a02f7eb,
451 |     /* 207 */ 0x2a7eb171b3dc101d, 0x441c5ab99ffef68e,
452 |               0x32025c9b93b359ea, 0x5e8ce0a71e9d112f,
453 |     /* 208 */ 0xbfcccb92429503fd, 0xd271ba752f095d55,
454 |               0x345ead5e972d091e, 0x18c8df11a83103ba,
455 |     /* 209 */ 0x90cd949a9aed0f4c, 0xc5d1f4cb6660e37e,
456 |               0xb8cac52d56c52e0b, 0x6e42e400c5808e0d,
457 |     /* 210 */ 0xa3b46966eeaefd23, 0x0c4f1f0be39ecdca,
458 |               0x189dc8c9d683a51d, 0x51f27f054c09351b,
459 |     /* 211 */ 0x4c487ccd2a320682, 0x587ea95bb3df1c96,
460 |               0xc8ccf79e555cb8e8, 0x547dc829a206d73d,
461 |     /* 212 */ 0xb822a6cd80c39b06, 0xe96d54732000d4c6,
462 |               0x28535b6f91463b4d, 0x228f4660e2486e1d,
463 |     /* 213 */ 0x98799538de8d3abf, 0x8cd8330045ebca6e,
464 |               0x79952a008221e738, 0x4322e1a7535cd2bb,
465 |     /* 214 */ 0xb114c11819d1801c, 0x2016e4d84f3f5ec7,
466 |               0xdd0e2df409260f4c, 0x5ec362c0ae5f7266,
467 |     /* 215 */ 0xc0462b18b8b2b4ee, 0x7cc8d950274d1afb,
468 |               0xf25f7105436b02d2, 0x43bbf8dcbff9ccd3,
469 |     /* 216 */ 0xb6ad1767a039e9df, 0xb0714da8f69d3583,
470 |               0x5e55fa18b42931f5, 0x4ed5558f33c60961,
471 |     /* 217 */ 0x1fe37901c647a5dd, 0x593ddf1f8081d357,
472 |               0x0249a4fd813fd7a6, 0x69acca274e9caf61,
473 |     /* 218 */ 0x047ba3ea330721c9, 0x83423fc20e7e1ea0,
474 |               0x1df4c0af01314a60, 0x09a62dab89289527,
475 |     /* 219 */ 0xa5b325a49cc6cb00, 0xe94b5dc654b56cb6,
476 |               0x3be28779adc994a0, 0x4296e8f8ba3a4aad,
477 |     /* 220 */ 0x328689761e451eab, 0x2e4d598bff59594a,
478 |               0x49b96853d7a7084a, 0x4980a319601420a8,
479 |     /* 221 */ 0x9565b9e12f552c42, 0x8a5318db7100fe96,
480 |               0x05c90b4d43add0d7, 0x538b4cd66a5d4eda,
481 |     /* 222 */ 0xf4e94fc3e89f039f, 0x592c9af26f618045,
482 |               0x08a36eb5fd4b9550, 0x25fffaf6c2ed1419,
483 |     /* 223 */ 0x34434459cc79d354, 0xeeecbfb4b1d5476b,
484 |               0xddeb34a061615d99, 0x5129cecceb64b773,
485 |     /* 224 */ 0xee43215894993520, 0x772f9c7cf14c0b3b,
486 |               0xd2e2fce306bedad5, 0x715f42b546f06a97,
487 |     /* 225 */ 0x434ecdceda5b5f1a, 0x0da17115a49741a9,
488 |               0x680bd77c73edad2e, 0x487c02354edd9041,
489 |     /* 226 */ 0xb8efeff3a70ed9c4, 0x56a32aa3e857e302,
490 |               0xdf3a68bd48a2a5a0, 0x07f650b73176c444,
491 |     /* 227 */ 0xe38b9b1626e0ccb1, 0x79e053c18b09fb36,
492 |               0x56d90319c9f94964, 0x1ca941e7ac9ff5c4,
493 |     /* 228 */ 0x49c4df29162fa0bb, 0x8488cf3282b33305,
494 |               0x95dfda14cabb437d, 0x3391f78264d5ad86,
495 |     /* 229 */ 0x729ae06ae2b5095d, 0xd58a58d73259a946,
496 |               0xe9834262d13921ed, 0x27fedafaa54bb592,
497 |     /* 230 */ 0xa99dc5b829ad48bb, 0x5f025742499ee260,
498 |               0x802c8ecd5d7513fd, 0x78ceb3ef3f6dd938,
499 |     /* 231 */ 0xc342f44f8a135d94, 0x7b9edb44828cdda3,
500 |               0x9436d11a0537cfe7, 0x5064b164ec1ab4c8,
501 |     /* 232 */ 0x7020eccfd37eb2fc, 0x1f31ea3ed90d25fc,
502 |               0x1b930d7bdfa1bb34, 0x5344467a48113044,
503 |     /* 233 */ 0x70073170f25e6dfb, 0xe385dc1a50114cc8,
504 |               0x2348698ac8fc4f00, 0x2a77a55284dd40d8,
505 |     /* 234 */ 0xfe06afe0c98c6ce4, 0xc235df96dddfd6e4,
506 |               0x1428d01e33bf1ed3, 0x785768ec9300bdaf,
507 |     /* 235 */ 0x9702e57a91deb63b, 0x61bdb8bfe5ce8b80,
508 |               0x645b426f3d1d58ac, 0x4804a82227a557bc,
509 |     /* 236 */ 0x8e57048ab44d2601, 0x68d6501a4b3a6935,
510 |               0xc39c9ec3f9e1c293, 0x4172f257d4de63e2,
511 |     /* 237 */ 0xd368b450330c6401, 0x040d3017418f2391,
512 |               0x2c34bb6090b7d90d, 0x16f649228fdfd51f,
513 |     /* 238 */ 0xbea6818e2b928ef5, 0xe28ccf91cdc11e72,
514 |               0x594aaa68e77a36cd, 0x313034806c7ffd0f,
515 |     /* 239 */ 0x8a9d27ac2249bd65, 0x19a3b464018e9512,
516 |               0xc26ccff352b37ec7, 0x056f68341d797b21,
517 |     /* 240 */ 0x5e79d6757efd2327, 0xfabdbcb6553afe15,
518 |               0xd3e7222c6eaf5a60, 0x7046c76d4dae743b,
519 |     /* 241 */ 0x660be872b18d4a55, 0x19992518574e1496,
520 |               0xc103053a302bdcbb, 0x3ed8e9800b218e8e,
521 |     /* 242 */ 0x7b0b9239fa75e03e, 0xefe9fb684633c083,
522 |               0x98a35fbe391a7793, 0x6065510fe2d0fe34,
523 |     /* 243 */ 0x55cb668548abad0c, 0xb4584548da87e527,
524 |               0x2c43ecea0107c1dd, 0x526028809372de35,
525 |     /* 244 */ 0x3415c56af9213b1f, 0x5bee1a4d017e98db,
526 |               0x13f6b105b5cf709b, 0x5ff20e3482b29ab6,
527 |     /* 245 */ 0x0aa29c75cc2e6c90, 0xfc7d73ca3a70e206,
528 |               0x899fc38fc4b5c515, 0x250386b124ffc207,
529 |     /* 246 */ 0x54ea28d5ae3d2b56, 0x9913149dd6de60ce,
530 |               0x16694fc58f06d6c1, 0x46b23975eb018fc7,
531 |     /* 247 */ 0x470a6a0fb4b7b4e2, 0x5d92475a8f7253de,
532 |               0xabeee5b52fbd3adb, 0x7fa20801a0806968,
533 |     /* 248 */ 0x76f3faf19f7714d2, 0xb3e840c12f4660c3,
534 |               0x0fb4cd8df212744e, 0x4b065a251d3a2dd2,
535 |     /* 249 */ 0x5cebde383d77cd4a, 0x6adf39df882c9cb1,
536 |               0xa2dd242eb09af759, 0x3147c0e50e5f6422,
537 |     /* 250 */ 0x164ca5101d1350db, 0xf8d13479c33fc962,
538 |               0xe640ce4d13e5da08, 0x4bdee0c45061f8ba,
539 |     /* 251 */ 0xd7c46dc1a4edb1c9, 0x5514d7b6437fd98a,
540 |               0x58942f6bb2a1c00b, 0x2dffb2ab1d70710e,
541 |     /* 252 */ 0xccdfcf2fc18b6d68, 0xa8ebcba8b7806167,
542 |               0x980697f95e2937e3, 0x02fbba1cd0126e8c};
543 | 
544 | #endif /* TABLE_LADDER_8K_H */
545 | 


--------------------------------------------------------------------------------
/ext/x25519_precomputed/x25519_precomputed.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Ruby C extension providing bindings to the rfc7748_precomputed implementation of
 3 | the X25519 Diffie-Hellman algorithm
 4 | */
 5 | 
 6 | #include "ruby.h"
 7 | #include "x25519_precomputed.h"
 8 | 
 9 | static VALUE mX25519 = Qnil; /* handle for the X25519 module; assigned in Init_x25519_precomputed */
10 | static VALUE mX25519_Provider = Qnil; /* handle for X25519::Provider */
11 | static VALUE mX25519_Provider_Precomputed = Qnil; /* handle for X25519::Provider::Precomputed */
12 | /* Forward declarations of the C functions that Init_x25519_precomputed registers as singleton methods */
13 | static VALUE mX25519_Provider_Precomputed_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u);
14 | static VALUE mX25519_Provider_Precomputed_scalarmult_base(VALUE self, VALUE scalar);
15 | static VALUE mX25519_is_available(VALUE self);
16 | 
17 | /* Initialize the x25519_precomputed C extension: define X25519::Provider::Precomputed and register its singleton methods */
18 | void Init_x25519_precomputed(void)
19 | {
20 |     mX25519 = rb_define_module("X25519");
21 |     mX25519_Provider = rb_define_module_under(mX25519, "Provider");
22 |     mX25519_Provider_Precomputed = rb_define_module_under(mX25519_Provider, "Precomputed");
23 |     /* The last argument of each call is the number of Ruby-level arguments the method takes */
24 |     rb_define_singleton_method(mX25519_Provider_Precomputed, "scalarmult", mX25519_Provider_Precomputed_scalarmult, 2);
25 |     rb_define_singleton_method(mX25519_Provider_Precomputed, "scalarmult_base", mX25519_Provider_Precomputed_scalarmult_base, 1);
26 |     rb_define_singleton_method(mX25519_Provider_Precomputed, "available?", mX25519_is_available, 0);
27 | }
28 | 
29 | /* Variable-base scalar multiplication: returns a new X25519_KEYSIZE_BYTES-byte String; raises ArgumentError unless both arguments are exactly that long */
30 | static VALUE mX25519_Provider_Precomputed_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u)
31 | {
32 |     /* X25519_KEY ensures inputs are aligned at 32-bytes */
33 |     X25519_KEY raw_scalar, raw_montgomery_u, product;
34 |     /* Coerce both arguments first: StringValue can call #to_str (arbitrary Ruby code), so no length is trusted until every conversion is done */
35 |     StringValue(scalar);
36 |     StringValue(montgomery_u);
37 |     if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) {
38 |         rb_raise(
39 |             rb_eArgError,
40 |             "expected %d-byte scalar, got %ld",
41 |             X25519_KEYSIZE_BYTES,
42 |             RSTRING_LEN(scalar)
43 |         );
44 |     }
45 | 
46 |     if(RSTRING_LEN(montgomery_u) != X25519_KEYSIZE_BYTES) {
47 |         rb_raise(
48 |             rb_eArgError,
49 |             "expected %d-byte Montgomery-u coordinate, got %ld",
50 |             X25519_KEYSIZE_BYTES,
51 |             RSTRING_LEN(montgomery_u)
52 |         );
53 |     }
54 | 
55 |     memcpy(raw_scalar, RSTRING_PTR(scalar), X25519_KEYSIZE_BYTES);
56 |     memcpy(raw_montgomery_u, RSTRING_PTR(montgomery_u), X25519_KEYSIZE_BYTES);
57 |     x25519_precomputed_scalarmult(product, raw_scalar, raw_montgomery_u);
58 | 
59 |     return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES);
60 | }
61 | 
62 | /* Fixed-base scalar multiplication: returns a new X25519_KEYSIZE_BYTES-byte String computed from a scalar of that same length */
63 | static VALUE mX25519_Provider_Precomputed_scalarmult_base(VALUE self, VALUE scalar)
64 | {
65 |     /* Both working buffers use X25519_KEY, the aligned 32-byte array type */
66 |     X25519_KEY scalar_bytes;
67 |     X25519_KEY result;
68 |     long scalar_len;
69 | 
70 |     StringValue(scalar);
71 |     scalar_len = RSTRING_LEN(scalar);
72 | 
73 |     /* Anything other than exactly one key's worth of bytes is rejected with ArgumentError */
74 |     if(scalar_len != X25519_KEYSIZE_BYTES) {
75 |         rb_raise(rb_eArgError, "expected %d-byte scalar, got %ld", X25519_KEYSIZE_BYTES, scalar_len);
76 |     }
77 | 
78 |     memcpy(scalar_bytes, RSTRING_PTR(scalar), X25519_KEYSIZE_BYTES);
79 |     x25519_precomputed_scalarmult_base(result, scalar_bytes);
80 | 
81 |     return rb_str_new((const char *)result, X25519_KEYSIZE_BYTES);
82 | }
83 | 
84 | /* Report whether the x25519_precomputed backend is supported on this CPU: Qtrue when the feature check returns non-zero, Qfalse otherwise */
85 | static VALUE mX25519_is_available(VALUE self)
86 | {
87 |     const int supported = check_4th_gen_intel_core_features();
88 |     return supported ? Qtrue : Qfalse;
89 | }


--------------------------------------------------------------------------------
/ext/x25519_precomputed/x25519_precomputed.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) 2017, Armando Faz <armfazh@ic.unicamp.br>. All rights reserved.
 3 |  * Institute of Computing.
 4 |  * University of Campinas, Brazil.
 5 |  *
 6 |  * Redistribution and use in source and binary forms, with or without
 7 |  * modification, are permitted provided that the following conditions
 8 |  * are met:
 9 |  *
10 |  *  * Redistributions of source code must retain the above copyright
11 |  *    notice, this list of conditions and the following disclaimer.
12 |  *  * Redistributions in binary form must reproduce the above
13 |  *    copyright notice, this list of conditions and the following
14 |  *    disclaimer in the documentation and/or other materials provided
15 |  *    with the distribution.
16 |  *  * Neither the name of University of Campinas nor the names of its
17 |  *    contributors may be used to endorse or promote products derived
18 |  *    from this software without specific prior written permission.
19 |  *
20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 |  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
32 |  */
33 | 
34 | #ifndef X25519_PRECOMPUTED_H
35 | #define X25519_PRECOMPUTED_H
36 | 
37 | #include <stdint.h>
38 | 
39 | #ifndef ALIGN_BYTES
40 | #define ALIGN_BYTES 32
41 | #endif /* ALIGN_BYTES: alignment in bytes, 32 unless defined before this header is included */
42 | /* ALIGN: alignment specifier (unless already defined): __declspec(align(...)) under __INTEL_COMPILER, __attribute__((aligned(...))) otherwise */
43 | #ifndef ALIGN
44 | #ifdef __INTEL_COMPILER
45 | #define ALIGN __declspec(align(ALIGN_BYTES))
46 | #else
47 | #define ALIGN __attribute__((aligned(ALIGN_BYTES)))
48 | #endif
49 | #endif
50 | /* X25519_KEY: array of X25519_KEYSIZE_BYTES bytes declared with the ALIGN specifier */
51 | #define X25519_KEYSIZE_BYTES 32
52 | typedef ALIGN uint8_t X25519_KEY[X25519_KEYSIZE_BYTES];
53 | /* Entry points: the Ruby binding passes its result buffer as the first argument of both scalarmult functions and treats a non-zero check_4th_gen_intel_core_features() as "available" */
54 | void x25519_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key, uint8_t *session_key);
55 | void x25519_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key);
56 | int check_4th_gen_intel_core_features();
57 | 
58 | #endif /* X25519_PRECOMPUTED_H */
59 | 


--------------------------------------------------------------------------------
/ext/x25519_precomputed/x25519_x64.c:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) 2017, Armando Faz <armfazh@ic.unicamp.br>. All rights reserved.
  3 |  * Institute of Computing.
  4 |  * University of Campinas, Brazil.
  5 |  *
  6 |  * Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  7 |  *
  8 |  * Redistribution and use in source and binary forms, with or without
  9 |  * modification, are permitted provided that the following conditions
 10 |  * are met:
 11 |  *
 12 |  *  * Redistributions of source code must retain the above copyright
 13 |  *    notice, this list of conditions and the following disclaimer.
 14 |  *  * Redistributions in binary form must reproduce the above
 15 |  *    copyright notice, this list of conditions and the following
 16 |  *    disclaimer in the documentation and/or other materials provided
 17 |  *    with the distribution.
 18 |  *  * Neither the name of University of Campinas nor the names of its
 19 |  *    contributors may be used to endorse or promote products derived
 20 |  *    from this software without specific prior written permission.
 21 |  *
 22 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 23 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 24 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 25 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 26 |  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 27 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 28 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 29 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 30 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 31 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 32 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 33 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 34 |  */
 35 | 
 36 | #include <string.h>
 37 | #include "fp25519_x64.h"
 38 | #include "x25519_precomputed.h"
 39 | #include "table_ladder_x25519.h"
 40 | 
 41 | static inline void cswap_x64(uint64_t bit, uint64_t *const px, uint64_t *const py) {
 42 |   /* Branch-free conditional swap of px and py: select is all-ones when bit is 1 and zero when bit is 0 */
 43 |   const uint64_t select = (uint64_t)0 - bit;
 44 |   int word;
 45 |   for (word = 0; word < NUM_WORDS_ELTFP25519_X64; word++) {
 46 |     const uint64_t delta = select & (px[word] ^ py[word]);
 47 |     px[word] ^= delta;
 48 |     py[word] ^= delta;
 49 |   }
 50 | }
 51 | 
 52 | 
 53 | /** Variable-base scalar multiplication of session_key by private_key; the result is written to shared. Original rfc7748_precomputed name: 'x25519_shared_secret_x64' */
 54 | void x25519_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key,
 55 |                                    uint8_t *session_key) {
 56 |   ALIGN uint64_t buffer[4 * NUM_WORDS_ELTFP25519_X64];
 57 |   ALIGN uint64_t coordinates[4 * NUM_WORDS_ELTFP25519_X64];
 58 |   ALIGN uint64_t workspace[6 * NUM_WORDS_ELTFP25519_X64];
 59 |   ALIGN uint8_t session[X25519_KEYSIZE_BYTES];
 60 |   ALIGN uint8_t private[X25519_KEYSIZE_BYTES];
 61 |   /* coordinates[] holds Px|Pz|Qx|Qz at word offsets 0/4/8/12; X2,Z2 alias Qx,Qz and X3,Z3 alias Px,Pz */
 62 |   int i = 0, j = 0;
 63 |   uint64_t prev = 0;
 64 |   uint64_t *const X1 = (uint64_t *)session;
 65 |   uint64_t *const key = (uint64_t *)private;
 66 |   uint64_t *const Px = coordinates + 0;
 67 |   uint64_t *const Pz = coordinates + 4;
 68 |   uint64_t *const Qx = coordinates + 8;
 69 |   uint64_t *const Qz = coordinates + 12;
 70 |   uint64_t *const X2 = Qx;
 71 |   uint64_t *const Z2 = Qz;
 72 |   uint64_t *const X3 = Px;
 73 |   uint64_t *const Z3 = Pz;
 74 |   uint64_t *const X2Z2 = Qx;
 75 |   uint64_t *const X3Z3 = Px;
 76 |   /* workspace[] slots at word offsets 0..20; AB, DC and DACB point at the first element of an adjacent pair */
 77 |   uint64_t *const A = workspace + 0;
 78 |   uint64_t *const B = workspace + 4;
 79 |   uint64_t *const D = workspace + 8;
 80 |   uint64_t *const C = workspace + 12;
 81 |   uint64_t *const DA = workspace + 16;
 82 |   uint64_t *const CB = workspace + 20;
 83 |   uint64_t *const AB = A;
 84 |   uint64_t *const DC = D;
 85 |   uint64_t *const DACB = DA;
 86 |   uint64_t *const buffer_1w = buffer; /* NOTE(review): not referenced directly below; presumably consumed by the *_EltFp25519_* helpers from fp25519_x64.h - confirm before removing */
 87 |   uint64_t *const buffer_2w = buffer; /* NOTE(review): same as buffer_1w */
 88 |   /* Work on local aligned copies; the caller's private_key and session_key buffers are never written */
 89 |   memcpy(private, private_key, sizeof(private));
 90 |   memcpy(session, session_key, sizeof(session));
 91 | 
 92 |   /* clampC function: clear the 3 low bits of the first byte; in the last byte clear bit 7 and set bit 6 */
 93 |  private
 94 |   [0] = private[0] & (~(uint8_t)0x7);
 95 |  private
 96 |   [X25519_KEYSIZE_BYTES - 1] =
 97 |       (uint8_t)64 | (private[X25519_KEYSIZE_BYTES - 1] & (uint8_t)0x7F);
 98 | 
 99 |   /**
100 |   * As in the draft:
101 |   * When receiving such an array, implementations of curve25519
102 |   * MUST mask the most-significant bit in the final byte. This
103 |   * is done to preserve compatibility with point formats which
104 |   * reserve the sign bit for use in other protocols and to
105 |   * increase resistance to implementation fingerprinting
106 |   **/
107 |   session[X25519_KEYSIZE_BYTES - 1] &= (1 << (255 % 8)) - 1; /* the mask evaluates to 0x7F */
108 |   /* Initial state: Px = X1, Pz = 1, Qx = 1, Qz = 0 */
109 |   copy_EltFp25519_1w_x64(Px, X1);
110 |   setzero_EltFp25519_1w_x64(Pz);
111 |   setzero_EltFp25519_1w_x64(Qx);
112 |   setzero_EltFp25519_1w_x64(Qz);
113 | 
114 |   Pz[0] = 1;
115 |   Qx[0] = 1;
116 | 
117 |   /* main-loop: one iteration per key bit, from bit 62 of key[3] down to bit 0 of key[0] */
118 |   prev = 0;
119 |   j = 62;
120 |   for (i = 3; i >= 0; i--) {
121 |     while (j >= 0) {
122 |       uint64_t bit = (key[i] >> j) & 0x1;
123 |       uint64_t swap = bit ^ prev; /* 1 only when this bit differs from the previous one */
124 |       prev = bit;
125 | 
126 |       add_EltFp25519_1w_x64(A, X2, Z2);    /* A = (X2+Z2)                   */
127 |       sub_EltFp25519_1w_x64(B, X2, Z2);    /* B = (X2-Z2)                   */
128 |       add_EltFp25519_1w_x64(C, X3, Z3);    /* C = (X3+Z3)                   */
129 |       sub_EltFp25519_1w_x64(D, X3, Z3);    /* D = (X3-Z3)                   */
130 |       mul_EltFp25519_2w_x64(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C]         */
131 | 
132 |       cswap_x64(swap, A, C);
133 |       cswap_x64(swap, B, D);
134 | 
135 |       sqr_EltFp25519_2w_x64(AB);         /* [AA|BB] = [A^2|B^2]           */
136 |       add_EltFp25519_1w_x64(X3, DA, CB); /* X3 = (DA+CB)                  */
137 |       sub_EltFp25519_1w_x64(Z3, DA, CB); /* Z3 = (DA-CB)                  */
138 |       sqr_EltFp25519_2w_x64(X3Z3);       /* [X3|Z3] = [(DA+CB)|(DA-CB)]^2 */
139 | 
140 |       copy_EltFp25519_1w_x64(X2, B);   /* X2 = B^2                      */
141 |       sub_EltFp25519_1w_x64(Z2, A, B); /* Z2 = E = AA-BB                */
142 | 
143 |       mul_a24_EltFp25519_1w_x64(B, Z2);      /* B = a24*E                     */
144 |       add_EltFp25519_1w_x64(B, B, X2);       /* B = a24*E+B                   */
145 |       mul_EltFp25519_2w_x64(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B]   */
146 |       mul_EltFp25519_1w_x64(Z3, Z3, X1);     /* Z3 = Z3*X1                    */
147 |       j--;
148 |     }
149 |     j = 63;
150 |   }
151 |   /* NOTE(review): shared is accessed through a uint64_t pointer, so it is assumed to be suitably aligned (the Ruby binding passes an X25519_KEY) */
152 |   inv_EltFp25519_1w_x64(A, Qz); /* presumably A = Qz^-1; helper is defined in fp25519_x64.h */
153 |   mul_EltFp25519_1w_x64((uint64_t *)shared, Qx, A); /* shared = Qx*A */
154 |   fred_EltFp25519_1w_x64((uint64_t *)shared); /* presumably the final reduction of the result - confirm in fp25519_x64.h */
155 | }
156 | 
157 | /* Original rfc7748_precomputed name: 'x25519_keygen_precmp_x64'
    |  *
    |  * Fixed-base scalar multiplication driven by the precomputed table
    |  * Table_Ladder_8k.
    |  *
    |  * private_key is only read: it is copied into a local buffer, and the clamp
    |  * is applied to that copy. The ladder then consumes bits 3..254 of the
    |  * clamped copy (one table entry per bit), performs q = 3 doublings, and
    |  * finally divides Ur1 by Zr1.
    |  *
    |  * session_key receives the result and is written through a uint64_t pointer.
    |  * NOTE(review): both pointers presumably refer to X25519_KEYSIZE_BYTES bytes,
    |  * and session_key presumably must be suitably aligned for uint64_t stores --
    |  * confirm against the callers.
    |  */
158 | void x25519_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key) {
159 |   ALIGN uint64_t buffer[4 * NUM_WORDS_ELTFP25519_X64];      /* scratch, see buffer_1w / buffer_2w */
160 |   ALIGN uint64_t coordinates[4 * NUM_WORDS_ELTFP25519_X64]; /* Ur1 | Zr1 | Ur2 | Zr2 */
161 |   ALIGN uint64_t workspace[4 * NUM_WORDS_ELTFP25519_X64];   /* A | B | C | D */
162 |   ALIGN uint8_t private[X25519_KEYSIZE_BYTES];              /* clamped copy of private_key */
163 | 
164 |   int i = 0, j = 0, k = 0;
165 |   uint64_t *const key = (uint64_t *)private; /* the clamped copy viewed as 64-bit words */
166 |   uint64_t *const Ur1 = coordinates + 0;
167 |   uint64_t *const Zr1 = coordinates + 4;
168 |   uint64_t *const Ur2 = coordinates + 8;
169 |   uint64_t *const Zr2 = coordinates + 12;
170 | 
171 |   uint64_t *const UZr1 = coordinates + 0; /* same storage as Ur1 followed by Zr1 */
172 |   uint64_t *const ZUr2 = coordinates + 8; /* same storage as Ur2 followed by Zr2 */
173 | 
174 |   uint64_t *const A = workspace + 0;
175 |   uint64_t *const B = workspace + 4;
176 |   uint64_t *const C = workspace + 8;
177 |   uint64_t *const D = workspace + 12;
178 | 
179 |   uint64_t *const AB = workspace + 0; /* same storage as A followed by B */
180 |   uint64_t *const CD = workspace + 8; /* same storage as C followed by D */
181 | 
182 |   uint64_t *const buffer_1w = buffer; /* never named below; presumably used implicitly by the */
183 |   uint64_t *const buffer_2w = buffer; /* *_EltFp25519_* macros -- confirm in fp25519_x64.h    */
184 |   uint64_t *P = (uint64_t *)Table_Ladder_8k; /* table read four 64-bit words per entry (see &P[4 * k]) */
185 | 
186 |   memcpy(private, private_key, sizeof(private));
187 | 
188 |   /* clampC function: clear the low 3 bits of the first byte; in the last byte clear bit 7 and set bit 6 */
189 |  private
190 |   [0] = private[0] & (~(uint8_t)0x7);
191 |  private
192 |   [X25519_KEYSIZE_BYTES - 1] =
193 |       (uint8_t)64 | (private[X25519_KEYSIZE_BYTES - 1] & (uint8_t)0x7F);
194 | 
195 |   setzero_EltFp25519_1w_x64(Ur1);
196 |   setzero_EltFp25519_1w_x64(Zr1);
197 |   setzero_EltFp25519_1w_x64(Zr2);
198 |   Ur1[0] = 1; /* after the zeroing above only word 0 is non-zero in Ur1, Zr1 and Zr2 */
199 |   Zr1[0] = 1;
200 |   Zr2[0] = 1;
201 | 
202 |   /* G-S: Ur2 is initialised to this fixed constant */
203 |   Ur2[3] = 0x1eaecdeee27cab34;
204 |   Ur2[2] = 0xadc7a0b9235d48e2;
205 |   Ur2[1] = 0xbbf095ae14b2edf8;
206 |   Ur2[0] = 0x7e94e1fec82faabd;
207 | 
208 |   /* main-loop */
209 |   const int ite[4] = {64, 64, 64, 63}; /* exclusive upper bit index per key word; bit 63 of the last word is skipped */
210 |   const int q = 3;                     /* first key bit processed, and number of doublings afterwards */
211 |   uint64_t swap = 1;
212 | 
213 |   j = q; /* the first word starts at bit q; later words start at bit 0 (j is reset after the inner loop) */
214 |   for (i = 0; i < NUM_WORDS_ELTFP25519_X64; i++) {
215 |     while (j < ite[i]) {
216 |       k = (64 * i + j - q); /* table entry index: 0 for key bit q, increasing by one per bit */
217 |       uint64_t bit = (key[i] >> j) & 0x1;
218 |       swap = swap ^ bit; /* swap value is previous bit XOR current bit (previous starts at 1) */
219 |       cswap_x64(swap, Ur1, Ur2);
220 |       cswap_x64(swap, Zr1, Zr2);
221 |       swap = bit;
222 |       /** Addition */
223 |       sub_EltFp25519_1w_x64(B, Ur1, Zr1);     /* B = Ur1-Zr1                 */
224 |       add_EltFp25519_1w_x64(A, Ur1, Zr1);     /* A = Ur1+Zr1                 */
225 |       mul_EltFp25519_1w_x64(C, &P[4 * k], B); /* C = M*B, M = table entry k  */
226 |       sub_EltFp25519_1w_x64(B, A, C);         /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
227 |       add_EltFp25519_1w_x64(A, A, C);         /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
228 |       sqr_EltFp25519_2w_x64(AB);              /* A = A^2      |  B = B^2     */
229 |       mul_EltFp25519_2w_x64(UZr1, ZUr2, AB);  /* Ur1 = Zr2*A  |  Zr1 = Ur2*B */
230 |       j++;
231 |     }
232 |     j = 0;
233 |   }
234 | 
235 |   /** Doubling */
236 |   for (i = 0; i < q; i++) {
237 |     add_EltFp25519_1w_x64(A, Ur1, Zr1);  /*  A = Ur1+Zr1   */
238 |     sub_EltFp25519_1w_x64(B, Ur1, Zr1);  /*  B = Ur1-Zr1   */
239 |     sqr_EltFp25519_2w_x64(AB);           /*  A = A**2     B = B**2   */
240 |     copy_EltFp25519_1w_x64(C, B);        /*  C = B         */
241 |     sub_EltFp25519_1w_x64(B, A, B);      /*  B = A-B       */
242 |     mul_a24_EltFp25519_1w_x64(D, B);     /*  D = my_a24*B  */
243 |     add_EltFp25519_1w_x64(D, D, C);      /*  D = D+C       */
244 |     mul_EltFp25519_2w_x64(UZr1, AB, CD); /*  [Ur1|Zr1] = [A|B]*[C|D]; NOTE(review): presumably Ur1 = A*C, Zr1 = B*D -- confirm */
245 |   }
246 | 
247 |   /* Convert to affine coordinates: session_key = Ur1 * (1/Zr1) */
248 |   inv_EltFp25519_1w_x64(A, Zr1);
249 |   mul_EltFp25519_1w_x64((uint64_t *)session_key, Ur1, A);
250 |   fred_EltFp25519_1w_x64((uint64_t *)session_key); /* NOTE(review): presumably the final full reduction -- confirm */
251 | }
252 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/api.h:
--------------------------------------------------------------------------------
1 | #define CRYPTO_BYTES 32 /* NOTE(review): presumably the byte length of a scalar-multiplication output -- confirm against its users */
2 | #define CRYPTO_SCALARBYTES 32 /* NOTE(review): presumably the byte length of a scalar -- confirm against its users */
3 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/base.c:
--------------------------------------------------------------------------------
 1 | #include "fe.h"
 2 | #include "x25519_ref10.h"
 3 | 
 4 | /*
 5 |  * q = n * (fixed point): forwards to x25519_ref10_scalarmult with a constant
 6 |  * 32-byte point encoding and returns whatever that routine returns.
 7 |  */
 8 | int x25519_ref10_scalarmult_base(uint8_t *q, const uint8_t *n)
 9 | {
10 |   /* Fixed point encoding: first byte 9, the remaining 31 bytes zero. */
11 |   static const uint8_t base_point[32] = {9};
12 |   const int status = x25519_ref10_scalarmult(q, n, base_point);
13 | 
14 |   return status;
15 | }
10 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/extconf.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | # rubocop:disable Style/GlobalVars
 4 | 
 5 | require "mkmf"
 6 | 
 7 | $CFLAGS << " -Wall -O3 -pedantic -std=c99"
 8 | 
 9 | create_makefile "x25519_ref10"
10 | 
11 | # rubocop:enable Style/GlobalVars
12 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/fe.c:
--------------------------------------------------------------------------------
  1 | #include "fe.h"
  2 | 
  3 | /*
  4 | h = 0
  5 | 
  6 | All ten entries of h are overwritten with zero.
  7 | */
  8 | 
  9 | void fe_0(fe h)
 10 | {
 11 |   int idx;
 12 | 
 13 |   for (idx = 0; idx < 10; ++idx) {
 14 |     h[idx] = 0;
 15 |   }
 16 | }
 20 | 
 21 | /*
 22 | h = 1
 23 | 
 24 | Entry 0 of h becomes 1; entries 1..9 become 0.
 25 | */
 26 | 
 27 | void fe_1(fe h)
 28 | {
 29 |   int idx;
 30 | 
 31 |   h[0] = 1;
 32 |   for (idx = 1; idx < 10; ++idx) {
 33 |     h[idx] = 0;
 34 |   }
 35 | }
 38 | 
 39 | /*
 40 | h = f + g, entry by entry (no carrying is done here).
 41 | Can overlap h with f or g.
 42 | 
 43 | Preconditions:
 44 |    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
 45 |    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
 46 | 
 47 | Postconditions:
 48 |    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
 49 | */
 50 | 
 51 | void fe_add(fe h,fe f,fe g)
 52 | {
 53 |   int32_t sum[10];
 54 |   int idx;
 55 | 
 56 |   /* Finish reading both inputs before the first store, so h may alias f or g. */
 57 |   for (idx = 0; idx < 10; ++idx) {
 58 |     const int32_t lhs = f[idx];
 59 |     const int32_t rhs = g[idx];
 60 |     sum[idx] = lhs + rhs;
 61 |   }
 62 |   for (idx = 0; idx < 10; ++idx) {
 63 |     h[idx] = sum[idx];
 64 |   }
 65 | }
 94 | 
 95 | /*
 96 | h = f
 97 | 
 98 | All ten entries of f are read before any entry of h is written.
 99 | */
100 | 
101 | void fe_copy(fe h,fe f)
102 | {
103 |   int32_t snapshot[10];
104 |   int idx;
105 | 
106 |   for (idx = 0; idx < 10; ++idx) {
107 |     snapshot[idx] = f[idx];
108 |   }
109 |   for (idx = 0; idx < 10; ++idx) {
110 |     h[idx] = snapshot[idx];
111 |   }
112 | }
122 | 
123 | /*
124 | Replace (f,g) with (g,f) if b == 1;
125 | replace (f,g) with (f,g) if b == 0.
126 | 
127 | Preconditions: b in {0,1}.
128 | 
129 | The exchange is done with a mask (-b) applied to f XOR g; there is no
130 | branch on b, and every entry is processed for either value of b.
131 | */
132 | 
133 | void fe_cswap(fe f,fe g,unsigned int b)
134 | {
135 |   int32_t left[10];
136 |   int32_t right[10];
137 |   int32_t delta[10];
138 |   unsigned int mask;
139 |   int idx;
140 | 
141 |   /* Snapshot both inputs and form their entry-wise XOR. */
142 |   for (idx = 0; idx < 10; ++idx) {
143 |     left[idx] = f[idx];
144 |     right[idx] = g[idx];
145 |     delta[idx] = left[idx] ^ right[idx];
146 |   }
147 | 
148 |   /* -b is all-ones for b == 1 and zero for b == 0. */
149 |   mask = -b;
150 |   for (idx = 0; idx < 10; ++idx) {
151 |     delta[idx] &= mask;
152 |   }
153 | 
154 |   /* XOR-ing the masked difference into both sides swaps them or leaves them alone. */
155 |   for (idx = 0; idx < 10; ++idx) {
156 |     f[idx] = left[idx] ^ delta[idx];
157 |   }
158 |   for (idx = 0; idx < 10; ++idx) {
159 |     g[idx] = right[idx] ^ delta[idx];
160 |   }
161 | }
194 | 
195 | /* Assemble in[0..2] into a 24-bit value, in[0] being the least significant byte. */
196 | static uint64_t load_3(const unsigned char *in)
197 | {
198 |   const uint64_t byte0 = (uint64_t) in[0];
199 |   const uint64_t byte1 = (uint64_t) in[1];
200 |   const uint64_t byte2 = (uint64_t) in[2];
201 | 
202 |   return byte0 | (byte1 << 8) | (byte2 << 16);
203 | }
203 | 
204 | /* Assemble in[0..3] into a 32-bit value, in[0] being the least significant byte. */
205 | static uint64_t load_4(const unsigned char *in)
206 | {
207 |   const uint64_t byte0 = (uint64_t) in[0];
208 |   const uint64_t byte1 = (uint64_t) in[1];
209 |   const uint64_t byte2 = (uint64_t) in[2];
210 |   const uint64_t byte3 = (uint64_t) in[3];
211 | 
212 |   return byte0 | (byte1 << 8) | (byte2 << 16) | (byte3 << 24);
213 | }
213 | 
214 | /*
215 | h = s, unpacking the 32 bytes s[0..31] into the ten entries of h.
216 | 
217 | The mask 8388607 (2^23-1) applied to the last three bytes drops the top
218 | bit of s[31]. After unpacking, one rounding carry pass is run over the
219 | odd entries (25 bits each, entry 9 wrapping into entry 0 with a factor
220 | 19) and one over the even entries (26 bits each).
221 | */
222 | void fe_frombytes(fe h,const unsigned char *s)
223 | {
224 |   int64_t wide[10];
225 |   int64_t carry;
226 |   int idx;
227 | 
228 |   wide[0] = load_4(s);
229 |   wide[1] = load_3(s + 4) << 6;
230 |   wide[2] = load_3(s + 7) << 5;
231 |   wide[3] = load_3(s + 10) << 3;
232 |   wide[4] = load_3(s + 13) << 2;
233 |   wide[5] = load_4(s + 16);
234 |   wide[6] = load_3(s + 20) << 7;
235 |   wide[7] = load_3(s + 23) << 5;
236 |   wide[8] = load_3(s + 26) << 4;
237 |   wide[9] = (load_3(s + 29) & 8388607) << 2;
238 | 
239 |   /* Odd entries. Each step touches only (idx, idx+1), so the steps are independent. */
240 |   for (idx = 1; idx < 9; idx += 2) {
241 |     carry = (wide[idx] + (int64_t) (1<<24)) >> 25;
242 |     wide[idx + 1] += carry;
243 |     wide[idx] -= carry << 25;
244 |   }
245 |   /* Entry 9 wraps around into entry 0, scaled by 19. */
246 |   carry = (wide[9] + (int64_t) (1<<24)) >> 25;
247 |   wide[0] += carry * 19;
248 |   wide[9] -= carry << 25;
249 | 
250 |   /* Even entries. */
251 |   for (idx = 0; idx < 10; idx += 2) {
252 |     carry = (wide[idx] + (int64_t) (1<<25)) >> 26;
253 |     wide[idx + 1] += carry;
254 |     wide[idx] -= carry << 26;
255 |   }
256 | 
257 |   for (idx = 0; idx < 10; ++idx) {
258 |     h[idx] = (int32_t)wide[idx];
259 |   }
260 | }
260 | 
261 | /*
262 | Writes to out a value computed from z by the statement sequence in
263 | pow225521.h, which is textually included into this function body.
264 | 
265 | NOTE(review): the included file presumably refers to out, z, t0..t3 and i
266 | by name and computes the field inverse of z -- confirm in pow225521.h.
267 | The local names below must therefore stay exactly as they are.
268 | */
269 | void fe_invert(fe out,fe z)
270 | {
271 |   fe t0, t1, t2, t3;
272 |   int i;
273 | 
274 | #include "pow225521.h"
275 | }
273 | 
274 | /*
275 | h = f * g
276 | Can overlap h with f or g: every entry of both inputs is copied into a local before h is stored.
277 | 
278 | Preconditions:
279 |    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
280 |    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
281 | 
282 | Postconditions:
283 |    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
284 | */
285 | 
286 | /*
287 | Notes on implementation strategy:
288 | 
289 | Using schoolbook multiplication.
290 | Karatsuba would save a little in some cost models.
291 | 
292 | Most multiplications by 2 and 19 are 32-bit precomputations;
293 | cheaper than 64-bit postcomputations.
294 | 
295 | There is one remaining multiplication by 19 in the carry chain;
296 | one *19 precomputation can be merged into this,
297 | but the resulting data flow is considerably less clean.
298 | 
299 | There are 12 carries below.
300 | 10 of them are 2-way parallelizable and vectorizable.
301 | Can get away with 11 carries, but then data flow is much deeper.
302 | 
303 | With tighter constraints on inputs can squeeze carries into int32.
    | 
    | Naming of the partial products below: fIgJ is f[I]*g[J]; a suffix _2,
    | _19 or _38 is the extra constant factor folded into that product.
    | A product whose index sum I+J is 10 or more is accumulated into
    | h(I+J-10) with a factor 19, and a product of two odd-indexed entries
    | carries an additional factor 2.
304 | */
305 | 
306 | void fe_mul(fe h,fe f,fe g)
307 | {
308 |   int32_t f0 = f[0];
309 |   int32_t f1 = f[1];
310 |   int32_t f2 = f[2];
311 |   int32_t f3 = f[3];
312 |   int32_t f4 = f[4];
313 |   int32_t f5 = f[5];
314 |   int32_t f6 = f[6];
315 |   int32_t f7 = f[7];
316 |   int32_t f8 = f[8];
317 |   int32_t f9 = f[9];
318 |   int32_t g0 = g[0];
319 |   int32_t g1 = g[1];
320 |   int32_t g2 = g[2];
321 |   int32_t g3 = g[3];
322 |   int32_t g4 = g[4];
323 |   int32_t g5 = g[5];
324 |   int32_t g6 = g[6];
325 |   int32_t g7 = g[7];
326 |   int32_t g8 = g[8];
327 |   int32_t g9 = g[9];
328 |   int32_t g1_19 = 19 * g1; /* 1.4*2^29 */
329 |   int32_t g2_19 = 19 * g2; /* 1.4*2^30; still ok */
330 |   int32_t g3_19 = 19 * g3;
331 |   int32_t g4_19 = 19 * g4;
332 |   int32_t g5_19 = 19 * g5;
333 |   int32_t g6_19 = 19 * g6;
334 |   int32_t g7_19 = 19 * g7;
335 |   int32_t g8_19 = 19 * g8;
336 |   int32_t g9_19 = 19 * g9;
337 |   int32_t f1_2 = 2 * f1; /* doubled odd-indexed entries of f, used against odd-indexed entries of g */
338 |   int32_t f3_2 = 2 * f3;
339 |   int32_t f5_2 = 2 * f5;
340 |   int32_t f7_2 = 2 * f7;
341 |   int32_t f9_2 = 2 * f9;
342 |   int64_t f0g0    = f0   * (int64_t) g0; /* the cast makes every product below a 64-bit multiplication */
343 |   int64_t f0g1    = f0   * (int64_t) g1;
344 |   int64_t f0g2    = f0   * (int64_t) g2;
345 |   int64_t f0g3    = f0   * (int64_t) g3;
346 |   int64_t f0g4    = f0   * (int64_t) g4;
347 |   int64_t f0g5    = f0   * (int64_t) g5;
348 |   int64_t f0g6    = f0   * (int64_t) g6;
349 |   int64_t f0g7    = f0   * (int64_t) g7;
350 |   int64_t f0g8    = f0   * (int64_t) g8;
351 |   int64_t f0g9    = f0   * (int64_t) g9;
352 |   int64_t f1g0    = f1   * (int64_t) g0;
353 |   int64_t f1g1_2  = f1_2 * (int64_t) g1;
354 |   int64_t f1g2    = f1   * (int64_t) g2;
355 |   int64_t f1g3_2  = f1_2 * (int64_t) g3;
356 |   int64_t f1g4    = f1   * (int64_t) g4;
357 |   int64_t f1g5_2  = f1_2 * (int64_t) g5;
358 |   int64_t f1g6    = f1   * (int64_t) g6;
359 |   int64_t f1g7_2  = f1_2 * (int64_t) g7;
360 |   int64_t f1g8    = f1   * (int64_t) g8;
361 |   int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
362 |   int64_t f2g0    = f2   * (int64_t) g0;
363 |   int64_t f2g1    = f2   * (int64_t) g1;
364 |   int64_t f2g2    = f2   * (int64_t) g2;
365 |   int64_t f2g3    = f2   * (int64_t) g3;
366 |   int64_t f2g4    = f2   * (int64_t) g4;
367 |   int64_t f2g5    = f2   * (int64_t) g5;
368 |   int64_t f2g6    = f2   * (int64_t) g6;
369 |   int64_t f2g7    = f2   * (int64_t) g7;
370 |   int64_t f2g8_19 = f2   * (int64_t) g8_19;
371 |   int64_t f2g9_19 = f2   * (int64_t) g9_19;
372 |   int64_t f3g0    = f3   * (int64_t) g0;
373 |   int64_t f3g1_2  = f3_2 * (int64_t) g1;
374 |   int64_t f3g2    = f3   * (int64_t) g2;
375 |   int64_t f3g3_2  = f3_2 * (int64_t) g3;
376 |   int64_t f3g4    = f3   * (int64_t) g4;
377 |   int64_t f3g5_2  = f3_2 * (int64_t) g5;
378 |   int64_t f3g6    = f3   * (int64_t) g6;
379 |   int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
380 |   int64_t f3g8_19 = f3   * (int64_t) g8_19;
381 |   int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
382 |   int64_t f4g0    = f4   * (int64_t) g0;
383 |   int64_t f4g1    = f4   * (int64_t) g1;
384 |   int64_t f4g2    = f4   * (int64_t) g2;
385 |   int64_t f4g3    = f4   * (int64_t) g3;
386 |   int64_t f4g4    = f4   * (int64_t) g4;
387 |   int64_t f4g5    = f4   * (int64_t) g5;
388 |   int64_t f4g6_19 = f4   * (int64_t) g6_19;
389 |   int64_t f4g7_19 = f4   * (int64_t) g7_19;
390 |   int64_t f4g8_19 = f4   * (int64_t) g8_19;
391 |   int64_t f4g9_19 = f4   * (int64_t) g9_19;
392 |   int64_t f5g0    = f5   * (int64_t) g0;
393 |   int64_t f5g1_2  = f5_2 * (int64_t) g1;
394 |   int64_t f5g2    = f5   * (int64_t) g2;
395 |   int64_t f5g3_2  = f5_2 * (int64_t) g3;
396 |   int64_t f5g4    = f5   * (int64_t) g4;
397 |   int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
398 |   int64_t f5g6_19 = f5   * (int64_t) g6_19;
399 |   int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
400 |   int64_t f5g8_19 = f5   * (int64_t) g8_19;
401 |   int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
402 |   int64_t f6g0    = f6   * (int64_t) g0;
403 |   int64_t f6g1    = f6   * (int64_t) g1;
404 |   int64_t f6g2    = f6   * (int64_t) g2;
405 |   int64_t f6g3    = f6   * (int64_t) g3;
406 |   int64_t f6g4_19 = f6   * (int64_t) g4_19;
407 |   int64_t f6g5_19 = f6   * (int64_t) g5_19;
408 |   int64_t f6g6_19 = f6   * (int64_t) g6_19;
409 |   int64_t f6g7_19 = f6   * (int64_t) g7_19;
410 |   int64_t f6g8_19 = f6   * (int64_t) g8_19;
411 |   int64_t f6g9_19 = f6   * (int64_t) g9_19;
412 |   int64_t f7g0    = f7   * (int64_t) g0;
413 |   int64_t f7g1_2  = f7_2 * (int64_t) g1;
414 |   int64_t f7g2    = f7   * (int64_t) g2;
415 |   int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
416 |   int64_t f7g4_19 = f7   * (int64_t) g4_19;
417 |   int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
418 |   int64_t f7g6_19 = f7   * (int64_t) g6_19;
419 |   int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
420 |   int64_t f7g8_19 = f7   * (int64_t) g8_19;
421 |   int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
422 |   int64_t f8g0    = f8   * (int64_t) g0;
423 |   int64_t f8g1    = f8   * (int64_t) g1;
424 |   int64_t f8g2_19 = f8   * (int64_t) g2_19;
425 |   int64_t f8g3_19 = f8   * (int64_t) g3_19;
426 |   int64_t f8g4_19 = f8   * (int64_t) g4_19;
427 |   int64_t f8g5_19 = f8   * (int64_t) g5_19;
428 |   int64_t f8g6_19 = f8   * (int64_t) g6_19;
429 |   int64_t f8g7_19 = f8   * (int64_t) g7_19;
430 |   int64_t f8g8_19 = f8   * (int64_t) g8_19;
431 |   int64_t f8g9_19 = f8   * (int64_t) g9_19;
432 |   int64_t f9g0    = f9   * (int64_t) g0;
433 |   int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
434 |   int64_t f9g2_19 = f9   * (int64_t) g2_19;
435 |   int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
436 |   int64_t f9g4_19 = f9   * (int64_t) g4_19;
437 |   int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
438 |   int64_t f9g6_19 = f9   * (int64_t) g6_19;
439 |   int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
440 |   int64_t f9g8_19 = f9   * (int64_t) g8_19;
441 |   int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
442 |   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; /* hK sums the ten products with I+J == K or I+J == K+10 */
443 |   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
444 |   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
445 |   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
446 |   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
447 |   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
448 |   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
449 |   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
450 |   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
451 |   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
452 |   int64_t carry0;
453 |   int64_t carry1;
454 |   int64_t carry2;
455 |   int64_t carry3;
456 |   int64_t carry4;
457 |   int64_t carry5;
458 |   int64_t carry6;
459 |   int64_t carry7;
460 |   int64_t carry8;
461 |   int64_t carry9;
462 | 
463 |   /*
464 |   |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
465 |     i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
466 |   |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
467 |     i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
    | 
    |   Each carry step below adds half of the entry's radix before shifting
    |   (rounding to nearest), moves the quotient into the next entry, and
    |   subtracts it back out of the current one. Even entries use 26 bits,
    |   odd entries 25 bits. The steps run as two interleaved chains, one
    |   starting at h0 and one at h4.
468 |   */
469 | 
470 |   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
471 |   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
472 |   /* |h0| <= 2^25 */
473 |   /* |h4| <= 2^25 */
474 |   /* |h1| <= 1.51*2^58 */
475 |   /* |h5| <= 1.51*2^58 */
476 | 
477 |   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
478 |   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
479 |   /* |h1| <= 2^24; from now on fits into int32 */
480 |   /* |h5| <= 2^24; from now on fits into int32 */
481 |   /* |h2| <= 1.21*2^59 */
482 |   /* |h6| <= 1.21*2^59 */
483 | 
484 |   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
485 |   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
486 |   /* |h2| <= 2^25; from now on fits into int32 unchanged */
487 |   /* |h6| <= 2^25; from now on fits into int32 unchanged */
488 |   /* |h3| <= 1.51*2^58 */
489 |   /* |h7| <= 1.51*2^58 */
490 | 
491 |   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
492 |   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
493 |   /* |h3| <= 2^24; from now on fits into int32 unchanged */
494 |   /* |h7| <= 2^24; from now on fits into int32 unchanged */
495 |   /* |h4| <= 1.52*2^33 */
496 |   /* |h8| <= 1.52*2^33 */
497 | 
498 |   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
499 |   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
500 |   /* |h4| <= 2^25; from now on fits into int32 unchanged */
501 |   /* |h8| <= 2^25; from now on fits into int32 unchanged */
502 |   /* |h5| <= 1.01*2^24 */
503 |   /* |h9| <= 1.51*2^58 */
504 | 
505 |   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; /* the carry out of h9 wraps into h0 with a factor 19 */
506 |   /* |h9| <= 2^24; from now on fits into int32 unchanged */
507 |   /* |h0| <= 1.8*2^37 */
508 | 
509 |   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
510 |   /* |h0| <= 2^25; from now on fits into int32 unchanged */
511 |   /* |h1| <= 1.01*2^24 */
512 | 
513 |   h[0] = (int32_t)h0; /* first store to h; all reads of f and g happened above */
514 |   h[1] = (int32_t)h1;
515 |   h[2] = (int32_t)h2;
516 |   h[3] = (int32_t)h3;
517 |   h[4] = (int32_t)h4;
518 |   h[5] = (int32_t)h5;
519 |   h[6] = (int32_t)h6;
520 |   h[7] = (int32_t)h7;
521 |   h[8] = (int32_t)h8;
522 |   h[9] = (int32_t)h9;
523 | }
524 | 
525 | /*
526 | h = f * 121666
527 | Can overlap h with f.
528 | 
529 | Every entry of f is multiplied by 121666 in 64-bit arithmetic, then one
530 | rounding carry pass is run over the odd entries and one over the even
531 | entries before the results are narrowed back to 32 bits.
532 | 
533 | Preconditions:
534 |    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
535 | 
536 | Postconditions:
537 |    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
538 | */
539 | 
540 | void fe_mul121666(fe h,fe f)
541 | {
542 |   int64_t wide[10];
543 |   int64_t carry;
544 |   int idx;
545 | 
546 |   /* All of f is consumed here, before anything is stored to h. */
547 |   for (idx = 0; idx < 10; ++idx) {
548 |     const int32_t entry = f[idx];
549 |     wide[idx] = entry * (int64_t) 121666;
550 |   }
551 | 
552 |   /* Odd entries. Each step touches only (idx, idx+1), so the steps are independent. */
553 |   for (idx = 1; idx < 9; idx += 2) {
554 |     carry = (wide[idx] + (int64_t) (1<<24)) >> 25;
555 |     wide[idx + 1] += carry;
556 |     wide[idx] -= carry << 25;
557 |   }
558 |   /* Entry 9 wraps around into entry 0, scaled by 19. */
559 |   carry = (wide[9] + (int64_t) (1<<24)) >> 25;
560 |   wide[0] += carry * 19;
561 |   wide[9] -= carry << 25;
562 | 
563 |   /* Even entries. */
564 |   for (idx = 0; idx < 10; idx += 2) {
565 |     carry = (wide[idx] + (int64_t) (1<<25)) >> 26;
566 |     wide[idx + 1] += carry;
567 |     wide[idx] -= carry << 26;
568 |   }
569 | 
570 |   for (idx = 0; idx < 10; ++idx) {
571 |     h[idx] = (int32_t)wide[idx];
572 |   }
573 | }
592 | 
593 | /*
594 | h = f * f
595 | Can overlap h with f: every entry of f is copied into a local before h is stored.
596 | 
597 | Preconditions:
598 |    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
599 | 
600 | Postconditions:
601 |    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
602 | */
603 | 
604 | /*
605 | See the "Notes on implementation strategy" comment above fe_mul in this file for discussion of implementation strategy.
    | 
    | Naming of the partial products below: fIfJ (I <= J) is f[I]*f[J]; the
    | numeric suffix is the total constant factor folded into that product.
    | Each cross term I != J is computed once with its factor doubled,
    | instead of being computed for both (I,J) and (J,I).
606 | */
607 | 
608 | void fe_sq(fe h,fe f)
609 | {
610 |   int32_t f0 = f[0];
611 |   int32_t f1 = f[1];
612 |   int32_t f2 = f[2];
613 |   int32_t f3 = f[3];
614 |   int32_t f4 = f[4];
615 |   int32_t f5 = f[5];
616 |   int32_t f6 = f[6];
617 |   int32_t f7 = f[7];
618 |   int32_t f8 = f[8];
619 |   int32_t f9 = f[9];
620 |   int32_t f0_2 = 2 * f0;
621 |   int32_t f1_2 = 2 * f1;
622 |   int32_t f2_2 = 2 * f2;
623 |   int32_t f3_2 = 2 * f3;
624 |   int32_t f4_2 = 2 * f4;
625 |   int32_t f5_2 = 2 * f5;
626 |   int32_t f6_2 = 2 * f6;
627 |   int32_t f7_2 = 2 * f7;
628 |   int32_t f5_38 = 38 * f5; /* 1.31*2^30 */
629 |   int32_t f6_19 = 19 * f6; /* 1.31*2^30 */
630 |   int32_t f7_38 = 38 * f7; /* 1.31*2^30 */
631 |   int32_t f8_19 = 19 * f8; /* 1.31*2^30 */
632 |   int32_t f9_38 = 38 * f9; /* 1.31*2^30 */
633 |   int64_t f0f0    = f0   * (int64_t) f0; /* the cast makes every product below a 64-bit multiplication */
634 |   int64_t f0f1_2  = f0_2 * (int64_t) f1;
635 |   int64_t f0f2_2  = f0_2 * (int64_t) f2;
636 |   int64_t f0f3_2  = f0_2 * (int64_t) f3;
637 |   int64_t f0f4_2  = f0_2 * (int64_t) f4;
638 |   int64_t f0f5_2  = f0_2 * (int64_t) f5;
639 |   int64_t f0f6_2  = f0_2 * (int64_t) f6;
640 |   int64_t f0f7_2  = f0_2 * (int64_t) f7;
641 |   int64_t f0f8_2  = f0_2 * (int64_t) f8;
642 |   int64_t f0f9_2  = f0_2 * (int64_t) f9;
643 |   int64_t f1f1_2  = f1_2 * (int64_t) f1;
644 |   int64_t f1f2_2  = f1_2 * (int64_t) f2;
645 |   int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
646 |   int64_t f1f4_2  = f1_2 * (int64_t) f4;
647 |   int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
648 |   int64_t f1f6_2  = f1_2 * (int64_t) f6;
649 |   int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
650 |   int64_t f1f8_2  = f1_2 * (int64_t) f8;
651 |   int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
652 |   int64_t f2f2    = f2   * (int64_t) f2;
653 |   int64_t f2f3_2  = f2_2 * (int64_t) f3;
654 |   int64_t f2f4_2  = f2_2 * (int64_t) f4;
655 |   int64_t f2f5_2  = f2_2 * (int64_t) f5;
656 |   int64_t f2f6_2  = f2_2 * (int64_t) f6;
657 |   int64_t f2f7_2  = f2_2 * (int64_t) f7;
658 |   int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
659 |   int64_t f2f9_38 = f2   * (int64_t) f9_38;
660 |   int64_t f3f3_2  = f3_2 * (int64_t) f3;
661 |   int64_t f3f4_2  = f3_2 * (int64_t) f4;
662 |   int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
663 |   int64_t f3f6_2  = f3_2 * (int64_t) f6;
664 |   int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
665 |   int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
666 |   int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
667 |   int64_t f4f4    = f4   * (int64_t) f4;
668 |   int64_t f4f5_2  = f4_2 * (int64_t) f5;
669 |   int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
670 |   int64_t f4f7_38 = f4   * (int64_t) f7_38;
671 |   int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
672 |   int64_t f4f9_38 = f4   * (int64_t) f9_38;
673 |   int64_t f5f5_38 = f5   * (int64_t) f5_38;
674 |   int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
675 |   int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
676 |   int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
677 |   int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
678 |   int64_t f6f6_19 = f6   * (int64_t) f6_19;
679 |   int64_t f6f7_38 = f6   * (int64_t) f7_38;
680 |   int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
681 |   int64_t f6f9_38 = f6   * (int64_t) f9_38;
682 |   int64_t f7f7_38 = f7   * (int64_t) f7_38;
683 |   int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
684 |   int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
685 |   int64_t f8f8_19 = f8   * (int64_t) f8_19;
686 |   int64_t f8f9_38 = f8   * (int64_t) f9_38;
687 |   int64_t f9f9_38 = f9   * (int64_t) f9_38;
688 |   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; /* hK sums the products with I+J == K or I+J == K+10 */
689 |   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
690 |   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
691 |   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
692 |   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
693 |   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
694 |   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
695 |   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
696 |   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
697 |   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
698 |   int64_t carry0;
699 |   int64_t carry1;
700 |   int64_t carry2;
701 |   int64_t carry3;
702 |   int64_t carry4;
703 |   int64_t carry5;
704 |   int64_t carry6;
705 |   int64_t carry7;
706 |   int64_t carry8;
707 |   int64_t carry9;
708 | 
709 |   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; /* same carry sequence as in fe_mul: two interleaved chains starting at h0 and h4 */
710 |   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
711 | 
712 |   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
713 |   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
714 | 
715 |   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
716 |   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
717 | 
718 |   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
719 |   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
720 | 
721 |   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
722 |   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
723 | 
724 |   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; /* the carry out of h9 wraps into h0 with a factor 19 */
725 | 
726 |   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
727 | 
728 |   h[0] = (int32_t)h0; /* first store to h; all reads of f happened above */
729 |   h[1] = (int32_t)h1;
730 |   h[2] = (int32_t)h2;
731 |   h[3] = (int32_t)h3;
732 |   h[4] = (int32_t)h4;
733 |   h[5] = (int32_t)h5;
734 |   h[6] = (int32_t)h6;
735 |   h[7] = (int32_t)h7;
736 |   h[8] = (int32_t)h8;
737 |   h[9] = (int32_t)h9;
738 | }
739 | 
740 | /*
741 | h = f - g, entry by entry (no carrying is done here).
742 | Can overlap h with f or g.
743 | 
744 | Preconditions:
745 |    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
746 |    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
747 | 
748 | Postconditions:
749 |    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
750 | */
751 | 
752 | void fe_sub(fe h,fe f,fe g)
753 | {
754 |   int32_t diff[10];
755 |   int idx;
756 | 
757 |   /* Finish reading both inputs before the first store, so h may alias f or g. */
758 |   for (idx = 0; idx < 10; ++idx) {
759 |     const int32_t lhs = f[idx];
760 |     const int32_t rhs = g[idx];
761 |     diff[idx] = lhs - rhs;
762 |   }
763 |   for (idx = 0; idx < 10; ++idx) {
764 |     h[idx] = diff[idx];
765 |   }
766 | }
795 | 
796 | /*
797 | Preconditions:
798 |   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
799 | 
800 | Write p=2^255-19; q=floor(h/p).
801 | Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
802 | 
803 | Proof:
804 |   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
805 |   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
806 | 
807 |   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
808 |   Then 0<y<1.
809 | 
810 |   Write r=h-pq.
811 |   Have 0<=r<=p-1=2^255-20.
812 |   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
813 | 
814 |   Write x=r+19(2^-255)r+y.
815 |   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
816 | 
817 |   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
818 |   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
819 | */
820 | 
821 | void fe_tobytes(unsigned char *s,fe h)
822 | {
823 |   int32_t h0 = h[0];  /* work on local copies; the caller's h is never written */
824 |   int32_t h1 = h[1];
825 |   int32_t h2 = h[2];
826 |   int32_t h3 = h[3];
827 |   int32_t h4 = h[4];
828 |   int32_t h5 = h[5];
829 |   int32_t h6 = h[6];
830 |   int32_t h7 = h[7];
831 |   int32_t h8 = h[8];
832 |   int32_t h9 = h[9];
833 |   int32_t q;  /* quotient estimate; see the proof in the comment preceding this function */
834 |   int32_t carry0;
835 |   int32_t carry1;
836 |   int32_t carry2;
837 |   int32_t carry3;
838 |   int32_t carry4;
839 |   int32_t carry5;
840 |   int32_t carry6;
841 |   int32_t carry7;
842 |   int32_t carry8;
843 |   int32_t carry9;
844 | 
845 |   q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;  /* start from 19*h9 plus a 2^24 rounding term, shifted right by 25 */
846 |   q = (h0 + q) >> 26;  /* then fold in limbs 0..9 in turn: shift by 26 after even limbs, by 25 after odd limbs */
847 |   q = (h1 + q) >> 25;
848 |   q = (h2 + q) >> 26;
849 |   q = (h3 + q) >> 25;
850 |   q = (h4 + q) >> 26;
851 |   q = (h5 + q) >> 25;
852 |   q = (h6 + q) >> 26;
853 |   q = (h7 + q) >> 25;
854 |   q = (h8 + q) >> 26;
855 |   q = (h9 + q) >> 25;
856 | 
857 |   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
858 |   h0 += 19 * q;  /* only the +19q part is applied here; the 2^255 q part is covered by dropping carry9 below */
859 |   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
860 | 
861 |   carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;  /* carry chain: take the bits above each limb's width, add them to the next limb, remove them here */
862 |   carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
863 |   carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
864 |   carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
865 |   carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
866 |   carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
867 |   carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
868 |   carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
869 |   carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
870 |   carry9 = h9 >> 25;               h9 -= carry9 << 25;  /* the top carry is removed from h9 but not added to any limb */
871 |                   /* h10 = carry9 */
872 | 
873 |   /*
874 |   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
875 |   Have h0+...+2^230 h9 between 0 and 2^255-1;
876 |   evidently 2^255 h10-2^255 q = 0.
877 |   Goal: Output h0+...+2^230 h9.
878 |   */
879 | 
880 |   s[0] = h0 >> 0;  /* pack the limbs into s[0..31], least-significant byte first; each store keeps only the low byte */
881 |   s[1] = h0 >> 8;
882 |   s[2] = h0 >> 16;
883 |   s[3] = (h0 >> 24) | (h1 << 2);  /* bytes that straddle two limbs OR the top bits of one with the low bits of the next */
884 |   s[4] = h1 >> 6;
885 |   s[5] = h1 >> 14;
886 |   s[6] = (h1 >> 22) | (h2 << 3);
887 |   s[7] = h2 >> 5;
888 |   s[8] = h2 >> 13;
889 |   s[9] = (h2 >> 21) | (h3 << 5);
890 |   s[10] = h3 >> 3;
891 |   s[11] = h3 >> 11;
892 |   s[12] = (h3 >> 19) | (h4 << 6);
893 |   s[13] = h4 >> 2;
894 |   s[14] = h4 >> 10;
895 |   s[15] = h4 >> 18;
896 |   s[16] = h5 >> 0;
897 |   s[17] = h5 >> 8;
898 |   s[18] = h5 >> 16;
899 |   s[19] = (h5 >> 24) | (h6 << 1);
900 |   s[20] = h6 >> 7;
901 |   s[21] = h6 >> 15;
902 |   s[22] = (h6 >> 23) | (h7 << 3);
903 |   s[23] = h7 >> 5;
904 |   s[24] = h7 >> 13;
905 |   s[25] = (h7 >> 21) | (h8 << 4);
906 |   s[26] = h8 >> 4;
907 |   s[27] = h8 >> 12;
908 |   s[28] = (h8 >> 20) | (h9 << 6);
909 |   s[29] = h9 >> 2;
910 |   s[30] = h9 >> 10;
911 |   s[31] = h9 >> 18;
912 | }
913 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/fe.h:
--------------------------------------------------------------------------------
 1 | #ifndef FE_H
 2 | #define FE_H
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | typedef int32_t fe[10];
 7 | 
 8 | /*
 9 | fe means field element.
10 | Here the field is \Z/(2^255-19).
11 | An element t, entries t[0]...t[9], represents the integer
12 | t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
13 | Bounds on each t[i] vary depending on context.
14 | */
15 | 
16 | #define fe_frombytes crypto_scalarmult_curve25519_ref10_fe_frombytes
17 | #define fe_tobytes crypto_scalarmult_curve25519_ref10_fe_tobytes
18 | #define fe_copy crypto_scalarmult_curve25519_ref10_fe_copy
19 | #define fe_0 crypto_scalarmult_curve25519_ref10_fe_0
20 | #define fe_1 crypto_scalarmult_curve25519_ref10_fe_1
21 | #define fe_cswap crypto_scalarmult_curve25519_ref10_fe_cswap
22 | #define fe_add crypto_scalarmult_curve25519_ref10_fe_add
23 | #define fe_sub crypto_scalarmult_curve25519_ref10_fe_sub
24 | #define fe_mul crypto_scalarmult_curve25519_ref10_fe_mul
25 | #define fe_sq crypto_scalarmult_curve25519_ref10_fe_sq
26 | #define fe_mul121666 crypto_scalarmult_curve25519_ref10_fe_mul121666
27 | #define fe_invert crypto_scalarmult_curve25519_ref10_fe_invert
28 | 
29 | extern void fe_frombytes(fe,const unsigned char *);
30 | extern void fe_tobytes(unsigned char *,fe);
31 | 
32 | extern void fe_copy(fe,fe);
33 | extern void fe_0(fe);
34 | extern void fe_1(fe);
35 | extern void fe_cswap(fe,fe,unsigned int);
36 | 
37 | extern void fe_add(fe,fe,fe);
38 | extern void fe_sub(fe,fe,fe);
39 | extern void fe_mul(fe,fe,fe);
40 | extern void fe_sq(fe,fe);
41 | extern void fe_mul121666(fe,fe);
42 | extern void fe_invert(fe,fe);
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/montgomery.h:
--------------------------------------------------------------------------------
  1 | 
  2 | /* qhasm: fe X2 */
  3 | 
  4 | /* qhasm: fe Z2 */
  5 | 
  6 | /* qhasm: fe X3 */
  7 | 
  8 | /* qhasm: fe Z3 */
  9 | 
 10 | /* qhasm: fe X4 */
 11 | 
 12 | /* qhasm: fe Z4 */
 13 | 
 14 | /* qhasm: fe X5 */
 15 | 
 16 | /* qhasm: fe Z5 */
 17 | 
 18 | /* qhasm: fe A */
 19 | 
 20 | /* qhasm: fe B */
 21 | 
 22 | /* qhasm: fe C */
 23 | 
 24 | /* qhasm: fe D */
 25 | 
 26 | /* qhasm: fe E */
 27 | 
 28 | /* qhasm: fe AA */
 29 | 
 30 | /* qhasm: fe BB */
 31 | 
 32 | /* qhasm: fe DA */
 33 | 
 34 | /* qhasm: fe CB */
 35 | 
 36 | /* qhasm: fe t0 */
 37 | 
 38 | /* qhasm: fe t1 */
 39 | 
 40 | /* qhasm: fe t2 */
 41 | 
 42 | /* qhasm: fe t3 */
 43 | 
 44 | /* qhasm: fe t4 */
 45 | 
 46 | /* qhasm: enter ladder */
 47 | 
 48 | /* qhasm: D = X3-Z3 */
 49 | /* asm 1: fe_sub(>D=fe#5,<X3=fe#3,<Z3=fe#4); */
 50 | /* asm 2: fe_sub(>D=tmp0,<X3=x3,<Z3=z3); */
 51 | fe_sub(tmp0,x3,z3);
 52 | 
 53 | /* qhasm: B = X2-Z2 */
 54 | /* asm 1: fe_sub(>B=fe#6,<X2=fe#1,<Z2=fe#2); */
 55 | /* asm 2: fe_sub(>B=tmp1,<X2=x2,<Z2=z2); */
 56 | fe_sub(tmp1,x2,z2);
 57 | 
 58 | /* qhasm: A = X2+Z2 */
 59 | /* asm 1: fe_add(>A=fe#1,<X2=fe#1,<Z2=fe#2); */
 60 | /* asm 2: fe_add(>A=x2,<X2=x2,<Z2=z2); */
 61 | fe_add(x2,x2,z2);
 62 | 
 63 | /* qhasm: C = X3+Z3 */
 64 | /* asm 1: fe_add(>C=fe#2,<X3=fe#3,<Z3=fe#4); */
 65 | /* asm 2: fe_add(>C=z2,<X3=x3,<Z3=z3); */
 66 | fe_add(z2,x3,z3);
 67 | 
 68 | /* qhasm: DA = D*A */
 69 | /* asm 1: fe_mul(>DA=fe#4,<D=fe#5,<A=fe#1); */
 70 | /* asm 2: fe_mul(>DA=z3,<D=tmp0,<A=x2); */
 71 | fe_mul(z3,tmp0,x2);
 72 | 
 73 | /* qhasm: CB = C*B */
 74 | /* asm 1: fe_mul(>CB=fe#2,<C=fe#2,<B=fe#6); */
 75 | /* asm 2: fe_mul(>CB=z2,<C=z2,<B=tmp1); */
 76 | fe_mul(z2,z2,tmp1);
 77 | 
 78 | /* qhasm: BB = B^2 */
 79 | /* asm 1: fe_sq(>BB=fe#5,<B=fe#6); */
 80 | /* asm 2: fe_sq(>BB=tmp0,<B=tmp1); */
 81 | fe_sq(tmp0,tmp1);
 82 | 
 83 | /* qhasm: AA = A^2 */
 84 | /* asm 1: fe_sq(>AA=fe#6,<A=fe#1); */
 85 | /* asm 2: fe_sq(>AA=tmp1,<A=x2); */
 86 | fe_sq(tmp1,x2);
 87 | 
 88 | /* qhasm: t0 = DA+CB */
 89 | /* asm 1: fe_add(>t0=fe#3,<DA=fe#4,<CB=fe#2); */
 90 | /* asm 2: fe_add(>t0=x3,<DA=z3,<CB=z2); */
 91 | fe_add(x3,z3,z2);
 92 | 
 93 | /* qhasm: assign x3 to t0 */
 94 | 
 95 | /* qhasm: t1 = DA-CB */
 96 | /* asm 1: fe_sub(>t1=fe#2,<DA=fe#4,<CB=fe#2); */
 97 | /* asm 2: fe_sub(>t1=z2,<DA=z3,<CB=z2); */
 98 | fe_sub(z2,z3,z2);
 99 | 
100 | /* qhasm: X4 = AA*BB */
101 | /* asm 1: fe_mul(>X4=fe#1,<AA=fe#6,<BB=fe#5); */
102 | /* asm 2: fe_mul(>X4=x2,<AA=tmp1,<BB=tmp0); */
103 | fe_mul(x2,tmp1,tmp0);
104 | 
105 | /* qhasm: E = AA-BB */
106 | /* asm 1: fe_sub(>E=fe#6,<AA=fe#6,<BB=fe#5); */
107 | /* asm 2: fe_sub(>E=tmp1,<AA=tmp1,<BB=tmp0); */
108 | fe_sub(tmp1,tmp1,tmp0);
109 | 
110 | /* qhasm: t2 = t1^2 */
111 | /* asm 1: fe_sq(>t2=fe#2,<t1=fe#2); */
112 | /* asm 2: fe_sq(>t2=z2,<t1=z2); */
113 | fe_sq(z2,z2);
114 | 
115 | /* qhasm: t3 = a24*E */
116 | /* asm 1: fe_mul121666(>t3=fe#4,<E=fe#6); */
117 | /* asm 2: fe_mul121666(>t3=z3,<E=tmp1); */
118 | fe_mul121666(z3,tmp1);
119 | 
120 | /* qhasm: X5 = t0^2 */
121 | /* asm 1: fe_sq(>X5=fe#3,<t0=fe#3); */
122 | /* asm 2: fe_sq(>X5=x3,<t0=x3); */
123 | fe_sq(x3,x3);
124 | 
125 | /* qhasm: t4 = BB+t3 */
126 | /* asm 1: fe_add(>t4=fe#5,<BB=fe#5,<t3=fe#4); */
127 | /* asm 2: fe_add(>t4=tmp0,<BB=tmp0,<t3=z3); */
128 | fe_add(tmp0,tmp0,z3);
129 | 
130 | /* qhasm: Z5 = X1*t2 */
131 | /* asm 1: fe_mul(>Z5=fe#4,x1,<t2=fe#2); */
132 | /* asm 2: fe_mul(>Z5=z3,x1,<t2=z2); */
133 | fe_mul(z3,x1,z2);
134 | 
135 | /* qhasm: Z4 = E*t4 */
136 | /* asm 1: fe_mul(>Z4=fe#2,<E=fe#6,<t4=fe#5); */
137 | /* asm 2: fe_mul(>Z4=z2,<E=tmp1,<t4=tmp0); */
138 | fe_mul(z2,tmp1,tmp0);
139 | 
140 | /* qhasm: return */
141 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/pow225521.h:
--------------------------------------------------------------------------------
  1 | 
  2 | /* qhasm: fe z1 */
  3 | 
  4 | /* qhasm: fe z2 */
  5 | 
  6 | /* qhasm: fe z8 */
  7 | 
  8 | /* qhasm: fe z9 */
  9 | 
 10 | /* qhasm: fe z11 */
 11 | 
 12 | /* qhasm: fe z22 */
 13 | 
 14 | /* qhasm: fe z_5_0 */
 15 | 
 16 | /* qhasm: fe z_10_5 */
 17 | 
 18 | /* qhasm: fe z_10_0 */
 19 | 
 20 | /* qhasm: fe z_20_10 */
 21 | 
 22 | /* qhasm: fe z_20_0 */
 23 | 
 24 | /* qhasm: fe z_40_20 */
 25 | 
 26 | /* qhasm: fe z_40_0 */
 27 | 
 28 | /* qhasm: fe z_50_10 */
 29 | 
 30 | /* qhasm: fe z_50_0 */
 31 | 
 32 | /* qhasm: fe z_100_50 */
 33 | 
 34 | /* qhasm: fe z_100_0 */
 35 | 
 36 | /* qhasm: fe z_200_100 */
 37 | 
 38 | /* qhasm: fe z_200_0 */
 39 | 
 40 | /* qhasm: fe z_250_50 */
 41 | 
 42 | /* qhasm: fe z_250_0 */
 43 | 
 44 | /* qhasm: fe z_255_5 */
 45 | 
 46 | /* qhasm: fe z_255_21 */
 47 | 
 48 | /* qhasm: enter pow225521 */
 49 | 
 50 | /* qhasm: z2 = z1^2^1 */
 51 | /* asm 1: fe_sq(>z2=fe#1,<z1=fe#11); for (i = 1;i < 1;++i) fe_sq(>z2=fe#1,>z2=fe#1); */
 52 | /* asm 2: fe_sq(>z2=t0,<z1=z); for (i = 1;i < 1;++i) fe_sq(>z2=t0,>z2=t0); */
 53 | fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
 54 | 
 55 | /* qhasm: z8 = z2^2^2 */
 56 | /* asm 1: fe_sq(>z8=fe#2,<z2=fe#1); for (i = 1;i < 2;++i) fe_sq(>z8=fe#2,>z8=fe#2); */
 57 | /* asm 2: fe_sq(>z8=t1,<z2=t0); for (i = 1;i < 2;++i) fe_sq(>z8=t1,>z8=t1); */
 58 | fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
 59 | 
 60 | /* qhasm: z9 = z1*z8 */
 61 | /* asm 1: fe_mul(>z9=fe#2,<z1=fe#11,<z8=fe#2); */
 62 | /* asm 2: fe_mul(>z9=t1,<z1=z,<z8=t1); */
 63 | fe_mul(t1,z,t1);
 64 | 
 65 | /* qhasm: z11 = z2*z9 */
 66 | /* asm 1: fe_mul(>z11=fe#1,<z2=fe#1,<z9=fe#2); */
 67 | /* asm 2: fe_mul(>z11=t0,<z2=t0,<z9=t1); */
 68 | fe_mul(t0,t0,t1);
 69 | 
 70 | /* qhasm: z22 = z11^2^1 */
 71 | /* asm 1: fe_sq(>z22=fe#3,<z11=fe#1); for (i = 1;i < 1;++i) fe_sq(>z22=fe#3,>z22=fe#3); */
 72 | /* asm 2: fe_sq(>z22=t2,<z11=t0); for (i = 1;i < 1;++i) fe_sq(>z22=t2,>z22=t2); */
 73 | fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2);
 74 | 
 75 | /* qhasm: z_5_0 = z9*z22 */
 76 | /* asm 1: fe_mul(>z_5_0=fe#2,<z9=fe#2,<z22=fe#3); */
 77 | /* asm 2: fe_mul(>z_5_0=t1,<z9=t1,<z22=t2); */
 78 | fe_mul(t1,t1,t2);
 79 | 
 80 | /* qhasm: z_10_5 = z_5_0^2^5 */
 81 | /* asm 1: fe_sq(>z_10_5=fe#3,<z_5_0=fe#2); for (i = 1;i < 5;++i) fe_sq(>z_10_5=fe#3,>z_10_5=fe#3); */
 82 | /* asm 2: fe_sq(>z_10_5=t2,<z_5_0=t1); for (i = 1;i < 5;++i) fe_sq(>z_10_5=t2,>z_10_5=t2); */
 83 | fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2);
 84 | 
 85 | /* qhasm: z_10_0 = z_10_5*z_5_0 */
 86 | /* asm 1: fe_mul(>z_10_0=fe#2,<z_10_5=fe#3,<z_5_0=fe#2); */
 87 | /* asm 2: fe_mul(>z_10_0=t1,<z_10_5=t2,<z_5_0=t1); */
 88 | fe_mul(t1,t2,t1);
 89 | 
 90 | /* qhasm: z_20_10 = z_10_0^2^10 */
 91 | /* asm 1: fe_sq(>z_20_10=fe#3,<z_10_0=fe#2); for (i = 1;i < 10;++i) fe_sq(>z_20_10=fe#3,>z_20_10=fe#3); */
 92 | /* asm 2: fe_sq(>z_20_10=t2,<z_10_0=t1); for (i = 1;i < 10;++i) fe_sq(>z_20_10=t2,>z_20_10=t2); */
 93 | fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2);
 94 | 
 95 | /* qhasm: z_20_0 = z_20_10*z_10_0 */
 96 | /* asm 1: fe_mul(>z_20_0=fe#3,<z_20_10=fe#3,<z_10_0=fe#2); */
 97 | /* asm 2: fe_mul(>z_20_0=t2,<z_20_10=t2,<z_10_0=t1); */
 98 | fe_mul(t2,t2,t1);
 99 | 
100 | /* qhasm: z_40_20 = z_20_0^2^20 */
101 | /* asm 1: fe_sq(>z_40_20=fe#4,<z_20_0=fe#3); for (i = 1;i < 20;++i) fe_sq(>z_40_20=fe#4,>z_40_20=fe#4); */
102 | /* asm 2: fe_sq(>z_40_20=t3,<z_20_0=t2); for (i = 1;i < 20;++i) fe_sq(>z_40_20=t3,>z_40_20=t3); */
103 | fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3);
104 | 
105 | /* qhasm: z_40_0 = z_40_20*z_20_0 */
106 | /* asm 1: fe_mul(>z_40_0=fe#3,<z_40_20=fe#4,<z_20_0=fe#3); */
107 | /* asm 2: fe_mul(>z_40_0=t2,<z_40_20=t3,<z_20_0=t2); */
108 | fe_mul(t2,t3,t2);
109 | 
110 | /* qhasm: z_50_10 = z_40_0^2^10 */
111 | /* asm 1: fe_sq(>z_50_10=fe#3,<z_40_0=fe#3); for (i = 1;i < 10;++i) fe_sq(>z_50_10=fe#3,>z_50_10=fe#3); */
112 | /* asm 2: fe_sq(>z_50_10=t2,<z_40_0=t2); for (i = 1;i < 10;++i) fe_sq(>z_50_10=t2,>z_50_10=t2); */
113 | fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2);
114 | 
115 | /* qhasm: z_50_0 = z_50_10*z_10_0 */
116 | /* asm 1: fe_mul(>z_50_0=fe#2,<z_50_10=fe#3,<z_10_0=fe#2); */
117 | /* asm 2: fe_mul(>z_50_0=t1,<z_50_10=t2,<z_10_0=t1); */
118 | fe_mul(t1,t2,t1);
119 | 
120 | /* qhasm: z_100_50 = z_50_0^2^50 */
121 | /* asm 1: fe_sq(>z_100_50=fe#3,<z_50_0=fe#2); for (i = 1;i < 50;++i) fe_sq(>z_100_50=fe#3,>z_100_50=fe#3); */
122 | /* asm 2: fe_sq(>z_100_50=t2,<z_50_0=t1); for (i = 1;i < 50;++i) fe_sq(>z_100_50=t2,>z_100_50=t2); */
123 | fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2);
124 | 
125 | /* qhasm: z_100_0 = z_100_50*z_50_0 */
126 | /* asm 1: fe_mul(>z_100_0=fe#3,<z_100_50=fe#3,<z_50_0=fe#2); */
127 | /* asm 2: fe_mul(>z_100_0=t2,<z_100_50=t2,<z_50_0=t1); */
128 | fe_mul(t2,t2,t1);
129 | 
130 | /* qhasm: z_200_100 = z_100_0^2^100 */
131 | /* asm 1: fe_sq(>z_200_100=fe#4,<z_100_0=fe#3); for (i = 1;i < 100;++i) fe_sq(>z_200_100=fe#4,>z_200_100=fe#4); */
132 | /* asm 2: fe_sq(>z_200_100=t3,<z_100_0=t2); for (i = 1;i < 100;++i) fe_sq(>z_200_100=t3,>z_200_100=t3); */
133 | fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3);
134 | 
135 | /* qhasm: z_200_0 = z_200_100*z_100_0 */
136 | /* asm 1: fe_mul(>z_200_0=fe#3,<z_200_100=fe#4,<z_100_0=fe#3); */
137 | /* asm 2: fe_mul(>z_200_0=t2,<z_200_100=t3,<z_100_0=t2); */
138 | fe_mul(t2,t3,t2);
139 | 
140 | /* qhasm: z_250_50 = z_200_0^2^50 */
141 | /* asm 1: fe_sq(>z_250_50=fe#3,<z_200_0=fe#3); for (i = 1;i < 50;++i) fe_sq(>z_250_50=fe#3,>z_250_50=fe#3); */
142 | /* asm 2: fe_sq(>z_250_50=t2,<z_200_0=t2); for (i = 1;i < 50;++i) fe_sq(>z_250_50=t2,>z_250_50=t2); */
143 | fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2);
144 | 
145 | /* qhasm: z_250_0 = z_250_50*z_50_0 */
146 | /* asm 1: fe_mul(>z_250_0=fe#2,<z_250_50=fe#3,<z_50_0=fe#2); */
147 | /* asm 2: fe_mul(>z_250_0=t1,<z_250_50=t2,<z_50_0=t1); */
148 | fe_mul(t1,t2,t1);
149 | 
150 | /* qhasm: z_255_5 = z_250_0^2^5 */
151 | /* asm 1: fe_sq(>z_255_5=fe#2,<z_250_0=fe#2); for (i = 1;i < 5;++i) fe_sq(>z_255_5=fe#2,>z_255_5=fe#2); */
152 | /* asm 2: fe_sq(>z_255_5=t1,<z_250_0=t1); for (i = 1;i < 5;++i) fe_sq(>z_255_5=t1,>z_255_5=t1); */
153 | fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1);
154 | 
155 | /* qhasm: z_255_21 = z_255_5*z11 */
156 | /* asm 1: fe_mul(>z_255_21=fe#12,<z_255_5=fe#2,<z11=fe#1); */
157 | /* asm 2: fe_mul(>z_255_21=out,<z_255_5=t1,<z11=t0); */
158 | fe_mul(out,t1,t0);
159 | 
160 | /* qhasm: return */
161 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/scalarmult.c:
--------------------------------------------------------------------------------
 1 | #include "fe.h"
 2 | #include "x25519_ref10.h"
 3 | 
 4 | int x25519_ref10_scalarmult(uint8_t *q, const uint8_t *n, const uint8_t *p)
 5 | {
 6 |   uint8_t e[32];  /* local, modifiable copy of the scalar n */
 7 |   unsigned int i;
 8 |   fe x1;
 9 |   fe x2;
10 |   fe z2;
11 |   fe x3;
12 |   fe z3;
13 |   fe tmp0;
14 |   fe tmp1;
15 |   int pos;
16 |   unsigned int swap;
17 |   unsigned int b;
18 | 
19 |   for (i = 0;i < 32;++i) e[i] = n[i];
20 |   e[0] &= 248;  /* clear the three lowest bits of the scalar */
21 |   e[31] &= 127;  /* clear the highest bit (bit 255) */
22 |   e[31] |= 64;  /* set bit 254 */
23 | 
24 |   fe_frombytes(x1,p);  /* load the 32 input bytes at p into x1 */
25 |   fe_1(x2);
26 |   fe_0(z2);
27 |   fe_copy(x3,x1);
28 |   fe_1(z3);
29 | 
30 |   swap = 0;
31 |   for (pos = 254;pos >= 0;--pos) {  /* walk the scalar from bit 254 down to bit 0 */
32 |     b = e[pos / 8] >> (pos & 7);  /* shift bit number pos into the low position */
33 |     b &= 1;
34 |     swap ^= b;  /* nonzero only when this bit differs from the previous one */
35 |     fe_cswap(x2,x3,swap);
36 |     fe_cswap(z2,z3,swap);
37 |     swap = b;  /* remember this bit for the next iteration */
38 | #include "montgomery.h"
39 |   }  /* the #include above pastes the statements of montgomery.h (one ladder step) into the loop body */
40 |   fe_cswap(x2,x3,swap);  /* final conditional swap driven by the last scalar bit processed */
41 |   fe_cswap(z2,z3,swap);
42 | 
43 |   fe_invert(z2,z2);  /* presumably z2 = 1/z2; fe_invert is defined elsewhere */
44 |   fe_mul(x2,x2,z2);
45 |   fe_tobytes(q,x2);  /* write the 32-byte encoding of x2 to q */
46 |   return 0;  /* there is no failure path: the return value is always 0 */
47 | }
48 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/x25519_ref10.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Ruby C extension providing bindings to the ref10 implementation of the
 3 | X25519 Diffie-Hellman algorithm
 4 | */
 5 | 
 6 | #include "ruby.h"
 7 | #include "x25519_ref10.h"
 8 | 
 9 | static VALUE mX25519 = Qnil;
10 | static VALUE mX25519_Provider = Qnil;
11 | static VALUE mX25519_Provider_Ref10 = Qnil;
12 | 
13 | static VALUE mX25519_Provider_Ref10_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u);
14 | static VALUE mX25519_Provider_Ref10_scalarmult_base(VALUE self, VALUE scalar);
15 | 
16 | /* Extension entry point: defines X25519::Provider::Ref10 with its scalarmult and scalarmult_base singleton methods */
17 | void Init_x25519_ref10(void)
18 | {
19 |     mX25519 = rb_define_module("X25519");
20 |     mX25519_Provider = rb_define_module_under(mX25519, "Provider");
21 |     mX25519_Provider_Ref10 = rb_define_module_under(mX25519_Provider, "Ref10");
22 | 
23 |     rb_define_singleton_method(mX25519_Provider_Ref10, "scalarmult", mX25519_Provider_Ref10_scalarmult, 2);  /* arity 2: scalar, montgomery_u */
24 |     rb_define_singleton_method(mX25519_Provider_Ref10, "scalarmult_base", mX25519_Provider_Ref10_scalarmult_base, 1);  /* arity 1: scalar */
25 | }
26 | 
27 | /* Variable-base scalar multiplication: checks that both arguments are 32-byte Strings, then returns the 32-byte product as a new String */
28 | static VALUE mX25519_Provider_Ref10_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u)
29 | {
30 |     X25519_KEY product;
31 | 
32 |     StringValue(scalar);  /* coerce to a String; Ruby raises TypeError if that is not possible */
33 |     if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) {
34 |         rb_raise(
35 |             rb_eArgError,
36 |             "expected %d-byte scalar, got %ld",
37 |             X25519_KEYSIZE_BYTES,
38 |             RSTRING_LEN(scalar)
39 |         );
40 |     }
41 | 
42 |     StringValue(montgomery_u);  /* same coercion and length check for the u-coordinate */
43 |     if(RSTRING_LEN(montgomery_u) != X25519_KEYSIZE_BYTES) {
44 |         rb_raise(
45 |             rb_eArgError,
46 |             "expected %d-byte Montgomery-u coordinate, got %ld",
47 |             X25519_KEYSIZE_BYTES,
48 |             RSTRING_LEN(montgomery_u)
49 |         );
50 |     }
51 | 
52 |     x25519_ref10_scalarmult(  /* the int return value is not checked */
53 |         product,
54 |         (const uint8_t *)RSTRING_PTR(scalar),
55 |         (const uint8_t *)RSTRING_PTR(montgomery_u)
56 |     );
57 | 
58 |     return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES);  /* copy the product bytes into a fresh Ruby String */
59 | }
60 | 
61 | /* Fixed-base scalar multiplication: checks that the scalar is a 32-byte String, then returns the 32-byte result as a new String */
62 | static VALUE mX25519_Provider_Ref10_scalarmult_base(VALUE self, VALUE scalar)
63 | {
64 |     X25519_KEY product;
65 | 
66 |     StringValue(scalar);  /* coerce to a String; Ruby raises TypeError if that is not possible */
67 |     if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) {
68 |         rb_raise(
69 |             rb_eArgError,
70 |             "expected %d-byte scalar, got %ld",
71 |             X25519_KEYSIZE_BYTES,
72 |             RSTRING_LEN(scalar)
73 |         );
74 |     }
75 | 
76 |     x25519_ref10_scalarmult_base(  /* the int return value is not checked */
77 |         product,
78 |         (const uint8_t *)RSTRING_PTR(scalar)
79 |     );
80 | 
81 |     return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES);  /* copy the result bytes into a fresh Ruby String */
82 | }
83 | 


--------------------------------------------------------------------------------
/ext/x25519_ref10/x25519_ref10.h:
--------------------------------------------------------------------------------
 1 | #ifndef X25519_REF10_H
 2 | #define X25519_REF10_H
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | #define X25519_KEYSIZE_BYTES 32
 7 | typedef uint8_t X25519_KEY[X25519_KEYSIZE_BYTES];
 8 | 
 9 | /* Variable-base scalar multiplication: writes 32 bytes to q, computed from scalar n and u-coordinate p */
10 | int x25519_ref10_scalarmult(uint8_t *q, const uint8_t *n, const uint8_t *p);
11 | 
12 | /* Fixed-base scalar multiplication: q receives the result for scalar n */
13 | int x25519_ref10_scalarmult_base(uint8_t *q, const uint8_t *n);
14 | 
15 | #endif /* X25519_REF10_H */
16 | 


--------------------------------------------------------------------------------
/lib/x25519.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require "securerandom"
  4 | 
  5 | require "x25519/version"
  6 | 
  7 | require "x25519/montgomery_u"
  8 | require "x25519/scalar"
  9 | require "x25519/test_vectors"
 10 | 
 11 | # Native extension backends
 12 | require "x25519_ref10"
 13 | begin
 14 |   require "x25519_precomputed"
 15 | rescue LoadError
 16 |   require "x25519/precomputed_not_available"
 17 | end
 18 | 
 19 | # The X25519 elliptic curve Diffie-Hellman algorithm
 20 | module X25519
 21 |   module_function
 22 | 
 23 |   # Size of an X25519 key (public or private) in bytes
 24 |   KEY_SIZE = 32
 25 | 
 26 |   # Raised when we detect a degenerate (i.e. all-zero) public key
 27 |   InvalidKeyError = Class.new(StandardError)
 28 | 
 29 |   # Raised when the built-in self-test fails
 30 |   SelfTestFailure = Class.new(StandardError)
 31 | 
 32 |   class << self
 33 |     # Obtain the backend provider module
 34 |     attr_accessor :provider
 35 |   end
 36 | 
 37 |   # ref10 is the default provider
 38 |   self.provider = X25519::Provider::Ref10
 39 | 
 40 |   # X25519::Precomputed requires a 4th generation Intel Core CPU or newer,
 41 |   # so only enable it if we detect we're on a supported platform. Otherwise,
 42 |   # fall back to the ref10 portable C implementation.
 43 |   self.provider = X25519::Provider::Precomputed if X25519::Provider::Precomputed.available?
 44 | 
 45 |   # Raw fixed-base scalar multiplication function that acts directly on
 46 |   # bytestrings. Calculates the coordinate of the elliptic curve point that
 47 |   # represents the public key for a given scalar.
 48 |   #
 49 |   # @param scalar_bytes [String] a serialized private scalar
 50 |   #
 51 |   # @return [String] compressed Montgomery-u coordinate of the resulting point
 52 |   def calculate_public_key(scalar_bytes)
 53 |     validate_key_bytes(scalar_bytes)
 54 |     provider.scalarmult_base(scalar_bytes)
 55 |   end
 56 | 
 57 |   # Raw Diffie-Hellman function that acts directly on bytestrings. An
 58 |   # alternative to the object-oriented API
 59 |   #
 60 |   # @param scalar_bytes [String] a serialized private scalar
 61 |   # @param montgomery_u_bytes [String] a point we wish to multiply by the scalar
 62 |   #
 63 |   # @return [String] resulting point, serialized as bytes
 64 |   def diffie_hellman(scalar_bytes, montgomery_u_bytes)
 65 |     validate_key_bytes(scalar_bytes)
 66 |     validate_key_bytes(montgomery_u_bytes)
 67 | 
 68 |     # The point located at a Montgomery-u coordinate of zero always returns
 69 |     # the point at zero regardless of which scalar it's multiplied with
 70 |     raise InvalidKeyError, "degenerate public key" if montgomery_u_bytes == ("\0" * KEY_SIZE)
 71 | 
 72 |     provider.scalarmult(scalar_bytes, montgomery_u_bytes)
 73 |   end
 74 | 
 75 |   # Ensure a serialized key is a String of exactly KEY_SIZE bytes; returns true, otherwise raises TypeError or ArgumentError
 76 |   def validate_key_bytes(key_bytes)
 77 |     raise TypeError, "expected String, got #{key_bytes.class}" unless key_bytes.is_a?(String)
 78 |     return true if key_bytes.bytesize == KEY_SIZE
 79 | 
 80 |     raise ArgumentError, "expected #{KEY_SIZE}-byte String, got #{key_bytes.bytesize}"
 81 |   end
 82 | 
 83 |   # Run the built-in test vectors through the selected provider; returns true, or raises SelfTestFailure on any mismatch
 84 |   def self_test
 85 |     X25519::TestVectors::VARIABLE_BASE.each do |v|
 86 |       shared_secret = provider.scalarmult([v.scalar].pack("H*"), [v.input_coord].pack("H*")) # vectors are hex text; pack("H*") yields raw bytes
 87 |       raise SelfTestFailure, "self test failed!" unless shared_secret.unpack1("H*") == v.output_coord
 88 |     end
 89 | 
 90 |     X25519::TestVectors::FIXED_BASE.each do |v|
 91 |       public_key = provider.scalarmult_base([v.scalar].pack("H*"))
 92 |       raise SelfTestFailure, "self test failed!" unless public_key.unpack1("H*") == v.output_coord
 93 |     end
 94 | 
 95 |     true
 96 |   end
 97 | end
 98 | 
 99 | # Automatically run self-test when library loads
100 | X25519.self_test
101 | 


--------------------------------------------------------------------------------
/lib/x25519/montgomery_u.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module X25519
 4 |   # X25519 public keys and shared secrets
 5 |   #
 6 |   # Montgomery-u coordinates of points on the elliptic curve used by X25519
 7 |   # (a.k.a. Curve25519)
 8 |   class MontgomeryU
 9 |     # Create an object representing a Montgomery-u coordinate from a bytestring
10 |     #
11 |     # @param bytes [String] 32-byte compressed Montgomery-u coordinate
12 |     def initialize(bytes)
13 |       X25519.validate_key_bytes(bytes)
14 | 
15 |       # The point located at a Montgomery-u coordinate of zero always returns
16 |       # the point at zero regardless of which scalar it's multiplied with
17 |       raise InvalidKeyError, "degenerate public key" if bytes == ("\0" * KEY_SIZE)
18 | 
19 |       @bytes = bytes
20 |     end
21 | 
22 |     # Return a compressed Montgomery-u coordinate serialized as a bytestring
23 |     #
24 |     # @return [String] bytestring serialization of a Montgomery-u coordinate
25 |     def to_bytes
26 |       @bytes
27 |     end
28 | 
29 |     # Show hex representation of serialized coordinate in string inspection
30 |     def inspect
31 |       "#<#{self.class}:#{@bytes.unpack1('H*')}>"
32 |     end
33 |   end
34 | end
35 | 


--------------------------------------------------------------------------------
/lib/x25519/precomputed_not_available.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module X25519
 4 |   module Provider
 5 |     # We need this class and method even if we can't compile x25519_precomputed
 6 |     class Precomputed
 7 |       def self.available?
 8 |         false
 9 |       end
10 |     end
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/lib/x25519/scalar.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module X25519
 4 |   # X25519 private keys
 5 |   #
 6 |   # Scalars are the integer component of scalar multiplication, multiplied
 7 |   # against an elliptic curve point.
 8 |   class Scalar
 9 |     # Securely generate a random scalar
10 |     def self.generate
11 |       new(SecureRandom.random_bytes(X25519::KEY_SIZE))
12 |     end
13 | 
14 |     # Create an X25519 scalar object from a bytestring
15 |     #
16 |     # @param bytes [String] 32-byte random secret scalar
17 |     def initialize(bytes)
18 |       X25519.validate_key_bytes(bytes)
19 |       @scalar_bytes = bytes
20 |     end
21 | 
22 |     # Variable-base scalar multiplication a.k.a. Diffie-Hellman
23 |     #
24 |     # This can be used to obtain a shared secret from a public key
25 |     #
26 |     # @param montgomery_u [X25519::MontgomeryU] coordinate of the public key/point to perform D-H with
27 |     #
28 |     # @return [X25519::MontgomeryU] resulting point (i.e. D-H shared secret)
29 |     def diffie_hellman(montgomery_u)
30 |       raise TypeError, "expected X25519::MontgomeryU, got #{montgomery_u}" unless montgomery_u.is_a?(MontgomeryU)
31 | 
32 |       MontgomeryU.new(X25519.diffie_hellman(@scalar_bytes, montgomery_u.to_bytes))
33 |     end
34 |     alias multiply diffie_hellman
35 | 
36 |     # Fixed-base scalar multiplication. Calculates a public key from a
37 |     # private scalar
38 |     #
39 |     # @return [X25519::MontgomeryU] resulting point (i.e. public key)
40 |     def public_key
41 |       MontgomeryU.new(X25519.calculate_public_key(@scalar_bytes))
42 |     end
43 |     alias multiply_base public_key
44 | 
45 |     # Return a bytestring representation of this scalar
46 |     #
47 |     # @return [String] scalar converted to a bytestring
48 |     def to_bytes
49 |       @scalar_bytes
50 |     end
51 | 
52 |     # String inspection that does not leak the private scalar
53 |     def inspect
54 |       to_s
55 |     end
56 |   end
57 | end
58 | 


--------------------------------------------------------------------------------
/lib/x25519/test_vectors.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module X25519
 4 |   # Known-good inputs and outputs for X25519 functions
 5 |   module TestVectors
 6 |     # Test vector for variable-base scalar multiplication
 7 |     VariableBaseVector = Struct.new(:scalar, :input_coord, :output_coord)
 8 | 
 9 |     # X25519 variable-base test vectors from RFC 7748
10 |     VARIABLE_BASE = [
11 |       VariableBaseVector.new(
12 |         "a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
13 |         "e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
14 |         "c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552"
15 |       ),
16 |       VariableBaseVector.new(
17 |         "4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
18 |         "e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
19 |         "95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957"
20 |       )
21 |     ].freeze
22 | 
23 |     # Test vector for fixed-base scalar multiplication
24 |     FixedBaseVector = Struct.new(:scalar, :output_coord)
25 | 
26 |     # X25519 fixed-base test vectors, generated via RbNaCl/libsodium
27 |     FIXED_BASE = [
28 |       FixedBaseVector.new(
29 |         "a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
30 |         "1c9fd88f45606d932a80c71824ae151d15d73e77de38e8e000852e614fae7019"
31 |       ),
32 |       FixedBaseVector.new(
33 |         "4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
34 |         "ff63fe57bfbf43fa3f563628b149af704d3db625369c49983650347a6a71e00e"
35 |       )
36 |     ].freeze
37 |   end
38 | end
39 | 


--------------------------------------------------------------------------------
/lib/x25519/version.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | module X25519
4 |   VERSION = "1.0.10"
5 | end
6 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require "bundler/setup"
 4 | require "x25519"
 5 | require "support/provider_examples"
 6 | 
 7 | RSpec.configure do |config|
 8 |   # Enable flags like --only-failures and --next-failure
 9 |   config.example_status_persistence_file_path = ".rspec_status"
10 | 
11 |   # Disable RSpec exposing methods globally on `Module` and `main`
12 |   config.disable_monkey_patching!
13 | 
14 |   config.expect_with :rspec do |c|
15 |     c.syntax = :expect
16 |   end
17 | end
18 | 
19 | # Convert a binary string to hex
20 | def hex(string)
21 |   string.unpack1("H*")
22 | end
23 | 
24 | # Parse a hex string to binary
25 | def unhex(string)
26 |   [string].pack("H*")
27 | end
28 | 


--------------------------------------------------------------------------------
/spec/support/provider_examples.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | # Shared examples for all X25519::Provider backends
 4 | RSpec.shared_examples "X25519::Provider" do
 5 |   describe "#scalarmult" do
 6 |     it "passes the RFC 7748 test vectors" do
 7 |       X25519::TestVectors::VARIABLE_BASE.each do |v|
 8 |         shared_secret = described_class.scalarmult(unhex(v.scalar), unhex(v.input_coord))
 9 |         expect(hex(shared_secret)).to eq v.output_coord
10 |       end
11 |     end
12 |   end
13 | 
14 |   describe "#scalarmult_base" do
15 |     it "passes the test vectors" do
16 |       X25519::TestVectors::FIXED_BASE.each do |v|
17 |         public_key = described_class.scalarmult_base(unhex(v.scalar))
18 |         expect(hex(public_key)).to eq v.output_coord
19 |       end
20 |     end
21 |   end
22 | end
23 | 


--------------------------------------------------------------------------------
/spec/x25519/montgomery_u_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | RSpec.describe X25519::MontgomeryU do
 4 |   subject(:point) { described_class.new(unhex(coordinate_hex)) }
 5 | 
 6 |   let(:coordinate_hex) { "e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c" }
 7 | 
 8 |   describe "#to_bytes" do
 9 |     it "serializes #{described_class} as a Encoding::BINARY String" do
10 |       bytes = point.to_bytes
11 |       expect(bytes).to be_a String
12 |       expect(bytes.encoding).to eq Encoding::BINARY
13 | 
14 |       expect(hex(bytes)).to eq coordinate_hex
15 |     end
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/spec/x25519/provider/precomputed_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
3 | RSpec.describe X25519::Provider::Precomputed do
4 |   if described_class.available? # checked while the example group is being defined
5 |     include_examples "X25519::Provider" # shared examples from spec/support/provider_examples.rb
6 |   else
7 |     pending "#{described_class} provider not available on this CPU"
8 |   end
9 | end
10 | 


--------------------------------------------------------------------------------
/spec/x25519/provider/ref10_spec.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | RSpec.describe X25519::Provider::Ref10 do
4 |   include_examples "X25519::Provider" # shared examples from spec/support/provider_examples.rb
5 | end
6 | 


--------------------------------------------------------------------------------
/spec/x25519/scalar_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | RSpec.describe X25519::Scalar do
 4 |   subject(:scalar) { described_class.new(unhex(scalar_hex)) }
 5 | 
 6 |   let(:scalar_hex) { "a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4" }
 7 | 
 8 |   describe ".generate" do
 9 |     it "generates random keys" do
10 |       # Not great, but better than nothing
11 |       key1 = described_class.generate
12 |       key2 = described_class.generate
13 | 
14 |       expect(key1).not_to eq key2
15 |     end
16 |   end
17 | 
18 |   describe "#multiply" do
19 |     it "raises TypeError if given a non-X25519::MontgomeryU type" do
20 |       expect { scalar.multiply(nil) }.to raise_error(TypeError)
21 |       expect { scalar.multiply("derp") }.to raise_error(TypeError)
22 |     end
23 | 
24 |     context "with RFC 7748 test vectors" do
25 |       it "passes the test vectors" do
26 |         X25519::TestVectors::VARIABLE_BASE.each do |v|
27 |           scalar = described_class.new(unhex(v.scalar))
28 |           point  = X25519::MontgomeryU.new(unhex(v.input_coord))
29 | 
30 |           shared_secret = scalar.multiply(point)
31 |           expect(hex(shared_secret.to_bytes)).to eq v.output_coord
32 |         end
33 |       end
34 |     end
35 |   end
36 | 
37 |   describe "#multiply_base" do
38 |     context "with RFC 7748 test vectors" do
39 |       it "passes the test vectors" do
40 |         X25519::TestVectors::FIXED_BASE.each do |v|
41 |           scalar = described_class.new(unhex(v.scalar))
42 |           expect(hex(scalar.multiply_base.to_bytes)).to eq v.output_coord
43 |         end
44 |       end
45 |     end
46 |   end
47 | 
48 |   describe "#to_bytes" do
49 |     it "serializes #{described_class} as a Encoding::BINARY String" do
50 |       bytes = scalar.to_bytes
51 |       expect(bytes).to be_a String
52 |       expect(bytes.encoding).to eq Encoding::BINARY
53 |       expect(hex(bytes)).to eq scalar_hex
54 |     end
55 |   end
56 | end
57 | 


--------------------------------------------------------------------------------
/spec/x25519_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | RSpec.describe X25519 do
 4 |   it "has a version number" do
 5 |     expect(described_class::VERSION).not_to be_nil
 6 |   end
 7 | 
 8 |   describe ".diffie_hellman" do
 9 |     let(:example_scalar) { unhex(X25519::TestVectors::VARIABLE_BASE.first.scalar) }
10 | 
11 |     it "raises ArgumentError if one of the inputs is the wrong length" do
12 |       expect { described_class.diffie_hellman("foo", "bar") }.to raise_error(ArgumentError)
13 |     end
14 | 
15 |     it "raises TypeError if one of the inputs is nil" do
16 |       expect { described_class.diffie_hellman(nil, "foobar") }.to raise_error(TypeError)
17 |     end
18 | 
19 |     it "raises InvalidKeyError if the point is degenerate" do
20 |       degenerate_key = "\0" * X25519::KEY_SIZE
21 | 
22 |       expect do
23 |         described_class.diffie_hellman(example_scalar, degenerate_key)
24 |       end.to raise_error(X25519::InvalidKeyError)
25 |     end
26 | 
27 |     context "with RFC 7748 test vectors" do
28 |       it "passes the test vectors" do
29 |         X25519::TestVectors::VARIABLE_BASE.each do |v|
30 |           shared_secret = described_class.diffie_hellman(unhex(v.scalar), unhex(v.input_coord))
31 |           expect(hex(shared_secret)).to eq v.output_coord
32 |         end
33 |       end
34 |     end
35 |   end
36 | end
37 | 


--------------------------------------------------------------------------------
/x25519.gemspec:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative "lib/x25519/version"
 4 | 
 5 | Gem::Specification.new do |spec|
 6 |   spec.name          = "x25519"
 7 |   spec.version       = X25519::VERSION
 8 |   spec.authors       = ["Tony Arcieri"]
 9 |   spec.email         = ["bascule@gmail.com"]
10 |   spec.summary       = "Public key cryptography library providing the X25519 Elliptic Curve Diffie-Hellman function"
11 |   spec.description = <<-DESCRIPTION.strip.gsub(/\s+/, " ")
12 |     An efficient public key cryptography library for Ruby providing key
13 |     exchange/agreement via the X25519 (a.k.a. Curve25519) Elliptic Curve
14 |     Diffie-Hellman function as described in RFC 7748.
15 |   DESCRIPTION
16 |   spec.homepage      = "https://github.com/RubyCrypto/x25519"
17 |   spec.license       = "BSD-3-Clause" # https://spdx.org/licenses/BSD-3-Clause.html
18 |   spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
19 |   spec.platform      = Gem::Platform::RUBY
20 |   spec.extensions    = ["ext/x25519_precomputed/extconf.rb", "ext/x25519_ref10/extconf.rb"]
21 | 
22 |   spec.required_ruby_version = ">= 2.7"
23 | end
24 | 


--------------------------------------------------------------------------------