├── .github ├── dependabot.yml └── workflows │ ├── md5.yml │ ├── sha1.yml │ ├── sha2.yml │ ├── whirlpool.yml │ └── workspace.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── README.md ├── md5 ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── benches │ └── lib.rs ├── build.rs └── src │ ├── lib.rs │ ├── x64.S │ └── x86.S ├── sha1 ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── benches │ └── lib.rs ├── build.rs └── src │ ├── aarch64.S │ ├── aarch64_apple.S │ ├── lib.rs │ ├── x64.S │ └── x86.S ├── sha2 ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── benches │ └── lib.rs ├── build.rs └── src │ ├── lib.rs │ ├── sha256_aarch64.S │ ├── sha256_aarch64_apple.S │ ├── sha256_x64.S │ ├── sha256_x86.S │ ├── sha512_x64.S │ └── sha512_x86.S └── whirlpool ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── benches └── lib.rs ├── build.rs └── src ├── lib.rs ├── x64.S └── x86.S /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | - package-ecosystem: github-actions 9 | directory: "/" 10 | schedule: 11 | interval: weekly 12 | open-pull-requests-limit: 10 13 | -------------------------------------------------------------------------------- /.github/workflows/md5.yml: -------------------------------------------------------------------------------- 1 | name: md5 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "md5/**" 7 | - "Cargo.*" 8 | push: 9 | branches: master 10 | 11 | defaults: 12 | run: 13 | working-directory: md5 14 | 15 | env: 16 | CARGO_INCREMENTAL: 0 17 | RUSTFLAGS: "-Dwarnings" 18 | # Workaround for: https://github.com/rust-lang/cargo/issues/10303 19 | CARGO_NET_GIT_FETCH_WITH_CLI: "true" 20 | 21 | jobs: 22 | # Linux tests 23 | linux: 24 | strategy: 25 | matrix: 26 | include: 27 | # 32-bit Linux/x86 28 | - target: i686-unknown-linux-gnu 29 | toolchain: 1.45.0 # MSRV 30 | deps: 
sudo apt update && sudo apt install gcc-multilib 31 | - target: i686-unknown-linux-gnu 32 | toolchain: stable 33 | deps: sudo apt update && sudo apt install gcc-multilib 34 | 35 | # 64-bit Linux/x86_64 36 | - target: x86_64-unknown-linux-gnu 37 | toolchain: 1.45.0 # MSRV 38 | deps: true 39 | - target: x86_64-unknown-linux-gnu 40 | toolchain: stable 41 | deps: true 42 | 43 | runs-on: ubuntu-latest 44 | steps: 45 | - uses: actions/checkout@v4 46 | - uses: dtolnay/rust-toolchain@master 47 | with: 48 | toolchain: ${{ matrix.toolchain }} 49 | targets: ${{ matrix.target }} 50 | - run: ${{ matrix.deps }} 51 | - run: cargo test --target ${{ matrix.target }} --release 52 | 53 | # macOS tests 54 | macos: 55 | strategy: 56 | matrix: 57 | toolchain: 58 | - 1.54.0 # MSRV 59 | - stable 60 | 61 | runs-on: macos-latest 62 | steps: 63 | - uses: actions/checkout@v4 64 | - uses: dtolnay/rust-toolchain@master 65 | with: 66 | toolchain: ${{ matrix.toolchain }} 67 | targets: x86_64-apple-darwin 68 | - run: cargo test --release --target x86_64-apple-darwin 69 | -------------------------------------------------------------------------------- /.github/workflows/sha1.yml: -------------------------------------------------------------------------------- 1 | name: sha1 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "sha1/**" 7 | - "Cargo.*" 8 | push: 9 | branches: master 10 | 11 | defaults: 12 | run: 13 | working-directory: sha1 14 | 15 | env: 16 | CARGO_INCREMENTAL: 0 17 | RUSTFLAGS: "-Dwarnings" 18 | # Workaround for: https://github.com/rust-lang/cargo/issues/10303 19 | CARGO_NET_GIT_FETCH_WITH_CLI: "true" 20 | 21 | jobs: 22 | # Linux tests 23 | linux: 24 | strategy: 25 | matrix: 26 | include: 27 | # 32-bit Linux/x86 28 | - target: i686-unknown-linux-gnu 29 | rust: 1.45.0 # MSRV 30 | deps: sudo apt update && sudo apt install gcc-multilib 31 | - target: i686-unknown-linux-gnu 32 | rust: stable 33 | deps: sudo apt update && sudo apt install gcc-multilib 34 | 35 | # 64-bit Linux/x86_64 36 | - 
target: x86_64-unknown-linux-gnu 37 | rust: 1.45.0 # MSRV 38 | - target: x86_64-unknown-linux-gnu 39 | rust: stable 40 | 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v4 44 | - uses: dtolnay/rust-toolchain@master 45 | with: 46 | toolchain: ${{ matrix.rust }} 47 | targets: ${{ matrix.target }} 48 | - run: ${{ matrix.deps }} 49 | - run: cargo test --target ${{ matrix.target }} --release 50 | 51 | # macOS tests 52 | macos: 53 | strategy: 54 | matrix: 55 | toolchain: 56 | - 1.54.0 # MSRV 57 | - stable 58 | 59 | runs-on: macos-latest 60 | steps: 61 | - uses: actions/checkout@v4 62 | - uses: dtolnay/rust-toolchain@master 63 | with: 64 | toolchain: ${{ matrix.toolchain }} 65 | targets: x86_64-apple-darwin 66 | - run: cargo test --release --target x86_64-apple-darwin 67 | 68 | # Cross-compiled tests 69 | cross: 70 | strategy: 71 | matrix: 72 | include: 73 | # ARM64 74 | #- target: aarch64-unknown-linux-gnu 75 | # rust: 1.43.0 # MSRV 76 | - target: aarch64-unknown-linux-gnu 77 | rust: stable 78 | 79 | runs-on: ubuntu-latest 80 | steps: 81 | - uses: actions/checkout@v4 82 | - uses: dtolnay/rust-toolchain@master 83 | with: 84 | toolchain: ${{ matrix.rust }} 85 | targets: ${{ matrix.target }} 86 | - run: cargo install cross 87 | - run: cross test --target ${{ matrix.target }} --release 88 | -------------------------------------------------------------------------------- /.github/workflows/sha2.yml: -------------------------------------------------------------------------------- 1 | name: sha2 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "sha2/**" 7 | - "Cargo.*" 8 | push: 9 | branches: master 10 | 11 | defaults: 12 | run: 13 | working-directory: sha2 14 | 15 | env: 16 | CARGO_INCREMENTAL: 0 17 | RUSTFLAGS: "-Dwarnings" 18 | # Workaround for: https://github.com/rust-lang/cargo/issues/10303 19 | CARGO_NET_GIT_FETCH_WITH_CLI: "true" 20 | 21 | jobs: 22 | # Linux tests 23 | linux: 24 | strategy: 25 | matrix: 26 | include: 27 | # 32-bit Linux/x86 28 | - 
target: i686-unknown-linux-gnu 29 | rust: 1.45.0 # MSRV 30 | deps: sudo apt update && sudo apt install gcc-multilib 31 | - target: i686-unknown-linux-gnu 32 | rust: stable 33 | deps: sudo apt update && sudo apt install gcc-multilib 34 | 35 | # 64-bit Linux/x86_64 36 | - target: x86_64-unknown-linux-gnu 37 | rust: 1.45.0 # MSRV 38 | - target: x86_64-unknown-linux-gnu 39 | rust: stable 40 | 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v4 44 | - uses: dtolnay/rust-toolchain@master 45 | with: 46 | toolchain: ${{ matrix.rust }} 47 | targets: ${{ matrix.target }} 48 | - run: ${{ matrix.deps }} 49 | - run: cargo test --target ${{ matrix.target }} --release 50 | 51 | # macOS tests 52 | macos: 53 | strategy: 54 | matrix: 55 | toolchain: 56 | - 1.54.0 # MSRV 57 | - stable 58 | 59 | runs-on: macos-latest 60 | steps: 61 | - uses: actions/checkout@v4 62 | - uses: dtolnay/rust-toolchain@master 63 | with: 64 | toolchain: ${{ matrix.toolchain }} 65 | targets: x86_64-apple-darwin 66 | - run: cargo test --release --target x86_64-apple-darwin 67 | 68 | # Cross-compiled tests 69 | cross: 70 | strategy: 71 | matrix: 72 | include: 73 | # ARM64 74 | #- target: aarch64-unknown-linux-gnu 75 | # rust: 1.43.0 # MSRV 76 | - target: aarch64-unknown-linux-gnu 77 | rust: stable 78 | 79 | runs-on: ubuntu-latest 80 | steps: 81 | - uses: actions/checkout@v4 82 | - uses: dtolnay/rust-toolchain@master 83 | with: 84 | toolchain: ${{ matrix.rust }} 85 | targets: ${{ matrix.target }} 86 | - run: cargo install cross 87 | - run: cross test --target ${{ matrix.target }} --release 88 | -------------------------------------------------------------------------------- /.github/workflows/whirlpool.yml: -------------------------------------------------------------------------------- 1 | name: whirlpool 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "whirlpool/**" 7 | - "Cargo.*" 8 | push: 9 | branches: master 10 | 11 | defaults: 12 | run: 13 | working-directory: whirlpool 14 | 15 | env: 
16 | CARGO_INCREMENTAL: 0 17 | RUSTFLAGS: "-Dwarnings" 18 | # Workaround for: https://github.com/rust-lang/cargo/issues/10303 19 | CARGO_NET_GIT_FETCH_WITH_CLI: "true" 20 | 21 | jobs: 22 | # Linux tests 23 | linux: 24 | strategy: 25 | matrix: 26 | include: 27 | # 32-bit Linux/x86 28 | - target: i686-unknown-linux-gnu 29 | toolchain: 1.45.0 # MSRV 30 | deps: sudo apt update && sudo apt install gcc-multilib 31 | - target: i686-unknown-linux-gnu 32 | toolchain: stable 33 | deps: sudo apt update && sudo apt install gcc-multilib 34 | 35 | # 64-bit Linux/x86_64 36 | - target: x86_64-unknown-linux-gnu 37 | toolchain: 1.45.0 # MSRV 38 | deps: true 39 | - target: x86_64-unknown-linux-gnu 40 | toolchain: stable 41 | deps: true 42 | 43 | runs-on: ubuntu-latest 44 | steps: 45 | - uses: actions/checkout@v4 46 | - uses: dtolnay/rust-toolchain@master 47 | with: 48 | toolchain: ${{ matrix.toolchain }} 49 | targets: ${{ matrix.target }} 50 | - run: ${{ matrix.deps }} 51 | - run: cargo test --target ${{ matrix.target }} --release 52 | 53 | # macOS tests 54 | macos: 55 | strategy: 56 | matrix: 57 | toolchain: 58 | - 1.54.0 # MSRV 59 | - stable 60 | 61 | runs-on: macos-latest 62 | steps: 63 | - uses: actions/checkout@v4 64 | - uses: dtolnay/rust-toolchain@master 65 | with: 66 | toolchain: ${{ matrix.toolchain }} 67 | targets: x86_64-apple-darwin 68 | - run: cargo test --release --target x86_64-apple-darwin 69 | -------------------------------------------------------------------------------- /.github/workflows/workspace.yml: -------------------------------------------------------------------------------- 1 | name: Workspace 2 | 3 | on: 4 | pull_request: 5 | paths-ignore: 6 | - README.md 7 | push: 8 | branches: master 9 | paths-ignore: 10 | - README.md 11 | 12 | jobs: 13 | clippy: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: dtolnay/rust-toolchain@master 18 | with: 19 | toolchain: 1.63.0 20 | components: clippy 21 | - run: cargo clippy --all -- 
-D warnings 22 | 23 | rustfmt: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Checkout sources 27 | uses: actions/checkout@v4 28 | 29 | - name: Install stable toolchain 30 | uses: dtolnay/rust-toolchain@master 31 | with: 32 | toolchain: stable 33 | components: rustfmt 34 | - name: Run cargo fmt 35 | run: cargo fmt --all -- --check 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | */target/ 3 | */*/target/ 4 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | # 4 | version = 3 5 | 6 | [[package]] 7 | name = "cc" 8 | version = "1.0.83" 9 | source = "registry+https://github.com/rust-lang/crates.io-index" 10 | checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" 11 | dependencies = [ 12 | "libc", 13 | ] 14 | 15 | [[package]] 16 | name = "libc" 17 | version = "0.2.154" 18 | source = "registry+https://github.com/rust-lang/crates.io-index" 19 | checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" 20 | 21 | [[package]] 22 | name = "md5-asm" 23 | version = "0.5.2" 24 | dependencies = [ 25 | "cc", 26 | ] 27 | 28 | [[package]] 29 | name = "sha1-asm" 30 | version = "0.5.3" 31 | dependencies = [ 32 | "cc", 33 | ] 34 | 35 | [[package]] 36 | name = "sha2-asm" 37 | version = "0.6.4" 38 | dependencies = [ 39 | "cc", 40 | ] 41 | 42 | [[package]] 43 | name = "whirlpool-asm" 44 | version = "0.6.2" 45 | dependencies = [ 46 | "cc", 47 | ] 48 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "md5", 4 | 
"sha1", 5 | "sha2", 6 | "whirlpool", 7 | ] 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RustCrypto: ASM hashes ![Rust Version][rustc-image] [![Project Chat][chat-image]][chat-link] [![dependency status][deps-image]][deps-link] 2 | 3 | Assembly implementations of hash functions core functionality based on code from 4 | [Project Nayuki](https://www.nayuki.io/). 5 | 6 | Crates in this repository provide only core compression functions, for full hash 7 | functionality please refer to the crates from 8 | [RustCrypto/hashes](https://github.com/RustCrypto/hashes) repository. With 9 | enabled `asm` feature `md5`, `sha-1`, `sha2` and `whirlpool` crates will use 10 | code from this repository. 11 | 12 | ## NOTE: this repo is in maintenance mode only 13 | 14 | Now that inline assembly is stable (as of Rust 1.59), assembly should be added 15 | directly to the relevant crates at <https://github.com/RustCrypto/hashes>. 16 | 17 | For more information, see [#45]. 18 | 19 | ## Supported Platforms 20 | 21 | All crates are tested on the following platforms: 22 | 23 | - Linux (32-bit and 64-bit x86) 24 | - Windows (64-bit x86, GNU only) 25 | - ARM64 (except `md5`, which is x86 only) 26 | 27 | Windows MSVC builds are known to be broken. See [#17]. 28 | 29 | ## Minimum Supported Rust Version 30 | 31 | All crates in this repository support **Rust 1.43** or higher. 32 | 33 | In the future when the minimum supported Rust version is changed, 34 | it will be accompanied by a minor version bump. 35 | 36 | ## License 37 | 38 | All crates are licensed under the [MIT license](http://opensource.org/licenses/MIT). 39 | 40 | ### Contribution 41 | 42 | Unless you explicitly state otherwise, any contribution intentionally submitted 43 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be 44 | dual licensed as above, without any additional terms or conditions. 
45 | 46 | [//]: # (badges) 47 | 48 | [rustc-image]: https://img.shields.io/badge/rustc-1.43+-blue.svg 49 | [chat-image]: https://img.shields.io/badge/zulip-join_chat-blue.svg 50 | [chat-link]: https://rustcrypto.zulipchat.com/#narrow/stream/260041-hashes 51 | [deps-image]: https://deps.rs/repo/github/RustCrypto/asm-hashes/status.svg 52 | [deps-link]: https://deps.rs/repo/github/RustCrypto/asm-hashes 53 | 54 | [//]: # (general links) 55 | 56 | [#17]: https://github.com/RustCrypto/asm-hashes/issues/17 57 | [#45]: https://github.com/RustCrypto/asm-hashes/issues/45 58 | -------------------------------------------------------------------------------- /md5/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## 0.5.2 (2024-05-06) 9 | ### Changed 10 | - Emit compilation error when compiled for Windows targets. 
([#79]) 11 | 12 | [#79]: https://github.com/RustCrypto/asm-hashes/pull/79 13 | 14 | ## 0.5.1 (2023-08-07) 15 | ### Changed 16 | - Prefix x86 asm symbols with `_` on Windows like on Apple ([#61]) 17 | - Fix deprecated use of `cc::Build::compile` ([#59]) 18 | 19 | [#61]: https://github.com/RustCrypto/asm-hashes/pull/61 20 | [#59]: https://github.com/RustCrypto/asm-hashes/pull/59 21 | -------------------------------------------------------------------------------- /md5/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "md5-asm" 3 | version = "0.5.2" 4 | authors = ["RustCrypto Developers"] 5 | license = "MIT" 6 | description = "Assembly implementation of MD5 compression function" 7 | documentation = "https://docs.rs/md5-asm" 8 | repository = "https://github.com/RustCrypto/asm-hashes" 9 | keywords = ["crypto", "md5", "asm"] 10 | categories = ["cryptography", "no-std"] 11 | edition = "2018" 12 | 13 | [build-dependencies] 14 | cc = "1.0" 15 | -------------------------------------------------------------------------------- /md5/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Project Nayuki 2 | Copyright (c) 2017 Artyom Pavlov 3 | 4 | Permission is hereby granted, free of charge, to any 5 | person obtaining a copy of this software and associated 6 | documentation files (the "Software"), to deal in the 7 | Software without restriction, including without 8 | limitation the rights to use, copy, modify, merge, 9 | publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software 11 | is furnished to do so, subject to the following 12 | conditions: 13 | 14 | The above copyright notice and this permission notice 15 | shall be included in all copies or substantial portions 16 | of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 19 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 20 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 21 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 22 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 25 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 26 | DEALINGS IN THE SOFTWARE. 27 | -------------------------------------------------------------------------------- /md5/benches/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![feature(test)] 3 | 4 | extern crate test; 5 | 6 | use test::Bencher; 7 | /// Benchmarks `md5_asm::compress` on a single all-zero 64-byte block. 8 | #[bench] 9 | fn bench_compress(b: &mut Bencher) { 10 | let mut state = Default::default(); 11 | let data = [[0u8; 64]]; 12 | 13 | b.iter(|| { 14 | md5_asm::compress(&mut state, &data); 15 | }); 16 | // Bytes hashed per iteration: number of blocks times the 64-byte block size (not just the block count). 17 | b.bytes = (data.len() * data[0].len()) as u64; 18 | } 19 | -------------------------------------------------------------------------------- /md5/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); 3 | // Pick the assembly source matching the target architecture read from the environment; anything else aborts the build. 4 | let asm_path = if target_arch == "x86" { 5 | "src/x86.S" 6 | } else if target_arch == "x86_64" { 7 | "src/x64.S" 8 | } else { 9 | panic!("Unsupported target architecture: {}", target_arch); 10 | }; 11 | cc::Build::new().flag("-c").file(asm_path).compile("md5"); 12 | } 13 | -------------------------------------------------------------------------------- /md5/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Assembly implementation of the [MD5] compression function. 2 | //! 3 | //! This crate is not intended for direct use, most users should 4 | //! prefer the [`md5`] crate with enabled `asm` feature instead. 5 | //! 
6 | //! Only x86 and x86-64 architectures are currently supported. 7 | //! 8 | //! [MD5]: https://en.wikipedia.org/wiki/MD5 9 | //! [`md5`]: https://crates.io/crates/md5 10 | 11 | #![no_std] 12 | #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] 13 | compile_error!("crate can only be used on x86 and x86-64 architectures"); 14 | 15 | #[cfg(target_os = "windows")] 16 | compile_error!("crate does not support Windows targets"); 17 | 18 | #[link(name = "md5", kind = "static")] 19 | extern "C" { 20 | fn md5_compress(state: &mut [u32; 4], block: &[u8; 64]); 21 | } 22 | 23 | /// Feeds every block of `blocks`, in order, to the assembly MD5 compression routine, updating `state` in place. 24 | #[inline] 25 | pub fn compress(state: &mut [u32; 4], blocks: &[[u8; 64]]) { 26 | // SAFETY: both arguments are references to arrays of exactly the sizes 27 | // named in the `md5_compress` declaration. 28 | blocks.iter().for_each(|block| { 29 | unsafe { md5_compress(state, block) }; 30 | }); 31 | } 32 | -------------------------------------------------------------------------------- /md5/src/x64.S: -------------------------------------------------------------------------------- 1 | /* 2 | * MD5 hash in x86-64 assembly 3 | * 4 | * Copyright (c) 2016 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 22 | */ 23 | 24 | 25 | /* void md5_compress(uint32_t state[4], const uint8_t block[64]) */ 26 | #ifdef __APPLE__ 27 | .globl _md5_compress 28 | _md5_compress: 29 | #else 30 | .globl md5_compress 31 | md5_compress: 32 | #endif 33 | /* 34 | * Storage usage: 35 | * Bytes Location Description 36 | * 4 eax MD5 state variable A 37 | * 4 ebx MD5 state variable B 38 | * 4 ecx MD5 state variable C 39 | * 4 edx MD5 state variable D 40 | * 4 esi Temporary for calculation per round 41 | * 4 edi Temporary for calculation per round 42 | * 8 rbp Base address of block array argument (read-only) 43 | * 8 r8 Base address of state array argument (read-only) 44 | * 16 xmm0 Caller's value of rbx (only low 64 bits are used) 45 | * 16 xmm1 Caller's value of rbp (only low 64 bits are used) 46 | */ 47 | /* Every ROUNDn(a, b, c, d, k, s, t) computes a = b + rotl32(a + f(b, c, d) + W[k] + t, s), where W[k] is the 32-bit word at byte offset 4*k of the block. ROUND0: f = d ^ (b & (c ^ d)) */ 48 | #define ROUND0(a, b, c, d, k, s, t) \ 49 | movl %c, %esi; \ 50 | addl (k*4)(%rbp), %a; \ 51 | xorl %d, %esi; \ 52 | andl %b, %esi; \ 53 | xorl %d, %esi; \ 54 | leal t(%esi,%a), %a; \ 55 | roll $s, %a; \ 56 | addl %b, %a; 57 | /* ROUND1: f = (d & b) | (~d & c); the only round that also uses edi as a temporary */ 58 | #define ROUND1(a, b, c, d, k, s, t) \ 59 | movl %d, %esi; \ 60 | movl %d, %edi; \ 61 | addl (k*4)(%rbp), %a; \ 62 | notl %esi; \ 63 | andl %b, %edi; \ 64 | andl %c, %esi; \ 65 | orl %edi, %esi; \ 66 | leal t(%esi,%a), %a; \ 67 | roll $s, %a; \ 68 | addl %b, %a; 69 | /* ROUND2: f = b ^ c ^ d */ 70 | #define ROUND2(a, b, c, d, k, s, t) \ 71 | movl %c, %esi; \ 72 | addl (k*4)(%rbp), %a; \ 73 | xorl %d, %esi; \ 74 | xorl %b, %esi; \ 75 | leal t(%esi,%a), %a; \ 76 | roll $s, %a; \ 77 | 
addl %b, %a; 78 | /* ROUND3: f = c ^ (b | ~d) */ 79 | #define ROUND3(a, b, c, d, k, s, t) \ 80 | movl %d, %esi; \ 81 | not %esi; \ 82 | addl (k*4)(%rbp), %a; \ 83 | orl %b, %esi; \ 84 | xorl %c, %esi; \ 85 | leal t(%esi,%a), %a; \ 86 | roll $s, %a; \ 87 | addl %b, %a; 88 | 89 | /* Save registers: rbx and rbp are overwritten below, so their values are parked in xmm0/xmm1 (no stack space is used) */ 90 | movq %rbx, %xmm0 91 | movq %rbp, %xmm1 92 | 93 | /* Load arguments: the state pointer arrives in rdi and the block pointer in rsi; both are copied out (to r8 and rbp) because esi/edi serve as round temporaries */ 94 | movq %rsi, %rbp 95 | movl 0(%rdi), %eax /* a */ 96 | movl 4(%rdi), %ebx /* b */ 97 | movl 8(%rdi), %ecx /* c */ 98 | movl 12(%rdi), %edx /* d */ 99 | movq %rdi, %r8 100 | 101 | /* 64 rounds of hashing. Constants with the top bit set are spelled as the equal-mod-2^32 negative value (e.g. -0x28955B88 is 0xD76AA478), presumably so they fit leal's signed 32-bit displacement */ 102 | ROUND0(eax, ebx, ecx, edx, 0, 7, -0x28955B88) 103 | ROUND0(edx, eax, ebx, ecx, 1, 12, -0x173848AA) 104 | ROUND0(ecx, edx, eax, ebx, 2, 17, 0x242070DB) 105 | ROUND0(ebx, ecx, edx, eax, 3, 22, -0x3E423112) 106 | ROUND0(eax, ebx, ecx, edx, 4, 7, -0x0A83F051) 107 | ROUND0(edx, eax, ebx, ecx, 5, 12, 0x4787C62A) 108 | ROUND0(ecx, edx, eax, ebx, 6, 17, -0x57CFB9ED) 109 | ROUND0(ebx, ecx, edx, eax, 7, 22, -0x02B96AFF) 110 | ROUND0(eax, ebx, ecx, edx, 8, 7, 0x698098D8) 111 | ROUND0(edx, eax, ebx, ecx, 9, 12, -0x74BB0851) 112 | ROUND0(ecx, edx, eax, ebx, 10, 17, -0x0000A44F) 113 | ROUND0(ebx, ecx, edx, eax, 11, 22, -0x76A32842) 114 | ROUND0(eax, ebx, ecx, edx, 12, 7, 0x6B901122) 115 | ROUND0(edx, eax, ebx, ecx, 13, 12, -0x02678E6D) 116 | ROUND0(ecx, edx, eax, ebx, 14, 17, -0x5986BC72) 117 | ROUND0(ebx, ecx, edx, eax, 15, 22, 0x49B40821) 118 | ROUND1(eax, ebx, ecx, edx, 1, 5, -0x09E1DA9E) 119 | ROUND1(edx, eax, ebx, ecx, 6, 9, -0x3FBF4CC0) 120 | ROUND1(ecx, edx, eax, ebx, 11, 14, 0x265E5A51) 121 | ROUND1(ebx, ecx, edx, eax, 0, 20, -0x16493856) 122 | ROUND1(eax, ebx, ecx, edx, 5, 5, -0x29D0EFA3) 123 | ROUND1(edx, eax, ebx, ecx, 10, 9, 0x02441453) 124 | ROUND1(ecx, edx, eax, ebx, 15, 14, -0x275E197F) 125 | ROUND1(ebx, ecx, edx, eax, 4, 20, -0x182C0438) 126 | ROUND1(eax, ebx, ecx, edx, 9, 5, 0x21E1CDE6) 127 | ROUND1(edx, eax, ebx, ecx, 14, 9, -0x3CC8F82A) 128 | ROUND1(ecx, edx, eax, ebx, 3, 14, -0x0B2AF279) 129 | ROUND1(ebx, ecx, 
edx, eax, 8, 20, 0x455A14ED) 130 | ROUND1(eax, ebx, ecx, edx, 13, 5, -0x561C16FB) 131 | ROUND1(edx, eax, ebx, ecx, 2, 9, -0x03105C08) 132 | ROUND1(ecx, edx, eax, ebx, 7, 14, 0x676F02D9) 133 | ROUND1(ebx, ecx, edx, eax, 12, 20, -0x72D5B376) 134 | ROUND2(eax, ebx, ecx, edx, 5, 4, -0x0005C6BE) 135 | ROUND2(edx, eax, ebx, ecx, 8, 11, -0x788E097F) 136 | ROUND2(ecx, edx, eax, ebx, 11, 16, 0x6D9D6122) 137 | ROUND2(ebx, ecx, edx, eax, 14, 23, -0x021AC7F4) 138 | ROUND2(eax, ebx, ecx, edx, 1, 4, -0x5B4115BC) 139 | ROUND2(edx, eax, ebx, ecx, 4, 11, 0x4BDECFA9) 140 | ROUND2(ecx, edx, eax, ebx, 7, 16, -0x0944B4A0) 141 | ROUND2(ebx, ecx, edx, eax, 10, 23, -0x41404390) 142 | ROUND2(eax, ebx, ecx, edx, 13, 4, 0x289B7EC6) 143 | ROUND2(edx, eax, ebx, ecx, 0, 11, -0x155ED806) 144 | ROUND2(ecx, edx, eax, ebx, 3, 16, -0x2B10CF7B) 145 | ROUND2(ebx, ecx, edx, eax, 6, 23, 0x04881D05) 146 | ROUND2(eax, ebx, ecx, edx, 9, 4, -0x262B2FC7) 147 | ROUND2(edx, eax, ebx, ecx, 12, 11, -0x1924661B) 148 | ROUND2(ecx, edx, eax, ebx, 15, 16, 0x1FA27CF8) 149 | ROUND2(ebx, ecx, edx, eax, 2, 23, -0x3B53A99B) 150 | ROUND3(eax, ebx, ecx, edx, 0, 6, -0x0BD6DDBC) 151 | ROUND3(edx, eax, ebx, ecx, 7, 10, 0x432AFF97) 152 | ROUND3(ecx, edx, eax, ebx, 14, 15, -0x546BDC59) 153 | ROUND3(ebx, ecx, edx, eax, 5, 21, -0x036C5FC7) 154 | ROUND3(eax, ebx, ecx, edx, 12, 6, 0x655B59C3) 155 | ROUND3(edx, eax, ebx, ecx, 3, 10, -0x70F3336E) 156 | ROUND3(ecx, edx, eax, ebx, 10, 15, -0x00100B83) 157 | ROUND3(ebx, ecx, edx, eax, 1, 21, -0x7A7BA22F) 158 | ROUND3(eax, ebx, ecx, edx, 8, 6, 0x6FA87E4F) 159 | ROUND3(edx, eax, ebx, ecx, 15, 10, -0x01D31920) 160 | ROUND3(ecx, edx, eax, ebx, 6, 15, -0x5CFEBCEC) 161 | ROUND3(ebx, ecx, edx, eax, 13, 21, 0x4E0811A1) 162 | ROUND3(eax, ebx, ecx, edx, 4, 6, -0x08AC817E) 163 | ROUND3(edx, eax, ebx, ecx, 11, 10, -0x42C50DCB) 164 | ROUND3(ecx, edx, eax, ebx, 2, 15, 0x2AD7D2BB) 165 | ROUND3(ebx, ecx, edx, eax, 9, 21, -0x14792C6F) 166 | 167 | /* Save updated state: each working variable is added into its state word through the pointer kept in r8 */ 168 | addl %eax, 0(%r8) 169 | 
addl %ebx, 4(%r8) 170 | addl %ecx, 8(%r8) 171 | addl %edx, 12(%r8) 172 | 173 | /* Restore registers from xmm0/xmm1 */ 174 | movq %xmm0, %rbx 175 | movq %xmm1, %rbp 176 | retq 177 | -------------------------------------------------------------------------------- /md5/src/x86.S: -------------------------------------------------------------------------------- 1 | /* 2 | * MD5 hash in x86 assembly 3 | * 4 | * Copyright (c) 2016 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 
22 | */ 23 | 24 | 25 | /* void md5_compress(uint32_t state[4], const uint8_t block[64]) */ 26 | #if defined(__APPLE__) || defined(_WIN32) 27 | .globl _md5_compress 28 | _md5_compress: 29 | #else 30 | .globl md5_compress 31 | md5_compress: 32 | #endif 33 | /* 34 | * Storage usage: 35 | * Bytes Location Description 36 | * 4 eax MD5 state variable A 37 | * 4 ebx MD5 state variable B 38 | * 4 ecx MD5 state variable C 39 | * 4 edx MD5 state variable D 40 | * 4 esi Temporary for calculation per round 41 | * 4 edi Temporary for calculation per round 42 | * 4 ebp Base address of block array argument (read-only) 43 | * 4 esp x86 stack pointer 44 | * 4 [esp+ 0] Caller's value of ebx 45 | * 4 [esp+ 4] Caller's value of esi 46 | * 4 [esp+ 8] Caller's value of edi 47 | * 4 [esp+12] Caller's value of ebp 48 | */ 49 | /* Every ROUNDn(a, b, c, d, k, s, t) computes a = b + rotl32(a + f(b, c, d) + W[k] + t, s), where W[k] is the 32-bit word at byte offset 4*k of the block. ROUND0: f = d ^ (b & (c ^ d)) */ 50 | #define ROUND0(a, b, c, d, k, s, t) \ 51 | movl %c, %esi; \ 52 | addl (k*4)(%ebp), %a; \ 53 | xorl %d, %esi; \ 54 | andl %b, %esi; \ 55 | xorl %d, %esi; \ 56 | leal t(%esi,%a), %a; \ 57 | roll $s, %a; \ 58 | addl %b, %a; 59 | /* ROUND1: f = (d & b) | (~d & c); the only round that also uses edi as a temporary */ 60 | #define ROUND1(a, b, c, d, k, s, t) \ 61 | movl %d, %esi; \ 62 | movl %d, %edi; \ 63 | addl (k*4)(%ebp), %a; \ 64 | notl %esi; \ 65 | andl %b, %edi; \ 66 | andl %c, %esi; \ 67 | orl %edi, %esi; \ 68 | leal t(%esi,%a), %a; \ 69 | roll $s, %a; \ 70 | addl %b, %a; 71 | /* ROUND2: f = b ^ c ^ d */ 72 | #define ROUND2(a, b, c, d, k, s, t) \ 73 | movl %c, %esi; \ 74 | addl (k*4)(%ebp), %a; \ 75 | xorl %d, %esi; \ 76 | xorl %b, %esi; \ 77 | leal t(%esi,%a), %a; \ 78 | roll $s, %a; \ 79 | addl %b, %a; 80 | /* ROUND3: f = c ^ (b | ~d) */ 81 | #define ROUND3(a, b, c, d, k, s, t) \ 82 | movl %d, %esi; \ 83 | not %esi; \ 84 | addl (k*4)(%ebp), %a; \ 85 | orl %b, %esi; \ 86 | xorl %c, %esi; \ 87 | leal t(%esi,%a), %a; \ 88 | roll $s, %a; \ 89 | addl %b, %a; 90 | 91 | /* Save registers: reserve 16 bytes of stack for ebx, esi, edi and ebp, all of which are overwritten below */ 92 | subl $16, %esp 93 | movl %ebx, 0(%esp) 94 | movl %esi, 4(%esp) 95 | movl %edi, 8(%esp) 96 | movl %ebp, 12(%esp) 97 | 98 | /* Load arguments: they sit 20 and 24 bytes above esp, i.e. past the 16-byte save area plus a further 4 bytes (presumably the return address) */ 99 | movl 20(%esp), %esi /* state */ 100 | movl 24(%esp), %ebp /* block 
*/ 101 | movl 0(%esi), %eax /* a */ 102 | movl 4(%esi), %ebx /* b */ 103 | movl 8(%esi), %ecx /* c */ 104 | movl 12(%esi), %edx /* d */ 105 | 106 | /* 64 rounds of hashing */ 107 | ROUND0(eax, ebx, ecx, edx, 0, 7, 0xD76AA478) 108 | ROUND0(edx, eax, ebx, ecx, 1, 12, 0xE8C7B756) 109 | ROUND0(ecx, edx, eax, ebx, 2, 17, 0x242070DB) 110 | ROUND0(ebx, ecx, edx, eax, 3, 22, 0xC1BDCEEE) 111 | ROUND0(eax, ebx, ecx, edx, 4, 7, 0xF57C0FAF) 112 | ROUND0(edx, eax, ebx, ecx, 5, 12, 0x4787C62A) 113 | ROUND0(ecx, edx, eax, ebx, 6, 17, 0xA8304613) 114 | ROUND0(ebx, ecx, edx, eax, 7, 22, 0xFD469501) 115 | ROUND0(eax, ebx, ecx, edx, 8, 7, 0x698098D8) 116 | ROUND0(edx, eax, ebx, ecx, 9, 12, 0x8B44F7AF) 117 | ROUND0(ecx, edx, eax, ebx, 10, 17, 0xFFFF5BB1) 118 | ROUND0(ebx, ecx, edx, eax, 11, 22, 0x895CD7BE) 119 | ROUND0(eax, ebx, ecx, edx, 12, 7, 0x6B901122) 120 | ROUND0(edx, eax, ebx, ecx, 13, 12, 0xFD987193) 121 | ROUND0(ecx, edx, eax, ebx, 14, 17, 0xA679438E) 122 | ROUND0(ebx, ecx, edx, eax, 15, 22, 0x49B40821) 123 | ROUND1(eax, ebx, ecx, edx, 1, 5, 0xF61E2562) 124 | ROUND1(edx, eax, ebx, ecx, 6, 9, 0xC040B340) 125 | ROUND1(ecx, edx, eax, ebx, 11, 14, 0x265E5A51) 126 | ROUND1(ebx, ecx, edx, eax, 0, 20, 0xE9B6C7AA) 127 | ROUND1(eax, ebx, ecx, edx, 5, 5, 0xD62F105D) 128 | ROUND1(edx, eax, ebx, ecx, 10, 9, 0x02441453) 129 | ROUND1(ecx, edx, eax, ebx, 15, 14, 0xD8A1E681) 130 | ROUND1(ebx, ecx, edx, eax, 4, 20, 0xE7D3FBC8) 131 | ROUND1(eax, ebx, ecx, edx, 9, 5, 0x21E1CDE6) 132 | ROUND1(edx, eax, ebx, ecx, 14, 9, 0xC33707D6) 133 | ROUND1(ecx, edx, eax, ebx, 3, 14, 0xF4D50D87) 134 | ROUND1(ebx, ecx, edx, eax, 8, 20, 0x455A14ED) 135 | ROUND1(eax, ebx, ecx, edx, 13, 5, 0xA9E3E905) 136 | ROUND1(edx, eax, ebx, ecx, 2, 9, 0xFCEFA3F8) 137 | ROUND1(ecx, edx, eax, ebx, 7, 14, 0x676F02D9) 138 | ROUND1(ebx, ecx, edx, eax, 12, 20, 0x8D2A4C8A) 139 | ROUND2(eax, ebx, ecx, edx, 5, 4, 0xFFFA3942) 140 | ROUND2(edx, eax, ebx, ecx, 8, 11, 0x8771F681) 141 | ROUND2(ecx, edx, eax, ebx, 11, 16, 0x6D9D6122) 142 
| ROUND2(ebx, ecx, edx, eax, 14, 23, 0xFDE5380C) 143 | ROUND2(eax, ebx, ecx, edx, 1, 4, 0xA4BEEA44) 144 | ROUND2(edx, eax, ebx, ecx, 4, 11, 0x4BDECFA9) 145 | ROUND2(ecx, edx, eax, ebx, 7, 16, 0xF6BB4B60) 146 | ROUND2(ebx, ecx, edx, eax, 10, 23, 0xBEBFBC70) 147 | ROUND2(eax, ebx, ecx, edx, 13, 4, 0x289B7EC6) 148 | ROUND2(edx, eax, ebx, ecx, 0, 11, 0xEAA127FA) 149 | ROUND2(ecx, edx, eax, ebx, 3, 16, 0xD4EF3085) 150 | ROUND2(ebx, ecx, edx, eax, 6, 23, 0x04881D05) 151 | ROUND2(eax, ebx, ecx, edx, 9, 4, 0xD9D4D039) 152 | ROUND2(edx, eax, ebx, ecx, 12, 11, 0xE6DB99E5) 153 | ROUND2(ecx, edx, eax, ebx, 15, 16, 0x1FA27CF8) 154 | ROUND2(ebx, ecx, edx, eax, 2, 23, 0xC4AC5665) 155 | ROUND3(eax, ebx, ecx, edx, 0, 6, 0xF4292244) 156 | ROUND3(edx, eax, ebx, ecx, 7, 10, 0x432AFF97) 157 | ROUND3(ecx, edx, eax, ebx, 14, 15, 0xAB9423A7) 158 | ROUND3(ebx, ecx, edx, eax, 5, 21, 0xFC93A039) 159 | ROUND3(eax, ebx, ecx, edx, 12, 6, 0x655B59C3) 160 | ROUND3(edx, eax, ebx, ecx, 3, 10, 0x8F0CCC92) 161 | ROUND3(ecx, edx, eax, ebx, 10, 15, 0xFFEFF47D) 162 | ROUND3(ebx, ecx, edx, eax, 1, 21, 0x85845DD1) 163 | ROUND3(eax, ebx, ecx, edx, 8, 6, 0x6FA87E4F) 164 | ROUND3(edx, eax, ebx, ecx, 15, 10, 0xFE2CE6E0) 165 | ROUND3(ecx, edx, eax, ebx, 6, 15, 0xA3014314) 166 | ROUND3(ebx, ecx, edx, eax, 13, 21, 0x4E0811A1) 167 | ROUND3(eax, ebx, ecx, edx, 4, 6, 0xF7537E82) 168 | ROUND3(edx, eax, ebx, ecx, 11, 10, 0xBD3AF235) 169 | ROUND3(ecx, edx, eax, ebx, 2, 15, 0x2AD7D2BB) 170 | ROUND3(ebx, ecx, edx, eax, 9, 21, 0xEB86D391) 171 | 172 | /* Save updated state: esi was clobbered as a round temporary, so the state pointer is reloaded from the stack before each working variable is added into its state word */ 173 | movl 20(%esp), %esi 174 | addl %eax, 0(%esi) 175 | addl %ebx, 4(%esi) 176 | addl %ecx, 8(%esi) 177 | addl %edx, 12(%esi) 178 | 179 | /* Restore registers saved on entry and release the 16-byte save area */ 180 | movl 0(%esp), %ebx 181 | movl 4(%esp), %esi 182 | movl 8(%esp), %edi 183 | movl 12(%esp), %ebp 184 | addl $16, %esp 185 | retl 186 | -------------------------------------------------------------------------------- /sha1/CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## 0.5.3 (2024-05-06) 8 | ### Changed 9 | - Emit compilation error when compiled for Windows targets. ([#79]) 10 | 11 | [#79]: https://github.com/RustCrypto/asm-hashes/pull/79 12 | 13 | ## 0.5.2 (2023-08-07) 14 | ### Changed 15 | - Prefix x86 asm symbols with `_` on Windows like on Apple ([#61]) 16 | - Fix deprecated use of `cc::Build::compile` ([#59]) 17 | 18 | [#61]: https://github.com/RustCrypto/asm-hashes/pull/61 19 | [#59]: https://github.com/RustCrypto/asm-hashes/pull/59 20 | 21 | ## 0.5.1 (2021-07-16) 22 | ### Fixed 23 | - Builds on iOS/macOS ([#38]) 24 | 25 | [#38]: https://github.com/RustCrypto/asm-hashes/pull/38 26 | 27 | ## 0.5.0 (2021-02-19) 28 | 29 | ## 0.4.4 30 | 31 | ## 0.4.3 32 | 33 | ## 0.4.2 34 | 35 | ## 0.4.1 36 | 37 | ## 0.4.0 38 | 39 | ## 0.3.0 40 | 41 | ## 0.2.1 42 | 43 | ## 0.2.0 44 | 45 | ## 0.1.0 46 | -------------------------------------------------------------------------------- /sha1/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sha1-asm" 3 | version = "0.5.3" 4 | authors = ["RustCrypto Developers"] 5 | license = "MIT" 6 | description = "Assembly implementation of SHA-1 compression function" 7 | documentation = "https://docs.rs/sha1-asm" 8 | repository = "https://github.com/RustCrypto/asm-hashes" 9 | keywords = ["crypto", "sha1", "asm"] 10 | categories = ["cryptography", "no-std"] 11 | edition = "2018" 12 | 13 | [build-dependencies] 14 | cc = "1.0" 15 | -------------------------------------------------------------------------------- /sha1/LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Project Nayuki 2 | Copyright (c) 2017 Artyom Pavlov 3 | 4 | Permission is hereby granted, free of charge, to any 5 | person obtaining a copy of this software and associated 6 | documentation files (the "Software"), to deal in the 7 | Software without restriction, including without 8 | limitation the rights to use, copy, modify, merge, 9 | publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software 11 | is furnished to do so, subject to the following 12 | conditions: 13 | 14 | The above copyright notice and this permission notice 15 | shall be included in all copies or substantial portions 16 | of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 19 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 20 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 21 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 22 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 25 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 26 | DEALINGS IN THE SOFTWARE. 
27 |
--------------------------------------------------------------------------------
/sha1/benches/lib.rs:
--------------------------------------------------------------------------------
1 | #![no_std]
2 | #![feature(test)]
3 |
4 | extern crate test;
5 |
6 | use test::Bencher;
7 |
8 | // Measures one call of `sha1_asm::compress` over a single zero-filled 64-byte block.
9 | #[bench]
10 | fn bench_compress(b: &mut Bencher) {
11 |     let mut state = Default::default();
12 |     let data = [[0u8; 64]];
13 |
14 |     b.iter(|| {
15 |         sha1_asm::compress(&mut state, &data);
16 |     });
17 |
18 |     // `data.len()` is the number of blocks (1); throughput needs the byte count.
19 |     b.bytes = (data.len() * data[0].len()) as u64;
20 | }
21 |
--------------------------------------------------------------------------------
/sha1/build.rs:
--------------------------------------------------------------------------------
1 | fn main() {
2 |     let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
3 |     let target_vendor = std::env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default();
4 |
5 |     let asm_path = if target_arch == "x86" {
6 |         "src/x86.S"
7 |     } else if target_arch == "x86_64" {
8 |         "src/x64.S"
9 |     } else if target_arch == "aarch64" && target_vendor == "apple" {
10 |         "src/aarch64_apple.S"
11 |     } else if target_arch == "aarch64" {
12 |         "src/aarch64.S"
13 |     } else {
14 |         panic!("Unsupported target architecture: {}", target_arch);
15 |     };
16 |     let mut build = cc::Build::new();
17 |     if target_arch == "aarch64" {
18 |         build.flag("-march=armv8-a+crypto");
19 |     }
20 |     build.flag("-c").file(asm_path).compile("sha1");
21 | }
22 |
--------------------------------------------------------------------------------
/sha1/src/aarch64.S:
--------------------------------------------------------------------------------
1 | /*
2 |  * SHA-1 hash in AArch64 assembly
3 |  *
4 |  * Copyright (c) 2020 Emmanuel Gil Peyrot .
(MIT License) 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | * this software and associated documentation files (the "Software"), to deal in 8 | * the Software without restriction, including without limitation the rights to 9 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | * the Software, and to permit persons to whom the Software is furnished to do so, 11 | * subject to the following conditions: 12 | * - The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * - The Software is provided "as is", without warranty of any kind, express or 15 | * implied, including but not limited to the warranties of merchantability, 16 | * fitness for a particular purpose and noninfringement. In no event shall the 17 | * authors or copyright holders be liable for any claim, damages or other 18 | * liability, whether in an action of contract, tort or otherwise, arising from, 19 | * out of or in connection with the Software or the use or other dealings in the 20 | * Software. 
21 | */ 22 | 23 | 24 | /* void sha1_compress(uint32_t state[5], const uint8_t block[64]) */ 25 | .global sha1_compress 26 | sha1_compress: 27 | /* 28 | * Storage usage: 29 | * Bytes Location Description 30 | * 4 x0 state argument 31 | * 4 x1 block argument 32 | * 16 q0 W0 33 | * 16 q1 W1 34 | * 16 q2 W2 35 | * 16 q3 W3 36 | * 16 q4 k 37 | * 16 q5 Original ABCD 38 | * 16 q6 ABCD (with s3 being A) 39 | * 4 s16 E 40 | * 4 s17 e0 41 | * 4 s18 e1 42 | * 16 q19 wk 43 | */ 44 | 45 | // Load state in registers 46 | ldr q5, [x0] 47 | ldr s16, [x0, 16] 48 | mov v6.16b, v5.16b 49 | 50 | // Load block in registers 51 | ldr q0, [x1] 52 | ldr q1, [x1, 16] 53 | ldr q2, [x1, 32] 54 | ldr q3, [x1, 48] 55 | 56 | // TODO: only do that on little endian 57 | rev32 v0.16b, v0.16b 58 | rev32 v1.16b, v1.16b 59 | rev32 v2.16b, v2.16b 60 | rev32 v3.16b, v3.16b 61 | 62 | // k for the next five rounds 63 | adrp x1, .K0 64 | ldr q4, [x1, #:lo12:.K0] 65 | 66 | // 0 67 | sha1h s18, s6 68 | add v19.4s, v0.4s, v4.4s 69 | sha1c q6, s16, v19.4s 70 | sha1su0 v0.4s, v1.4s, v2.4s 71 | 72 | // 1 73 | sha1h s17, s6 74 | add v19.4s, v1.4s, v4.4s 75 | sha1c q6, s18, v19.4s 76 | sha1su1 v0.4s, v3.4s 77 | sha1su0 v1.4s, v2.4s, v3.4s 78 | 79 | // 2 80 | sha1h s18, s6 81 | add v19.4s, v2.4s, v4.4s 82 | sha1c q6, s17, v19.4s 83 | sha1su1 v1.4s, v0.4s 84 | sha1su0 v2.4s, v3.4s, v0.4s 85 | 86 | // 3 87 | sha1h s17, s6 88 | add v19.4s, v3.4s, v4.4s 89 | sha1c q6, s18, v19.4s 90 | sha1su1 v2.4s, v1.4s 91 | sha1su0 v3.4s, v0.4s, v1.4s 92 | 93 | // 4 94 | sha1h s18, s6 95 | add v19.4s, v0.4s, v4.4s 96 | sha1c q6, s17, v19.4s 97 | sha1su1 v3.4s, v2.4s 98 | sha1su0 v0.4s, v1.4s, v2.4s 99 | 100 | // k for the next five rounds 101 | adrp x1, .K1 102 | ldr q4, [x1, #:lo12:.K1] 103 | 104 | // 5 105 | sha1h s17, s6 106 | add v19.4s, v1.4s, v4.4s 107 | sha1p q6, s18, v19.4s 108 | sha1su1 v0.4s, v3.4s 109 | sha1su0 v1.4s, v2.4s, v3.4s 110 | 111 | // 6 112 | sha1h s18, s6 113 | add v19.4s, v2.4s, v4.4s 114 | sha1p q6, s17, 
v19.4s 115 | sha1su1 v1.4s, v0.4s 116 | sha1su0 v2.4s, v3.4s, v0.4s 117 | 118 | // 7 119 | sha1h s17, s6 120 | add v19.4s, v3.4s, v4.4s 121 | sha1p q6, s18, v19.4s 122 | sha1su1 v2.4s, v1.4s 123 | sha1su0 v3.4s, v0.4s, v1.4s 124 | 125 | // 8 126 | sha1h s18, s6 127 | add v19.4s, v0.4s, v4.4s 128 | sha1p q6, s17, v19.4s 129 | sha1su1 v3.4s, v2.4s 130 | sha1su0 v0.4s, v1.4s, v2.4s 131 | 132 | // 9 133 | sha1h s17, s6 134 | add v19.4s, v1.4s, v4.4s 135 | sha1p q6, s18, v19.4s 136 | sha1su1 v0.4s, v3.4s 137 | sha1su0 v1.4s, v2.4s, v3.4s 138 | 139 | // k for the next five rounds 140 | adrp x1, .K2 141 | ldr q4, [x1, #:lo12:.K2] 142 | 143 | // 10 144 | sha1h s18, s6 145 | add v19.4s, v2.4s, v4.4s 146 | sha1m q6, s17, v19.4s 147 | sha1su1 v1.4s, v0.4s 148 | sha1su0 v2.4s, v3.4s, v0.4s 149 | 150 | // 11 151 | sha1h s17, s6 152 | add v19.4s, v3.4s, v4.4s 153 | sha1m q6, s18, v19.4s 154 | sha1su1 v2.4s, v1.4s 155 | sha1su0 v3.4s, v0.4s, v1.4s 156 | 157 | // 12 158 | sha1h s18, s6 159 | add v19.4s, v0.4s, v4.4s 160 | sha1m q6, s17, v19.4s 161 | sha1su1 v3.4s, v2.4s 162 | sha1su0 v0.4s, v1.4s, v2.4s 163 | 164 | // 13 165 | sha1h s17, s6 166 | add v19.4s, v1.4s, v4.4s 167 | sha1m q6, s18, v19.4s 168 | sha1su1 v0.4s, v3.4s 169 | sha1su0 v1.4s, v2.4s, v3.4s 170 | 171 | // 14 172 | sha1h s18, s6 173 | add v19.4s, v2.4s, v4.4s 174 | sha1m q6, s17, v19.4s 175 | sha1su1 v1.4s, v0.4s 176 | sha1su0 v2.4s, v3.4s, v0.4s 177 | 178 | // k for the next five rounds 179 | adrp x1, .K3 180 | ldr q4, [x1, #:lo12:.K3] 181 | 182 | // 15 183 | sha1h s17, s6 184 | add v19.4s, v3.4s, v4.4s 185 | sha1p q6, s18, v19.4s 186 | sha1su1 v2.4s, v1.4s 187 | sha1su0 v3.4s, v0.4s, v1.4s 188 | 189 | // 16 190 | sha1h s18, s6 191 | add v19.4s, v0.4s, v4.4s 192 | sha1p q6, s17, v19.4s 193 | sha1su1 v3.4s, v2.4s 194 | 195 | // 17 196 | sha1h s17, s6 197 | add v19.4s, v1.4s, v4.4s 198 | sha1p q6, s18, v19.4s 199 | 200 | // 18 201 | sha1h s18, s6 202 | add v19.4s, v2.4s, v4.4s 203 | sha1p q6, s17, v19.4s 204 | 205 
| // 19 206 | sha1h s17, s6 207 | add v19.4s, v3.4s, v4.4s 208 | sha1p q6, s18, v19.4s 209 | 210 | // Update state 211 | add v6.4s, v6.4s, v5.4s 212 | str q6, [x0] 213 | add v16.2s, v16.2s, v17.2s 214 | str s16, [x0, 16] 215 | 216 | ret 217 | .align 4 218 | .K0: 219 | .word 0x5A827999 220 | .word 0x5A827999 221 | .word 0x5A827999 222 | .word 0x5A827999 223 | .K1: 224 | .word 0x6ED9EBA1 225 | .word 0x6ED9EBA1 226 | .word 0x6ED9EBA1 227 | .word 0x6ED9EBA1 228 | .K2: 229 | .word 0x8F1BBCDC 230 | .word 0x8F1BBCDC 231 | .word 0x8F1BBCDC 232 | .word 0x8F1BBCDC 233 | .K3: 234 | .word 0xCA62C1D6 235 | .word 0xCA62C1D6 236 | .word 0xCA62C1D6 237 | .word 0xCA62C1D6 238 | -------------------------------------------------------------------------------- /sha1/src/aarch64_apple.S: -------------------------------------------------------------------------------- 1 | /* 2 | * SHA-1 hash in AArch64 assembly 3 | * 4 | * Copyright (c) 2020 Emmanuel Gil Peyrot . (MIT License) 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | * this software and associated documentation files (the "Software"), to deal in 8 | * the Software without restriction, including without limitation the rights to 9 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | * the Software, and to permit persons to whom the Software is furnished to do so, 11 | * subject to the following conditions: 12 | * - The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * - The Software is provided "as is", without warranty of any kind, express or 15 | * implied, including but not limited to the warranties of merchantability, 16 | * fitness for a particular purpose and noninfringement. 
In no event shall the 17 | * authors or copyright holders be liable for any claim, damages or other 18 | * liability, whether in an action of contract, tort or otherwise, arising from, 19 | * out of or in connection with the Software or the use or other dealings in the 20 | * Software. 21 | */ 22 | 23 | 24 | /* void sha1_compress(uint32_t state[5], const uint8_t block[64]) */ 25 | .global _sha1_compress 26 | _sha1_compress: 27 | /* 28 | * Storage usage: 29 | * Bytes Location Description 30 | * 4 x0 state argument 31 | * 4 x1 block argument 32 | * 16 q0 W0 33 | * 16 q1 W1 34 | * 16 q2 W2 35 | * 16 q3 W3 36 | * 16 q4 k 37 | * 16 q5 Original ABCD 38 | * 16 q6 ABCD (with s3 being A) 39 | * 4 s16 E 40 | * 4 s17 e0 41 | * 4 s18 e1 42 | * 16 q19 wk 43 | */ 44 | 45 | // Load state in registers 46 | ldr q5, [x0] 47 | ldr s16, [x0, 16] 48 | mov v6.16b, v5.16b 49 | 50 | // Load block in registers 51 | ldr q0, [x1] 52 | ldr q1, [x1, 16] 53 | ldr q2, [x1, 32] 54 | ldr q3, [x1, 48] 55 | 56 | // TODO: only do that on little endian 57 | rev32 v0.16b, v0.16b 58 | rev32 v1.16b, v1.16b 59 | rev32 v2.16b, v2.16b 60 | rev32 v3.16b, v3.16b 61 | 62 | // k for the next five rounds 63 | adrp x1, .K0@PAGE 64 | ldr q4, [x1, #:lo12:.K0@PAGEOFF] 65 | 66 | // 0 67 | sha1h s18, s6 68 | add v19.4s, v0.4s, v4.4s 69 | sha1c q6, s16, v19.4s 70 | sha1su0 v0.4s, v1.4s, v2.4s 71 | 72 | // 1 73 | sha1h s17, s6 74 | add v19.4s, v1.4s, v4.4s 75 | sha1c q6, s18, v19.4s 76 | sha1su1 v0.4s, v3.4s 77 | sha1su0 v1.4s, v2.4s, v3.4s 78 | 79 | // 2 80 | sha1h s18, s6 81 | add v19.4s, v2.4s, v4.4s 82 | sha1c q6, s17, v19.4s 83 | sha1su1 v1.4s, v0.4s 84 | sha1su0 v2.4s, v3.4s, v0.4s 85 | 86 | // 3 87 | sha1h s17, s6 88 | add v19.4s, v3.4s, v4.4s 89 | sha1c q6, s18, v19.4s 90 | sha1su1 v2.4s, v1.4s 91 | sha1su0 v3.4s, v0.4s, v1.4s 92 | 93 | // 4 94 | sha1h s18, s6 95 | add v19.4s, v0.4s, v4.4s 96 | sha1c q6, s17, v19.4s 97 | sha1su1 v3.4s, v2.4s 98 | sha1su0 v0.4s, v1.4s, v2.4s 99 | 100 | // k for the next five 
rounds 101 | adrp x1, .K1@PAGE 102 | ldr q4, [x1, #:lo12:.K1@PAGEOFF] 103 | 104 | // 5 105 | sha1h s17, s6 106 | add v19.4s, v1.4s, v4.4s 107 | sha1p q6, s18, v19.4s 108 | sha1su1 v0.4s, v3.4s 109 | sha1su0 v1.4s, v2.4s, v3.4s 110 | 111 | // 6 112 | sha1h s18, s6 113 | add v19.4s, v2.4s, v4.4s 114 | sha1p q6, s17, v19.4s 115 | sha1su1 v1.4s, v0.4s 116 | sha1su0 v2.4s, v3.4s, v0.4s 117 | 118 | // 7 119 | sha1h s17, s6 120 | add v19.4s, v3.4s, v4.4s 121 | sha1p q6, s18, v19.4s 122 | sha1su1 v2.4s, v1.4s 123 | sha1su0 v3.4s, v0.4s, v1.4s 124 | 125 | // 8 126 | sha1h s18, s6 127 | add v19.4s, v0.4s, v4.4s 128 | sha1p q6, s17, v19.4s 129 | sha1su1 v3.4s, v2.4s 130 | sha1su0 v0.4s, v1.4s, v2.4s 131 | 132 | // 9 133 | sha1h s17, s6 134 | add v19.4s, v1.4s, v4.4s 135 | sha1p q6, s18, v19.4s 136 | sha1su1 v0.4s, v3.4s 137 | sha1su0 v1.4s, v2.4s, v3.4s 138 | 139 | // k for the next five rounds 140 | adrp x1, .K2@PAGE 141 | ldr q4, [x1, #:lo12:.K2@PAGEOFF] 142 | 143 | // 10 144 | sha1h s18, s6 145 | add v19.4s, v2.4s, v4.4s 146 | sha1m q6, s17, v19.4s 147 | sha1su1 v1.4s, v0.4s 148 | sha1su0 v2.4s, v3.4s, v0.4s 149 | 150 | // 11 151 | sha1h s17, s6 152 | add v19.4s, v3.4s, v4.4s 153 | sha1m q6, s18, v19.4s 154 | sha1su1 v2.4s, v1.4s 155 | sha1su0 v3.4s, v0.4s, v1.4s 156 | 157 | // 12 158 | sha1h s18, s6 159 | add v19.4s, v0.4s, v4.4s 160 | sha1m q6, s17, v19.4s 161 | sha1su1 v3.4s, v2.4s 162 | sha1su0 v0.4s, v1.4s, v2.4s 163 | 164 | // 13 165 | sha1h s17, s6 166 | add v19.4s, v1.4s, v4.4s 167 | sha1m q6, s18, v19.4s 168 | sha1su1 v0.4s, v3.4s 169 | sha1su0 v1.4s, v2.4s, v3.4s 170 | 171 | // 14 172 | sha1h s18, s6 173 | add v19.4s, v2.4s, v4.4s 174 | sha1m q6, s17, v19.4s 175 | sha1su1 v1.4s, v0.4s 176 | sha1su0 v2.4s, v3.4s, v0.4s 177 | 178 | // k for the next five rounds 179 | adrp x1, .K3@PAGE 180 | ldr q4, [x1, #:lo12:.K3@PAGEOFF] 181 | 182 | // 15 183 | sha1h s17, s6 184 | add v19.4s, v3.4s, v4.4s 185 | sha1p q6, s18, v19.4s 186 | sha1su1 v2.4s, v1.4s 187 | sha1su0 v3.4s, 
v0.4s, v1.4s 188 | 189 | // 16 190 | sha1h s18, s6 191 | add v19.4s, v0.4s, v4.4s 192 | sha1p q6, s17, v19.4s 193 | sha1su1 v3.4s, v2.4s 194 | 195 | // 17 196 | sha1h s17, s6 197 | add v19.4s, v1.4s, v4.4s 198 | sha1p q6, s18, v19.4s 199 | 200 | // 18 201 | sha1h s18, s6 202 | add v19.4s, v2.4s, v4.4s 203 | sha1p q6, s17, v19.4s 204 | 205 | // 19 206 | sha1h s17, s6 207 | add v19.4s, v3.4s, v4.4s 208 | sha1p q6, s18, v19.4s 209 | 210 | // Update state 211 | add v6.4s, v6.4s, v5.4s 212 | str q6, [x0] 213 | add v16.2s, v16.2s, v17.2s 214 | str s16, [x0, 16] 215 | 216 | ret 217 | .align 4 218 | .K0: 219 | .word 0x5A827999 220 | .word 0x5A827999 221 | .word 0x5A827999 222 | .word 0x5A827999 223 | .K1: 224 | .word 0x6ED9EBA1 225 | .word 0x6ED9EBA1 226 | .word 0x6ED9EBA1 227 | .word 0x6ED9EBA1 228 | .K2: 229 | .word 0x8F1BBCDC 230 | .word 0x8F1BBCDC 231 | .word 0x8F1BBCDC 232 | .word 0x8F1BBCDC 233 | .K3: 234 | .word 0xCA62C1D6 235 | .word 0xCA62C1D6 236 | .word 0xCA62C1D6 237 | .word 0xCA62C1D6 238 | -------------------------------------------------------------------------------- /sha1/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Assembly implementation of the [SHA-1] compression function. 2 | //! 3 | //! This crate is not intended for direct use, most users should 4 | //! prefer the [`sha-1`] crate with enabled `asm` feature instead. 5 | //! 6 | //! Only x86, x86-64, and AArch64 architectures are currently supported. 7 | //! 8 | //! [SHA-1]: https://en.wikipedia.org/wiki/SHA-1 9 | //! 
[`sha-1`]: https://crates.io/crates/sha-1 10 | 11 | #![no_std] 12 | #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64")))] 13 | compile_error!("crate can only be used on x86, x86_64 and AArch64 architectures"); 14 | 15 | #[cfg(target_os = "windows")] 16 | compile_error!("crate does not support Windows targets"); 17 | 18 | #[link(name = "sha1", kind = "static")] 19 | extern "C" { 20 | fn sha1_compress(state: &mut [u32; 5], block: &[u8; 64]); 21 | } 22 | 23 | /// Safe wrapper around assembly implementation of SHA-1 compression function 24 | #[inline] 25 | pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) { 26 | for block in blocks { 27 | unsafe { 28 | sha1_compress(state, block); 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /sha1/src/x64.S: -------------------------------------------------------------------------------- 1 | /* 2 | * SHA-1 hash in x86-64 assembly 3 | * 4 | * Copyright (c) 2015 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-sha1-hash-implementation-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. 
In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 22 | */ 23 | 24 | 25 | /* void sha1_compress(uint32_t state[5], const uint8_t block[64]) */ 26 | #ifdef __APPLE__ 27 | .globl _sha1_compress 28 | _sha1_compress: 29 | #else 30 | .globl sha1_compress 31 | sha1_compress: 32 | #endif 33 | /* 34 | * Storage usage: 35 | * Bytes Location Description 36 | * 4 eax SHA-1 state variable A 37 | * 4 ebx SHA-1 state variable B 38 | * 4 ecx SHA-1 state variable C 39 | * 4 edx SHA-1 state variable D 40 | * 4 ebp SHA-1 state variable E 41 | * 4 esi Temporary for calculation per round 42 | * 4 edi (Last 64 rounds) temporary for calculation per round 43 | * 8 rdi (First 16 rounds) base address of block array argument (read-only) 44 | * 8 r8 Base address of state array argument (read-only) 45 | * 8 rsp x86-64 stack pointer 46 | * 64 [rsp+0] Circular buffer of most recent 16 key schedule items, 4 bytes each 47 | * 16 xmm0 Caller's value of rbx (only low 64 bits are used) 48 | * 16 xmm1 Caller's value of rbp (only low 64 bits are used) 49 | */ 50 | 51 | #define ROUND0a(a, b, c, d, e, i) \ 52 | movl (i*4)(%rdi), %esi; \ 53 | bswapl %esi; \ 54 | movl %esi, (i*4)(%rsp); \ 55 | addl %esi, %e; \ 56 | movl %c, %esi; \ 57 | xorl %d, %esi; \ 58 | andl %b, %esi; \ 59 | xorl %d, %esi; \ 60 | ROUNDTAIL(a, b, e, i, 0x5A827999) 61 | 62 | #define SCHEDULE(i, e) \ 63 | movl (((i- 3)&0xF)*4)(%rsp), %esi; \ 64 | xorl (((i- 8)&0xF)*4)(%rsp), %esi; \ 65 | xorl (((i-14)&0xF)*4)(%rsp), %esi; \ 66 | xorl (((i-16)&0xF)*4)(%rsp), %esi; \ 67 | roll $1, %esi; \ 68 | addl %esi, %e; \ 69 | movl %esi, ((i&0xF)*4)(%rsp); 70 | 71 | #define ROUND0b(a, b, c, d, e, i) \ 72 | SCHEDULE(i, e) \ 73 | movl %c, %esi; \ 74 | xorl %d, %esi; \ 75 | andl %b, %esi; \ 76 | xorl %d, %esi; \ 77 | 
ROUNDTAIL(a, b, e, i, 0x5A827999) 78 | 79 | #define ROUND1(a, b, c, d, e, i) \ 80 | SCHEDULE(i, e) \ 81 | movl %b, %esi; \ 82 | xorl %c, %esi; \ 83 | xorl %d, %esi; \ 84 | ROUNDTAIL(a, b, e, i, 0x6ED9EBA1) 85 | 86 | #define ROUND2(a, b, c, d, e, i) \ 87 | SCHEDULE(i, e) \ 88 | movl %c, %esi; \ 89 | movl %c, %edi; \ 90 | orl %d, %esi; \ 91 | andl %b, %esi; \ 92 | andl %d, %edi; \ 93 | orl %edi, %esi; \ 94 | ROUNDTAIL(a, b, e, i, -0x70E44324) 95 | 96 | #define ROUND3(a, b, c, d, e, i) \ 97 | SCHEDULE(i, e) \ 98 | movl %b, %esi; \ 99 | xorl %c, %esi; \ 100 | xorl %d, %esi; \ 101 | ROUNDTAIL(a, b, e, i, -0x359D3E2A) 102 | 103 | #define ROUNDTAIL(a, b, e, i, k) \ 104 | roll $30, %b; \ 105 | leal k(%e,%esi), %e; \ 106 | movl %a, %esi; \ 107 | roll $5, %esi; \ 108 | addl %esi, %e; 109 | 110 | /* Save registers, allocate scratch space */ 111 | movq %rbx, %xmm0 112 | movq %rbp, %xmm1 113 | subq $64, %rsp 114 | 115 | /* Load arguments */ 116 | movq %rdi, %r8 117 | movl 0(%rdi), %eax /* a */ 118 | movl 4(%rdi), %ebx /* b */ 119 | movl 8(%rdi), %ecx /* c */ 120 | movl 12(%rdi), %edx /* d */ 121 | movl 16(%rdi), %ebp /* e */ 122 | movq %rsi, %rdi 123 | 124 | /* 80 rounds of hashing */ 125 | ROUND0a(eax, ebx, ecx, edx, ebp, 0) 126 | ROUND0a(ebp, eax, ebx, ecx, edx, 1) 127 | ROUND0a(edx, ebp, eax, ebx, ecx, 2) 128 | ROUND0a(ecx, edx, ebp, eax, ebx, 3) 129 | ROUND0a(ebx, ecx, edx, ebp, eax, 4) 130 | ROUND0a(eax, ebx, ecx, edx, ebp, 5) 131 | ROUND0a(ebp, eax, ebx, ecx, edx, 6) 132 | ROUND0a(edx, ebp, eax, ebx, ecx, 7) 133 | ROUND0a(ecx, edx, ebp, eax, ebx, 8) 134 | ROUND0a(ebx, ecx, edx, ebp, eax, 9) 135 | ROUND0a(eax, ebx, ecx, edx, ebp, 10) 136 | ROUND0a(ebp, eax, ebx, ecx, edx, 11) 137 | ROUND0a(edx, ebp, eax, ebx, ecx, 12) 138 | ROUND0a(ecx, edx, ebp, eax, ebx, 13) 139 | ROUND0a(ebx, ecx, edx, ebp, eax, 14) 140 | ROUND0a(eax, ebx, ecx, edx, ebp, 15) 141 | ROUND0b(ebp, eax, ebx, ecx, edx, 16) 142 | ROUND0b(edx, ebp, eax, ebx, ecx, 17) 143 | ROUND0b(ecx, edx, ebp, eax, ebx, 18) 
144 | ROUND0b(ebx, ecx, edx, ebp, eax, 19) 145 | ROUND1(eax, ebx, ecx, edx, ebp, 20) 146 | ROUND1(ebp, eax, ebx, ecx, edx, 21) 147 | ROUND1(edx, ebp, eax, ebx, ecx, 22) 148 | ROUND1(ecx, edx, ebp, eax, ebx, 23) 149 | ROUND1(ebx, ecx, edx, ebp, eax, 24) 150 | ROUND1(eax, ebx, ecx, edx, ebp, 25) 151 | ROUND1(ebp, eax, ebx, ecx, edx, 26) 152 | ROUND1(edx, ebp, eax, ebx, ecx, 27) 153 | ROUND1(ecx, edx, ebp, eax, ebx, 28) 154 | ROUND1(ebx, ecx, edx, ebp, eax, 29) 155 | ROUND1(eax, ebx, ecx, edx, ebp, 30) 156 | ROUND1(ebp, eax, ebx, ecx, edx, 31) 157 | ROUND1(edx, ebp, eax, ebx, ecx, 32) 158 | ROUND1(ecx, edx, ebp, eax, ebx, 33) 159 | ROUND1(ebx, ecx, edx, ebp, eax, 34) 160 | ROUND1(eax, ebx, ecx, edx, ebp, 35) 161 | ROUND1(ebp, eax, ebx, ecx, edx, 36) 162 | ROUND1(edx, ebp, eax, ebx, ecx, 37) 163 | ROUND1(ecx, edx, ebp, eax, ebx, 38) 164 | ROUND1(ebx, ecx, edx, ebp, eax, 39) 165 | ROUND2(eax, ebx, ecx, edx, ebp, 40) 166 | ROUND2(ebp, eax, ebx, ecx, edx, 41) 167 | ROUND2(edx, ebp, eax, ebx, ecx, 42) 168 | ROUND2(ecx, edx, ebp, eax, ebx, 43) 169 | ROUND2(ebx, ecx, edx, ebp, eax, 44) 170 | ROUND2(eax, ebx, ecx, edx, ebp, 45) 171 | ROUND2(ebp, eax, ebx, ecx, edx, 46) 172 | ROUND2(edx, ebp, eax, ebx, ecx, 47) 173 | ROUND2(ecx, edx, ebp, eax, ebx, 48) 174 | ROUND2(ebx, ecx, edx, ebp, eax, 49) 175 | ROUND2(eax, ebx, ecx, edx, ebp, 50) 176 | ROUND2(ebp, eax, ebx, ecx, edx, 51) 177 | ROUND2(edx, ebp, eax, ebx, ecx, 52) 178 | ROUND2(ecx, edx, ebp, eax, ebx, 53) 179 | ROUND2(ebx, ecx, edx, ebp, eax, 54) 180 | ROUND2(eax, ebx, ecx, edx, ebp, 55) 181 | ROUND2(ebp, eax, ebx, ecx, edx, 56) 182 | ROUND2(edx, ebp, eax, ebx, ecx, 57) 183 | ROUND2(ecx, edx, ebp, eax, ebx, 58) 184 | ROUND2(ebx, ecx, edx, ebp, eax, 59) 185 | ROUND3(eax, ebx, ecx, edx, ebp, 60) 186 | ROUND3(ebp, eax, ebx, ecx, edx, 61) 187 | ROUND3(edx, ebp, eax, ebx, ecx, 62) 188 | ROUND3(ecx, edx, ebp, eax, ebx, 63) 189 | ROUND3(ebx, ecx, edx, ebp, eax, 64) 190 | ROUND3(eax, ebx, ecx, edx, ebp, 65) 191 | ROUND3(ebp, eax, 
ebx, ecx, edx, 66) 192 | ROUND3(edx, ebp, eax, ebx, ecx, 67) 193 | ROUND3(ecx, edx, ebp, eax, ebx, 68) 194 | ROUND3(ebx, ecx, edx, ebp, eax, 69) 195 | ROUND3(eax, ebx, ecx, edx, ebp, 70) 196 | ROUND3(ebp, eax, ebx, ecx, edx, 71) 197 | ROUND3(edx, ebp, eax, ebx, ecx, 72) 198 | ROUND3(ecx, edx, ebp, eax, ebx, 73) 199 | ROUND3(ebx, ecx, edx, ebp, eax, 74) 200 | ROUND3(eax, ebx, ecx, edx, ebp, 75) 201 | ROUND3(ebp, eax, ebx, ecx, edx, 76) 202 | ROUND3(edx, ebp, eax, ebx, ecx, 77) 203 | ROUND3(ecx, edx, ebp, eax, ebx, 78) 204 | ROUND3(ebx, ecx, edx, ebp, eax, 79) 205 | 206 | /* Save updated state */ 207 | addl %eax, 0(%r8) 208 | addl %ebx, 4(%r8) 209 | addl %ecx, 8(%r8) 210 | addl %edx, 12(%r8) 211 | addl %ebp, 16(%r8) 212 | 213 | /* Restore registers */ 214 | movq %xmm0, %rbx 215 | movq %xmm1, %rbp 216 | addq $64, %rsp 217 | retq 218 | -------------------------------------------------------------------------------- /sha1/src/x86.S: -------------------------------------------------------------------------------- 1 | /* 2 | * SHA-1 hash in x86 assembly 3 | * 4 | * Copyright (c) 2014 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-sha1-hash-implementation-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 22 | */ 23 | 24 | 25 | /* void sha1_compress(uint32_t state[5], const uint8_t block[64]) */ 26 | #if defined(__APPLE__) || defined(_WIN32) 27 | .globl _sha1_compress 28 | _sha1_compress: 29 | #else 30 | .globl sha1_compress 31 | sha1_compress: 32 | #endif 33 | /* 34 | * Storage usage: 35 | * Bytes Location Description 36 | * 4 eax SHA-1 state variable A 37 | * 4 ebx SHA-1 state variable B 38 | * 4 ecx SHA-1 state variable C 39 | * 4 edx SHA-1 state variable D 40 | * 4 ebp SHA-1 state variable E 41 | * 4 esi Temporary for calculation per round 42 | * 4 edi (First 16 rounds) base address of block array argument (read-only); (last 64 rounds) temporary for calculation per round 43 | * 4 esp x86 stack pointer 44 | * 64 [esp+ 0] Circular buffer of most recent 16 key schedule items, 4 bytes each 45 | * 4 [esp+64] Caller's value of ebx 46 | * 4 [esp+68] Caller's value of esi 47 | * 4 [esp+72] Caller's value of edi 48 | * 4 [esp+76] Caller's value of ebp 49 | */ 50 | 51 | #define ROUND0a(a, b, c, d, e, i) \ 52 | movl (i*4)(%edi), %esi; \ 53 | bswapl %esi; \ 54 | movl %esi, (i*4)(%esp); \ 55 | addl %esi, %e; \ 56 | movl %c, %esi; \ 57 | xorl %d, %esi; \ 58 | andl %b, %esi; \ 59 | xorl %d, %esi; \ 60 | ROUNDTAIL(a, b, e, i, 0x5A827999) 61 | 62 | #define SCHEDULE(i, e) \ 63 | movl (((i- 3)&0xF)*4)(%esp), %esi; \ 64 | xorl (((i- 8)&0xF)*4)(%esp), %esi; \ 65 | xorl (((i-14)&0xF)*4)(%esp), %esi; \ 66 | xorl (((i-16)&0xF)*4)(%esp), %esi; \ 67 | roll $1, %esi; \ 68 | addl %esi, 
%e; \ 69 | movl %esi, ((i&0xF)*4)(%esp); 70 | 71 | #define ROUND0b(a, b, c, d, e, i) \ 72 | SCHEDULE(i, e) \ 73 | movl %c, %esi; \ 74 | xorl %d, %esi; \ 75 | andl %b, %esi; \ 76 | xorl %d, %esi; \ 77 | ROUNDTAIL(a, b, e, i, 0x5A827999) 78 | 79 | #define ROUND1(a, b, c, d, e, i) \ 80 | SCHEDULE(i, e) \ 81 | movl %b, %esi; \ 82 | xorl %c, %esi; \ 83 | xorl %d, %esi; \ 84 | ROUNDTAIL(a, b, e, i, 0x6ED9EBA1) 85 | 86 | #define ROUND2(a, b, c, d, e, i) \ 87 | SCHEDULE(i, e) \ 88 | movl %c, %esi; \ 89 | movl %c, %edi; \ 90 | orl %d, %esi; \ 91 | andl %b, %esi; \ 92 | andl %d, %edi; \ 93 | orl %edi, %esi; \ 94 | ROUNDTAIL(a, b, e, i, 0x8F1BBCDC) 95 | 96 | #define ROUND3(a, b, c, d, e, i) \ 97 | SCHEDULE(i, e) \ 98 | movl %b, %esi; \ 99 | xorl %c, %esi; \ 100 | xorl %d, %esi; \ 101 | ROUNDTAIL(a, b, e, i, 0xCA62C1D6) 102 | 103 | #define ROUNDTAIL(a, b, e, i, k) \ 104 | roll $30, %b; \ 105 | leal k(%e,%esi), %e; \ 106 | movl %a, %esi; \ 107 | roll $5, %esi; \ 108 | addl %esi, %e; 109 | 110 | /* Save registers */ 111 | subl $80, %esp 112 | movl %ebx, 64(%esp) 113 | movl %esi, 68(%esp) 114 | movl %edi, 72(%esp) 115 | movl %ebp, 76(%esp) 116 | 117 | /* Load arguments */ 118 | movl 84(%esp), %esi /* state */ 119 | movl 88(%esp), %edi /* block */ 120 | movl 0(%esi), %eax /* a */ 121 | movl 4(%esi), %ebx /* b */ 122 | movl 8(%esi), %ecx /* c */ 123 | movl 12(%esi), %edx /* d */ 124 | movl 16(%esi), %ebp /* e */ 125 | 126 | /* 80 rounds of hashing */ 127 | ROUND0a(eax, ebx, ecx, edx, ebp, 0) 128 | ROUND0a(ebp, eax, ebx, ecx, edx, 1) 129 | ROUND0a(edx, ebp, eax, ebx, ecx, 2) 130 | ROUND0a(ecx, edx, ebp, eax, ebx, 3) 131 | ROUND0a(ebx, ecx, edx, ebp, eax, 4) 132 | ROUND0a(eax, ebx, ecx, edx, ebp, 5) 133 | ROUND0a(ebp, eax, ebx, ecx, edx, 6) 134 | ROUND0a(edx, ebp, eax, ebx, ecx, 7) 135 | ROUND0a(ecx, edx, ebp, eax, ebx, 8) 136 | ROUND0a(ebx, ecx, edx, ebp, eax, 9) 137 | ROUND0a(eax, ebx, ecx, edx, ebp, 10) 138 | ROUND0a(ebp, eax, ebx, ecx, edx, 11) 139 | ROUND0a(edx, ebp, eax, ebx, 
ecx, 12) 140 | ROUND0a(ecx, edx, ebp, eax, ebx, 13) 141 | ROUND0a(ebx, ecx, edx, ebp, eax, 14) 142 | ROUND0a(eax, ebx, ecx, edx, ebp, 15) 143 | ROUND0b(ebp, eax, ebx, ecx, edx, 16) 144 | ROUND0b(edx, ebp, eax, ebx, ecx, 17) 145 | ROUND0b(ecx, edx, ebp, eax, ebx, 18) 146 | ROUND0b(ebx, ecx, edx, ebp, eax, 19) 147 | ROUND1(eax, ebx, ecx, edx, ebp, 20) 148 | ROUND1(ebp, eax, ebx, ecx, edx, 21) 149 | ROUND1(edx, ebp, eax, ebx, ecx, 22) 150 | ROUND1(ecx, edx, ebp, eax, ebx, 23) 151 | ROUND1(ebx, ecx, edx, ebp, eax, 24) 152 | ROUND1(eax, ebx, ecx, edx, ebp, 25) 153 | ROUND1(ebp, eax, ebx, ecx, edx, 26) 154 | ROUND1(edx, ebp, eax, ebx, ecx, 27) 155 | ROUND1(ecx, edx, ebp, eax, ebx, 28) 156 | ROUND1(ebx, ecx, edx, ebp, eax, 29) 157 | ROUND1(eax, ebx, ecx, edx, ebp, 30) 158 | ROUND1(ebp, eax, ebx, ecx, edx, 31) 159 | ROUND1(edx, ebp, eax, ebx, ecx, 32) 160 | ROUND1(ecx, edx, ebp, eax, ebx, 33) 161 | ROUND1(ebx, ecx, edx, ebp, eax, 34) 162 | ROUND1(eax, ebx, ecx, edx, ebp, 35) 163 | ROUND1(ebp, eax, ebx, ecx, edx, 36) 164 | ROUND1(edx, ebp, eax, ebx, ecx, 37) 165 | ROUND1(ecx, edx, ebp, eax, ebx, 38) 166 | ROUND1(ebx, ecx, edx, ebp, eax, 39) 167 | ROUND2(eax, ebx, ecx, edx, ebp, 40) 168 | ROUND2(ebp, eax, ebx, ecx, edx, 41) 169 | ROUND2(edx, ebp, eax, ebx, ecx, 42) 170 | ROUND2(ecx, edx, ebp, eax, ebx, 43) 171 | ROUND2(ebx, ecx, edx, ebp, eax, 44) 172 | ROUND2(eax, ebx, ecx, edx, ebp, 45) 173 | ROUND2(ebp, eax, ebx, ecx, edx, 46) 174 | ROUND2(edx, ebp, eax, ebx, ecx, 47) 175 | ROUND2(ecx, edx, ebp, eax, ebx, 48) 176 | ROUND2(ebx, ecx, edx, ebp, eax, 49) 177 | ROUND2(eax, ebx, ecx, edx, ebp, 50) 178 | ROUND2(ebp, eax, ebx, ecx, edx, 51) 179 | ROUND2(edx, ebp, eax, ebx, ecx, 52) 180 | ROUND2(ecx, edx, ebp, eax, ebx, 53) 181 | ROUND2(ebx, ecx, edx, ebp, eax, 54) 182 | ROUND2(eax, ebx, ecx, edx, ebp, 55) 183 | ROUND2(ebp, eax, ebx, ecx, edx, 56) 184 | ROUND2(edx, ebp, eax, ebx, ecx, 57) 185 | ROUND2(ecx, edx, ebp, eax, ebx, 58) 186 | ROUND2(ebx, ecx, edx, ebp, eax, 59) 187 | 
ROUND3(eax, ebx, ecx, edx, ebp, 60) 188 | ROUND3(ebp, eax, ebx, ecx, edx, 61) 189 | ROUND3(edx, ebp, eax, ebx, ecx, 62) 190 | ROUND3(ecx, edx, ebp, eax, ebx, 63) 191 | ROUND3(ebx, ecx, edx, ebp, eax, 64) 192 | ROUND3(eax, ebx, ecx, edx, ebp, 65) 193 | ROUND3(ebp, eax, ebx, ecx, edx, 66) 194 | ROUND3(edx, ebp, eax, ebx, ecx, 67) 195 | ROUND3(ecx, edx, ebp, eax, ebx, 68) 196 | ROUND3(ebx, ecx, edx, ebp, eax, 69) 197 | ROUND3(eax, ebx, ecx, edx, ebp, 70) 198 | ROUND3(ebp, eax, ebx, ecx, edx, 71) 199 | ROUND3(edx, ebp, eax, ebx, ecx, 72) 200 | ROUND3(ecx, edx, ebp, eax, ebx, 73) 201 | ROUND3(ebx, ecx, edx, ebp, eax, 74) 202 | ROUND3(eax, ebx, ecx, edx, ebp, 75) 203 | ROUND3(ebp, eax, ebx, ecx, edx, 76) 204 | ROUND3(edx, ebp, eax, ebx, ecx, 77) 205 | ROUND3(ecx, edx, ebp, eax, ebx, 78) 206 | ROUND3(ebx, ecx, edx, ebp, eax, 79) 207 | 208 | /* Save updated state */ 209 | movl 84(%esp), %esi 210 | addl %eax, 0(%esi) 211 | addl %ebx, 4(%esi) 212 | addl %ecx, 8(%esi) 213 | addl %edx, 12(%esi) 214 | addl %ebp, 16(%esi) 215 | 216 | /* Restore registers */ 217 | movl 64(%esp), %ebx 218 | movl 68(%esp), %esi 219 | movl 72(%esp), %edi 220 | movl 76(%esp), %ebp 221 | addl $80, %esp 222 | retl 223 | -------------------------------------------------------------------------------- /sha2/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## 0.6.4 (2024-05-06) 9 | ### Changed 10 | - Emit compilation error when compiled for Windows targets. 
([#79]) 11 | 12 | [#79]: https://github.com/RustCrypto/asm-hashes/pull/79 13 | 14 | ## 0.6.3 (2023-08-07) 15 | ### Changed 16 | - Prefix x86 asm symbols with `_` on Windows like on Apple ([#61]) 17 | 18 | [#61]: https://github.com/RustCrypto/asm-hashes/pull/61 19 | 20 | ## 0.6.2 (2021-07-16) 21 | ### Changed 22 | - Prefix x86 asm symbols with `_` on Windows like on Apple ([#61]) 23 | - Fix deprecated use of `cc::Build::compile` ([#59]) 24 | 25 | [#61]: https://github.com/RustCrypto/asm-hashes/pull/61 26 | [#59]: https://github.com/RustCrypto/asm-hashes/pull/59 27 | 28 | ## 0.6.1 (2021-05-05) 29 | ### Added 30 | - `aarch64` implementation of SHA-256 for the M1 chip ([#35]) 31 | 32 | [#35]: https://github.com/RustCrypto/asm-hashes/pull/35 33 | 34 | ## 0.6.0 (2021-02-09) 35 | 36 | ## 0.5.5 (2021-01-25) 37 | 38 | ## 0.5.4 (2020-06-11) 39 | 40 | ## 0.5.3 (2020-01-05) 41 | 42 | ## 0.5.2 (2019-04-15) 43 | 44 | ## 0.5.1 (2018-05-15) 45 | 46 | ## 0.5.0 (2018-04-27) 47 | 48 | ## 0.4.0 (2018-03-19) 49 | 50 | ## 0.3.0 (2017-06-27) 51 | 52 | ## 0.2.1 (2017-05-09) 53 | 54 | ## 0.2.0 (2017-05-08) 55 | 56 | ## 0.1.0 (2017-05-07) 57 | -------------------------------------------------------------------------------- /sha2/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sha2-asm" 3 | version = "0.6.4" 4 | authors = ["RustCrypto Developers"] 5 | license = "MIT" 6 | description = "Assembly implementation of SHA-2 compression functions" 7 | documentation = "https://docs.rs/sha2-asm" 8 | repository = "https://github.com/RustCrypto/asm-hashes" 9 | keywords = ["crypto", "sha2", "asm"] 10 | categories = ["cryptography", "no-std"] 11 | edition = "2018" 12 | 13 | [build-dependencies] 14 | cc = "1.0" 15 | -------------------------------------------------------------------------------- /sha2/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 RustCrypto Developers 2 
| Copyright (c) 2017 Project Nayuki, Artyom Pavlov 3 | 4 | Permission is hereby granted, free of charge, to any 5 | person obtaining a copy of this software and associated 6 | documentation files (the "Software"), to deal in the 7 | Software without restriction, including without 8 | limitation the rights to use, copy, modify, merge, 9 | publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software 11 | is furnished to do so, subject to the following 12 | conditions: 13 | 14 | The above copyright notice and this permission notice 15 | shall be included in all copies or substantial portions 16 | of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 19 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 20 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 21 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 22 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 25 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 26 | DEALINGS IN THE SOFTWARE. 
27 | -------------------------------------------------------------------------------- /sha2/benches/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![feature(test)] 3 | 4 | extern crate test; 5 | 6 | use test::Bencher; 7 | 8 | #[bench] 9 | fn bench_compress256(b: &mut Bencher) { 10 | let mut state = Default::default(); 11 | let data = [[0u8; 64]]; 12 | 13 | b.iter(|| { 14 | sha2_asm::compress256(&mut state, &data); 15 | }); 16 | 17 | b.bytes = (data.len() * data[0].len()) as u64; // bytes hashed per iteration (blocks * 64), not the block count 18 | } 19 | 20 | #[cfg(not(target_arch = "aarch64"))] 21 | #[bench] 22 | fn bench_compress512(b: &mut Bencher) { 23 | let mut state = Default::default(); 24 | let data = [[0u8; 128]]; 25 | 26 | b.iter(|| { 27 | sha2_asm::compress512(&mut state, &data); 28 | }); 29 | 30 | b.bytes = (data.len() * data[0].len()) as u64; // bytes hashed per iteration (blocks * 128), not the block count 31 | } 32 | -------------------------------------------------------------------------------- /sha2/build.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | fn main() { 4 | let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); 5 | let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default(); 6 | 7 | let mut build256 = cc::Build::new(); 8 | let (sha256_path, sha512_path) = if target_arch == "x86" { 9 | ("src/sha256_x86.S", "src/sha512_x86.S") 10 | } else if target_arch == "x86_64" { 11 | ("src/sha256_x64.S", "src/sha512_x64.S") 12 | } else if target_arch == "aarch64" && target_vendor == "apple" { 13 | build256.flag("-march=armv8-a+crypto"); 14 | ("src/sha256_aarch64_apple.S", "") 15 | } else if target_arch == "aarch64" { 16 | build256.flag("-march=armv8-a+crypto"); 17 | ("src/sha256_aarch64.S", "") 18 | } else { 19 | panic!("Unsupported target architecture: {}", target_arch); 20 | }; 21 | 22 | if target_arch != "aarch64" { 23 | cc::Build::new() 24 | .flag("-c") 25 | .file(sha512_path) 26 | .compile("libsha512.a"); 27 | } 28 |
build256.flag("-c").file(sha256_path).compile("sha256"); 29 | } 30 | -------------------------------------------------------------------------------- /sha2/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Assembly implementation of the [SHA-2] compression functions. 2 | //! 3 | //! This crate is not intended for direct use; most users should 4 | //! prefer the [`sha2`] crate with its `asm` feature enabled instead. 5 | //! 6 | //! Only x86, x86-64, and (partially) AArch64 architectures are 7 | //! currently supported. 8 | //! 9 | //! [SHA-2]: https://en.wikipedia.org/wiki/SHA-2 10 | //! [`sha2`]: https://crates.io/crates/sha2 11 | 12 | #![no_std] 13 | #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64")))] 14 | compile_error!("crate can only be used on x86, x86-64 and aarch64 architectures"); 15 | 16 | #[cfg(target_os = "windows")] 17 | compile_error!("crate does not support Windows targets"); 18 | 19 | #[link(name = "sha256", kind = "static")] 20 | extern "C" { 21 | fn sha256_compress(state: &mut [u32; 8], block: &[u8; 64]); 22 | } 23 | 24 | /// Safe wrapper around the assembly SHA-256 compression function: passes each 64-byte block of `blocks`, in order, to `sha256_compress` together with `state`. 25 | #[inline] 26 | pub fn compress256(state: &mut [u32; 8], blocks: &[[u8; 64]]) { 27 | for block in blocks { 28 | unsafe { sha256_compress(state, block) } 29 | } 30 | } 31 | 32 | #[cfg(not(target_arch = "aarch64"))] 33 | #[link(name = "sha512", kind = "static")] 34 | extern "C" { 35 | fn sha512_compress(state: &mut [u64; 8], block: &[u8; 128]); 36 | } 37 | 38 | /// Safe wrapper around assembly implementation of SHA512 compression function 39 | /// 40 | /// This function is available only on x86 and x86-64 targets.
41 | #[cfg(not(target_arch = "aarch64"))] 42 | #[inline] 43 | pub fn compress512(state: &mut [u64; 8], blocks: &[[u8; 128]]) { 44 | for block in blocks { 45 | unsafe { sha512_compress(state, block) } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /sha2/src/sha256_aarch64.S: -------------------------------------------------------------------------------- 1 | /* 2 | * SHA-256 hash in AArch64 assembly 3 | * 4 | * Copyright (c) 2020 Emmanuel Gil Peyrot . (MIT License) 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | * this software and associated documentation files (the "Software"), to deal in 8 | * the Software without restriction, including without limitation the rights to 9 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | * the Software, and to permit persons to whom the Software is furnished to do so, 11 | * subject to the following conditions: 12 | * - The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * - The Software is provided "as is", without warranty of any kind, express or 15 | * implied, including but not limited to the warranties of merchantability, 16 | * fitness for a particular purpose and noninfringement. In no event shall the 17 | * authors or copyright holders be liable for any claim, damages or other 18 | * liability, whether in an action of contract, tort or otherwise, arising from, 19 | * out of or in connection with the Software or the use or other dealings in the 20 | * Software. 
21 | */ 22 | 23 | 24 | /* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */ 25 | .global sha256_compress 26 | sha256_compress: 27 | /* 28 | * Storage usage: 29 | * Bytes Location Description 30 | * 8 x0 state argument (pointer) 31 | * 8 x1 block argument (pointer) 32 | * 8 x2 pointer to k (the whole table is loaded into q16-q31) 33 | * 16 q0 state0 34 | * 16 q1 state1 35 | * 16 q2 abef 36 | * 16 q3 cdgh 37 | * 16 q4 copy of abef taken before each sha256h, consumed by sha256h2 38 | * 16 q6, q7 message words + k, alternating per group of four rounds 39 | * 16 q8 W0 40 | * 16 q9 W1 41 | * 16 q10 W2 42 | * 16 q11 W3 43 | */ 44 | 45 | // save the lower half of q8-q11 46 | stp d8, d9, [sp,#-32]! 47 | stp d10, d11, [sp,#16] 48 | 49 | // Load state in registers 50 | ldp q0, q1, [x0] 51 | mov v2.16b, v0.16b 52 | mov v3.16b, v1.16b 53 | 54 | // Load block in registers 55 | ld1 {v8.4s-v11.4s}, [x1] 56 | 57 | // TODO: only do that on little endian 58 | rev32 v8.16b, v8.16b 59 | rev32 v9.16b, v9.16b 60 | rev32 v10.16b, v10.16b 61 | rev32 v11.16b, v11.16b 62 | 63 | // Compute the pointer to k 64 | adrp x2, .K 65 | add x2, x2, :lo12:.K 66 | 67 | // load k 68 | ld1 {v16.4s-v19.4s}, [x2], #64 69 | ld1 {v20.4s-v23.4s}, [x2], #64 70 | ld1 {v24.4s-v27.4s}, [x2], #64 71 | ld1 {v28.4s-v31.4s}, [x2] 72 | add v6.4s, v8.4s, v16.4s 73 | 74 | // Rounds 0-3 75 | sha256su0 v8.4s, v9.4s 76 | mov v4.16b, v2.16b 77 | add v7.4s, v9.4s, v17.4s 78 | sha256h q2, q3, v6.4s 79 | sha256h2 q3, q4, v6.4s 80 | sha256su1 v8.4s, v10.4s, v11.4s 81 | 82 | // Rounds 4-7 83 | sha256su0 v9.4s, v10.4s 84 | mov v4.16b, v2.16b 85 | add v6.4s, v10.4s, v18.4s 86 | sha256h q2, q3, v7.4s 87 | sha256h2 q3, q4, v7.4s 88 | sha256su1 v9.4s, v11.4s, v8.4s 89 | 90 | // Rounds 8-11 91 | sha256su0 v10.4s, v11.4s 92 | mov v4.16b, v2.16b 93 | add v7.4s, v11.4s, v19.4s 94 | sha256h q2, q3, v6.4s 95 | sha256h2 q3, q4, v6.4s 96 | sha256su1 v10.4s, v8.4s, v9.4s 97 | 98 | // Rounds 12-15 99 | sha256su0 v11.4s, v8.4s 100 | mov v4.16b, v2.16b 101 | add v6.4s, v8.4s, v20.4s 102 | sha256h q2, q3, v7.4s 103 | sha256h2 q3, q4, v7.4s 104 | sha256su1 v11.4s, v9.4s, v10.4s 105 | 106 | // Rounds
16-19 107 | sha256su0 v8.4s, v9.4s 108 | mov v4.16b, v2.16b 109 | add v7.4s, v9.4s, v21.4s 110 | sha256h q2, q3, v6.4s 111 | sha256h2 q3, q4, v6.4s 112 | sha256su1 v8.4s, v10.4s, v11.4s 113 | 114 | // Rounds 20-23 115 | sha256su0 v9.4s, v10.4s 116 | mov v4.16b, v2.16b 117 | add v6.4s, v10.4s, v22.4s 118 | sha256h q2, q3, v7.4s 119 | sha256h2 q3, q4, v7.4s 120 | sha256su1 v9.4s, v11.4s, v8.4s 121 | 122 | // Rounds 24-27 123 | sha256su0 v10.4s, v11.4s 124 | mov v4.16b, v2.16b 125 | add v7.4s, v11.4s, v23.4s 126 | sha256h q2, q3, v6.4s 127 | sha256h2 q3, q4, v6.4s 128 | sha256su1 v10.4s, v8.4s, v9.4s 129 | 130 | // Rounds 28-31 131 | sha256su0 v11.4s, v8.4s 132 | mov v4.16b, v2.16b 133 | add v6.4s, v8.4s, v24.4s 134 | sha256h q2, q3, v7.4s 135 | sha256h2 q3, q4, v7.4s 136 | sha256su1 v11.4s, v9.4s, v10.4s 137 | 138 | // Rounds 32-35 139 | sha256su0 v8.4s, v9.4s 140 | mov v4.16b, v2.16b 141 | add v7.4s, v9.4s, v25.4s 142 | sha256h q2, q3, v6.4s 143 | sha256h2 q3, q4, v6.4s 144 | sha256su1 v8.4s, v10.4s, v11.4s 145 | 146 | // Rounds 36-39 147 | sha256su0 v9.4s, v10.4s 148 | mov v4.16b, v2.16b 149 | add v6.4s, v10.4s, v26.4s 150 | sha256h q2, q3, v7.4s 151 | sha256h2 q3, q4, v7.4s 152 | sha256su1 v9.4s, v11.4s, v8.4s 153 | 154 | // Rounds 40-43 155 | sha256su0 v10.4s, v11.4s 156 | mov v4.16b, v2.16b 157 | add v7.4s, v11.4s, v27.4s 158 | sha256h q2, q3, v6.4s 159 | sha256h2 q3, q4, v6.4s 160 | sha256su1 v10.4s, v8.4s, v9.4s 161 | 162 | // Rounds 44-47 163 | sha256su0 v11.4s, v8.4s 164 | mov v4.16b, v2.16b 165 | add v6.4s, v8.4s, v28.4s 166 | sha256h q2, q3, v7.4s 167 | sha256h2 q3, q4, v7.4s 168 | sha256su1 v11.4s, v9.4s, v10.4s 169 | 170 | // Rounds 48-51 171 | mov v4.16b, v2.16b 172 | add v7.4s, v9.4s, v29.4s 173 | sha256h q2, q3, v6.4s 174 | sha256h2 q3, q4, v6.4s 175 | 176 | // Rounds 52-55 177 | mov v4.16b, v2.16b 178 | add v6.4s, v10.4s, v30.4s 179 | sha256h q2, q3, v7.4s 180 | sha256h2 q3, q4, v7.4s 181 | 182 | // Rounds 56-59 183 | mov v4.16b, v2.16b 184 | add 
v7.4s, v11.4s, v31.4s 185 | sha256h q2, q3, v6.4s 186 | sha256h2 q3, q4, v6.4s 187 | 188 | // Rounds 60-63 189 | mov v4.16b, v2.16b 190 | sha256h q2, q3, v7.4s 191 | sha256h2 q3, q4, v7.4s 192 | 193 | // Update state 194 | add v0.4s, v0.4s, v2.4s 195 | add v1.4s, v1.4s, v3.4s 196 | stp q0, q1, [x0] 197 | 198 | // restore 199 | ldp d10, d11, [sp,#16] 200 | ldp d8, d9, [sp],#32 201 | 202 | ret 203 | .align 4 204 | .K: 205 | .word 0x428A2F98 206 | .word 0x71374491 207 | .word 0xB5C0FBCF 208 | .word 0xE9B5DBA5 209 | .word 0x3956C25B 210 | .word 0x59F111F1 211 | .word 0x923F82A4 212 | .word 0xAB1C5ED5 213 | .word 0xD807AA98 214 | .word 0x12835B01 215 | .word 0x243185BE 216 | .word 0x550C7DC3 217 | .word 0x72BE5D74 218 | .word 0x80DEB1FE 219 | .word 0x9BDC06A7 220 | .word 0xC19BF174 221 | .word 0xE49B69C1 222 | .word 0xEFBE4786 223 | .word 0x0FC19DC6 224 | .word 0x240CA1CC 225 | .word 0x2DE92C6F 226 | .word 0x4A7484AA 227 | .word 0x5CB0A9DC 228 | .word 0x76F988DA 229 | .word 0x983E5152 230 | .word 0xA831C66D 231 | .word 0xB00327C8 232 | .word 0xBF597FC7 233 | .word 0xC6E00BF3 234 | .word 0xD5A79147 235 | .word 0x06CA6351 236 | .word 0x14292967 237 | .word 0x27B70A85 238 | .word 0x2E1B2138 239 | .word 0x4D2C6DFC 240 | .word 0x53380D13 241 | .word 0x650A7354 242 | .word 0x766A0ABB 243 | .word 0x81C2C92E 244 | .word 0x92722C85 245 | .word 0xA2BFE8A1 246 | .word 0xA81A664B 247 | .word 0xC24B8B70 248 | .word 0xC76C51A3 249 | .word 0xD192E819 250 | .word 0xD6990624 251 | .word 0xF40E3585 252 | .word 0x106AA070 253 | .word 0x19A4C116 254 | .word 0x1E376C08 255 | .word 0x2748774C 256 | .word 0x34B0BCB5 257 | .word 0x391C0CB3 258 | .word 0x4ED8AA4A 259 | .word 0x5B9CCA4F 260 | .word 0x682E6FF3 261 | .word 0x748F82EE 262 | .word 0x78A5636F 263 | .word 0x84C87814 264 | .word 0x8CC70208 265 | .word 0x90BEFFFA 266 | .word 0xA4506CEB 267 | .word 0xBEF9A3F7 268 | .word 0xC67178F2 269 | -------------------------------------------------------------------------------- 
/sha2/src/sha256_aarch64_apple.S: -------------------------------------------------------------------------------- 1 | /* 2 | * SHA-256 hash in AArch64 assembly for macos/M1 3 | * 4 | * Based on the following C intrinsics implementation: 5 | * 6 | * 7 | * Original C written and placed in public domain by Jeffrey Walton. 8 | * Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and 9 | * Barry O'Rourke for the mbedTLS project. 10 | */ 11 | 12 | 13 | /* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */ 14 | .global _sha256_compress 15 | _sha256_compress: 16 | mov x8, #0 17 | ldp q0, q1, [x0] 18 | ldp q2, q3, [x1] 19 | ldp q4, q5, [x1, #32] 20 | stp q2, q3, [sp, #-64]! 21 | stp q4, q5, [sp, #32] 22 | mov x9, sp 23 | LBB0_1: 24 | ldr q2, [x9, x8] 25 | rev32.16b v2, v2 26 | str q2, [x9, x8] 27 | add x8, x8, #16 28 | cmp x8, #64 29 | b.ne LBB0_1 30 | adrp x8, K_0@PAGE 31 | ldr q2, [x8, K_0@PAGEOFF] 32 | ldp q6, q4, [sp] 33 | add.4s v3, v6, v2 34 | 35 | // Rounds 0-3 36 | sha256su0.4s v6, v4 37 | adrp x8, K_1@PAGE 38 | ldr q2, [x8, K_1@PAGEOFF] 39 | add.4s v7, v4, v2 40 | mov.16b v16, v0 41 | sha256h.4s q16, q1, v3 42 | mov.16b v2, v1 43 | sha256h2.4s q2, q0, v3 44 | ldp q5, q3, [sp, #32] 45 | sha256su1.4s v6, v5, v3 46 | 47 | // Rounds 4-7 48 | sha256su0.4s v4, v5 49 | adrp x8, K_2@PAGE 50 | ldr q17, [x8, K_2@PAGEOFF] 51 | add.4s v17, v5, v17 52 | mov.16b v18, v16 53 | sha256h.4s q18, q2, v7 54 | sha256h2.4s q2, q16, v7 55 | sha256su1.4s v4, v3, v6 56 | 57 | // Rounds 8-11 58 | sha256su0.4s v5, v3 59 | adrp x8, K_3@PAGE 60 | ldr q7, [x8, K_3@PAGEOFF] 61 | add.4s v7, v3, v7 62 | mov.16b v16, v18 63 | sha256h.4s q16, q2, v17 64 | sha256h2.4s q2, q18, v17 65 | sha256su1.4s v5, v6, v4 66 | 67 | // Rounds 12-15 68 | sha256su0.4s v3, v6 69 | adrp x8, K_4@PAGE 70 | ldr q17, [x8, K_4@PAGEOFF] 71 | add.4s v17, v6, v17 72 | mov.16b v18, v16 73 | sha256h.4s q18, q2, v7 74 | sha256h2.4s q2, q16, v7 75 | sha256su1.4s v3, v4, v5 76 | 77 | // Rounds 
16-19 78 | sha256su0.4s v6, v4 79 | adrp x8, K_5@PAGE 80 | ldr q7, [x8, K_5@PAGEOFF] 81 | add.4s v7, v4, v7 82 | mov.16b v16, v18 83 | sha256h.4s q16, q2, v17 84 | sha256h2.4s q2, q18, v17 85 | sha256su1.4s v6, v5, v3 86 | 87 | // Rounds 20-23 88 | sha256su0.4s v4, v5 89 | adrp x8, K_6@PAGE 90 | ldr q17, [x8, K_6@PAGEOFF] 91 | add.4s v17, v5, v17 92 | mov.16b v18, v16 93 | sha256h.4s q18, q2, v7 94 | sha256h2.4s q2, q16, v7 95 | sha256su1.4s v4, v3, v6 96 | 97 | // Rounds 24-27 98 | sha256su0.4s v5, v3 99 | adrp x8, K_7@PAGE 100 | ldr q7, [x8, K_7@PAGEOFF] 101 | add.4s v7, v3, v7 102 | mov.16b v16, v18 103 | sha256h.4s q16, q2, v17 104 | sha256h2.4s q2, q18, v17 105 | sha256su1.4s v5, v6, v4 106 | 107 | // Rounds 28-31 108 | sha256su0.4s v3, v6 109 | adrp x8, K_8@PAGE 110 | ldr q17, [x8, K_8@PAGEOFF] 111 | add.4s v17, v6, v17 112 | mov.16b v18, v16 113 | sha256h.4s q18, q2, v7 114 | sha256h2.4s q2, q16, v7 115 | sha256su1.4s v3, v4, v5 116 | 117 | // Rounds 32-35 118 | sha256su0.4s v6, v4 119 | adrp x8, K_9@PAGE 120 | ldr q7, [x8, K_9@PAGEOFF] 121 | add.4s v7, v4, v7 122 | mov.16b v16, v18 123 | sha256h.4s q16, q2, v17 124 | sha256h2.4s q2, q18, v17 125 | sha256su1.4s v6, v5, v3 126 | 127 | // Rounds 36-39 128 | sha256su0.4s v4, v5 129 | adrp x8, K_10@PAGE 130 | ldr q17, [x8, K_10@PAGEOFF] 131 | add.4s v17, v5, v17 132 | mov.16b v18, v16 133 | sha256h.4s q18, q2, v7 134 | sha256h2.4s q2, q16, v7 135 | sha256su1.4s v4, v3, v6 136 | 137 | // Rounds 40-43 138 | sha256su0.4s v5, v3 139 | adrp x8, K_11@PAGE 140 | ldr q7, [x8, K_11@PAGEOFF] 141 | add.4s v7, v3, v7 142 | mov.16b v16, v18 143 | sha256h.4s q16, q2, v17 144 | sha256h2.4s q2, q18, v17 145 | sha256su1.4s v5, v6, v4 146 | 147 | // Rounds 44-47 148 | sha256su0.4s v3, v6 149 | adrp x8, K_12@PAGE 150 | ldr q17, [x8, K_12@PAGEOFF] 151 | add.4s v6, v6, v17 152 | mov.16b v17, v16 153 | sha256h.4s q17, q2, v7 154 | sha256h2.4s q2, q16, v7 155 | sha256su1.4s v3, v4, v5 156 | 157 | // Rounds 48-51 158 | adrp x8, 
K_13@PAGE 159 | ldr q7, [x8, K_13@PAGEOFF] 160 | add.4s v4, v4, v7 161 | mov.16b v7, v17 162 | sha256h.4s q7, q2, v6 163 | sha256h2.4s q2, q17, v6 164 | 165 | // Rounds 52-55 166 | adrp x8, K_14@PAGE 167 | ldr q6, [x8, K_14@PAGEOFF] 168 | add.4s v5, v5, v6 169 | mov.16b v6, v7 170 | sha256h.4s q6, q2, v4 171 | sha256h2.4s q2, q7, v4 172 | 173 | // Rounds 56-59 174 | adrp x8, K_15@PAGE 175 | ldr q4, [x8, K_15@PAGEOFF] 176 | add.4s v3, v3, v4 177 | mov.16b v4, v6 178 | sha256h.4s q4, q2, v5 179 | sha256h2.4s q2, q6, v5 180 | 181 | // Rounds 60-63 182 | mov.16b v5, v4 183 | sha256h.4s q5, q2, v3 184 | sha256h2.4s q2, q4, v3 185 | 186 | // Update state 187 | add.4s v0, v5, v0 188 | add.4s v1, v2, v1 189 | 190 | // restore 191 | stp q0, q1, [x0] 192 | add sp, sp, #64 193 | 194 | ret 195 | 196 | 197 | .align 4 198 | K_0: 199 | .long 1116352408 200 | .long 1899447441 201 | .long 3049323471 202 | .long 3921009573 203 | .align 4 204 | K_1: 205 | .long 961987163 206 | .long 1508970993 207 | .long 2453635748 208 | .long 2870763221 209 | .align 4 210 | K_2: 211 | .long 3624381080 212 | .long 310598401 213 | .long 607225278 214 | .long 1426881987 215 | .align 4 216 | K_3: 217 | .long 1925078388 218 | .long 2162078206 219 | .long 2614888103 220 | .long 3248222580 221 | .align 4 222 | K_4: 223 | .long 3835390401 224 | .long 4022224774 225 | .long 264347078 226 | .long 604807628 227 | .align 4 228 | K_5: 229 | .long 770255983 230 | .long 1249150122 231 | .long 1555081692 232 | .long 1996064986 233 | .align 4 234 | K_6: 235 | .long 2554220882 236 | .long 2821834349 237 | .long 2952996808 238 | .long 3210313671 239 | .align 4 240 | K_7: 241 | .long 3336571891 242 | .long 3584528711 243 | .long 113926993 244 | .long 338241895 245 | .align 4 246 | K_8: 247 | .long 666307205 248 | .long 773529912 249 | .long 1294757372 250 | .long 1396182291 251 | .align 4 252 | K_9: 253 | .long 1695183700 254 | .long 1986661051 255 | .long 2177026350 256 | .long 2456956037 257 | .align 4 258 | K_10: 
259 | .long 2730485921 260 | .long 2820302411 261 | .long 3259730800 262 | .long 3345764771 263 | .align 4 264 | K_11: 265 | .long 3516065817 266 | .long 3600352804 267 | .long 4094571909 268 | .long 275423344 269 | .align 4 270 | K_12: 271 | .long 430227734 272 | .long 506948616 273 | .long 659060556 274 | .long 883997877 275 | .align 4 276 | K_13: 277 | .long 958139571 278 | .long 1322822218 279 | .long 1537002063 280 | .long 1747873779 281 | .align 4 282 | K_14: 283 | .long 1955562222 284 | .long 2024104815 285 | .long 2227730452 286 | .long 2361852424 287 | .align 4 288 | K_15: 289 | .long 2428436474 290 | .long 2756734187 291 | .long 3204031479 292 | .long 3329325298 293 | -------------------------------------------------------------------------------- /sha2/src/sha256_x64.S: -------------------------------------------------------------------------------- 1 | /* 2 | * SHA-256 hash in x86-64 assembly 3 | * 4 | * Copyright (c) 2015 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. 
In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 22 | */ 23 | 24 | 25 | /* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */ 26 | #ifdef __APPLE__ 27 | .globl _sha256_compress 28 | _sha256_compress: 29 | #else 30 | .globl sha256_compress 31 | sha256_compress: 32 | #endif 33 | /* 34 | * Storage usage: 35 | * Bytes Location Description 36 | * 4 eax Temporary for calculation per round 37 | * 4 ebx Temporary for calculation per round 38 | * 4 ecx Temporary for calculation per round 39 | * 4 edx Temporary for calculation per round 40 | * 8 rsi Base address of block array argument (read-only) 41 | * 8 rdi Base address of state array argument (read-only) 42 | * 8 rsp x86-64 stack pointer 43 | * 4 r8d SHA-256 state variable A 44 | * 4 r9d SHA-256 state variable B 45 | * 4 r10d SHA-256 state variable C 46 | * 4 r11d SHA-256 state variable D 47 | * 4 r12d SHA-256 state variable E 48 | * 4 r13d SHA-256 state variable F 49 | * 4 r14d SHA-256 state variable G 50 | * 4 r15d SHA-256 state variable H 51 | * 64 [rsp+0] Circular buffer of most recent 16 key schedule items, 4 bytes each 52 | * 16 xmm0 Caller's value of r10 (only low 64 bits are used) 53 | * 16 xmm1 Caller's value of r11 (only low 64 bits are used) 54 | * 16 xmm2 Caller's value of r12 (only low 64 bits are used) 55 | * 16 xmm3 Caller's value of r13 (only low 64 bits are used) 56 | * 16 xmm4 Caller's value of r14 (only low 64 bits are used) 57 | * 16 xmm5 Caller's value of r15 (only low 64 bits are used) 58 | * 16 xmm6 Caller's value of rbx (only low 64 bits are used) 59 | */ 60 | 61 | #define SCHED(i) (((i)&0xF)*4)(%rsp) 62 | 63 | #define ROUNDa(i, a, b, c, d, e, f, g, h, k) \ 64 | movl (i*4)(%rsi), %ebx; \ 65 | bswapl %ebx; \ 66 | movl %ebx, SCHED(i); \ 67 | 
ROUNDTAIL(a, b, c, d, e, f, g, h, k) 68 | 69 | #define ROUNDb(i, a, b, c, d, e, f, g, h, k) \ 70 | movl SCHED(i-15), %eax; \ 71 | movl SCHED(i-16), %ebx; \ 72 | addl SCHED(i- 7), %ebx; \ 73 | movl %eax, %ecx; \ 74 | movl %eax, %edx; \ 75 | rorl $18, %ecx; \ 76 | shrl $3, %edx; \ 77 | rorl $7, %eax; \ 78 | xorl %edx, %ecx; \ 79 | xorl %ecx, %eax; \ 80 | addl %eax, %ebx; \ 81 | movl SCHED(i- 2), %eax; \ 82 | movl %eax, %ecx; \ 83 | movl %eax, %edx; \ 84 | rorl $19, %ecx; \ 85 | shrl $10, %edx; \ 86 | rorl $17, %eax; \ 87 | xorl %edx, %ecx; \ 88 | xorl %ecx, %eax; \ 89 | addl %eax, %ebx; \ 90 | movl %ebx, SCHED(i); \ 91 | ROUNDTAIL(a, b, c, d, e, f, g, h, k) 92 | 93 | #define ROUNDTAIL(a, b, c, d, e, f, g, h, k) \ 94 | /* Part 0 */ \ 95 | /* See Intel's "Fast SHA-256 Implementations" for the ROR transformation */ \ 96 | movl %e, %eax; \ 97 | rorl $14, %eax; \ 98 | xorl %e, %eax; \ 99 | rorl $5, %eax; \ 100 | xorl %e, %eax; \ 101 | rorl $6, %eax; \ 102 | addl %ebx, %h; \ 103 | movl %g, %ecx; \ 104 | xorl %f, %ecx; \ 105 | andl %e, %ecx; \ 106 | xorl %g, %ecx; \ 107 | leal k(%rax,%rcx), %eax; \ 108 | addl %eax, %h; \ 109 | /* Part 1 */ \ 110 | addl %h, %d; \ 111 | /* Part 2 */ \ 112 | /* See Intel's "Fast SHA-256 Implementations" for the ROR transformation */ \ 113 | movl %a, %eax; \ 114 | rorl $9, %eax; \ 115 | xorl %a, %eax; \ 116 | rorl $11, %eax; \ 117 | xorl %a, %eax; \ 118 | rorl $2, %eax; \ 119 | movl %c, %ecx; \ 120 | addl %eax, %h; \ 121 | movl %c, %eax; \ 122 | orl %b, %eax; \ 123 | andl %b, %ecx; \ 124 | andl %a, %eax; \ 125 | orl %ecx, %eax; \ 126 | addl %eax, %h; 127 | 128 | /* Save registers, allocate scratch space */ 129 | movq %r10, %xmm0 130 | movq %r11, %xmm1 131 | movq %r12, %xmm2 132 | movq %r13, %xmm3 133 | movq %r14, %xmm4 134 | movq %r15, %xmm5 135 | movq %rbx, %xmm6 136 | subq $64, %rsp 137 | 138 | /* Load state */ 139 | movl 0(%rdi), %r8d /* a */ 140 | movl 4(%rdi), %r9d /* b */ 141 | movl 8(%rdi), %r10d /* c */ 142 | movl 12(%rdi), %r11d /* d 
*/ 143 | movl 16(%rdi), %r12d /* e */ 144 | movl 20(%rdi), %r13d /* f */ 145 | movl 24(%rdi), %r14d /* g */ 146 | movl 28(%rdi), %r15d /* h */ 147 | 148 | /* Do 64 rounds of hashing */ 149 | ROUNDa( 0, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x428A2F98) 150 | ROUNDa( 1, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x71374491) 151 | ROUNDa( 2, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x4A3F0431) 152 | ROUNDa( 3, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x164A245B) 153 | ROUNDa( 4, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x3956C25B) 154 | ROUNDa( 5, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x59F111F1) 155 | ROUNDa( 6, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x6DC07D5C) 156 | ROUNDa( 7, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x54E3A12B) 157 | ROUNDa( 8, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x27F85568) 158 | ROUNDa( 9, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x12835B01) 159 | ROUNDa(10, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x243185BE) 160 | ROUNDa(11, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x550C7DC3) 161 | ROUNDa(12, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x72BE5D74) 162 | ROUNDa(13, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x7F214E02) 163 | ROUNDa(14, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x6423F959) 164 | ROUNDa(15, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x3E640E8C) 165 | ROUNDb(16, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x1B64963F) 166 | ROUNDb(17, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x1041B87A) 167 | ROUNDb(18, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x0FC19DC6) 168 | ROUNDb(19, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x240CA1CC) 169 | ROUNDb(20, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x2DE92C6F) 170 | ROUNDb(21, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x4A7484AA) 171 | ROUNDb(22, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x5CB0A9DC) 172 | ROUNDb(23, r9d , r10d, 
r11d, r12d, r13d, r14d, r15d, r8d , 0x76F988DA) 173 | ROUNDb(24, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x67C1AEAE) 174 | ROUNDb(25, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x57CE3993) 175 | ROUNDb(26, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x4FFCD838) 176 | ROUNDb(27, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x40A68039) 177 | ROUNDb(28, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x391FF40D) 178 | ROUNDb(29, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x2A586EB9) 179 | ROUNDb(30, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x06CA6351) 180 | ROUNDb(31, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x14292967) 181 | ROUNDb(32, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x27B70A85) 182 | ROUNDb(33, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x2E1B2138) 183 | ROUNDb(34, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x4D2C6DFC) 184 | ROUNDb(35, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x53380D13) 185 | ROUNDb(36, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x650A7354) 186 | ROUNDb(37, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x766A0ABB) 187 | ROUNDb(38, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x7E3D36D2) 188 | ROUNDb(39, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x6D8DD37B) 189 | ROUNDb(40, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x5D40175F) 190 | ROUNDb(41, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x57E599B5) 191 | ROUNDb(42, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x3DB47490) 192 | ROUNDb(43, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x3893AE5D) 193 | ROUNDb(44, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x2E6D17E7) 194 | ROUNDb(45, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x2966F9DC) 195 | ROUNDb(46, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x0BF1CA7B) 196 | ROUNDb(47, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x106AA070) 197 | ROUNDb(48, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x19A4C116) 198 | 
ROUNDb(49, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x1E376C08) 199 | ROUNDb(50, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x2748774C) 200 | ROUNDb(51, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x34B0BCB5) 201 | ROUNDb(52, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x391C0CB3) 202 | ROUNDb(53, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x4ED8AA4A) 203 | ROUNDb(54, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x5B9CCA4F) 204 | ROUNDb(55, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x682E6FF3) 205 | ROUNDb(56, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x748F82EE) 206 | ROUNDb(57, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x78A5636F) 207 | ROUNDb(58, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x7B3787EC) 208 | ROUNDb(59, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x7338FDF8) 209 | ROUNDb(60, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x6F410006) 210 | ROUNDb(61, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x5BAF9315) 211 | ROUNDb(62, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x41065C09) 212 | ROUNDb(63, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x398E870E) 213 | 214 | /* Add to state */ 215 | addl %r8d , 0(%rdi) 216 | addl %r9d , 4(%rdi) 217 | addl %r10d, 8(%rdi) 218 | addl %r11d, 12(%rdi) 219 | addl %r12d, 16(%rdi) 220 | addl %r13d, 20(%rdi) 221 | addl %r14d, 24(%rdi) 222 | addl %r15d, 28(%rdi) 223 | 224 | /* Restore registers */ 225 | movq %xmm0, %r10 226 | movq %xmm1, %r11 227 | movq %xmm2, %r12 228 | movq %xmm3, %r13 229 | movq %xmm4, %r14 230 | movq %xmm5, %r15 231 | movq %xmm6, %rbx 232 | addq $64, %rsp 233 | retq 234 | -------------------------------------------------------------------------------- /sha2/src/sha256_x86.S: -------------------------------------------------------------------------------- 1 | /* 2 | * SHA-256 hash in x86 assembly 3 | * 4 | * Copyright (c) 2014 Project Nayuki. 
(MIT License) 5 | * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 
22 | */ 23 | 24 | 25 | /* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */ 26 | #if defined(__APPLE__) || defined(_WIN32) 27 | .globl _sha256_compress 28 | _sha256_compress: 29 | #else 30 | .globl sha256_compress 31 | sha256_compress: 32 | #endif 33 | /* 34 | * Storage usage: 35 | * Bytes Location Description 36 | * 4 eax Temporary for calculation per round 37 | * 4 ebx Temporary for calculation per round 38 | * 4 ecx Temporary for calculation per round 39 | * 4 edx Temporary for calculation per round 40 | * 4 ebp Temporary for calculation per round 41 | * 4 esi (During state loading and update) base address of state array argument 42 | * (During hash rounds) temporary for calculation per round 43 | * 4 edi Base address of block array argument (during key schedule loading rounds only) 44 | * 4 esp x86 stack pointer 45 | * 32 [esp+ 0] SHA-256 state variables A,B,C,D,E,F,G,H (4 bytes each) 46 | * 64 [esp+ 32] Key schedule of 16 * 4 bytes 47 | * 4 [esp+ 96] Caller's value of ebx 48 | * 4 [esp+100] Caller's value of esi 49 | * 4 [esp+104] Caller's value of edi 50 | * 4 [esp+108] Caller's value of ebp 51 | */ 52 | /* SCHED(i): slot (i mod 16) of the 16-entry schedule buffer, 4 bytes per slot, starting at esp+32 */ 53 | #define SCHED(i) ((((i)&0xF)+8)*4)(%esp) 54 | /* ROUNDa (used for rounds 0..15): load 32-bit word i of the block through edi, byte-swap it, store it to SCHED(i); the word stays in ebp for ROUNDTAIL */ 55 | #define ROUNDa(i, a, b, c, d, e, f, g, h, k) \ 56 | movl (i*4)(%edi), %ebp; \ 57 | bswapl %ebp; \ 58 | movl %ebp, SCHED(i); \ 59 | ROUNDTAIL(i, a, b, c, d, e, f, g, h, k) 60 | /* ROUNDb (used for rounds 16..63): ebp = SCHED(i-16) + SCHED(i-7) + (ror7 ^ ror18 ^ shr3)(SCHED(i-15)) + (ror17 ^ ror19 ^ shr10)(SCHED(i-2)); the result is stored to SCHED(i) and stays in ebp for ROUNDTAIL */ 61 | #define ROUNDb(i, a, b, c, d, e, f, g, h, k) \ 62 | movl SCHED(i-15), %eax; \ 63 | movl SCHED(i-16), %ebp; \ 64 | movl %eax, %ebx; \ 65 | addl SCHED(i- 7), %ebp; \ 66 | movl %eax, %ecx; \ 67 | rorl $18, %ebx; \ 68 | shrl $3, %ecx; \ 69 | rorl $7, %eax; \ 70 | xorl %ecx, %ebx; \ 71 | xorl %ebx, %eax; \ 72 | addl %eax, %ebp; \ 73 | movl SCHED(i- 2), %eax; \ 74 | movl %eax, %ebx; \ 75 | movl %eax, %ecx; \ 76 | rorl $19, %ebx; \ 77 | shrl $10, %ecx; \ 78 | rorl $17, %eax; \ 79 | xorl %ecx, %ebx; \ 80 | xorl %ebx, %eax; \ 81 | addl %eax, %ebp; \ 82 | movl %ebp, SCHED(i); \ 83 | ROUNDTAIL(i, a, b, c, d, e, f, g, h,
k) 84 | /* STATE(i): 32-bit working variable number i in the copy of the state kept at esp+0 */ 85 | #define STATE(i) (i*4)(%esp) 86 | /* ROUNDTAIL: one compression round. a..h are STATE indices, k is the round constant, and the schedule word for this round is read from ebp */ 87 | #define ROUNDTAIL(i, a, b, c, d, e, f, g, h, k) \ 88 | /* Part 0: esi = h + ebp + k + (ror6 ^ ror11 ^ ror25)(e) + (((f ^ g) & e) ^ g) */ \ 89 | movl STATE(e), %eax; \ 90 | movl %eax, %ebx; \ 91 | movl %eax, %ecx; \ 92 | movl %eax, %edx; \ 93 | rorl $11, %eax; \ 94 | rorl $25, %ebx; \ 95 | rorl $6, %ecx; \ 96 | movl STATE(h), %esi; \ 97 | xorl %ebx, %eax; \ 98 | xorl %eax, %ecx; \ 99 | addl %ebp, %esi; \ 100 | movl STATE(g), %ebx; \ 101 | movl STATE(f), %eax; \ 102 | xorl %ebx, %eax; \ 103 | andl %edx, %eax; \ 104 | xorl %ebx, %eax; \ 105 | leal k(%ecx,%eax), %ecx; \ 106 | addl %ecx, %esi; \ 107 | /* Part 1: d += esi */ \ 108 | addl %esi, STATE(d); \ 109 | /* Part 2: esi += (ror2 ^ ror13 ^ ror22)(a) + (((c | b) & a) | (c & b)), then h = esi */ \ 110 | movl STATE(a), %eax; \ 111 | movl %eax, %ebx; \ 112 | movl %eax, %ecx; \ 113 | movl %eax, %edx; \ 114 | rorl $13, %eax; \ 115 | rorl $22, %ebx; \ 116 | rorl $2, %ecx; \ 117 | xorl %ebx, %eax; \ 118 | xorl %eax, %ecx; \ 119 | movl STATE(c), %eax; \ 120 | addl %ecx, %esi; \ 121 | movl %eax, %ecx; \ 122 | movl STATE(b), %ebx; \ 123 | orl %ebx, %ecx; \ 124 | andl %ebx, %eax; \ 125 | andl %edx, %ecx; \ 126 | orl %eax, %ecx; \ 127 | addl %ecx, %esi; \ 128 | movl %esi, STATE(h); 129 | 130 | /* Allocate scratch space, save registers */ 131 | subl $112, %esp 132 | movl %ebx, 96(%esp) 133 | movl %esi, 100(%esp) 134 | movl %edi, 104(%esp) 135 | movl %ebp, 108(%esp) 136 | 137 | /* Copy state */ 138 | movl 116(%esp), %esi /* Argument: state */ 139 | movl 0(%esi), %eax; movl %eax, 0(%esp) 140 | movl 4(%esi), %eax; movl %eax, 4(%esp) 141 | movl 8(%esi), %eax; movl %eax, 8(%esp) 142 | movl 12(%esi), %eax; movl %eax, 12(%esp) 143 | movl 16(%esi), %eax; movl %eax, 16(%esp) 144 | movl 20(%esi), %eax; movl %eax, 20(%esp) 145 | movl 24(%esi), %eax; movl %eax, 24(%esp) 146 | movl 28(%esi), %eax; movl %eax, 28(%esp) 147 | 148 | /* Do 64 rounds of hashing */ 149 | movl 120(%esp), %edi /* Argument: block */ 150 | ROUNDa( 0, 0, 1, 2, 3, 4, 5, 6, 7, 0x428A2F98) 151 | ROUNDa( 1, 7, 0, 1, 2, 3, 4, 5, 6, 0x71374491) 152 | ROUNDa( 2, 6, 7, 0, 1, 2,
3, 4, 5, 0xB5C0FBCF) 153 | ROUNDa( 3, 5, 6, 7, 0, 1, 2, 3, 4, 0xE9B5DBA5) 154 | ROUNDa( 4, 4, 5, 6, 7, 0, 1, 2, 3, 0x3956C25B) 155 | ROUNDa( 5, 3, 4, 5, 6, 7, 0, 1, 2, 0x59F111F1) 156 | ROUNDa( 6, 2, 3, 4, 5, 6, 7, 0, 1, 0x923F82A4) 157 | ROUNDa( 7, 1, 2, 3, 4, 5, 6, 7, 0, 0xAB1C5ED5) 158 | ROUNDa( 8, 0, 1, 2, 3, 4, 5, 6, 7, 0xD807AA98) 159 | ROUNDa( 9, 7, 0, 1, 2, 3, 4, 5, 6, 0x12835B01) 160 | ROUNDa(10, 6, 7, 0, 1, 2, 3, 4, 5, 0x243185BE) 161 | ROUNDa(11, 5, 6, 7, 0, 1, 2, 3, 4, 0x550C7DC3) 162 | ROUNDa(12, 4, 5, 6, 7, 0, 1, 2, 3, 0x72BE5D74) 163 | ROUNDa(13, 3, 4, 5, 6, 7, 0, 1, 2, 0x80DEB1FE) 164 | ROUNDa(14, 2, 3, 4, 5, 6, 7, 0, 1, 0x9BDC06A7) 165 | ROUNDa(15, 1, 2, 3, 4, 5, 6, 7, 0, 0xC19BF174) 166 | ROUNDb(16, 0, 1, 2, 3, 4, 5, 6, 7, 0xE49B69C1) 167 | ROUNDb(17, 7, 0, 1, 2, 3, 4, 5, 6, 0xEFBE4786) 168 | ROUNDb(18, 6, 7, 0, 1, 2, 3, 4, 5, 0x0FC19DC6) 169 | ROUNDb(19, 5, 6, 7, 0, 1, 2, 3, 4, 0x240CA1CC) 170 | ROUNDb(20, 4, 5, 6, 7, 0, 1, 2, 3, 0x2DE92C6F) 171 | ROUNDb(21, 3, 4, 5, 6, 7, 0, 1, 2, 0x4A7484AA) 172 | ROUNDb(22, 2, 3, 4, 5, 6, 7, 0, 1, 0x5CB0A9DC) 173 | ROUNDb(23, 1, 2, 3, 4, 5, 6, 7, 0, 0x76F988DA) 174 | ROUNDb(24, 0, 1, 2, 3, 4, 5, 6, 7, 0x983E5152) 175 | ROUNDb(25, 7, 0, 1, 2, 3, 4, 5, 6, 0xA831C66D) 176 | ROUNDb(26, 6, 7, 0, 1, 2, 3, 4, 5, 0xB00327C8) 177 | ROUNDb(27, 5, 6, 7, 0, 1, 2, 3, 4, 0xBF597FC7) 178 | ROUNDb(28, 4, 5, 6, 7, 0, 1, 2, 3, 0xC6E00BF3) 179 | ROUNDb(29, 3, 4, 5, 6, 7, 0, 1, 2, 0xD5A79147) 180 | ROUNDb(30, 2, 3, 4, 5, 6, 7, 0, 1, 0x06CA6351) 181 | ROUNDb(31, 1, 2, 3, 4, 5, 6, 7, 0, 0x14292967) 182 | ROUNDb(32, 0, 1, 2, 3, 4, 5, 6, 7, 0x27B70A85) 183 | ROUNDb(33, 7, 0, 1, 2, 3, 4, 5, 6, 0x2E1B2138) 184 | ROUNDb(34, 6, 7, 0, 1, 2, 3, 4, 5, 0x4D2C6DFC) 185 | ROUNDb(35, 5, 6, 7, 0, 1, 2, 3, 4, 0x53380D13) 186 | ROUNDb(36, 4, 5, 6, 7, 0, 1, 2, 3, 0x650A7354) 187 | ROUNDb(37, 3, 4, 5, 6, 7, 0, 1, 2, 0x766A0ABB) 188 | ROUNDb(38, 2, 3, 4, 5, 6, 7, 0, 1, 0x81C2C92E) 189 | ROUNDb(39, 1, 2, 3, 4, 5, 6, 7, 0, 0x92722C85) 190 | ROUNDb(40,
0, 1, 2, 3, 4, 5, 6, 7, 0xA2BFE8A1) 191 | ROUNDb(41, 7, 0, 1, 2, 3, 4, 5, 6, 0xA81A664B) 192 | ROUNDb(42, 6, 7, 0, 1, 2, 3, 4, 5, 0xC24B8B70) 193 | ROUNDb(43, 5, 6, 7, 0, 1, 2, 3, 4, 0xC76C51A3) 194 | ROUNDb(44, 4, 5, 6, 7, 0, 1, 2, 3, 0xD192E819) 195 | ROUNDb(45, 3, 4, 5, 6, 7, 0, 1, 2, 0xD6990624) 196 | ROUNDb(46, 2, 3, 4, 5, 6, 7, 0, 1, 0xF40E3585) 197 | ROUNDb(47, 1, 2, 3, 4, 5, 6, 7, 0, 0x106AA070) 198 | ROUNDb(48, 0, 1, 2, 3, 4, 5, 6, 7, 0x19A4C116) 199 | ROUNDb(49, 7, 0, 1, 2, 3, 4, 5, 6, 0x1E376C08) 200 | ROUNDb(50, 6, 7, 0, 1, 2, 3, 4, 5, 0x2748774C) 201 | ROUNDb(51, 5, 6, 7, 0, 1, 2, 3, 4, 0x34B0BCB5) 202 | ROUNDb(52, 4, 5, 6, 7, 0, 1, 2, 3, 0x391C0CB3) 203 | ROUNDb(53, 3, 4, 5, 6, 7, 0, 1, 2, 0x4ED8AA4A) 204 | ROUNDb(54, 2, 3, 4, 5, 6, 7, 0, 1, 0x5B9CCA4F) 205 | ROUNDb(55, 1, 2, 3, 4, 5, 6, 7, 0, 0x682E6FF3) 206 | ROUNDb(56, 0, 1, 2, 3, 4, 5, 6, 7, 0x748F82EE) 207 | ROUNDb(57, 7, 0, 1, 2, 3, 4, 5, 6, 0x78A5636F) 208 | ROUNDb(58, 6, 7, 0, 1, 2, 3, 4, 5, 0x84C87814) 209 | ROUNDb(59, 5, 6, 7, 0, 1, 2, 3, 4, 0x8CC70208) 210 | ROUNDb(60, 4, 5, 6, 7, 0, 1, 2, 3, 0x90BEFFFA) 211 | ROUNDb(61, 3, 4, 5, 6, 7, 0, 1, 2, 0xA4506CEB) 212 | ROUNDb(62, 2, 3, 4, 5, 6, 7, 0, 1, 0xBEF9A3F7) 213 | ROUNDb(63, 1, 2, 3, 4, 5, 6, 7, 0, 0xC67178F2) 214 | 215 | /* Add to state */ 216 | movl 116(%esp), %esi /* Argument: state */ 217 | movl 0(%esp), %eax; addl %eax, 0(%esi) 218 | movl 4(%esp), %eax; addl %eax, 4(%esi) 219 | movl 8(%esp), %eax; addl %eax, 8(%esi) 220 | movl 12(%esp), %eax; addl %eax, 12(%esi) 221 | movl 16(%esp), %eax; addl %eax, 16(%esi) 222 | movl 20(%esp), %eax; addl %eax, 20(%esi) 223 | movl 24(%esp), %eax; addl %eax, 24(%esi) 224 | movl 28(%esp), %eax; addl %eax, 28(%esi) 225 | 226 | /* Restore registers */ 227 | movl 96(%esp), %ebx 228 | movl 100(%esp), %esi 229 | movl 104(%esp), %edi 230 | movl 108(%esp), %ebp 231 | addl $112, %esp 232 | retl 233 | -------------------------------------------------------------------------------- /sha2/src/sha512_x64.S:
-------------------------------------------------------------------------------- 1 | /* 2 | * SHA-512 hash in x86-64 assembly 3 | * 4 | * Copyright (c) 2017 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 
22 | */ 23 | 24 | 25 | /* void sha512_compress(uint64_t state[8], const uint8_t block[128]) */ 26 | #ifdef __APPLE__ 27 | .globl _sha512_compress 28 | _sha512_compress: 29 | #else 30 | .globl sha512_compress 31 | sha512_compress: 32 | #endif 33 | /* 34 | * Storage usage: 35 | * Bytes Location Description 36 | * 8 rax Temporary for calculation per round 37 | * 8 rbx Temporary for calculation per round 38 | * 8 rcx Temporary for calculation per round 39 | * 8 rdx Temporary for calculation per round 40 | * 8 rsi Base address of block array argument (read-only) 41 | * 8 rdi Base address of state array argument (read-only) 42 | * 8 rsp x86-64 stack pointer 43 | * 8 r8 SHA-512 state variable A 44 | * 8 r9 SHA-512 state variable B 45 | * 8 r10 SHA-512 state variable C 46 | * 8 r11 SHA-512 state variable D 47 | * 8 r12 SHA-512 state variable E 48 | * 8 r13 SHA-512 state variable F 49 | * 8 r14 SHA-512 state variable G 50 | * 8 r15 SHA-512 state variable H 51 | * 128 [rsp+0] Circular buffer of most recent 16 key schedule items, 8 bytes each 52 | * 16 xmm0 Caller's value of r10 (only low 64 bits are used) 53 | * 16 xmm1 Caller's value of r11 (only low 64 bits are used) 54 | * 16 xmm2 Caller's value of r12 (only low 64 bits are used) 55 | * 16 xmm3 Caller's value of r13 (only low 64 bits are used) 56 | * 16 xmm4 Caller's value of r14 (only low 64 bits are used) 57 | * 16 xmm5 Caller's value of r15 (only low 64 bits are used) 58 | * 16 xmm6 Caller's value of rbx (only low 64 bits are used) 59 | */ 60 | /* SCHED(i): slot (i mod 16) of the 16-entry schedule buffer, 8 bytes per slot, starting at rsp+0 */ 61 | #define SCHED(i) (((i)&0xF)*8)(%rsp) 62 | /* ROUNDa (used for rounds 0..15): load 64-bit word i of the block through rsi, byte-swap it, store it to SCHED(i); the word stays in rbx for ROUNDTAIL */ 63 | #define ROUNDa(i, a, b, c, d, e, f, g, h, k) \ 64 | movq (i*8)(%rsi), %rbx; \ 65 | bswapq %rbx; \ 66 | movq %rbx, SCHED(i); \ 67 | ROUNDTAIL(a, b, c, d, e, f, g, h, k) 68 | /* ROUNDb (used for rounds 16..79): rbx = SCHED(i-16) + SCHED(i-7) + (ror1 ^ ror8 ^ shr7)(SCHED(i-15)) + (ror19 ^ ror61 ^ shr6)(SCHED(i-2)); the result is stored to SCHED(i) and stays in rbx for ROUNDTAIL */ 69 | #define ROUNDb(i, a, b, c, d, e, f, g, h, k) \ 70 | movq SCHED(i-15), %rax; \ 71 | movq SCHED(i-16), %rbx; \ 72 | addq SCHED(i- 7), %rbx; \ 73 | movq %rax, %rcx; \ 74 | movq %rax, %rdx; \ 75 | rorq $8, %rcx; \ 76 | shrq $7, %rdx; \ 77 | rorq $1, %rax; \
78 | xorq %rdx, %rcx; \ 79 | xorq %rcx, %rax; \ 80 | addq %rax, %rbx; \ 81 | movq SCHED(i- 2), %rax; \ 82 | movq %rax, %rcx; \ 83 | movq %rax, %rdx; \ 84 | rorq $61, %rcx; \ 85 | shrq $6, %rdx; \ 86 | rorq $19, %rax; \ 87 | xorq %rdx, %rcx; \ 88 | xorq %rcx, %rax; \ 89 | addq %rax, %rbx; \ 90 | movq %rbx, SCHED(i); \ 91 | ROUNDTAIL(a, b, c, d, e, f, g, h, k) 92 | /* ROUNDTAIL: one compression round. a..h are the registers currently holding the eight working variables, k is the 64-bit round constant, and the schedule word for this round is read from rbx */ 93 | #define ROUNDTAIL(a, b, c, d, e, f, g, h, k) \ 94 | /* Part 0: h += rbx + k + (ror14 ^ ror18 ^ ror41)(e) + (((g ^ f) & e) ^ g); the chained ror 23 / 4 / 14 with an xor of e in between yields those three rotations */ \ 95 | /* ROR transformation inspired by Intel's SHA-256 implementation */ \ 96 | movq %e, %rax; \ 97 | rorq $23, %rax; \ 98 | xorq %e, %rax; \ 99 | rorq $4, %rax; \ 100 | xorq %e, %rax; \ 101 | rorq $14, %rax; \ 102 | addq %rbx, %h; \ 103 | movq %g, %rcx; \ 104 | xorq %f, %rcx; \ 105 | andq %e, %rcx; \ 106 | xorq %g, %rcx; \ 107 | addq %rax, %h; \ 108 | movabs $k, %rax; \ 109 | addq %rcx, %h; \ 110 | addq %rax, %h; \ 111 | /* Part 1: d += h */ \ 112 | addq %h, %d; \ 113 | /* Part 2: h += (ror28 ^ ror34 ^ ror39)(a) + (((c | b) & a) | (c & b)); the chained ror 5 / 6 / 28 yields those three rotations */ \ 114 | /* ROR transformation inspired by Intel's SHA-256 implementation */ \ 115 | movq %a, %rax; \ 116 | rorq $5, %rax; \ 117 | xorq %a, %rax; \ 118 | rorq $6, %rax; \ 119 | xorq %a, %rax; \ 120 | rorq $28, %rax; \ 121 | movq %c, %rcx; \ 122 | addq %rax, %h; \ 123 | movq %c, %rax; \ 124 | orq %b, %rax; \ 125 | andq %b, %rcx; \ 126 | andq %a, %rax; \ 127 | orq %rcx, %rax; \ 128 | addq %rax, %h; 129 | 130 | /* Save registers, allocate scratch space. NOTE(review): parking rbx and r10..r15 in xmm0..xmm6 and taking the arguments from rdi/rsi looks specific to the System V AMD64 convention - confirm this file is never assembled for Win64 */ 131 | movq %r10, %xmm0 132 | movq %r11, %xmm1 133 | movq %r12, %xmm2 134 | movq %r13, %xmm3 135 | movq %r14, %xmm4 136 | movq %r15, %xmm5 137 | movq %rbx, %xmm6 138 | subq $128, %rsp 139 | 140 | /* Load state */ 141 | movq 0(%rdi), %r8 /* a */ 142 | movq 8(%rdi), %r9 /* b */ 143 | movq 16(%rdi), %r10 /* c */ 144 | movq 24(%rdi), %r11 /* d */ 145 | movq 32(%rdi), %r12 /* e */ 146 | movq 40(%rdi), %r13 /* f */ 147 | movq 48(%rdi), %r14 /* g */ 148 | movq 56(%rdi), %r15 /* h */ 149 | 150 | /* Do 80 rounds of hashing */ 151 | ROUNDa( 0, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x428A2F98D728AE22) 152 | ROUNDa( 1, r15, r8
, r9 , r10, r11, r12, r13, r14, 0x7137449123EF65CD) 153 | ROUNDa( 2, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB5C0FBCFEC4D3B2F) 154 | ROUNDa( 3, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xE9B5DBA58189DBBC) 155 | ROUNDa( 4, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x3956C25BF348B538) 156 | ROUNDa( 5, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x59F111F1B605D019) 157 | ROUNDa( 6, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x923F82A4AF194F9B) 158 | ROUNDa( 7, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xAB1C5ED5DA6D8118) 159 | ROUNDa( 8, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xD807AA98A3030242) 160 | ROUNDa( 9, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x12835B0145706FBE) 161 | ROUNDa(10, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x243185BE4EE4B28C) 162 | ROUNDa(11, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x550C7DC3D5FFB4E2) 163 | ROUNDa(12, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x72BE5D74F27B896F) 164 | ROUNDa(13, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x80DEB1FE3B1696B1) 165 | ROUNDa(14, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x9BDC06A725C71235) 166 | ROUNDa(15, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC19BF174CF692694) 167 | ROUNDb(16, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xE49B69C19EF14AD2) 168 | ROUNDb(17, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xEFBE4786384F25E3) 169 | ROUNDb(18, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x0FC19DC68B8CD5B5) 170 | ROUNDb(19, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x240CA1CC77AC9C65) 171 | ROUNDb(20, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x2DE92C6F592B0275) 172 | ROUNDb(21, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4A7484AA6EA6E483) 173 | ROUNDb(22, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5CB0A9DCBD41FBD4) 174 | ROUNDb(23, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x76F988DA831153B5) 175 | ROUNDb(24, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x983E5152EE66DFAB) 176 | ROUNDb(25, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA831C66D2DB43210) 177 | ROUNDb(26, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB00327C898FB213F) 178 | ROUNDb(27, r13,
r14, r15, r8 , r9 , r10, r11, r12, 0xBF597FC7BEEF0EE4) 179 | ROUNDb(28, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xC6E00BF33DA88FC2) 180 | ROUNDb(29, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD5A79147930AA725) 181 | ROUNDb(30, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x06CA6351E003826F) 182 | ROUNDb(31, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x142929670A0E6E70) 183 | ROUNDb(32, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x27B70A8546D22FFC) 184 | ROUNDb(33, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x2E1B21385C26C926) 185 | ROUNDb(34, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x4D2C6DFC5AC42AED) 186 | ROUNDb(35, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x53380D139D95B3DF) 187 | ROUNDb(36, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x650A73548BAF63DE) 188 | ROUNDb(37, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x766A0ABB3C77B2A8) 189 | ROUNDb(38, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x81C2C92E47EDAEE6) 190 | ROUNDb(39, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x92722C851482353B) 191 | ROUNDb(40, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xA2BFE8A14CF10364) 192 | ROUNDb(41, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA81A664BBC423001) 193 | ROUNDb(42, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xC24B8B70D0F89791) 194 | ROUNDb(43, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xC76C51A30654BE30) 195 | ROUNDb(44, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xD192E819D6EF5218) 196 | ROUNDb(45, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD69906245565A910) 197 | ROUNDb(46, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xF40E35855771202A) 198 | ROUNDb(47, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x106AA07032BBD1B8) 199 | ROUNDb(48, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x19A4C116B8D2D0C8) 200 | ROUNDb(49, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x1E376C085141AB53) 201 | ROUNDb(50, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x2748774CDF8EEB99) 202 | ROUNDb(51, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x34B0BCB5E19B48A8) 203 | ROUNDb(52, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x391C0CB3C5C95A63) 204 | ROUNDb(53,
r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4ED8AA4AE3418ACB) 205 | ROUNDb(54, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5B9CCA4F7763E373) 206 | ROUNDb(55, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x682E6FF3D6B2B8A3) 207 | ROUNDb(56, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x748F82EE5DEFB2FC) 208 | ROUNDb(57, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x78A5636F43172F60) 209 | ROUNDb(58, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x84C87814A1F0AB72) 210 | ROUNDb(59, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x8CC702081A6439EC) 211 | ROUNDb(60, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x90BEFFFA23631E28) 212 | ROUNDb(61, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xA4506CEBDE82BDE9) 213 | ROUNDb(62, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xBEF9A3F7B2C67915) 214 | ROUNDb(63, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC67178F2E372532B) 215 | ROUNDb(64, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xCA273ECEEA26619C) 216 | ROUNDb(65, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xD186B8C721C0C207) 217 | ROUNDb(66, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xEADA7DD6CDE0EB1E) 218 | ROUNDb(67, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xF57D4F7FEE6ED178) 219 | ROUNDb(68, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x06F067AA72176FBA) 220 | ROUNDb(69, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x0A637DC5A2C898A6) 221 | ROUNDb(70, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x113F9804BEF90DAE) 222 | ROUNDb(71, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x1B710B35131C471B) 223 | ROUNDb(72, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x28DB77F523047D84) 224 | ROUNDb(73, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x32CAAB7B40C72493) 225 | ROUNDb(74, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x3C9EBE0A15C9BEBC) 226 | ROUNDb(75, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x431D67C49C100D4C) 227 | ROUNDb(76, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x4CC5D4BECB3E42B6) 228 | ROUNDb(77, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x597F299CFC657E2A) 229 | ROUNDb(78, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5FCB6FAB3AD6FAEC) 230 |
ROUNDb(79, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x6C44198C4A475817) 231 | 232 | /* Add to state */ 233 | addq %r8 , 0(%rdi) 234 | addq %r9 , 8(%rdi) 235 | addq %r10, 16(%rdi) 236 | addq %r11, 24(%rdi) 237 | addq %r12, 32(%rdi) 238 | addq %r13, 40(%rdi) 239 | addq %r14, 48(%rdi) 240 | addq %r15, 56(%rdi) 241 | 242 | /* Restore registers */ 243 | movq %xmm0, %r10 244 | movq %xmm1, %r11 245 | movq %xmm2, %r12 246 | movq %xmm3, %r13 247 | movq %xmm4, %r14 248 | movq %xmm5, %r15 249 | movq %xmm6, %rbx 250 | addq $128, %rsp 251 | retq 252 | -------------------------------------------------------------------------------- /sha2/src/sha512_x86.S: -------------------------------------------------------------------------------- 1 | /* 2 | * SHA-512 hash in x86 assembly 3 | * 4 | * Copyright (c) 2014 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement.
In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 22 | */ 23 | 24 | 25 | /* void sha512_compress(uint64_t state[8], const uint8_t block[128]) */ 26 | #if defined(__APPLE__) || defined(_WIN32) 27 | .globl _sha512_compress 28 | _sha512_compress: 29 | #else 30 | .globl sha512_compress 31 | sha512_compress: 32 | #endif 33 | /* 34 | * Storage usage: 35 | * Bytes Location Description 36 | * 4 eax Temporary base address of state or block array arguments 37 | * 4 ecx Old value of esp 38 | * 4 esp x86 stack pointer 39 | * 64 [esp+ 0] SHA-512 state variables A,B,C,D,E,F,G,H (8 bytes each) 40 | * 128 [esp+64] Circular buffer of most recent 16 key schedule items, 8 bytes each 41 | * 56 mm0..mm6 Temporary for calculation per round 42 | * 8 mm7 Control value for byte endian reversal 43 | * 64 xmm0..xmm3 Temporary for copying or calculation 44 | */ 45 | /* SCHED(i): slot (i mod 16) of the schedule buffer at esp+64; STATE(i): 64-bit working variable number i in the state copy at esp+0 */ 46 | #define SCHED(i) (((i)&0xF)*8+64)(%esp) 47 | #define STATE(i) (i*8)(%esp) 48 | /* RORQ: rotate the 64-bit MMX register reg right by shift bits as (reg >> shift) OR (reg << (64-shift)); temp is overwritten */ 49 | #define RORQ(reg, shift, temp) \ 50 | movq %reg, %temp; \ 51 | psllq $(64-shift), %temp; \ 52 | psrlq $shift, %reg; \ 53 | por %temp, %reg; 54 | /* ROUNDa (used for rounds 0..15): load 64-bit word i of the block through eax, reorder its bytes with pshufb using the control value in mm7, store it to SCHED(i); the word stays in mm0 for ROUNDTAIL */ 55 | #define ROUNDa(i, a, b, c, d, e, f, g, h) \ 56 | movq (i*8)(%eax), %mm0; \ 57 | pshufb %mm7, %mm0; \ 58 | movq %mm0, SCHED(i); \ 59 | ROUNDTAIL(i, a, b, c, d, e, f, g, h) 60 | /* ROUNDb (used for rounds 16..79): mm0 = SCHED(i-16) + SCHED(i-7) + (ror1 ^ ror8 ^ shr7)(SCHED(i-15)) + (ror19 ^ ror61 ^ shr6)(SCHED(i-2)); the result is stored to SCHED(i) and stays in mm0 for ROUNDTAIL */ 61 | #define ROUNDb(i, a, b, c, d, e, f, g, h) \ 62 | movq SCHED(i-16), %mm0; \ 63 | paddq SCHED(i- 7), %mm0; \ 64 | movq SCHED(i-15), %mm1; \ 65 | movq %mm1, %mm2; \ 66 | movq %mm1, %mm3; \ 67 | RORQ(mm1, 1, mm5) \ 68 | RORQ(mm2, 8, mm4) \ 69 | psrlq $7, %mm3; \ 70 | pxor %mm3, %mm2; \ 71 | pxor %mm2, %mm1; \ 72 | paddq %mm1, %mm0; \ 73 | movq SCHED(i- 2), %mm1; \ 74 | movq %mm1, %mm2; \ 75 | movq %mm1, %mm3; \ 76 | RORQ(mm1, 19, mm5) \ 77 | RORQ(mm2, 61, mm4) \ 78 | psrlq $6, %mm3; \ 79 |
pxor %mm3, %mm2; \ 80 | pxor %mm2, %mm1; \ 81 | paddq %mm1, %mm0; \ 82 | movq %mm0, SCHED(i); \ 83 | ROUNDTAIL(i, a, b, c, d, e, f, g, h) 84 | /* ROUNDTAIL: one compression round. a..h are STATE indices, i selects the round constant at .roundconstants+i*8, and the schedule word for this round is read from mm0 */ 85 | #define ROUNDTAIL(i, a, b, c, d, e, f, g, h) \ 86 | /* Part 0: mm0 = mm0 + h + constant i + (ror14 ^ ror18 ^ ror41)(e) + (((g ^ f) & e) ^ g) */ \ 87 | paddq STATE(h), %mm0; \ 88 | movq STATE(e), %mm1; \ 89 | movq %mm1, %mm2; \ 90 | movq %mm1, %mm3; \ 91 | RORQ(mm1, 18, mm4) \ 92 | RORQ(mm2, 41, mm5) \ 93 | RORQ(mm3, 14, mm6) \ 94 | pxor %mm2, %mm1; \ 95 | pxor %mm3, %mm1; \ 96 | paddq .roundconstants+i*8, %mm0; \ 97 | movq STATE(g), %mm2; \ 98 | pxor STATE(f), %mm2; \ 99 | pand STATE(e), %mm2; \ 100 | pxor STATE(g), %mm2; \ 101 | paddq %mm1, %mm0; \ 102 | paddq %mm2, %mm0; \ 103 | /* Part 1: d += mm0 */ \ 104 | movq STATE(d), %mm1; \ 105 | paddq %mm0, %mm1; \ 106 | movq %mm1, STATE(d); \ 107 | /* Part 2: mm0 += (ror28 ^ ror34 ^ ror39)(a) + (((c | b) & a) | (c & b)), then h = mm0 */ \ 108 | movq STATE(a), %mm1; \ 109 | movq %mm1, %mm2; \ 110 | movq %mm1, %mm3; \ 111 | RORQ(mm1, 39, mm4) \ 112 | RORQ(mm2, 34, mm5) \ 113 | RORQ(mm3, 28, mm6) \ 114 | pxor %mm2, %mm1; \ 115 | pxor %mm3, %mm1; \ 116 | movq STATE(c), %mm2; \ 117 | paddq %mm1, %mm0; \ 118 | movq %mm2, %mm3; \ 119 | por STATE(b), %mm3; \ 120 | pand STATE(b), %mm2; \ 121 | pand STATE(a), %mm3; \ 122 | por %mm2, %mm3; \ 123 | paddq %mm3, %mm0; \ 124 | movq %mm0, STATE(h); 125 | 126 | /* Allocate 16-byte aligned scratch space */ 127 | movl %esp, %ecx 128 | subl $192, %esp 129 | andl $~0xF, %esp 130 | 131 | /* Copy state */ 132 | movl 4(%ecx), %eax 133 | movdqu 0(%eax), %xmm0; movdqu %xmm0, 0(%esp) 134 | movdqu 16(%eax), %xmm1; movdqu %xmm1, 16(%esp) 135 | movdqu 32(%eax), %xmm2; movdqu %xmm2, 32(%esp) 136 | movdqu 48(%eax), %xmm3; movdqu %xmm3, 48(%esp) 137 | 138 | /* Do 80 rounds of hashing */ 139 | movl 8(%ecx), %eax 140 | movq .bswap64, %mm7 141 | ROUNDa( 0, 0, 1, 2, 3, 4, 5, 6, 7) 142 | ROUNDa( 1, 7, 0, 1, 2, 3, 4, 5, 6) 143 | ROUNDa( 2, 6, 7, 0, 1, 2, 3, 4, 5) 144 | ROUNDa( 3, 5, 6, 7, 0, 1, 2, 3, 4) 145 | ROUNDa( 4, 4, 5, 6, 7, 0, 1, 2, 3) 146 | ROUNDa( 5, 3, 4, 5, 6, 7, 0, 1, 2) 147 | ROUNDa( 6, 2, 3, 4, 5, 6, 7, 0, 1)
148 | ROUNDa( 7, 1, 2, 3, 4, 5, 6, 7, 0) 149 | ROUNDa( 8, 0, 1, 2, 3, 4, 5, 6, 7) 150 | ROUNDa( 9, 7, 0, 1, 2, 3, 4, 5, 6) 151 | ROUNDa(10, 6, 7, 0, 1, 2, 3, 4, 5) 152 | ROUNDa(11, 5, 6, 7, 0, 1, 2, 3, 4) 153 | ROUNDa(12, 4, 5, 6, 7, 0, 1, 2, 3) 154 | ROUNDa(13, 3, 4, 5, 6, 7, 0, 1, 2) 155 | ROUNDa(14, 2, 3, 4, 5, 6, 7, 0, 1) 156 | ROUNDa(15, 1, 2, 3, 4, 5, 6, 7, 0) 157 | ROUNDb(16, 0, 1, 2, 3, 4, 5, 6, 7) 158 | ROUNDb(17, 7, 0, 1, 2, 3, 4, 5, 6) 159 | ROUNDb(18, 6, 7, 0, 1, 2, 3, 4, 5) 160 | ROUNDb(19, 5, 6, 7, 0, 1, 2, 3, 4) 161 | ROUNDb(20, 4, 5, 6, 7, 0, 1, 2, 3) 162 | ROUNDb(21, 3, 4, 5, 6, 7, 0, 1, 2) 163 | ROUNDb(22, 2, 3, 4, 5, 6, 7, 0, 1) 164 | ROUNDb(23, 1, 2, 3, 4, 5, 6, 7, 0) 165 | ROUNDb(24, 0, 1, 2, 3, 4, 5, 6, 7) 166 | ROUNDb(25, 7, 0, 1, 2, 3, 4, 5, 6) 167 | ROUNDb(26, 6, 7, 0, 1, 2, 3, 4, 5) 168 | ROUNDb(27, 5, 6, 7, 0, 1, 2, 3, 4) 169 | ROUNDb(28, 4, 5, 6, 7, 0, 1, 2, 3) 170 | ROUNDb(29, 3, 4, 5, 6, 7, 0, 1, 2) 171 | ROUNDb(30, 2, 3, 4, 5, 6, 7, 0, 1) 172 | ROUNDb(31, 1, 2, 3, 4, 5, 6, 7, 0) 173 | ROUNDb(32, 0, 1, 2, 3, 4, 5, 6, 7) 174 | ROUNDb(33, 7, 0, 1, 2, 3, 4, 5, 6) 175 | ROUNDb(34, 6, 7, 0, 1, 2, 3, 4, 5) 176 | ROUNDb(35, 5, 6, 7, 0, 1, 2, 3, 4) 177 | ROUNDb(36, 4, 5, 6, 7, 0, 1, 2, 3) 178 | ROUNDb(37, 3, 4, 5, 6, 7, 0, 1, 2) 179 | ROUNDb(38, 2, 3, 4, 5, 6, 7, 0, 1) 180 | ROUNDb(39, 1, 2, 3, 4, 5, 6, 7, 0) 181 | ROUNDb(40, 0, 1, 2, 3, 4, 5, 6, 7) 182 | ROUNDb(41, 7, 0, 1, 2, 3, 4, 5, 6) 183 | ROUNDb(42, 6, 7, 0, 1, 2, 3, 4, 5) 184 | ROUNDb(43, 5, 6, 7, 0, 1, 2, 3, 4) 185 | ROUNDb(44, 4, 5, 6, 7, 0, 1, 2, 3) 186 | ROUNDb(45, 3, 4, 5, 6, 7, 0, 1, 2) 187 | ROUNDb(46, 2, 3, 4, 5, 6, 7, 0, 1) 188 | ROUNDb(47, 1, 2, 3, 4, 5, 6, 7, 0) 189 | ROUNDb(48, 0, 1, 2, 3, 4, 5, 6, 7) 190 | ROUNDb(49, 7, 0, 1, 2, 3, 4, 5, 6) 191 | ROUNDb(50, 6, 7, 0, 1, 2, 3, 4, 5) 192 | ROUNDb(51, 5, 6, 7, 0, 1, 2, 3, 4) 193 | ROUNDb(52, 4, 5, 6, 7, 0, 1, 2, 3) 194 | ROUNDb(53, 3, 4, 5, 6, 7, 0, 1, 2) 195 | ROUNDb(54, 2, 3, 4, 5, 6, 7, 0, 1) 196 | ROUNDb(55, 1, 2, 3, 4, 5,
6, 7, 0) 197 | ROUNDb(56, 0, 1, 2, 3, 4, 5, 6, 7) 198 | ROUNDb(57, 7, 0, 1, 2, 3, 4, 5, 6) 199 | ROUNDb(58, 6, 7, 0, 1, 2, 3, 4, 5) 200 | ROUNDb(59, 5, 6, 7, 0, 1, 2, 3, 4) 201 | ROUNDb(60, 4, 5, 6, 7, 0, 1, 2, 3) 202 | ROUNDb(61, 3, 4, 5, 6, 7, 0, 1, 2) 203 | ROUNDb(62, 2, 3, 4, 5, 6, 7, 0, 1) 204 | ROUNDb(63, 1, 2, 3, 4, 5, 6, 7, 0) 205 | ROUNDb(64, 0, 1, 2, 3, 4, 5, 6, 7) 206 | ROUNDb(65, 7, 0, 1, 2, 3, 4, 5, 6) 207 | ROUNDb(66, 6, 7, 0, 1, 2, 3, 4, 5) 208 | ROUNDb(67, 5, 6, 7, 0, 1, 2, 3, 4) 209 | ROUNDb(68, 4, 5, 6, 7, 0, 1, 2, 3) 210 | ROUNDb(69, 3, 4, 5, 6, 7, 0, 1, 2) 211 | ROUNDb(70, 2, 3, 4, 5, 6, 7, 0, 1) 212 | ROUNDb(71, 1, 2, 3, 4, 5, 6, 7, 0) 213 | ROUNDb(72, 0, 1, 2, 3, 4, 5, 6, 7) 214 | ROUNDb(73, 7, 0, 1, 2, 3, 4, 5, 6) 215 | ROUNDb(74, 6, 7, 0, 1, 2, 3, 4, 5) 216 | ROUNDb(75, 5, 6, 7, 0, 1, 2, 3, 4) 217 | ROUNDb(76, 4, 5, 6, 7, 0, 1, 2, 3) 218 | ROUNDb(77, 3, 4, 5, 6, 7, 0, 1, 2) 219 | ROUNDb(78, 2, 3, 4, 5, 6, 7, 0, 1) 220 | ROUNDb(79, 1, 2, 3, 4, 5, 6, 7, 0) 221 | 222 | /* Add to state */ 223 | movl 4(%ecx), %eax 224 | movdqu 0(%eax), %xmm0; paddq 0(%esp), %xmm0; movdqu %xmm0, 0(%eax) 225 | movdqu 16(%eax), %xmm1; paddq 16(%esp), %xmm1; movdqu %xmm1, 16(%eax) 226 | movdqu 32(%eax), %xmm2; paddq 32(%esp), %xmm2; movdqu %xmm2, 32(%eax) 227 | movdqu 48(%eax), %xmm3; paddq 48(%esp), %xmm3; movdqu %xmm3, 48(%eax) 228 | 229 | /* Clean up */ 230 | emms 231 | movl %ecx, %esp 232 | retl 233 | /* NOTE(review): .bswap64 and .roundconstants are referenced by absolute address from the code above, which looks non-position-independent - confirm the build never needs PIC for this file */ 234 | /* .bswap64: pshufb control value loaded into mm7 and used by ROUNDa to reverse the byte order of each 64-bit block word */ 235 | .balign 8 236 | .bswap64: 237 | .quad 0x0001020304050607 238 | /* .roundconstants: one 64-bit constant per round, 80 in total, read by ROUNDTAIL at .roundconstants+i*8 */ 239 | .roundconstants: 240 | .quad 0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC 241 | .quad 0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118 242 | .quad 0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2 243 | .quad 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694 244 | .quad 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65 245 | .quad
0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5 246 | .quad 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4 247 | .quad 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70 248 | .quad 0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF 249 | .quad 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B 250 | .quad 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30 251 | .quad 0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8 252 | .quad 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8 253 | .quad 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3 254 | .quad 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC 255 | .quad 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B 256 | .quad 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178 257 | .quad 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B 258 | .quad 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C 259 | .quad 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817 260 | -------------------------------------------------------------------------------- /whirlpool/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## 0.6.2 (2024-05-06) 9 | ### Changed 10 | - Emit compilation error when compiled for Windows targets.
([#79]) 11 | 12 | [#79]: https://github.com/RustCrypto/asm-hashes/pull/79 13 | 14 | ## 0.6.1 (2023-08-07) 15 | ### Changed 16 | - Prefix x86 asm symbols with `_` on Windows like on Apple ([#61]) 17 | - Fix deprecated use of `cc::Build::compile` ([#59]) 18 | 19 | [#61]: https://github.com/RustCrypto/asm-hashes/pull/61 20 | [#59]: https://github.com/RustCrypto/asm-hashes/pull/59 21 | -------------------------------------------------------------------------------- /whirlpool/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "whirlpool-asm" 3 | version = "0.6.2" 4 | authors = ["RustCrypto Developers"] 5 | license = "MIT" 6 | description = "Assembly implementation of Whirlpool compression function" 7 | documentation = "https://docs.rs/whirlpool-asm" 8 | repository = "https://github.com/RustCrypto/asm-hashes" 9 | keywords = ["crypto", "whirlpool", "asm"] 10 | categories = ["cryptography", "no-std"] 11 | edition = "2018" 12 | 13 | [build-dependencies] 14 | cc = "1.0" 15 | -------------------------------------------------------------------------------- /whirlpool/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Project Nayuki, Artyom Pavlov 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /whirlpool/benches/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![feature(test)] 3 | 4 | extern crate test; 5 | 6 | use test::Bencher; 7 | 8 | /// Benchmarks `whirlpool_asm::compress` on one zero-filled 64-byte block per iteration. 9 | #[bench] 10 | fn bench_compress(b: &mut Bencher) { 11 | let mut state = [0u64; 8]; 12 | let data = [[0u8; 64]]; 13 | 14 | b.iter(|| { 15 | whirlpool_asm::compress(&mut state, &data); 16 | }); 17 | 18 | // `data.len()` is the number of blocks (1), not bytes; report the byte size (64) so the computed throughput is correct. 19 | b.bytes = core::mem::size_of_val(&data) as u64; 20 | } 21 | -------------------------------------------------------------------------------- /whirlpool/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); 3 | 4 | let asm_path = if target_arch == "x86" { 5 | "src/x86.S" 6 | } else if target_arch == "x86_64" { 7 | "src/x64.S" 8 | } else { 9 | panic!("Unsupported target architecture: {}", target_arch); 10 | }; 11 | cc::Build::new() 12 | .flag("-c") 13 | .file(asm_path) 14 | .compile("whirlpool"); 15 | } 16 | -------------------------------------------------------------------------------- /whirlpool/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Assembly implementation of the [Whirlpool] compression function. 2 | //! 3 | //! This crate is not intended for direct use, most users should 4 | //! 
prefer the [`whirlpool`] crate with the `asm` feature enabled instead. 5 | //! 6 | //! Only x86 and x86-64 architectures are currently supported. 7 | //! 8 | //! [Whirlpool]: https://en.wikipedia.org/wiki/Whirlpool_(cryptography) 9 | //! [`whirlpool`]: https://crates.io/crates/whirlpool 10 | 11 | #![no_std] 12 | #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] 13 | compile_error!("crate can only be used on x86 and x86-64 architectures"); 14 | 15 | #[cfg(target_os = "windows")] 16 | compile_error!("crate does not support Windows targets"); 17 | 18 | #[link(name = "whirlpool", kind = "static")] 19 | extern "C" { 20 | fn whirlpool_compress(state: &mut [u64; 8], block: &[u8; 64]); 21 | } 22 | 23 | /// Safe wrapper around the assembly implementation of the Whirlpool compression function. Calls the assembly routine on `state` once per 64-byte block of `blocks`, in order. 24 | #[inline] 25 | pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 64]]) { 26 | for block in blocks { 27 | unsafe { whirlpool_compress(state, block) } // SAFETY: `state` and `block` are valid references to exactly 64 bytes each, matching the `whirlpool_compress(uint8_t state[64], const uint8_t block[64])` signature documented in the assembly sources. 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /whirlpool/src/x64.S: -------------------------------------------------------------------------------- 1 | /* 2 | * Whirlpool hash in x86-64 assembly 3 | * 4 | * Copyright (c) 2017 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-whirlpool-hash-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 22 | */ 23 | 24 | 25 | /* void whirlpool_compress(uint8_t state[64], const uint8_t block[64]) */ 26 | /* state and block can be optionally aligned to 8 bytes for slightly better performance. */ 27 | #ifdef __APPLE__ 28 | .globl _whirlpool_compress 29 | _whirlpool_compress: 30 | #else 31 | .globl whirlpool_compress 32 | whirlpool_compress: 33 | #endif 34 | /* 35 | * Storage usage: 36 | * Bytes Location Description 37 | * 8 rax Temporary 16-bit value for each pair of bytes processed during a round 38 | * 8 rdx Temporary byte value 39 | * 8 rcx Upward loop counter for 10 rounds 40 | * 8 rsi Base address of block array argument (read-only) 41 | * 8 rdi Base address of state array argument (read-only) 42 | * 8 rbx Base address of magictable0 array (read-only) 43 | * 8 rbp Base address of magictable1 array (read-only) 44 | * 8 rsp x86-64 stack pointer 45 | * 64 r8..r15 Output rows for current round being computed, in little endian (8 bytes per register) 46 | * 64 xmm0..xmm3 All contents of current state array, in little endian (16 bytes per register) 47 | * 64 xmm4..xmm7 All contents of current block array, in little endian (16 bytes per register) 48 | * 8 [rsp+ 0] Temporary storage for transferring R15 to XMM 49 | * 8 [rsp+ 8] Caller's value of r15 50 | * 8 [rsp+16] Caller's value of r14 51 | * 8 [rsp+24] Caller's value of r13 52 | * 8 [rsp+32] Caller's value of r12 53 | * 8 [rsp+40] Caller's value of r11 54 | * 8 [rsp+48] Caller's value of r10 55 | * 8 [rsp+56] 
Caller's value of rbp 56 | * 8 [rsp+64] Caller's value of rbx 57 | */ 58 | 59 | #define NUM_ROUNDS 10 /* Any number from 1 to 32 is allowed */ 60 | 61 | #define DOBYTEPAIRFIRST(inreg,offset,outreg0,outreg1) \ 62 | pextrw $(offset), %inreg, %eax; \ 63 | movzbl %ah, %edx; \ 64 | andl $0xFF, %eax; \ 65 | movq (%rbx,%rax,8), %outreg0; \ 66 | movq (%rbp,%rdx,8), %outreg1; 67 | 68 | #define DOBYTEPAIR(inreg,offset,outreg0,outreg1) \ 69 | pextrw $(offset), %inreg, %eax; \ 70 | movzbl %ah, %edx; \ 71 | andl $0xFF, %eax; \ 72 | xorq (%rbx,%rax,8), %outreg0; \ 73 | xorq (%rbp,%rdx,8), %outreg1; 74 | 75 | #define ROTATERIGHT() \ 76 | rorq $16, %r8 ; \ 77 | rorq $16, %r9 ; \ 78 | rorq $16, %r10; \ 79 | rorq $16, %r11; \ 80 | rorq $16, %r12; \ 81 | rorq $16, %r13; \ 82 | rorq $16, %r14; \ 83 | rorq $16, %r15; 84 | 85 | /* Used for sigma (AddRoundKey) */ 86 | #define XOR_XMM0_3_TO_XMM4_7() \ 87 | pxor %xmm0, %xmm4; \ 88 | pxor %xmm1, %xmm5; \ 89 | pxor %xmm2, %xmm6; \ 90 | pxor %xmm3, %xmm7; 91 | 92 | /* Save registers */ 93 | pushq %rbx 94 | pushq %rbp 95 | pushq %r10 96 | pushq %r11 97 | pushq %r12 98 | pushq %r13 99 | pushq %r14 100 | pushq %r15 101 | subq $8, %rsp 102 | 103 | /* Load state into XMM */ 104 | movdqu 0(%rdi), %xmm0 105 | movdqu 16(%rdi), %xmm1 106 | movdqu 32(%rdi), %xmm2 107 | movdqu 48(%rdi), %xmm3 108 | 109 | /* Load block into XMM */ 110 | movdqu 0(%rsi), %xmm4 111 | movdqu 16(%rsi), %xmm5 112 | movdqu 32(%rsi), %xmm6 113 | movdqu 48(%rsi), %xmm7 114 | 115 | /* XOR block with state */ 116 | XOR_XMM0_3_TO_XMM4_7() 117 | 118 | /* Load table addresses */ 119 | leaq .magictable0(%rip), %rbx 120 | leaq .magictable1(%rip), %rbp 121 | 122 | /* Do 10 rounds */ 123 | movl $0, %ecx 124 | .looptop: 125 | 126 | /* Process all 64 state bytes */ 127 | DOBYTEPAIRFIRST(xmm0, 0, r8 , r9 ) 128 | DOBYTEPAIRFIRST(xmm1, 0, r10, r11) 129 | DOBYTEPAIRFIRST(xmm2, 0, r12, r13) 130 | DOBYTEPAIRFIRST(xmm3, 0, r14, r15) 131 | DOBYTEPAIR(xmm0, 4, r9 , r10) 132 | DOBYTEPAIR(xmm1, 4, 
r11, r12) 133 | DOBYTEPAIR(xmm2, 4, r13, r14) 134 | DOBYTEPAIR(xmm3, 4, r15, r8 ) 135 | ROTATERIGHT() 136 | DOBYTEPAIR(xmm3, 1, r8 , r9 ) 137 | DOBYTEPAIR(xmm0, 1, r10, r11) 138 | DOBYTEPAIR(xmm1, 1, r12, r13) 139 | DOBYTEPAIR(xmm2, 1, r14, r15) 140 | DOBYTEPAIR(xmm3, 5, r9 , r10) 141 | DOBYTEPAIR(xmm0, 5, r11, r12) 142 | DOBYTEPAIR(xmm1, 5, r13, r14) 143 | DOBYTEPAIR(xmm2, 5, r15, r8 ) 144 | ROTATERIGHT() 145 | DOBYTEPAIR(xmm2, 2, r8 , r9 ) 146 | DOBYTEPAIR(xmm3, 2, r10, r11) 147 | DOBYTEPAIR(xmm0, 2, r12, r13) 148 | DOBYTEPAIR(xmm1, 2, r14, r15) 149 | DOBYTEPAIR(xmm2, 6, r9 , r10) 150 | DOBYTEPAIR(xmm3, 6, r11, r12) 151 | DOBYTEPAIR(xmm0, 6, r13, r14) 152 | DOBYTEPAIR(xmm1, 6, r15, r8 ) 153 | ROTATERIGHT() 154 | DOBYTEPAIR(xmm1, 3, r8 , r9 ) 155 | DOBYTEPAIR(xmm2, 3, r10, r11) 156 | DOBYTEPAIR(xmm3, 3, r12, r13) 157 | DOBYTEPAIR(xmm0, 3, r14, r15) 158 | DOBYTEPAIR(xmm1, 7, r9 , r10) 159 | DOBYTEPAIR(xmm2, 7, r11, r12) 160 | DOBYTEPAIR(xmm3, 7, r13, r14) 161 | DOBYTEPAIR(xmm0, 7, r15, r8 ) 162 | ROTATERIGHT() 163 | leaq .roundconstants(%rip), %rax 164 | xorq (%rax,%rcx,8), %r8 /* Add round constant */ 165 | 166 | /* Copy state back to XMM */ 167 | movq %r15, (%rsp) 168 | movq %r8 , %xmm0; movq %r9 , %xmm1; shufpd $0, %xmm1, %xmm0 169 | movq %r10, %xmm1; movq %r11, %xmm2; shufpd $0, %xmm2, %xmm1 170 | movq %r12, %xmm2; movq %r13, %xmm3; shufpd $0, %xmm3, %xmm2 171 | movq %r14, %xmm3; movhps (%rsp), %xmm3 172 | 173 | /* Process all 64 block bytes */ 174 | DOBYTEPAIRFIRST(xmm4, 0, r8 , r9 ) 175 | DOBYTEPAIRFIRST(xmm5, 0, r10, r11) 176 | DOBYTEPAIRFIRST(xmm6, 0, r12, r13) 177 | DOBYTEPAIRFIRST(xmm7, 0, r14, r15) 178 | DOBYTEPAIR(xmm4, 4, r9 , r10) 179 | DOBYTEPAIR(xmm5, 4, r11, r12) 180 | DOBYTEPAIR(xmm6, 4, r13, r14) 181 | DOBYTEPAIR(xmm7, 4, r15, r8 ) 182 | ROTATERIGHT() 183 | DOBYTEPAIR(xmm7, 1, r8 , r9 ) 184 | DOBYTEPAIR(xmm4, 1, r10, r11) 185 | DOBYTEPAIR(xmm5, 1, r12, r13) 186 | DOBYTEPAIR(xmm6, 1, r14, r15) 187 | DOBYTEPAIR(xmm7, 5, r9 , r10) 188 | 
DOBYTEPAIR(xmm4, 5, r11, r12) 189 | DOBYTEPAIR(xmm5, 5, r13, r14) 190 | DOBYTEPAIR(xmm6, 5, r15, r8 ) 191 | ROTATERIGHT() 192 | DOBYTEPAIR(xmm6, 2, r8 , r9 ) 193 | DOBYTEPAIR(xmm7, 2, r10, r11) 194 | DOBYTEPAIR(xmm4, 2, r12, r13) 195 | DOBYTEPAIR(xmm5, 2, r14, r15) 196 | DOBYTEPAIR(xmm6, 6, r9 , r10) 197 | DOBYTEPAIR(xmm7, 6, r11, r12) 198 | DOBYTEPAIR(xmm4, 6, r13, r14) 199 | DOBYTEPAIR(xmm5, 6, r15, r8 ) 200 | ROTATERIGHT() 201 | DOBYTEPAIR(xmm5, 3, r8 , r9 ) 202 | DOBYTEPAIR(xmm6, 3, r10, r11) 203 | DOBYTEPAIR(xmm7, 3, r12, r13) 204 | DOBYTEPAIR(xmm4, 3, r14, r15) 205 | DOBYTEPAIR(xmm5, 7, r9 , r10) 206 | DOBYTEPAIR(xmm6, 7, r11, r12) 207 | DOBYTEPAIR(xmm7, 7, r13, r14) 208 | DOBYTEPAIR(xmm4, 7, r15, r8 ) 209 | ROTATERIGHT() 210 | 211 | /* Copy block back to XMM */ 212 | movq %r15, (%rsp) 213 | movq %r8 , %xmm4; movq %r9 , %xmm5; shufpd $0, %xmm5, %xmm4 214 | movq %r10, %xmm5; movq %r11, %xmm6; shufpd $0, %xmm6, %xmm5 215 | movq %r12, %xmm6; movq %r13, %xmm7; shufpd $0, %xmm7, %xmm6 216 | movq %r14, %xmm7; movhps (%rsp), %xmm7 217 | 218 | /* Add state to block */ 219 | XOR_XMM0_3_TO_XMM4_7() 220 | 221 | /* Loop back */ 222 | incl %ecx 223 | cmpl $NUM_ROUNDS, %ecx 224 | jne .looptop 225 | 226 | /* XOR old state (in memory) with old block (in memory) and new block (in XMM) */ 227 | movdqu 0(%rdi), %xmm0 /* Load old state */ 228 | movdqu 16(%rdi), %xmm1 229 | movdqu 32(%rdi), %xmm2 230 | movdqu 48(%rdi), %xmm3 231 | XOR_XMM0_3_TO_XMM4_7() /* XOR into new block */ 232 | movdqu 0(%rsi), %xmm0 /* Load old block */ 233 | movdqu 16(%rsi), %xmm1 234 | movdqu 32(%rsi), %xmm2 235 | movdqu 48(%rsi), %xmm3 236 | XOR_XMM0_3_TO_XMM4_7() /* XOR into new block */ 237 | movdqu %xmm4, 0(%rdi) /* Store new state */ 238 | movdqu %xmm5, 16(%rdi) 239 | movdqu %xmm6, 32(%rdi) 240 | movdqu %xmm7, 48(%rdi) 241 | 242 | /* Clean up, restore registers, return */ 243 | addq $8, %rsp 244 | popq %r15 245 | popq %r14 246 | popq %r13 247 | popq %r12 248 | popq %r11 249 | popq %r10 250 | popq 
%rbp 251 | popq %rbx 252 | retq 253 | 254 | 255 | .balign 8 256 | .roundconstants: /* Starting from the beginning, each round uses 8 bytes */ 257 | .byte 0x18, 0x23, 0xC6, 0xE8, 0x87, 0xB8, 0x01, 0x4F, 0x36, 0xA6, 0xD2, 0xF5, 0x79, 0x6F, 0x91, 0x52 258 | .byte 0x60, 0xBC, 0x9B, 0x8E, 0xA3, 0x0C, 0x7B, 0x35, 0x1D, 0xE0, 0xD7, 0xC2, 0x2E, 0x4B, 0xFE, 0x57 259 | .byte 0x15, 0x77, 0x37, 0xE5, 0x9F, 0xF0, 0x4A, 0xDA, 0x58, 0xC9, 0x29, 0x0A, 0xB1, 0xA0, 0x6B, 0x85 260 | .byte 0xBD, 0x5D, 0x10, 0xF4, 0xCB, 0x3E, 0x05, 0x67, 0xE4, 0x27, 0x41, 0x8B, 0xA7, 0x7D, 0x95, 0xD8 261 | .byte 0xFB, 0xEE, 0x7C, 0x66, 0xDD, 0x17, 0x47, 0x9E, 0xCA, 0x2D, 0xBF, 0x07, 0xAD, 0x5A, 0x83, 0x33 262 | .byte 0x63, 0x02, 0xAA, 0x71, 0xC8, 0x19, 0x49, 0xD9, 0xF2, 0xE3, 0x5B, 0x88, 0x9A, 0x26, 0x32, 0xB0 263 | .byte 0xE9, 0x0F, 0xD5, 0x80, 0xBE, 0xCD, 0x34, 0x48, 0xFF, 0x7A, 0x90, 0x5F, 0x20, 0x68, 0x1A, 0xAE 264 | .byte 0xB4, 0x54, 0x93, 0x22, 0x64, 0xF1, 0x73, 0x12, 0x40, 0x08, 0xC3, 0xEC, 0xDB, 0xA1, 0x8D, 0x3D 265 | .byte 0x97, 0x00, 0xCF, 0x2B, 0x76, 0x82, 0xD6, 0x1B, 0xB5, 0xAF, 0x6A, 0x50, 0x45, 0xF3, 0x30, 0xEF 266 | .byte 0x3F, 0x55, 0xA2, 0xEA, 0x65, 0xBA, 0x2F, 0xC0, 0xDE, 0x1C, 0xFD, 0x4D, 0x92, 0x75, 0x06, 0x8A 267 | .byte 0xB2, 0xE6, 0x0E, 0x1F, 0x62, 0xD4, 0xA8, 0x96, 0xF9, 0xC5, 0x25, 0x59, 0x84, 0x72, 0x39, 0x4C 268 | .byte 0x5E, 0x78, 0x38, 0x8C, 0xD1, 0xA5, 0xE2, 0x61, 0xB3, 0x21, 0x9C, 0x1E, 0x43, 0xC7, 0xFC, 0x04 269 | .byte 0x51, 0x99, 0x6D, 0x0D, 0xFA, 0xDF, 0x7E, 0x24, 0x3B, 0xAB, 0xCE, 0x11, 0x8F, 0x4E, 0xB7, 0xEB 270 | .byte 0x3C, 0x81, 0x94, 0xF7, 0xB9, 0x13, 0x2C, 0xD3, 0xE7, 0x6E, 0xC4, 0x03, 0x56, 0x44, 0x7F, 0xA9 271 | .byte 0x2A, 0xBB, 0xC1, 0x53, 0xDC, 0x0B, 0x9D, 0x6C, 0x31, 0x74, 0xF6, 0x46, 0xAC, 0x89, 0x14, 0xE1 272 | .byte 0x16, 0x3A, 0x69, 0x09, 0x70, 0xB6, 0xD0, 0xED, 0xCC, 0x42, 0x98, 0xA4, 0x28, 0x5C, 0xF8, 0x86 273 | 274 | .magictable0: /* The combined effect of gamma (SubBytes) and theta (MixRows) */ 275 | .quad 0xD83078C018601818, 0x2646AF05238C2323, 
0xB891F97EC63FC6C6, 0xFBCD6F13E887E8E8, 0xCB13A14C87268787, 0x116D62A9B8DAB8B8, 0x0902050801040101, 0x0D9E6E424F214F4F 276 | .quad 0x9B6CEEAD36D83636, 0xFF510459A6A2A6A6, 0x0CB9BDDED26FD2D2, 0x0EF706FBF5F3F5F5, 0x96F280EF79F97979, 0x30DECE5F6FA16F6F, 0x6D3FEFFC917E9191, 0xF8A407AA52555252 277 | .quad 0x47C0FD27609D6060, 0x35657689BCCABCBC, 0x372BCDAC9B569B9B, 0x8A018C048E028E8E, 0xD25B1571A3B6A3A3, 0x6C183C600C300C0C, 0x84F68AFF7BF17B7B, 0x806AE1B535D43535 278 | .quad 0xF53A69E81D741D1D, 0xB3DD4753E0A7E0E0, 0x21B3ACF6D77BD7D7, 0x9C99ED5EC22FC2C2, 0x435C966D2EB82E2E, 0x29967A624B314B4B, 0x5DE121A3FEDFFEFE, 0xD5AE168257415757 279 | .quad 0xBD2A41A815541515, 0xE8EEB69F77C17777, 0x926EEBA537DC3737, 0x9ED7567BE5B3E5E5, 0x1323D98C9F469F9F, 0x23FD17D3F0E7F0F0, 0x20947F6A4A354A4A, 0x44A9959EDA4FDADA 280 | .quad 0xA2B025FA587D5858, 0xCF8FCA06C903C9C9, 0x7C528D5529A42929, 0x5A1422500A280A0A, 0x507F4FE1B1FEB1B1, 0xC95D1A69A0BAA0A0, 0x14D6DA7F6BB16B6B, 0xD917AB5C852E8585 281 | .quad 0x3C677381BDCEBDBD, 0x8FBA34D25D695D5D, 0x9020508010401010, 0x07F503F3F4F7F4F4, 0xDD8BC016CB0BCBCB, 0xD37CC6ED3EF83E3E, 0x2D0A112805140505, 0x78CEE61F67816767 282 | .quad 0x97D55373E4B7E4E4, 0x024EBB25279C2727, 0x7382583241194141, 0xA70B9D2C8B168B8B, 0xF6530151A7A6A7A7, 0xB2FA94CF7DE97D7D, 0x4937FBDC956E9595, 0x56AD9F8ED847D8D8 283 | .quad 0x70EB308BFBCBFBFB, 0xCDC17123EE9FEEEE, 0xBBF891C77CED7C7C, 0x71CCE31766856666, 0x7BA78EA6DD53DDDD, 0xAF2E4BB8175C1717, 0x458E460247014747, 0x1A21DC849E429E9E 284 | .quad 0xD489C51ECA0FCACA, 0x585A99752DB42D2D, 0x2E637991BFC6BFBF, 0x3F0E1B38071C0707, 0xAC472301AD8EADAD, 0xB0B42FEA5A755A5A, 0xEF1BB56C83368383, 0xB666FF8533CC3333 285 | .quad 0x5CC6F23F63916363, 0x12040A1002080202, 0x93493839AA92AAAA, 0xDEE2A8AF71D97171, 0xC68DCF0EC807C8C8, 0xD1327DC819641919, 0x3B92707249394949, 0x5FAF9A86D943D9D9 286 | .quad 0x31F91DC3F2EFF2F2, 0xA8DB484BE3ABE3E3, 0xB9B62AE25B715B5B, 0xBC0D9234881A8888, 0x3E29C8A49A529A9A, 0x0B4CBE2D26982626, 0xBF64FA8D32C83232, 0x597D4AE9B0FAB0B0 
287 | .quad 0xF2CF6A1BE983E9E9, 0x771E33780F3C0F0F, 0x33B7A6E6D573D5D5, 0xF41DBA74803A8080, 0x27617C99BEC2BEBE, 0xEB87DE26CD13CDCD, 0x8968E4BD34D03434, 0x3290757A483D4848 288 | .quad 0x54E324ABFFDBFFFF, 0x8DF48FF77AF57A7A, 0x643DEAF4907A9090, 0x9DBE3EC25F615F5F, 0x3D40A01D20802020, 0x0FD0D56768BD6868, 0xCA3472D01A681A1A, 0xB7412C19AE82AEAE 289 | .quad 0x7D755EC9B4EAB4B4, 0xCEA8199A544D5454, 0x7F3BE5EC93769393, 0x2F44AA0D22882222, 0x63C8E907648D6464, 0x2AFF12DBF1E3F1F1, 0xCCE6A2BF73D17373, 0x82245A9012481212 290 | .quad 0x7A805D3A401D4040, 0x4810284008200808, 0x959BE856C32BC3C3, 0xDFC57B33EC97ECEC, 0x4DAB9096DB4BDBDB, 0xC05F1F61A1BEA1A1, 0x9107831C8D0E8D8D, 0xC87AC9F53DF43D3D 291 | .quad 0x5B33F1CC97669797, 0x0000000000000000, 0xF983D436CF1BCFCF, 0x6E5687452BAC2B2B, 0xE1ECB39776C57676, 0xE619B06482328282, 0x28B1A9FED67FD6D6, 0xC33677D81B6C1B1B 292 | .quad 0x74775BC1B5EEB5B5, 0xBE432911AF86AFAF, 0x1DD4DF776AB56A6A, 0xEAA00DBA505D5050, 0x578A4C1245094545, 0x38FB18CBF3EBF3F3, 0xAD60F09D30C03030, 0xC4C3742BEF9BEFEF 293 | .quad 0xDA7EC3E53FFC3F3F, 0xC7AA1C9255495555, 0xDB591079A2B2A2A2, 0xE9C96503EA8FEAEA, 0x6ACAEC0F65896565, 0x036968B9BAD2BABA, 0x4A5E93652FBC2F2F, 0x8E9DE74EC027C0C0 294 | .quad 0x60A181BEDE5FDEDE, 0xFC386CE01C701C1C, 0x46E72EBBFDD3FDFD, 0x1F9A64524D294D4D, 0x7639E0E492729292, 0xFAEABC8F75C97575, 0x360C1E3006180606, 0xAE0998248A128A8A 295 | .quad 0x4B7940F9B2F2B2B2, 0x85D15963E6BFE6E6, 0x7E1C36700E380E0E, 0xE73E63F81F7C1F1F, 0x55C4F73762956262, 0x3AB5A3EED477D4D4, 0x814D3229A89AA8A8, 0x5231F4C496629696 296 | .quad 0x62EF3A9BF9C3F9F9, 0xA397F666C533C5C5, 0x104AB13525942525, 0xABB220F259795959, 0xD015AE54842A8484, 0xC5E4A7B772D57272, 0xEC72DDD539E43939, 0x1698615A4C2D4C4C 297 | .quad 0x94BC3BCA5E655E5E, 0x9FF085E778FD7878, 0xE570D8DD38E03838, 0x980586148C0A8C8C, 0x17BFB2C6D163D1D1, 0xE4570B41A5AEA5A5, 0xA1D94D43E2AFE2E2, 0x4EC2F82F61996161 298 | .quad 0x427B45F1B3F6B3B3, 0x3442A51521842121, 0x0825D6949C4A9C9C, 0xEE3C66F01E781E1E, 0x6186522243114343, 
0xB193FC76C73BC7C7, 0x4FE52BB3FCD7FCFC, 0x2408142004100404 299 | .quad 0xE3A208B251595151, 0x252FC7BC995E9999, 0x22DAC44F6DA96D6D, 0x651A39680D340D0D, 0x79E93583FACFFAFA, 0x69A384B6DF5BDFDF, 0xA9FC9BD77EE57E7E, 0x1948B43D24902424 300 | .quad 0xFE76D7C53BEC3B3B, 0x9A4B3D31AB96ABAB, 0xF081D13ECE1FCECE, 0x9922558811441111, 0x8303890C8F068F8F, 0x049C6B4A4E254E4E, 0x667351D1B7E6B7B7, 0xE0CB600BEB8BEBEB 301 | .quad 0xC178CCFD3CF03C3C, 0xFD1FBF7C813E8181, 0x4035FED4946A9494, 0x1CF30CEBF7FBF7F7, 0x186F67A1B9DEB9B9, 0x8B265F98134C1313, 0x51589C7D2CB02C2C, 0x05BBB8D6D36BD3D3 302 | .quad 0x8CD35C6BE7BBE7E7, 0x39DCCB576EA56E6E, 0xAA95F36EC437C4C4, 0x1B060F18030C0303, 0xDCAC138A56455656, 0x5E88491A440D4444, 0xA0FE9EDF7FE17F7F, 0x884F3721A99EA9A9 303 | .quad 0x6754824D2AA82A2A, 0x0A6B6DB1BBD6BBBB, 0x879FE246C123C1C1, 0xF1A602A253515353, 0x72A58BAEDC57DCDC, 0x531627580B2C0B0B, 0x0127D39C9D4E9D9D, 0x2BD8C1476CAD6C6C 304 | .quad 0xA462F59531C43131, 0xF3E8B98774CD7474, 0x15F109E3F6FFF6F6, 0x4C8C430A46054646, 0xA5452609AC8AACAC, 0xB50F973C891E8989, 0xB42844A014501414, 0xBADF425BE1A3E1E1 305 | .quad 0xA62C4EB016581616, 0xF774D2CD3AE83A3A, 0x06D2D06F69B96969, 0x41122D4809240909, 0xD7E0ADA770DD7070, 0x6F7154D9B6E2B6B6, 0x1EBDB7CED067D0D0, 0xD6C77E3BED93EDED 306 | .quad 0xE285DB2ECC17CCCC, 0x6884572A42154242, 0x2C2DC2B4985A9898, 0xED550E49A4AAA4A4, 0x7550885D28A02828, 0x86B831DA5C6D5C5C, 0x6BED3F93F8C7F8F8, 0xC211A44486228686 307 | .magictable1: /* Same table but rotated by 1 byte */ 308 | .quad 0x3078C018601818D8, 0x46AF05238C232326, 0x91F97EC63FC6C6B8, 0xCD6F13E887E8E8FB, 0x13A14C87268787CB, 0x6D62A9B8DAB8B811, 0x0205080104010109, 0x9E6E424F214F4F0D 309 | .quad 0x6CEEAD36D836369B, 0x510459A6A2A6A6FF, 0xB9BDDED26FD2D20C, 0xF706FBF5F3F5F50E, 0xF280EF79F9797996, 0xDECE5F6FA16F6F30, 0x3FEFFC917E91916D, 0xA407AA52555252F8 310 | .quad 0xC0FD27609D606047, 0x657689BCCABCBC35, 0x2BCDAC9B569B9B37, 0x018C048E028E8E8A, 0x5B1571A3B6A3A3D2, 0x183C600C300C0C6C, 0xF68AFF7BF17B7B84, 0x6AE1B535D4353580 
311 | .quad 0x3A69E81D741D1DF5, 0xDD4753E0A7E0E0B3, 0xB3ACF6D77BD7D721, 0x99ED5EC22FC2C29C, 0x5C966D2EB82E2E43, 0x967A624B314B4B29, 0xE121A3FEDFFEFE5D, 0xAE168257415757D5 312 | .quad 0x2A41A815541515BD, 0xEEB69F77C17777E8, 0x6EEBA537DC373792, 0xD7567BE5B3E5E59E, 0x23D98C9F469F9F13, 0xFD17D3F0E7F0F023, 0x947F6A4A354A4A20, 0xA9959EDA4FDADA44 313 | .quad 0xB025FA587D5858A2, 0x8FCA06C903C9C9CF, 0x528D5529A429297C, 0x1422500A280A0A5A, 0x7F4FE1B1FEB1B150, 0x5D1A69A0BAA0A0C9, 0xD6DA7F6BB16B6B14, 0x17AB5C852E8585D9 314 | .quad 0x677381BDCEBDBD3C, 0xBA34D25D695D5D8F, 0x2050801040101090, 0xF503F3F4F7F4F407, 0x8BC016CB0BCBCBDD, 0x7CC6ED3EF83E3ED3, 0x0A1128051405052D, 0xCEE61F6781676778 315 | .quad 0xD55373E4B7E4E497, 0x4EBB25279C272702, 0x8258324119414173, 0x0B9D2C8B168B8BA7, 0x530151A7A6A7A7F6, 0xFA94CF7DE97D7DB2, 0x37FBDC956E959549, 0xAD9F8ED847D8D856 316 | .quad 0xEB308BFBCBFBFB70, 0xC17123EE9FEEEECD, 0xF891C77CED7C7CBB, 0xCCE3176685666671, 0xA78EA6DD53DDDD7B, 0x2E4BB8175C1717AF, 0x8E46024701474745, 0x21DC849E429E9E1A 317 | .quad 0x89C51ECA0FCACAD4, 0x5A99752DB42D2D58, 0x637991BFC6BFBF2E, 0x0E1B38071C07073F, 0x472301AD8EADADAC, 0xB42FEA5A755A5AB0, 0x1BB56C83368383EF, 0x66FF8533CC3333B6 318 | .quad 0xC6F23F639163635C, 0x040A100208020212, 0x493839AA92AAAA93, 0xE2A8AF71D97171DE, 0x8DCF0EC807C8C8C6, 0x327DC819641919D1, 0x927072493949493B, 0xAF9A86D943D9D95F 319 | .quad 0xF91DC3F2EFF2F231, 0xDB484BE3ABE3E3A8, 0xB62AE25B715B5BB9, 0x0D9234881A8888BC, 0x29C8A49A529A9A3E, 0x4CBE2D269826260B, 0x64FA8D32C83232BF, 0x7D4AE9B0FAB0B059 320 | .quad 0xCF6A1BE983E9E9F2, 0x1E33780F3C0F0F77, 0xB7A6E6D573D5D533, 0x1DBA74803A8080F4, 0x617C99BEC2BEBE27, 0x87DE26CD13CDCDEB, 0x68E4BD34D0343489, 0x90757A483D484832 321 | .quad 0xE324ABFFDBFFFF54, 0xF48FF77AF57A7A8D, 0x3DEAF4907A909064, 0xBE3EC25F615F5F9D, 0x40A01D208020203D, 0xD0D56768BD68680F, 0x3472D01A681A1ACA, 0x412C19AE82AEAEB7 322 | .quad 0x755EC9B4EAB4B47D, 0xA8199A544D5454CE, 0x3BE5EC937693937F, 0x44AA0D228822222F, 0xC8E907648D646463, 
0xFF12DBF1E3F1F12A, 0xE6A2BF73D17373CC, 0x245A901248121282 323 | .quad 0x805D3A401D40407A, 0x1028400820080848, 0x9BE856C32BC3C395, 0xC57B33EC97ECECDF, 0xAB9096DB4BDBDB4D, 0x5F1F61A1BEA1A1C0, 0x07831C8D0E8D8D91, 0x7AC9F53DF43D3DC8 324 | .quad 0x33F1CC976697975B, 0x0000000000000000, 0x83D436CF1BCFCFF9, 0x5687452BAC2B2B6E, 0xECB39776C57676E1, 0x19B06482328282E6, 0xB1A9FED67FD6D628, 0x3677D81B6C1B1BC3 325 | .quad 0x775BC1B5EEB5B574, 0x432911AF86AFAFBE, 0xD4DF776AB56A6A1D, 0xA00DBA505D5050EA, 0x8A4C124509454557, 0xFB18CBF3EBF3F338, 0x60F09D30C03030AD, 0xC3742BEF9BEFEFC4 326 | .quad 0x7EC3E53FFC3F3FDA, 0xAA1C9255495555C7, 0x591079A2B2A2A2DB, 0xC96503EA8FEAEAE9, 0xCAEC0F658965656A, 0x6968B9BAD2BABA03, 0x5E93652FBC2F2F4A, 0x9DE74EC027C0C08E 327 | .quad 0xA181BEDE5FDEDE60, 0x386CE01C701C1CFC, 0xE72EBBFDD3FDFD46, 0x9A64524D294D4D1F, 0x39E0E49272929276, 0xEABC8F75C97575FA, 0x0C1E300618060636, 0x0998248A128A8AAE 328 | .quad 0x7940F9B2F2B2B24B, 0xD15963E6BFE6E685, 0x1C36700E380E0E7E, 0x3E63F81F7C1F1FE7, 0xC4F7376295626255, 0xB5A3EED477D4D43A, 0x4D3229A89AA8A881, 0x31F4C49662969652 329 | .quad 0xEF3A9BF9C3F9F962, 0x97F666C533C5C5A3, 0x4AB1352594252510, 0xB220F259795959AB, 0x15AE54842A8484D0, 0xE4A7B772D57272C5, 0x72DDD539E43939EC, 0x98615A4C2D4C4C16 330 | .quad 0xBC3BCA5E655E5E94, 0xF085E778FD78789F, 0x70D8DD38E03838E5, 0x0586148C0A8C8C98, 0xBFB2C6D163D1D117, 0x570B41A5AEA5A5E4, 0xD94D43E2AFE2E2A1, 0xC2F82F619961614E 331 | .quad 0x7B45F1B3F6B3B342, 0x42A5152184212134, 0x25D6949C4A9C9C08, 0x3C66F01E781E1EEE, 0x8652224311434361, 0x93FC76C73BC7C7B1, 0xE52BB3FCD7FCFC4F, 0x0814200410040424 332 | .quad 0xA208B251595151E3, 0x2FC7BC995E999925, 0xDAC44F6DA96D6D22, 0x1A39680D340D0D65, 0xE93583FACFFAFA79, 0xA384B6DF5BDFDF69, 0xFC9BD77EE57E7EA9, 0x48B43D2490242419 333 | .quad 0x76D7C53BEC3B3BFE, 0x4B3D31AB96ABAB9A, 0x81D13ECE1FCECEF0, 0x2255881144111199, 0x03890C8F068F8F83, 0x9C6B4A4E254E4E04, 0x7351D1B7E6B7B766, 0xCB600BEB8BEBEBE0 334 | .quad 0x78CCFD3CF03C3CC1, 0x1FBF7C813E8181FD, 
0x35FED4946A949440, 0xF30CEBF7FBF7F71C, 0x6F67A1B9DEB9B918, 0x265F98134C13138B, 0x589C7D2CB02C2C51, 0xBBB8D6D36BD3D305 335 | .quad 0xD35C6BE7BBE7E78C, 0xDCCB576EA56E6E39, 0x95F36EC437C4C4AA, 0x060F18030C03031B, 0xAC138A56455656DC, 0x88491A440D44445E, 0xFE9EDF7FE17F7FA0, 0x4F3721A99EA9A988 336 | .quad 0x54824D2AA82A2A67, 0x6B6DB1BBD6BBBB0A, 0x9FE246C123C1C187, 0xA602A253515353F1, 0xA58BAEDC57DCDC72, 0x1627580B2C0B0B53, 0x27D39C9D4E9D9D01, 0xD8C1476CAD6C6C2B 337 | .quad 0x62F59531C43131A4, 0xE8B98774CD7474F3, 0xF109E3F6FFF6F615, 0x8C430A460546464C, 0x452609AC8AACACA5, 0x0F973C891E8989B5, 0x2844A014501414B4, 0xDF425BE1A3E1E1BA 338 | .quad 0x2C4EB016581616A6, 0x74D2CD3AE83A3AF7, 0xD2D06F69B9696906, 0x122D480924090941, 0xE0ADA770DD7070D7, 0x7154D9B6E2B6B66F, 0xBDB7CED067D0D01E, 0xC77E3BED93EDEDD6 339 | .quad 0x85DB2ECC17CCCCE2, 0x84572A4215424268, 0x2DC2B4985A98982C, 0x550E49A4AAA4A4ED, 0x50885D28A0282875, 0xB831DA5C6D5C5C86, 0xED3F93F8C7F8F86B, 0x11A44486228686C2 340 | -------------------------------------------------------------------------------- /whirlpool/src/x86.S: -------------------------------------------------------------------------------- 1 | /* 2 | * Whirlpool hash in x86 assembly 3 | * 4 | * Copyright (c) 2014 Project Nayuki. (MIT License) 5 | * https://www.nayuki.io/page/fast-whirlpool-hash-in-x86-assembly 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | * the Software, and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * - The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * - The Software is provided "as is", without warranty of any kind, express or 16 | * implied, including but not limited to the warranties of merchantability, 17 | * fitness for a particular purpose and noninfringement. In no event shall the 18 | * authors or copyright holders be liable for any claim, damages or other 19 | * liability, whether in an action of contract, tort or otherwise, arising from, 20 | * out of or in connection with the Software or the use or other dealings in the 21 | * Software. 22 | */ 23 | 24 | 25 | /* void whirlpool_compress(uint8_t state[64], const uint8_t block[64]) */ 26 | /* state and block can be optionally aligned to 8 bytes for slightly better performance. */ 27 | #if defined(__APPLE__) || defined(_WIN32) 28 | .globl _whirlpool_compress 29 | _whirlpool_compress: 30 | #else 31 | .globl whirlpool_compress 32 | whirlpool_compress: 33 | #endif 34 | /* 35 | * Storage usage: 36 | * Bytes Location Description 37 | * 4 eax Temporary 16-bit value for each pair of bytes processed during a round (zero-extended to eax) 38 | * 4 ebx Temporary byte value (zero-extended to ebx) 39 | * 4 ecx Upward loop counter for 10 rounds 40 | * 4 edx Base address of state array argument (read-only) 41 | * 4 esi Base address of block array argument (read-only) 42 | * 4 edi Unused (retains caller's value) 43 | * 4 ebp Unused (retains caller's value) 44 | * 4 esp x86 stack pointer 45 | * 64 mm0..mm7 Output rows for current round being computed, in little endian (8 bytes per register) 46 | * 64 xmm0..xmm3 All contents of current state array, in little endian (16 bytes per register) 47 | * 64 xmm4..xmm7 All contents of current block array, in little endian (16 bytes per register) 48 | * 8 [esp+ 0] Temporary storage for transferring MM7 to XMM 49 | * 4 [esp+ 8] Caller's value of esi 50 | * 4 [esp+12] Caller's value of ebx 51 | */ 52 | 53 | #define NUM_ROUNDS 10 /* Any number from 1 to 32 is allowed (.roundconstants holds 32 entries of 8 bytes each) */ 54 | /* Starts a pair of output rows: extracts 16-bit word number `offset` of inreg into eax, splits it into its low byte (eax) and high byte (ebx), then loads (overwrites) outreg0 with the .magictable0 entry for the low byte and outreg1 with the .magictable1 entry for the high byte */ 55 | #define
DOBYTEPAIRFIRST(inreg,offset,outreg0,outreg1) \ 56 | pextrw $(offset), %inreg, %eax; \ 57 | movzbl %ah, %ebx; \ 58 | andl $0xFF, %eax; \ 59 | movq .magictable0(,%eax,8), %outreg0; \ 60 | movq .magictable1(,%ebx,8), %outreg1; 61 | /* Same byte extraction and table lookups as DOBYTEPAIRFIRST, but XORs the two table entries into outreg0/outreg1 instead of overwriting them */ 62 | #define DOBYTEPAIR(inreg,offset,outreg0,outreg1) \ 63 | pextrw $(offset), %inreg, %eax; \ 64 | movzbl %ah, %ebx; \ 65 | andl $0xFF, %eax; \ 66 | pxor .magictable0(,%eax,8), %outreg0; \ 67 | pxor .magictable1(,%ebx,8), %outreg1; 68 | /* Rotates each of mm0..mm7 by one 16-bit word: pshufw $0x39 moves word 1 to word 0, word 2 to word 1, word 3 to word 2 and word 0 to word 3 */ 69 | #define ROTATERIGHT() \ 70 | pshufw $0x39, %mm0, %mm0; \ 71 | pshufw $0x39, %mm1, %mm1; \ 72 | pshufw $0x39, %mm2, %mm2; \ 73 | pshufw $0x39, %mm3, %mm3; \ 74 | pshufw $0x39, %mm4, %mm4; \ 75 | pshufw $0x39, %mm5, %mm5; \ 76 | pshufw $0x39, %mm6, %mm6; \ 77 | pshufw $0x39, %mm7, %mm7; 78 | 79 | /* Used for sigma (AddRoundKey): XORs xmm0..xmm3 into xmm4..xmm7, leaving xmm0..xmm3 unchanged */ 80 | #define XOR_XMM0_3_TO_XMM4_7() \ 81 | pxor %xmm0, %xmm4; \ 82 | pxor %xmm1, %xmm5; \ 83 | pxor %xmm2, %xmm6; \ 84 | pxor %xmm3, %xmm7; 85 | 86 | /* Save registers, load arguments (after the two pushes the arguments are at esp+12 and esp+16) */ 87 | pushl %ebx 88 | pushl %esi 89 | movl 12(%esp), %edx /* state */ 90 | movl 16(%esp), %esi /* block */ 91 | subl $8, %esp /* Reserve the 8-byte scratch slot at [esp] */ 92 | 93 | /* Load state into XMM (movdqu, so no alignment is required) */ 94 | movdqu 0(%edx), %xmm0 95 | movdqu 16(%edx), %xmm1 96 | movdqu 32(%edx), %xmm2 97 | movdqu 48(%edx), %xmm3 98 | 99 | /* Load block into XMM */ 100 | movdqu 0(%esi), %xmm4 101 | movdqu 16(%esi), %xmm5 102 | movdqu 32(%esi), %xmm6 103 | movdqu 48(%esi), %xmm7 104 | 105 | /* XOR block with state (result is left in xmm4..xmm7) */ 106 | XOR_XMM0_3_TO_XMM4_7() 107 | 108 | /* Do NUM_ROUNDS (10) rounds; ecx counts up from 0 */ 109 | movl $0, %ecx 110 | .looptop: 111 | 112 | /* Process all 64 state bytes (xmm0..xmm3), accumulating the output rows in mm0..mm7 */ 113 | DOBYTEPAIRFIRST(xmm0, 0, mm0, mm1) 114 | DOBYTEPAIRFIRST(xmm1, 0, mm2, mm3) 115 | DOBYTEPAIRFIRST(xmm2, 0, mm4, mm5) 116 | DOBYTEPAIRFIRST(xmm3, 0, mm6, mm7) 117 | DOBYTEPAIR(xmm0, 4, mm1, mm2) 118 | DOBYTEPAIR(xmm1, 4, mm3, mm4) 119 | DOBYTEPAIR(xmm2, 4, mm5, mm6) 120 | DOBYTEPAIR(xmm3, 4, mm7, mm0) 121 | ROTATERIGHT() 122 | DOBYTEPAIR(xmm3, 1, mm0, mm1) 123 | DOBYTEPAIR(xmm0, 1, mm2, mm3) 124 |
DOBYTEPAIR(xmm1, 1, mm4, mm5) 125 | DOBYTEPAIR(xmm2, 1, mm6, mm7) 126 | DOBYTEPAIR(xmm3, 5, mm1, mm2) 127 | DOBYTEPAIR(xmm0, 5, mm3, mm4) 128 | DOBYTEPAIR(xmm1, 5, mm5, mm6) 129 | DOBYTEPAIR(xmm2, 5, mm7, mm0) 130 | ROTATERIGHT() 131 | DOBYTEPAIR(xmm2, 2, mm0, mm1) 132 | DOBYTEPAIR(xmm3, 2, mm2, mm3) 133 | DOBYTEPAIR(xmm0, 2, mm4, mm5) 134 | DOBYTEPAIR(xmm1, 2, mm6, mm7) 135 | DOBYTEPAIR(xmm2, 6, mm1, mm2) 136 | DOBYTEPAIR(xmm3, 6, mm3, mm4) 137 | DOBYTEPAIR(xmm0, 6, mm5, mm6) 138 | DOBYTEPAIR(xmm1, 6, mm7, mm0) 139 | ROTATERIGHT() 140 | DOBYTEPAIR(xmm1, 3, mm0, mm1) 141 | DOBYTEPAIR(xmm2, 3, mm2, mm3) 142 | DOBYTEPAIR(xmm3, 3, mm4, mm5) 143 | DOBYTEPAIR(xmm0, 3, mm6, mm7) 144 | DOBYTEPAIR(xmm1, 7, mm1, mm2) 145 | DOBYTEPAIR(xmm2, 7, mm3, mm4) 146 | DOBYTEPAIR(xmm3, 7, mm5, mm6) 147 | DOBYTEPAIR(xmm0, 7, mm7, mm0) 148 | ROTATERIGHT() 149 | pxor .roundconstants(,%ecx,8), %mm0 /* Add round constant: the 8 bytes at .roundconstants + 8*ecx are XORed into mm0 only */ 150 | 151 | /* Copy state back to XMM: mm0..mm5 are packed pairwise into xmm0..xmm2 (shufpd $0 puts the second register's low half into the first register's high half); mm6 becomes the low half of xmm3 and mm7 goes through the [esp] scratch slot into its high half via movhps */ 152 | movq %mm7, (%esp) 153 | movq2dq %mm0, %xmm0; movq2dq %mm1, %xmm1; shufpd $0, %xmm1, %xmm0 154 | movq2dq %mm2, %xmm1; movq2dq %mm3, %xmm2; shufpd $0, %xmm2, %xmm1 155 | movq2dq %mm4, %xmm2; movq2dq %mm5, %xmm3; shufpd $0, %xmm3, %xmm2 156 | movq2dq %mm6, %xmm3; movhps (%esp), %xmm3 157 | 158 | /* Process all 64 block bytes (xmm4..xmm7), accumulating the output rows in mm0..mm7 */ 159 | DOBYTEPAIRFIRST(xmm4, 0, mm0, mm1) 160 | DOBYTEPAIRFIRST(xmm5, 0, mm2, mm3) 161 | DOBYTEPAIRFIRST(xmm6, 0, mm4, mm5) 162 | DOBYTEPAIRFIRST(xmm7, 0, mm6, mm7) 163 | DOBYTEPAIR(xmm4, 4, mm1, mm2) 164 | DOBYTEPAIR(xmm5, 4, mm3, mm4) 165 | DOBYTEPAIR(xmm6, 4, mm5, mm6) 166 | DOBYTEPAIR(xmm7, 4, mm7, mm0) 167 | ROTATERIGHT() 168 | DOBYTEPAIR(xmm7, 1, mm0, mm1) 169 | DOBYTEPAIR(xmm4, 1, mm2, mm3) 170 | DOBYTEPAIR(xmm5, 1, mm4, mm5) 171 | DOBYTEPAIR(xmm6, 1, mm6, mm7) 172 | DOBYTEPAIR(xmm7, 5, mm1, mm2) 173 | DOBYTEPAIR(xmm4, 5, mm3, mm4) 174 | DOBYTEPAIR(xmm5, 5, mm5, mm6) 175 | DOBYTEPAIR(xmm6, 5, mm7, mm0) 176 | ROTATERIGHT() 177 | DOBYTEPAIR(xmm6, 2, mm0, mm1) 178 | DOBYTEPAIR(xmm7, 2, mm2, mm3) 179
| DOBYTEPAIR(xmm4, 2, mm4, mm5) 180 | DOBYTEPAIR(xmm5, 2, mm6, mm7) 181 | DOBYTEPAIR(xmm6, 6, mm1, mm2) 182 | DOBYTEPAIR(xmm7, 6, mm3, mm4) 183 | DOBYTEPAIR(xmm4, 6, mm5, mm6) 184 | DOBYTEPAIR(xmm5, 6, mm7, mm0) 185 | ROTATERIGHT() 186 | DOBYTEPAIR(xmm5, 3, mm0, mm1) 187 | DOBYTEPAIR(xmm6, 3, mm2, mm3) 188 | DOBYTEPAIR(xmm7, 3, mm4, mm5) 189 | DOBYTEPAIR(xmm4, 3, mm6, mm7) 190 | DOBYTEPAIR(xmm5, 7, mm1, mm2) 191 | DOBYTEPAIR(xmm6, 7, mm3, mm4) 192 | DOBYTEPAIR(xmm7, 7, mm5, mm6) 193 | DOBYTEPAIR(xmm4, 7, mm7, mm0) 194 | ROTATERIGHT() 195 | 196 | /* Copy block back to XMM (same packing as for the state, this time into xmm4..xmm7; no round constant is added here) */ 197 | movq %mm7, (%esp) 198 | movq2dq %mm0, %xmm4; movq2dq %mm1, %xmm5; shufpd $0, %xmm5, %xmm4 199 | movq2dq %mm2, %xmm5; movq2dq %mm3, %xmm6; shufpd $0, %xmm6, %xmm5 200 | movq2dq %mm4, %xmm6; movq2dq %mm5, %xmm7; shufpd $0, %xmm7, %xmm6 201 | movq2dq %mm6, %xmm7; movhps (%esp), %xmm7 202 | 203 | /* Add state to block (xmm4..xmm7 ^= xmm0..xmm3) */ 204 | XOR_XMM0_3_TO_XMM4_7() 205 | 206 | /* Loop back until ecx reaches NUM_ROUNDS */ 207 | incl %ecx 208 | cmpl $NUM_ROUNDS, %ecx 209 | jne .looptop 210 | 211 | /* XOR old state (in memory) with old block (in memory) and new block (in XMM) */ 212 | movdqu 0(%edx), %xmm0 /* Load old state */ 213 | movdqu 16(%edx), %xmm1 214 | movdqu 32(%edx), %xmm2 215 | movdqu 48(%edx), %xmm3 216 | XOR_XMM0_3_TO_XMM4_7() /* XOR into new block */ 217 | movdqu 0(%esi), %xmm0 /* Load old block */ 218 | movdqu 16(%esi), %xmm1 219 | movdqu 32(%esi), %xmm2 220 | movdqu 48(%esi), %xmm3 221 | XOR_XMM0_3_TO_XMM4_7() /* XOR into new block */ 222 | movdqu %xmm4, 0(%edx) /* Store new state */ 223 | movdqu %xmm5, 16(%edx) 224 | movdqu %xmm6, 32(%edx) 225 | movdqu %xmm7, 48(%edx) 226 | 227 | /* Clean up, restore registers, return */ 228 | emms /* Clear the MMX state so that later x87 floating-point code works */ 229 | addl $8, %esp 230 | popl %esi 231 | popl %ebx 232 | retl 233 | 234 | 235 | .balign 8 236 | .roundconstants: /* Starting from the beginning, each round uses 8 bytes */ 237 | .byte 0x18, 0x23, 0xC6, 0xE8, 0x87, 0xB8, 0x01, 0x4F, 0x36, 0xA6, 0xD2, 0xF5, 0x79, 0x6F, 0x91, 0x52 238 | .byte
0x60, 0xBC, 0x9B, 0x8E, 0xA3, 0x0C, 0x7B, 0x35, 0x1D, 0xE0, 0xD7, 0xC2, 0x2E, 0x4B, 0xFE, 0x57 239 | .byte 0x15, 0x77, 0x37, 0xE5, 0x9F, 0xF0, 0x4A, 0xDA, 0x58, 0xC9, 0x29, 0x0A, 0xB1, 0xA0, 0x6B, 0x85 240 | .byte 0xBD, 0x5D, 0x10, 0xF4, 0xCB, 0x3E, 0x05, 0x67, 0xE4, 0x27, 0x41, 0x8B, 0xA7, 0x7D, 0x95, 0xD8 241 | .byte 0xFB, 0xEE, 0x7C, 0x66, 0xDD, 0x17, 0x47, 0x9E, 0xCA, 0x2D, 0xBF, 0x07, 0xAD, 0x5A, 0x83, 0x33 242 | .byte 0x63, 0x02, 0xAA, 0x71, 0xC8, 0x19, 0x49, 0xD9, 0xF2, 0xE3, 0x5B, 0x88, 0x9A, 0x26, 0x32, 0xB0 243 | .byte 0xE9, 0x0F, 0xD5, 0x80, 0xBE, 0xCD, 0x34, 0x48, 0xFF, 0x7A, 0x90, 0x5F, 0x20, 0x68, 0x1A, 0xAE 244 | .byte 0xB4, 0x54, 0x93, 0x22, 0x64, 0xF1, 0x73, 0x12, 0x40, 0x08, 0xC3, 0xEC, 0xDB, 0xA1, 0x8D, 0x3D 245 | .byte 0x97, 0x00, 0xCF, 0x2B, 0x76, 0x82, 0xD6, 0x1B, 0xB5, 0xAF, 0x6A, 0x50, 0x45, 0xF3, 0x30, 0xEF 246 | .byte 0x3F, 0x55, 0xA2, 0xEA, 0x65, 0xBA, 0x2F, 0xC0, 0xDE, 0x1C, 0xFD, 0x4D, 0x92, 0x75, 0x06, 0x8A 247 | .byte 0xB2, 0xE6, 0x0E, 0x1F, 0x62, 0xD4, 0xA8, 0x96, 0xF9, 0xC5, 0x25, 0x59, 0x84, 0x72, 0x39, 0x4C 248 | .byte 0x5E, 0x78, 0x38, 0x8C, 0xD1, 0xA5, 0xE2, 0x61, 0xB3, 0x21, 0x9C, 0x1E, 0x43, 0xC7, 0xFC, 0x04 249 | .byte 0x51, 0x99, 0x6D, 0x0D, 0xFA, 0xDF, 0x7E, 0x24, 0x3B, 0xAB, 0xCE, 0x11, 0x8F, 0x4E, 0xB7, 0xEB 250 | .byte 0x3C, 0x81, 0x94, 0xF7, 0xB9, 0x13, 0x2C, 0xD3, 0xE7, 0x6E, 0xC4, 0x03, 0x56, 0x44, 0x7F, 0xA9 251 | .byte 0x2A, 0xBB, 0xC1, 0x53, 0xDC, 0x0B, 0x9D, 0x6C, 0x31, 0x74, 0xF6, 0x46, 0xAC, 0x89, 0x14, 0xE1 252 | .byte 0x16, 0x3A, 0x69, 0x09, 0x70, 0xB6, 0xD0, 0xED, 0xCC, 0x42, 0x98, 0xA4, 0x28, 0x5C, 0xF8, 0x86 253 | 254 | .magictable0: /* The combined effect of gamma (SubBytes) and theta (MixRows) */ 255 | .quad 0xD83078C018601818, 0x2646AF05238C2323, 0xB891F97EC63FC6C6, 0xFBCD6F13E887E8E8, 0xCB13A14C87268787, 0x116D62A9B8DAB8B8, 0x0902050801040101, 0x0D9E6E424F214F4F 256 | .quad 0x9B6CEEAD36D83636, 0xFF510459A6A2A6A6, 0x0CB9BDDED26FD2D2, 0x0EF706FBF5F3F5F5, 0x96F280EF79F97979, 0x30DECE5F6FA16F6F, 
0x6D3FEFFC917E9191, 0xF8A407AA52555252 257 | .quad 0x47C0FD27609D6060, 0x35657689BCCABCBC, 0x372BCDAC9B569B9B, 0x8A018C048E028E8E, 0xD25B1571A3B6A3A3, 0x6C183C600C300C0C, 0x84F68AFF7BF17B7B, 0x806AE1B535D43535 258 | .quad 0xF53A69E81D741D1D, 0xB3DD4753E0A7E0E0, 0x21B3ACF6D77BD7D7, 0x9C99ED5EC22FC2C2, 0x435C966D2EB82E2E, 0x29967A624B314B4B, 0x5DE121A3FEDFFEFE, 0xD5AE168257415757 259 | .quad 0xBD2A41A815541515, 0xE8EEB69F77C17777, 0x926EEBA537DC3737, 0x9ED7567BE5B3E5E5, 0x1323D98C9F469F9F, 0x23FD17D3F0E7F0F0, 0x20947F6A4A354A4A, 0x44A9959EDA4FDADA 260 | .quad 0xA2B025FA587D5858, 0xCF8FCA06C903C9C9, 0x7C528D5529A42929, 0x5A1422500A280A0A, 0x507F4FE1B1FEB1B1, 0xC95D1A69A0BAA0A0, 0x14D6DA7F6BB16B6B, 0xD917AB5C852E8585 261 | .quad 0x3C677381BDCEBDBD, 0x8FBA34D25D695D5D, 0x9020508010401010, 0x07F503F3F4F7F4F4, 0xDD8BC016CB0BCBCB, 0xD37CC6ED3EF83E3E, 0x2D0A112805140505, 0x78CEE61F67816767 262 | .quad 0x97D55373E4B7E4E4, 0x024EBB25279C2727, 0x7382583241194141, 0xA70B9D2C8B168B8B, 0xF6530151A7A6A7A7, 0xB2FA94CF7DE97D7D, 0x4937FBDC956E9595, 0x56AD9F8ED847D8D8 263 | .quad 0x70EB308BFBCBFBFB, 0xCDC17123EE9FEEEE, 0xBBF891C77CED7C7C, 0x71CCE31766856666, 0x7BA78EA6DD53DDDD, 0xAF2E4BB8175C1717, 0x458E460247014747, 0x1A21DC849E429E9E 264 | .quad 0xD489C51ECA0FCACA, 0x585A99752DB42D2D, 0x2E637991BFC6BFBF, 0x3F0E1B38071C0707, 0xAC472301AD8EADAD, 0xB0B42FEA5A755A5A, 0xEF1BB56C83368383, 0xB666FF8533CC3333 265 | .quad 0x5CC6F23F63916363, 0x12040A1002080202, 0x93493839AA92AAAA, 0xDEE2A8AF71D97171, 0xC68DCF0EC807C8C8, 0xD1327DC819641919, 0x3B92707249394949, 0x5FAF9A86D943D9D9 266 | .quad 0x31F91DC3F2EFF2F2, 0xA8DB484BE3ABE3E3, 0xB9B62AE25B715B5B, 0xBC0D9234881A8888, 0x3E29C8A49A529A9A, 0x0B4CBE2D26982626, 0xBF64FA8D32C83232, 0x597D4AE9B0FAB0B0 267 | .quad 0xF2CF6A1BE983E9E9, 0x771E33780F3C0F0F, 0x33B7A6E6D573D5D5, 0xF41DBA74803A8080, 0x27617C99BEC2BEBE, 0xEB87DE26CD13CDCD, 0x8968E4BD34D03434, 0x3290757A483D4848 268 | .quad 0x54E324ABFFDBFFFF, 0x8DF48FF77AF57A7A, 0x643DEAF4907A9090, 
0x9DBE3EC25F615F5F, 0x3D40A01D20802020, 0x0FD0D56768BD6868, 0xCA3472D01A681A1A, 0xB7412C19AE82AEAE 269 | .quad 0x7D755EC9B4EAB4B4, 0xCEA8199A544D5454, 0x7F3BE5EC93769393, 0x2F44AA0D22882222, 0x63C8E907648D6464, 0x2AFF12DBF1E3F1F1, 0xCCE6A2BF73D17373, 0x82245A9012481212 270 | .quad 0x7A805D3A401D4040, 0x4810284008200808, 0x959BE856C32BC3C3, 0xDFC57B33EC97ECEC, 0x4DAB9096DB4BDBDB, 0xC05F1F61A1BEA1A1, 0x9107831C8D0E8D8D, 0xC87AC9F53DF43D3D 271 | .quad 0x5B33F1CC97669797, 0x0000000000000000, 0xF983D436CF1BCFCF, 0x6E5687452BAC2B2B, 0xE1ECB39776C57676, 0xE619B06482328282, 0x28B1A9FED67FD6D6, 0xC33677D81B6C1B1B 272 | .quad 0x74775BC1B5EEB5B5, 0xBE432911AF86AFAF, 0x1DD4DF776AB56A6A, 0xEAA00DBA505D5050, 0x578A4C1245094545, 0x38FB18CBF3EBF3F3, 0xAD60F09D30C03030, 0xC4C3742BEF9BEFEF 273 | .quad 0xDA7EC3E53FFC3F3F, 0xC7AA1C9255495555, 0xDB591079A2B2A2A2, 0xE9C96503EA8FEAEA, 0x6ACAEC0F65896565, 0x036968B9BAD2BABA, 0x4A5E93652FBC2F2F, 0x8E9DE74EC027C0C0 274 | .quad 0x60A181BEDE5FDEDE, 0xFC386CE01C701C1C, 0x46E72EBBFDD3FDFD, 0x1F9A64524D294D4D, 0x7639E0E492729292, 0xFAEABC8F75C97575, 0x360C1E3006180606, 0xAE0998248A128A8A 275 | .quad 0x4B7940F9B2F2B2B2, 0x85D15963E6BFE6E6, 0x7E1C36700E380E0E, 0xE73E63F81F7C1F1F, 0x55C4F73762956262, 0x3AB5A3EED477D4D4, 0x814D3229A89AA8A8, 0x5231F4C496629696 276 | .quad 0x62EF3A9BF9C3F9F9, 0xA397F666C533C5C5, 0x104AB13525942525, 0xABB220F259795959, 0xD015AE54842A8484, 0xC5E4A7B772D57272, 0xEC72DDD539E43939, 0x1698615A4C2D4C4C 277 | .quad 0x94BC3BCA5E655E5E, 0x9FF085E778FD7878, 0xE570D8DD38E03838, 0x980586148C0A8C8C, 0x17BFB2C6D163D1D1, 0xE4570B41A5AEA5A5, 0xA1D94D43E2AFE2E2, 0x4EC2F82F61996161 278 | .quad 0x427B45F1B3F6B3B3, 0x3442A51521842121, 0x0825D6949C4A9C9C, 0xEE3C66F01E781E1E, 0x6186522243114343, 0xB193FC76C73BC7C7, 0x4FE52BB3FCD7FCFC, 0x2408142004100404 279 | .quad 0xE3A208B251595151, 0x252FC7BC995E9999, 0x22DAC44F6DA96D6D, 0x651A39680D340D0D, 0x79E93583FACFFAFA, 0x69A384B6DF5BDFDF, 0xA9FC9BD77EE57E7E, 0x1948B43D24902424 280 | .quad 
0xFE76D7C53BEC3B3B, 0x9A4B3D31AB96ABAB, 0xF081D13ECE1FCECE, 0x9922558811441111, 0x8303890C8F068F8F, 0x049C6B4A4E254E4E, 0x667351D1B7E6B7B7, 0xE0CB600BEB8BEBEB 281 | .quad 0xC178CCFD3CF03C3C, 0xFD1FBF7C813E8181, 0x4035FED4946A9494, 0x1CF30CEBF7FBF7F7, 0x186F67A1B9DEB9B9, 0x8B265F98134C1313, 0x51589C7D2CB02C2C, 0x05BBB8D6D36BD3D3 282 | .quad 0x8CD35C6BE7BBE7E7, 0x39DCCB576EA56E6E, 0xAA95F36EC437C4C4, 0x1B060F18030C0303, 0xDCAC138A56455656, 0x5E88491A440D4444, 0xA0FE9EDF7FE17F7F, 0x884F3721A99EA9A9 283 | .quad 0x6754824D2AA82A2A, 0x0A6B6DB1BBD6BBBB, 0x879FE246C123C1C1, 0xF1A602A253515353, 0x72A58BAEDC57DCDC, 0x531627580B2C0B0B, 0x0127D39C9D4E9D9D, 0x2BD8C1476CAD6C6C 284 | .quad 0xA462F59531C43131, 0xF3E8B98774CD7474, 0x15F109E3F6FFF6F6, 0x4C8C430A46054646, 0xA5452609AC8AACAC, 0xB50F973C891E8989, 0xB42844A014501414, 0xBADF425BE1A3E1E1 285 | .quad 0xA62C4EB016581616, 0xF774D2CD3AE83A3A, 0x06D2D06F69B96969, 0x41122D4809240909, 0xD7E0ADA770DD7070, 0x6F7154D9B6E2B6B6, 0x1EBDB7CED067D0D0, 0xD6C77E3BED93EDED 286 | .quad 0xE285DB2ECC17CCCC, 0x6884572A42154242, 0x2C2DC2B4985A9898, 0xED550E49A4AAA4A4, 0x7550885D28A02828, 0x86B831DA5C6D5C5C, 0x6BED3F93F8C7F8F8, 0xC211A44486228686 287 | .magictable1: /* Same table but rotated by 1 byte */ 288 | .quad 0x3078C018601818D8, 0x46AF05238C232326, 0x91F97EC63FC6C6B8, 0xCD6F13E887E8E8FB, 0x13A14C87268787CB, 0x6D62A9B8DAB8B811, 0x0205080104010109, 0x9E6E424F214F4F0D 289 | .quad 0x6CEEAD36D836369B, 0x510459A6A2A6A6FF, 0xB9BDDED26FD2D20C, 0xF706FBF5F3F5F50E, 0xF280EF79F9797996, 0xDECE5F6FA16F6F30, 0x3FEFFC917E91916D, 0xA407AA52555252F8 290 | .quad 0xC0FD27609D606047, 0x657689BCCABCBC35, 0x2BCDAC9B569B9B37, 0x018C048E028E8E8A, 0x5B1571A3B6A3A3D2, 0x183C600C300C0C6C, 0xF68AFF7BF17B7B84, 0x6AE1B535D4353580 291 | .quad 0x3A69E81D741D1DF5, 0xDD4753E0A7E0E0B3, 0xB3ACF6D77BD7D721, 0x99ED5EC22FC2C29C, 0x5C966D2EB82E2E43, 0x967A624B314B4B29, 0xE121A3FEDFFEFE5D, 0xAE168257415757D5 292 | .quad 0x2A41A815541515BD, 0xEEB69F77C17777E8, 0x6EEBA537DC373792, 
0xD7567BE5B3E5E59E, 0x23D98C9F469F9F13, 0xFD17D3F0E7F0F023, 0x947F6A4A354A4A20, 0xA9959EDA4FDADA44 293 | .quad 0xB025FA587D5858A2, 0x8FCA06C903C9C9CF, 0x528D5529A429297C, 0x1422500A280A0A5A, 0x7F4FE1B1FEB1B150, 0x5D1A69A0BAA0A0C9, 0xD6DA7F6BB16B6B14, 0x17AB5C852E8585D9 294 | .quad 0x677381BDCEBDBD3C, 0xBA34D25D695D5D8F, 0x2050801040101090, 0xF503F3F4F7F4F407, 0x8BC016CB0BCBCBDD, 0x7CC6ED3EF83E3ED3, 0x0A1128051405052D, 0xCEE61F6781676778 295 | .quad 0xD55373E4B7E4E497, 0x4EBB25279C272702, 0x8258324119414173, 0x0B9D2C8B168B8BA7, 0x530151A7A6A7A7F6, 0xFA94CF7DE97D7DB2, 0x37FBDC956E959549, 0xAD9F8ED847D8D856 296 | .quad 0xEB308BFBCBFBFB70, 0xC17123EE9FEEEECD, 0xF891C77CED7C7CBB, 0xCCE3176685666671, 0xA78EA6DD53DDDD7B, 0x2E4BB8175C1717AF, 0x8E46024701474745, 0x21DC849E429E9E1A 297 | .quad 0x89C51ECA0FCACAD4, 0x5A99752DB42D2D58, 0x637991BFC6BFBF2E, 0x0E1B38071C07073F, 0x472301AD8EADADAC, 0xB42FEA5A755A5AB0, 0x1BB56C83368383EF, 0x66FF8533CC3333B6 298 | .quad 0xC6F23F639163635C, 0x040A100208020212, 0x493839AA92AAAA93, 0xE2A8AF71D97171DE, 0x8DCF0EC807C8C8C6, 0x327DC819641919D1, 0x927072493949493B, 0xAF9A86D943D9D95F 299 | .quad 0xF91DC3F2EFF2F231, 0xDB484BE3ABE3E3A8, 0xB62AE25B715B5BB9, 0x0D9234881A8888BC, 0x29C8A49A529A9A3E, 0x4CBE2D269826260B, 0x64FA8D32C83232BF, 0x7D4AE9B0FAB0B059 300 | .quad 0xCF6A1BE983E9E9F2, 0x1E33780F3C0F0F77, 0xB7A6E6D573D5D533, 0x1DBA74803A8080F4, 0x617C99BEC2BEBE27, 0x87DE26CD13CDCDEB, 0x68E4BD34D0343489, 0x90757A483D484832 301 | .quad 0xE324ABFFDBFFFF54, 0xF48FF77AF57A7A8D, 0x3DEAF4907A909064, 0xBE3EC25F615F5F9D, 0x40A01D208020203D, 0xD0D56768BD68680F, 0x3472D01A681A1ACA, 0x412C19AE82AEAEB7 302 | .quad 0x755EC9B4EAB4B47D, 0xA8199A544D5454CE, 0x3BE5EC937693937F, 0x44AA0D228822222F, 0xC8E907648D646463, 0xFF12DBF1E3F1F12A, 0xE6A2BF73D17373CC, 0x245A901248121282 303 | .quad 0x805D3A401D40407A, 0x1028400820080848, 0x9BE856C32BC3C395, 0xC57B33EC97ECECDF, 0xAB9096DB4BDBDB4D, 0x5F1F61A1BEA1A1C0, 0x07831C8D0E8D8D91, 0x7AC9F53DF43D3DC8 304 | .quad 
0x33F1CC976697975B, 0x0000000000000000, 0x83D436CF1BCFCFF9, 0x5687452BAC2B2B6E, 0xECB39776C57676E1, 0x19B06482328282E6, 0xB1A9FED67FD6D628, 0x3677D81B6C1B1BC3 305 | .quad 0x775BC1B5EEB5B574, 0x432911AF86AFAFBE, 0xD4DF776AB56A6A1D, 0xA00DBA505D5050EA, 0x8A4C124509454557, 0xFB18CBF3EBF3F338, 0x60F09D30C03030AD, 0xC3742BEF9BEFEFC4 306 | .quad 0x7EC3E53FFC3F3FDA, 0xAA1C9255495555C7, 0x591079A2B2A2A2DB, 0xC96503EA8FEAEAE9, 0xCAEC0F658965656A, 0x6968B9BAD2BABA03, 0x5E93652FBC2F2F4A, 0x9DE74EC027C0C08E 307 | .quad 0xA181BEDE5FDEDE60, 0x386CE01C701C1CFC, 0xE72EBBFDD3FDFD46, 0x9A64524D294D4D1F, 0x39E0E49272929276, 0xEABC8F75C97575FA, 0x0C1E300618060636, 0x0998248A128A8AAE 308 | .quad 0x7940F9B2F2B2B24B, 0xD15963E6BFE6E685, 0x1C36700E380E0E7E, 0x3E63F81F7C1F1FE7, 0xC4F7376295626255, 0xB5A3EED477D4D43A, 0x4D3229A89AA8A881, 0x31F4C49662969652 309 | .quad 0xEF3A9BF9C3F9F962, 0x97F666C533C5C5A3, 0x4AB1352594252510, 0xB220F259795959AB, 0x15AE54842A8484D0, 0xE4A7B772D57272C5, 0x72DDD539E43939EC, 0x98615A4C2D4C4C16 310 | .quad 0xBC3BCA5E655E5E94, 0xF085E778FD78789F, 0x70D8DD38E03838E5, 0x0586148C0A8C8C98, 0xBFB2C6D163D1D117, 0x570B41A5AEA5A5E4, 0xD94D43E2AFE2E2A1, 0xC2F82F619961614E 311 | .quad 0x7B45F1B3F6B3B342, 0x42A5152184212134, 0x25D6949C4A9C9C08, 0x3C66F01E781E1EEE, 0x8652224311434361, 0x93FC76C73BC7C7B1, 0xE52BB3FCD7FCFC4F, 0x0814200410040424 312 | .quad 0xA208B251595151E3, 0x2FC7BC995E999925, 0xDAC44F6DA96D6D22, 0x1A39680D340D0D65, 0xE93583FACFFAFA79, 0xA384B6DF5BDFDF69, 0xFC9BD77EE57E7EA9, 0x48B43D2490242419 313 | .quad 0x76D7C53BEC3B3BFE, 0x4B3D31AB96ABAB9A, 0x81D13ECE1FCECEF0, 0x2255881144111199, 0x03890C8F068F8F83, 0x9C6B4A4E254E4E04, 0x7351D1B7E6B7B766, 0xCB600BEB8BEBEBE0 314 | .quad 0x78CCFD3CF03C3CC1, 0x1FBF7C813E8181FD, 0x35FED4946A949440, 0xF30CEBF7FBF7F71C, 0x6F67A1B9DEB9B918, 0x265F98134C13138B, 0x589C7D2CB02C2C51, 0xBBB8D6D36BD3D305 315 | .quad 0xD35C6BE7BBE7E78C, 0xDCCB576EA56E6E39, 0x95F36EC437C4C4AA, 0x060F18030C03031B, 0xAC138A56455656DC, 
0x88491A440D44445E, 0xFE9EDF7FE17F7FA0, 0x4F3721A99EA9A988 316 | .quad 0x54824D2AA82A2A67, 0x6B6DB1BBD6BBBB0A, 0x9FE246C123C1C187, 0xA602A253515353F1, 0xA58BAEDC57DCDC72, 0x1627580B2C0B0B53, 0x27D39C9D4E9D9D01, 0xD8C1476CAD6C6C2B 317 | .quad 0x62F59531C43131A4, 0xE8B98774CD7474F3, 0xF109E3F6FFF6F615, 0x8C430A460546464C, 0x452609AC8AACACA5, 0x0F973C891E8989B5, 0x2844A014501414B4, 0xDF425BE1A3E1E1BA 318 | .quad 0x2C4EB016581616A6, 0x74D2CD3AE83A3AF7, 0xD2D06F69B9696906, 0x122D480924090941, 0xE0ADA770DD7070D7, 0x7154D9B6E2B6B66F, 0xBDB7CED067D0D01E, 0xC77E3BED93EDEDD6 319 | .quad 0x85DB2ECC17CCCCE2, 0x84572A4215424268, 0x2DC2B4985A98982C, 0x550E49A4AAA4A4ED, 0x50885D28A0282875, 0xB831DA5C6D5C5C86, 0xED3F93F8C7F8F86B, 0x11A44486228686C2 320 | --------------------------------------------------------------------------------