├── .gitignore ├── images ├── clockmed.jpg ├── clocksmall.jpg ├── clockxtrasmall.jpg ├── clockxtrasmall2.jpg ├── clockxtrasmall_border.jpg ├── clockxtrasmall_border2.jpg └── readme.txt ├── montgomery_arithmetic ├── include │ └── hurchalla │ │ └── montgomery_arithmetic │ │ ├── low_level_api │ │ ├── detail │ │ │ ├── integer_inverse.odt │ │ │ ├── integer_inverse.pdf │ │ │ ├── impl_inverse_mod_R.h │ │ │ └── platform_specific │ │ │ │ ├── impl_get_Rsquared_mod_n.h │ │ │ │ └── README_REDC_supplement.md │ │ ├── inverse_mod_R.h │ │ ├── get_R_mod_n.h │ │ └── get_Rsquared_mod_n.h │ │ └── detail │ │ ├── MontyTags.h │ │ ├── experimental │ │ ├── montgomery_two_pow │ │ │ ├── bench_reference.sh │ │ │ └── testbench.sh │ │ ├── montgomery_pow_2kary │ │ │ ├── filter_lines.py │ │ │ ├── remove_lines.py │ │ │ ├── firstline.py │ │ │ ├── timings_x64_Zen4 │ │ │ │ └── partial_array_size_2 │ │ │ │ │ ├── out.txt │ │ │ │ │ └── out2.txt │ │ │ ├── testbench_2kary.sh │ │ │ └── timings_ARM64_M2 │ │ │ │ ├── 64_half_gcc_noasm_array.txt │ │ │ │ ├── 64_half_clang_noasm_array.txt │ │ │ │ └── 64_quarter_gcc_noasm_array.txt │ │ ├── README.md │ │ ├── montgomery_two_pow_API.h │ │ └── unit_testing_helpers │ │ │ └── AbstractMontgomeryWrapper.h │ │ ├── BaseMontgomeryValue.h │ │ ├── MontgomeryDefault.h │ │ ├── ImplMontgomeryForm.h │ │ └── MontgomeryFormExtensions.h └── CMakeLists.txt ├── examples ├── example_with_cmake │ ├── example.sh │ ├── CMakeLists.txt │ └── example.cpp └── example_without_cmake │ ├── example.sh │ └── example.cpp ├── .github └── workflows │ ├── devskim.yml │ ├── flawfinder.yml │ ├── cmake.yml │ └── codeql.yml ├── modular_arithmetic ├── include │ └── hurchalla │ │ └── modular_arithmetic │ │ ├── detail │ │ ├── optimization_tag_structs.h │ │ ├── impl_modular_pow.h │ │ └── impl_modular_multiplicative_inverse.h │ │ ├── modular_pow.h │ │ ├── absolute_value_difference.h │ │ ├── modular_multiplicative_inverse.h │ │ ├── modular_multiplication.h │ │ ├── modular_addition.h │ │ └── 
modular_subtraction.h ├── src │ └── platform_specific_MSVC_x86_64 │ │ └── modular_multiply_uint64--x64_microsoft.asm └── CMakeLists.txt ├── msvc_build_tests.bat ├── test ├── modular_arithmetic │ ├── test_modular_addition_with_subtraction.cpp │ ├── test_absolute_value_difference.cpp │ ├── test_modular_addition.cpp │ ├── test_modular_multiplication.cpp │ └── test_modular_pow.cpp ├── CMakeLists.txt ├── montgomery_arithmetic │ ├── low_level_api │ │ ├── test_REDC.cpp │ │ ├── test_REDC_inline_asm.cpp │ │ ├── test_inverse_mod_R.cpp │ │ └── test_get_Rsquared_mod_n.cpp │ └── test_MontgomeryForm_extra.cpp └── FetchGoogleTest.cmake ├── CMakeLists.txt ├── macros_for_performance.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | [Bb]uild/* 2 | -------------------------------------------------------------------------------- /images/clockmed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurchalla/modular_arithmetic/HEAD/images/clockmed.jpg -------------------------------------------------------------------------------- /images/clocksmall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurchalla/modular_arithmetic/HEAD/images/clocksmall.jpg -------------------------------------------------------------------------------- /images/clockxtrasmall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurchalla/modular_arithmetic/HEAD/images/clockxtrasmall.jpg -------------------------------------------------------------------------------- /images/clockxtrasmall2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurchalla/modular_arithmetic/HEAD/images/clockxtrasmall2.jpg 
-------------------------------------------------------------------------------- /images/clockxtrasmall_border.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurchalla/modular_arithmetic/HEAD/images/clockxtrasmall_border.jpg -------------------------------------------------------------------------------- /images/clockxtrasmall_border2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurchalla/modular_arithmetic/HEAD/images/clockxtrasmall_border2.jpg -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/integer_inverse.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurchalla/modular_arithmetic/HEAD/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/integer_inverse.odt -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/integer_inverse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurchalla/modular_arithmetic/HEAD/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/integer_inverse.pdf -------------------------------------------------------------------------------- /images/readme.txt: -------------------------------------------------------------------------------- 1 | The files clockmed.jpg, clocksmall.jpg, clockxtrasmall.jpg, clockxtrasmall2.jpg, clockxtrasmall_border.jpg, and clockxtrasmall_border2.jpg are cropped versions of the photograph 2 | https://commons.wikimedia.org/wiki/File:Clock_gears_in_the_St_Maximus_church_in_Magnac-Laval_03.jpg 3 | 4 | The photo is by Krzysztof Golik, and licensed CC 
#!/usr/bin/env bash

# Copyright (c) 2020-2025 Jeffrey Hurchalla.
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.


# This example is meant to show how to use the modular arithmetic library within
# a CMake project.  If you haven't already done so, you should follow the steps
# in the README.md for "How to use the library" | "With CMake"
#
# Run this script from the directory that contains it: "-S." below tells CMake
# to use the current directory as the source directory, and the build tree is
# created in ./tmp.

# Abort at the first failing command.  Without this, a failed configure or
# build step would be ignored and the script would go on to run an executable
# that is missing or left over from an earlier build.
set -e

mkdir -p tmp
# Configure, then build, a Release configuration in ./tmp
cmake -S. -B./tmp -DCMAKE_BUILD_TYPE=Release
cmake --build ./tmp --config Release

echo
echo Running example...
echo
./tmp/modular_arithmetic_example
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: DevSkim

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]
  schedule:
    # minute 27, hour 3, every Thursday
    - cron: '27 3 * * 4'

jobs:
  lint:
    name: DevSkim
    # Use the same runner label as the other workflows in this repository; a
    # pinned, retired image label would leave the job without a runner.
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      # required so that the SARIF upload step can write code scanning results
      security-events: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run DevSkim scanner
        uses: microsoft/DevSkim-Action@v1

      - name: Upload DevSkim scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: devskim-results.sarif
# Copyright (c) 2020-2022 Jeffrey Hurchalla.
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.


cmake_minimum_required(VERSION 3.14)

project(modular_arithmetic_example VERSION 1.0.0 LANGUAGES CXX)

# The example program is built from a single source file.
add_executable(modular_arithmetic_example example.cpp)

# Pull the modular arithmetic library into this build.  The first argument of
# add_subdirectory must be the path on your system to the root of the cloned
# modular arithmetic repository.  "../.." works here because (unless you move
# it) this CMakeLists.txt sits two directory levels below the repo's root.
# Because that path lies outside this project's source tree, the second
# argument names the binary directory CMake should use for the library.
add_subdirectory(../..
                 ${CMAKE_CURRENT_BINARY_DIR}/modular_arithmetic)

target_link_libraries(modular_arithmetic_example
                      hurchalla_modular_arithmetic)



# To build this example, see the file example.sh.
# filter_lines.py

import sys


def filter_lines(input_filename, search_string, output_filename):
    """
    Copy to output_filename every line of input_filename that contains
    search_string.

    Both files are treated as UTF-8 text.  The match is a plain substring
    test on each line (no regular expressions).  output_filename is created
    or overwritten.  input_filename and output_filename may name the same
    file, in which case the file is filtered in place.
    """
    # Gather the matching lines before the output file is opened: opening a
    # file for writing truncates it, which would empty the input (and lose
    # every line) when both names refer to the same file.
    with open(input_filename, 'r', encoding='utf-8') as infile:
        matching_lines = [line for line in infile if search_string in line]

    with open(output_filename, 'w', encoding='utf-8') as outfile:
        outfile.writelines(matching_lines)


def main():
    """
    Command-line entry point:
        python filter_lines.py <input_file> <search_string> <output_file>

    Exits with status 1 (after printing a usage message) when the number of
    arguments is wrong.
    """
    # Expect exactly three command-line arguments
    if len(sys.argv) != 4:
        print("Usage: python filter_lines.py "
              "<input_file> <search_string> <output_file>")
        sys.exit(1)

    input_filename = sys.argv[1]
    search_string = sys.argv[2]
    output_filename = sys.argv[3]

    filter_lines(input_filename, search_string, output_filename)
    print(f"Lines containing '{search_string}' have been written to '{output_filename}'.")


if __name__ == "__main__":
    main()
# remove_lines.py

import sys


def remove_lines(input_filename, search_string, output_filename):
    """
    Copy to output_filename every line of input_filename that does NOT
    contain search_string.

    Both files are treated as UTF-8 text.  The match is a plain substring
    test on each line (no regular expressions).  output_filename is created
    or overwritten.  input_filename and output_filename may name the same
    file, in which case the lines are removed in place.
    """
    # Gather the surviving lines before the output file is opened: opening a
    # file for writing truncates it, which would empty the input (and lose
    # every line) when both names refer to the same file.
    with open(input_filename, 'r', encoding='utf-8') as infile:
        kept_lines = [line for line in infile if search_string not in line]

    with open(output_filename, 'w', encoding='utf-8') as outfile:
        outfile.writelines(kept_lines)


def main():
    """
    Command-line entry point:
        python remove_lines.py <input_file> <search_string> <output_file>

    Exits with status 1 (after printing a usage message) when the number of
    arguments is wrong.
    """
    # Expect exactly three command-line arguments
    if len(sys.argv) != 4:
        print("Usage: python remove_lines.py "
              "<input_file> <search_string> <output_file>")
        sys.exit(1)

    input_filename = sys.argv[1]
    search_string = sys.argv[2]
    output_filename = sys.argv[3]

    remove_lines(input_filename, search_string, output_filename)
    # The output holds the lines that do NOT contain the search string.
    print(f"Lines not containing '{search_string}' have been written to '{output_filename}'.")


if __name__ == "__main__":
    main()
5 | 6 | name: flawfinder 7 | 8 | on: 9 | push: 10 | branches: [ master ] 11 | pull_request: 12 | # The branches below must be a subset of the branches above 13 | branches: [ master ] 14 | schedule: 15 | - cron: '39 7 * * 0' 16 | 17 | jobs: 18 | flawfinder: 19 | name: Flawfinder 20 | runs-on: ubuntu-latest 21 | permissions: 22 | actions: read 23 | contents: read 24 | security-events: write 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v3 28 | 29 | - name: flawfinder_scan 30 | uses: david-a-wheeler/flawfinder@8e4a779ad59dbfaee5da586aa9210853b701959c 31 | with: 32 | arguments: '--sarif ./' 33 | output: 'flawfinder_results.sarif' 34 | 35 | - name: Upload analysis results to GitHub Security tab 36 | uses: github/codeql-action/upload-sarif@v2 37 | with: 38 | sarif_file: ${{github.workspace}}/flawfinder_results.sarif 39 | -------------------------------------------------------------------------------- /examples/example_without_cmake/example.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (c) 2020-2025 Jeffrey Hurchalla. 4 | # This Source Code Form is subject to the terms of the Mozilla Public 5 | # License, v. 2.0. If a copy of the MPL was not distributed with this 6 | # file, You can obtain one at https://mozilla.org/MPL/2.0/. 7 | 8 | 9 | # This example is intended for the case that you are not using CMake. 10 | # If you haven't already done so, you should follow the steps in the README.md 11 | # for "How to use the library" | "Without CMake" 12 | 13 | 14 | # -------------------------------------------------------------------------- 15 | # You'll need to change the installed_path below, and you may need to change 16 | # the cpp_compiler. 
import sys


def main():
    """
    Print the first qualifying line of the "OVERALL BEST:" section of a file.

    Usage: python3 firstline.py <filename>

    The section starts after the first line containing "OVERALL BEST:" and
    ends at the next line containing "Timings By Test Type:".  Within it, a
    line qualifies when it has exactly 7 whitespace-separated fields and its
    third field is an integer less than 6.  The first qualifying line is
    printed (stripped of surrounding whitespace); if there is none, a message
    saying so is printed instead.

    Exits with status 1 on a wrong argument count, a missing file, or missing
    section markers.
    """
    if len(sys.argv) != 2:
        print("Usage: python3 firstline.py <filename>")
        sys.exit(1)

    filename = sys.argv[1]

    try:
        # UTF-8 to match the sibling scripts (filter_lines.py, remove_lines.py)
        # that read and write the same kind of text file.
        with open(filename, 'r', encoding='utf-8') as file:
            lines = file.readlines()
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found.")
        sys.exit(1)

    # Find start and end markers.  The end marker is searched for only after
    # the start marker, so an earlier occurrence of the end-marker text cannot
    # produce an empty (or inverted) range.
    try:
        start_index = next(i for i, line in enumerate(lines)
                           if "OVERALL BEST:" in line)
        end_index = next(i for i in range(start_index + 1, len(lines))
                         if "Timings By Test Type:" in lines[i])
    except StopIteration:
        print("Error: Could not find required markers in the file.")
        sys.exit(1)

    # Process lines between the markers
    for line in lines[start_index + 1:end_index]:
        parts = line.strip().split()
        if len(parts) != 7:
            continue  # skip malformed lines
        try:
            third_field = int(parts[2])
        except ValueError:
            continue  # skip lines where the third field isn't an integer
        if third_field < 6:
            print(line.strip())
            return

    print("No line found where the third field is less than 6.")


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /msvc_build_tests.bat: -------------------------------------------------------------------------------- 1 | 2 | @echo off 3 | REM This Source Code Form is subject to the terms of the Mozilla Public 4 | REM License, v. 2.0. If a copy of the MPL was not distributed with this 5 | REM file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | 7 | set build_dir=build\msvc 8 | 9 | REM Example of how to use an earlier version of MSVC than the default: 10 | REM cmake --help (will show the available Generators you can use) 11 | REM cmake -S. -B.\%build_dir% -DTEST_HURCHALLA_MODULAR_ARITHMETIC=ON -G "Visual Studio 15" 12 | REM the above line appears to build x86-32. To get x64: 13 | REM cmake -S. -B.\%build_dir% -DTEST_HURCHALLA_MODULAR_ARITHMETIC=ON -G "Visual Studio 15 2017 Win64" 14 | 15 | REM for Visual Studio 2019 and above, set the architecture with -A, for example: 16 | REM -G "Visual Studio 16 2019" -A Win32 17 | REM -G "Visual Studio 16 2019" -A x64 18 | REM -G "Visual Studio 16 2019" -A ARM 19 | REM -G "Visual Studio 16 2019" -A ARM64 20 | 21 | cmake -S. 
name: CMake

on:
  push:
    branches: [ master ]

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release
  # Other potential flags for CXX_FLAGS are -DHURCHALLA_AVOID_CSELECT=1 -DHURCHALLA_ALLOW_INLINE_ASM_ALL=1
  # Separate multiple flags with spaces; the configure step below passes the
  # whole value to CMake as one quoted argument.
  CXX_FLAGS: '-std=c++11'

jobs:
  build:
    # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
    # You can convert this to a matrix build if you need cross-platform coverage.
    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3

    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type

      #-DCMAKE_CXX_COMPILER=clang++-10
      # The CMAKE_CXX_FLAGS argument is quoted so that a CXX_FLAGS value holding
      # several space-separated flags stays a single cmake argument instead of
      # being split by the shell.
      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DTEST_HURCHALLA_MODULAR_ARITHMETIC=ON "-DCMAKE_CXX_FLAGS=${{env.CXX_FLAGS}}"

    - name: Build
      # Build your program with the given configuration
      run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}

    - name: Test Modular Arithmetic
      working-directory: ${{github.workspace}}/build
      run: ./test_hurchalla_modular_arithmetic
// BaseMontgomeryValue wraps a single value of integer type T.
// Construction from a raw T and read access via get() are protected, so only
// derived classes can wrap or unwrap the underlying integer; outside code can
// only default-construct, copy/assign, and call cmov().
// NOTE(review): in this view the template parameter lists (after 'template',
// and any explicit template arguments on ut_numeric_limits /
// conditional_select) appear to be missing - confirm against the real header.
template
class BaseMontgomeryValue {
    // T must be an integer type (as reported by ut_numeric_limits)
    static_assert(ut_numeric_limits::is_integer, "");
    T value;
protected:
    // Wraps the raw integer 'a'.
    HURCHALLA_FORCE_INLINE explicit BaseMontgomeryValue(T a) : value(a) {}
    // Returns the wrapped raw integer.
    HURCHALLA_FORCE_INLINE T get() const { return value; }
public:
    // This next constructor purposely does not initialize 'value' - the
    // contents are undefined until the object is assigned to.
    HURCHALLA_FORCE_INLINE BaseMontgomeryValue() = default;

    // Conditional move: if cond is true, this object takes v's value;
    // otherwise this object is left unchanged.  The selection is delegated to
    // ::hurchalla::conditional_select.
    template
    HURCHALLA_FORCE_INLINE void cmov(bool cond, BaseMontgomeryValue v)
    {
          // value = cond ? v.value : value
        value = ::hurchalla::conditional_select(cond,v.value,value);
    }
};
// Returns the absolute value of the difference of a and b, i.e.
// (a > b) ? a - b : b - a.
// Preconditions: a >= 0 and b >= 0 (T may be a signed or an unsigned integer
// type, but the values passed in must be non-negative).
// Postcondition: the result is >= 0.
// The computation itself is delegated to
// detail::impl_absolute_value_difference; this wrapper only checks the
// contract.
// NOTE(review): in this view the template parameter list after 'template' (and
// any explicit template arguments on ut_numeric_limits /
// impl_absolute_value_difference) appear to be missing - confirm against the
// real header.
template HURCHALLA_FORCE_INLINE
T absolute_value_difference(T a, T b)
{
    // T must be an integer type (as reported by ut_numeric_limits)
    static_assert(ut_numeric_limits::is_integer, "");
    HPBC_CLOCKWORK_API_PRECONDITION(a >= 0);
    HPBC_CLOCKWORK_API_PRECONDITION(b >= 0);

    T result = detail::impl_absolute_value_difference::call(a, b);

    HPBC_CLOCKWORK_POSTCONDITION(result >= 0);
    // cross-check the implementation against the straightforward formula
    HPBC_CLOCKWORK_POSTCONDITION(result == ((a>b) ? a-b : b-a));
    return result;
}
For example, if T is uint64_t, we 25 | // would have R = (UP)1 << 64. 26 | 27 | // Returns the integer x satisfying x*a ≡ 1 (mod R) 28 | // This function is constexpr when compiling for std=c++14 or higher 29 | template 30 | HURCHALLA_CPP14_CONSTEXPR 31 | T inverse_mod_R(T a) 32 | { 33 | static_assert(ut_numeric_limits::is_integer, ""); 34 | static_assert(!(ut_numeric_limits::is_signed), ""); 35 | static_assert(ut_numeric_limits::is_modulo, ""); 36 | HPBC_CLOCKWORK_CONSTEXPR_PRECONDITION(a % 2 == 1); 37 | 38 | T inv = detail::impl_inverse_mod_R::call::digits>(a); 39 | 40 | // guarantee inv*a ≡ 1 (mod R) 41 | using P = typename safely_promote_unsigned::type; 42 | HPBC_CLOCKWORK_CONSTEXPR_POSTCONDITION(static_cast(1) == 43 | static_cast(static_cast

(inv) * static_cast

(a))); 44 | return inv; 45 | } 46 | 47 | 48 | } // end namespace 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/get_R_mod_n.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_GET_R_MOD_N_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_GET_R_MOD_N_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/util/traits/ut_numeric_limits.h" 13 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 14 | 15 | namespace hurchalla { 16 | 17 | 18 | // For discussion purposes, let type UP be a conceptually unlimited precision 19 | // unsigned integer type, and let the unlimited precision constant R represent 20 | // R = (UP)1 << ut_numeric_limits::digits. Equivalently, 21 | // R = (UP)ut_numeric_limits::max + 1. For example, if T is uint64_t, we 22 | // would have R = (UP)1 << 64. 23 | 24 | // Compute R % n 25 | template 26 | T get_R_mod_n(T n) 27 | { 28 | static_assert(ut_numeric_limits::is_integer, ""); 29 | static_assert(!(ut_numeric_limits::is_signed), ""); 30 | static_assert(ut_numeric_limits::is_modulo, ""); 31 | HPBC_CLOCKWORK_PRECONDITION2(n % 2 == 1); 32 | HPBC_CLOCKWORK_PRECONDITION2(n > 1); 33 | 34 | // Assign a tmp T variable rather than directly using the intermediate 35 | // expression, in order to avoid a negative value (and a wrong answer) 36 | // in cases where 'n' would be promoted to type 'int'. 37 | T tmp = static_cast(static_cast(0) - n); 38 | // Compute R % n. 
Arithmetic wraparound behavior of the unsigned integral 39 | // type T results in (0 - n) equaling (R - n). Thus 40 | // rModN = R % n == (R - n) % n == (0 - n) % n 41 | T rModN = static_cast(tmp % n); 42 | // Since n is odd and > 1, and R is a power of 2, n can not divide R. 43 | // Thus, rModN != 0. 44 | 45 | HPBC_CLOCKWORK_POSTCONDITION2(0 < rModN && rModN < n); 46 | return rModN; 47 | } 48 | 49 | 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /examples/example_with_cmake/example.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | 9 | // This example is intended for the case that you are using CMake. 10 | // If you haven't already, you need to follow the steps in the README.md 11 | // for "How to use the library" | "With CMake" 12 | #include "hurchalla/modular_arithmetic/modular_pow.h" 13 | #include "hurchalla/montgomery_arithmetic/MontgomeryForm.h" 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | int main() 20 | { 21 | namespace hc = ::hurchalla; 22 | 23 | // you could use any integer type that the compiler supports 24 | // (including __uint128_t) 25 | using T = uint64_t; 26 | 27 | T modulus = 333333333; 28 | T base = 42; 29 | T exponent = 123456789; 30 | 31 | // ---- Demonstration of modular exponentiation ---- 32 | 33 | // Montgomery arithmetic version: 34 | assert(modulus % 2 == 1); // montgomery arithmetic always needs odd modulus. 35 | // First construct a MontgomeryForm object to do Montgomery arithmetic 36 | // with the modulus we chose. 37 | hc::MontgomeryForm mf(modulus); 38 | // Convert base to its Montgomery representation. 
39 | auto mont_base = mf.convertIn(base); 40 | // Get the pow result in Montgomery representation. 41 | auto mont_result = mf.pow(mont_base, exponent); 42 | // Convert the Montgomery representation result to normal integer domain. 43 | T result1 = mf.convertOut(mont_result); 44 | 45 | 46 | // Standard arithmetic version: (note that Montgomery arithmetic is 47 | // usually much faster) 48 | T result2 = hc::modular_pow(base, exponent, modulus); 49 | 50 | 51 | std::cout << "Example results for " << base << "^" << exponent 52 | << " (mod " << modulus << ")\n"; 53 | std::cout << "---------\n"; 54 | std::cout << "using Montgomery arithmetic: " << result1 << "\n"; 55 | std::cout << "using standard arithmetic: " << result2 << "\n"; 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /examples/example_without_cmake/example.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | 9 | // This example is intended for the case that you are not using CMake. 
10 | // If you haven't already, you need to follow the steps in the README.md 11 | // for "How to use the library" | "Without CMake" 12 | #include "hurchalla/modular_arithmetic/modular_pow.h" 13 | #include "hurchalla/montgomery_arithmetic/MontgomeryForm.h" 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | int main() 20 | { 21 | namespace hc = ::hurchalla; 22 | 23 | // you could use any integer type that the compiler supports 24 | // (including __uint128_t) 25 | using T = uint64_t; 26 | 27 | T modulus = 333333333; 28 | T base = 42; 29 | T exponent = 123456789; 30 | 31 | // ---- Demonstration of modular exponentiation ---- 32 | 33 | // Montgomery arithmetic version: 34 | assert(modulus % 2 == 1); // montgomery arithmetic always needs odd modulus. 35 | // First construct a MontgomeryForm object to do Montgomery arithmetic 36 | // with the modulus we chose. 37 | hc::MontgomeryForm mf(modulus); 38 | // Convert base to its Montgomery representation. 39 | auto mont_base = mf.convertIn(base); 40 | // Get the pow result in Montgomery representation. 41 | auto mont_result = mf.pow(mont_base, exponent); 42 | // Convert the Montgomery representation result to normal integer domain. 
43 | T result1 = mf.convertOut(mont_result); 44 | 45 | 46 | // Standard arithmetic version: (note that Montgomery arithmetic is 47 | // usually much faster) 48 | T result2 = hc::modular_pow(base, exponent, modulus); 49 | 50 | 51 | std::cout << "Example results for " << base << "^" << exponent 52 | << " (mod " << modulus << ")\n"; 53 | std::cout << "---------\n"; 54 | std::cout << "using Montgomery arithmetic: " << result1 << "\n"; 55 | std::cout << "using standard arithmetic: " << result2 << "\n"; 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /test/modular_arithmetic/test_modular_addition_with_subtraction.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | 9 | // This is an exhaustive test of modular addition using modular subtraction to 10 | // verify the addition results. The test uses only type uint8_t, in order to 11 | // make it computationally feasible. 12 | 13 | 14 | // Strictly for testing purposes, we'll define HURCHALLA_ALLOW_INLINE_ASM_ALL 15 | // here in order to make modular addition use an inline asm function version if 16 | // it is available. Internally, this inline asm function will also call the 17 | // generic template function version of modular addition inside a postcondition, 18 | // in order to make sure that the asm result is correct. Of course postcondition 19 | // checks must be enabled for this check to occur - the easiest way to ensure 20 | // postconditions are enabled is to define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS, 21 | // which is why we do so here. The same applies to modular subtraction as well. 
22 | #undef HURCHALLA_ALLOW_INLINE_ASM_ALL 23 | #define HURCHALLA_ALLOW_INLINE_ASM_ALL 1 24 | 25 | #ifndef HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 26 | # define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 27 | #endif 28 | 29 | #include "hurchalla/modular_arithmetic/modular_addition.h" 30 | #include "hurchalla/modular_arithmetic/modular_subtraction.h" 31 | #include "hurchalla/modular_arithmetic/detail/optimization_tag_structs.h" 32 | #include "gtest/gtest.h" 33 | #include 34 | 35 | namespace { 36 | 37 | 38 | TEST(ModularArithmetic, modular_addition_with_subtraction) { 39 | namespace hc = ::hurchalla; 40 | using T = std::uint8_t; 41 | 42 | for (T modulus=255; modulus>0; --modulus) { 43 | for (T a=0; a(sum, b, modulus))); 47 | EXPECT_TRUE(a == (hc::modular_subtraction_prereduced_inputs(sum, b, modulus))); 48 | } 49 | } 50 | } 51 | } 52 | 53 | 54 | } // end unnamed namespace 55 | -------------------------------------------------------------------------------- /modular_arithmetic/include/hurchalla/modular_arithmetic/modular_multiplicative_inverse.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
6 | */ 7 | 8 | #ifndef HURCHALLA_MODULAR_ARITHMETIC_MODULAR_MULTIPLICATIVE_INVERSE_H_INCLUDED 9 | #define HURCHALLA_MODULAR_ARITHMETIC_MODULAR_MULTIPLICATIVE_INVERSE_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/modular_arithmetic/detail/impl_modular_multiplicative_inverse.h" 13 | #include "hurchalla/modular_arithmetic/modular_multiplication.h" 14 | #include "hurchalla/util/traits/ut_numeric_limits.h" 15 | #include "hurchalla/util/compiler_macros.h" 16 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 17 | 18 | namespace hurchalla { 19 | 20 | 21 | // Returns the modular multiplicative inverse of 'a', mod the modulus. 22 | // Also assigns the gcd of 'a' and modulus to the reference parameter gcd. 23 | // 24 | // Note: Calling with a < modulus slightly improves performance. 25 | // [The multiplicative inverse is an integer > 0 and < modulus, such that 26 | // a * multiplicative_inverse == 1 (mod modulus). It is a unique number, 27 | // but it exists if and only if 'a' and 'modulus' are coprime.] 28 | template 29 | T modular_multiplicative_inverse(T a, T modulus, T& gcd) 30 | { 31 | static_assert(ut_numeric_limits::is_integer, ""); 32 | static_assert(!(ut_numeric_limits::is_signed), ""); 33 | HPBC_CLOCKWORK_API_PRECONDITION(modulus > 1); 34 | 35 | T inv = detail::impl_modular_multiplicative_inverse::call(a, modulus, gcd); 36 | 37 | HPBC_CLOCKWORK_POSTCONDITION(inv < modulus); 38 | //POSTCONDITION: Returns 0 if the inverse does not exist. Otherwise returns 39 | // the value of the inverse (which is never 0, given that modulus>1). 40 | HPBC_CLOCKWORK_POSTCONDITION(inv == 0 || 41 | ::hurchalla::modular_multiplication_prereduced_inputs( 42 | static_cast(a % modulus), inv, modulus) == 1); 43 | return inv; 44 | } 45 | 46 | // Same as the above function, except that it omits the gcd reference parameter. 
47 | template 48 | HURCHALLA_FORCE_INLINE T modular_multiplicative_inverse(T a, T modulus) 49 | { 50 | T gcd; // ignored 51 | return modular_multiplicative_inverse(a, modulus, gcd); 52 | } 53 | 54 | 55 | } // end namespace 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/README.md: -------------------------------------------------------------------------------- 1 | All files in this experimental directory are expected to be good and immediately usable if desired. However, I consider them to be experiments that aren't useful enough to be moved outside of an "experimental" directory. 2 | 3 | MontyFullRangeMasked.h: 4 | The class MontyFullRangeMasked is usable in the same situations and in the same way as MontyFullRange; i.e. any odd-value is permissible for the modulus of the constructor. It uses some interesting and unusual optimizations to the Montgomery arithmetic algorithms, in order to (in theory) perform faster multiply and square and fused-multiply/square-add/sub operations. The speedup comes at the cost of slightly slower simple add and subtract operations. The speedup also applies only to certain sizes of T. For a type T that is the same size as the CPU integer registers (e.g. uint64_t on a 64 bit computer) or a type T that is smaller than the register size, there is a decent chance that MontyFullRangeMasked will perform better overall than MontyFullRange, when both are given the same modulus. This is due to the improved multiply, square, and fused-multiply/square-add/sub functions. However, the plain add() and subtract() functions in MontyFullRangeMasked will usually be slower than those in MontyFullRange. For a type T that is larger than the CPU integer register size, you can usually expect MontyFullRangeMasked will perform worse overall than MontyFullRange, and to provide little or no benefit. 
If your modulus is small enough to allow use of MontyQuarterRange or MontyHalfRange, you can usually expect those classes to perform better than either MontyFullRange or MontyFullRangeMasked, regardless of the size of T. 5 | To use MontyFullRangeMasked, you would ordinarily declare a variable (using an unsigned integral type T) as follows: 6 | MontgomeryForm> mf; 7 | 8 | The unit_testing_helpers subdirectory contains classes that provide a run-time polymorphic version of MontgomeryForm for potentially much faster compile times during unit testing. These classes of course have a run-time performance penalty, so they're intended for use only in unit testing. At the moment, the class NoForceInlineMontgomeryForm (in the main test folder) seems to improve the compile times for the unit tests sufficiently, and so these extra classes remain here as experimental. Nevertheless, these extra classes compile correctly for me with clang16 (on macOS) and pass their tests in test_MontgomeryForm_extra.cpp. 9 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '16 9 * * 1' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'cpp' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v3 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v2 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v2 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 
59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v2 71 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontgomeryDefault.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_MONTGOMERY_DEFAULT_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_MONTGOMERY_DEFAULT_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/montgomery_arithmetic/detail/MontyFullRange.h" 13 | #include "hurchalla/montgomery_arithmetic/detail/MontyHalfRange.h" 14 | #include "hurchalla/montgomery_arithmetic/detail/MontyQuarterRange.h" 15 | #include "hurchalla/util/traits/extensible_make_unsigned.h" 16 | #include "hurchalla/util/traits/ut_numeric_limits.h" 17 | #include "hurchalla/util/sized_uint.h" 18 | #include 19 | 20 | namespace hurchalla { namespace detail { 21 | 22 | 23 | template 24 | class MontgomeryDefault final { 25 | static_assert(ut_numeric_limits::is_integer, ""); 26 | using U = typename extensible_make_unsigned::type; 27 | static constexpr int bitsT = ut_numeric_limits::digits; 28 | static constexpr int target_bits = HURCHALLA_TARGET_BIT_WIDTH; 29 | public: 30 | using type = typename std::conditional< 31 | (bitsT <= target_bits - 2), 32 | MontyQuarterRange::type>, 33 | typename std::conditional< 34 | (bitsT <= 
target_bits - 1), 35 | MontyHalfRange::type>, 36 | MontyFullRange 37 | >::type 38 | >::type; 39 | }; 40 | 41 | // Implementation note: when bitsT > target_bits (e.g. T == __int128_t on a 64 42 | // bit system), we purposely never use MontyHalfRange above and instead default 43 | // to MontyFullRange, because MontyFullRange uses unsigned hi_lo mults, whereas 44 | // MontyHalfRange uses signed hi_lo multiplications... 45 | // When bitsT > target_bits we're forced to use a 'slow' hi_lo mult routine, 46 | // since there's no simple asm instruction that's applicable- e.g. on x86_64, 47 | // we need far more than a single MUL or IMUL. And unfortunately we don't have 48 | // a signed routine that's as good as unsigned when bitsT > target_bits. For 49 | // details see the comments for slow_signed_multiply_to_hilo_product() in 50 | // hurchalla/util/detail/platform_specific/impl_signed_multiply_to_hilo_product.h 51 | 52 | 53 | }} // end namespace 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /modular_arithmetic/include/hurchalla/modular_arithmetic/modular_multiplication.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
6 | */ 7 | 8 | #ifndef HURCHALLA_MODULAR_ARITHMETIC_MODULAR_MULTIPLICATION_H_INCLUDED 9 | #define HURCHALLA_MODULAR_ARITHMETIC_MODULAR_MULTIPLICATION_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/modular_arithmetic/detail/platform_specific/impl_modular_multiplication.h" 13 | #include "hurchalla/util/traits/ut_numeric_limits.h" 14 | #include "hurchalla/util/compiler_macros.h" 15 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 16 | 17 | namespace hurchalla { 18 | 19 | 20 | // Alternatively, please consider using the montgomery multiplication class 21 | // MontgomeryForm (specifically its multiply function) instead of this function 22 | // modular_multiplication_prereduced_inputs(). If you are heavily using modular 23 | // multiplication in your code, there's a very good chance that montgomery 24 | // multiplication will improve performance- often significantly. It always 25 | // requires an odd modulus though. 26 | 27 | template 28 | T modular_multiplication_prereduced_inputs(T a, T b, T modulus) 29 | { 30 | static_assert(ut_numeric_limits::is_integer, ""); 31 | static_assert(!(ut_numeric_limits::is_signed), ""); 32 | HPBC_CLOCKWORK_API_PRECONDITION(modulus>0); 33 | HPBC_CLOCKWORK_API_PRECONDITION(a::call(a, b, modulus); 37 | 38 | // POSTCONDITION: Returns (a*b)%modulus, theoretically calculated at 39 | // infinite precision to avoid overflow. 40 | HPBC_CLOCKWORK_POSTCONDITION(result 51 | HURCHALLA_FORCE_INLINE constexpr bool modular_multiplication_has_slow_perf() 52 | { 53 | return detail::impl_modular_multiplication::has_slow_perf(); 54 | } 55 | 56 | 57 | } // end namespace 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/ImplMontgomeryForm.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024-2025 Jeffrey Hurchalla. 
2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_IMPL_MONTGOMERY_FORM_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_IMPL_MONTGOMERY_FORM_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/modular_arithmetic/detail/optimization_tag_structs.h" 13 | #include "hurchalla/util/traits/ut_numeric_limits.h" 14 | #include "hurchalla/util/compiler_macros.h" 15 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 16 | #include 17 | #include 18 | 19 | namespace hurchalla { namespace detail { 20 | 21 | 22 | // The primary template below handles when InlineAll == true, and annotates 23 | // all class functions with a force inline attribute. 24 | // The template specialization handles InlineAll == false, and does not annotate 25 | // any of the class functions with a force inline attribute. 26 | // 27 | // Note: this is a rare case where ugly #define / #undef / #include hacking 28 | // seems to be the best way to make the code clear and maintainable. Placing or 29 | // not placing an attribute on a function doesn't appear to be something we can 30 | // directly do with a template parameter. So we work-around it by creating two 31 | // exact class duplicates (not counting the attribute, which is #defined or 32 | // #undef'd) by using #include, and we use a class specialization (as below) 33 | // to determine whether or not the class's functions get the attribute defined 34 | // and placed, or not. 35 | 36 | 37 | #define HURCHALLA_IMF_MAYBE_FORCE_INLINE HURCHALLA_FORCE_INLINE 38 | // 39 | // Primary template, instantiated for InlineAll == true. 40 | // 41 | // All functions in this instantiation get a force inline annotation. 
42 | template 43 | class ImplMontgomeryForm final { 44 | #include "hurchalla/montgomery_arithmetic/detail/ImplMontgomeryForm.contents" 45 | }; 46 | #undef HURCHALLA_IMF_MAYBE_FORCE_INLINE 47 | 48 | 49 | #define HURCHALLA_IMF_MAYBE_FORCE_INLINE 50 | // 51 | // Specialization, instantiated for InlineAll == false. 52 | // 53 | // No functions will get a force inline annotation, because 54 | // HURCHALLA_IMF_MAYBE_FORCE_INLINE is blank. 55 | template 56 | class ImplMontgomeryForm final { 57 | #include "hurchalla/montgomery_arithmetic/detail/ImplMontgomeryForm.contents" 58 | }; 59 | #undef HURCHALLA_IMF_MAYBE_FORCE_INLINE 60 | 61 | 62 | }} // end namespace 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this 4 | # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
5 | 6 | 7 | if(TARGET test_hurchalla_modular_arithmetic) 8 | return() 9 | endif() 10 | 11 | # later versions are probably fine, but are untested 12 | cmake_minimum_required(VERSION 3.14...4.03) 13 | 14 | 15 | include(FetchGoogleTest.cmake) 16 | include(EnableMaxWarnings.cmake) 17 | 18 | 19 | #set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/tests) 20 | #set(CTEST_BINARY_DIRECTORY ${PROJECT_BINARY_DIR}/tests) 21 | 22 | 23 | # needed for gtest_discover_tests() 24 | include(GoogleTest) 25 | 26 | 27 | add_executable(test_hurchalla_modular_arithmetic 28 | modular_arithmetic/test_absolute_value_difference.cpp 29 | modular_arithmetic/test_modular_addition.cpp 30 | modular_arithmetic/test_modular_addition_with_subtraction.cpp 31 | modular_arithmetic/test_modular_multiplication.cpp 32 | modular_arithmetic/test_modular_multiplicative_inverse.cpp 33 | modular_arithmetic/test_modular_pow.cpp 34 | modular_arithmetic/test_modular_subtraction.cpp 35 | montgomery_arithmetic/low_level_api/test_get_Rsquared_mod_n.cpp 36 | montgomery_arithmetic/low_level_api/test_inverse_mod_R.cpp 37 | montgomery_arithmetic/low_level_api/test_REDC.cpp 38 | montgomery_arithmetic/low_level_api/test_REDC_inline_asm.cpp 39 | montgomery_arithmetic/test_montgomery_pow.cpp 40 | montgomery_arithmetic/test_montgomery_two_pow.cpp 41 | montgomery_arithmetic/test_MontgomeryForm.cpp 42 | montgomery_arithmetic/test_MontgomeryFormExtensions.cpp 43 | montgomery_arithmetic/test_MontgomeryForm_extra.cpp 44 | ) 45 | 46 | EnableMaxWarnings(test_hurchalla_modular_arithmetic) 47 | 48 | 49 | if(HURCHALLA_TEST_MODULAR_ARITHMETIC_HEAVYWEIGHT) 50 | target_compile_definitions(test_hurchalla_modular_arithmetic 51 | PRIVATE 52 | HURCHALLA_TEST_MODULAR_ARITHMETIC_HEAVYWEIGHT) 53 | endif() 54 | if(MSVC) 55 | target_compile_options(test_hurchalla_modular_arithmetic PRIVATE /bigobj) 56 | endif() 57 | 58 | 59 | set_target_properties(test_hurchalla_modular_arithmetic 60 | PROPERTIES FOLDER "Tests") 61 | 
target_link_libraries(test_hurchalla_modular_arithmetic 62 | hurchalla_modular_arithmetic 63 | gtest_main) 64 | #add_test(test_hurchalla_modular_arithmetic test_hurchalla_modular_arithmetic) 65 | gtest_discover_tests(test_hurchalla_modular_arithmetic) 66 | -------------------------------------------------------------------------------- /modular_arithmetic/include/hurchalla/modular_arithmetic/detail/impl_modular_pow.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MODULAR_ARITHMETIC_IMPL_MODULAR_POW_H_INCLUDED 9 | #define HURCHALLA_MODULAR_ARITHMETIC_IMPL_MODULAR_POW_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/modular_arithmetic/modular_multiplication.h" 13 | #include "hurchalla/util/traits/ut_numeric_limits.h" 14 | #include "hurchalla/util/cselect_on_bit.h" 15 | #include "hurchalla/util/compiler_macros.h" 16 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 17 | 18 | namespace hurchalla { namespace detail { 19 | 20 | 21 | // Returns the modular exponentiation of base^exponent (mod modulus). 22 | // For details, see http://en.wikipedia.org/wiki/Modular_exponentiation 23 | // note: uses a static member function to disallow ADL. 
24 | struct impl_modular_pow { 25 | template 26 | HURCHALLA_FORCE_INLINE static T call(T base, U exponent, T modulus) 27 | { 28 | static_assert(ut_numeric_limits::is_integer, ""); 29 | static_assert(!(ut_numeric_limits::is_signed), ""); 30 | static_assert(ut_numeric_limits::is_integer, ""); 31 | static_assert(!(ut_numeric_limits::is_signed), ""); 32 | HPBC_CLOCKWORK_PRECONDITION2(modulus > 1); 33 | 34 | namespace hc = ::hurchalla; 35 | if (base >= modulus) 36 | base = static_cast(base % modulus); 37 | /* 38 | // Handbook of Applied Cryptography- http://cacr.uwaterloo.ca/hac/ 39 | // Algorithm 14.76, original unoptimized version 40 | T result = 1; 41 | while (exponent > 0) 42 | { 43 | if (exponent & 1u) { 44 | result = hc::modular_multiplication_prereduced_inputs( 45 | result, base, modulus); 46 | } 47 | exponent = exponent >> 1; 48 | base = hc::modular_multiplication_prereduced_inputs(base, base, modulus); 49 | } 50 | */ 51 | // slightly optimized version 52 | // T result = (exponent & 1u) ? base : 1; 53 | T result = ::hurchalla::cselect_on_bit<0>::ne_0( 54 | static_cast(exponent), base, static_cast(1)); 55 | while (exponent > 1) 56 | { 57 | exponent = static_cast(exponent >> 1); 58 | base = hc::modular_multiplication_prereduced_inputs(base, base, modulus); 59 | if (exponent & 1u) { 60 | result = hc::modular_multiplication_prereduced_inputs( 61 | result, base, modulus); 62 | } 63 | } 64 | return static_cast(result); 65 | } 66 | }; 67 | 68 | 69 | }} // end namespace 70 | 71 | #endif // include guard 72 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_two_pow/testbench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 Jeffrey Hurchalla. 4 | # 5 | # This Source Code Form is subject to the terms of the Mozilla Public 6 | # License, v. 2.0. 
If a copy of the MPL was not distributed with this 7 | # file, You can obtain one at https://mozilla.org/MPL/2.0/. 8 | 9 | 10 | 11 | 12 | # You need to clone the util, factoring, and modular_arithmetic repos 13 | # from https://github.com/hurchalla 14 | 15 | # SET repo_directory TO THE DIRECTORY WHERE YOU CLONED THE HURCHALLA GIT 16 | # REPOSITORIES. (or otherwise ensure the compiler /I flags correctly specify 17 | # the needed hurchalla include directories) 18 | 19 | repo_directory=/Users/jeffreyhurchalla/Desktop 20 | #repo_directory=/home/jeff/repos 21 | 22 | 23 | # you would ordinarily use either g++ or clang++ for $1 24 | cppcompiler=$1 25 | 26 | 27 | if [[ $cppcompiler == "g++" ]]; then 28 | error_limit=-fmax-errors=3 29 | warn_nrvo=-Wnrvo 30 | else 31 | error_limit=-ferror-limit=3 32 | fi 33 | 34 | 35 | 36 | 37 | exit_on_failure () { 38 | if [ $? -ne 0 ]; then 39 | exit 1 40 | fi 41 | } 42 | 43 | #optimization_level=O2 44 | #optimization_level=O3 45 | optimization_level=$2 46 | 47 | #define_mont_type=-DDEF_MONT_TYPE=MontgomeryQuarter 48 | define_mont_type=-DDEF_MONT_TYPE=$3 49 | define_uint_type=-DDEF_UINT_TYPE=$4 50 | 51 | # you must specify either -DTEST_ARRAY or -DTEST_SCALAR for $8 52 | define_test_type=$8 53 | 54 | 55 | cpp_standard=c++17 56 | 57 | 58 | # You can use arguments $9 and ${10} and ${11} etc to define macros such as 59 | # -DHURCHALLA_ALLOW_INLINE_ASM_ALL 60 | # for debugging, defining the following macros may be useful 61 | # -DHURCHALLA_CLOCKWORK_ENABLE_ASSERTS -DHURCHALLA_UTIL_ENABLE_ASSERTS 62 | 63 | 64 | # we could also use -g to get debug symbols (for lldb/gdb, and objdump) 65 | 66 | $cppcompiler \ 67 | $error_limit -$optimization_level \ 68 | $define_mont_type $define_uint_type $define_test_type \ 69 | $9 ${10} ${11} ${12} ${13} ${14} \ 70 | -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion $warn_nrvo \ 71 | -std=$cpp_standard \ 72 | -I${repo_directory}/modular_arithmetic/modular_arithmetic/include \ 73 | 
-I${repo_directory}/modular_arithmetic/montgomery_arithmetic/include \ 74 | -I${repo_directory}/util/include \ 75 | -c testbench_montgomery_two_pow.cpp 76 | 77 | exit_on_failure 78 | 79 | $cppcompiler -$optimization_level -std=$cpp_standard -o testbench_montgomery_two_pow testbench_montgomery_two_pow.o -lm 80 | 81 | exit_on_failure 82 | 83 | echo "compilation finished, now executing:" 84 | 85 | 86 | # argument $5 (if present), is the randomization seed for std::mt19937_64 87 | # argument $6 (if present), is max_modulus_bits_reduce 88 | # argument $7 (if present), is exponent_bits_reduce 89 | 90 | ./testbench_montgomery_two_pow $5 $6 $7 91 | 92 | # To give you an example of invoking this script at the command line: 93 | # ./testbench.sh clang++ O3 MontgomeryFull __uint128_t 191 8 50 -DTEST_ARRAY -DHURCHALLA_ALLOW_INLINE_ASM_ALL 94 | 95 | 96 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_two_pow_API.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_EXPERIMENTAL_API_MONTGOMERY_TWO_POW_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_EXPERIMENTAL_API_MONTGOMERY_TWO_POW_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/montgomery_arithmetic/detail/platform_specific/montgomery_two_pow.h" 13 | #include 14 | #include 15 | 16 | namespace experimental_api { 17 | 18 | 19 | // Calculates the integer pow(2, n), modulo the modulus of mf, and returns the 20 | // result in MontgomeryForm representation. 
21 | // 22 | // MF can be any MontgomeryForm type (see MontgomeryForm.h), and U can be any 23 | // integer type. ('n' is the exponent to use) 24 | // 25 | template 26 | typename MF::MontgomeryValue montgomery_two_pow(const MF& mf, U n) 27 | { 28 | // Rather than calling this function, you could just directly call 29 | // mf.two_pow(n), as done in the next line. 30 | return mf.two_pow(n); 31 | 32 | // Implementation note: the above function call internally just delegates to 33 | // return hurchalla::detail::montgomery_two_pow::call(mf, n); 34 | // It uses novel optimizations of the k-ary exponentiation algorithm 35 | // ( https://en.wikipedia.org/wiki/Exponentiation_by_squaring ) 36 | // that rely on a hard-coded base 2. 37 | } 38 | 39 | 40 | // An array version of the above function - you can expect it to always have 41 | // significantly higher throughput than the above. (In benchmarks I have 42 | // observed it to have a performance advantage of anywhere from 1.4x to 3x 43 | // higher throughput, depending on the CPU type and whether 64 or 128 bit 44 | // integer types are calculated) 45 | // 46 | // For each array index 'i' from 0 to ARRAY_SIZE-1, this function calculates 47 | // the integer result[i] = pow(2, n[i]) modulo the modulus of mf[i], and 48 | // returns this result array; the result array is in MontgomeryForm 49 | // representation. 50 | // 51 | // MF can be any MontgomeryForm type (see MontgomeryForm.h), and U can be any 52 | // integer type. 53 | // 54 | template 55 | std::array 56 | array_montgomery_two_pow(const std::array& mf, const std::array& n) 57 | { 58 | // Implementation note: at the moment this API function is the only easy way 59 | // to get the array version of Montgomery two pow (MontgomeryForm.h does not 60 | // have an *array* two_pow member function). 
61 | // At some point in the next 8 months I expect to create a SIMD version of 62 | // MontgomeryForm, and at that time the SIMD MontgomeryForm will become the 63 | // preferred API to use to access the (high throughput) array version of 64 | // Montgomery two_pow. 65 | 66 | return hurchalla::detail::montgomery_two_pow::call(mf, n); 67 | } 68 | 69 | 70 | } // end namespace 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /modular_arithmetic/include/hurchalla/modular_arithmetic/detail/impl_modular_multiplicative_inverse.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MODULAR_ARITHMETIC_IMPL_MODULAR_MULTIPLICATIVE_INV_H_INCLUDED 9 | #define HURCHALLA_MODULAR_ARITHMETIC_IMPL_MODULAR_MULTIPLICATIVE_INV_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/util/traits/extensible_make_signed.h" 13 | #include "hurchalla/util/traits/safely_promote_unsigned.h" 14 | #include "hurchalla/util/traits/ut_numeric_limits.h" 15 | #include "hurchalla/util/conditional_select.h" 16 | #include "hurchalla/util/compiler_macros.h" 17 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 18 | #include 19 | 20 | namespace hurchalla { namespace detail { 21 | 22 | 23 | // note: uses a static member function to disallow ADL. 
24 | struct impl_modular_multiplicative_inverse { 25 | template 26 | HURCHALLA_FORCE_INLINE static T call(T val, T modulus, T& gcd) 27 | { 28 | static_assert(ut_numeric_limits::is_integer, ""); 29 | static_assert(!(ut_numeric_limits::is_signed), ""); 30 | // I decided not to support modulus<=1, since it's not likely to be used and 31 | // it complicates the return type and adds conditional branches. 32 | HPBC_CLOCKWORK_PRECONDITION2(modulus > 1); 33 | 34 | // POSTCONDITION1: Returns 0 if the inverse doesn't exist. Otherwise returns 35 | // the inverse (which is never 0, given that modulus>1). 36 | // POSTCONDITION2: Sets gcd to the greatest common divisor of val and 37 | // modulus. Note that if the inverse exists, we will get gcd == 1. 38 | 39 | using U = typename safely_promote_unsigned::type; 40 | using S = typename extensible_make_signed::type; 41 | 42 | // The following algorithm is adapted from Figure 6 of 43 | // https://jeffhurchalla.com/2018/10/13/implementing-the-extended-euclidean-algorithm-with-unsigned-inputs/ 44 | // calculating only what is needed for the modular multiplicative inverse. 45 | S y1=0; 46 | U a1=modulus; 47 | S y0=1; 48 | U a2=val; 49 | U q=0; 50 | while (a2 > 1) { 51 | S y2 = static_cast(y0 - static_cast(q)*y1); 52 | y0=y1; 53 | y1=y2; 54 | U a0=a1; 55 | a1=a2; 56 | 57 | q = static_cast(a0/a1); 58 | a2 = static_cast(a0 - q*a1); 59 | } 60 | HPBC_CLOCKWORK_ASSERT2(a1 > 1); 61 | 62 | if (a2 == 1) { 63 | gcd = 1; 64 | S y = static_cast(y0 - static_cast(q)*y1); 65 | // inv = (y<0) ? 
y+modulus : y 66 | U inv = ::hurchalla::conditional_select(y<0, 67 | static_cast(static_cast(y)+modulus), 68 | static_cast(y)); 69 | HPBC_CLOCKWORK_POSTCONDITION2(inv < modulus); 70 | return static_cast(inv); 71 | } 72 | else { 73 | gcd = static_cast(a1); 74 | HPBC_CLOCKWORK_ASSERT2(gcd > 1); 75 | return 0; 76 | } 77 | } 78 | }; 79 | 80 | 81 | }} // end namespace 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this 4 | # file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | 6 | 7 | if(TARGET hurchalla_modular_arithmetic) 8 | return() 9 | endif() 10 | 11 | # later versions are probably fine, but are untested 12 | cmake_minimum_required(VERSION 3.14...4.03) 13 | 14 | project(hurchalla_modular_arithmetic VERSION 1.0.0 LANGUAGES CXX) 15 | 16 | 17 | # if this is the top level CMakeLists.txt, let IDEs group projects into folders 18 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) 19 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 20 | endif() 21 | 22 | 23 | if(NOT DEFINED CMAKE_ARCHIVE_OUTPUT_DIRECTORY) 24 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) 25 | endif() 26 | if(NOT DEFINED CMAKE_LIBRARY_OUTPUT_DIRECTORY) 27 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) 28 | endif() 29 | if(NOT DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY) 30 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) 31 | endif() 32 | 33 | 34 | add_library(hurchalla_modular_arithmetic INTERFACE) 35 | 36 | 37 | add_subdirectory(modular_arithmetic 38 | ${CMAKE_CURRENT_BINARY_DIR}/modular_arithmetic) 39 | add_subdirectory(montgomery_arithmetic 40 | ${CMAKE_CURRENT_BINARY_DIR}/montgomery_arithmetic) 41 | 42 | 
target_link_libraries(hurchalla_modular_arithmetic 43 | INTERFACE hurchalla_basic_modular_arithmetic) 44 | target_link_libraries(hurchalla_modular_arithmetic 45 | INTERFACE hurchalla_montgomery_arithmetic) 46 | 47 | 48 | 49 | # TODO: The following may be overly simple, but works so far to install target 50 | # include directories. It assumes that the build step from the subdirectories 51 | # montgomery_arithmetic and modular_arithmetic (which have build phase 52 | # target_include_directories commands) provides the information to cmake which 53 | # cmake then uses in the install phase target_include_directories below. This 54 | # has worked for the basic cmake install tests I've done so far... 55 | # --------------------- 56 | target_include_directories(hurchalla_modular_arithmetic 57 | INTERFACE $) 58 | # TODO: use this instead? 59 | # ---------------- 60 | #target_include_directories(hurchalla_modular_arithmetic SYSTEM 61 | # INTERFACE $/include>) 62 | 63 | 64 | 65 | # ***Tests*** 66 | 67 | # If this is the top level CMakeLists.txt, add testing options, and enable 68 | # testing when testing options have been set to ON. 69 | 70 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) 71 | option(TEST_HURCHALLA_MODULAR_ARITHMETIC 72 | "Build the tests for the Hurchalla modular arithmetic library project." 73 | OFF) 74 | option(HURCHALLA_TEST_MODULAR_ARITHMETIC_HEAVYWEIGHT 75 | "Include extensive and time consuming tests." 
76 | OFF) 77 | option(FORCE_TEST_HURCHALLA_CPP11_STANDARD 78 | "If testing this library, ensure we build googletest and tests using -std=c++11") 79 | 80 | if(TEST_HURCHALLA_MODULAR_ARITHMETIC) 81 | enable_testing() 82 | # include(CTest) 83 | add_subdirectory(test) 84 | endif() 85 | endif() 86 | -------------------------------------------------------------------------------- /macros_for_performance.md: -------------------------------------------------------------------------------- 1 | 2 | Optional macros you can define to tune performance 3 | -------------------------------------------------- 4 | There are a number of macros you can optionally define when you compile, to tune the 5 | performance on your system for the modular arithmetic functions. It is 6 | generally recommended not to do so, but in some cases you may find it useful. 7 | You would define one or more of these macros when compiling *your* sources, 8 | given that Clockwork is a header-only library. 9 | 10 | For example, if you are compiling using clang or gcc from the command line, you would 11 | specify the -D compilation flag, similarly to the following: 12 | clang++ -DHURCHALLA_ALLOW_INLINE_ASM_ALL ...more arguments... 13 | As another example, if you are using CMake you would add the command "target_compile_definitions" 14 | to your CMakeLists.txt, similarly to the following: 15 | target_compile_definitions(<your_target_name> PRIVATE HURCHALLA_ALLOW_INLINE_ASM_ALL) 16 | \ 17 | \ 18 | HURCHALLA_TARGET_ISA_HAS_NO_DIVIDE - define this macro if your target system's 19 | instruction set does not include division. Although it is unusual, some 20 | microcontrollers do not have division, and defining this macro might improve 21 | performance in such a case. 22 | 23 | HURCHALLA_AVOID_CSELECT - you may wish to define this macro if your target 24 | system's instruction set does not include conditional move or conditional 25 | select. It may improve performance in such a case. 
This macro is normally 26 | already defined for RISC-V. 27 | 28 | HURCHALLA_ALLOW_INLINE_ASM_ALL - defining this macro will enable all 29 | available inline asm functions. Although this is the easiest macro to use, you 30 | can more selectively enable inline asm for particular functions, using macros 31 | listed below. In some cases HURCHALLA_ALLOW_INLINE_ASM_ALL may improve 32 | performance up to 20% (gcc often benefits), and in other cases it may make 33 | essentially no difference or harm performance (clang does not seem to benefit). 34 | It is not enabled by default because inline asm is extremely difficult to verify 35 | for correctness. While I believe I'm skilled at writing high quality inline 36 | asm, I advise you to be skeptical of this and of any inline asm you see. 37 | Unit tests of inline asm are far less helpful than you might think - the ability 38 | of a unit test to detect a bug in inline asm often depends upon the register 39 | allocation choices the compiler makes for surrounding test code, which is mostly 40 | outside a programmer's control. Generally speaking, it is [difficult to 41 | recommend inline asm](https://gcc.gnu.org/wiki/DontUseInlineAsm) unless there is 42 | a large performance benefit or performance is critical. 43 | 44 | HURCHALLA_ALLOW_INLINE_ASM_REDC 45 | HURCHALLA_ALLOW_INLINE_ASM_ABSDIFF 46 | HURCHALLA_ALLOW_INLINE_ASM_MODADD 47 | HURCHALLA_ALLOW_INLINE_ASM_MODSUB 48 | HURCHALLA_ALLOW_INLINE_ASM_QUARTERRANGE_GET_CANONICAL 49 | HURCHALLA_ALLOW_INLINE_ASM_HALFRANGE_GET_CANONICAL 50 | - these macros selectively enable inline asm for functions. They may or may not 51 | improve performance, and the warnings above for HURCHALLA_ALLOW_INLINE_ASM_ALL 52 | apply here too. To determine if they are even useful, you would need to 53 | compare performance with different ASM macros defined/not defined. Generally 54 | you would want to start with HURCHALLA_ALLOW_INLINE_ASM_REDC. 
55 | -------------------------------------------------------------------------------- /test/montgomery_arithmetic/low_level_api/test_REDC.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | // Test the REDC function versions that contain no inline asm. 9 | 10 | #undef HURCHALLA_ALLOW_INLINE_ASM_ALL 11 | #undef HURCHALLA_ALLOW_INLINE_ASM_REDC 12 | 13 | 14 | #include "test_REDC.h" 15 | 16 | 17 | TEST(MontgomeryArithmetic, REDC8) { 18 | std::vector moduli { 3, 255, 19, 21, 211, 23, 171 }; 19 | for (auto n : moduli) 20 | REDC_test_all(n); 21 | } 22 | TEST(MontgomeryArithmetic, REDC16) { 23 | std::vector moduli { 3, 17, UINT16_C(65535), 24 | UINT16_C(65533), UINT16_C(357), UINT16_C(32253), 25 | UINT16_C(11111) }; 26 | for (auto n : moduli) 27 | REDC_test_all(n); 28 | } 29 | TEST(MontgomeryArithmetic, REDC32) { 30 | std::vector moduli { 3, 13, UINT32_C(4294967295), 31 | UINT32_C(4294967293), UINT32_C(2147483347), 32 | UINT32_C(246098243), UINT32_C(1111111) }; 33 | for (auto n : moduli) 34 | REDC_test_all(n); 35 | } 36 | TEST(MontgomeryArithmetic, REDC64) { 37 | std::vector moduli { 3, 11, UINT64_C(18446744073709551615), 38 | UINT64_C(18446744073709551613), 39 | UINT64_C(4294967295), 40 | UINT64_C(3194806714689), UINT64_C(11111111311) }; 41 | for (auto n : moduli) 42 | REDC_test_all(n); 43 | } 44 | 45 | #if !defined(__GNUC__) || __GNUC__ >= 11 || defined(__INTEL_COMPILER) || \ 46 | defined(__clang__) 47 | // Older versions of GCC (most of them prior to v11) have a compiler bug that 48 | // causes an incorrect value of n to be produced and thus results in one of my 49 | // google test assertions failing. 
See 50 | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98474 . The bug appears to have 51 | // been introduced as a regression to gcc in v5.1. It exists up to the latest 52 | // released version (v10.2) of gcc at the time of this writing. It's unclear 53 | // at the moment whether __uint128_t is safe to use with any version of gcc 54 | // between 5.1 and 10.2. The patch appears to fix the bug, and it is scheduled 55 | // to be in the gcc 11 release. 56 | // The #if above disables the following tests on gcc prior to gcc v11, since 57 | // they will fail at optimization level -O1 or higher due to the compiler bug. 58 | # if HURCHALLA_COMPILER_HAS_UINT128_T() 59 | TEST(MontgomeryArithmetic, REDC128) { 60 | __uint128_t zero = 0; 61 | std::vector<__uint128_t> moduli { 3, 11, zero-1, zero-3, 62 | static_cast<__uint128_t>(UINT64_C(18446744073709551613)) * 63 | UINT64_C(18446744073709551611), 64 | static_cast<__uint128_t>(UINT64_C(35698723439051265)) * 65 | UINT64_C(70945870135873583), 66 | static_cast<__uint128_t>(UINT64_C(34069834503)) * 67 | UINT64_C(895835939) }; 68 | for (auto n : moduli) 69 | REDC_test_all(n); 70 | } 71 | # endif 72 | #endif 73 | -------------------------------------------------------------------------------- /test/montgomery_arithmetic/test_MontgomeryForm_extra.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2024 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
6 | */ 7 | 8 | #include "test_MontgomeryForm.h" 9 | #include "hurchalla/montgomery_arithmetic/detail/MontyWrappedStandardMath.h" 10 | #include "hurchalla/montgomery_arithmetic/detail/experimental/MontyFullRangeMasked.h" 11 | #include "hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/AbstractMontgomeryForm.h" 12 | #include "hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/ConcreteMontgomeryForm.h" 13 | #include "hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/AbstractMontgomeryWrapper.h" 14 | #include "hurchalla/util/compiler_macros.h" 15 | #include "gtest/gtest.h" 16 | 17 | 18 | namespace { 19 | 20 | 21 | // For unit testing, we want fast compile times, so it helps to use the version 22 | // of MontgomeryForm that generally doesn't do force inlining. 23 | #if 1 24 | constexpr bool forceInlineAllFunctions = false; 25 | #else 26 | constexpr bool forceInlineAllFunctions = true; 27 | #endif 28 | 29 | template using MF = 30 | hurchalla::MontgomeryForm; 31 | 32 | 33 | 34 | // test the 'unusual' Montgomery types, which are MontyWrappedStandardMath and 35 | // the experimental class MontyFullRangeMasked. 36 | 37 | TEST(MontgomeryArithmetic, MontyWrappedStandardMath) { 38 | test_custom_monty(); 39 | } 40 | 41 | 42 | #ifdef HURCHALLA_TEST_MODULAR_ARITHMETIC_HEAVYWEIGHT 43 | // MontyFullRangeMasked is experimental, so we skip it when we're not doing 44 | // extensive (heavyweight) testing. 45 | TEST(MontgomeryArithmetic, MontyFullRangeMasked) { 46 | test_custom_monty(); 47 | } 48 | 49 | // The group of classes: ConcreteMontgomeryForm, AbstractMontgomeryForm, and 50 | // AbstractMontgomeryWrapper, are experimental, so we skip testing them when 51 | // we're not doing extensive (heavyweight) testing. 
52 | TEST(MontgomeryArithmetic, MontyVirtual) { 53 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 54 | { 55 | using ConcreteMF = hurchalla::ConcreteMontgomeryForm, 56 | TESTABLE_ARRAY_POW_SIZES()>; 57 | using Wrapper = hurchalla::AbstractMontgomeryWrapper; 58 | test_MontgomeryForm(); 59 | } 60 | #endif 61 | { 62 | using ConcreteMF = hurchalla::ConcreteMontgomeryForm, 63 | TESTABLE_ARRAY_POW_SIZES()>; 64 | using Wrapper = hurchalla::AbstractMontgomeryWrapper; 65 | test_MontgomeryForm(); 66 | } 67 | { 68 | using ConcreteMF = hurchalla::ConcreteMontgomeryForm, 69 | TESTABLE_ARRAY_POW_SIZES()>; 70 | using Wrapper = hurchalla::AbstractMontgomeryWrapper; 71 | test_MontgomeryForm(); 72 | } 73 | } 74 | #endif 75 | 76 | 77 | } // end anonymous namespace 78 | -------------------------------------------------------------------------------- /test/montgomery_arithmetic/low_level_api/test_REDC_inline_asm.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | // Test the REDC function versions that contain inline asm. 9 | 10 | #undef HURCHALLA_ALLOW_INLINE_ASM_ALL 11 | #define HURCHALLA_ALLOW_INLINE_ASM_ALL 12 | 13 | // For extra coverage, we also enable the asserts, so that the internal REDC 14 | // function postconditions call corresponding non-inline asm functions to 15 | // check their results. 
16 | #undef HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 17 | #define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 18 | #undef HURCHALLA_CLOCKWORK_ASSERT_LEVEL 19 | #define HURCHALLA_CLOCKWORK_ASSERT_LEVEL 3 20 | 21 | 22 | #include "test_REDC.h" 23 | 24 | 25 | TEST(MontgomeryArithmetic, REDC8_inline_asm) { 26 | std::vector moduli { 3, 255, 19, 21, 211, 23, 171 }; 27 | for (auto n : moduli) 28 | REDC_test_all(n); 29 | } 30 | TEST(MontgomeryArithmetic, REDC16_inline_asm) { 31 | std::vector moduli { 3, 17, UINT16_C(65535), 32 | UINT16_C(65533), UINT16_C(357), UINT16_C(32253), 33 | UINT16_C(11111) }; 34 | for (auto n : moduli) 35 | REDC_test_all(n); 36 | } 37 | TEST(MontgomeryArithmetic, REDC32_inline_asm) { 38 | std::vector moduli { 3, 13, UINT32_C(4294967295), 39 | UINT32_C(4294967293), UINT32_C(2147483347), 40 | UINT32_C(246098243), UINT32_C(1111111) }; 41 | for (auto n : moduli) 42 | REDC_test_all(n); 43 | } 44 | TEST(MontgomeryArithmetic, REDC64_inline_asm) { 45 | std::vector moduli { 3, 11, UINT64_C(18446744073709551615), 46 | UINT64_C(18446744073709551613), 47 | UINT64_C(4294967295), 48 | UINT64_C(3194806714689), UINT64_C(11111111311) }; 49 | for (auto n : moduli) 50 | REDC_test_all(n); 51 | } 52 | 53 | #if !defined(__GNUC__) || __GNUC__ >= 11 || defined(__INTEL_COMPILER) || \ 54 | defined(__clang__) 55 | // Older versions of GCC (most of them prior to v11) have a compiler bug that 56 | // causes an incorrect value of n to be produced and thus results in one of my 57 | // google test assertions failing. See 58 | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98474 . The bug appears to have 59 | // been introduced as a regression to gcc in v5.1. It exists up to the latest 60 | // released version (v10.2) of gcc at the time of this writing. It's unclear 61 | // at the moment whether __uint128_t is safe to use with any version of gcc 62 | // between 5.1 and 10.2. The patch appears to fix the bug, and it is scheduled 63 | // to be in the gcc 11 release. 
64 | // The #if above disables the following tests on gcc prior to gcc v11, since 65 | // they will fail at optimization level -O1 or higher due to the compiler bug. 66 | # if HURCHALLA_COMPILER_HAS_UINT128_T() 67 | TEST(MontgomeryArithmetic, REDC128_inline_asm) { 68 | __uint128_t zero = 0; 69 | std::vector<__uint128_t> moduli { 3, 11, zero-1, zero-3, 70 | static_cast<__uint128_t>(UINT64_C(18446744073709551613)) * 71 | UINT64_C(18446744073709551611), 72 | static_cast<__uint128_t>(UINT64_C(35698723439051265)) * 73 | UINT64_C(70945870135873583), 74 | static_cast<__uint128_t>(UINT64_C(34069834503)) * 75 | UINT64_C(895835939) }; 76 | for (auto n : moduli) 77 | REDC_test_all(n); 78 | } 79 | # endif 80 | #endif 81 | -------------------------------------------------------------------------------- /modular_arithmetic/include/hurchalla/modular_arithmetic/modular_addition.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
6 | */ 7 | 8 | #ifndef HURCHALLA_MODULAR_ARITHMETIC_MODULAR_ADDITION_H_INCLUDED 9 | #define HURCHALLA_MODULAR_ARITHMETIC_MODULAR_ADDITION_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/modular_arithmetic/detail/platform_specific/impl_modular_addition.h" 13 | #include "hurchalla/util/traits/ut_numeric_limits.h" 14 | #include "hurchalla/util/compiler_macros.h" 15 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 16 | 17 | namespace hurchalla { 18 | 19 | 20 | // Performance notes are given below this function 21 | template HURCHALLA_FORCE_INLINE 22 | T modular_addition_prereduced_inputs(T a, T b, T modulus) 23 | { 24 | static_assert(ut_numeric_limits::is_integer, ""); 25 | HPBC_CLOCKWORK_API_PRECONDITION(modulus > 0); 26 | HPBC_CLOCKWORK_API_PRECONDITION(0<=a && a::call(a, b, modulus); 30 | 31 | // POSTCONDITION: 32 | // Returns (a+b)%modulus, performed as if a and b have infinite precision 33 | // and thus as if (a+b) is never subject to integer overflow. 34 | HPBC_CLOCKWORK_POSTCONDITION(0<=result && result 18 | 19 | namespace hurchalla { 20 | 21 | 22 | // Performance recommendations are given below this function 23 | template HURCHALLA_FORCE_INLINE 24 | T modular_subtraction_prereduced_inputs(T a, T b, T modulus) 25 | { 26 | static_assert(ut_numeric_limits::is_integer, ""); 27 | static_assert(std::is_same::value || 28 | std::is_same::value, ""); 29 | HPBC_CLOCKWORK_API_PRECONDITION(modulus > 0); 30 | HPBC_CLOCKWORK_API_PRECONDITION(0<=a && a::call(a, b, modulus); 34 | 35 | // POSTCONDITION: 36 | // Let a conceptual "%%" operator represent a modulo operator that always 37 | // returns a non-negative remainder. 38 | // This function returns (a-b) %% modulus, performed as if a and b are 39 | // infinite precision signed ints (and thus as if it is impossible for the 40 | // subtraction (a-b) to overflow). 
41 | HPBC_CLOCKWORK_POSTCONDITION(0<=result && result 14 | 15 | namespace { 16 | 17 | 18 | namespace hc = ::hurchalla; 19 | 20 | template 21 | void test_single_inverse(T a) 22 | { 23 | using P = typename hc::safely_promote_unsigned::type; 24 | T one = static_cast(1); 25 | 26 | T inv = hc::inverse_mod_R(a); 27 | EXPECT_TRUE(static_cast(static_cast

(inv) * static_cast

(a)) == one); 28 | } 29 | 30 | 31 | template 32 | void test_constexpr_inverse() 33 | { 34 | // the #if is a slight hack, but inverse_mod_R is only constexpr for C++14 35 | // and above (C++11's support for constexpr functions was too primitive) 36 | #if (__cplusplus >= 201402L) || \ 37 | (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L && _MSC_VER >= 1910) 38 | // test constexpr use of inverse_mod_R 39 | static_assert(hc::inverse_mod_R(static_cast(1)) == 1, ""); 40 | 41 | // Suppress a false positive warning MSVC++ 2017 issues when constexpr 42 | // compiling, regarding (unsigned) integral overflow that occurs inside 43 | // impl_inverse_mod_R. Unfortunately suppressing it there doesn't work, 44 | // probably due to VC2017 awkwardly compiling constexpr functions. Unsigned 45 | // overflow is well defined and correct there, and MS removed this false 46 | // warning in VC++ 2019. 47 | #if defined(_MSC_VER) 48 | # pragma warning(push) 49 | # pragma warning(disable : 4307) 50 | #endif 51 | static_assert(static_cast(3 * hc::inverse_mod_R(static_cast(3))) 52 | == 1, ""); 53 | static_assert(static_cast(251 * hc::inverse_mod_R(static_cast(251))) 54 | == 1, ""); 55 | #if defined(_MSC_VER) 56 | # pragma warning(pop) 57 | #endif 58 | #endif 59 | } 60 | 61 | 62 | template 63 | void test_inverse_exhaustive() 64 | { 65 | T tmax = hc::ut_numeric_limits::max(); 66 | T evenmax = static_cast((tmax/2)*2); 67 | T oddmax = (evenmax != tmax) ? tmax : static_cast(tmax - 1); 68 | 69 | for (T a=oddmax; a>1; a=static_cast(a-2)) 70 | test_single_inverse(a); 71 | test_single_inverse(static_cast(1)); 72 | } 73 | 74 | 75 | template 76 | void test_inverse_mod_r() 77 | { 78 | T tmax = hc::ut_numeric_limits::max(); 79 | T evenmax = static_cast((tmax/2)*2); 80 | T oddmax = (evenmax != tmax) ? tmax : static_cast(tmax - 1); 81 | T oddhalfmax = static_cast((tmax/4)*2 + 1); 82 | 83 | // inverse_mod_r's preconditions require input a is odd. 
84 | 85 | test_single_inverse(static_cast(1)); 86 | test_single_inverse(static_cast(3)); 87 | test_single_inverse(static_cast(5)); 88 | test_single_inverse(static_cast(7)); 89 | 90 | test_single_inverse(static_cast(oddmax)); 91 | test_single_inverse(static_cast(oddmax - 2)); 92 | test_single_inverse(static_cast(oddmax - 4)); 93 | 94 | test_single_inverse(static_cast(oddhalfmax)); 95 | test_single_inverse(static_cast(oddhalfmax + 2)); 96 | test_single_inverse(static_cast(oddhalfmax - 2)); 97 | 98 | test_constexpr_inverse(); 99 | } 100 | 101 | 102 | 103 | TEST(MontgomeryArithmetic, inverse_mod_r) { 104 | test_inverse_mod_r(); 105 | test_inverse_mod_r(); 106 | test_inverse_mod_r(); 107 | test_inverse_mod_r(); 108 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 109 | test_inverse_mod_r<__uint128_t>(); 110 | #endif 111 | 112 | test_inverse_exhaustive(); 113 | test_inverse_exhaustive(); 114 | } 115 | 116 | 117 | } // end unnamed namespace 118 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_pow_2kary/testbench_2kary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 Jeffrey Hurchalla. 4 | # 5 | # This Source Code Form is subject to the terms of the Mozilla Public 6 | # License, v. 2.0. If a copy of the MPL was not distributed with this 7 | # file, You can obtain one at https://mozilla.org/MPL/2.0/. 8 | 9 | 10 | 11 | 12 | # You need to clone the util, factoring, and modular_arithmetic repos 13 | # from https://github.com/hurchalla 14 | 15 | # SET repo_directory TO THE DIRECTORY WHERE YOU CLONED THE HURCHALLA GIT 16 | # REPOSITORIES. 
(or otherwise ensure the compiler /I flags correctly specify 17 | # the needed hurchalla include directories) 18 | 19 | repo_directory=/Users/jeffreyhurchalla/Desktop 20 | #repo_directory=/home/jeff/repos 21 | 22 | 23 | # you would ordinarily use either g++ or clang++ for $1 24 | cppcompiler=$1 25 | 26 | 27 | if [[ $cppcompiler == "g++" ]]; then 28 | error_limit=-fmax-errors=3 29 | warn_nrvo=-Wnrvo 30 | else 31 | error_limit=-ferror-limit=3 32 | fi 33 | 34 | 35 | 36 | 37 | exit_on_failure () { 38 | if [ $? -ne 0 ]; then 39 | exit 1 40 | fi 41 | } 42 | 43 | #optimization_level=O2 44 | #optimization_level=O3 45 | optimization_level=$2 46 | 47 | #define_mont_type=-DDEF_MONT_TYPE=MontgomeryQuarter 48 | define_mont_type=-DDEF_MONT_TYPE=$3 49 | define_uint_type=-DDEF_UINT_TYPE=$4 50 | 51 | # you must specify either -DTEST_ARRAY or -DTEST_SCALAR or -DTEST_PARTIAL_ARRAY for $8 52 | define_test_type=$8 53 | 54 | 55 | cpp_standard=c++17 56 | 57 | 58 | # You can use arguments $9 and ${10} and ${11} etc to define macros such as 59 | # -DHURCHALLA_ALLOW_INLINE_ASM_ALL 60 | # For debugging, defining the following macros may be useful 61 | # -DHURCHALLA_CLOCKWORK_ENABLE_ASSERTS -DHURCHALLA_UTIL_ENABLE_ASSERTS 62 | 63 | append_if_set() { 64 | local array_name="$1" 65 | local value="$2" 66 | if [ -n "$value" ]; then 67 | eval "$array_name+=(\"\$value\")" 68 | fi 69 | } 70 | extra_args=() 71 | append_if_set extra_args "$9" 72 | append_if_set extra_args "${10}" 73 | append_if_set extra_args "${11}" 74 | append_if_set extra_args "${12}" 75 | append_if_set extra_args "${13}" 76 | append_if_set extra_args "${14}" 77 | append_if_set extra_args "${15}" 78 | append_if_set extra_args "${16}" 79 | append_if_set extra_args "${17}" 80 | append_if_set extra_args "${18}" 81 | append_if_set extra_args "${19}" 82 | append_if_set extra_args "${20}" 83 | append_if_set extra_args "${21}" 84 | append_if_set extra_args "${22}" 85 | append_if_set extra_args "${23}" 86 | append_if_set extra_args "${24}" 
87 | append_if_set extra_args "${25}" 88 | append_if_set extra_args "${26}" 89 | append_if_set extra_args "${27}" 90 | append_if_set extra_args "${28}" 91 | append_if_set extra_args "${29}" 92 | append_if_set extra_args "${30}" 93 | 94 | 95 | # we could also use -g to get debug symbols (for lldb/gdb, and objdump) 96 | 97 | $cppcompiler \ 98 | $error_limit -$optimization_level \ 99 | $define_mont_type $define_uint_type $define_test_type \ 100 | "${extra_args[@]}" \ 101 | -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion $warn_nrvo \ 102 | -std=$cpp_standard \ 103 | -I${repo_directory}/modular_arithmetic/modular_arithmetic/include \ 104 | -I${repo_directory}/modular_arithmetic/montgomery_arithmetic/include \ 105 | -I${repo_directory}/util/include \ 106 | -c testbench_montgomery_pow_2kary.cpp 107 | 108 | exit_on_failure 109 | 110 | $cppcompiler -$optimization_level -std=$cpp_standard -o testbench_montgomery_pow_2kary testbench_montgomery_pow_2kary.o -lm 111 | 112 | exit_on_failure 113 | 114 | echo "compilation finished, now executing:" 115 | 116 | 117 | # argument $5 (if present), is the randomization seed for std::mt19937_64 118 | # argument $6 (if present), is max_modulus_bits_reduce 119 | # argument $7 (if present), is exponent_bits_reduce 120 | 121 | ./testbench_montgomery_pow_2kary $5 $6 $7 122 | 123 | # To give you an example of invoking this script at the command line: 124 | # ./testbench_2kary.sh clang++ O3 MontgomeryFull __uint128_t 191 8 50 -DTEST_ARRAY -DHURCHALLA_ALLOW_INLINE_ASM_ALL 125 | 126 | 127 | -------------------------------------------------------------------------------- /modular_arithmetic/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this 4 | # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
5 | 6 | 7 | if(TARGET hurchalla_basic_modular_arithmetic) 8 | return() 9 | endif() 10 | 11 | # later versions are probably fine, but are untested 12 | cmake_minimum_required(VERSION 3.14...4.03) 13 | 14 | project(hurchalla_basic_modular_arithmetic VERSION 1.0.0 LANGUAGES CXX) 15 | 16 | # We need to detect if we're using MSVC for x86_64, prior to MSVC2019, since 17 | # these old MSVC versions need a separate asm file for modular multiplication. 18 | # (_MSC_VER < 1920 indicates Visual Studio 2017 or lower) 19 | if((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND (MSVC_VERSION LESS 1920) AND 20 | (CMAKE_SYSTEM_PROCESSOR MATCHES "x86|X86|amd64|AMD64|EM64T") AND 21 | (CMAKE_SIZEOF_VOID_P EQUAL 8)) 22 | set(HURCHALLA_MA_USING_OLD_MSVC_X64 TRUE) 23 | enable_language(ASM_MASM) 24 | else() 25 | set(HURCHALLA_MA_USING_OLD_MSVC_X64 FALSE) 26 | endif() 27 | 28 | 29 | # if this is the top level CMakeLists.txt, let IDEs group projects into folders 30 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) 31 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 32 | endif() 33 | 34 | 35 | if(HURCHALLA_MA_USING_OLD_MSVC_X64) 36 | add_library(hurchalla_basic_modular_arithmetic STATIC) 37 | else() 38 | add_library(hurchalla_basic_modular_arithmetic INTERFACE) 39 | endif() 40 | 41 | 42 | target_sources(hurchalla_basic_modular_arithmetic INTERFACE 43 | $ 44 | $ 45 | $ 46 | $ 47 | $ 48 | $ 49 | $ 50 | $ 51 | $ 52 | $ 53 | $ 54 | $ 55 | $ 56 | $ 57 | ) 58 | if(HURCHALLA_MA_USING_OLD_MSVC_X64) 59 | target_sources(hurchalla_basic_modular_arithmetic PRIVATE 60 | $ 61 | ) 62 | endif() 63 | 64 | 65 | install(DIRECTORY 66 | $ 67 | DESTINATION include) 68 | 69 | 70 | target_include_directories(hurchalla_basic_modular_arithmetic INTERFACE 71 | $) 72 | 73 | 74 | include(FetchContent) 75 | FetchContent_Declare( 76 | hurchalla_util 77 | GIT_REPOSITORY https://github.com/hurchalla/util.git 78 | GIT_TAG 8e03b87c7b6d5c3bf3c0e439a153768c59c512c5 79 | ) 80 | FetchContent_MakeAvailable(hurchalla_util) 81 | 82 | 
target_link_libraries(hurchalla_basic_modular_arithmetic 83 | INTERFACE hurchalla_util) 84 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/get_Rsquared_mod_n.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_GET_RSQUARED_MOD_N_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_GET_RSQUARED_MOD_N_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/montgomery_arithmetic/low_level_api/detail/platform_specific/impl_get_Rsquared_mod_n.h" 13 | #include "hurchalla/montgomery_arithmetic/low_level_api/detail/platform_specific/impl_array_get_Rsquared_mod_n.h" 14 | #include "hurchalla/modular_arithmetic/detail/optimization_tag_structs.h" 15 | #include "hurchalla/util/traits/safely_promote_unsigned.h" 16 | #include "hurchalla/util/traits/ut_numeric_limits.h" 17 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 18 | #include 19 | #include 20 | 21 | #if defined(_MSC_VER) 22 | # pragma warning(push) 23 | # pragma warning(disable : 4127) 24 | #endif 25 | 26 | namespace hurchalla { 27 | 28 | 29 | // For discussion purposes, let type UP be a conceptually unlimited precision 30 | // unsigned integer type, and let the unlimited precision constant R represent 31 | // R = (UP)1 << ut_numeric_limits::digits. Equivalently, 32 | // R = (UP)ut_numeric_limits::max + 1. For example, if T is uint64_t, we 33 | // would have R = (UP)1 << 64. 34 | 35 | // get_Rsquared_mod_n() computes and returns (R*R) % n. 36 | // You can get the argument inverse_n_modR by calling inverse_mod_R().
You can 37 | // get Rmod_n by calling get_R_mod_n(). 38 | 39 | // For the template arguments nIsGuaranteedLessThanRdiv4 and PTAG, it 40 | // is easiest not to specify them, and accept the defaults. Their purpose is 41 | // solely to provide ways to improve performance. These are their details: 42 | // For nIsGuaranteedLessThanRdiv4, if you can guarantee that n < R/4, you can 43 | // set it to true to improve performance. Otherwise, accept the default of 44 | // false. 45 | // For PTAG, if you prefer to have the lowest number of uops rather than 46 | // lowest latency, then you can set it to LowuopsTag. Otherwise accept the 47 | // default of LowlatencyTag. 48 | 49 | template 52 | T get_Rsquared_mod_n(T n, T inverse_n_modR, T Rmod_n) 53 | { 54 | static_assert(ut_numeric_limits::is_integer, ""); 55 | static_assert(!(ut_numeric_limits::is_signed), ""); 56 | static_assert(ut_numeric_limits::is_modulo, ""); 57 | HPBC_CLOCKWORK_PRECONDITION2(n % 2 == 1); // REDC requires an odd modulus. 58 | HPBC_CLOCKWORK_PRECONDITION2(n > 1); 59 | using P = typename safely_promote_unsigned::type; 60 | // verify that n * inverse_n_modR ≡ 1 (mod R) 61 | HPBC_CLOCKWORK_PRECONDITION2( 62 | static_cast(static_cast

(n) * static_cast

(inverse_n_modR)) == 1); 63 | 64 | T rSquaredModN = detail::impl_get_Rsquared_mod_n 65 | ::call(n, inverse_n_modR, Rmod_n); 66 | 67 | HPBC_CLOCKWORK_POSTCONDITION2(rSquaredModN < n); 68 | return rSquaredModN; 69 | } 70 | 71 | 72 | // You can usually get much better performance by using this std::array 73 | // version, when you need multiple calculations of different Rsquared mod Ns. 74 | template // = LowuopsTag 77 | std::array 78 | get_Rsquared_mod_n(const std::array& n, 79 | const std::array& inverse_n_modR, 80 | const std::array& Rmod_n) 81 | { 82 | static_assert(ut_numeric_limits::is_integer, ""); 83 | static_assert(!(ut_numeric_limits::is_signed), ""); 84 | static_assert(ut_numeric_limits::is_modulo, ""); 85 | 86 | using P = typename safely_promote_unsigned::type; 87 | 88 | if (HPBC_CLOCKWORK_PRECONDITION2_MACRO_IS_ACTIVE) { 89 | for (std::size_t i = 0; i < ARRAY_SIZE; ++i) { 90 | HPBC_CLOCKWORK_PRECONDITION2(n[i] % 2 == 1); // REDC requires an odd modulus. 91 | HPBC_CLOCKWORK_PRECONDITION2(n[i] > 1); 92 | HPBC_CLOCKWORK_PRECONDITION2(static_cast(static_cast

(n[i]) * 93 | static_cast

(inverse_n_modR[i])) == 1); 94 | } 95 | } 96 | 97 | std::array result = detail::impl_array_get_Rsquared_mod_n 98 | ::call(n, inverse_n_modR, Rmod_n); 99 | 100 | if (HPBC_CLOCKWORK_POSTCONDITION2_MACRO_IS_ACTIVE) { 101 | for (std::size_t i = 0; i < ARRAY_SIZE; ++i) 102 | HPBC_CLOCKWORK_POSTCONDITION2(result[i] < n[i]); 103 | } 104 | return result; 105 | } 106 | 107 | 108 | } // end namespace 109 | 110 | #if defined(_MSC_VER) 111 | # pragma warning(pop) 112 | #endif 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/impl_inverse_mod_R.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_IMPL_INVERSE_MOD_R_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_IMPL_INVERSE_MOD_R_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/util/traits/safely_promote_unsigned.h" 13 | #include "hurchalla/util/traits/ut_numeric_limits.h" 14 | #include "hurchalla/util/sized_uint.h" 15 | #include "hurchalla/util/compiler_macros.h" 16 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 17 | #include 18 | 19 | namespace hurchalla { namespace detail { 20 | 21 | 22 | // For discussion purposes, let type UP be a conceptually unlimited precision 23 | // unsigned integer type, and let the unlimited precision constant R represent 24 | // R = (UP)1 << ut_numeric_limits::digits. Equivalently, 25 | // R = (UP)ut_numeric_limits::max + 1. For example, if T is uint64_t, we 26 | // would have R = (UP)1 << 64. 27 | 28 | // minor note: we use static member functions to disallow ADL. 
29 | 30 | struct impl_inverse_mod_R { 31 | private: 32 | template // internal helper constexpr function 33 | constexpr static int log2() 34 | { 35 | // PRECONDITION: n!=0 (this isn't possible to express via static_assert) 36 | static_assert(n>=0, ""); 37 | static_assert(n==1 || (n/2)*2 == n, ""); 38 | return (n<=1) ? 0 : 1 + log2(); 39 | } 40 | public: 41 | // This algorithm for the inverse (mod R) is described in 42 | // https://arxiv.org/abs/2204.04342. Note: it is a 43 | // generalized and slightly more efficient version of Dumas' algorithm (from 44 | // https://arxiv.org/abs/1209.6626), so we still call it Dumas' algorithm. 45 | // 46 | // Note: Dumas' alg only makes sense to use for the native integral types - 47 | // Newton's method becomes more efficient when larger types are required. 48 | #ifndef HURCHALLA_TARGET_BIT_WIDTH 49 | # error "HURCHALLA_TARGET_BIT_WIDTH must be defined" 50 | #endif 51 | template 52 | static HURCHALLA_FORCE_INLINE HURCHALLA_CPP14_CONSTEXPR 53 | typename std::enable_if<(bits <= HURCHALLA_TARGET_BIT_WIDTH), T>::type 54 | call(T a) 55 | { 56 | static_assert(ut_numeric_limits::is_integer, ""); 57 | static_assert(!(ut_numeric_limits::is_signed), ""); 58 | 59 | static_assert(bits == ut_numeric_limits::digits, ""); 60 | static_assert(std::is_unsigned::value, ""); //native unsigned integer 61 | HPBC_CLOCKWORK_CONSTEXPR_PRECONDITION(a % 2 == 1); 62 | 63 | // avoid undefined behavior that could result if T is an unsigned type 64 | // that would be promoted to (signed) 'int'. 65 | using P = typename safely_promote_unsigned::type; 66 | P b = static_cast

(a); 67 | 68 | P x = (3u*b)^2u; // good to 5 bits, but we'll treat it as good to 4 69 | constexpr int goodbits = 4; // must be a power of 2 70 | P s = b*x; 71 | P y = 1-s; 72 | 73 | static_assert((bits/goodbits)*goodbits == bits, ""); 74 | constexpr int iterations = log2(); 75 | // cause compile error if iterations isn't initialized at compile time 76 | static_assert(iterations != 0, ""); 77 | HURCHALLA_REQUEST_UNROLL_LOOP 78 | for (int i=0; i(x); 84 | } 85 | 86 | // This is Newton's method algorithm for the inverse (mod R). 87 | // To get the starting bits of 'x' we recurse until we use Dumas' method 88 | // (it's more efficient than Newton's method for native integer types). 89 | template 90 | static HURCHALLA_FORCE_INLINE HURCHALLA_CPP14_CONSTEXPR 91 | typename std::enable_if::type 92 | call(T a) 93 | { 94 | static_assert(ut_numeric_limits::is_integer, ""); 95 | static_assert(!(ut_numeric_limits::is_signed), ""); 96 | static_assert((bits/2)*2 == bits, ""); 97 | constexpr bool is_valid_su = is_valid_sized_uint::value; 98 | using T2 = typename std::conditional::type, T>::type; 100 | HPBC_CLOCKWORK_CONSTEXPR_PRECONDITION(a % 2 == 1); 101 | 102 | // set x so that the lower ('bits'/2) half of the bits are good. 103 | T x = static_cast(call(static_cast(a))); 104 | 105 | using P = typename safely_promote_unsigned::type; 106 | // use one step of the standard Newton's method algorithm for the 107 | // inverse to double the number of good bits. 108 | return static_cast(x * (2 - static_cast

(a)*x)); 109 | } 110 | }; 111 | 112 | 113 | }} // end namespace 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /test/modular_arithmetic/test_absolute_value_difference.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | 9 | // Strictly for testing purposes, we'll define HURCHALLA_ALLOW_INLINE_ASM_ALL 10 | // here in order to make absolute_value_difference() use an inline asm function 11 | // version if it is available. 12 | // Internally, this inline asm function will also call the generic template 13 | // function version of absolute_value_difference inside a postcondition, in 14 | // order to make sure that the asm result is correct. Of course postcondition 15 | // checks must be enabled for this check to occur - the easiest way to ensure 16 | // postconditions are enabled is to define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS, 17 | // which is why we do so here. This is all strictly for testing purposes. 
18 | #undef HURCHALLA_ALLOW_INLINE_ASM_ALL 19 | #define HURCHALLA_ALLOW_INLINE_ASM_ALL 1 20 | 21 | #ifndef HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 22 | # define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 23 | #endif 24 | 25 | 26 | #include "hurchalla/modular_arithmetic/absolute_value_difference.h" 27 | #include "hurchalla/util/traits/ut_numeric_limits.h" 28 | #include "hurchalla/util/compiler_macros.h" 29 | #include "gtest/gtest.h" 30 | #include 31 | 32 | namespace { 33 | 34 | 35 | template 36 | void test_absolute_value_difference() 37 | { 38 | namespace hc = ::hurchalla; 39 | 40 | // Test with a few basic examples first 41 | T a = 5; 42 | T b = 12; 43 | EXPECT_TRUE(static_cast(7) == hc::absolute_value_difference(a, b)); 44 | EXPECT_TRUE(static_cast(7) == hc::absolute_value_difference(b, a)); 45 | EXPECT_TRUE(static_cast(0) == hc::absolute_value_difference(b, b)); 46 | a = 7; b = 6; 47 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(a, b)); 48 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(b, a)); 49 | EXPECT_TRUE(static_cast(0) == hc::absolute_value_difference(b, b)); 50 | 51 | // --------- Test possible edge cases -------- 52 | 53 | a = 0; b = 0; 54 | EXPECT_TRUE(static_cast(0) == hc::absolute_value_difference(a, b)); 55 | a = 0; b = 1; 56 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(a, b)); 57 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(b, a)); 58 | a = 1; b = 1; 59 | EXPECT_TRUE(static_cast(0) == hc::absolute_value_difference(a, b)); 60 | 61 | a = 0; b = hc::ut_numeric_limits::max(); 62 | EXPECT_TRUE(b == hc::absolute_value_difference(a, b)); 63 | EXPECT_TRUE(b == hc::absolute_value_difference(b, a)); 64 | EXPECT_TRUE(static_cast(0) == hc::absolute_value_difference(b, b)); 65 | a = 1; 66 | EXPECT_TRUE(static_cast(b-1) == hc::absolute_value_difference(a, b)); 67 | EXPECT_TRUE(static_cast(b-1) == hc::absolute_value_difference(b, a)); 68 | 69 | a = 0; b = static_cast(hc::ut_numeric_limits::max() - 1); 70 | 
EXPECT_TRUE(b == hc::absolute_value_difference(a, b)); 71 | EXPECT_TRUE(b == hc::absolute_value_difference(b, a)); 72 | a = 1; 73 | EXPECT_TRUE(static_cast(b-1) == hc::absolute_value_difference(a, b)); 74 | EXPECT_TRUE(static_cast(b-1) == hc::absolute_value_difference(b, a)); 75 | 76 | a = static_cast(hc::ut_numeric_limits::max()/2); 77 | b = static_cast(a + 1); 78 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(a, b)); 79 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(b, a)); 80 | EXPECT_TRUE(static_cast(0) == hc::absolute_value_difference(a, a)); 81 | EXPECT_TRUE(static_cast(0) == hc::absolute_value_difference(b, b)); 82 | 83 | b++; 84 | EXPECT_TRUE(static_cast(2) == hc::absolute_value_difference(a, b)); 85 | EXPECT_TRUE(static_cast(2) == hc::absolute_value_difference(b, a)); 86 | a++; 87 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(a, b)); 88 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(b, a)); 89 | 90 | a = static_cast(hc::ut_numeric_limits::max()/2 - 1); 91 | b = static_cast(a + 1); 92 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(a, b)); 93 | EXPECT_TRUE(static_cast(1) == hc::absolute_value_difference(b, a)); 94 | EXPECT_TRUE(static_cast(0) == hc::absolute_value_difference(a, a)); 95 | } 96 | 97 | 98 | 99 | TEST(ModularArithmetic, absolute_value_difference) { 100 | test_absolute_value_difference(); 101 | test_absolute_value_difference(); 102 | test_absolute_value_difference(); 103 | test_absolute_value_difference(); 104 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 105 | test_absolute_value_difference<__uint128_t>(); 106 | #endif 107 | 108 | test_absolute_value_difference(); 109 | test_absolute_value_difference(); 110 | test_absolute_value_difference(); 111 | test_absolute_value_difference(); 112 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 113 | test_absolute_value_difference<__int128_t>(); 114 | #endif 115 | } 116 | 117 | 118 | } // end unnamed namespace 119 | 
-------------------------------------------------------------------------------- /montgomery_arithmetic/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this 4 | # file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | 6 | 7 | if(TARGET hurchalla_montgomery_arithmetic) 8 | return() 9 | endif() 10 | 11 | # later versions are probably fine, but are untested 12 | cmake_minimum_required(VERSION 3.14...4.03) 13 | 14 | project(hurchalla_montgomery_arithmetic VERSION 1.0.0 LANGUAGES CXX) 15 | 16 | 17 | # if this is the top level CMakeLists.txt, let IDEs group projects into folders 18 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) 19 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 20 | endif() 21 | 22 | 23 | add_library(hurchalla_montgomery_arithmetic INTERFACE) 24 | 25 | 26 | target_sources(hurchalla_montgomery_arithmetic INTERFACE 27 | $ 28 | $ 29 | $ 30 | $ 31 | $ 32 | $ 33 | $ 34 | $ 35 | $ 36 | $ 37 | $ 38 | $ 39 | $ 40 | $ 41 | $ 42 | $ 43 | $ 44 | $ 45 | $ 46 | $ 47 | $ 48 | $ 49 | $ 50 | $ 51 | $ 52 | $ 53 | $ 54 | $ 55 | $ 56 | $ 57 | $ 58 | $ 59 | $ 60 | $ 61 | ) 62 | 63 | 64 | install(DIRECTORY 65 | $ 66 | DESTINATION include) 67 | 68 | 69 | target_include_directories(hurchalla_montgomery_arithmetic 70 | INTERFACE $) 71 | 72 | 73 | add_subdirectory(../modular_arithmetic 74 | ${CMAKE_CURRENT_BINARY_DIR}/modular_arithmetic) 75 | target_link_libraries(hurchalla_montgomery_arithmetic 76 | INTERFACE hurchalla_basic_modular_arithmetic) 77 | 78 | 79 | include(FetchContent) 80 | FetchContent_Declare( 81 | hurchalla_util 82 | GIT_REPOSITORY https://github.com/hurchalla/util.git 83 | GIT_TAG 8e03b87c7b6d5c3bf3c0e439a153768c59c512c5 84 | ) 85 | FetchContent_MakeAvailable(hurchalla_util) 86 | 87 | 
target_link_libraries(hurchalla_montgomery_arithmetic 88 | INTERFACE hurchalla_util) 89 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontgomeryFormExtensions.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_MONTGOMERY_FORM_EXTENSIONS_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_MONTGOMERY_FORM_EXTENSIONS_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/modular_arithmetic/detail/optimization_tag_structs.h" 13 | #include "hurchalla/util/traits/ut_numeric_limits.h" 14 | #include "hurchalla/util/compiler_macros.h" 15 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 16 | #include 17 | 18 | namespace hurchalla { namespace detail { 19 | 20 | 21 | // Implementation helper functions that shouldn't be exposed in the 22 | // MontgomeryForm API. 23 | 24 | 25 | template 26 | struct MontgomeryFormExtensions final { 27 | 28 | using RU = typename MF::MontType::uint_type; 29 | // conceptually, R = (UP)1 << ut_numeric_limits::digits, with UP as an 30 | // unlimited precision unsigned integer type. 
31 | static_assert(ut_numeric_limits::is_integer, ""); 32 | static_assert(!(ut_numeric_limits::is_signed), ""); 33 | 34 | using CanonicalValue = typename MF::CanonicalValue; 35 | using MontgomeryValue = typename MF::MontgomeryValue; 36 | using SquaringValue = typename MF::MontType::squaringvalue_type; 37 | 38 | HURCHALLA_FORCE_INLINE 39 | static MontgomeryValue convertInExtended(const MF& mf, RU a) 40 | { 41 | return mf.impl.template convertInExtended(a); 42 | } 43 | 44 | // note: montvalueR is the Montgomery representation of R. 45 | // In normal integer form it is literally R squared mod N. 46 | HURCHALLA_FORCE_INLINE 47 | static CanonicalValue getMontvalueR(const MF& mf) 48 | { 49 | return mf.impl.getMontvalueR(); 50 | } 51 | 52 | // this first shifts x by exponent, which is equivalent to 53 | // multiplying x by 2^exponent, and then it completes the 54 | // mont mul as usual by calling REDC. 55 | // -- IMPORTANT NOTE -- because (2^exponent) is an integer domain 56 | // value rather than a montgomery domain value, the returned 57 | // result viewed as an integer value is 58 | // REDC((x_int * R) * (2^exponent)) == (x_int * (2^exponent) * R) * R^(-1) 59 | // To counteract the inverse R factor, so that you get what most likely 60 | // you wanted, being just plain (x_int * (2^exponent) * R), 61 | // you need to ensure that x has an extra factor of R built into it, 62 | // rather than just the normal single factor of x_int * R. To build an 63 | // extra factor of R into x, you first get montR = getMontvalueR(mf), 64 | // and then you do a normal montgomery multiply of x and montR.
65 | HURCHALLA_FORCE_INLINE 66 | static MontgomeryValue twoPowLimited_times_x(const MF& mf, size_t exponent, CanonicalValue x) 67 | { 68 | HPBC_CLOCKWORK_PRECONDITION(exponent < ut_numeric_limits::digits); 69 | return mf.impl.template twoPowLimited_times_x(exponent, x); 70 | } 71 | HURCHALLA_FORCE_INLINE 72 | static MontgomeryValue twoPowLimited_times_x_v2(const MF& mf, size_t exponent, CanonicalValue x) 73 | { 74 | HPBC_CLOCKWORK_PRECONDITION(0 < exponent && exponent <= ut_numeric_limits::digits); 75 | return mf.impl.template twoPowLimited_times_x_v2(exponent, x); 76 | } 77 | 78 | // note: magicValue is R cubed mod N (in normal integer form) 79 | HURCHALLA_FORCE_INLINE 80 | static RU getMagicValue(const MF& mf) 81 | { 82 | return mf.impl.template getMagicValue(); 83 | } 84 | 85 | HURCHALLA_FORCE_INLINE 86 | static MontgomeryValue 87 | convertInExtended_aTimesR(const MF& mf, RU a, RU magicValue) 88 | { 89 | HPBC_CLOCKWORK_PRECONDITION(magicValue == getMagicValue(mf)); 90 | return mf.impl.template convertInExtended_aTimesR(a, magicValue); 91 | } 92 | 93 | // this shifts RsquaredModN by exponent (rather than multiplying by 94 | // (1<::digits); 102 | return mf.impl.template twoPowLimited(exponent); 103 | } 104 | 105 | // this shifts RcubedModN by exponent (rather than multiplying by 106 | // (1<::digits); 114 | return mf.impl.template RTimesTwoPowLimited(exponent, magicValue); 115 | } 116 | 117 | 118 | HURCHALLA_FORCE_INLINE 119 | static SquaringValue getSquaringValue(const MF& mf, MontgomeryValue x) 120 | { 121 | return mf.impl.getSquaringValue(x); 122 | } 123 | 124 | HURCHALLA_FORCE_INLINE 125 | static SquaringValue squareSV(const MF& mf, SquaringValue sv) 126 | { 127 | return mf.impl.template squareSV(sv); 128 | } 129 | 130 | HURCHALLA_FORCE_INLINE 131 | static MontgomeryValue 132 | squareToMontgomeryValue(const MF& mf, SquaringValue sv) 133 | { 134 | return mf.impl.template squareToMontgomeryValue(sv); 135 | } 136 | 137 | HURCHALLA_FORCE_INLINE 138 | static 
MontgomeryValue getMontgomeryValue(const MF& mf, SquaringValue sv) 139 | { 140 | return mf.impl.getMontgomeryValue(sv); 141 | } 142 | }; 143 | 144 | 145 | }} // end namespace 146 | 147 | #endif 148 | -------------------------------------------------------------------------------- /test/modular_arithmetic/test_modular_addition.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | 9 | // Strictly for testing purposes, we'll define HURCHALLA_ALLOW_INLINE_ASM_ALL 10 | // here in order to make modular addition use an inline asm function version if 11 | // it is available. Internally, this inline asm function will also call the 12 | // generic template function version of modular addition inside a postcondition, 13 | // in order to make sure that the asm result is correct. Of course postcondition 14 | // checks must be enabled for this check to occur - the easiest way to ensure 15 | // postconditions are enabled is to define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS, 16 | // which is why we do so here. This is all strictly for testing purposes. 
17 | #undef HURCHALLA_ALLOW_INLINE_ASM_ALL 18 | #define HURCHALLA_ALLOW_INLINE_ASM_ALL 1 19 | 20 | #ifndef HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 21 | # define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 22 | #endif 23 | 24 | 25 | #include "hurchalla/modular_arithmetic/modular_addition.h" 26 | #include "hurchalla/util/traits/ut_numeric_limits.h" 27 | #include "hurchalla/util/compiler_macros.h" 28 | #include "gtest/gtest.h" 29 | #include 30 | 31 | namespace { 32 | 33 | 34 | namespace hc = ::hurchalla; 35 | 36 | template 37 | void test_modulus(T modulus) 38 | { 39 | EXPECT_TRUE(modulus > 2); // if this fails, this test file has a bug 40 | 41 | T a = 0; 42 | T b = 0; 43 | EXPECT_TRUE(static_cast(0) == 44 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 45 | a = 0; b = 1; 46 | EXPECT_TRUE(static_cast(1) == 47 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 48 | EXPECT_TRUE(static_cast(1) == 49 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 50 | a = 1; b = 1; 51 | EXPECT_TRUE(static_cast(2) == 52 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 53 | 54 | a = 0; b = static_cast(modulus - 1); 55 | EXPECT_TRUE(b == hc::modular_addition_prereduced_inputs(a, b, modulus)); 56 | EXPECT_TRUE(b == hc::modular_addition_prereduced_inputs(b, a, modulus)); 57 | EXPECT_TRUE(static_cast(modulus - 2) == 58 | hc::modular_addition_prereduced_inputs(b, b, modulus)); 59 | 60 | a = 1; b = static_cast(modulus - 1); 61 | EXPECT_TRUE(static_cast(0) == 62 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 63 | EXPECT_TRUE(static_cast(0) == 64 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 65 | 66 | a = static_cast(modulus/2); 67 | b = static_cast(modulus - a); 68 | EXPECT_TRUE(static_cast(0) == 69 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 70 | EXPECT_TRUE(static_cast(0) == 71 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 72 | 73 | b++; 74 | EXPECT_TRUE(static_cast(1) == 75 | hc::modular_addition_prereduced_inputs(a, b, 
modulus)); 76 | EXPECT_TRUE(static_cast(1) == 77 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 78 | a++; 79 | EXPECT_TRUE(static_cast(2) == 80 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 81 | EXPECT_TRUE(static_cast(2) == 82 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 83 | 84 | a = static_cast(modulus/2 - 1); 85 | b = static_cast(modulus - a - 2); 86 | EXPECT_TRUE(static_cast(modulus - 2) == 87 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 88 | EXPECT_TRUE(static_cast(modulus - 2) == 89 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 90 | a++; 91 | EXPECT_TRUE(static_cast(modulus - 1) == 92 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 93 | EXPECT_TRUE(static_cast(modulus - 1) == 94 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 95 | } 96 | 97 | 98 | template 99 | void test_modular_addition() 100 | { 101 | // test with a few basic examples first 102 | T modulus = 13; 103 | T a = 5; 104 | T b = 12; 105 | EXPECT_TRUE(static_cast(4) == 106 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 107 | EXPECT_TRUE(static_cast(4) == 108 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 109 | EXPECT_TRUE(static_cast(11) == 110 | hc::modular_addition_prereduced_inputs(b, b, modulus)); 111 | a = 7; b = 6; 112 | EXPECT_TRUE(static_cast(0) == 113 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 114 | EXPECT_TRUE(static_cast(0) == 115 | hc::modular_addition_prereduced_inputs(b, a, modulus)); 116 | EXPECT_TRUE(static_cast(12) == 117 | hc::modular_addition_prereduced_inputs(b, b, modulus)); 118 | 119 | test_modulus(modulus); 120 | test_modulus(static_cast(14)); 121 | 122 | // --------- Test using moduli that are likely edge cases -------- 123 | 124 | modulus = 1; 125 | a = 0; b = 0; 126 | EXPECT_TRUE(static_cast(0) == 127 | hc::modular_addition_prereduced_inputs(a, b, modulus)); 128 | 129 | modulus = hc::ut_numeric_limits::max(); 130 | test_modulus(modulus); 131 | modulus--; 
132 | test_modulus(modulus); 133 | 134 | modulus = hc::ut_numeric_limits::max() / 2; 135 | test_modulus(modulus); 136 | modulus++; 137 | test_modulus(modulus); 138 | } 139 | 140 | 141 | 142 | TEST(ModularArithmetic, modular_addition) { 143 | test_modular_addition(); 144 | test_modular_addition(); 145 | test_modular_addition(); 146 | test_modular_addition(); 147 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 148 | test_modular_addition<__uint128_t>(); 149 | #endif 150 | 151 | test_modular_addition(); 152 | test_modular_addition(); 153 | test_modular_addition(); 154 | test_modular_addition(); 155 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 156 | test_modular_addition<__int128_t>(); 157 | #endif 158 | } 159 | 160 | 161 | } // end unnamed namespace 162 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/platform_specific/impl_get_Rsquared_mod_n.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2025 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_IMPL_GET_RSQUARED_MOD_N_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_IMPL_GET_RSQUARED_MOD_N_H_INCLUDED 10 | 11 | 12 | #include "hurchalla/montgomery_arithmetic/low_level_api/REDC.h" 13 | #include "hurchalla/montgomery_arithmetic/detail/platform_specific/two_times_restricted.h" 14 | #include "hurchalla/modular_arithmetic/modular_addition.h" 15 | #include "hurchalla/modular_arithmetic/modular_multiplication.h" 16 | #include "hurchalla/util/traits/ut_numeric_limits.h" 17 | #include "hurchalla/util/unsigned_square_to_hilo_product.h" 18 | #include "hurchalla/util/compiler_macros.h" 19 | #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h" 20 | 21 | #if defined(_MSC_VER) 22 | # pragma warning(push) 23 | # pragma warning(disable : 4127) 24 | #endif 25 | 26 | namespace hurchalla { namespace detail { 27 | 28 | 29 | // For discussion purposes, let type UP be a conceptually unlimited precision 30 | // unsigned integer type, and let the unlimited precision constant R represent 31 | // R = (UP)1 << ut_numeric_limits::digits. Equivalently, 32 | // R = (UP)ut_numeric_limits::max + 1. For example, if T is uint64_t, we 33 | // would have R = (UP)1 << 64. 34 | 35 | // Compute (R*R) % n 36 | 37 | // Minor note: we use a static member function to disallow ADL. 
38 | template 39 | struct impl_get_Rsquared_mod_n { 40 | 41 | template 42 | HURCHALLA_FORCE_INLINE static T call(T n, T inverse_n_modR, T Rmod_n) 43 | { 44 | static_assert(ut_numeric_limits::is_integer, ""); 45 | static_assert(!(ut_numeric_limits::is_signed), ""); 46 | 47 | HPBC_CLOCKWORK_PRECONDITION2(n % 2 == 1); 48 | HPBC_CLOCKWORK_PRECONDITION2(n > 1); 49 | 50 | namespace hc = ::hurchalla; 51 | T rSquaredModN; 52 | #ifdef HURCHALLA_TESTING_RSQUARED_MOD_N 53 | if (true) { 54 | #else 55 | if HURCHALLA_CPP17_CONSTEXPR 56 | (hc::modular_multiplication_has_slow_perf()) { 57 | #endif 58 | HPBC_CLOCKWORK_ASSERT2(Rmod_n < n); 59 | T tmp = Rmod_n; // Rmod_n ≡ 1*R (mod n) 60 | int i=0; 61 | for (; i<8; ++i) 62 | tmp = hc::modular_addition_prereduced_inputs(tmp, tmp, n); 63 | // at this point, tmp ≡ 256*R (mod n) 64 | constexpr int bitsT = ut_numeric_limits::digits; 65 | for (; i 91 | struct impl_get_Rsquared_mod_n { 92 | 93 | template 94 | HURCHALLA_FORCE_INLINE static T call(T n, T inverse_n_modR, T Rmod_n) 95 | { 96 | static_assert(ut_numeric_limits::is_integer, ""); 97 | static_assert(!(ut_numeric_limits::is_signed), ""); 98 | 99 | HPBC_CLOCKWORK_PRECONDITION2(n % 2 == 1); 100 | HPBC_CLOCKWORK_PRECONDITION2(n > 1); 101 | // and since the template param nIsGuaranteedLessThanRdiv4 == true, 102 | constexpr T Rdiv4 = static_cast(static_cast(1) << 103 | (ut_numeric_limits::digits - 2)); 104 | HPBC_CLOCKWORK_PRECONDITION2(n < Rdiv4); 105 | 106 | namespace hc = ::hurchalla; 107 | T rSquaredModN; 108 | #ifdef HURCHALLA_TESTING_RSQUARED_MOD_N 109 | if (true) { 110 | #else 111 | if HURCHALLA_CPP17_CONSTEXPR 112 | (hc::modular_multiplication_has_slow_perf()) { 113 | #endif 114 | HPBC_CLOCKWORK_ASSERT2(Rmod_n < n); 115 | T tmp = Rmod_n; // Rmod_n ≡ 1*R (mod n) 116 | int i=0; 117 | 118 | for (; i<4; ++i) 119 | tmp = hc::detail::two_times_restricted::call(tmp, n); 120 | 121 | // at this point, tmp ≡ 16*R (mod n) 122 | constexpr int bitsT = ut_numeric_limits::digits; 123 | 124 | for 
(; i(tmp + n); 131 | HPBC_CLOCKWORK_ASSERT2(0 < tmp && tmp < static_cast(2*n)); 132 | } 133 | HPBC_CLOCKWORK_ASSERT2(i == bitsT/2); 134 | { 135 | // This final iteration was unrolled from the loop above so we can 136 | // use standard REDC, which will end with tmp in the range [0, n). 137 | T u_hi, u_lo; 138 | u_hi = hc::unsigned_square_to_hilo_product(u_lo, tmp); 139 | tmp = hc::REDC_standard(u_hi, u_lo, n, inverse_n_modR, PTAG()); 140 | } 141 | 142 | // We should now have tmp ≡ R*R (mod n). 143 | // REDC_standard's postcondition guarantees the following: 144 | HPBC_CLOCKWORK_ASSERT2(tmp < n); 145 | 146 | rSquaredModN = tmp; 147 | HPBC_CLOCKWORK_POSTCONDITION2(rSquaredModN == 148 | hc::modular_multiplication_prereduced_inputs(Rmod_n, Rmod_n, n)); 149 | } else { 150 | rSquaredModN = hc::modular_multiplication_prereduced_inputs( 151 | Rmod_n, Rmod_n, n); 152 | } 153 | 154 | HPBC_CLOCKWORK_POSTCONDITION2(rSquaredModN < n); 155 | return rSquaredModN; 156 | } 157 | }; 158 | 159 | 160 | }} // end namespace 161 | 162 | #if defined(_MSC_VER) 163 | # pragma warning(pop) 164 | #endif 165 | 166 | 167 | #endif 168 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/platform_specific/README_REDC_supplement.md: -------------------------------------------------------------------------------- 1 | This file supplements the document [README_REDC.md](README_REDC.md). 2 |

3 | 4 | The simplest and usually most effective way to implement the traditional REDC is to write a delegating function that calls the alternate REDC. With inlining, its total uops will likely be lower than the low-uops asm version further below, and there is a decent chance that the compiler will hoist the calculation of invN out of the loop if we are calling this function from a loop. Thus this version could also achieve latency equal to the low-latency asm version further below. In practice, even if the negation is not hoisted out of the loop, REDC will most often be called during Montgomery multiplication, and the negation will not contribute to latency since its calculation will overlap with the preceding multiply in the Montgomery multiplication. Yet another reason why we might prefer this implementation is that the delegate "REDC_alternate" function can be implemented effectively with just standard C, which would eliminate the chance of inline-asm-related bugs, and will sometimes improve performance since inline asm may hinder compiler optimizations.
5 | 6 |

 7 | // Traditional REDC by delegation. On Intel Skylake: ~9-10 cycles latency, ~8 fused uops
 8 | inline uint64_t REDC_traditional_delegating(uint64_t T_hi, uint64_t T_lo,
 9 |                                                    uint64_t N, uint64_t negInvN)
10 | {
11 |     const uint64_t invN = 0 - negInvN;   /* unsigned negation (mod 2^64) of negInvN */
12 |     return REDC_alternate(T_hi, T_lo, N, invN);
13 | }
14 | 
15 | Delegating Function for the Traditional REDC 16 |

17 | 18 | There is usually no need to read further unless you are curious. 19 | 20 | We can improve upon the inline assembly we saw in the main document for the traditional REDC, though the code becomes harder to understand. The improvements also can't be implemented well in standard C; none of the major compilers (gcc, clang, MSVC, icc) are able to compile standard C versions of the functions below without adding significant extra latency and uops, even with idiomatic use of the ternary operator for conditional move. 21 | 22 | Since the alternate REDC function from [README_REDC.md](README_REDC.md) does better on uops and equals or betters the latency, all while being easier to understand, and friendlier for compilers if written in standard C, we should certainly prefer the alternate REDC to the functions that follow. Nevertheless, the functions below do improve the traditional REDC inline asm, so they could be useful as an easy drop-in replacement of an existing REDC function (which will almost certainly be traditional REDC with the negative inverse), or they might be interesting for anyone curious. 23 | 24 | The improved functions below are correct and produce output equivalent to the previous inline asm we saw for the traditional REDC. You can find a rough proof of correctness in the comments of the C++ function ["REDC(T u_hi, T u_lo, T n, T neg_inv_n, FullrangeTag, InplaceLowlatencyTag)" of an old git commit](https://github.com/hurchalla/modular_arithmetic/blob/66281af1639031b04bdaf9b916e5d5638d3ded25/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/platform_specific/RedcLargeR.h#L365). 25 | 26 | The first function below is optimized for lowest latency (REDC_traditional_improved1). The second is optimized for lowest uops (REDC_traditional_improved2):
27 | 28 | 29 |
30 | // Low-latency variant (x86-64 inline asm). On Intel Skylake: 9 cycles latency, 11 fused uops
31 | inline uint64_t REDC_traditional_improved1(uint64_t T_hi, uint64_t T_lo,
32 |                                                    uint64_t N, uint64_t negInvN)
33 | {
34 |     assert(T_hi < N);   // REDC requires T < NR, and this enforces it.
35 |     uint64_t rrax = T_lo;   // lives in rax (the "a" constraint below)
36 |     uint64_t Thi = T_hi;
37 |     uint64_t tmp;           // scratch register; the asm fills it with a copy of T_lo
38 |     __asm__ (
39 |         "movq %%rax, %[tmp] \n\t"     /* tmp = T_lo */
40 |         "imulq %[inv], %%rax \n\t"    /* m = T_lo * negInvN */
41 |         "mulq %[N] \n\t"              /* mN = m * N  (high half in rdx, low half in rax) */
42 |         "movq %[Thi], %%rax \n\t"     /* rax = T_hi */
43 |         "subq %[N], %%rax \n\t"       /* diff = T_hi - N */
44 |         "negq %[tmp] \n\t"            /* Sets carry to (T_lo != 0) */
45 |         "adcq %%rdx, %[Thi] \n\t"     /* sum1 = addcarry(T_hi, mN_hi) */
46 |         "negq %[tmp] \n\t"            /* Sets carry to (T_lo != 0) again; the adcq above overwrote it */
47 |         "adcq %%rdx, %%rax \n\t"      /* sum2 = addcarry(diff, mN_hi) */
48 |         "cmovaeq %[Thi], %%rax \n\t"  /* rrax = (sum2 >= mN_hi) ? sum1 : sum2 */
49 |         : [Thi]"+r"(Thi), "+&a"(rrax), [tmp]"=&r"(tmp)
50 |         : [N]"r"(N), [inv]"r"(negInvN)
51 |         : "rdx", "cc");
52 |     return rrax;
53 | }
54 | 
55 | Improved Traditional REDC (low latency version) 56 | 57 |
58 | 59 |
60 | // Low-uops variant (x86-64 inline asm). On Intel Skylake: 10 cycles latency, 9 fused uops.
61 | inline uint64_t REDC_traditional_improved2(uint64_t T_hi, uint64_t T_lo,
62 |                                                    uint64_t N, uint64_t negInvN)
63 | {
64 |     assert(T_hi < N);   // REDC requires T < NR, and this enforces it.
65 |     uint64_t rrax = T_lo;   // lives in rax (the "a" constraint below)
66 |     uint64_t Thi = T_hi;
67 |     uint64_t tmp;           // scratch register; the asm fills it with a copy of T_lo
68 |     __asm__ (
69 |         "movq %%rax, %[tmp] \n\t"         /* tmp = T_lo */
70 |         "imulq %[inv], %%rax \n\t"        /* m = T_lo * negInvN */
71 |         "mulq %[N] \n\t"                  /* mN = m * N  (high half in rdx, low half in rax) */
72 |         "subq %[N], %[Thi] \n\t"          /* diff = T_hi - N */
73 |         "negq %[tmp] \n\t"                /* Sets carry to (T_lo != 0) */
74 |         "adcq %[Thi], %%rdx \n\t"         /* rdx = addcarry(diff, mN_hi) */
75 |         "leaq (%%rdx, %[N]), %%rax \n\t"  /* rax = rdx + N  (lea leaves the adcq carry flag intact) */
76 |         "cmovbq %%rdx, %%rax \n\t"        /* rrax = (rdx < mN_hi) ? rdx : rax */
77 |         : [Thi]"+&r"(Thi), "+&a"(rrax), [tmp]"=&r"(tmp)
78 |         : [N]"r"(N), [inv]"r"(negInvN)
79 |         : "rdx", "cc");
80 |     return rrax;
81 | }
82 | 
83 | Improved Traditional REDC (low uops version) 84 | 85 |
86 | 87 | All code in this file is licensed under the MIT Open Source License: 88 | 89 | Copyright (c) 2022 by Jeffrey Hurchalla. 90 | 91 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 92 | 93 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 94 | 95 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 96 | -------------------------------------------------------------------------------- /test/modular_arithmetic/test_modular_multiplication.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | 9 | // We'll undefine HURCHALLA_DISALLOW_INLINE_ASM_MODMUL here in order to make 10 | // modular multiplication use an inline asm function version if it is available. 
11 | // This shouldn't be strictly necessary, since there's no reason this macro 12 | // would be defined at this point, and by default modular multiplication uses 13 | // inline asm (if available) unless this macro is defined. 14 | // Internally, the inline asm function will also call the generic template 15 | // function version of modular multiplication inside a postcondition, in order 16 | // to make sure that the asm result is correct. Of course postcondition checks 17 | // must be enabled for this check to occur - the easiest way to ensure 18 | // postconditions are enabled is to define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS, 19 | // which is why we do so here. This is all strictly for testing purposes. 20 | #undef HURCHALLA_DISALLOW_INLINE_ASM_MODMUL 21 | 22 | #ifndef HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 23 | # define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 24 | #endif 25 | 26 | 27 | 28 | #include "hurchalla/modular_arithmetic/modular_multiplication.h" 29 | #include "hurchalla/util/traits/ut_numeric_limits.h" 30 | #include "hurchalla/util/compiler_macros.h" 31 | #include "gtest/gtest.h" 32 | #include 33 | 34 | namespace { 35 | 36 | 37 | namespace hc = ::hurchalla; 38 | 39 | template 40 | void test_modulus(T modulus) 41 | { 42 | T a = 0; 43 | T b = 0; 44 | EXPECT_TRUE(static_cast(0) == 45 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 46 | a = 0; b = 1; 47 | EXPECT_TRUE(static_cast(0) == 48 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 49 | EXPECT_TRUE(static_cast(0) == 50 | hc::modular_multiplication_prereduced_inputs(b, a, modulus)); 51 | a = 1; b = 1; 52 | EXPECT_TRUE(static_cast(1) == 53 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 54 | 55 | a = 2; b = 3; 56 | EXPECT_TRUE(static_cast(6) == 57 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 58 | EXPECT_TRUE(static_cast(6) == 59 | hc::modular_multiplication_prereduced_inputs(b, a, modulus)); 60 | EXPECT_TRUE(static_cast(4) == 61 | 
hc::modular_multiplication_prereduced_inputs(a, a, modulus)); 62 | 63 | a = 0; b = static_cast(modulus - 1); 64 | EXPECT_TRUE(static_cast(0) == 65 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 66 | EXPECT_TRUE(static_cast(0) == 67 | hc::modular_multiplication_prereduced_inputs(b, a, modulus)); 68 | EXPECT_TRUE(static_cast(1) == 69 | hc::modular_multiplication_prereduced_inputs(b, b, modulus)); 70 | 71 | a = 1; b = static_cast(modulus - 1); 72 | EXPECT_TRUE(static_cast(modulus - 1) == 73 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 74 | EXPECT_TRUE(static_cast(modulus - 1) == 75 | hc::modular_multiplication_prereduced_inputs(b, a, modulus)); 76 | 77 | a = static_cast(modulus - 1); 78 | b = static_cast(modulus - 2); 79 | EXPECT_TRUE(static_cast(2) == 80 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 81 | EXPECT_TRUE(static_cast(2) == 82 | hc::modular_multiplication_prereduced_inputs(b, a, modulus)); 83 | 84 | a = static_cast(modulus - 2); 85 | b = static_cast(modulus - 3); 86 | EXPECT_TRUE(static_cast(6) == 87 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 88 | EXPECT_TRUE(static_cast(6) == 89 | hc::modular_multiplication_prereduced_inputs(b, a, modulus)); 90 | 91 | T tmp = static_cast((modulus/4)*4); // make tmp == 4n for some integer n 92 | a = static_cast(tmp/2); 93 | EXPECT_TRUE(static_cast(0) == 94 | hc::modular_multiplication_prereduced_inputs(a, a, tmp)); 95 | 96 | tmp = static_cast((modulus/2)*2); 97 | a = static_cast(tmp/2); 98 | b = static_cast(6); 99 | EXPECT_TRUE(static_cast(0) == 100 | hc::modular_multiplication_prereduced_inputs(a, b, tmp)); 101 | EXPECT_TRUE(static_cast(0) == 102 | hc::modular_multiplication_prereduced_inputs(b, a, tmp)); 103 | 104 | b = static_cast(5); 105 | EXPECT_TRUE(a == hc::modular_multiplication_prereduced_inputs(a, b, tmp)); 106 | EXPECT_TRUE(a == hc::modular_multiplication_prereduced_inputs(b, a, tmp)); 107 | } 108 | 109 | 110 | template 111 | void 
test_modular_multiplication() 112 | { 113 | // test with a few basic examples first 114 | T modulus = 13; 115 | T a = 5; 116 | T b = 12; 117 | EXPECT_TRUE(static_cast(8) == 118 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 119 | EXPECT_TRUE(static_cast(8) == 120 | hc::modular_multiplication_prereduced_inputs(b, a, modulus)); 121 | EXPECT_TRUE(static_cast(12) == 122 | hc::modular_multiplication_prereduced_inputs(a, a, modulus)); 123 | EXPECT_TRUE(static_cast(1) == 124 | hc::modular_multiplication_prereduced_inputs(b, b, modulus)); 125 | 126 | modulus = 14; 127 | a = 7; 128 | b = 8; 129 | EXPECT_TRUE(static_cast(0) == 130 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 131 | EXPECT_TRUE(static_cast(0) == 132 | hc::modular_multiplication_prereduced_inputs(b, a, modulus)); 133 | 134 | test_modulus(modulus); 135 | test_modulus(static_cast(15)); 136 | 137 | // --------- Test using moduli that are likely edge cases -------- 138 | 139 | modulus = 1; 140 | a = 0; b = 0; 141 | EXPECT_TRUE(static_cast(0) == 142 | hc::modular_multiplication_prereduced_inputs(a, b, modulus)); 143 | 144 | modulus = hc::ut_numeric_limits::max(); 145 | test_modulus(modulus); 146 | modulus--; 147 | test_modulus(modulus); 148 | 149 | modulus = hc::ut_numeric_limits::max() / 2; 150 | test_modulus(modulus); 151 | modulus++; 152 | test_modulus(modulus); 153 | } 154 | 155 | 156 | 157 | TEST(ModularArithmetic, modular_multiplication) { 158 | test_modular_multiplication(); 159 | test_modular_multiplication(); 160 | test_modular_multiplication(); 161 | test_modular_multiplication(); 162 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 163 | test_modular_multiplication<__uint128_t>(); 164 | #endif 165 | } 166 | 167 | 168 | } // end unnamed namespace 169 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Clockwork Modular Arithmetic library 2 | 3 | 
![Alt text](images/clockxtrasmall_border2.jpg?raw=true "Clock Gears, photo by Krzysztof Golik, licensed CC BY-SA 4.0") 4 | 5 | Clockwork is a high performance, easy to use Modular Arithmetic library for C++ provided as a "header-only" library, supporting up to 128 bit integer types, and providing extensive support for Montgomery arithmetic. If you want or need Montgomery arithmetic in this range, or general modular arithmetic functions, Clockwork is almost certainly the fastest and easiest library you could use. 6 | 7 | The library requires only C++11, and works with all higher versions of the C++ standard. 8 | 9 | ## Design goals 10 | 11 | Clockwork is designed to be a flexible and bulletproof library with the best performance achievable for modular arithmetic using standard C++ language integer types (e.g. uint32_t or uint64_t) and the language extension types \_\_uint128_t and \_\_int128_t. Larger than 128 bit types are permissible by [specialization](https://github.com/hurchalla/util/blob/master/include/hurchalla/util/traits/ut_numeric_limits.h); however a library like [GMP](https://gmplib.org/) is likely to be a better choice for such sizes. 12 | 13 | ## Requirements 14 | 15 | The Clockwork library requires only compiler support for C++11, which is essentially supported universally at this point. 16 | 17 | Compilers that are confirmed to build this library without warnings or errors on Ubuntu linux (x64) include clang6, clang10, clang18, gcc7, gcc10, gcc13, and intel compiler 19. On Windows, Microsoft Visual C++ 2017, 2019, 2022 are all confirmed to build the library without warnings or errors. On MacOS, clang16 and gcc14 are confirmed to build without warnings or errors. The library is intended for use on all architectures (e.g. x86/64, ARM, RISC-V), but has so far been tested only with x86, x64 (Windows and Ubuntu), and ARM64 (MacOS). 18 | 19 | ## Status 20 | 21 | Released. All planned functionality and unit tests are finished and working correctly. 
22 | 23 | ## Author 24 | 25 | * **Jeffrey Hurchalla** 26 | 27 | ## License 28 | 29 | This project is licensed under the MPL 2.0 License - see the [LICENSE.TXT](LICENSE.TXT) file for details. 30 | 31 |
32 | 33 | ## How to use the library 34 | 35 | ### With CMake 36 | 37 | If you're using CMake for your project and you wish to add the Clockwork modular arithmetic library to it, then clone this git repository onto your system. In your project's CMakeLists.txt file, add the following two lines with appropriate changes to their italic portions to match your project and paths ( an easy replacement for *your_binary_dir* is ${CMAKE_CURRENT_BINARY_DIR} ): 38 | add_subdirectory(*path_of_the_cloned_modular_arithmetic_repository*   *your_binary_dir*/modular_arithmetic) 39 | target_link_libraries(*your_project_target_name*   hurchalla_modular_arithmetic) 40 | 41 | It may help to see a simple [example project with CMake](examples/example_with_cmake). 42 | 43 | ### Without CMake 44 | 45 | If you're not using CMake for your project, you'll need to install Clockwork's modular arithmetic headers and its dependencies to some directory in order to use them. To do this, first clone this git repository onto your system. You'll need to have CMake (at least temporarily) on your system, so install CMake if you don't have it. Then from your shell run the following commands: 46 | 47 | >cd *path_of_the_cloned_modular_arithmetic_repository* 48 | >mkdir tmp 49 | >cd tmp 50 | >cmake -S.. -B. 51 | >cmake --install . --prefix *the_folder_you_want_to_install_to* 52 | If you prefer, for the last command you could instead use CMake's default install location (on linux this is /usr/local) by omitting the --prefix and subsequent folder. 53 | 54 | This will copy all the files needed for this modular arithmetic library to an "include" subfolder in the installation folder of your choosing. 55 | When compiling your project, you'll of course need to ensure that you have that include subfolder as part of your include path. 56 | 57 | It may help to see a simple [example](examples/example_without_cmake). 
58 | 59 | ## The API 60 | 61 | Clockwork modular arithmetic is a header-only library, and the API is exposed by very short and simple header files (all headers not under any *detail* folder). There are two main folder groupings: montgomery_arithmetic, and modular_arithmetic (i.e. standard non-montgomery). A quick summary of the header files and functions is provided below; in all cases T is a template parameter of integral type. Please view the header files for their documentation. Probably the single most useful file is MontgomeryForm.h, discussed below. 62 | 63 | From the modular_arithmetic group, the files *absolute_value_difference.h*, *modular_addition.h*, *modular_subtraction.h*, *modular_multiplication.h*, *modular_multiplicative_inverse.h*, and *modular_pow.h* provide the following functions, using standard (non-Montgomery) modular arithmetic: 64 | 65 | *hurchalla::absolute_value_difference(T a, T b)*. Returns the absolute value of (a-b), performed as if a and b are infinite precision signed ints. 66 | *hurchalla::modular_subtraction_prereduced_inputs(T a, T b, T modulus)*. Let a conceptual "%%" operator represent a modulo operator that always returns a non-negative remainder. This function returns (a-b) %% modulus, performed as if a and b are infinite precision signed ints. 67 | *hurchalla::modular_addition_prereduced_inputs(T a, T b, T modulus)*. Returns (a+b)%modulus, performed as if a and b have infinite precision and thus as if (a+b) is never subject to integer overflow. 68 | *hurchalla::modular_multiplication_prereduced_inputs(T a, T b, T modulus)*. Returns (a\*b)%modulus, performed as if a and b have infinite precision. 69 | *hurchalla::modular_multiplicative_inverse(T a, T modulus)*. Returns the multiplicative inverse of a if it exists, and otherwise returns 0. 70 | *hurchalla::modular_pow(T base, T exponent, T modulus)*. Returns the modular exponentiation of base to the exponent (mod modulus). 
71 | 72 | From the montgomery_arithmetic group, the file *MontgomeryForm.h* provides the easy to use (and zero cost abstraction) class *hurchalla::MontgomeryForm*, which has simple member functions for performing operations in the Montgomery domain. These operations include converting to/from Montgomery domain, add, subtract, multiply, square, [fused-multiply-add/sub](https://jeffhurchalla.com/2022/05/01/the-montgomery-multiply-accumulate), pow, gcd, and more. For improved performance, if you can guarantee your modulus will be under half or under a quarter of the maximum value of your integer type T, the file *montgomery_form_aliases.h* provides aliases of the class MontgomeryForm which typically run ~5-10% faster. 73 | 74 | For an easy demonstration of MontgomeryForm, you can see one of the [examples](examples/example_without_cmake). 75 | 76 | If you prefer not to use the high level interface of MontgomeryForm, and instead wish to directly call low level Montgomery arithmetic functions (such as REDC), the API header files within montgomery_arithmetic/low_level_api provide the essential low level functions. 77 | 78 | ## Performance Notes 79 | 80 | If you're interested in experimenting, defining certain macros when compiling might improve performance - see [macros_for_performance.md](macros_for_performance.md). 81 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/AbstractMontgomeryWrapper.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
6 | */ 7 | 8 | #ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_ABSTRACT_MONTGOMERY_WRAPPER_H_INCLUDED 9 | #define HURCHALLA_MONTGOMERY_ARITHMETIC_ABSTRACT_MONTGOMERY_WRAPPER_H_INCLUDED 10 | 11 | 12 | //#include "AbstractMontgomeryForm.h" 13 | #include 14 | 15 | namespace hurchalla { 16 | 17 | // AMF should be AbstractMontgomeryForm or AbstractMontgomeryForm 18 | template 19 | class AbstractMontgomeryWrapper final { 20 | std::unique_ptr pimpl; 21 | public: 22 | using IntegerType = typename AMF::IntegerType; 23 | using MontgomeryValue = typename AMF::MontgomeryValue; 24 | using CanonicalValue = typename AMF::CanonicalValue; 25 | using FusingValue = typename AMF::FusingValue; 26 | // using RU = typename AMF::RU; 27 | 28 | explicit AbstractMontgomeryWrapper(std::unique_ptr pimpl_) 29 | : pimpl(std::move(pimpl_)) {} 30 | 31 | IntegerType max_modulus() const { return pimpl->max_modulus(); } 32 | IntegerType getModulus() const { return pimpl->getModulus(); } 33 | 34 | template 35 | MontgomeryValue convertIn(IntegerType a) const 36 | { return pimpl->template convertIn(a); } 37 | 38 | template 39 | IntegerType convertOut(MontgomeryValue x) const 40 | { return pimpl->template convertOut(x); } 41 | 42 | CanonicalValue getCanonicalValue(MontgomeryValue x) const 43 | { return pimpl->getCanonicalValue(x); } 44 | FusingValue getFusingValue(MontgomeryValue x) const 45 | { return pimpl->getFusingValue(x); } 46 | CanonicalValue getUnityValue() const 47 | { return pimpl->getUnityValue(); } 48 | CanonicalValue getZeroValue() const 49 | { return pimpl->getZeroValue(); } 50 | CanonicalValue getNegativeOneValue() const 51 | { return pimpl->getNegativeOneValue(); } 52 | MontgomeryValue add(MontgomeryValue x, MontgomeryValue y) const 53 | { return pimpl->add(x, y); } 54 | MontgomeryValue add(MontgomeryValue x, CanonicalValue y) const 55 | { return pimpl->add(x, y); } 56 | MontgomeryValue add(CanonicalValue x, MontgomeryValue y) const 57 | { return pimpl->add(x, y); } 58 | CanonicalValue 
add(CanonicalValue x, CanonicalValue y) const 59 | { return pimpl->add(x, y); } 60 | 61 | template 62 | MontgomeryValue subtract(MontgomeryValue x, MontgomeryValue y) const 63 | { return pimpl->template subtract(x, y); } 64 | template 65 | MontgomeryValue subtract(MontgomeryValue x, CanonicalValue y) const 66 | { return pimpl->template subtract(x, y); } 67 | template 68 | MontgomeryValue subtract(CanonicalValue x, MontgomeryValue y) const 69 | { return pimpl->template subtract(x, y); } 70 | template 71 | CanonicalValue subtract(CanonicalValue x, CanonicalValue y) const 72 | { return pimpl->template subtract(x, y); } 73 | 74 | MontgomeryValue unorderedSubtract(MontgomeryValue x, MontgomeryValue y) const 75 | { return pimpl->unorderedSubtract(x, y); } 76 | MontgomeryValue unorderedSubtract(MontgomeryValue x, CanonicalValue y) const 77 | { return pimpl->unorderedSubtract(x, y); } 78 | MontgomeryValue unorderedSubtract(CanonicalValue x, MontgomeryValue y) const 79 | { return pimpl->unorderedSubtract(x, y); } 80 | MontgomeryValue negate(MontgomeryValue x) const 81 | { return pimpl->negate(x); } 82 | CanonicalValue negate(CanonicalValue x) const 83 | { return pimpl->negate(x); } 84 | 85 | MontgomeryValue two_times(MontgomeryValue x) const 86 | { return pimpl->two_times(x); } 87 | CanonicalValue two_times(CanonicalValue x) const 88 | { return pimpl->two_times(x); } 89 | 90 | MontgomeryValue halve(MontgomeryValue x) const 91 | { return pimpl->halve(x); } 92 | CanonicalValue halve(CanonicalValue x) const 93 | { return pimpl->halve(x); } 94 | 95 | template 96 | MontgomeryValue multiply(MontgomeryValue x, MontgomeryValue y) const 97 | { return pimpl->template multiply(x, y); } 98 | 99 | template 100 | MontgomeryValue multiply(MontgomeryValue x, MontgomeryValue y, bool& resultIsZero) const 101 | { return pimpl->template multiply(x, y, resultIsZero); } 102 | 103 | template 104 | MontgomeryValue fmsub(MontgomeryValue x, MontgomeryValue y, CanonicalValue z) const 105 | { return 
pimpl->template fmsub(x, y, z); } 106 | 107 | template 108 | MontgomeryValue fmsub(MontgomeryValue x, MontgomeryValue y, FusingValue z) const 109 | { return pimpl->template fmsub(x, y, z); } 110 | 111 | template 112 | MontgomeryValue fmadd(MontgomeryValue x, MontgomeryValue y, CanonicalValue z) const 113 | { return pimpl->template fmadd(x, y, z); } 114 | 115 | template 116 | MontgomeryValue fmadd(MontgomeryValue x, MontgomeryValue y, FusingValue z) const 117 | { return pimpl->template fmadd(x, y, z); } 118 | 119 | template 120 | MontgomeryValue square(MontgomeryValue x) const 121 | { return pimpl->template square(x); } 122 | 123 | template 124 | MontgomeryValue fusedSquareSub(MontgomeryValue x, CanonicalValue cv) const 125 | { return pimpl->template fusedSquareSub(x, cv); } 126 | 127 | template 128 | MontgomeryValue fusedSquareAdd(MontgomeryValue x, CanonicalValue cv) const 129 | { return pimpl->template fusedSquareAdd(x, cv); } 130 | 131 | template 132 | CanonicalValue inverse(MontgomeryValue x) const 133 | { return pimpl->template inverse(x); } 134 | 135 | MontgomeryValue pow(MontgomeryValue base, IntegerType exponent) const 136 | { return pimpl->pow(base, exponent); } 137 | 138 | MontgomeryValue two_pow(IntegerType exponent) const 139 | { return pimpl->two_pow(exponent); } 140 | 141 | template 142 | std::array 143 | pow(const std::array& bases, IntegerType exponent) const 144 | { return pimpl->pow(bases, exponent); } 145 | 146 | template 147 | IntegerType gcd_with_modulus(MontgomeryValue x, const F& gcd_functor) const 148 | { return pimpl->gcd_with_modulus(x, gcd_functor); } 149 | 150 | template 151 | IntegerType remainder(IntegerType a) const 152 | { return pimpl->template remainder(a); } 153 | }; 154 | 155 | 156 | } // end namespace 157 | 158 | #endif 159 | -------------------------------------------------------------------------------- /test/modular_arithmetic/test_modular_pow.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | 9 | // We'll undefine HURCHALLA_DISALLOW_INLINE_ASM_MODMUL here in order to make 10 | // modular multiplication use an inline asm function version if it is available. 11 | // This shouldn't be strictly necessary, since there's no reason this macro 12 | // would be defined at this point, and by default modular multiplication uses 13 | // inline asm (if available) unless this macro is defined. 14 | // Internally, the inline asm function will also call the generic template 15 | // function version of modular multiplication inside a postcondition, in order 16 | // to make sure that the asm result is correct. Of course postcondition checks 17 | // must be enabled for this check to occur - the easiest way to ensure 18 | // postconditions are enabled is to define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS, 19 | // which is why we do so here. This is all strictly for testing purposes. 
20 | #undef HURCHALLA_DISALLOW_INLINE_ASM_MODMUL 21 | 22 | #ifndef HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 23 | # define HURCHALLA_CLOCKWORK_ENABLE_ASSERTS 24 | #endif 25 | 26 | 27 | #include "hurchalla/modular_arithmetic/modular_pow.h" 28 | #include "hurchalla/modular_arithmetic/modular_multiplication.h" 29 | #include "hurchalla/util/traits/ut_numeric_limits.h" 30 | #include "hurchalla/util/compiler_macros.h" 31 | #include "gtest/gtest.h" 32 | #include 33 | 34 | namespace { 35 | 36 | 37 | namespace hc = ::hurchalla; 38 | 39 | template 40 | T brute_modular_pow(T base, T power, T modulus) 41 | { 42 | T result = 1; 43 | for (T i=0; i 51 | void test_modulus(T modulus) 52 | { 53 | static_cast(modulus); 54 | 55 | T base = 0; 56 | T power = 0; 57 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 58 | base = 0; power = 1; 59 | EXPECT_TRUE(static_cast(0) == hc::modular_pow(base, power, modulus)); 60 | base = 0; power = 2; 61 | EXPECT_TRUE(static_cast(0) == hc::modular_pow(base, power, modulus)); 62 | base = 1; power = 0; 63 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 64 | base = 1; power = 1; 65 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 66 | base = 1; power = 2; 67 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 68 | 69 | base = static_cast(modulus - 1); 70 | power = 0; 71 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 72 | power = 1; 73 | EXPECT_TRUE(base == hc::modular_pow(base, power, modulus)); 74 | power = 2; 75 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 76 | power = 3; 77 | EXPECT_TRUE(base == hc::modular_pow(base, power, modulus)); 78 | 79 | T tmax = hc::ut_numeric_limits::max(); 80 | // make power the largest possible even number 81 | power = static_cast((tmax/2)*2); 82 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 83 | --power; // power should now be odd 84 | EXPECT_TRUE(base == 
hc::modular_pow(base, power, modulus)); 85 | 86 | base = modulus; 87 | power = 2; 88 | EXPECT_TRUE(static_cast(0) == hc::modular_pow(base, power, modulus)); 89 | power = 5; 90 | EXPECT_TRUE(static_cast(0) == hc::modular_pow(base, power, modulus)); 91 | 92 | if (modulus < tmax) { 93 | base = static_cast(modulus + 1); 94 | power = 2; 95 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 96 | power = 5; 97 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 98 | } 99 | 100 | T tmp = static_cast((modulus/4)*4); // make tmp == 4n for some integer n 101 | base = static_cast(tmp/2); 102 | power = 2; 103 | EXPECT_TRUE(static_cast(0) == hc::modular_pow(base, power, tmp)); 104 | } 105 | 106 | 107 | template 108 | void test_modular_pow() 109 | { 110 | // test with a few basic examples first 111 | T modulus = 13; 112 | T base = 5; 113 | T power = 12; 114 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 115 | base = 7; power = 6; 116 | EXPECT_TRUE(static_cast(12) == hc::modular_pow(base, power, modulus)); 117 | modulus = 14; 118 | EXPECT_TRUE(static_cast(7) == hc::modular_pow(base, power, modulus)); 119 | 120 | base = 5; 121 | power = 53; 122 | modulus = 13; 123 | EXPECT_TRUE(static_cast(5) == hc::modular_pow(base, power, modulus)); 124 | base = 6; 125 | EXPECT_TRUE(static_cast(2) == hc::modular_pow(base, power, modulus)); 126 | 127 | test_modulus(static_cast(13)); 128 | test_modulus(static_cast(14)); 129 | 130 | // --------- Test using moduli that are likely edge cases -------- 131 | 132 | modulus = 2; 133 | base = 0; power = 0; 134 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 135 | base = 0; power = 5; 136 | EXPECT_TRUE(static_cast(0) == hc::modular_pow(base, power, modulus)); 137 | base = 1; power = 0; 138 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 139 | base = 31; power = 0; 140 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 141 | 
base = 1; power = 3; 142 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 143 | base = 17; power = 3; 144 | EXPECT_TRUE(static_cast(1) == hc::modular_pow(base, power, modulus)); 145 | base = 14; power = 3; 146 | EXPECT_TRUE(static_cast(0) == hc::modular_pow(base, power, modulus)); 147 | 148 | modulus = hc::ut_numeric_limits::max(); 149 | test_modulus(modulus); 150 | modulus--; 151 | test_modulus(modulus); 152 | 153 | modulus = hc::ut_numeric_limits::max() / 2; 154 | test_modulus(modulus); 155 | modulus++; 156 | test_modulus(modulus); 157 | } 158 | 159 | 160 | 161 | TEST(ModularArithmetic, modular_pow) { 162 | test_modular_pow(); 163 | test_modular_pow(); 164 | test_modular_pow(); 165 | test_modular_pow(); 166 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 167 | test_modular_pow<__uint128_t>(); 168 | #endif 169 | } 170 | 171 | TEST(ModularArithmetic, modular_pow_large_exponents) { 172 | // test a couple large exponent cases 173 | std::uint32_t base = 81452; 174 | std::uint32_t exponent = 113; 175 | std::uint32_t modulus = 2951486173u; 176 | std::uint32_t result = hc::modular_pow(base, exponent, modulus); 177 | EXPECT_TRUE(result == brute_modular_pow(base, exponent, modulus)); 178 | 179 | base = 81451; 180 | exponent = 113; 181 | result = hc::modular_pow(base, exponent, modulus); 182 | EXPECT_TRUE(result == brute_modular_pow(base, exponent, modulus)); 183 | 184 | exponent = 114; 185 | result = hc::modular_pow(base, exponent, modulus); 186 | EXPECT_TRUE(result == brute_modular_pow(base, exponent, modulus)); 187 | } 188 | 189 | 190 | } // end unnamed namespace 191 | -------------------------------------------------------------------------------- /test/montgomery_arithmetic/low_level_api/test_get_Rsquared_mod_n.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022 Jeffrey Hurchalla. 2 | /* 3 | * This Source Code Form is subject to the terms of the Mozilla Public 4 | * License, v. 2.0. 
If a copy of the MPL was not distributed with this 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 | */ 7 | 8 | // lets us get inside the black box of get_Rsquared_mod_n() to ensure that 9 | // we test the complex compiled possibility rather than the trivial one. 10 | #define HURCHALLA_TESTING_RSQUARED_MOD_N 1 11 | 12 | #include "hurchalla/montgomery_arithmetic/low_level_api/get_Rsquared_mod_n.h" 13 | #include "hurchalla/montgomery_arithmetic/low_level_api/inverse_mod_R.h" 14 | #include "hurchalla/montgomery_arithmetic/low_level_api/get_R_mod_n.h" 15 | #include "hurchalla/util/traits/safely_promote_unsigned.h" 16 | #include "hurchalla/util/traits/ut_numeric_limits.h" 17 | #include "gtest/gtest.h" 18 | #include 19 | #include 20 | 21 | namespace { 22 | 23 | 24 | using namespace ::hurchalla; 25 | 26 | 27 | template 28 | void test_single_R2(T n) 29 | { 30 | static_assert(ut_numeric_limits::is_integer, ""); 31 | static_assert(!ut_numeric_limits::is_signed, ""); 32 | constexpr int digitsR = ut_numeric_limits::digits; 33 | constexpr T Rdiv4 = static_cast(1) << (digitsR - 2); 34 | 35 | T rmodn = get_R_mod_n(n); 36 | T inv = inverse_mod_R(n); 37 | using P = typename safely_promote_unsigned::type; 38 | T one = static_cast(1); 39 | // the next line tests inverse_mod_R - we might as well test it while here. 40 | EXPECT_TRUE(static_cast(static_cast

<P>(inv) * static_cast<P>

(n)) == one); 41 | T answer = modular_multiplication_prereduced_inputs(rmodn, rmodn, n); 42 | 43 | if (n < Rdiv4) { 44 | T r2modn_1 = get_Rsquared_mod_n(n, inv, rmodn); 45 | T r2modn_2 = get_Rsquared_mod_n(n, inv, rmodn); 46 | EXPECT_TRUE(r2modn_1 == answer); 47 | EXPECT_TRUE(r2modn_2 == answer); 48 | } 49 | // test version that works for all n 50 | { 51 | T r2modn_1 = get_Rsquared_mod_n(n, inv, rmodn); 52 | T r2modn_2 = get_Rsquared_mod_n(n, inv, rmodn); 53 | EXPECT_TRUE(r2modn_1 == answer); 54 | EXPECT_TRUE(r2modn_2 == answer); 55 | } 56 | } 57 | 58 | 59 | template 60 | void test_single_R2_array(T n) 61 | { 62 | static_assert(ut_numeric_limits::is_integer, ""); 63 | static_assert(!ut_numeric_limits::is_signed, ""); 64 | constexpr int digitsR = ut_numeric_limits::digits; 65 | constexpr T Rdiv4 = static_cast(1) << (digitsR - 2); 66 | 67 | std::array a_n; 68 | std::array a_rmn; 69 | std::array a_invn; 70 | std::array answer; 71 | for (std::size_t i=0; i(n - 2*i); 76 | a_rmn[i] = get_R_mod_n(a_n[i]); 77 | a_invn[i] = inverse_mod_R(a_n[i]); 78 | answer[i] = modular_multiplication_prereduced_inputs( 79 | a_rmn[i], a_rmn[i], a_n[i]); 80 | // we might as well test inverse_mod_R while here. 81 | using P = typename safely_promote_unsigned::type; 82 | T one = static_cast(1); 83 | EXPECT_TRUE(static_cast( 84 | static_cast

<P>(a_invn[i]) * static_cast<P>

(a_n[i])) == one); 85 | } 86 | 87 | // since we subtracted from n to set a_n, (a_n[0] < Rdiv4) covers all a_n[i] 88 | if (a_n[0] < Rdiv4) { 89 | auto r2mn1 = get_Rsquared_mod_n 90 | (a_n, a_invn, a_rmn); 91 | auto r2mn2 = get_Rsquared_mod_n 92 | (a_n, a_invn, a_rmn); 93 | EXPECT_TRUE(r2mn1 == answer); 94 | EXPECT_TRUE(r2mn2 == answer); 95 | } 96 | // test version that works for any size a_n[i] 97 | { 98 | auto r2mn1 = get_Rsquared_mod_n 99 | (a_n, a_invn, a_rmn); 100 | auto r2mn2 = get_Rsquared_mod_n 101 | (a_n, a_invn, a_rmn); 102 | EXPECT_TRUE(r2mn1 == answer); 103 | EXPECT_TRUE(r2mn2 == answer); 104 | } 105 | } 106 | 107 | 108 | 109 | template 110 | void test_R2_exhaustive() 111 | { 112 | T max = ut_numeric_limits::max(); 113 | EXPECT_TRUE(max > 0); 114 | T evenmax = static_cast((max/2)*2); 115 | T oddmax = (evenmax != max) ? max : static_cast(max - 1); 116 | // get_Rsquared_mod_n's preconditions require input n is odd and > 1. 117 | for (T n=oddmax; n>1; n=static_cast(n-2)) { 118 | test_single_R2(n); 119 | test_single_R2_array<3>(n); // array size of 3 is an arbitrary size 120 | } 121 | } 122 | 123 | 124 | template 125 | void test_R2() 126 | { 127 | T max = ut_numeric_limits::max(); 128 | EXPECT_TRUE(max > 0); 129 | T evenmax = static_cast((max/2)*2); 130 | T oddmax = (evenmax != max) ? max : static_cast(max - 1); 131 | T oddquartermax = static_cast((max/8)*2 + 1); 132 | 133 | // get_Rsquared_mod_n's preconditions require input n is odd and > 1. 
134 | 135 | T n = 3; 136 | test_single_R2(n); 137 | test_single_R2_array<1>(n); 138 | test_single_R2_array<2>(n); 139 | test_single_R2_array<5>(n); 140 | 141 | n = 9; 142 | test_single_R2(n); 143 | test_single_R2_array<1>(n); 144 | test_single_R2_array<2>(n); 145 | test_single_R2_array<5>(n); 146 | 147 | n = 11; 148 | test_single_R2(n); 149 | test_single_R2_array<1>(n); 150 | test_single_R2_array<2>(n); 151 | test_single_R2_array<5>(n); 152 | 153 | n = 21; 154 | test_single_R2(n); 155 | test_single_R2_array<1>(n); 156 | test_single_R2_array<2>(n); 157 | test_single_R2_array<5>(n); 158 | 159 | n = oddmax; 160 | test_single_R2(n); 161 | test_single_R2_array<1>(n); 162 | test_single_R2_array<2>(n); 163 | test_single_R2_array<5>(n); 164 | 165 | n = static_cast(oddmax - 2); 166 | test_single_R2(n); 167 | test_single_R2_array<1>(n); 168 | test_single_R2_array<2>(n); 169 | test_single_R2_array<5>(n); 170 | 171 | n = static_cast(oddmax - 6); 172 | test_single_R2(n); 173 | test_single_R2_array<1>(n); 174 | test_single_R2_array<2>(n); 175 | test_single_R2_array<5>(n); 176 | 177 | n = oddquartermax; 178 | test_single_R2(n); 179 | test_single_R2_array<1>(n); 180 | test_single_R2_array<2>(n); 181 | test_single_R2_array<5>(n); 182 | 183 | n = static_cast(oddquartermax + 2); 184 | test_single_R2(n); 185 | test_single_R2_array<1>(n); 186 | test_single_R2_array<2>(n); 187 | test_single_R2_array<5>(n); 188 | 189 | n = static_cast(oddquartermax - 2); 190 | test_single_R2(n); 191 | test_single_R2_array<1>(n); 192 | test_single_R2_array<2>(n); 193 | test_single_R2_array<5>(n); 194 | } 195 | 196 | 197 | 198 | TEST(MontgomeryArithmetic, get_Rsquared_mod_N) { 199 | test_R2(); 200 | test_R2(); 201 | test_R2(); 202 | test_R2(); 203 | #if HURCHALLA_COMPILER_HAS_UINT128_T() 204 | test_R2<__uint128_t>(); 205 | #endif 206 | 207 | test_R2_exhaustive(); 208 | test_R2_exhaustive(); 209 | } 210 | 211 | 212 | } // end unnamed namespace 213 | 
-------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_pow_2kary/timings_ARM64_M2/64_half_gcc_noasm_array.txt: -------------------------------------------------------------------------------- 1 | 2 | ./testbench_2kary.sh g++ O3 MontgomeryHalf uint64_t 191 8 22 -DTEST_ARRAY 3 | 4 | compilation finished, now executing: 5 | ---Running Program--- 6 | 7 | begin benchmarks - array pow 8 | (ignore)17513345000201297728 9 | 10 | OVERALL BEST: 11 | 0.1842 3 00 x 08 12 | 0.1859 3 01 x 08 13 | 0.1863 3 01 x 12 14 | 0.1874 3 01 x 07 15 | 0.1876 3 01 x 10 16 | 0.1877 3 00 x 07 17 | 0.1878 3 00 x 10 18 | 0.1881 3 00 x 12 19 | 0.1887 3 01 x 06 20 | 0.1897 4 00 x 08 21 | 0.1898 3 00 x 06 22 | 0.1901 4 01 x 08 23 | 0.1932 4 01 x 12 24 | 0.1941 4 00 x 07 25 | 0.1946 4 01 x 10 26 | 0.1948 4 00 x 12 27 | 0.1950 4 01 x 07 28 | 0.1954 4 00 x 06 29 | 0.1955 3 00 x 05 30 | 0.1955 4 00 x 10 31 | 0.1968 4 01 x 06 32 | 0.1982 3 01 x 05 33 | 0.2006 2 01 x 07 34 | 0.2016 2 00 x 08 35 | 0.2018 2 00 x 07 36 | 0.2030 4 00 x 05 37 | 0.2032 2 01 x 08 38 | 0.2044 2 00 x 12 39 | 0.2055 2 00 x 06 40 | 0.2060 4 01 x 05 41 | 0.2061 2 00 x 10 42 | 0.2065 2 01 x 10 43 | 0.2071 2 01 x 12 44 | 0.2084 2 01 x 06 45 | 0.2142 2 00 x 05 46 | 0.2148 3 00 x 04 47 | 0.2150 3 01 x 04 48 | 0.2154 5 00 x 08 49 | 0.2171 2 01 x 05 50 | 0.2179 5 01 x 08 51 | 0.2193 4 00 x 04 52 | 0.2206 4 01 x 04 53 | 0.2283 5 00 x 07 54 | 0.2284 5 00 x 12 55 | 0.2287 5 00 x 10 56 | 0.2292 5 01 x 12 57 | 0.2300 5 01 x 07 58 | 0.2305 5 01 x 06 59 | 0.2308 5 01 x 10 60 | 0.2325 5 00 x 06 61 | 0.2369 5 00 x 05 62 | 0.2391 5 01 x 05 63 | 0.2443 2 01 x 04 64 | 0.2463 2 00 x 04 65 | 0.2481 5 00 x 04 66 | 0.2502 5 01 x 04 67 | 0.2660 3 00 x 03 68 | 0.2679 3 01 x 03 69 | 0.2707 4 00 x 03 70 | 0.2727 4 01 x 03 71 | 0.2955 2 01 x 03 72 | 0.2961 2 00 x 03 73 | 0.3002 5 00 x 03 74 | 0.3020 5 01 x 03 75 | 0.3633 4 01 x 02 76 | 0.3634 
4 00 x 02 77 | 0.3687 3 00 x 02 78 | 0.3688 3 01 x 02 79 | 0.3803 5 01 x 02 80 | 0.3804 5 00 x 02 81 | 0.4116 2 01 x 02 82 | 0.4118 2 00 x 02 83 | Timings By Test Type: 84 | 0.0549 3 00 x 08 0.0549 3 00 x 08 0.0372 3 00 x 08 0.0372 3 00 x 08 85 | 0.0554 3 01 x 08 0.0553 3 01 x 08 0.0376 3 01 x 12 0.0375 3 01 x 08 86 | 0.0556 4 00 x 08 0.0554 3 01 x 12 0.0376 3 01 x 08 0.0376 3 01 x 12 87 | 0.0558 3 01 x 12 0.0555 3 01 x 10 0.0377 3 00 x 12 0.0377 3 01 x 10 88 | 0.0558 4 01 x 08 0.0555 4 00 x 08 0.0377 3 01 x 10 0.0378 3 01 x 07 89 | 0.0559 3 01 x 07 0.0556 3 00 x 12 0.0379 3 00 x 07 0.0378 3 00 x 12 90 | 0.0560 3 00 x 07 0.0557 3 00 x 10 0.0379 3 01 x 07 0.0378 3 00 x 07 91 | 0.0563 3 01 x 06 0.0557 4 01 x 08 0.0379 3 00 x 10 0.0379 3 00 x 10 92 | 0.0564 3 00 x 10 0.0558 3 01 x 07 0.0381 3 01 x 06 0.0381 3 01 x 06 93 | 0.0566 3 00 x 06 0.0559 3 00 x 07 0.0383 3 00 x 06 0.0383 3 00 x 06 94 | 0.0567 3 01 x 10 0.0561 4 01 x 12 0.0392 4 00 x 08 0.0393 3 00 x 05 95 | 0.0568 4 00 x 07 0.0562 3 01 x 06 0.0392 4 01 x 08 0.0393 4 00 x 08 96 | 0.0569 3 00 x 12 0.0565 4 01 x 10 0.0393 3 00 x 05 0.0393 4 01 x 08 97 | 0.0570 4 01 x 07 0.0566 4 00 x 12 0.0398 4 01 x 12 0.0399 3 01 x 05 98 | 0.0571 4 00 x 06 0.0566 3 00 x 06 0.0399 3 01 x 05 0.0399 4 01 x 12 99 | 0.0574 4 01 x 12 0.0567 4 00 x 10 0.0400 2 01 x 07 0.0400 2 01 x 07 100 | 0.0575 4 01 x 06 0.0567 4 00 x 07 0.0402 2 00 x 07 0.0401 4 00 x 12 101 | 0.0576 4 01 x 10 0.0571 4 01 x 07 0.0402 2 00 x 08 0.0402 2 00 x 07 102 | 0.0577 4 00 x 10 0.0571 4 00 x 06 0.0402 4 00 x 12 0.0403 2 00 x 08 103 | 0.0578 4 00 x 12 0.0575 4 01 x 06 0.0403 4 01 x 10 0.0403 4 00 x 07 104 | 0.0584 3 00 x 05 0.0585 3 00 x 05 0.0404 2 00 x 12 0.0403 4 01 x 10 105 | 0.0592 3 01 x 05 0.0592 3 01 x 05 0.0404 4 00 x 07 0.0403 2 01 x 08 106 | 0.0596 4 00 x 05 0.0596 4 00 x 05 0.0404 4 00 x 10 0.0404 4 01 x 07 107 | 0.0603 2 01 x 07 0.0603 2 01 x 07 0.0405 4 01 x 07 0.0405 4 00 x 06 108 | 0.0604 4 01 x 05 0.0605 4 01 x 05 0.0405 4 00 x 06 0.0406 2 00 x 
12 109 | 0.0605 2 00 x 08 0.0606 2 00 x 08 0.0405 2 01 x 08 0.0407 4 00 x 10 110 | 0.0608 2 00 x 07 0.0607 2 00 x 07 0.0408 2 00 x 10 0.0408 2 01 x 10 111 | 0.0615 2 01 x 08 0.0608 2 01 x 08 0.0408 4 01 x 06 0.0408 4 01 x 06 112 | 0.0616 5 00 x 08 0.0610 2 00 x 12 0.0408 2 01 x 10 0.0409 2 00 x 06 113 | 0.0618 2 00 x 06 0.0617 5 00 x 08 0.0409 2 00 x 06 0.0410 2 01 x 12 114 | 0.0624 5 01 x 08 0.0617 2 01 x 10 0.0411 2 01 x 12 0.0410 2 00 x 10 115 | 0.0624 2 00 x 12 0.0617 2 00 x 10 0.0415 2 01 x 06 0.0415 2 01 x 06 116 | 0.0626 2 00 x 10 0.0618 2 00 x 06 0.0419 4 00 x 05 0.0419 4 00 x 05 117 | 0.0627 2 01 x 06 0.0622 2 01 x 12 0.0426 4 01 x 05 0.0426 4 01 x 05 118 | 0.0629 2 01 x 12 0.0622 5 01 x 08 0.0427 2 00 x 05 0.0427 2 00 x 05 119 | 0.0632 2 01 x 10 0.0627 2 01 x 06 0.0428 3 00 x 04 0.0428 3 00 x 04 120 | 0.0644 2 00 x 05 0.0644 2 00 x 05 0.0430 3 01 x 04 0.0430 3 01 x 04 121 | 0.0645 3 01 x 04 0.0644 5 00 x 10 0.0433 2 01 x 05 0.0433 2 01 x 05 122 | 0.0646 3 00 x 04 0.0645 5 00 x 12 0.0448 4 00 x 04 0.0447 4 00 x 04 123 | 0.0649 4 00 x 04 0.0645 3 01 x 04 0.0451 4 01 x 04 0.0451 4 01 x 04 124 | 0.0650 5 00 x 07 0.0646 3 00 x 04 0.0461 5 00 x 08 0.0461 5 00 x 08 125 | 0.0652 4 01 x 04 0.0646 5 01 x 12 0.0466 5 01 x 08 0.0467 5 01 x 08 126 | 0.0653 2 01 x 05 0.0649 4 00 x 04 0.0484 2 01 x 04 0.0484 2 01 x 04 127 | 0.0655 5 01 x 06 0.0649 5 00 x 07 0.0487 5 00 x 12 0.0487 2 00 x 04 128 | 0.0655 5 01 x 07 0.0652 4 01 x 04 0.0487 2 00 x 04 0.0489 5 00 x 12 129 | 0.0663 5 00 x 10 0.0653 2 01 x 05 0.0490 5 01 x 12 0.0490 5 00 x 10 130 | 0.0663 5 00 x 12 0.0653 5 01 x 07 0.0490 5 00 x 10 0.0491 5 01 x 12 131 | 0.0663 5 00 x 06 0.0654 5 01 x 10 0.0492 5 00 x 07 0.0492 5 00 x 07 132 | 0.0665 5 01 x 12 0.0656 5 01 x 06 0.0493 5 01 x 10 0.0493 5 01 x 10 133 | 0.0669 5 01 x 10 0.0662 5 00 x 06 0.0496 5 01 x 07 0.0496 5 01 x 07 134 | 0.0677 5 00 x 05 0.0677 5 00 x 05 0.0497 5 01 x 06 0.0497 5 01 x 06 135 | 0.0683 5 01 x 05 0.0683 5 01 x 05 0.0500 5 00 x 06 0.0500 5 00 x 
06 136 | 0.0719 5 00 x 04 0.0720 5 00 x 04 0.0508 5 00 x 05 0.0507 5 00 x 05 137 | 0.0725 5 01 x 04 0.0725 5 01 x 04 0.0513 5 01 x 05 0.0513 5 01 x 05 138 | 0.0738 2 01 x 04 0.0738 2 01 x 04 0.0521 5 00 x 04 0.0521 5 00 x 04 139 | 0.0744 2 00 x 04 0.0744 2 00 x 04 0.0525 3 00 x 03 0.0525 3 00 x 03 140 | 0.0805 4 00 x 03 0.0804 3 00 x 03 0.0526 5 01 x 04 0.0526 5 01 x 04 141 | 0.0805 3 00 x 03 0.0806 4 00 x 03 0.0531 3 01 x 03 0.0530 3 01 x 03 142 | 0.0809 3 01 x 03 0.0809 3 01 x 03 0.0548 4 00 x 03 0.0548 4 00 x 03 143 | 0.0811 4 01 x 03 0.0811 4 01 x 03 0.0553 4 01 x 03 0.0553 4 01 x 03 144 | 0.0874 5 00 x 03 0.0873 5 00 x 03 0.0582 2 01 x 03 0.0582 2 01 x 03 145 | 0.0878 5 01 x 03 0.0878 5 01 x 03 0.0583 2 00 x 03 0.0583 2 00 x 03 146 | 0.0896 2 01 x 03 0.0895 2 01 x 03 0.0627 5 00 x 03 0.0627 5 00 x 03 147 | 0.0898 2 00 x 03 0.0898 2 00 x 03 0.0632 5 01 x 03 0.0632 5 01 x 03 148 | 0.1098 4 01 x 02 0.1095 4 01 x 02 0.0720 3 00 x 02 0.0720 4 01 x 02 149 | 0.1099 4 00 x 02 0.1095 4 00 x 02 0.0720 4 00 x 02 0.0720 4 00 x 02 150 | 0.1124 3 01 x 02 0.1122 3 00 x 02 0.0720 4 01 x 02 0.0720 3 01 x 02 151 | 0.1125 3 00 x 02 0.1123 3 01 x 02 0.0720 3 01 x 02 0.0721 3 00 x 02 152 | 0.1134 5 00 x 02 0.1130 5 01 x 02 0.0769 5 01 x 02 0.0769 5 01 x 02 153 | 0.1135 5 01 x 02 0.1132 5 00 x 02 0.0769 5 00 x 02 0.0770 5 00 x 02 154 | 0.1260 2 01 x 02 0.1255 2 01 x 02 0.0800 2 00 x 02 0.0800 2 01 x 02 155 | 0.1260 2 00 x 02 0.1256 2 00 x 02 0.0800 2 01 x 02 0.0802 2 00 x 02 156 | ---Benchmark Program Finished--- 157 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_pow_2kary/timings_ARM64_M2/64_half_clang_noasm_array.txt: -------------------------------------------------------------------------------- 1 | 2 | ./testbench_2kary.sh clang++ O3 MontgomeryHalf uint64_t 191 8 22 -DTEST_ARRAY 3 | 4 | compilation finished, now executing: 5 | ---Running Program--- 
6 | 7 | begin benchmarks - array pow 8 | (ignore)17513345000201297728 9 | 10 | OVERALL BEST: 11 | 0.1760 3 00 x 08 12 | 0.1776 3 01 x 08 13 | 0.1822 4 00 x 08 14 | 0.1836 4 01 x 08 15 | 0.1894 3 00 x 12 16 | 0.1894 3 00 x 10 17 | 0.1896 2 00 x 08 18 | 0.1898 3 01 x 10 19 | 0.1902 2 01 x 08 20 | 0.1920 4 01 x 12 21 | 0.1925 4 00 x 10 22 | 0.1930 4 01 x 10 23 | 0.1931 3 01 x 12 24 | 0.1947 3 01 x 07 25 | 0.1949 4 00 x 12 26 | 0.1954 3 00 x 07 27 | 0.1958 4 01 x 07 28 | 0.1967 4 00 x 07 29 | 0.1995 3 01 x 06 30 | 0.1999 4 00 x 06 31 | 0.2000 4 01 x 06 32 | 0.2003 3 00 x 06 33 | 0.2053 3 01 x 05 34 | 0.2056 3 00 x 05 35 | 0.2075 4 01 x 05 36 | 0.2091 2 00 x 12 37 | 0.2092 4 00 x 05 38 | 0.2102 2 01 x 10 39 | 0.2103 5 00 x 08 40 | 0.2104 2 00 x 10 41 | 0.2105 5 01 x 08 42 | 0.2109 2 01 x 12 43 | 0.2169 2 01 x 07 44 | 0.2183 2 00 x 07 45 | 0.2184 5 01 x 12 46 | 0.2189 5 00 x 10 47 | 0.2195 5 01 x 10 48 | 0.2210 5 01 x 07 49 | 0.2211 5 00 x 12 50 | 0.2214 3 00 x 04 51 | 0.2215 5 00 x 07 52 | 0.2218 3 01 x 04 53 | 0.2247 5 00 x 06 54 | 0.2249 5 01 x 06 55 | 0.2257 2 01 x 06 56 | 0.2265 4 00 x 04 57 | 0.2267 4 01 x 04 58 | 0.2270 2 00 x 06 59 | 0.2332 5 01 x 05 60 | 0.2338 5 00 x 05 61 | 0.2371 2 00 x 05 62 | 0.2374 2 01 x 05 63 | 0.2431 2 00 x 04 64 | 0.2440 2 01 x 04 65 | 0.2513 5 00 x 04 66 | 0.2520 5 01 x 04 67 | 0.2767 4 00 x 03 68 | 0.2772 4 01 x 03 69 | 0.2795 3 00 x 03 70 | 0.2796 3 01 x 03 71 | 0.2988 5 01 x 03 72 | 0.2989 5 00 x 03 73 | 0.3030 2 00 x 03 74 | 0.3033 2 01 x 03 75 | 0.3617 4 00 x 02 76 | 0.3624 4 01 x 02 77 | 0.3708 3 00 x 02 78 | 0.3708 3 01 x 02 79 | 0.3835 5 01 x 02 80 | 0.3836 5 00 x 02 81 | 0.4153 2 00 x 02 82 | 0.4156 2 01 x 02 83 | Timings By Test Type: 84 | 0.0523 3 00 x 08 0.0523 3 00 x 08 0.0357 3 00 x 08 0.0357 3 00 x 08 85 | 0.0528 3 01 x 08 0.0527 3 01 x 08 0.0361 3 01 x 08 0.0361 3 01 x 08 86 | 0.0532 4 00 x 08 0.0532 4 00 x 08 0.0379 4 00 x 08 0.0379 4 00 x 08 87 | 0.0536 4 01 x 08 0.0536 4 01 x 08 0.0380 2 00 x 08 0.0380 2 00 x 08 88 
| 0.0560 4 01 x 12 0.0560 4 01 x 12 0.0382 4 01 x 08 0.0381 4 01 x 08 89 | 0.0562 3 00 x 12 0.0562 3 00 x 12 0.0382 2 01 x 08 0.0381 2 01 x 08 90 | 0.0563 3 00 x 10 0.0562 3 00 x 10 0.0384 3 00 x 10 0.0384 3 00 x 10 91 | 0.0563 4 00 x 10 0.0563 4 00 x 10 0.0385 3 00 x 12 0.0385 3 00 x 12 92 | 0.0564 3 01 x 10 0.0563 3 01 x 10 0.0385 3 01 x 10 0.0385 3 01 x 10 93 | 0.0564 4 01 x 10 0.0564 4 01 x 10 0.0394 3 01 x 07 0.0394 3 01 x 07 94 | 0.0568 4 00 x 12 0.0568 4 00 x 12 0.0395 3 01 x 12 0.0395 3 01 x 12 95 | 0.0568 2 00 x 08 0.0568 2 00 x 08 0.0395 3 00 x 07 0.0395 3 00 x 07 96 | 0.0569 2 01 x 08 0.0570 2 01 x 08 0.0400 4 00 x 10 0.0400 4 00 x 10 97 | 0.0571 3 01 x 12 0.0571 3 01 x 12 0.0400 4 01 x 12 0.0400 4 01 x 12 98 | 0.0574 4 01 x 07 0.0574 4 01 x 07 0.0401 4 01 x 10 0.0401 4 01 x 10 99 | 0.0577 4 00 x 07 0.0577 4 00 x 07 0.0403 3 01 x 06 0.0403 3 01 x 06 100 | 0.0579 3 01 x 07 0.0579 3 01 x 07 0.0404 3 00 x 06 0.0404 3 00 x 06 101 | 0.0581 3 00 x 07 0.0583 3 00 x 07 0.0405 4 01 x 07 0.0405 4 01 x 07 102 | 0.0587 4 00 x 06 0.0587 4 00 x 06 0.0406 4 00 x 07 0.0407 4 00 x 07 103 | 0.0587 4 01 x 06 0.0587 4 01 x 06 0.0407 4 00 x 12 0.0407 4 00 x 12 104 | 0.0595 3 01 x 06 0.0595 3 01 x 06 0.0412 4 00 x 06 0.0413 4 00 x 06 105 | 0.0597 3 00 x 06 0.0597 3 00 x 06 0.0413 4 01 x 06 0.0413 4 01 x 06 106 | 0.0600 5 00 x 08 0.0600 5 00 x 08 0.0414 3 01 x 05 0.0414 3 01 x 05 107 | 0.0600 5 01 x 08 0.0600 5 01 x 08 0.0415 3 00 x 05 0.0415 3 00 x 05 108 | 0.0610 4 01 x 05 0.0610 4 01 x 05 0.0419 2 00 x 12 0.0419 2 00 x 12 109 | 0.0612 3 01 x 05 0.0612 3 01 x 05 0.0421 2 01 x 10 0.0421 2 01 x 10 110 | 0.0613 3 00 x 05 0.0613 3 00 x 05 0.0422 2 00 x 10 0.0421 2 00 x 10 111 | 0.0615 4 00 x 05 0.0615 4 00 x 05 0.0423 2 01 x 12 0.0423 2 01 x 12 112 | 0.0622 5 01 x 12 0.0622 5 01 x 12 0.0428 4 01 x 05 0.0427 4 01 x 05 113 | 0.0624 5 00 x 10 0.0625 5 00 x 10 0.0431 4 00 x 05 0.0431 4 00 x 05 114 | 0.0626 2 00 x 12 0.0626 2 00 x 12 0.0434 2 01 x 07 0.0434 2 01 x 07 115 | 0.0626 5 
01 x 10 0.0627 5 01 x 10 0.0435 2 00 x 07 0.0436 2 00 x 07 116 | 0.0629 5 00 x 12 0.0629 5 00 x 12 0.0443 3 00 x 04 0.0442 3 00 x 04 117 | 0.0630 2 01 x 10 0.0630 2 01 x 10 0.0444 3 01 x 04 0.0444 3 01 x 04 118 | 0.0630 2 00 x 10 0.0630 2 00 x 10 0.0450 2 01 x 06 0.0449 2 01 x 06 119 | 0.0632 2 01 x 12 0.0631 2 01 x 12 0.0452 5 00 x 08 0.0452 5 00 x 08 120 | 0.0632 5 01 x 07 0.0632 5 01 x 07 0.0453 2 00 x 06 0.0452 5 01 x 08 121 | 0.0634 5 00 x 07 0.0634 5 00 x 07 0.0453 5 01 x 08 0.0452 2 00 x 06 122 | 0.0644 5 00 x 06 0.0644 5 00 x 06 0.0463 4 00 x 04 0.0463 4 00 x 04 123 | 0.0645 5 01 x 06 0.0645 5 01 x 06 0.0463 4 01 x 04 0.0463 4 01 x 04 124 | 0.0651 2 01 x 07 0.0651 2 01 x 07 0.0470 5 00 x 10 0.0469 5 00 x 10 125 | 0.0656 2 00 x 07 0.0656 2 00 x 07 0.0470 5 01 x 12 0.0470 5 01 x 12 126 | 0.0665 3 00 x 04 0.0664 3 00 x 04 0.0471 5 01 x 10 0.0471 5 01 x 10 127 | 0.0665 3 01 x 04 0.0666 3 01 x 04 0.0471 2 00 x 05 0.0471 2 00 x 05 128 | 0.0670 4 00 x 04 0.0670 4 00 x 04 0.0473 2 01 x 05 0.0473 2 01 x 05 129 | 0.0670 4 01 x 04 0.0670 4 01 x 04 0.0473 5 01 x 07 0.0473 5 01 x 07 130 | 0.0670 5 01 x 05 0.0671 5 01 x 05 0.0474 5 00 x 07 0.0473 5 00 x 07 131 | 0.0672 5 00 x 05 0.0672 5 00 x 05 0.0476 5 00 x 12 0.0476 5 00 x 12 132 | 0.0679 2 01 x 06 0.0679 2 01 x 06 0.0479 5 00 x 06 0.0480 5 00 x 06 133 | 0.0683 2 00 x 06 0.0683 2 00 x 06 0.0480 5 01 x 06 0.0480 5 01 x 06 134 | 0.0714 2 00 x 05 0.0714 2 00 x 05 0.0481 2 00 x 04 0.0482 2 00 x 04 135 | 0.0714 2 01 x 05 0.0715 2 01 x 05 0.0483 2 01 x 04 0.0483 2 01 x 04 136 | 0.0728 5 00 x 04 0.0728 5 00 x 04 0.0495 5 01 x 05 0.0495 5 01 x 05 137 | 0.0730 5 01 x 04 0.0730 5 01 x 04 0.0497 5 00 x 05 0.0497 5 00 x 05 138 | 0.0734 2 00 x 04 0.0734 2 00 x 04 0.0529 5 00 x 04 0.0529 5 00 x 04 139 | 0.0737 2 01 x 04 0.0737 2 01 x 04 0.0530 5 01 x 04 0.0530 5 01 x 04 140 | 0.0823 4 00 x 03 0.0823 4 00 x 03 0.0552 3 00 x 03 0.0552 3 00 x 03 141 | 0.0825 4 01 x 03 0.0825 4 01 x 03 0.0553 3 01 x 03 0.0552 3 01 x 03 142 | 0.0845 3 
01 x 03 0.0845 3 00 x 03 0.0560 4 00 x 03 0.0560 4 00 x 03 143 | 0.0846 3 00 x 03 0.0846 3 01 x 03 0.0561 4 01 x 03 0.0561 4 01 x 03 144 | 0.0874 5 01 x 03 0.0874 5 00 x 03 0.0597 2 00 x 03 0.0597 2 00 x 03 145 | 0.0874 5 00 x 03 0.0874 5 01 x 03 0.0598 2 01 x 03 0.0598 2 01 x 03 146 | 0.0918 2 00 x 03 0.0918 2 00 x 03 0.0620 5 01 x 03 0.0620 5 01 x 03 147 | 0.0918 2 01 x 03 0.0919 2 01 x 03 0.0620 5 00 x 03 0.0620 5 00 x 03 148 | 0.1092 4 00 x 02 0.1094 4 00 x 02 0.0716 4 00 x 02 0.0715 4 00 x 02 149 | 0.1094 4 01 x 02 0.1096 4 01 x 02 0.0718 4 01 x 02 0.0717 4 01 x 02 150 | 0.1128 3 01 x 02 0.1128 3 00 x 02 0.0726 3 01 x 02 0.0725 3 01 x 02 151 | 0.1128 3 00 x 02 0.1129 3 01 x 02 0.0726 3 00 x 02 0.0726 3 00 x 02 152 | 0.1141 5 01 x 02 0.1141 5 01 x 02 0.0777 5 00 x 02 0.0777 5 00 x 02 153 | 0.1141 5 00 x 02 0.1141 5 00 x 02 0.0777 5 01 x 02 0.0777 5 01 x 02 154 | 0.1268 2 01 x 02 0.1268 2 00 x 02 0.0809 2 00 x 02 0.0809 2 00 x 02 155 | 0.1268 2 00 x 02 0.1268 2 01 x 02 0.0810 2 01 x 02 0.0810 2 01 x 02 156 | ---Benchmark Program Finished--- 157 | -------------------------------------------------------------------------------- /montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_pow_2kary/timings_ARM64_M2/64_quarter_gcc_noasm_array.txt: -------------------------------------------------------------------------------- 1 | 2 | ./testbench_2kary.sh g++ O3 MontgomeryQuarter uint64_t 191 8 22 -DTEST_ARRAY 3 | 4 | compilation finished, now executing: 5 | ---Running Program--- 6 | 7 | begin benchmarks - array pow 8 | (ignore)368336231662783440 9 | 10 | OVERALL BEST: 11 | 0.1804 3 01 x 08 12 | 0.1813 3 00 x 07 13 | 0.1813 3 00 x 08 14 | 0.1815 3 01 x 10 15 | 0.1821 3 01 x 12 16 | 0.1824 3 00 x 12 17 | 0.1831 3 01 x 07 18 | 0.1846 3 00 x 06 19 | 0.1850 4 00 x 08 20 | 0.1850 4 01 x 08 21 | 0.1854 3 00 x 10 22 | 0.1893 3 01 x 06 23 | 0.1896 4 01 x 10 24 | 0.1900 4 00 x 10 25 | 0.1910 4 01 x 07 26 | 0.1920 4 01 x 06 27 | 0.1921 4 00 x 
07 28 | 0.1922 4 01 x 12 29 | 0.1924 4 00 x 12 30 | 0.1928 3 00 x 05 31 | 0.1954 3 01 x 05 32 | 0.1964 2 01 x 12 33 | 0.1972 2 01 x 10 34 | 0.1974 4 00 x 06 35 | 0.1976 2 01 x 07 36 | 0.1979 2 00 x 08 37 | 0.1979 2 01 x 08 38 | 0.1993 2 00 x 07 39 | 0.1995 2 00 x 12 40 | 0.2010 2 00 x 10 41 | 0.2015 2 00 x 06 42 | 0.2016 4 00 x 05 43 | 0.2019 2 01 x 06 44 | 0.2024 4 01 x 05 45 | 0.2075 2 00 x 05 46 | 0.2091 2 01 x 05 47 | 0.2132 3 00 x 04 48 | 0.2159 3 01 x 04 49 | 0.2167 5 00 x 08 50 | 0.2169 5 01 x 08 51 | 0.2180 4 00 x 04 52 | 0.2199 4 01 x 04 53 | 0.2236 5 00 x 12 54 | 0.2247 5 01 x 12 55 | 0.2252 5 00 x 07 56 | 0.2254 5 01 x 10 57 | 0.2263 5 00 x 10 58 | 0.2293 5 01 x 06 59 | 0.2301 5 01 x 07 60 | 0.2331 5 00 x 06 61 | 0.2357 5 00 x 05 62 | 0.2372 5 01 x 05 63 | 0.2416 2 00 x 04 64 | 0.2437 2 01 x 04 65 | 0.2482 5 00 x 04 66 | 0.2503 5 01 x 04 67 | 0.2641 3 01 x 03 68 | 0.2652 3 00 x 03 69 | 0.2657 4 00 x 03 70 | 0.2663 4 01 x 03 71 | 0.2874 2 00 x 03 72 | 0.2892 2 01 x 03 73 | 0.2961 5 00 x 03 74 | 0.2966 5 01 x 03 75 | 0.3581 4 01 x 02 76 | 0.3583 4 00 x 02 77 | 0.3640 3 00 x 02 78 | 0.3645 3 01 x 02 79 | 0.3743 5 01 x 02 80 | 0.3754 5 00 x 02 81 | 0.4051 2 00 x 02 82 | 0.4056 2 01 x 02 83 | Timings By Test Type: 84 | 0.0535 3 01 x 08 0.0536 3 01 x 08 0.0365 3 00 x 08 0.0365 3 00 x 08 85 | 0.0538 3 00 x 07 0.0537 3 00 x 07 0.0367 3 01 x 08 0.0367 3 01 x 08 86 | 0.0539 3 01 x 10 0.0538 3 01 x 10 0.0369 3 00 x 07 0.0369 3 00 x 07 87 | 0.0539 4 01 x 08 0.0539 4 01 x 08 0.0369 3 01 x 10 0.0369 3 01 x 10 88 | 0.0540 3 01 x 12 0.0540 4 00 x 08 0.0370 3 01 x 12 0.0370 3 01 x 12 89 | 0.0540 4 00 x 08 0.0541 3 00 x 12 0.0371 3 00 x 12 0.0371 3 00 x 12 90 | 0.0541 3 00 x 12 0.0541 3 00 x 08 0.0373 3 01 x 07 0.0373 3 01 x 07 91 | 0.0542 3 00 x 08 0.0541 3 01 x 12 0.0375 3 00 x 06 0.0375 3 00 x 06 92 | 0.0543 3 01 x 07 0.0543 3 01 x 07 0.0376 3 00 x 10 0.0376 3 00 x 10 93 | 0.0548 3 00 x 06 0.0548 3 00 x 06 0.0384 4 00 x 08 0.0385 3 01 x 06 94 | 0.0551 3 00 x 10 0.0551 
4 01 x 10 0.0385 3 01 x 06 0.0385 4 00 x 08 95 | 0.0551 4 01 x 10 0.0551 3 00 x 10 0.0386 4 01 x 08 0.0386 4 01 x 08 96 | 0.0552 4 00 x 10 0.0553 4 00 x 10 0.0391 3 00 x 05 0.0392 3 00 x 05 97 | 0.0555 4 01 x 07 0.0556 4 01 x 07 0.0396 2 01 x 12 0.0396 2 01 x 12 98 | 0.0558 4 01 x 06 0.0558 4 00 x 07 0.0397 4 01 x 10 0.0396 3 01 x 05 99 | 0.0558 4 00 x 07 0.0559 4 01 x 12 0.0397 3 01 x 05 0.0397 4 01 x 10 100 | 0.0560 4 00 x 12 0.0559 4 01 x 06 0.0398 4 00 x 10 0.0397 4 00 x 10 101 | 0.0561 4 01 x 12 0.0560 4 00 x 12 0.0398 2 01 x 10 0.0398 2 01 x 07 102 | 0.0562 3 01 x 06 0.0561 3 01 x 06 0.0399 2 01 x 07 0.0398 2 01 x 10 103 | 0.0573 3 00 x 05 0.0573 3 00 x 05 0.0399 4 01 x 07 0.0398 2 00 x 08 104 | 0.0576 4 00 x 06 0.0576 4 00 x 06 0.0399 2 01 x 08 0.0399 2 01 x 08 105 | 0.0580 3 01 x 05 0.0581 3 01 x 05 0.0400 2 00 x 08 0.0399 4 01 x 07 106 | 0.0586 2 01 x 12 0.0587 2 01 x 12 0.0401 2 00 x 07 0.0401 4 01 x 12 107 | 0.0588 2 01 x 10 0.0589 2 01 x 10 0.0401 4 01 x 12 0.0402 4 01 x 06 108 | 0.0589 4 00 x 05 0.0589 4 00 x 05 0.0401 4 01 x 06 0.0402 2 00 x 07 109 | 0.0590 2 00 x 08 0.0589 2 01 x 07 0.0402 4 00 x 12 0.0402 4 00 x 12 110 | 0.0590 4 01 x 05 0.0590 2 01 x 08 0.0402 4 00 x 07 0.0402 4 00 x 07 111 | 0.0590 2 01 x 08 0.0590 2 00 x 08 0.0403 2 00 x 12 0.0402 2 00 x 12 112 | 0.0591 2 01 x 07 0.0590 4 01 x 05 0.0404 2 00 x 10 0.0404 2 00 x 10 113 | 0.0595 2 00 x 07 0.0595 2 00 x 12 0.0406 2 00 x 06 0.0405 2 00 x 06 114 | 0.0595 2 00 x 12 0.0595 2 00 x 07 0.0407 2 01 x 06 0.0406 2 01 x 06 115 | 0.0602 2 00 x 10 0.0600 2 00 x 10 0.0411 4 00 x 06 0.0411 4 00 x 06 116 | 0.0602 2 00 x 06 0.0602 2 00 x 06 0.0417 2 00 x 05 0.0417 2 00 x 05 117 | 0.0603 2 01 x 06 0.0603 2 01 x 06 0.0419 4 00 x 05 0.0419 4 00 x 05 118 | 0.0616 5 01 x 08 0.0616 5 01 x 08 0.0420 2 01 x 05 0.0420 2 01 x 05 119 | 0.0617 5 00 x 08 0.0617 5 00 x 08 0.0421 4 01 x 05 0.0421 4 01 x 05 120 | 0.0620 2 00 x 05 0.0621 2 00 x 05 0.0428 3 00 x 04 0.0428 3 00 x 04 121 | 0.0625 2 01 x 05 0.0625 2 01 x 
05 0.0435 3 01 x 04 0.0434 3 01 x 04 122 | 0.0635 5 01 x 12 0.0633 5 00 x 12 0.0448 4 00 x 04 0.0448 4 00 x 04 123 | 0.0636 5 00 x 12 0.0636 5 01 x 12 0.0453 4 01 x 04 0.0453 4 01 x 04 124 | 0.0636 5 00 x 07 0.0637 5 00 x 07 0.0466 5 00 x 08 0.0467 5 00 x 08 125 | 0.0638 3 00 x 04 0.0638 5 01 x 10 0.0468 5 01 x 08 0.0468 5 01 x 08 126 | 0.0641 5 01 x 10 0.0638 3 00 x 04 0.0483 5 00 x 12 0.0484 2 00 x 04 127 | 0.0642 4 00 x 04 0.0642 5 00 x 10 0.0484 2 00 x 04 0.0484 5 00 x 12 128 | 0.0643 5 00 x 10 0.0642 4 00 x 04 0.0487 2 01 x 04 0.0486 5 01 x 12 129 | 0.0645 3 01 x 04 0.0645 3 01 x 04 0.0488 5 01 x 10 0.0487 5 01 x 10 130 | 0.0647 4 01 x 04 0.0647 4 01 x 04 0.0488 5 00 x 10 0.0487 2 01 x 04 131 | 0.0649 5 01 x 06 0.0650 5 01 x 06 0.0489 5 00 x 07 0.0489 5 00 x 07 132 | 0.0650 5 01 x 07 0.0650 5 01 x 07 0.0490 5 01 x 12 0.0490 5 00 x 10 133 | 0.0663 5 00 x 06 0.0663 5 00 x 06 0.0496 5 01 x 06 0.0498 5 01 x 06 134 | 0.0671 5 00 x 05 0.0671 5 00 x 05 0.0501 5 01 x 07 0.0500 5 01 x 07 135 | 0.0675 5 01 x 05 0.0675 5 01 x 05 0.0503 5 00 x 06 0.0503 5 00 x 06 136 | 0.0718 5 00 x 04 0.0717 5 00 x 04 0.0508 5 00 x 05 0.0508 5 00 x 05 137 | 0.0722 5 01 x 04 0.0722 5 01 x 04 0.0511 5 01 x 05 0.0511 5 01 x 05 138 | 0.0724 2 00 x 04 0.0725 2 00 x 04 0.0524 5 00 x 04 0.0524 5 00 x 04 139 | 0.0731 2 01 x 04 0.0731 2 01 x 04 0.0525 3 01 x 03 0.0526 3 01 x 03 140 | 0.0786 4 00 x 03 0.0786 4 00 x 03 0.0528 3 00 x 03 0.0528 3 00 x 03 141 | 0.0787 4 01 x 03 0.0788 4 01 x 03 0.0529 5 01 x 04 0.0530 5 01 x 04 142 | 0.0795 3 01 x 03 0.0795 3 01 x 03 0.0542 4 00 x 03 0.0543 4 00 x 03 143 | 0.0798 3 00 x 03 0.0799 3 00 x 03 0.0544 4 01 x 03 0.0544 4 01 x 03 144 | 0.0857 5 00 x 03 0.0858 5 00 x 03 0.0572 2 00 x 03 0.0572 2 00 x 03 145 | 0.0857 5 01 x 03 0.0858 5 01 x 03 0.0576 2 01 x 03 0.0576 2 01 x 03 146 | 0.0865 2 00 x 03 0.0865 2 00 x 03 0.0623 5 00 x 03 0.0623 5 00 x 03 147 | 0.0870 2 01 x 03 0.0869 2 01 x 03 0.0625 5 01 x 03 0.0625 5 01 x 03 148 | 0.1074 4 01 x 02 0.1074 4 01 x 
02 0.0716 4 01 x 02 0.0716 4 00 x 02 149 | 0.1075 4 00 x 02 0.1075 4 00 x 02 0.0716 4 00 x 02 0.0717 4 01 x 02 150 | 0.1103 3 00 x 02 0.1102 3 00 x 02 0.0718 3 00 x 02 0.0718 3 00 x 02 151 | 0.1103 3 01 x 02 0.1104 3 01 x 02 0.0719 3 01 x 02 0.0719 3 01 x 02 152 | 0.1108 5 01 x 02 0.1108 5 01 x 02 0.0763 5 01 x 02 0.0764 5 01 x 02 153 | 0.1114 5 00 x 02 0.1110 5 00 x 02 0.0765 5 00 x 02 0.0765 5 00 x 02 154 | 0.1228 2 00 x 02 0.1229 2 01 x 02 0.0797 2 00 x 02 0.0797 2 00 x 02 155 | 0.1232 2 01 x 02 0.1229 2 00 x 02 0.0797 2 01 x 02 0.0797 2 01 x 02 156 | ---Benchmark Program Finished--- 157 | --------------------------------------------------------------------------------