├── .bazelrc ├── .github └── workflows │ └── ci.yml ├── .travis.yml ├── BUILD ├── README.md ├── WORKSPACE ├── benchmarks ├── benchmark.md ├── benchmark_20180629.md ├── benchmark_20190918.md ├── benchmark_20191012.md ├── benchmark_20191013.md ├── benchmark_20200628.md ├── benchmark_20230101.md ├── benchmark_20250315.md ├── benchmark_20250601.md ├── format_test_result.py └── perf_test_result.txt ├── build_all.bat ├── example ├── BUILD ├── bi_example_pe483.c ├── billion_sort.c ├── bit.c ├── build_each.bat ├── build_each.py ├── continued_fraction.c ├── count_carlitz_words.c ├── dfa_counter.c ├── dfa_summer.c ├── dva.c ├── example.c ├── linear_recurrence.c ├── matrix_power.c ├── mma_find_recurrence.c ├── mma_interpolating_polynomial.c ├── mma_to_cpp.c ├── mod_number.c ├── multiplicative_function_prefix_sum_common_function.c ├── multiplicative_function_prefix_sum_mavlue_base.c ├── parallel_cal_prime_pi.c ├── partition_mobius.c ├── pe_db.c ├── power_sum.c ├── prime_power_sum.c ├── random_sample.c ├── range.c └── sym_poly.c ├── format.py ├── gen_config.py ├── legacy ├── pe_poly.hpp └── pe_sym_poly.hpp ├── libraries_on_win64.md ├── pe ├── pe.hpp ├── pe_algo ├── pe_array ├── pe_base ├── pe_bi32 ├── pe_bit ├── pe_config ├── pe_db ├── pe_dpe ├── pe_extended_int ├── pe_extended_signed_int ├── pe_extended_unsigned_int ├── pe_fft ├── pe_float ├── pe_fraction ├── pe_gbi ├── pe_geometry ├── pe_initializer ├── pe_int ├── pe_int_algo ├── pe_internal ├── pe_io ├── pe_mat ├── pe_memory ├── pe_misc ├── pe_mma ├── pe_mod ├── pe_mpf ├── pe_mpz ├── pe_nt ├── pe_nt_base ├── pe_ntf ├── pe_parallel ├── pe_parallel_algo ├── pe_persistance ├── pe_poly_algo ├── pe_poly_base ├── pe_poly_base_common ├── pe_poly_base_flint ├── pe_poly_base_gmp ├── pe_poly_base_libbf ├── pe_poly_base_min25 ├── pe_poly_base_ntl ├── pe_rand ├── pe_range ├── pe_serialization ├── pe_span ├── pe_sym_poly ├── pe_time ├── pe_tree ├── pe_type_traits ├── pe_vector ├── precompile.bat ├── test ├── BUILD ├── algo_test.c ├── 
array_test.c ├── bi_div_test.c ├── bi_mul_test.c ├── bit_test.c ├── dva_test.c ├── extended_signed_int_test.c ├── extended_unsigned_int_test.c ├── fft_test.c ├── gbi_test.c ├── init_inv_test.c ├── int128_test.c ├── mat_mul_test.c ├── misc_test.c ├── mod_test.c ├── mpf_test.c ├── nt_test.c ├── parallel_sort_test.c ├── pe_test.c ├── pe_test.h ├── poly_algo_test.c ├── poly_div_test.c ├── poly_mul_test.c ├── prime_pi_sum_test.c ├── test_compile_each.bat ├── test_compile_each.py ├── test_int128_noopenmp.bat ├── test_int128_openmp.bat ├── test_noint128_noopenmp.bat ├── test_noint128_openmp.bat ├── test_perf.bat ├── test_perf.c └── tree_test.c ├── test_all.bat └── toolchain ├── BUILD └── pe_toolchain.bzl /.bazelrc: -------------------------------------------------------------------------------- 1 | startup --output_user_root=D:/bazel-output 2 | build --action_env=C_INCLUDE_PATH=D:/Hilbert/usr/include;D:/Hilbert/usr/include/pe;D:/Hilbert/usr/include/flint 3 | build --action_env=LIBRARY_PATH=D:/Hilbert/usr/lib 4 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: PE Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | gcc-build-and-test: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout repository 11 | uses: actions/checkout@v3 12 | 13 | - name: Install dependencies 14 | run: sudo apt-get update && sudo apt-get install -y g++ libgmp-dev libflint-dev libntl-dev 15 | 16 | - name: Build project 17 | run: | 18 | export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH} 19 | export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH} 20 | g++ test/pe_test.c -o ./pe_test.out --std=c++20 -O3 -march=native -mtune=native -fopenmp -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 -DENABLE_EIGEN=0 -DENABLE_GMP=1 -DENABLE_FLINT=0 -DENABLE_MPFR=1 -DENABLE_NTL=1 
-DENABLE_ZMQ=0 -DENABLE_LIBBF=0 -DENABLE_PRIME_COUNT=0 -DENABLE_PRIME_SIEVE=0 -DENABLED_TEST=SMALL,MEDIUM,BIG -DCONTINUOUS_INTEGRATION_TEST 21 | 22 | - name: Run tests 23 | run: ./pe_test.out 24 | 25 | gcc-build-examples: 26 | runs-on: ubuntu-latest 27 | 28 | steps: 29 | - name: Checkout repository 30 | uses: actions/checkout@v3 31 | 32 | - name: Install dependencies 33 | run: sudo apt-get update && sudo apt-get install -y g++ libgmp-dev libflint-dev libntl-dev 34 | 35 | - name: Install Bazel 36 | uses: bazel-contrib/setup-bazel@0.14.0 37 | 38 | - name: Build examples 39 | run: | 40 | export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH} 41 | export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH} 42 | bazel --ignore_all_rc_files build //example:gcc_builds --action_env=CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH} --action_env=LIBRARY_PATH=${LIBRARY_PATH} 43 | 44 | clang-build-and-test: 45 | runs-on: ubuntu-latest 46 | 47 | steps: 48 | - name: Checkout repository 49 | uses: actions/checkout@v3 50 | 51 | - name: Install dependencies 52 | run: sudo apt-get update && sudo apt-get install -y clang libgmp-dev libflint-dev libntl-dev 53 | 54 | - name: Build project 55 | run: | 56 | export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH} 57 | export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH} 58 | clang++ -x c++ test/pe_test.c -o ./pe_test.out --std=c++20 -O3 -march=native -mtune=native -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=0 -DENABLE_EIGEN=0 -DENABLE_GMP=1 -DENABLE_FLINT=0 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_ZMQ=0 -DENABLE_LIBBF=0 -DENABLE_PRIME_COUNT=0 -DENABLE_PRIME_SIEVE=0 -DENABLED_TEST=SMALL,MEDIUM,BIG -DCONTINUOUS_INTEGRATION_TEST 59 | 60 | - name: Run tests 61 | run: ./pe_test.out 62 | 63 | msvc-build-and-test: 64 | runs-on: windows-latest 65 | 66 | steps: 67 | - name: Checkout repository 68 | uses: actions/checkout@v3 69 | 70 | - name: Install 
dependencies 71 | run: | 72 | #Invoke-WebRequest -Uri "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip" -OutFile "eigen.zip" 73 | #Expand-Archive -Path "eigen.zip" -DestinationPath "C:\eigen" 74 | 75 | - uses: ilammy/msvc-dev-cmd@v1.4.1 76 | 77 | - name: Build project 78 | run: | 79 | cl test\pe_test.c /TP /GS /GL /W3 /Gy /Zc:wchar_t /Zi /Gm- /O2 /Zc:inline /fp:precise /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /errorReport:prompt /WX- /Zc:forScope /Gd /Oi /MT /openmp /std:c++20 /FC /EHsc /nologo /diagnostics:classic /DENABLE_ASSERT=0 /DTRY_TO_USE_INT128=1 /DENABLE_OPENMP=1 /DENABLE_EIGEN=0 /DENABLE_GMP=0 /DENABLE_FLINT=0 /DENABLE_MPFR=0 /DENABLE_NTL=0 /DENABLE_ZMQ=0 /DENABLE_LIBBF=0 /DENABLE_PRIME_COUNT=0 /DENABLE_PRIME_SIEVE=0 /DENABLED_TEST=SMALL,MEDIUM,BIG /DCONTINUOUS_INTEGRATION_TEST /I "$env:GITHUB_WORKSPACE" /I "C:\eigen\eigen-3.4.0" 80 | 81 | - name: Run tests 82 | run: | 83 | .\pe_test.exe 84 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: focal 2 | language: cpp 3 | env: 4 | - BUILD_ARGUMENTS="--std=c++17 -O3 -march=native -mtune=native -lgmpxx -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=0 -DENABLE_EIGEN=1 -DENABLE_GMP=1 -DENABLE_FLINT=1 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_LIBBF=0" BUILD_CMD_PUSH="clang++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST -DNO_SUPER_TEST" BUILD_CMD_CRON="clang++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST" 5 | - BUILD_ARGUMENTS="--std=c++17 -O3 -march=native -mtune=native -fopenmp -lgmpxx -lflint -lmpfr -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 -DENABLE_EIGEN=1 -DENABLE_GMP=1 -DENABLE_FLINT=1 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_LIBBF=0" BUILD_CMD_PUSH="g++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL 
-DCONTINUOUS_INTEGRATION_TEST -DNO_SUPER_TEST" BUILD_CMD_CRON="g++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST" 6 | script: 7 | - sudo apt-get update 8 | - sudo apt-get -y install libeigen3-dev 9 | - sudo apt-get -y install libgmp-dev 10 | - sudo apt-get -y install libflint-dev 11 | - sudo apt-get -y install libntl-dev 12 | - export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH} 13 | - export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH} 14 | - echo ${TRAVIS_EVENT_TYPE} 15 | - echo && [ "${TRAVIS_EVENT_TYPE}" == "push" ] && ${BUILD_CMD_PUSH} && ./a.out || [ "${TRAVIS_EVENT_TYPE}" != "push" ] && echo "skip push build" 16 | - echo && [ "${TRAVIS_EVENT_TYPE}" == "cron" ] && ${BUILD_CMD_CRON} && ./a.out || [ "${TRAVIS_EVENT_TYPE}" != "cron" ] && echo "skip cron build" 17 | -------------------------------------------------------------------------------- /BUILD: -------------------------------------------------------------------------------- 1 | load("//toolchain:pe_toolchain.bzl", "pe_library") 2 | 3 | package( 4 | default_visibility = [ 5 | "//visibility:public", 6 | ], 7 | ) 8 | 9 | [pe_library(name = x + "_lib", srcs = [x]) for x in glob(["pe_*"])] 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PE: C++ Library for Project Euler 2 | 3 | [![Build Status (Windows/MSVC)](https://ci.appveyor.com/api/projects/status/scaji00tde2gb7uy?svg=true)](https://ci.appveyor.com/project/baihacker/pe-win-msvc) 4 | [![Build Status (Ubuntu/GCC)](https://ci.appveyor.com/api/projects/status/9bt606nax24anyen?svg=true)](https://ci.appveyor.com/project/baihacker/pe-ubuntu-gcc) 5 | [![Build Status (Ubuntu/Clang)](https://ci.appveyor.com/api/projects/status/nw243uvs95i0bj85?svg=true)](https://ci.appveyor.com/project/baihacker/pe-ubuntu-clang) 6 | [![Build 
Status](https://github.com/baihacker/pe/actions/workflows/ci.yml/badge.svg)](https://github.com/baihacker/pe/actions) 7 | [![GitHub Releases](https://img.shields.io/github/release/baihacker/pe.svg)](https://github.com/baihacker/pe/releases) 8 | ![Project Euler](https://projecteuler.net/profile/baihacker.png?) 9 | 10 | **PE** is a C++ library designed to solve problems on [Project Euler](https://projecteuler.net/recent). 11 | 12 | ## Prerequisites 13 | 14 | To use this library, you need a C++ development environment that supports: 15 | * C++17 or later. 16 | * Building `x86_64` targets. 17 | 18 | ## Installation 19 | 20 | 1. **Include the Library:** 21 | - Place all the library files into a directory of your choice. 22 | - Ensure that `#include <pe.hpp>` is available by adding the directory to the `CPLUS_INCLUDE_PATH` environment variable. 23 | 24 | 2. **Configure the Library:** 25 | - Run **[gen_config.py](https://github.com/baihacker/pe/blob/master/gen_config.py)** from the installation directory to generate **[pe_config](https://github.com/baihacker/pe/blob/master/pe_config)**. 26 | - This script generates a static configuration file with default values. You can manually edit this file after generation. 27 | - `ENABLE_ASSERT`: Enable assertions for certain inputs or conditions. 28 | - `TRY_TO_USE_INT128`: Check if the compiler supports `int128` and enable it. Set to `0` to disable `int128` even if supported. 29 | - The script also automatically detects the presence of third-party libraries and sets the appropriate flags: 30 | - `ENABLE_EIGEN`: Use [Eigen](http://eigen.tuxfamily.org/index.php?title=Main_Page). 31 | - `ENABLE_GMP`: Use [GMP](https://gmplib.org). 32 | - `ENABLE_FLINT`: Use [FLINT](http://www.flintlib.org). 33 | - `ENABLE_MPFR`: Use [MPFR](https://www.mpfr.org). 34 | - `ENABLE_LIBBF`: Use [libbf](https://bellard.org/libbf). 35 | - `ENABLE_NTL`: Use [NTL](https://www.shoup.net/ntl/download.html). 36 | - `ENABLE_ZMQ`: Use [ZeroMQ](https://zeromq.org/). 
37 | - `ENABLE_PRIME_COUNT`: Use [PrimeCount](https://github.com/kimwalisch/primecount). 38 | - `ENABLE_PRIME_SIEVE`: Use [PrimeSieve](https://github.com/kimwalisch/primesieve). 39 | - `ENABLE_TCMALLOC`: Use [tcmalloc](https://github.com/gperftools/gperftools). 40 | - Manually edit **[pe_config](https://github.com/baihacker/pe/blob/master/pe_config)** to add or modify configuration items as needed: 41 | - `ENABLE_OPENMP`: Enable [OpenMP](http://www.openmp.org). The script doesn't generate the default config for OpenMP. 42 | 43 | 3. **(Optional) Generate Precompiled Header:** 44 | - Run `g++ -xc++-header pe.hpp` in the installation directory to create a precompiled header (`pe.hpp.gch`). 45 | - You may add additional compiler options if required (e.g., `g++ -xc++-header pe.hpp --std=c++17 -O3 -march=native -fopenmp`). 46 | 47 | ## Usage 48 | 49 | For a quick start, refer to [example.c](https://github.com/baihacker/pe/blob/master/example/example.c). 50 | 51 | ## File List 52 | 53 | - **pe**: Contains all implementation files. 54 | - **pe.hpp**: Header file for generating the precompiled header. Includes the core library. 55 | - **pe_algo**: Contains various algorithms. 56 | - **pe_array**: Array implementation with compile-time and runtime dimension length. Supports element counts exceeding `int32` limits and custom allocators. 57 | - **pe_base**: Pre-included headers, macros, typedefs, and basic inline functions. 58 | - **pe_bi32**: Big integer implementation with base `1 << 32`. 59 | - **pe_bit**: Bit manipulation utilities. 60 | - **pe_config**: Centralized configuration file for PE. 61 | - **pe_db**: Load and save pre-calculated results, such as prime pi and prime sum. 62 | - **pe_dpe**: Distributed computation. 63 | - **pe_extended_int**: Extended integer types. 64 | - **pe_extended_signed_int**: Extended signed integer types. 65 | - **pe_extended_unsigned_int**: Extended unsigned integer types. 
66 | - **pe_fft**: Fast Fourier Transform and polynomial multiplication utilities. 67 | - **pe_float**: Functions for unified float operations including `__float128`. 68 | - **pe_fraction**: Fraction arithmetic operations. 69 | - **pe_gbi**: General big integer operations, corresponding to `pe_nt`. 70 | - **pe_geometry**: Support for `Point2D` and `Point3D`. 71 | - **pe_initializer**: Helper classes and macros for library initialization. 72 | - **pe_int**: Basic integer utilities. 73 | - **pe_int_algo**: Integer algorithms for extended integers and general big integers. 74 | - **pe_internal**: Includes the configuration and third-party libraries, and defines the necessary types/macros. 75 | - **pe_io**: Methods and macros for simplified or accelerated I/O operations. 76 | - **pe_mat**: Matrix operations. 77 | - **pe_memory**: Memory management utilities (Windows only). 78 | - **pe_misc**: Miscellaneous utility functions. 79 | - **pe_mma**: Support for MMA: helper methods or classes for MMA code generation. 80 | - **pe_mod**: Modular arithmetic utilities. 81 | - **pe_mpf**: Multi-precision floating-point numbers based on GMP. 82 | - **pe_mpz**: Multi-precision integers based on GMP. 83 | - **pe_nt**: Core number theory utilities. 84 | - **pe_nt_base**: Prime list generation, integer factorization, prime testing, and computations of φ and μ. 85 | - **pe_parallel**: Simple framework for multi-threaded problem-solving (Windows only). 86 | - **pe_parallel_algo**: Parallel algorithms. 87 | - **pe_persistance**: Key-Value Persistence (may support Linux with adjustments). 88 | - **pe_poly_algo**: Polynomial algorithms. 89 | - **pe_poly_base**: Basic polynomial algorithms. 90 | - **pe_poly_base_flint**: Polynomial algorithms based on FLINT. 91 | - **pe_poly_base_gmp**: Polynomial algorithms based on GMP. 92 | - **pe_poly_base_libbf**: Polynomial algorithms based on libbf. 
93 | - **pe_poly_base_min25**: Polynomial algorithms from [Min_25](https://github.com/min-25), including the fastest polynomial multiplication. 94 | - **pe_poly_base_ntl**: Polynomial algorithms based on NTL. 95 | - **pe_rand**: Random number generation utilities. 96 | - **pe_range**: Range implementation. 97 | - **pe_serialization**: Objects serialization. 98 | - **pe_span**: Implementation of `Span`. 99 | - **pe_sym_poly**: Symbolic polynomial operations. 100 | - **pe_time**: Utilities for `TimeDelta` and `TimeRecorder`. 101 | - **pe_tree**: Tree-based data structures. 102 | - **pe_type_traits**: Type trait utilities. 103 | - **pe_vector**: Vector operations. 104 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baihacker/pe/a149689695a1be4ff987b6b8c5c3297f7f1a86ba/WORKSPACE -------------------------------------------------------------------------------- /benchmarks/benchmark_20180629.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | 3 | ## Ntt benchmark 4 | 5 | ### Test Environment: 6 | 7 | * OS: Win10 Pro 1803 8 | * CPU: i7-4790K (MMX, SSE, SSE2, SSE3, SSE4.1, SSE4.2, EM64T, VT-x, AES, AVX, AVX2, FMA3) 9 | * Compiler: MinGW-x86_64-8.1.0-win32-seh-rt_v6-rev0 10 | * Msys2: msys2-x86_64-20190524 11 | * Test code: [Ntt test](https://github.com/baihacker/pe/blob/master/test/ntt_test.c) 12 | * Build libbf: 13 | > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o libbf.avx2.o libbf.c 14 | 15 | > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o cutils.avx2.o cutils.c 16 | 17 | > gcc-ar crv libbf.avx2.a cutils.avx2.o libbf.avx2.o 18 | * Build test target: 19 | > -o a.exe --std=c++11 -O3 -march=native -mtune=native -fopenmp -Wl,--stack,268435456 -lbf -lgmpxx -lflint -lgmp -lmpfr -lmpir 20 | 21 | ### Test result 22 | 23 | #### Openmp enabled 24 | ```cpp 25 | ntt test 
: data = random, size = 0, n = 1000000, mod = 100019 26 | flint n : 0.407 27 | flint p : 1.156 28 | ntt32 s : 1.031 29 | ntt32 l : 1.156 30 | ntt64 s : 1.578 31 | ntt64 l : 1.656 32 | Min_25 s : 0.156 33 | Min_25 l : 0.203 34 | libbf : 0.828 35 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003 36 | flint n : 1.234 37 | flint p : 2.890 38 | ntt32 l : 2.375 39 | ntt64 l : 3.391 40 | Min_25 l : 0.343 41 | libbf : 1.359 42 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779 43 | flint n : 0.843 44 | flint p : 2.063 45 | ntt64 l : 1.671 46 | Min_25 l : 0.203 47 | libbf : 0.859 48 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003 49 | flint n : 0.422 50 | flint p : 1.234 51 | ntt32 s : 0.984 52 | ntt32 l : 1.156 53 | ntt64 s : 1.515 54 | ntt64 l : 1.651 55 | Min_25 s : 0.141 56 | Min_25 l : 0.172 57 | libbf : 0.813 58 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003 59 | flint n : 1.234 60 | flint p : 2.750 61 | ntt32 l : 2.359 62 | ntt64 l : 3.422 63 | Min_25 l : 0.359 64 | libbf : 1.375 65 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779 66 | flint n : 0.829 67 | flint p : 2.077 68 | ntt64 l : 1.703 69 | Min_25 l : 0.172 70 | libbf : 0.859 71 | 72 | mod = 100019 73 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 74 | flint n 0.016 0.000 0.000 0.000 0.000 0.000 0.016 0.032 0.079 0.219 0.422 75 | flint p 0.000 0.000 0.000 0.000 0.016 0.031 0.063 0.140 0.282 0.594 1.219 76 | ntt32 s 0.000 0.000 0.016 0.000 0.016 0.047 0.062 0.109 0.235 0.484 1.000 77 | ntt32 l 0.000 0.000 0.000 0.015 0.016 0.047 0.062 0.125 0.281 0.578 1.187 78 | ntt64 s 0.000 0.000 0.016 0.016 0.031 0.062 0.094 0.172 0.375 0.734 1.531 79 | ntt64 l 0.000 0.000 0.016 0.015 0.032 0.062 0.094 0.188 0.406 0.813 1.687 80 | Min_25 s 0.000 0.015 0.000 0.000 0.000 0.016 0.016 0.016 0.032 0.062 0.141 81 | Min_25 l 0.000 0.000 0.000 0.000 0.000 0.015 0.016 0.032 0.031 0.078 0.172 82 | libbf 0.000 0.000 0.000 0.000 
0.000 0.015 0.047 0.093 0.188 0.406 0.843 83 | mod = 100000000003 84 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 85 | flint n 0.016 0.000 0.000 0.000 0.000 0.016 0.031 0.062 0.140 0.390 0.859 86 | flint p 0.000 0.000 0.000 0.000 0.016 0.047 0.094 0.203 0.422 0.906 1.907 87 | ntt32 l 0.000 0.000 0.000 0.015 0.031 0.047 0.078 0.141 0.297 0.594 1.187 88 | ntt64 l 0.016 0.000 0.000 0.015 0.031 0.063 0.093 0.203 0.391 0.828 1.672 89 | Min_25 l 0.000 0.000 0.000 0.000 0.016 0.015 0.016 0.031 0.047 0.094 0.265 90 | libbf 0.000 0.000 0.000 0.000 0.016 0.031 0.047 0.094 0.203 0.421 0.875 91 | mod = 316227766016779 92 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 93 | flint n 0.000 0.000 0.000 0.000 0.000 0.016 0.047 0.110 0.203 0.391 0.828 94 | flint p 0.000 0.016 0.000 0.016 0.031 0.047 0.125 0.234 0.468 1.016 2.124 95 | ntt64 l 0.000 0.000 0.016 0.016 0.031 0.062 0.094 0.204 0.406 0.813 1.672 96 | Min_25 l 0.000 0.000 0.016 0.000 0.000 0.000 0.015 0.016 0.046 0.078 0.172 97 | libbf 0.000 0.000 0.015 0.000 0.016 0.016 0.047 0.093 0.203 0.437 0.875 98 | ``` 99 | 100 | #### Openmp disabled (option "-fopenmp" removed) 101 | ```cpp 102 | ntt test : data = random, size = 0, n = 1000000, mod = 100019 103 | flint n : 0.406 104 | flint p : 1.172 105 | ntt32 s : 2.968 106 | ntt32 l : 4.531 107 | ntt64 s : 2.500 108 | ntt64 l : 5.015 109 | Min_25 s : 0.172 110 | Min_25 l : 0.344 111 | libbf : 0.812 112 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003 113 | flint n : 1.234 114 | flint p : 2.906 115 | ntt32 l : 9.470 116 | ntt64 l : 10.530 117 | Min_25 l : 0.719 118 | libbf : 1.359 119 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779 120 | flint n : 0.828 121 | flint p : 2.078 122 | ntt64 l : 5.077 123 | Min_25 l : 0.391 124 | libbf : 0.844 125 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003 126 | flint n : 0.406 127 | flint p : 1.250 128 | ntt32 s : 2.970 129 | ntt32 l : 4.531 130 | ntt64 s : 2.469 131 | ntt64 l : 4.952 132 | 
Min_25 s : 0.156 133 | Min_25 l : 0.328 134 | libbf : 0.813 135 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003 136 | flint n : 1.218 137 | flint p : 2.766 138 | ntt32 l : 9.483 139 | ntt64 l : 10.517 140 | Min_25 l : 0.734 141 | libbf : 1.359 142 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779 143 | flint n : 0.828 144 | flint p : 2.078 145 | ntt64 l : 5.000 146 | Min_25 l : 0.422 147 | libbf : 0.844 148 | 149 | mod = 100019 150 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 151 | flint n 0.000 0.000 0.000 0.016 0.000 0.016 0.015 0.032 0.078 0.219 0.421 152 | flint p 0.000 0.000 0.000 0.016 0.016 0.031 0.062 0.140 0.281 0.578 1.234 153 | ntt32 s 0.000 0.000 0.015 0.016 0.031 0.078 0.141 0.297 0.641 1.390 3.000 154 | ntt32 l 0.000 0.000 0.016 0.031 0.047 0.109 0.219 0.469 0.985 2.093 4.546 155 | ntt64 s 0.000 0.000 0.016 0.000 0.015 0.047 0.125 0.266 0.562 1.188 2.500 156 | ntt64 l 0.000 0.000 0.016 0.031 0.047 0.110 0.250 0.531 1.110 2.343 4.999 157 | Min_25 s 0.000 0.000 0.000 0.000 0.016 0.000 0.000 0.031 0.032 0.062 0.171 158 | Min_25 l 0.000 0.000 0.000 0.000 0.000 0.000 0.015 0.047 0.078 0.172 0.360 159 | libbf 0.000 0.000 0.000 0.015 0.000 0.015 0.047 0.094 0.203 0.406 0.812 160 | mod = 100000000003 161 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 162 | flint n 0.000 0.000 0.000 0.000 0.015 0.016 0.031 0.046 0.125 0.391 0.875 163 | flint p 0.000 0.000 0.015 0.016 0.015 0.047 0.094 0.203 0.422 0.907 1.891 164 | ntt32 l 0.000 0.015 0.016 0.016 0.062 0.109 0.219 0.469 1.000 2.140 4.562 165 | ntt64 l 0.000 0.000 0.000 0.032 0.062 0.125 0.250 0.531 1.125 2.375 5.077 166 | Min_25 l 0.000 0.000 0.000 0.000 0.016 0.016 0.031 0.046 0.093 0.187 0.406 167 | libbf 0.000 0.000 0.000 0.000 0.015 0.031 0.078 0.093 0.203 0.422 0.859 168 | mod = 316227766016779 169 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 170 | flint n 0.000 0.000 0.016 0.000 0.016 0.015 0.047 0.093 0.203 0.391 0.828 171 | flint p 0.000 0.000 0.015 0.000 0.016 
0.047 0.109 0.234 0.469 1.015 2.140 172 | ntt64 l 0.000 0.000 0.016 0.031 0.063 0.125 0.265 0.531 1.125 2.406 5.109 173 | Min_25 l 0.000 0.000 0.000 0.000 0.015 0.016 0.031 0.047 0.078 0.188 0.422 174 | libbf 0.000 0.000 0.000 0.016 0.000 0.015 0.047 0.094 0.219 0.437 0.859 175 | ``` 176 | -------------------------------------------------------------------------------- /benchmarks/benchmark_20190918.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | 3 | ## Ntt benchmark 4 | 5 | ### Test Environment: 6 | 7 | * Date: 2019.09.18 8 | * OS: Win10 Pro 1903 (18362.356) 9 | * CPU: i9-9900K (MMX, SSE, SSE2, SSE3, SSE4.1, SSE4.2, EM64T, VT-x, AES, AVX, AVX2, FMA3, TSX) 10 | * Compiler: MinGW-x86_64-8.1.0-win32-seh-rt_v6-rev0 11 | * Msys2: msys2-x86_64-20190524 12 | * Test code: [Ntt test](https://github.com/baihacker/pe/blob/master/test/ntt_test.c) 13 | * Build libbf: 14 | > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o libbf.avx2.o libbf.c 15 | 16 | > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o cutils.avx2.o cutils.c 17 | 18 | > gcc-ar crv libbf.avx2.a cutils.avx2.o libbf.avx2.o 19 | * Build test target: 20 | > -o a.exe --std=c++14 -fno-diagnostics-color -O3 -march=native -mtune=native -fopenmp -Wl,--stack,268435456 -static -s -lbf -lgmpxx -lflint -lgmp -lmpfr -lmpir 21 | 22 | ### Test result 23 | 24 | #### Openmp enabled 25 | ```cpp 26 | ntt test : data = random, size = 0, n = 1000000, mod = 100019 27 | flint n : 0.306 28 | flint p : 1.029 29 | ntt32 s : 0.816 30 | ntt32 l : 0.870 31 | ntt64 s : 1.376 32 | ntt64 l : 1.422 33 | Min_25 s : 0.114 34 | Min_25 l : 0.125 35 | libbf : 0.635 36 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003 37 | flint n : 0.919 38 | flint p : 2.633 39 | ntt32 l : 1.743 40 | ntt64 l : 2.857 41 | Min_25 l : 0.255 42 | libbf : 1.084 43 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779 44 | flint n : 0.623 45 | flint p : 1.869 46 | ntt64 l : 1.429 47 
| Min_25 l : 0.133 48 | libbf : 0.682 49 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003 50 | flint n : 0.313 51 | flint p : 1.087 52 | ntt32 s : 0.813 53 | ntt32 l : 0.878 54 | ntt64 s : 1.354 55 | ntt64 l : 1.419 56 | Min_25 s : 0.111 57 | Min_25 l : 0.125 58 | libbf : 0.645 59 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003 60 | flint n : 0.922 61 | flint p : 2.502 62 | ntt32 l : 1.743 63 | ntt64 l : 2.839 64 | Min_25 l : 0.253 65 | libbf : 1.088 66 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779 67 | flint n : 0.624 68 | flint p : 1.873 69 | ntt64 l : 1.406 70 | Min_25 l : 0.133 71 | libbf : 0.682 72 | 73 | mod = 100019 74 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 75 | flint n 0.000 0.000 0.001 0.001 0.003 0.006 0.012 0.028 0.059 0.160 0.318 76 | flint p 0.001 0.001 0.003 0.006 0.014 0.028 0.058 0.123 0.253 0.532 1.097 77 | ntt32 s 0.001 0.003 0.003 0.007 0.015 0.032 0.048 0.101 0.201 0.403 0.819 78 | ntt32 l 0.001 0.002 0.004 0.008 0.016 0.032 0.051 0.110 0.223 0.427 0.877 79 | ntt64 s 0.001 0.002 0.006 0.012 0.025 0.055 0.081 0.165 0.338 0.677 1.376 80 | ntt64 l 0.001 0.003 0.006 0.012 0.026 0.055 0.081 0.173 0.353 0.704 1.413 81 | Min_25 s 0.000 0.000 0.001 0.001 0.002 0.002 0.006 0.014 0.027 0.054 0.112 82 | Min_25 l 0.001 0.001 0.000 0.001 0.003 0.006 0.010 0.016 0.030 0.059 0.128 83 | libbf 0.000 0.001 0.002 0.005 0.009 0.022 0.048 0.072 0.149 0.310 0.645 84 | mod = 100000000003 85 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 86 | flint n 0.001 0.000 0.001 0.002 0.005 0.010 0.021 0.045 0.099 0.301 0.645 87 | flint p 0.001 0.002 0.005 0.010 0.021 0.043 0.092 0.190 0.397 0.828 1.727 88 | ntt32 l 0.001 0.002 0.004 0.008 0.017 0.035 0.053 0.111 0.217 0.439 0.869 89 | ntt64 l 0.001 0.003 0.005 0.012 0.027 0.057 0.085 0.174 0.352 0.705 1.431 90 | Min_25 l 0.001 0.001 0.001 0.002 0.004 0.008 0.013 0.020 0.034 0.062 0.133 91 | libbf 0.001 0.001 0.002 0.004 0.010 0.024 0.045 0.077 0.159 0.330 0.682 
92 | mod = 316227766016779 93 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 94 | flint n 0.000 0.000 0.001 0.002 0.005 0.012 0.029 0.072 0.152 0.298 0.625 95 | flint p 0.001 0.002 0.006 0.011 0.022 0.049 0.104 0.208 0.430 0.910 1.910 96 | ntt64 l 0.001 0.003 0.006 0.013 0.027 0.056 0.085 0.173 0.350 0.707 1.434 97 | Min_25 l 0.000 0.001 0.001 0.002 0.003 0.008 0.013 0.020 0.034 0.066 0.157 98 | libbf 0.000 0.001 0.002 0.004 0.010 0.024 0.051 0.077 0.159 0.333 0.692 99 | ``` 100 | 101 | #### Openmp disabled (option "-fopenmp" removed) 102 | ```cpp 103 | ntt test : data = random, size = 0, n = 1000000, mod = 100019 104 | flint n : 0.303 105 | flint p : 1.063 106 | ntt32 s : 2.720 107 | ntt32 l : 4.035 108 | ntt64 s : 2.284 109 | ntt64 l : 4.605 110 | Min_25 s : 0.121 111 | Min_25 l : 0.264 112 | libbf : 0.638 113 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003 114 | flint n : 0.932 115 | flint p : 2.630 116 | ntt32 l : 8.624 117 | ntt64 l : 9.764 118 | Min_25 l : 0.617 119 | libbf : 1.083 120 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779 121 | flint n : 0.628 122 | flint p : 1.884 123 | ntt64 l : 4.664 124 | Min_25 l : 0.341 125 | libbf : 0.682 126 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003 127 | flint n : 0.312 128 | flint p : 1.092 129 | ntt32 s : 2.703 130 | ntt32 l : 4.038 131 | ntt64 s : 2.254 132 | ntt64 l : 4.560 133 | Min_25 s : 0.121 134 | Min_25 l : 0.262 135 | libbf : 0.650 136 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003 137 | flint n : 0.929 138 | flint p : 2.527 139 | ntt32 l : 8.570 140 | ntt64 l : 9.727 141 | Min_25 l : 0.648 142 | libbf : 1.095 143 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779 144 | flint n : 0.630 145 | flint p : 1.885 146 | ntt64 l : 4.601 147 | Min_25 l : 0.332 148 | libbf : 0.687 149 | 150 | mod = 100019 151 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 152 | flint n 0.000 0.001 0.000 0.001 0.002 0.005 0.011 
0.028 0.060 0.159 0.317 153 | flint p 0.001 0.001 0.003 0.006 0.014 0.029 0.060 0.123 0.251 0.534 1.094 154 | ntt32 s 0.001 0.003 0.006 0.014 0.030 0.064 0.137 0.288 0.607 1.287 2.722 155 | ntt32 l 0.002 0.004 0.009 0.022 0.045 0.096 0.204 0.430 0.901 1.910 4.044 156 | ntt64 s 0.001 0.002 0.006 0.011 0.025 0.053 0.113 0.244 0.516 1.082 2.438 157 | ntt64 l 0.003 0.005 0.012 0.024 0.054 0.115 0.247 0.525 1.096 2.230 4.652 158 | Min_25 s 0.000 0.000 0.001 0.001 0.001 0.003 0.007 0.014 0.029 0.058 0.132 159 | Min_25 l 0.000 0.001 0.000 0.001 0.003 0.006 0.014 0.029 0.061 0.131 0.267 160 | libbf 0.001 0.000 0.002 0.004 0.009 0.019 0.044 0.073 0.150 0.311 0.649 161 | mod = 100000000003 162 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 163 | flint n 0.001 0.000 0.001 0.001 0.005 0.009 0.021 0.046 0.098 0.302 0.651 164 | flint p 0.001 0.002 0.005 0.010 0.022 0.045 0.092 0.190 0.400 -0.662 1.775 165 | ntt32 l 0.002 0.005 0.010 0.021 0.046 0.097 0.206 0.438 0.915 2.034 4.193 166 | ntt64 l 0.002 0.005 0.011 0.025 0.052 0.118 0.247 0.500 1.050 2.217 4.675 167 | Min_25 l 0.000 0.000 0.001 0.001 0.003 0.009 0.018 0.039 0.076 0.158 0.328 168 | libbf 0.001 0.001 0.002 0.005 0.009 0.021 0.047 0.077 0.159 0.328 0.688 169 | mod = 316227766016779 170 | log2(n) 10 11 12 13 14 15 16 17 18 19 20 171 | flint n 0.000 0.001 0.001 0.002 0.005 0.012 0.029 0.072 0.153 0.299 0.630 172 | flint p 0.002 0.003 0.005 0.011 0.023 0.048 0.105 0.213 0.437 0.933 1.912 173 | ntt64 l 0.002 0.005 0.012 0.024 0.052 0.110 0.237 0.499 1.054 2.224 4.694 174 | Min_25 l 0.001 0.001 0.001 0.002 0.004 0.009 0.018 0.038 0.079 0.166 0.344 175 | libbf 0.001 0.001 0.002 0.004 0.010 0.023 0.047 0.076 0.159 0.335 0.699 176 | ``` 177 | -------------------------------------------------------------------------------- /benchmarks/format_test_result.py: -------------------------------------------------------------------------------- 1 | #! 
python 2 | #-*- coding: utf8 -*- 3 | import os 4 | import pprint 5 | 6 | 7 | def load_db_data(file): 8 | file = file.decode(encoding='utf8', errors='ignore').encode(encoding='gbk', 9 | errors='ignore') 10 | if os.path.exists(file): 11 | with open(file, 'rb') as tempf: 12 | result = tempf.read() 13 | return True, result 14 | return False, '' 15 | 16 | 17 | def load_db(): 18 | data = load_db_data('perf_test_result.txt')[1] 19 | return eval(data) 20 | 21 | 22 | class MyPrettyPrinter(pprint.PrettyPrinter): 23 | 24 | def format(self, object, context, maxlevels, level): 25 | return pprint.PrettyPrinter.format(self, object, context, maxlevels, level) 26 | 27 | 28 | def export(db, file): 29 | with open(file, 'w') as tempf: 30 | pp = MyPrettyPrinter(stream=tempf) 31 | pp.pprint(db) 32 | #tempf.write(str(db)) 33 | 34 | 35 | if __name__ == '__main__': 36 | db = load_db() 37 | #export(db, "formated.txt") -------------------------------------------------------------------------------- /build_all.bat: -------------------------------------------------------------------------------- 1 | bazel clean && bazel build //test:all //example:all //:all -------------------------------------------------------------------------------- /example/BUILD: -------------------------------------------------------------------------------- 1 | load("//toolchain:pe_toolchain.bzl", "pe_binary") 2 | 3 | package( 4 | default_visibility = [ 5 | "//visibility:public", 6 | ], 7 | ) 8 | 9 | [pe_binary(name = x[:-2], srcs = [x]) for x in glob(["*.c"])] 10 | [pe_binary(name = x[:-2] + "_gcc", 11 | srcs = [x], 12 | executable_suffix = ".out", 13 | enable_pe_flags = False, 14 | copts = [ 15 | "-std=c++17", 16 | "-Wno-delete-incomplete", 17 | "-Wno-shift-count-overflow", 18 | "-O2", 19 | "-march=native", 20 | "-mtune=native", 21 | "-fopenmp"], 22 | defines = [ 23 | "ENABLE_ASSERT=0", 24 | "TRY_TO_USE_INT128=1", 25 | "ENABLE_OPENMP=1", 26 | "ENABLE_EIGEN=0", 27 | "ENABLE_GMP=1", 28 | "ENABLE_FLINT=1", 29 | 
"ENABLE_MPFR=1", 30 | "ENABLE_NTL=1", 31 | "ENABLE_ZMQ=0", 32 | "ENABLE_LIBBF=0", 33 | "ENABLE_PRIME_COUNT=0", 34 | "ENABLE_PRIME_SIEVE=0", 35 | "TEST_ALL", 36 | "CONTINUOUS_INTEGRATION_TEST", 37 | "NO_SUPER_TEST"], 38 | linkopts = [ 39 | "-fopenmp", 40 | "-lflint", 41 | "-lmpfr", 42 | "-lntl", 43 | "-lgmp"], 44 | cc_path = "g++", 45 | ) for x in glob(["*.c"])] 46 | 47 | filegroup(name ="gcc_builds", srcs = [x[:-2] + "_gcc" for x in glob(["*.c"])]) -------------------------------------------------------------------------------- /example/bi_example_pe483.c: -------------------------------------------------------------------------------- 1 | 2 | // Project Euler 483 Repeated permutation 3 | // reference answer: 4 | // N = 100: 53817203945.52453 5 | // output: 53817203945 6 | // N = 150: 55335570173801.14 7 | // output: 55335570173801 8 | #include 9 | using namespace pe; 10 | const int N = 100; 11 | BigInteger choose[505][505]; 12 | BigInteger fac[501]; 13 | void init() { 14 | for (int i = 0; i <= 500; ++i) 15 | for (int j = 0; j <= i; ++j) 16 | choose[i][j] = 17 | (j == i || j == 0) ? 
1 : choose[i - 1][j] + choose[i - 1][j - 1]; 18 | fac[0] = 1; 19 | for (int i = 1; i <= 500; ++i) fac[i] = fac[i - 1] * i; 20 | } 21 | BigInteger dp[N + 1]; 22 | std::map orz[N + 1]; 23 | int main() { 24 | init(); 25 | dp[0] = 1; 26 | orz[0][1] = 1; 27 | for (int i = 1; i <= N; ++i) { 28 | std::cerr << i << " "; 29 | for (int j = N; j >= i; --j) { 30 | BigInteger total = 0; 31 | std::map inc; 32 | const int curr_step = i; 33 | const int n = j; 34 | for (int x = 1; x * curr_step <= n; ++x) { 35 | BigInteger t = 1, u = 1; 36 | for (int i = 0, j = n; i < x; ++i) { 37 | t = t * choose[j][curr_step]; 38 | j -= curr_step; 39 | u = u * fac[curr_step - 1]; 40 | } 41 | BigInteger magic = t / fac[x] * u; 42 | total += magic * dp[n - curr_step * x]; 43 | foreach (it, orz[n - x * curr_step]) { 44 | int128 d = Gcd((int128)curr_step, it.first); 45 | int128 now = curr_step / d * it.first; 46 | inc[now] += it.second * magic; 47 | } 48 | } 49 | dp[n] += total; 50 | foreach (it, inc) orz[n][it.first] += it.second; 51 | } 52 | std::cerr << dp[N] << std::endl; 53 | } 54 | 55 | std::cerr << dp[N] << std::endl; 56 | BigInteger s = 0; 57 | foreach (it, orz[N]) 58 | s += BigInteger(it.first) * BigInteger(it.first) * it.second; 59 | std::cerr << s / dp[N] << std::endl; 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /example/billion_sort.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | #if OS_TYPE_WIN 5 | const int64 N = 1000000000; 6 | LargeMemory lm; 7 | 8 | int main() { 9 | float* data = reinterpret_cast(lm.Allocate(N * sizeof(float))); 10 | dbg("memory ready"); 11 | 12 | for (int i = 0; i < N; ++i) data[i] = 1. 
* rand() / RAND_MAX; 13 | dbg("data ready"); 14 | 15 | TimeRecorder tr; 16 | ParallelSort<30>(data, data + N); 17 | // std::sort(data, data+N); 18 | dbg("sorted"); 19 | 20 | std::cerr << tr.Elapsed().Format() << std::endl; 21 | return 0; 22 | } 23 | #else 24 | int main() { return 0; } 25 | #endif -------------------------------------------------------------------------------- /example/bit.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | using namespace std; 4 | 5 | int main() { 6 | TableFormatter tf; 7 | auto& line = tf.AppendLine(); 8 | line.push_back("n"); 9 | line.push_back("BitWidth"); 10 | line.push_back("BitFloor"); 11 | line.push_back("BitCeil"); 12 | line.push_back("HighestBitIndex"); 13 | line.push_back("HighestBit"); 14 | line.push_back("LowestBitIndex"); 15 | line.push_back("LowestBit"); 16 | for (int n = 0; n < 32; ++n) { 17 | auto& line = tf.AppendLine(); 18 | line.push_back(ToString(n)); 19 | line.push_back(ToString(BitWidth(n))); 20 | line.push_back(ToString(BitFloor(n))); 21 | line.push_back(ToString(BitCeil(n))); 22 | line.push_back(ToString(HighestBitIndex(n))); 23 | line.push_back(ToString(HighestBit(n))); 24 | line.push_back(ToString(LowestBitIndex(n))); 25 | line.push_back(ToString(LowestBit(n))); 26 | } 27 | tf.Render(std::cout); 28 | return 0; 29 | } 30 | 31 | /* 32 | n BitWidth BitFloor BitCeil HighestBitIndex HighestBit LowestBitIndex LowestBit 33 | 0 0 0 1 -1 0 -1 0 34 | 1 1 1 1 0 1 0 1 35 | 2 2 2 2 1 2 1 2 36 | 3 2 2 4 1 2 0 1 37 | 4 3 4 4 2 4 2 4 38 | 5 3 4 8 2 4 0 1 39 | 6 3 4 8 2 4 1 2 40 | 7 3 4 8 2 4 0 1 41 | 8 4 8 8 3 8 3 8 42 | 9 4 8 16 3 8 0 1 43 | 10 4 8 16 3 8 1 2 44 | 11 4 8 16 3 8 0 1 45 | 12 4 8 16 3 8 2 4 46 | 13 4 8 16 3 8 0 1 47 | 14 4 8 16 3 8 1 2 48 | 15 4 8 16 3 8 0 1 49 | 16 5 16 16 4 16 4 16 50 | 17 5 16 32 4 16 0 1 51 | 18 5 16 32 4 16 1 2 52 | 19 5 16 32 4 16 0 1 53 | 20 5 16 32 4 16 2 4 54 | 21 5 16 32 4 16 0 1 55 | 22 5 16 32 4 16 1 2 56 | 
23 5 16 32 4 16 0 1 57 | 24 5 16 32 4 16 3 8 58 | 25 5 16 32 4 16 0 1 59 | 26 5 16 32 4 16 1 2 60 | 27 5 16 32 4 16 0 1 61 | 28 5 16 32 4 16 2 4 62 | 29 5 16 32 4 16 0 1 63 | 30 5 16 32 4 16 1 2 64 | 31 5 16 32 4 16 0 1 65 | */ -------------------------------------------------------------------------------- /example/build_each.bat: -------------------------------------------------------------------------------- 1 | build_each.py 2 | pause -------------------------------------------------------------------------------- /example/build_each.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # -*- coding: UTF-8 -*- 3 | import os 4 | import sys 5 | import time 6 | 7 | CURRENT_DIRECTORY = os.getcwd() 8 | 9 | 10 | def DurationPartsFromNs(duration): 11 | min_part = duration // 1000000000 // 60 12 | sec_part = duration // 1000000000 % 60 13 | millisec_part = duration // 1000000 % 1000 14 | return (min_part, sec_part, millisec_part) 15 | 16 | 17 | def FormatNs(duration): 18 | return '%d:%02d.%03d' % DurationPartsFromNs(duration) 19 | 20 | 21 | def main(): 22 | ret = 0 23 | for file in os.listdir(CURRENT_DIRECTORY): 24 | _, file_ext_name = os.path.splitext(file) 25 | if file_ext_name != '.c': 26 | continue 27 | print('Compile %s' % file) 28 | start_time = time.perf_counter_ns() 29 | ret = os.system('pe++.py %s -hc' % file) 30 | time_usage = FormatNs(time.perf_counter_ns() - start_time) 31 | print('Done, return code = %d, time usage = %s' % (ret, time_usage)) 32 | print() 33 | if ret != 0: 34 | print('Failed to compile %s' % file) 35 | break 36 | if os.path.exists('a.exe'): 37 | os.remove('a.exe') 38 | return ret 39 | 40 | 41 | if __name__ == '__main__': 42 | sys.exit(main()) 43 | -------------------------------------------------------------------------------- /example/continued_fraction.c: -------------------------------------------------------------------------------- 1 | #include "pe.hpp" 2 | using namespace pe; 3 | 4 | 
template 5 | void demo() { 6 | std::vector data = {1, 2, 2, 2, 2, 2, 2, 2, 2, 2}; 7 | for (int i = 0; i < 10; ++i) { 8 | std::cout << FromCf(data, i) << std::endl; 9 | } 10 | std::cout << FromCfN(data) << std::endl; 11 | for (int i = 50; i <= 50; ++i) { 12 | std::cout << i << " " << ToCf(0, 1, 6, 1, i) << std::endl; 13 | } 14 | 15 | std::cout << ToCf(0, 1, 2, 1, 10) << std::endl; 16 | std::cout << FromCf(ToCf(0, 1, 2, 1, 50)) << std::endl; 17 | 18 | std::cout << ToCf(123456, 654321) << std::endl; 19 | std::cout << FromCf(ToCf(123456, 654321)) << std::endl; 20 | } 21 | 22 | int main() { 23 | demo(); 24 | #if ENABLE_GMP 25 | demo(); 26 | #endif 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /example/count_carlitz_words.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | // Given the number of each letter, find the number of words without adjacent 5 | // letter of the same 6 | 7 | int brute_force(const std::vector& vec) { 8 | int size = 0; 9 | for (auto& iter : vec) size += iter; 10 | 11 | std::vector data; 12 | int id = 0; 13 | for (auto iter : vec) { 14 | for (int j = 0; j < iter; ++j) data.push_back(id); 15 | ++id; 16 | } 17 | 18 | int ret = 0; 19 | do { 20 | int ok = 1; 21 | for (int i = 0; i < size - 1; ++i) { 22 | int idx = (i + 1) % size; 23 | if (i != idx && data[i] == data[idx]) { 24 | ok = 0; 25 | break; 26 | } 27 | } 28 | ret += ok; 29 | } while (next_permutation(data.begin(), data.end())); 30 | return ret; 31 | } 32 | 33 | int main() { 34 | CarlitzWordsCounter counter(1000000007, 1000000); 35 | 36 | std::vector> test_data = { 37 | {1}, {2}, {1, 1}, {1, 3}, {2, 2}, {3, 7}, 38 | {2, 3}, {2, 2, 2}, {2, 3, 3}, {3, 3, 2, 2}, {3, 3, 3}, {2, 2, 2, 2, 2}, 39 | {4, 4}, {4, 4, 2, 2}, {5, 5, 5}, 40 | }; 41 | 42 | for (const std::vector& iter : test_data) { 43 | std::cout << brute_force(iter) << " " << counter.Cal(iter) << 
std::endl; 44 | } 45 | return 0; 46 | } -------------------------------------------------------------------------------- /example/dfa_counter.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | // Count the number which contains 3. 5 | 6 | int64 cal(int64 n) { 7 | for (; n; n /= 10) 8 | if (n % 10 == 3) return 1; 9 | return 0; 10 | } 11 | 12 | int64 bf(int64 n) { return XRange(1LL, n).Map(cal).Sum(); } 13 | 14 | int main() { 15 | PE_INIT(maxp = 1000000); 16 | 17 | // state 0: initial state 18 | // state 1: 3 is not seen 19 | // state 2: 3 is seen 20 | DfaCounter counter1; 21 | counter1.Init(3, 10, 16); 22 | for (int i = 1; i <= 9; ++i) 23 | if (i != 3) counter1.AddTrans(0, i, 1); 24 | counter1.AddTrans(0, 3, 2); 25 | counter1.AddTrans(0, 0, 0); 26 | for (int i = 0; i <= 9; ++i) 27 | if (i != 3) counter1.AddTrans(1, i, 1); 28 | counter1.AddTrans(1, 3, 2); 29 | for (int i = 0; i <= 9; ++i) counter1.AddTrans(2, i, 2); 30 | counter1.MarkTargetState(2); 31 | 32 | // In counter2, the dfa doesn't accept leading zeros, in other words, if it's 33 | // initial state and 0 comes, the target state is invalid. So we need to call 34 | // set_count_each_len(1). 
35 | // 36 | // state 0: initial state 37 | // state 1: 3 is not seen 38 | // state 2: 3 is seen 39 | // state 3: invalid 40 | DfaCounter counter2; 41 | counter2.Init(4, 10, 16); 42 | for (int i = 1; i <= 9; ++i) 43 | if (i != 3) counter2.AddTrans(0, i, 1); 44 | counter2.AddTrans(0, 3, 2); 45 | for (int i = 0; i <= 9; ++i) 46 | if (i != 3) counter2.AddTrans(1, i, 1); 47 | counter2.AddTrans(1, 3, 2); 48 | for (int i = 0; i <= 9; ++i) counter2.AddTrans(2, i, 2); 49 | counter2.MarkTargetState(2); 50 | counter2.AddTrans(0, 0, 3); 51 | for (int i = 0; i <= 9; ++i) counter2.AddTrans(3, i, 3); 52 | counter2.set_count_each_len(1); 53 | 54 | for (int64 n = 10; n <= 1000000; n *= 10) { 55 | auto a = bf(n); 56 | auto b = counter1.Cal(n); 57 | auto c = counter2.Cal(n); 58 | std::cout << n << "\t" << a << "\t" << b << "\t" << c << std::endl; 59 | } 60 | return 0; 61 | } -------------------------------------------------------------------------------- /example/dfa_summer.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | // Compute the 3rd power of the number which contains 3. 
5 | const int64 mod = 1000000007; 6 | using MT = NMod64; 7 | 8 | int64 cal(int64 n) { 9 | const int64 m = n; 10 | for (; n; n /= 10) 11 | if (n % 10 == 3) return m * m % mod * m % mod; 12 | return 0; 13 | } 14 | 15 | int64 bf(int64 n) { return XRange(1LL, n).Map(cal).SumMod(mod); } 16 | 17 | int main() { 18 | PE_INIT(maxp = 1000000); 19 | 20 | // state 0: initial state 21 | // state 1: 3 is not seen 22 | // state 2: 3 is seen 23 | DfaSummer summer1; 24 | summer1.Init(3, 3, 10, 16); 25 | for (int i = 1; i <= 9; ++i) 26 | if (i != 3) summer1.AddTrans(0, i, 1); 27 | summer1.AddTrans(0, 3, 2); 28 | summer1.AddTrans(0, 0, 0); 29 | for (int i = 0; i <= 9; ++i) 30 | if (i != 3) summer1.AddTrans(1, i, 1); 31 | summer1.AddTrans(1, 3, 2); 32 | for (int i = 0; i <= 9; ++i) summer1.AddTrans(2, i, 2); 33 | summer1.MarkTargetState(2); 34 | 35 | // In summer2, the dfa doesn't accept leading zeros, in other words, if it's 36 | // initial state and 0 comes, the target state is invalid. So we need to call 37 | // set_count_each_len(1). 
38 | // 39 | // state 0: initial state 40 | // state 1: 3 is not seen 41 | // state 2: 3 is seen 42 | // state 3: invalid 43 | DfaSummer summer2; 44 | summer2.Init(4, 3, 10, 16); 45 | for (int i = 1; i <= 9; ++i) 46 | if (i != 3) summer2.AddTrans(0, i, 1); 47 | summer2.AddTrans(0, 3, 2); 48 | for (int i = 0; i <= 9; ++i) 49 | if (i != 3) summer2.AddTrans(1, i, 1); 50 | summer2.AddTrans(1, 3, 2); 51 | for (int i = 0; i <= 9; ++i) summer2.AddTrans(2, i, 2); 52 | summer2.MarkTargetState(2); 53 | summer2.AddTrans(0, 0, 3); 54 | for (int i = 0; i <= 9; ++i) summer2.AddTrans(3, i, 3); 55 | summer2.set_count_each_len(1); 56 | 57 | for (int64 n = 10; n <= 1000000; n *= 10) { 58 | auto a = bf(n); 59 | auto b = summer1.Cal(n); 60 | auto c = summer2.Cal(n); 61 | std::cout << n << "\t" << a << "\t" << b << "\t" << c << std::endl; 62 | } 63 | return 0; 64 | } -------------------------------------------------------------------------------- /example/dva.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace std; 3 | using namespace pe; 4 | 5 | const int64 mod = 1000000007; 6 | using MT = NMod64; 7 | 8 | // https://en.wikipedia.org/wiki/Arithmetic_function#Relations_among_the_functions 9 | void ConvolutionExamples() { 10 | const int64 n = 10000000; 11 | 12 | auto eps = MakePrefixSumEpsilon(n); 13 | auto one = MakePrefixSumOne(n); 14 | auto mu = MakePrefixSumMu(n); 15 | auto id = MakePrefixSumId(n); 16 | auto phi = MakePrefixSumPhi(n); 17 | { 18 | // eps = one * mu 19 | auto t = DVAConv(one, mu); 20 | for (int i = 1; i < t.key_size; ++i) { 21 | PE_ASSERT(t.values[i] == eps.values[i]); 22 | // cout << t.values[i] << " " << eps.values[i] << endl; 23 | // cout << (t.values[i] == eps.values[i]) << endl; 24 | } 25 | } 26 | { 27 | // phi = mu * id 28 | auto t = DVAConv(mu, id); 29 | for (int i = 1; i < t.key_size; ++i) { 30 | PE_ASSERT(t.values[i] == phi.values[i]); 31 | } 32 | } 33 | { 34 | // id = one * phi 35 | auto t = 
DVAConv(one, phi); 36 | for (int i = 1; i < t.key_size; ++i) { 37 | PE_ASSERT(t.values[i] == id.values[i]); 38 | } 39 | } 40 | 41 | // Divisor count 42 | DVA d0(n); 43 | { 44 | for (int i = 1; i <= n; ++i) { 45 | int64 me = 1; 46 | for (auto iter : Factorize(i)) me *= iter.second + 1; 47 | d0[i] += me; 48 | } 49 | for (int i = 1; i < d0.key_size; ++i) d0.values[i] += d0.values[i - 1]; 50 | } 51 | { 52 | // d0 = one * one 53 | // d0 = id0 * one 54 | auto t = DVAConv(one, one); 55 | for (int i = 1; i < t.key_size; ++i) { 56 | PE_ASSERT(t.values[i] == d0.values[i]); 57 | } 58 | } 59 | 60 | // Divisor sum 61 | DVA d1(n); 62 | { 63 | for (int i = 1; i <= n; ++i) { 64 | int64 me = 0; 65 | for (auto iter : GetFactors(i)) me += iter; 66 | d1[i] += me; 67 | } 68 | for (int i = 1; i < d1.key_size; ++i) d1.values[i] += d1.values[i - 1]; 69 | } 70 | { 71 | // d1 = id * one 72 | // d1 = id1 * one 73 | auto t = DVAConv(id, one); 74 | for (int i = 1; i < t.key_size; ++i) { 75 | PE_ASSERT(t.values[i] == d1.values[i]); 76 | } 77 | } 78 | 79 | // Divisor square sum 80 | DVA d2(n); 81 | { 82 | for (int i = 1; i <= n; ++i) { 83 | int64 me = 0; 84 | for (auto iter : GetFactors(i)) me += iter * iter; 85 | d2[i] += me; 86 | } 87 | for (int i = 1; i < d2.key_size; ++i) d2.values[i] += d2.values[i - 1]; 88 | } 89 | DVA id2(n); 90 | { 91 | for (int i = 1; i < id2.key_size; ++i) { 92 | id2.values[i] = P2SumMod(id2.keys[i], mod); 93 | } 94 | } 95 | { 96 | // d2 = id2 * one 97 | auto t = DVAConv(id2, one); 98 | for (int i = 1; i < t.key_size; ++i) { 99 | PE_ASSERT(t.values[i] == d2.values[i]); 100 | } 101 | } 102 | 103 | // Prime omega 104 | // https://en.wikipedia.org/wiki/Prime_omega_function 105 | DVA omega(n); 106 | { 107 | for (int i = 1; i <= n; ++i) { 108 | omega[i] += Factorize(i).size(); 109 | } 110 | for (int i = 1; i < omega.key_size; ++i) 111 | omega.values[i] += omega.values[i - 1]; 112 | } 113 | { 114 | // omega = primeq * one 115 | auto t = DVAConv(PrimeS0(n), one); 116 | for 
(int i = 1; i < t.key_size; ++i) { 117 | PE_ASSERT(t.values[i] == omega.values[i]); 118 | } 119 | } 120 | } 121 | 122 | int main() { 123 | PE_INIT(maxp = 10000000, cal_phi = 1, cal_mu = 1); 124 | ConvolutionExamples(); 125 | return 0; 126 | } 127 | -------------------------------------------------------------------------------- /example/linear_recurrence.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | const int64 mod = 1000000007; 5 | 6 | int64 solve_nth(int64 n) { 7 | auto ans = MatrixPowerMod( 8 | [=](auto& m, auto& v) { 9 | m(0, 0) = 1; 10 | m(0, 1) = 1; 11 | m(1, 0) = 1; 12 | v[0] = 1; 13 | v[1] = 0; 14 | }, 15 | 2, n); 16 | return ans[1]; 17 | } 18 | 19 | int64 solve_nth_sum(int64 n) { 20 | auto ans = MatrixPowerMod( 21 | [=](auto& m, auto& v) { 22 | m(0, 0) = 1; 23 | m(0, 1) = 1; 24 | m(1, 0) = 1; 25 | m(2, 0) = 1; 26 | m(2, 2) = 1; 27 | v[0] = 1; 28 | v[1] = 0; 29 | v[2] = 0; 30 | }, 31 | 3, n); 32 | return ans[2]; 33 | } 34 | 35 | int main() { 36 | const std::vector init = {0LL, 1LL, 1LL, 2LL, 3LL, 5LL, 8LL}; 37 | 38 | for (int64 n = 1; n <= 1000000000; n *= 10) { 39 | // Use Berlekamp Massey algorithm to find the recurrence. 40 | // The minimal initial element count is: 2 * order + 1 41 | const int64 ans0 = *FindLinearRecurrenceValueAt(init, n, mod); 42 | // Compute the nth element assuming the recurrence is known. 43 | const int64 ans1 = 44 | LinearRecurrenceValueAt({mod - 1, mod - 1, 1}, init, n, mod); 45 | // Use matrix multiplication to compute the nth element. 
46 | const int64 ans2 = solve_nth(n); 47 | std::cout << "n = " << n << std::endl; 48 | std::cout << "Ans0 = " << ans0 << std::endl; 49 | std::cout << "Ans1 = " << ans1 << std::endl; 50 | std::cout << "Ans2 = " << ans2 << std::endl; 51 | std::cout << std::endl; 52 | } 53 | 54 | for (int64 n = 1; n <= 1000000000; n *= 10) { 55 | // The minimal initial element count is: 2 * (order + 1) + 1 56 | const int64 ans1 = 57 | LinearRecurrenceSumAt({mod - 1, mod - 1, 1}, init, n, mod); 58 | const int64 ans2 = solve_nth_sum(n); 59 | std::cout << "n = " << n << std::endl; 60 | std::cout << "Ans1 = " << ans1 << std::endl; 61 | std::cout << "Ans2 = " << ans2 << std::endl; 62 | std::cout << std::endl; 63 | } 64 | return 0; 65 | } -------------------------------------------------------------------------------- /example/matrix_power.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | const int64 mod = 316227766016779; 5 | using MT = NMod64; 6 | 7 | // Computes 8 | // |1 1|^n * |1| 9 | // |1 0| |0| 10 | 11 | // Mod is specified at compile time. 12 | // The element type is chosen internally. 13 | int64 solve0(int64 n) { 14 | auto ans = MatrixPowerMod( 15 | [=](auto& m, auto& v) { 16 | m(0, 0) = 1; 17 | m(0, 1) = 1; 18 | m(1, 0) = 1; 19 | v[0] = 1; 20 | v[1] = 0; 21 | }, 22 | 2, n); 23 | return ans[0]; 24 | } 25 | 26 | // Mod is associated with T at compile time. 27 | int64 solve1(int64 n) { 28 | auto ans = MatrixPowerMod( 29 | [=](auto& m, auto& v) { 30 | m(0, 0) = 1; 31 | m(0, 1) = 1; 32 | m(1, 0) = 1; 33 | v[0] = 1; 34 | v[1] = 0; 35 | }, 36 | 2, n); 37 | return ans[0].value(); 38 | } 39 | 40 | #if ENABLE_EIGEN 41 | // Mod is associated with T at runtime. 42 | // Different threads use different mod. 
43 | int64 solve2(int64 n, int64 rmod) { 44 | SetEigenNbThreads(1); 45 | TLMod64::Set(rmod); 46 | auto ans = MatrixPowerMod>( 47 | [=](auto& m, auto& v) { 48 | m(0, 0) = 1; 49 | m(0, 1) = 1; 50 | m(1, 0) = 1; 51 | v[0] = 1; 52 | v[1] = 0; 53 | }, 54 | 2, n); 55 | SetEigenNbThreads(0); 56 | return ans[0].value(); 57 | } 58 | #endif 59 | 60 | // Mod is associated with T at runtime. 61 | // All the threads use the same mod. 62 | int64 solve3(int64 n, int64 rmod) { 63 | using T = NModNumber; 64 | DefaultMod::Set(rmod); 65 | auto ans = MatrixPowerMod( 66 | [=](auto& m, auto& v) { 67 | m(0, 0) = 1; 68 | m(0, 1) = 1; 69 | m(1, 0) = 1; 70 | v[0] = 1; 71 | v[1] = 0; 72 | }, 73 | 2, n); 74 | return ans[0].value(); 75 | } 76 | 77 | // Mod is specified at runtime. 78 | // If int128 is available, use int128 as element type, different threads use 79 | // different mod. 80 | // Otherwise, use DefaultMod, all the threads use the same mod, and this is same 81 | // as solve3 and the difference is solve3 needs to set the default mod 82 | // explicitly and solve4 will set it automatically. 
83 | int64 solve4(int64 n, int64 mod) { 84 | auto ans = MatrixPowerMod( 85 | [=](auto& m, auto& v) { 86 | m(0, 0) = 1; 87 | m(0, 1) = 1; 88 | m(1, 0) = 1; 89 | v[0] = 1; 90 | v[1] = 0; 91 | }, 92 | 2, n, mod); 93 | return ans[0]; 94 | } 95 | 96 | int main() { 97 | PE_INIT(maxp = 200000000); 98 | 99 | for (int64 n = 1; n <= 1000000000; n *= 10) { 100 | std::cout << "n = " << n << std::endl; 101 | int64 ans0 = solve0(n); 102 | int64 ans1 = solve1(n); 103 | #if ENABLE_EIGEN 104 | int64 ans2 = solve2(n, mod); 105 | #endif 106 | int64 ans3 = solve3(n, mod); 107 | int64 ans4 = solve4(n, mod); 108 | std::cout << "Ans0 = " << ans0 << std::endl; 109 | std::cout << "Ans1 = " << ans1 << std::endl; 110 | #if ENABLE_EIGEN 111 | std::cout << "Ans2 = " << ans2 << std::endl; 112 | #endif 113 | std::cout << "Ans3 = " << ans3 << std::endl; 114 | std::cout << "Ans4 = " << ans4 << std::endl; 115 | std::cout << std::endl; 116 | } 117 | 118 | for (int64 rmod = mod; rmod <= mod + 100; ++rmod) 119 | if (IsPrime(rmod)) { 120 | const int64 n = 1000000000; 121 | std::cout << "n = " << n << std::endl; 122 | std::cout << "rmod = " << rmod << std::endl; 123 | #if ENABLE_EIGEN 124 | int64 ans2 = solve2(n, rmod); 125 | #endif 126 | int64 ans3 = solve3(n, rmod); 127 | int64 ans4 = solve4(n, rmod); 128 | #if ENABLE_EIGEN 129 | std::cout << "Ans2 = " << ans2 << std::endl; 130 | #endif 131 | std::cout << "Ans3 = " << ans3 << std::endl; 132 | std::cout << "Ans4 = " << ans4 << std::endl; 133 | std::cout << std::endl; 134 | } 135 | 136 | return 0; 137 | } -------------------------------------------------------------------------------- /example/mma_find_recurrence.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | // http://oeis.org/A001499 5 | // Oeis gives two recurrence formula: 6 | // 2 a[n] = 2 n (n-1) a[n-1] + n (n-1)^2 a[n-2] 7 | // 2 a[n] = n (n-1)^2 ((2 n - 3) a[n-2] + (n-2)^2 a[n-3]) 8 | // 9 | // This example uses 
some sample data to guess the formula. 10 | // Note: the result may not be unique. 11 | 12 | int main() { 13 | mma::FRHelper helper; 14 | helper 15 | .set_values({1, 0, 1, 6, 90, 2040, 67950, 3110940, 187530840, 14398171200, 16 | 1371785398200}) 17 | // In most cases we can use all the available check points, but 18 | // sometimes it has different recurrence according to n. For example, when 19 | // n is odd, it has a recurrence formula. When n is even, it has a 20 | // different one. 21 | .set_check_points({3, 5, 7}) 22 | .set_offsets({1, 2}) // Tune the offset manually 23 | .set_max_degree(5) // Tune the polynomial degree manually 24 | .set_max_abs_coe(4) // Tune the coefficient manually 25 | .set_leading(2); // Tune the leading manually 26 | 27 | std::cout << helper << std::endl << std::endl; 28 | 29 | // The output of the generated mathematica command is 30 | const std::string result = 31 | "{{x1p0 -> 0, x1p1 -> -2, x1p2 -> 2, x1p3 -> 0, x1p4 -> 0, x1p5 " 32 | "-> 0, x2p0 -> 0, x2p1 -> 1, x2p2 -> -2, x2p3 -> 1, x2p4 -> 0, " 33 | "x2p5 -> 0}}"; 34 | 35 | // Validate the result by sample data. 36 | helper.Validate(result); 37 | 38 | // Validate the result by more data. 
39 | std::vector dp0 = {1, 0, 1, 6, 90, 2040, 67950}; 40 | for (int64 i = 7; i < 100; ++i) { 41 | dp0.push_back((2 * i * (i - 1) * dp0[i - 1] + i * sq(i - 1) * dp0[i - 2]) / 42 | 2); 43 | } 44 | 45 | std::vector dp1 = {1, 0, 1, 6, 90, 2040, 67950}; 46 | for (int64 i = 7; i < 100; ++i) { 47 | dp1.push_back(i * sq(i - 1) * 48 | ((2 * i - 3) * dp1[i - 2] + sq(i - 2) * dp1[i - 3]) / 2); 49 | } 50 | 51 | helper.Validate(dp0, result); 52 | helper.Validate(dp1, result); 53 | 54 | return 0; 55 | } -------------------------------------------------------------------------------- /example/mma_interpolating_polynomial.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | int64 f(int64 x) { return 1 + 2 * x + 3 * x * x + 4 * x * x * x; } 5 | 6 | int64 g(int64 x, int64 y) { 7 | return 1 + 2 * x + 3 * y + 4 * x * y + 5 * x * x + 6 * y * y; 8 | } 9 | 10 | int main() { 11 | PE_INIT(maxp = 2000000); 12 | { 13 | mma::IntPoly ip; 14 | ip.Reset({"x"}); // one variable x. 
15 | for (int64 x = 0; x <= 10; ++x) { 16 | ip.Add({x}, f(x)); 17 | } 18 | std::cout << ip << std::endl; 19 | // output: 20 | // InterpolatingPolynomial[{{0, 1}, {1, 10}, {2, 49}, {3, 142}, {4, 313}, 21 | // {5, 586}, {6, 985}, {7, 1534}, {8, 2257}, {9, 3178}, {10, 4321}}, x] 22 | // Simplified mma output: 23 | // 1 + 2 x + 3 x^2 + 4 x^3 24 | } 25 | { 26 | mma::IntPoly2D ip(2, // x's degree 27 | 2, // y's degree 28 | 2 // max a + b of terms in the form of x^a y^b 29 | ); 30 | for (int64 x = 0; x <= 4; ++x) 31 | for (int64 y = 0; y <= 4; ++y) { 32 | ip.Add({x, y}, g(x, y)); 33 | } 34 | std::cout << ip << std::endl; 35 | // output: 36 | // Values[Solve[{1 x0 + 0 x1 + 0 x2 + 0 x3 + 0 x4 + 0 x5 == 1 && 1 x0 + 1 x1 37 | // + 1 x2 + 0 x3 + 0 x4 + 0 x5 == 8 && 1 x0 + 2 x1 + 4 x2 + 0 x3 + 0 x4 + 0 38 | // x5 == 25 && 1 x0 + 3 x1 + 9 x2 + 0 x3 + 0 x4 + 0 x5 == 52 && 1 x0 + 4 x1 39 | // + 16 x2 + 0 x3 + 0 x4 + 0 x5 == 89 && 1 x0 + 0 x1 + 0 x2 + 1 x3 + 0 x4 + 40 | // 1 x5 == 6 && 1 x0 + 1 x1 + 1 x2 + 1 x3 + 1 x4 + 1 x5 == 16 && 1 x0 + 2 x1 41 | // + 4 x2 + 1 x3 + 2 x4 + 1 x5 == 36 && 1 x0 + 3 x1 + 9 x2 + 1 x3 + 3 x4 + 1 42 | // x5 43 | // == 66 && 1 x0 + 4 x1 + 16 x2 + 1 x3 + 4 x4 + 1 x5 == 106 && 1 x0 + 0 x1 + 44 | // 0 x2 + 2 x3 + 0 x4 + 4 x5 == 19&& 1 x0 + 1 x1 + 1 x2 + 2 x3 + 2 x4 + 4 x5 45 | // == 32 && 1 x0 + 2 x1 + 4 x2 + 2 x3 + 4 x4 + 4 x5 == 55 && 1 x0 + 3 x1 + 9 46 | // x2 + 2 x3 + 6 x4 + 4 x5 == 88 && 1 x0 + 4 x1 + 16 x2 + 2 x3 + 8 x4 + 4 x5 47 | // == 131 && 1 x0 + 0 x1 + 0 x2 + 3 x3 + 0 x4 + 9 x5 == 40 && 1 x0 + 1 x1 + 48 | // 1 x2 + 3 x3 + 3 x4 + 9 x5 == 56 && 1 x0 + 2 x1 + 4 x2+ 3 x3 + 6 x4 + 9 x5 49 | // == 82 && 1 x0 + 3 x1 + 9 x2 + 3 x3 + 9 x4 + 9 x5 == 118 && 1 x0 + 4 x1 + 50 | // 16 x2 + 3 x3 + 12 x4 + 9 x5 == 164 && 1 x0 + 0 x1 + 0 x2 + 4 x3 + 0 x4 + 51 | // 16 x5 == 69 && 1 x0 + 1 x1 + 1 x2 + 4 x3 + 4 x4 + 16 x5== 88 && 1 x0 + 2 52 | // x1 + 4 x2 + 4 x3 + 8 x4 + 16 x5 == 117 && 1 x0 + 3 x1 + 9 x2 + 4 x3 + 12 53 | // x4 + 16 x5 == 156 && 1 x0 + 4 x1 + 16 
x2 + 4 x3 + 16 x4 + 16 x5 == 205}, 54 | // {x0, x1, x2, x3, x4, x5}]] mma output: 55 | // {{1, 3, 6, 2, 4, 5}} 56 | ip.Show(std::cout, {1, 3, 6, 2, 4, 5}); 57 | // output: 58 | // 1 + 3 y + 6 y^2 + 2 x + 4 x y + 5 x^2 59 | } 60 | return 0; 61 | } -------------------------------------------------------------------------------- /example/mma_to_cpp.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | int main() { 5 | PE_INIT(maxp = 2000000); 6 | // Convert mathematica expression to cpp 7 | // For mathematica function, it only supports Sqrt 8 | for (auto a : mma::Compile( 9 | "Sqrt(-A^4 + 2 A^2 B^2 - B^4 + 2 A^2 x1^2 + 2 B^2 x1^2 - x1^4 - 4 " 10 | "A^2 x1 x2 - 4 B^2 x1 x2 + 4 x1^3 x2 + 2 A^2 x2^2 + 2 B^2 x2^2 - " 11 | "6 x1^2 x2^2 + 4 x1 x2^3 - x2^4)/(2 Sqrt(x1^2 - 2 x1 x2 + " 12 | "x2^2))")) { 13 | std::cout << a << std::endl; 14 | } 15 | // output: 16 | // sqrt(-A * A * A * A + 2 * A * A * B * B - B * B * B * B + 2 * A * A * x1 * 17 | // x1 + 2 * B * B * x1 * x1 - x1 * x1 * x1 * x1 - 4 * A * A * x1 * x2 - 4 * B 18 | // * B * x1 * x2 + 4 * x1 * x1 * x1 * x2 + 2 * A * A * x2 * x2 + 2 * B * B * 19 | // x2 * x2 - 6 * x1 * x1 * x2 * x2 + 4 * x1 * x2 * x2 * x2 - x2 * x2 * x2 * 20 | // x2) / (2 21 | // * sqrt(x1 * x1 - 2 * x1 * x2 + x2 * x2)) 22 | // Convert mathematica expression to cpp using modular arithmetic. 
23 | for (auto a : mma::CompileMod("(a^4+a b)*7/b")) { 24 | std::cout << a << std::endl; 25 | } 26 | // output: 27 | // int64 foo(int64 a, int64 b, int64 mod) { 28 | // const int64 t0 = a % mod; 29 | // const int64 t1 = 4; 30 | // const int64 t2 = PowerMod(t0, t1, mod) % mod; 31 | // const int64 t3 = b % mod; 32 | // const int64 t4 = (t0 * t3) % mod; 33 | // const int64 t5 = (t2 + t4) % mod; 34 | // const int64 t6 = 7 % mod; 35 | // const int64 t7 = (t5 * t6) % mod; 36 | // const int64 t8 = t7 * ModInv(t3, mod) % mod; 37 | // return t8; 38 | // } 39 | return 0; 40 | } -------------------------------------------------------------------------------- /example/mod_number.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | const int64 mod = 1000000007; 5 | 6 | int main() { 7 | { 8 | NMod64 a(5); 9 | std::cout << a.Power(10) << std::endl; 10 | std::cout << Power(a, 10) << std::endl; 11 | std::cout << PowerMod(a, 10) << std::endl; 12 | std::cout << PowerMod(a, 10, mod) << std::endl; 13 | std::cout << PowerMod(a, 10) << std::endl; 14 | std::cout << PowerMod(a, 10, mod) << std::endl; 15 | } 16 | { 17 | TLMod::Set(mod); 18 | TLNMod64<> a(5); 19 | std::cout << a.Power(10) << std::endl; 20 | std::cout << Power(a, 10) << std::endl; 21 | std::cout << PowerMod(a, 10) << std::endl; 22 | std::cout << PowerMod(a, 10, mod) << std::endl; 23 | std::cout << PowerMod(a, 10) << std::endl; 24 | std::cout << PowerMod(a, 10, mod) << std::endl; 25 | } 26 | { 27 | DefaultMod::Set(mod); 28 | NModNumber a(5); 29 | std::cout << a.Power(10) << std::endl; 30 | std::cout << Power(a, 10) << std::endl; 31 | std::cout << PowerMod(a, 10) << std::endl; 32 | std::cout << PowerMod(a, 10, mod) << std::endl; 33 | std::cout << PowerMod(a, 10) << std::endl; 34 | std::cout << PowerMod(a, 10, mod) << std::endl; 35 | } 36 | { 37 | // NModNumber> a(5); 38 | } 39 | 40 | { 41 | NModM64 a(5); 42 | std::cout << a.Power(10) << std::endl; 43 | 
std::cout << Power(a, 10) << std::endl; 44 | std::cout << PowerMod(a, 10) << std::endl; 45 | std::cout << PowerMod(a, 10, mod) << std::endl; 46 | std::cout << PowerMod(a, 10) << std::endl; 47 | std::cout << PowerMod(a, 10, mod) << std::endl; 48 | } 49 | { 50 | TLMod::Set(mod); 51 | TLNModM64<> a(5); 52 | std::cout << a.Power(10) << std::endl; 53 | std::cout << Power(a, 10) << std::endl; 54 | std::cout << PowerMod(a, 10) << std::endl; 55 | std::cout << PowerMod(a, 10, mod) << std::endl; 56 | std::cout << PowerMod(a, 10) << std::endl; 57 | std::cout << PowerMod(a, 10, mod) << std::endl; 58 | } 59 | { 60 | DefaultMod::Set(mod); 61 | NModNumberM a(5); 62 | std::cout << a.Power(10) << std::endl; 63 | std::cout << Power(a, 10) << std::endl; 64 | std::cout << PowerMod(a, 10) << std::endl; 65 | std::cout << PowerMod(a, 10, mod) << std::endl; 66 | std::cout << PowerMod(a, 10) << std::endl; 67 | std::cout << PowerMod(a, 10, mod) << std::endl; 68 | } 69 | { 70 | // Not recommended. 71 | NModNumberM> a(5, MemMod(mod)); 72 | std::cout << a.Power(10) << std::endl; 73 | std::cout << Power(a, 10) << std::endl; 74 | std::cout << PowerMod(a, 10) << std::endl; 75 | std::cout << PowerMod(a, 10, mod) << std::endl; 76 | std::cout << PowerMod(a, 10) << std::endl; 77 | std::cout << PowerMod(a, 10, mod) << std::endl; 78 | } 79 | return 0; 80 | } -------------------------------------------------------------------------------- /example/multiplicative_function_prefix_sum_common_function.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | const int64 mod = 1000000007; 5 | 6 | int main() { 7 | PE_INIT(maxp = 2000000, cal_phi = 1, cal_mu = 1); 8 | 9 | SFCounter sf_counter; 10 | MuSummer mu_summer; 11 | MuPhiSummer mu_phi_summer; 12 | Sigma0Summer sigma0_summer; 13 | MuPhiSumModer mu_phi_sum_moder(mod); 14 | Sigma0SumModer sigma0_sum_moder(mod); 15 | 16 | for (int64 n = 1; n <= 10000000000; n *= 10) { 17 | std::cout << n 
<< " = " << n << std::endl; 18 | std::cout << "# square free number " << sf_counter.Cal(n) << std::endl; 19 | std::cout << "sum mu " << mu_summer.Cal(n) << std::endl; 20 | std::cout << "sum mu " << mu_phi_summer.CalSumMu(n) << std::endl; 21 | std::cout << "sum phi " << mu_phi_summer.CalSumPhi(n) << std::endl; 22 | std::cout << "sum sigma0 " << sigma0_summer.Cal(n) << std::endl; 23 | 24 | std::cout << "sum mu mod " << mod << " " << mu_phi_sum_moder.CalSumMu(n) 25 | << std::endl; 26 | std::cout << "sum phi mod " << mod << " " << mu_phi_sum_moder.CalSumPhi(n) 27 | << std::endl; 28 | std::cout << "sum sigma0 mod " << mod << " " << sigma0_sum_moder.Cal(n) 29 | << std::endl; 30 | std::cout << std::endl; 31 | } 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /example/parallel_cal_prime_pi.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | #if 0 5 | const int TN = 8; 6 | int64 CalPi0(int64 n) { 7 | int64 result = PARALLEL_RESULT( 8 | BEGIN_PARALLEL 9 | FROM 1 TO n EACH_BLOCK_IS 10000000 CACHE "" 10 | THREADS TN 11 | MAP { 12 | return IsPrimeEx(key); 13 | } 14 | REDUCE { 15 | result += value; 16 | return result; 17 | } 18 | END_PARALLEL); 19 | return result; 20 | } 21 | 22 | struct CalPI : public ParallelRangeT { 23 | int64 UpdateResult(int64 result, int64 value) { return result + value; } 24 | int64 WorkOnBlock(int64 first, int64 last, int64 worker) { 25 | int64 t = 0; 26 | for (int64 i = first; i <= last; ++i) t += IsPrimeEx(i); 27 | return t; 28 | } 29 | }; 30 | 31 | int64 CalPi1(int64 n) { 32 | return CalPI() 33 | .From(1) 34 | .To(n) 35 | .DividedBy(10000000) 36 | .SetThreadsCount(TN) 37 | .Start() 38 | .Result(); 39 | } 40 | 41 | int main() { 42 | PE_INIT(maxp = 2000000); 43 | 44 | const int m = 8; 45 | const int n = Power(10, m); 46 | 47 | int64 ans0 = CalPi0(n); 48 | int64 ans1 = CalPi1(n); 49 | std::cout << "n = " << n << 
std::endl; 50 | std::cout << "Expected: " << kPrimePi[m] << std::endl; 51 | std::cout << "CalPi0: " << ans0 << std::endl; 52 | std::cout << "CalPi1: " << ans1 << std::endl; 53 | 54 | return 0; 55 | } 56 | #else 57 | int main() { 58 | return 0; 59 | } 60 | #endif -------------------------------------------------------------------------------- /example/partition_mobius.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | const int S = 500; 5 | // The number of non-negative solution for 6 | // 1 x1 + 2 x2 + 3 x3 + 4 x4 + 5 x5 = S 7 | // x1, x2, ..., x5 are distinct. 8 | 9 | const int64 mod = 1000000007; 10 | using MT = NMod64; 11 | 12 | char used[S + 1]; 13 | int64 dfs(int now, int s) { 14 | if (now == 6) return s == 0; 15 | int64 ret = 0; 16 | for (int i = 0; i <= S && now * i <= s; ++i) 17 | if (used[i] == 0) { 18 | used[i] = 1; 19 | ret += dfs(now + 1, s - now * i); 20 | used[i] = 0; 21 | } 22 | return ret; 23 | } 24 | 25 | int64 solve0() { return dfs(1, S); } 26 | 27 | std::map, int64> cache; 28 | int64 compute(std::vector coe) { 29 | // sum(coe[i] * x_i) = S 30 | auto where = cache.find(coe); 31 | if (where != cache.end()) { 32 | return where->second; 33 | } 34 | int64 dp[S + 1] = {1}; 35 | for (int& iter : coe) 36 | for (int i = 0; i + iter <= S; ++i) { 37 | dp[i + iter] += dp[i]; 38 | } 39 | return cache[coe] = dp[S]; // store the result so the cache lookup above can actually hit 40 | } 41 | 42 | int64 solve1() { 43 | PartitionMobius pm(mod); 44 | std::vector pattern = {1, 2, 3, 4, 5}; 45 | MT ret = 0; 46 | for (Partition p : Partition::GenPartitions(5)) { 47 | std::map mem; 48 | for (int i = 0; i < 5; ++i) mem[p.colors[i]] += pattern[i]; 49 | std::vector key; 50 | for (auto& i : mem) key.push_back(i.second); 51 | std::sort(std::begin(key), std::end(key)); 52 | ret += compute(key) * pm.Cal(p) % mod; 53 | } 54 | return ret.value(); 55 | } 56 | 57 | int main() { 58 | std::cout << solve0() << std::endl; 59 | std::cout << solve1() << std::endl; 60 | return 0; 61 | 
} -------------------------------------------------------------------------------- /example/pe_db.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | void MakeDb() { 5 | PeDb db("D:/"); 6 | const int64 n = 100000000000000; 7 | db.MergePrimePi(PrimeS0Parallel(n)); 8 | #if PE_HAS_INT128 9 | db.MergePrimeSum(PrimeS1Parallel(n)); 10 | #endif 11 | db.Save(); 12 | } 13 | 14 | int main() { 15 | PE_INIT(maxp = 70000000); 16 | // MakeDb(); 17 | const int64 n = 10000000000; 18 | PeDb db("D:/"); 19 | db.Load(); 20 | 21 | { 22 | DVA dva = PrimeS0Ex(n); 23 | DVA dva1(n); 24 | db.FillPrimePi(dva1); 25 | for (int i = 0; i < dva.key_size; ++i) { 26 | if (dva.values[i] != dva1.values[i]) { 27 | std::cout << dva.values[i] << std::endl; 28 | std::cout << dva1.values[i] << std::endl; 29 | } 30 | assert(dva.values[i] == dva1.values[i]); 31 | } 32 | std::cout << dva[n] << std::endl; 33 | std::cout << dva1[n] << std::endl; 34 | // https://oeis.org/A006880 35 | std::cout << db.PrimePi(Power(10LL, 14)) << std::endl; 36 | } 37 | #if PE_HAS_INT128 38 | { 39 | DVA dva = PrimeS1Ex(n); 40 | DVA dva1(n); 41 | db.FillPrimeSum(dva1); 42 | for (int i = 0; i < dva.key_size; ++i) { 43 | if (dva.values[i] != dva1.values[i]) { 44 | std::cout << dva.values[i] << std::endl; 45 | std::cout << dva1.values[i] << std::endl; 46 | } 47 | assert(dva.values[i] == dva1.values[i]); 48 | } 49 | std::cout << dva[n] << std::endl; 50 | std::cout << dva1[n] << std::endl; 51 | // https://oeis.org/A046731 52 | std::cout << db.PrimeSum(Power(10LL, 14)) << std::endl; 53 | } 54 | #endif 55 | return 0; 56 | } -------------------------------------------------------------------------------- /example/power_sum.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | const int64 mod = 1000000007; 5 | using MT = NMod64; 6 | 7 | using SumfunctionType = int64 (*)(int64 n, int64 mod); 8 
| SumfunctionType sum_function[8] = { 9 | nullptr, &P1SumMod, &P2SumMod, &P3SumMod, 10 | &P4SumMod, &P5SumMod, &P6SumMod, &P7SumMod, 11 | }; 12 | 13 | int main() { 14 | PE_INIT(maxp = 2000000); 15 | PowerSumModer moder0(mod); // maxk = 128 16 | PowerSumModerB moder1(mod); // maxk = 128 17 | PowerSumModerB1 moder2(mod); // maxk = 128 18 | for (int k = 0; k <= 10; ++k) { 19 | std::cout << "k = " << k << std::endl; 20 | for (int64 n = 1; n <= 1000000000; n *= 10) { 21 | std::cout << "n = " << n << std::endl; 22 | std::vector ans = PowerSumModBatch(n, k, mod); 23 | std::cout << "InitPowerSumMod " << ans[k] << std::endl; 24 | std::cout << "PowerSumModer " << moder0.Cal(n, k) << std::endl; 25 | std::cout << "PowerSumModerB " << moder1.Cal(n, k) << std::endl; 26 | std::cout << "PowerSumModerB1 " << moder2.Cal(n, k) << std::endl; 27 | if (k >= 1 && k <= 7) { 28 | std::cout << "P" << k << "SumMod " << sum_function[k](n, mod) 29 | << std::endl; 30 | } 31 | std::cout << std::endl; 32 | } 33 | std::cout << std::endl; 34 | } 35 | return 0; 36 | } -------------------------------------------------------------------------------- /example/prime_power_sum.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | const int64 mod = 1000000007; 5 | using MT = NMod64; 6 | 7 | void prime_s0() { 8 | CachedPi cp; 9 | for (int64 n = 1; n <= 10000000000; n *= 10) { 10 | std::cout << "n = " << n << std::endl; 11 | std::cout << "PrimeS0 " << PrimeS0(n)[n] % mod << std::endl; 12 | std::cout << "PrimeS0Parallel " << PrimeS0Parallel(n)[n] % mod 13 | << std::endl; 14 | std::cout << "PrimeS0Ex " << PrimeS0Ex(n)[n] % mod 15 | << std::endl; 16 | std::cout << "PrimeS0 " << PrimeS0(n)[n] << std::endl; 17 | std::cout << "PrimeS0Parallel " << PrimeS0Parallel(n)[n] << std::endl; 18 | std::cout << "PrimeS0Ex " << PrimeS0Ex(n)[n] << std::endl; 19 | std::cout << "PrimeSkEx " << PrimeSkEx(n, 0)[n] << std::endl; 20 | std::cout << "CachedPi 
" << cp.Cal(n) % mod << std::endl; 21 | std::cout << std::endl; 22 | } 23 | } 24 | 25 | void prime_s1() { 26 | for (int64 n = 1; n <= 10000000000; n *= 10) { 27 | std::cout << "n = " << n << std::endl; 28 | std::cout << "PrimeS1 " << PrimeS1(n)[n] % mod << std::endl; 29 | std::cout << "PrimeS1Parallel " << PrimeS1Parallel(n)[n] % mod 30 | << std::endl; 31 | std::cout << "PrimeS1Ex " << PrimeS1Ex(n)[n] % mod 32 | << std::endl; 33 | std::cout << "PrimeS1 " << PrimeS1(n)[n] << std::endl; 34 | std::cout << "PrimeS1Parallel " << PrimeS1Parallel(n)[n] << std::endl; 35 | std::cout << "PrimeS1Ex " << PrimeS1Ex(n)[n] << std::endl; 36 | std::cout << "PrimeSkEx " << PrimeSkEx(n, 1)[n] << std::endl; 37 | std::cout << std::endl; 38 | } 39 | } 40 | 41 | void prime_pmod_s0() { 42 | for (int64 n = 1; n <= 1000000000; n *= 10) { 43 | std::cout << "n = " << n << std::endl; 44 | for (int pmod = 2; pmod <= 7; ++pmod) { 45 | auto ans0 = PrimeS0PMod(n, pmod); 46 | auto ans1 = PrimeSkPMod(n, 0, pmod); 47 | std::cout << "pmod = " << pmod << std::endl; 48 | for (int j = 0; j < pmod; ++j) { 49 | // number of prime such that p % pmod = j 50 | std::cout << "p % " << pmod << " = " << j << " " << ans0[j][n] << " " 51 | << ans1[j][n] << std::endl; 52 | } 53 | std::cout << std::endl; 54 | } 55 | std::cout << std::endl; 56 | } 57 | } 58 | 59 | void prime_pmod_s1() { 60 | for (int64 n = 1; n <= 1000000000; n *= 10) { 61 | std::cout << "n = " << n << std::endl; 62 | for (int pmod = 2; pmod <= 7; ++pmod) { 63 | auto ans0 = PrimeS1PMod(n, pmod); 64 | auto ans1 = PrimeSkPMod(n, 1, pmod); 65 | std::cout << "pmod = " << pmod << std::endl; 66 | for (int j = 0; j < pmod; ++j) { 67 | // sum of prime such that p % pmod = j 68 | std::cout << "p % " << pmod << " = " << j << " " << ans0[j][n] << " " 69 | << ans1[j][n] << std::endl; 70 | } 71 | std::cout << std::endl; 72 | } 73 | std::cout << std::endl; 74 | } 75 | } 76 | 77 | int main() { 78 | PE_INIT(maxp = 2000000); 79 | 80 | prime_s0(); 81 | prime_s1(); 82 | 
prime_pmod_s0(); 83 | prime_pmod_s1(); 84 | return 0; 85 | } -------------------------------------------------------------------------------- /example/random_sample.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | const int sample_count = 1000000; 5 | 6 | const double coe15 = 1. / (1ULL << 15); 7 | double CalPi_CRand15() { 8 | int ok = 0; 9 | for (int i = 0; i < sample_count; ++i) { 10 | double p[]{coe15 * CRand15(), coe15 * CRand15()}; 11 | if (p[0] * p[0] + p[1] * p[1] < 1) { 12 | ++ok; 13 | } 14 | } 15 | return 4. * ok / sample_count; 16 | } 17 | 18 | const double coe31 = 1. / (1ULL << 31); 19 | double CalPi_CRand31() { 20 | int ok = 0; 21 | for (int i = 0; i < sample_count; ++i) { 22 | double p[]{coe31 * CRand31(), coe31 * CRand31()}; 23 | if (p[0] * p[0] + p[1] * p[1] < 1) { 24 | ++ok; 25 | } 26 | } 27 | return 4. * ok / sample_count; 28 | } 29 | 30 | const double coe63 = 1. / (1ULL << 63); 31 | double CalPi_CRand63() { 32 | int ok = 0; 33 | for (int i = 0; i < sample_count; ++i) { 34 | double p[]{coe63 * CRand63(), coe63 * CRand63()}; 35 | if (p[0] * p[0] + p[1] * p[1] < 1) { 36 | ++ok; 37 | } 38 | } 39 | return 4. * ok / sample_count; 40 | } 41 | 42 | const int maxn = 2000000000; 43 | auto rand_generator = MakeUniformGenerator(0, maxn - 1); 44 | const double randcoe = 1. / maxn; 45 | double CalPi_Rand() { 46 | int ok = 0; 47 | for (int i = 0; i < sample_count; ++i) { 48 | double p[]{randcoe * rand_generator(), randcoe * rand_generator()}; 49 | if (p[0] * p[0] + p[1] * p[1] < 1) { 50 | ++ok; 51 | } 52 | } 53 | return 4. * ok / sample_count; 54 | } 55 | 56 | double CalPi_Halton() { 57 | int ok = 0; 58 | for (int i = 0; i < sample_count; ++i) { 59 | std::vector p = Halton(i, 2); 60 | if (p[0] * p[0] + p[1] * p[1] < 1) { 61 | ++ok; 62 | } 63 | } 64 | return 4. 
* ok / sample_count; 65 | } 66 | 67 | #if HAS_MPF 68 | double CalPi_Mpf() { 69 | gmp_randstate_t state; 70 | gmp_randinit_mt(state); 71 | Mpf::SetDefaultPrec(200); 72 | int ok = 0; 73 | for (int i = 0; i < sample_count; ++i) { 74 | Mpf a; 75 | Mpf b; 76 | mpf_urandomb(a.mpf(), state, 128); 77 | mpf_urandomb(b.mpf(), state, 128); 78 | if (a * a + b * b < 1) { 79 | ++ok; 80 | } 81 | } 82 | return 4. * ok / sample_count; 83 | } 84 | #endif 85 | 86 | int main() { 87 | PE_INIT(maxp = 1000000); 88 | printf("CRand15\t%.16f\n", CalPi_CRand15()); 89 | printf("CRand31\t%.16f\n", CalPi_CRand31()); 90 | printf("CRand63\t%.16f\n", CalPi_CRand63()); 91 | printf("Rand\t%.16f\n", CalPi_Rand()); 92 | printf("Halton\t%.16f\n", CalPi_Halton()); 93 | #if HAS_MPF 94 | printf("Mpf\t%.16f\n", CalPi_Mpf()); 95 | #endif 96 | return 0; 97 | } -------------------------------------------------------------------------------- /example/sym_poly.c: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace pe; 3 | 4 | // Guess the solution to a^2=b^2+c^2+bc where 5 | // a = t1, b = t2, c = t3, c7 = 1, c8 = 0, c9 = -1 6 | SymPoly t1("c1 m^2 + c2 m n + c3 n^2"); 7 | SymPoly t2("c4 m^2 + c5 m n + c6 n^2"); 8 | SymPoly t3("c7 m^2 + c8 m n + c9 n^2"); 9 | 10 | SymPoly target = SymPoly("a^2-b^2-c^2-b c") 11 | .Replace("a", t1) 12 | .Replace("b", t2) 13 | .Replace("c", t3); 14 | 15 | int his[10]; 16 | void dfs(int now, const SymPoly& p) { 17 | if (now == 10) { 18 | if (std::empty(p.terms()) && his[7] == 1 && his[8] == 0 && his[9] == -1) { 19 | auto aa = 20 | t1.Replace("c1", his[1]).Replace("c2", his[2]).Replace("c3", his[3]); 21 | auto bb = 22 | t2.Replace("c4", his[4]).Replace("c5", his[5]).Replace("c6", his[6]); 23 | auto cc = 24 | t3.Replace("c7", his[7]).Replace("c8", his[8]).Replace("c9", his[9]); 25 | if (std::empty(aa.terms()) || std::empty(bb.terms()) || std::empty(cc.terms())) 26 | return; 27 | std::cout << "a = " << aa << std::endl; 28 | 
std::cout << "b = " << bb << std::endl; 29 | std::cout << "c = " << cc << std::endl; 30 | std::cout << std::endl; 31 | } 32 | } else { 33 | std::string me = "c" + ToString(now); 34 | for (int i = -1; i <= 2; ++i) { 35 | his[now] = i; 36 | dfs(now + 1, p.Replace(me, i)); 37 | } 38 | } 39 | } 40 | 41 | int main() { 42 | std::cout << target.Replace("n", 1).Replace("m", 1) << std::endl; 43 | dfs(1, target); 44 | return 0; 45 | } -------------------------------------------------------------------------------- /format.py: -------------------------------------------------------------------------------- 1 | #! python2 2 | import os 3 | import subprocess 4 | 5 | CURRENT_DIRECTORY = os.getcwd() 6 | # BINARY_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) 7 | 8 | compile_commands_template = """[{ 9 | "directory": 10 | "$(CURRENT_DIRECTORY)", 11 | "file": 12 | "pe", 13 | "arguments": [ 14 | "clang++.exe", "-xc++", "pe", "--driver-mode=g++", "-c", "--std=c++17", 15 | "-O3", "-march=native", "-mtune=native", 16 | "--target=x86_64-w64-windows-gnu", "-fopenmp" 17 | ] 18 | }]""" 19 | 20 | tidy_options = [ 21 | '-checks=-*', 'google-readability-casting', 22 | 'google-readability-braces-around-statements', 23 | 'google-readability-namespace-comments', 'performance-*', 'modernize-use-*', 24 | '-modernize-use-trailing-return-type', '-modernize-use-nodiscard', 25 | 'misc-unused-parameters' 26 | ] 27 | 28 | tidy_cmd = [ 29 | 'run-clang-tidy.py', ','.join(tidy_options), '-header-filter=pe.*', 30 | '-export-fixes=format-fixes.yaml', '-fix', 'pe' 31 | ] 32 | 33 | 34 | def tidy_code(): 35 | with open('compile_commands.json', 'wb') as tempf: 36 | tempf.write( 37 | compile_commands_template.replace( 38 | '$(CURRENT_DIRECTORY)', CURRENT_DIRECTORY.replace('\\', '\\\\'))) 39 | os.system(' '.join(tidy_cmd)) 40 | os.remove('compile_commands.json') 41 | os.remove('format-fixes.yaml') 42 | 43 | 44 | def should_format(filename): 45 | if filename in ['parallel_cal_prime_pi.c']: 46 | return 
False 47 | 48 | _, file_ext_name = os.path.splitext(filename) 49 | 50 | return file_ext_name in ['', '.h', '.hpp', '.c', '.cxx', '.cpp'] 51 | 52 | 53 | def format_code(): 54 | for rt, _, files in os.walk(CURRENT_DIRECTORY): 55 | if rt.find('.git') != -1: 56 | continue 57 | for f in files: 58 | if should_format(f): 59 | fpath = os.path.join(rt, f) 60 | print(fpath) 61 | subprocess.call('clang-format -style=Google -sort-includes=0 -i %s' % 62 | fpath) 63 | 64 | 65 | if __name__ == '__main__': 66 | tidy_code() 67 | format_code() 68 | -------------------------------------------------------------------------------- /gen_config.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # -*- coding: UTF-8 -*- 3 | import os 4 | 5 | CURRENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) 6 | TARGET_FILENAME = 'pe_config' 7 | TARGET_PATH = os.path.join(CURRENT_DIRECTORY, TARGET_FILENAME) 8 | 9 | SPLITTER = ';' if os.name == 'nt' else ':' 10 | CHECKING_PATHS = os.environ.get('CPLUS_INCLUDE_PATH', '').split(SPLITTER) 11 | 12 | RULES = [ 13 | ('ENABLE_EIGEN', ['Eigen/Dense']), 14 | ('ENABLE_GMP', ['gmp.h']), 15 | ('ENABLE_FLINT', ['flint.h']), 16 | ('ENABLE_MPFR', ['mpfr.h']), 17 | #('ENABLE_MPIR', ['mpir.h']), 18 | ('ENABLE_LIBBF', ['libbf.h']), 19 | ('ENABLE_NTL', ['NTL/ZZ.h']), 20 | ('ENABLE_ZMQ', ['zmq/zmq.h']), 21 | ('ENABLE_PRIME_COUNT', ['primecount.hpp']), 22 | ('ENABLE_PRIME_SIEVE', ['primesieve.hpp']), 23 | ('ENABLE_TCMALLOC', []), # Always disable 24 | ] 25 | 26 | content = [] 27 | 28 | 29 | def add_define(key, value): 30 | content.append('#ifndef %s' % key) 31 | content.append('#define %s %s' % (key, value)) 32 | content.append('#endif') 33 | content.append('') 34 | 35 | 36 | def check_target(path): 37 | for folder in CHECKING_PATHS: 38 | if not os.path.exists(folder): 39 | continue 40 | for i in path: 41 | if os.path.exists(os.path.join(folder, i)): 42 | return True 43 | return False 44 | 45 | 46 | def main(): 
47 | content.append('#ifndef PE_CONFIG_') 48 | content.append('#define PE_CONFIG_') 49 | content.append('') 50 | content.append('// This file provides a centralized place to configure pe') 51 | content.append('') 52 | content.append( 53 | '// Auto generated by gen_config.py, and you can edit it manually') 54 | content.append('') 55 | content.append('// Configuration priority (first match):') 56 | content.append('// 1. Compiling command') 57 | content.append('// 2. The configurations in this file') 58 | content.append('// 3. The configurations in file pe') 59 | content.append('') 60 | add_define('ENABLE_ASSERT', '1') 61 | add_define('TRY_TO_USE_INT128', '1') 62 | for (key, value) in RULES: 63 | ok = check_target(value) 64 | add_define(key, '1' if ok else '0') 65 | content.append('#endif') 66 | with open(TARGET_PATH, 'wb') as tempf: 67 | tempf.write('\r\n'.join(content).encode('utf8')) 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /legacy/pe_poly.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PE_POLY_ 2 | #define PE_POLY_ 3 | 4 | #include "pe_base" 5 | #include "pe_type_traits" 6 | #include "pe_mod" 7 | #include "pe_nt" 8 | #include "pe_poly_base" 9 | 10 | namespace pe { 11 | struct NModPoly { 12 | int64 mod; 13 | std::vector data; 14 | 15 | NModPoly(int64 mod = 1) : mod(mod) {} 16 | 17 | NModPoly(const std::vector& data, int64 mod, 18 | int adjust_leading_zero = 1) 19 | : data(data), mod(mod) { 20 | AdjustMod(); 21 | if (adjust_leading_zero) { 22 | AdjustLeadingZeros(); 23 | } 24 | } 25 | 26 | NModPoly(std::vector&& data, int64 mod, int adjust_leading_zero = 1) 27 | : data(std::move(data)), mod(mod) { 28 | AdjustMod(); 29 | if (adjust_leading_zero) { 30 | AdjustLeadingZeros(); 31 | } 32 | } 33 | 34 | NModPoly& operator=(const std::vector& v) { 35 | data = v; 36 | return *this; 37 | } 38 | 39 | NModPoly& operator=(const 
std::vector&& v) { 40 | data = v; 41 | return *this; 42 | } 43 | 44 | NModPoly(const NModPoly& p) = default; 45 | NModPoly(NModPoly&& p) = default; 46 | NModPoly& operator=(const NModPoly& other) = default; 47 | NModPoly& operator=(NModPoly&& other) = default; 48 | 49 | NModPoly& AdjustLeadingZeros() { 50 | AdjustPolyLeadingZero(data); 51 | return *this; 52 | } 53 | 54 | NModPoly& AdjustMod() { 55 | for (auto& iter : data) { 56 | iter = Mod(iter, mod); 57 | } 58 | return *this; 59 | } 60 | 61 | int64 deg() const { return static_cast(std::size(data) - 1); } 62 | int64 size() const { return static_cast(std::size(data)); } 63 | 64 | NModPoly& Resize(int64 n) { 65 | const int64 m = static_cast(std::size(data)); 66 | data.resize(n); 67 | for (int64 i = m; i < n; ++i) { 68 | data[i] = 0; 69 | } 70 | return *this; 71 | } 72 | 73 | NModPoly& Redeg(int64 n) { return Resize(n + 1); } 74 | 75 | int64& operator[](int64 idx) { 76 | PE_ASSERT(idx >= 0 && idx < static_cast(std::size(data))); 77 | return data[idx]; 78 | } 79 | 80 | int64 operator[](int64 idx) const { 81 | PE_ASSERT(idx >= 0 && idx < static_cast(std::size(data))); 82 | return data[idx]; 83 | } 84 | 85 | int64 At(int64 idx) const { 86 | return idx >= 0 && idx < static_cast(std::size(data)) ? 
data[idx] 87 | : 0; 88 | } 89 | 90 | bool IsZero() const { return std::size(data) == 1 && data[0] == 0; } 91 | 92 | int64 ValueAt(int64 v) const { 93 | int64 ret = 0; 94 | const int64 t = Mod(v, mod); 95 | for (int64 i = deg(); i >= 0; --i) { 96 | ret = AddMod(MulMod(ret, t, mod), data[i], mod); 97 | } 98 | return ret; 99 | } 100 | 101 | NModPoly LowerTerms(int64 n, int adjust_leading_zero = 1) const { 102 | const int64 m = std::min(n, static_cast(std::size(data))); 103 | return NModPoly(std::vector(data.begin(), data.begin() + m), mod, 104 | adjust_leading_zero); 105 | } 106 | 107 | NModPoly Inv(int64 n) const; 108 | }; 109 | 110 | template 111 | struct NModPolyT : public NModPoly { 112 | NModPolyT() : NModPoly(M) {} 113 | 114 | NModPolyT(const std::vector& data) : NModPoly(data, M) {} 115 | 116 | NModPolyT(std::vector&& data) : NModPoly(std::move(data), M) {} 117 | 118 | NModPolyT(std::initializer_list l) 119 | : NModPolyT(std::vector(l)) {} 120 | }; 121 | 122 | SL NModPoly PolyMul(const NModPoly& X, const NModPoly& Y) { 123 | return NModPoly{PolyMul(X.data, Y.data, X.mod), X.mod}; 124 | } 125 | 126 | SL NModPoly PolyInv(const NModPoly& x, int64 n) { 127 | return NModPoly(PolyInv(x.data, n, x.mod), x.mod); 128 | } 129 | 130 | #define PPOLY_DIV_AND_MOD_IMPL(PolyDivAndMod, PolyDivAndModImpl) \ 131 | SL std::tuple PolyDivAndMod(const NModPoly& X, \ 132 | const NModPoly& Y) { \ 133 | auto [q, r] = PolyDivAndModImpl(X.data, Y.data, X.mod); \ 134 | return std::make_tuple(NModPoly(std::move(q), X.mod), \ 135 | NModPoly(std::move(r), X.mod)); \ 136 | } 137 | 138 | #define PPOLY_DIV_IMPL(PolyDiv, PolyDivImpl) \ 139 | SL NModPoly PolyDiv(const NModPoly& X, const NModPoly& Y) { \ 140 | return NModPoly(PolyDivImpl(X.data, Y.data, X.mod), X.mod); \ 141 | } 142 | 143 | #define PPOLY_MOD_IMPL(PolyMod, PolyModImpl) \ 144 | SL NModPoly PolyMod(const NModPoly& X, const NModPoly& Y) { \ 145 | return NModPoly(PolyModImpl(X.data, Y.data, X.mod), X.mod); \ 146 | } 147 | 148 | 
PPOLY_DIV_AND_MOD_IMPL(PolyDivAndModDc, pe::PolyDivAndModDc) 149 | PPOLY_DIV_IMPL(PolyDivDc, pe::PolyDivDc) 150 | PPOLY_MOD_IMPL(PolyModDc, pe::PolyModDc) 151 | 152 | PPOLY_DIV_AND_MOD_IMPL(PolyDivAndModNormal, pe::PolyDivAndModNormal) 153 | PPOLY_DIV_IMPL(PolyDivNormal, pe::PolyDivNormal) 154 | PPOLY_MOD_IMPL(PolyModNormal, pe::PolyModNormal) 155 | 156 | PPOLY_DIV_AND_MOD_IMPL(PolyDivAndMod, pe::PolyDivAndMod) 157 | PPOLY_DIV_IMPL(PolyDiv, pe::PolyDiv) 158 | PPOLY_MOD_IMPL(PolyMod, pe::PolyMod) 159 | 160 | inline NModPoly NModPoly::Inv(int64 n) const { 161 | auto t = PolyInv(data, n, mod); 162 | return NModPoly(std::vector(static_cast(&t[0]), 163 | static_cast(&t[0]) + std::size(t)), 164 | mod); 165 | } 166 | 167 | SL NModPoly operator<<(const NModPoly& p, int64 m) { 168 | return NModPoly(PolyShiftLeft(p.data, m), p.mod); 169 | } 170 | 171 | SL NModPoly operator>>(const NModPoly& p, int64 m) { 172 | return NModPoly(PolyShiftRight(p.data, m), p.mod); 173 | } 174 | 175 | SL NModPoly operator-(const NModPoly& x, const NModPoly& y) { 176 | return NModPoly(PolySub(x.data, y.data, x.mod), x.mod).AdjustLeadingZeros(); 177 | } 178 | 179 | SL NModPoly operator+(const NModPoly& x, const NModPoly& y) { 180 | return NModPoly(PolyAdd(x.data, y.data, x.mod), x.mod).AdjustLeadingZeros(); 181 | } 182 | 183 | SL NModPoly operator*(const NModPoly& x, int64 v) { 184 | std::vector data(x.data); 185 | for (auto& iter : data) { 186 | iter = MulMod(iter, Mod(v, x.mod), x.mod); // same helpers as ValueAt; a raw iter * v can overflow int64 187 | } 188 | return NModPoly(std::move(data), x.mod); 189 | } 190 | 191 | SL NModPoly operator*(int64 v, const NModPoly& x) { return x * v; } 192 | 193 | SL NModPoly operator*(const NModPoly& x, const NModPoly& y) { 194 | return PolyMul(x, y); 195 | } 196 | 197 | SL NModPoly operator/(const NModPoly& x, const NModPoly& y) { 198 | return PolyDiv(x, y); 199 | } 200 | 201 | SL NModPoly operator%(const NModPoly& x, const NModPoly& y) { 202 | return PolyMod(x, y); 203 | } 204 | 205 | SL int operator==(const NModPoly& x, const NModPoly& y) { 
206 | return x.mod == y.mod && x.data == y.data; 207 | } 208 | 209 | // x^n % mod 210 | SL NModPoly operator%(int64 n, const NModPoly& mod) { 211 | NModPoly x{{0, 1}, mod.mod}; 212 | NModPoly ret{{1}, mod.mod}; 213 | for (; n > 0; n >>= 1) { 214 | if (n & 1) { 215 | ret = PolyMod(x * ret, mod); 216 | } 217 | if (n > 1) { 218 | x = PolyMod(x * x, mod); 219 | } 220 | } 221 | return ret; 222 | } 223 | 224 | SL std::ostream& operator<<(std::ostream& o, const NModPoly& p) { 225 | const int64 n = static_cast(std::size(p.data)); 226 | for (int64 i = 0; i < n - 1; ++i) { 227 | o << p.data[i] << ", "; 228 | } 229 | return n > 0 ? (o << p.data[n - 1]) : o; // a default-constructed poly has empty data: print nothing 230 | } 231 | } // namespace pe 232 | #endif 233 | -------------------------------------------------------------------------------- /legacy/pe_sym_poly.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PE_SYM_POLY_ 2 | #define PE_SYM_POLY_ 3 | 4 | #include "pe_base" 5 | 6 | namespace pe { 7 | // [+- ]* 8 | SL std::vector ParseSgnList(const std::string& s, int& i) { 9 | const int size = static_cast(std::size(s)); 10 | std::vector sgns; 11 | for (;;) { 12 | while (i < size && std::isspace(s[i])) ++i; 13 | if (i < size && (s[i] == '+' || s[i] == '-')) { 14 | sgns.push_back(s[i] == '+' ? 
1 : -1); 15 | ++i; 16 | } else { 17 | return sgns; 18 | } 19 | } 20 | } 21 | 22 | SL std::string ParseDigList(const std::string& s, int& i) { 23 | const int size = static_cast(std::size(s)); 24 | while (i < size && std::isspace(s[i])) ++i; 25 | if (i < size && std::isdigit(s[i])) { 26 | int start = i; 27 | while (i < size && std::isdigit(s[i])) { 28 | ++i; 29 | } 30 | return s.substr(start, i - start); 31 | } 32 | return ""; 33 | } 34 | 35 | SL std::string ParseIdentifier(const std::string& s, int& i) { 36 | const int size = static_cast(std::size(s)); 37 | while (i < size && std::isspace(s[i])) ++i; 38 | if (i < size && (s[i] == '_' || std::isalpha(s[i]))) { 39 | int start = i; 40 | while (i < size && 41 | (s[i] == '_' || std::isdigit(s[i]) || std::isalpha(s[i]))) { 42 | ++i; 43 | } 44 | return s.substr(start, i - start); 45 | } 46 | return ""; 47 | } 48 | 49 | SL std::vector> ParseSingleTermList( 50 | const std::string& s, int& i) { 51 | std::vector> result; 52 | const int size = static_cast(std::size(s)); 53 | for (;;) { 54 | while (i < size && std::isspace(s[i])) ++i; 55 | if (s[i] == '*') { 56 | ++i; 57 | while (i < size && std::isspace(s[i])) ++i; 58 | } 59 | std::string variable = ParseIdentifier(s, i); 60 | if (std::empty(variable)) variable = ParseDigList(s, i); 61 | if (std::empty(variable)) return result; 62 | while (i < size && std::isspace(s[i])) ++i; 63 | if (i < size && (s[i] == '^' && !std::isdigit(variable[0]))) { 64 | ++i; 65 | std::string e = ParseDigList(s, i); 66 | result.emplace_back(variable, e); 67 | } else { 68 | result.emplace_back(variable, ""); 69 | } 70 | } 71 | } 72 | 73 | template 74 | SL CT EvaluateDigs(const std::vector& sgns, const std::string& digs, 75 | CT defaultVal = 0) { 76 | int s = 1; 77 | for (const auto& iter : sgns) { 78 | if (iter == -1) s = -s; 79 | } 80 | CT v = 0; 81 | for (const auto& iter : digs) v = v * 10 + iter - '0'; 82 | if (std::empty(digs)) v = defaultVal; 83 | return s == 1 ? 
v : -v; 84 | } 85 | 86 | template 87 | SL std::pair EvaluateSingleTermList( 88 | const std::vector>& singleTerms) { 89 | std::map t; 90 | CT c = 1; 91 | for (const auto& iter : singleTerms) { 92 | if (std::isdigit(iter.first[0])) { 93 | c *= EvaluateDigs({}, iter.first, 1); 94 | } else { 95 | t[iter.first] += EvaluateDigs({}, iter.second, 1); 96 | } 97 | } 98 | TermKey key; 99 | for (const auto& iter : t) key.emplace_back(iter.first, iter.second); 100 | return {key, c}; 101 | } 102 | 103 | template 104 | SL std::pair ParseTerm(const std::string& s, int& i) { 105 | auto sgns = ParseSgnList(s, i); 106 | auto singleTerms = ParseSingleTermList(s, i); 107 | auto t = EvaluateSingleTermList(singleTerms); 108 | auto sgn = EvaluateDigs(sgns, "", 1); 109 | return {t.first, 110 | std::empty(singleTerms) ? CT(0) : (sgn == 1 ? t.second : -t.second)}; 111 | } 112 | 113 | template 114 | SL std::pair ParseTerm(const std::string& s) { 115 | int i = 0; 116 | return ParseTerm(s, i); 117 | } 118 | 119 | template 120 | SL std::map ParseSymPolyTerms(const std::string& s) { 121 | const int size = static_cast(std::size(s)); 122 | int i = 0; 123 | std::map terms; 124 | for (;;) { 125 | while (i < size && std::isspace(s[i])) ++i; 126 | int j = i; 127 | auto t = ParseTerm(s, i); 128 | if (j == i) { 129 | if (i < size) { 130 | std::cerr << "Unknown: " << s.substr(i) << std::endl; 131 | } 132 | break; 133 | } 134 | terms[t.first] += t.second; 135 | } 136 | return terms; 137 | } 138 | 139 | template 140 | SL SymPoly ParseSymPoly(const std::string& s) { 141 | return SymPoly(ParseSymPolyTerms(s)); 142 | } 143 | 144 | SL TermKey ToTermKey(const std::string& s) { 145 | int i = 0; 146 | return ParseTerm(s, i).first; 147 | } 148 | } -------------------------------------------------------------------------------- /pe: -------------------------------------------------------------------------------- 1 | #ifndef PE_ 2 | #define PE_ 3 | 4 | // Base 5 | #include "pe_base" 6 | #include "pe_type_traits" 7 | 
#include "pe_span" 8 | #include "pe_bit" 9 | #include "pe_mod" // Modular arithmetic 10 | #include "pe_int" 11 | #include "pe_extended_int" 12 | #include "pe_float" 13 | #include "pe_vector" 14 | 15 | // General util 16 | #include "pe_io" 17 | #include "pe_time" 18 | #include "pe_persistance" 19 | #include "pe_tree" 20 | #include "pe_rand" 21 | 22 | // Range 23 | #include "pe_range" 24 | 25 | // Matrix arithmetic 26 | #include "pe_mat" 27 | 28 | // Number theory arithmetic 29 | #include "pe_nt_base" 30 | #include "pe_nt" 31 | 32 | // Fraction arithmetic 33 | #include "pe_fraction" 34 | 35 | // Parallel support 36 | #include "pe_parallel" 37 | #include "pe_parallel_algo" 38 | 39 | // Polynomial 40 | #include "pe_poly_base" 41 | #include "pe_poly_algo" 42 | 43 | // fft 44 | #include "pe_fft" 45 | 46 | // Big integer 47 | #include "pe_gbi" 48 | #include "pe_bi32" 49 | #include "pe_mpz" 50 | 51 | // Geometry 52 | #include "pe_geometry" 53 | 54 | // Large memory support (windows) 55 | #include "pe_memory" 56 | 57 | // MP extension 58 | #include "pe_mpf" 59 | 60 | #include "pe_serialization" 61 | 62 | // Misc 63 | #include "pe_misc" 64 | #include "pe_mma" 65 | 66 | #include "pe_array" 67 | #include "pe_ntf" 68 | #include "pe_algo" 69 | #include "pe_sym_poly" 70 | #include "pe_db" 71 | #include "pe_int_algo" 72 | 73 | #include "pe_dpe" 74 | 75 | #include "pe_initializer" 76 | 77 | // Undefined or unspecified behavior 78 | // https://en.cppreference.com/w/cpp/language/extending_std 79 | namespace std { 80 | // Required by 81 | // 1. operator << for std::vector, std::set, std::map, etc. 82 | // 2. pe::int128, pe::uint128, pe::float128 which are non-class types. 
83 | using pe::operator<<; 84 | } // namespace std 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /pe.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PE_HPP_ 2 | #define PE_HPP_ 3 | 4 | // Use 5 | // g++ -xc++-header pe.hpp --std=c++20 -fno-diagnostics-color -O3 -march=native 6 | // -mtune=native -fopenmp -pthread -static 7 | // to generate pe.hpp.gch 8 | #include 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /pe_array: -------------------------------------------------------------------------------- 1 | #ifndef PE_ARRAY_ 2 | #define PE_ARRAY_ 3 | 4 | #include "pe_base" 5 | #include "pe_memory" 6 | 7 | namespace pe { 8 | template 9 | struct DArrayRef; 10 | 11 | // T: element type 12 | // D: dimention length 13 | // A: allocator 14 | template 15 | struct DArray : public DArrayRef { 16 | template 17 | friend struct DArrayRef; 18 | 19 | using base = DArrayRef; 20 | template 21 | DArray(const std::vector& dimension, Args&&... arg) 22 | : dimension_(dimension), base(nullptr, 0, nullptr, nullptr) { 23 | Init(arg...); 24 | PE_ASSERT(std::size(dimension) == D); 25 | } 26 | 27 | DArray() : base(nullptr, 0, nullptr, nullptr) { 28 | dimension_ = std::vector(D, 1); 29 | Init(); 30 | PE_ASSERT(std::size(dimension_) == D); 31 | } 32 | 33 | DArray(const DArray&) = delete; 34 | DArray(DArray&&) = delete; 35 | 36 | DArray& operator=(const DArray&) = delete; 37 | DArray& operator=(DArray&&) = delete; 38 | 39 | ~DArray() { Clear(); } 40 | 41 | template 42 | void Reset(const std::vector& dimension, Args&&... arg) { 43 | Clear(); 44 | this->dimension_ = dimension; 45 | Init(arg...); 46 | } 47 | 48 | template 49 | void Init(Args&&... 
arg) { 50 | const int d = static_cast(std::size(dimension_)); 51 | element_counts_.clear(); 52 | element_counts_.push_back(1); 53 | for (int i = d - 1; i >= 0; --i) { 54 | element_counts_.push_back(element_counts_.back() * dimension_[i]); 55 | } 56 | std::reverse(element_counts_.begin(), element_counts_.end()); 57 | element_count_ = element_counts_[0]; 58 | base::dimension_ = &dimension_[0]; 59 | base::element_counts_ = &element_counts_[0]; 60 | base::data_ = reinterpret_cast(A::Allocate(element_count_ * sizeof(T))); 61 | for (int64 i = 0; i < element_count_; ++i) { 62 | new (base::data_ + i) T(arg...); 63 | } 64 | } 65 | 66 | void Clear() { 67 | for (int64 i = 0; i < element_count_; ++i) { 68 | (base::data_ + i)->~T(); 69 | } 70 | A::Deallocate(base::data_); 71 | } 72 | 73 | T* data() { return base::data_; } 74 | 75 | DArrayRef Ref() { 76 | return DArrayRef(base::data_, 0, base::dimension_, 77 | base::element_counts_); 78 | } 79 | 80 | private: 81 | std::vector dimension_; 82 | std::vector element_counts_; 83 | int64 element_count_; 84 | }; 85 | 86 | template 87 | struct DArrayRef { 88 | using ValueType = DArrayRef; 89 | using ConstValueType = DArrayRef; 90 | 91 | DArrayRef(T* data, int off, const int64* dimension, 92 | const int64* element_counts) 93 | : data_(data), 94 | off_(off), 95 | dimension_(dimension), 96 | element_counts_(element_counts) {} 97 | 98 | DArrayRef operator[](int64 idx) const { 99 | return DArrayRef(data_ + idx * element_counts_[off_ + 1], 100 | off_ + 1, dimension_, element_counts_); 101 | } 102 | 103 | protected: 104 | T* data_; 105 | const int off_; 106 | const int64* element_counts_; 107 | const int64* dimension_; 108 | }; 109 | 110 | template 111 | struct DArrayRef { 112 | using ValueType = T&; 113 | using ConstValueType = const T&; 114 | 115 | DArrayRef(T* data, int off, const int64* dimension, 116 | const int64* element_counts) 117 | : data_(data), 118 | off_(off), 119 | dimension_(dimension), 120 | element_counts_(element_counts) {} 
121 | 122 | T& operator[](int64 idx) { return data_[idx]; } 123 | ConstValueType operator[](int64 idx) const { return data_[idx]; } 124 | 125 | protected: 126 | T* data_; 127 | const int off_; 128 | const int64* element_counts_; 129 | const int64* dimension_; 130 | }; 131 | 132 | template 133 | struct ArrayShape; 134 | 135 | template 136 | struct ArrayShape { 137 | using Next = ArrayShape; 138 | const static int64 D = 1 + Next::D; 139 | const static int64 EC = H * Next::EC; 140 | }; 141 | 142 | template 143 | struct ArrayShape { 144 | const static int64 D = 1; 145 | const static int64 EC = H; 146 | }; 147 | 148 | template 149 | struct FArrayRef; 150 | 151 | // T: element type 152 | // S: array shape 153 | // A: allocator 154 | template 155 | struct FArray : public FArrayRef { 156 | using base = FArrayRef; 157 | 158 | template 159 | FArray(Args&&... arg) : base(nullptr) { 160 | Init(arg...); 161 | } 162 | 163 | FArray(const FArray&) = delete; 164 | FArray(FArray&&) = delete; 165 | 166 | FArray& operator=(const FArray&) = delete; 167 | FArray& operator=(FArray&&) = delete; 168 | 169 | ~FArray() { Clear(); } 170 | 171 | template 172 | void Init(Args&&... 
arg) { 173 | base::data_ = reinterpret_cast(A::Allocate(S::EC * sizeof(T))); 174 | for (int64 i = 0; i < S::EC; ++i) { 175 | new (base::data_ + i) T(arg...); 176 | } 177 | } 178 | 179 | void Clear() { 180 | for (int64 i = 0; i < S::EC; ++i) { 181 | (base::data_ + i)->~T(); 182 | } 183 | A::Deallocate(base::data_); 184 | } 185 | 186 | T* data() { return base::data_; } 187 | 188 | FArrayRef Ref() { return FArrayRef(base::data_); } 189 | }; 190 | 191 | template 192 | struct FArrayRef { 193 | using NextShape = typename S::Next; 194 | using ValueType = FArrayRef; 195 | using ConstValueType = FArrayRef; 196 | 197 | FArrayRef(T* data) : data_(data) {} 198 | 199 | FArrayRef operator[](int64 idx) const { 200 | return FArrayRef(data_ + idx * NextShape::EC); 201 | } 202 | 203 | protected: 204 | T* data_; 205 | }; 206 | 207 | template 208 | struct FArrayRef> { 209 | using ValueType = T&; 210 | using ConstValueType = const T&; 211 | 212 | FArrayRef(T* data) : data_(data) {} 213 | 214 | ValueType operator[](int64 idx) { return data_[idx]; } 215 | ConstValueType operator[](int64 idx) const { return data_[idx]; } 216 | 217 | protected: 218 | T* data_; 219 | }; 220 | 221 | // T: element type 222 | // X: the count of each dimention 223 | template 224 | using Array = FArray>; 225 | 226 | // T: element type 227 | // A: allocator 228 | // X: the count of each dimention 229 | template 230 | using AArray = FArray, A>; 231 | } // namespace pe 232 | #endif -------------------------------------------------------------------------------- /pe_config: -------------------------------------------------------------------------------- 1 | #ifndef PE_CONFIG_ 2 | #define PE_CONFIG_ 3 | 4 | // This file provides a centralized place to configure pe 5 | 6 | // Auto generated by gen_config.py, and you can edit it manually 7 | 8 | // Configuration priority (first match): 9 | // 1. Compiling command 10 | // 2. The configurations in this file 11 | // 3. 
The configurations in file pe 12 | 13 | #ifndef ENABLE_ASSERT 14 | #define ENABLE_ASSERT 1 15 | #endif 16 | 17 | #ifndef TRY_TO_USE_INT128 18 | #define TRY_TO_USE_INT128 1 19 | #endif 20 | 21 | #ifndef ENABLE_EIGEN 22 | #define ENABLE_EIGEN 1 23 | #endif 24 | 25 | #ifndef ENABLE_GMP 26 | #define ENABLE_GMP 1 27 | #endif 28 | 29 | #ifndef ENABLE_FLINT 30 | #define ENABLE_FLINT 1 31 | #endif 32 | 33 | #ifndef ENABLE_MPFR 34 | #define ENABLE_MPFR 1 35 | #endif 36 | 37 | #ifndef ENABLE_LIBBF 38 | #define ENABLE_LIBBF 1 39 | #endif 40 | 41 | #ifndef ENABLE_NTL 42 | #define ENABLE_NTL 1 43 | #endif 44 | 45 | #ifndef ENABLE_ZMQ 46 | #define ENABLE_ZMQ 1 47 | #endif 48 | 49 | #ifndef ENABLE_PRIME_COUNT 50 | #define ENABLE_PRIME_COUNT 1 51 | #endif 52 | 53 | #ifndef ENABLE_PRIME_SIEVE 54 | #define ENABLE_PRIME_SIEVE 1 55 | #endif 56 | 57 | #ifndef ENABLE_TCMALLOC 58 | #define ENABLE_TCMALLOC 0 59 | #endif 60 | 61 | #endif -------------------------------------------------------------------------------- /pe_float: -------------------------------------------------------------------------------- 1 | #ifndef PE_FLOAT128_ 2 | #define PE_FLOAT128_ 3 | 4 | #include "pe_base" 5 | #include "pe_int" 6 | 7 | namespace pe { 8 | template 9 | SL int IsNAN(T v) { 10 | return 0; 11 | } 12 | } // namespace pe 13 | 14 | #if PE_HAS_FLOAT128 15 | namespace pe { 16 | namespace internal { 17 | SL std::string ToStringFloat128(float128 f, const char* format_string, 18 | int dig = 20) { 19 | char buff[256]; 20 | const int buff_size = sizeof(buff); 21 | int n = quadmath_snprintf(buff, buff_size, format_string, dig, f); 22 | if (n < buff_size) { 23 | return buff; 24 | } 25 | n = quadmath_snprintf(NULL, 0, format_string, dig, f); 26 | if (n <= -1) { 27 | return ""; 28 | } 29 | char* str = static_cast(malloc(n + 1)); 30 | std::string result; 31 | if (str) { 32 | quadmath_snprintf(str, n + 1, format_string, dig, f); 33 | result = str; 34 | } 35 | free(str); 36 | return result; 37 | } 38 | } // namespace 
// Formats a built-in floating point value with snprintf.
//
// f: value to format (float is promoted to double by the variadic call).
// format_string: printf-style format containing a `.*` precision followed by
//   a conversion matching T (for example "%#.*e" or "%#.*Lf").
// dig: precision substituted for the `*`.
// Returns the formatted text, or an empty string if formatting fails.
template <typename T>
SL std::string ToStringFloat(T f, const char* format_string, int dig = 20) {
  char buff[256];
  const int buff_size = sizeof(buff);
  const int n = snprintf(buff, buff_size, format_string, dig, f);
  if (n < 0) {
    // A negative result means the buffer contents are not usable.
    return "";
  }
  if (n < buff_size) {
    return std::string(buff, static_cast<size_t>(n));
  }
  // The stack buffer was too small; n is the exact length required, so
  // format once more straight into a string of the right size.
  std::string result(static_cast<size_t>(n) + 1, '\0');
  snprintf(&result[0], result.size(), format_string, dig, f);
  result.resize(static_cast<size_t>(n));
  return result;
}
internal::ToStringFloat(f, "%#.*e", dig); 102 | } 103 | 104 | SL std::string ToString(double f, int dig = 20) { 105 | return internal::ToStringFloat(f, "%#.*e", dig); 106 | } 107 | 108 | SL std::string ToString(long double f, int dig = 20) { 109 | return internal::ToStringFloat(f, "%#.*Le", dig); 110 | } 111 | 112 | SL std::string ToStringF(float f, int dig = 20) { 113 | return internal::ToStringFloat(f, "%#.*f", dig); 114 | } 115 | 116 | SL std::string ToStringF(double f, int dig = 20) { 117 | return internal::ToStringFloat(f, "%#.*f", dig); 118 | } 119 | 120 | SL std::string ToStringF(long double f, int dig = 20) { 121 | return internal::ToStringFloat(f, "%#.*Lf", dig); 122 | } 123 | 124 | template 125 | SL REQUIRES((is_one_of_v)) RETURN(int) 126 | IsNAN(T v) { 127 | return std::isnan(v); 128 | } 129 | 130 | template 131 | SL REQUIRES((is_one_of_v)) RETURN(T) Abs(T f) { 132 | return std::fabs(f); 133 | } 134 | 135 | template 136 | SL REQUIRES((is_one_of_v)) RETURN(T) FAbs(T f) { 137 | return std::fabs(f); 138 | } 139 | 140 | template 141 | SL REQUIRES((is_one_of_v)) RETURN(T) Floor(T f) { 142 | return std::floor(f); 143 | } 144 | 145 | template 146 | SL REQUIRES((is_one_of_v)) RETURN(T) Ceil(T f) { 147 | return std::ceil(f); 148 | } 149 | 150 | template 151 | SL REQUIRES((is_one_of_v)) RETURN(T) Trunc(T f) { 152 | return std::trunc(f); 153 | } 154 | 155 | template 156 | SL REQUIRES((is_one_of_v)) RETURN(T) 157 | Power(T f, int p) { 158 | return std::pow(f, static_cast(p)); 159 | } 160 | 161 | template 162 | SL REQUIRES((is_one_of_v)) RETURN(T) Sqrt(T f) { 163 | return std::sqrt(f); 164 | } 165 | 166 | template 167 | SL REQUIRES((is_one_of_v)) RETURN(T) Cos(T f) { 168 | return std::cos(f); 169 | } 170 | 171 | template 172 | SL REQUIRES((is_one_of_v)) RETURN(T) Sin(T f) { 173 | return std::sin(f); 174 | } 175 | 176 | template 177 | SL REQUIRES((is_one_of_v)) RETURN(T) Exp(T f) { 178 | return std::exp(f); 179 | } 180 | 181 | template 182 | SL REQUIRES((is_one_of_v)) 
RETURN(T) Log(T f) { 183 | return std::log(f); 184 | } 185 | 186 | template 187 | SL REQUIRES((is_one_of_v)) RETURN(T) Log10(T f) { 188 | return std::log10(f); 189 | } 190 | } // namespace pe 191 | 192 | #if PE_HAS_CPP20 193 | namespace pe { 194 | template 195 | concept PeFloatUtil = requires(T v) { 196 | requires PeComparable; 197 | Abs(v); 198 | FAbs(v); 199 | Floor(v); 200 | Ceil(v); 201 | Trunc(v); 202 | 203 | Power(v, 0); 204 | Sqrt(v); 205 | 206 | ToString(v); 207 | to_string(v); 208 | 209 | std::cout << v; 210 | }; 211 | 212 | template 213 | concept PeRichFloatUtil = requires(T v) { 214 | requires PeFloatUtil; 215 | Cos(v); 216 | Sin(v); 217 | Exp(v); 218 | Log(v); 219 | Log10(v); 220 | }; 221 | 222 | static_assert(PeRichFloatUtil); 223 | static_assert(PeRichFloatUtil); 224 | static_assert(PeRichFloatUtil); 225 | 226 | #if PE_HAS_FLOAT128 227 | static_assert(PeRichFloatUtil); 228 | #endif 229 | } // namespace pe 230 | #endif 231 | 232 | #endif 233 | -------------------------------------------------------------------------------- /pe_gbi: -------------------------------------------------------------------------------- 1 | #ifndef PE_GBI_ 2 | #define PE_GBI_ 3 | 4 | // General Big integer. 
5 | 6 | #include "pe_base" 7 | #include "pe_type_traits" 8 | #include "pe_int" 9 | #include "pe_nt" 10 | #include "pe_fraction" 11 | 12 | namespace pe { 13 | 14 | template 15 | SL REQUIRES((is_gbi_v)) RETURN(int) BitWidth(const T& x) { 16 | return x.BitWidth(); 17 | } 18 | 19 | template 20 | SL REQUIRES((is_gbi_v)) RETURN(int) Popcount(const T& x) { 21 | return x.Popcount(); 22 | } 23 | 24 | template 25 | SL REQUIRES((is_gbi_v)) RETURN(void) SetBit(T& x, int idx) { 26 | x.SetBit(idx); 27 | } 28 | 29 | template 30 | SL REQUIRES((is_gbi_v)) RETURN(void) ResetBit(T& x, int idx) { 31 | x.ResetBit(idx); 32 | } 33 | 34 | template 35 | SL REQUIRES((is_gbi_v)) RETURN(int) GetBit(const T& x, int idx) { 36 | return x.GetBit(idx); 37 | } 38 | 39 | template 40 | SL REQUIRES((is_gbi_v)) RETURN(void) RevBit(T& x, int idx) { 41 | x.RevBit(idx); 42 | } 43 | 44 | template 45 | SL REQUIRES((is_gbi_v)) RETURN(int) IsZero(const T& x) { 46 | return x.IsZero(); 47 | } 48 | 49 | template 50 | SL REQUIRES((is_gbi_v)) RETURN(int) IntSign(const T& x) { 51 | return x.IntSign(); 52 | } 53 | 54 | template 55 | SL REQUIRES((is_gbi_v)) RETURN(int) IsEven(const T& x) { 56 | return x.IsEven(); 57 | } 58 | 59 | template 60 | SL REQUIRES((is_gbi_v)) RETURN(int) IsOdd(const T& x) { 61 | return x.IsOdd(); 62 | } 63 | 64 | template 65 | SL REQUIRES((is_gbi_v && is_gbi_v)) RETURN(int) 66 | SameParity(const T0& v0, const T1& v1) { 67 | return IsEven(v0) == IsEven(v1); 68 | } 69 | 70 | template 71 | SL REQUIRES((is_gbi_v)) RETURN(uint32) LowerBits(const T& x) { 72 | return x.LowerBits(); 73 | } 74 | 75 | template 76 | SL REQUIRES((is_gbi_v)) RETURN(T) Abs(const T& n) { 77 | return IntSign(n) >= 0 ? n : -n; 78 | } 79 | 80 | template 81 | SL REQUIRES((is_gbi_v)) RETURN(T) FAbs(const T& n) { 82 | return IntSign(n) >= 0 ? 
n : -n; 83 | } 84 | 85 | template 86 | SL REQUIRES((is_gbi_v && is_builtin_integer_v)) RETURN(T) 87 | Power(T x, TN n) { 88 | PE_ASSERT(n >= 0); 89 | T ret = 1; 90 | for (; IntSign(n) > 0; n >>= 1) { 91 | if (n & 1) ret = ret * x; 92 | if (n > 1) x = x * x; 93 | } 94 | return ret; 95 | } 96 | 97 | template 98 | SL REQUIRES((is_gbi_v)) RETURN(std::tuple) 99 | Div(const T& a, const T& b) { 100 | return {a / b, a % b}; 101 | } 102 | 103 | template 104 | SL REQUIRES((is_gbi_v)) RETURN(T) FloorDiv(const T& a, const T& b) { 105 | const int sign_a = IntSign(a); 106 | const int sign_b = IntSign(b); 107 | if (sign_a > 0 && sign_b > 0) { 108 | return a / b; 109 | } 110 | if (sign_a < 0 && sign_b < 0) { 111 | return a / b; 112 | } 113 | T q = a / b; 114 | if (((sign_a < 0) != (sign_b < 0)) && !IsZero(a % b)) { 115 | --q; 116 | } 117 | return q; 118 | } 119 | 120 | template 121 | SL REQUIRES((is_gbi_v)) RETURN(T) CeilDiv(const T& a, const T& b) { 122 | const int sign_a = IntSign(a); 123 | const int sign_b = IntSign(b); 124 | if (sign_a > 0 && sign_b > 0) { 125 | return 1 + (a - 1) / b; 126 | } 127 | if (sign_a < 0 && sign_b < 0) { 128 | return 1 + (a + 1) / b; 129 | } 130 | T q = a / b; 131 | if (((sign_a < 0) == (sign_b < 0)) && !IsZero(a % b)) { 132 | ++q; 133 | } 134 | return q; 135 | } 136 | 137 | template 138 | SL REQUIRES((is_gbi_v)) RETURN(int_promotion_t) 139 | Mod(const T& a, TM mod) { 140 | if (IntSign(a) >= 0) { 141 | return a >= mod ? a % mod : a; 142 | } else { 143 | T tmp = a % mod; 144 | return IntSign(tmp) < 0 ? 
tmp + mod : tmp; 145 | } 146 | } 147 | 148 | template 149 | SL REQUIRES((is_gbi_v)) RETURN(T) AddMod(T a, const T& b, TM mod) { 150 | a += b; 151 | if (a >= mod) { 152 | a -= mod; 153 | } 154 | return a; 155 | } 156 | 157 | template 158 | SL REQUIRES((is_gbi_v)) RETURN(T) SubMod(T a, const T& b, TM mod) { 159 | a -= b; 160 | if (IntSign(a) < 0) { 161 | a += mod; 162 | } 163 | return a; 164 | } 165 | 166 | template 167 | SL REQUIRES((is_gbi_v)) RETURN(T) MulMod(const T& a, const T& b, TM mod) { 168 | return a * b % mod; 169 | } 170 | 171 | template 172 | SL REQUIRES((is_gbi_v && is_builtin_integer_v)) RETURN(T) 173 | PowerMod(T x, TN n, const T& mod) { 174 | PE_ASSERT(n >= 0); 175 | if (mod == 1) { 176 | return 0; 177 | } 178 | T ret = 1; 179 | x %= mod; 180 | for (; n > 0; n >>= 1) { 181 | if (n & 1) ret = ret * x % mod; 182 | if (n > 1) x = x * x % mod; 183 | } 184 | return ret; 185 | } 186 | 187 | template 188 | SL REQUIRES((is_gbi_v && is_builtin_integer_v && 189 | is_builtin_integer_v)) RETURN(T) PowerMod(T x, TN n, TM mod) { 190 | PE_ASSERT(n >= 0); 191 | if (mod == 1) { 192 | return 0; 193 | } 194 | T ret = 1; 195 | x %= mod; 196 | for (; n > 0; n >>= 1) { 197 | if (n & 1) ret = ret * x % mod; 198 | if (n > 1) x = x * x % mod; 199 | } 200 | return ret; 201 | } 202 | 203 | template 204 | SL REQUIRES((is_gbi_v)) RETURN(T) PowerMod(T x, const T& n, const T& mod) { 205 | PE_ASSERT(IntSign(n) >= 0); 206 | if (mod == 1) { 207 | return 0; 208 | } 209 | T ret = 1; 210 | x %= mod; 211 | const int bit_width = BitWidth(n); 212 | for (int i = 0; i < bit_width; ++i) { 213 | if (GetBit(n, i)) ret = ret * x % mod; 214 | if (i + 1 < bit_width) x = x * x % mod; 215 | } 216 | return ret; 217 | } 218 | 219 | template 220 | SL REQUIRES((is_gbi_v)) RETURN(TT) ToInt(const T& x) { 221 | return x.template ToInt(); 222 | } 223 | 224 | template 225 | SL REQUIRES((is_gbi_v)) RETURN(TT) ToFloat(const T& x) { 226 | return x.template ToFloat(); 227 | } 228 | 229 | template 230 | SL 
// Collects start-up options for the library and applies them in Init().
// Every set_* method stores its argument in the field of the same name and
// returns *this, so calls can be chained before the final Init().
struct PeInitializer {
  // Stores the value later passed to InitMaxp() by InitNt().
  PeInitializer& set_max_prime(int64 maxp = 1000000) {
    this->maxp = maxp;
    return *this;
  }

  // The five cal_* flags are forwarded, in this order, to InitPrimes() by
  // InitNt(). Presumably each one requests a precomputed arithmetic-function
  // table of that name -- confirm against InitPrimes.
  PeInitializer& set_cal_phi(int cal_phi = 1) {
    this->cal_phi = cal_phi;
    return *this;
  }

  PeInitializer& set_cal_mu(int cal_mu = 1) {
    this->cal_mu = cal_mu;
    return *this;
  }

  PeInitializer& set_cal_rad(int cal_rad = 1) {
    this->cal_rad = cal_rad;
    return *this;
  }

  PeInitializer& set_cal_sigma0(int cal_sigma0 = 1) {
    this->cal_sigma0 = cal_sigma0;
    return *this;
  }

  PeInitializer& set_cal_sigma1(int cal_sigma1 = 1) {
    this->cal_sigma1 = cal_sigma1;
    return *this;
  }

  // Size parameter handed to fft::InitFftK(); a negative value skips it.
  PeInitializer& set_fft_k(int fft_k = 22) {
    this->fft_k = fft_k;
    return *this;
  }

  // Size parameter handed to ntt32::InitNtt(); a negative value skips it.
  PeInitializer& set_ntt32_k(int ntt32_k = 22) {
    this->ntt32_k = ntt32_k;
    return *this;
  }

  // Size parameter handed to ntt64::InitNtt(); a negative value skips it.
  PeInitializer& set_ntt64_k(int ntt64_k = 22) {
    this->ntt64_k = ntt64_k;
    return *this;
  }

  // Value handed to DefaultMod::Set() by Init().
  PeInitializer& set_mod(int64 default_mod = 1000000007) {
    this->default_mod = default_mod;
    return *this;
  }

  // Applies every stored option: number-theory tables, parallel runtime
  // settings, FFT/NTT setup, then the default modulus.
  void Init() {
    InitNt();
    InitParallel();
    InitFft(fft_k);
    InitNtt32(ntt32_k);
    InitNtt64(ntt64_k);
    DefaultMod::Set(default_mod);
  }

  // Drops any existing prime tables, then rebuilds them for `maxp`.
  void InitNt() {
    DeinitPrimes();
    InitMaxp(maxp);
    // With no cal_* flag set, the argument-less InitPrimes() overload is used.
    if (cal_phi == 0 && cal_mu == 0 && cal_rad == 0 && cal_sigma0 == 0 &&
        cal_sigma1 == 0) {
      InitPrimes();
    } else {
      InitPrimes(cal_phi, cal_mu, cal_rad, cal_sigma0, cal_sigma1);
    }
  }

  // Configures the optional parallel back ends that are compiled in.
  void InitParallel() {
#if ENABLE_OPENMP
    // Enable nested and dynamically sized OpenMP teams.
    omp_set_nested(1);
    omp_set_dynamic(1);
#if !defined(COMPILER_MSVC)
    omp_set_max_active_levels(7);
#endif
#endif

#if OS_TYPE_WIN
// SetProcessPriority(PRIORITY_IDLE);
#endif

#if ENABLE_FLINT
    // NOTE(review): thread count is fixed at 8 regardless of the machine.
    flint_set_num_threads(8);
#endif
  }

  // Calls fft::InitFftK(k) unless k is negative.
  void InitFft(int k = 22) {
    if (k >= 0) {
      fft::InitFftK(k);
    }
  }

  // Calls ntt32::InitNtt(k) unless k is negative; compiled out entirely when
  // HAS_POLY_MUL_NTT32 is not set.
  void InitNtt32(int k = 22) {
#if defined(HAS_POLY_MUL_NTT32) && HAS_POLY_MUL_NTT32
    if (k >= 0) {
      ntt32::InitNtt(k);
    }
#endif
  }

  // Calls ntt64::InitNtt(k) unless k is negative; compiled out entirely when
  // HAS_POLY_MUL_NTT64 is not set.
  void InitNtt64(int k = 22) {
#if defined(HAS_POLY_MUL_NTT64) && HAS_POLY_MUL_NTT64
    if (k >= 0) {
      ntt64::InitNtt(k);
    }
#endif
  }

  // Field defaults apply when the matching setter is never called. Note that
  // they differ from the setters' default arguments.
  int64 maxp = 1000000;
  int cal_phi = 0;
  int cal_mu = 0;
  int cal_rad = 0;
  int cal_sigma0 = 0;
  int cal_sigma1 = 0;

  // -1 means "do not initialise" (see InitFft / InitNtt32 / InitNtt64).
  int fft_k = -1;
  int ntt32_k = -1;
  int ntt64_k = -1;

  int64 default_mod = 1;
};
// Reads the next decimal integer from stdin using getchar().
//
// Leading characters that are neither a digit nor '-' are skipped. A leading
// '-' negates the value. Reading stops at the first non-digit after the
// number; that character is consumed, except that a '-' is pushed back so it
// can act as the sign of the following number ("5-3" reads as 5, then -3).
//
// Returns 0 if end of input is reached before any digit or '-' is seen.
template <typename T>
SL T ReadInt() {
  const auto is_digit = [](int ch) { return ch >= '0' && ch <= '9'; };

  // getchar() may return EOF (a negative value), so it must never be used as
  // a table index and every loop has to terminate on it.
  int c = getchar();
  while (c != EOF && c != '-' && !is_digit(c)) {
    c = getchar();
  }
  if (c == EOF) {
    return T(0);
  }

  const bool negative = (c == '-');
  T v = negative ? T(0) : static_cast<T>(c - '0');
  for (c = getchar(); is_digit(c); c = getchar()) {
    v = v * 10 + (c - '0');
  }
  if (c == '-') {
    ungetc(c, stdin);
  }
  return negative ? -v : v;
}
V1 = I, DV_IMPL_11(I, __VA_ARGS__) 72 | #define DV_IMPL_13(I, V1, ...) V1 = I, DV_IMPL_12(I, __VA_ARGS__) 73 | #define DV_IMPL_14(I, V1, ...) V1 = I, DV_IMPL_13(I, __VA_ARGS__) 74 | #define DV_IMPL_15(I, V1, ...) V1 = I, DV_IMPL_14(I, __VA_ARGS__) 75 | #define DV_IMPL_16(I, V1, ...) V1 = I, DV_IMPL_15(I, __VA_ARGS__) 76 | 77 | #define DV_IMPL(n, input, ...) PE_CONCAT(DV_IMPL_, n)(input, __VA_ARGS__) 78 | 79 | #define DV(T, ...) \ 80 | T DV_IMPL(PE_NARG(__VA_ARGS__), pe::ReadValue(), __VA_ARGS__) 81 | #define DVC(T, ...) \ 82 | const T DV_IMPL(PE_NARG(__VA_ARGS__), pe::ReadValue(), __VA_ARGS__) 83 | } // namespace pe 84 | 85 | namespace pe { 86 | 87 | #if OS_TYPE_WIN && ENABLE_CLIPBOARD 88 | SL bool CopyStringToClipboard(const std::string& s) { 89 | OpenClipboard(nullptr); 90 | EmptyClipboard(); 91 | HGLOBAL hg = GlobalAlloc(GMEM_MOVEABLE, std::size(s) + 1); 92 | if (!hg) { 93 | CloseClipboard(); 94 | return false; 95 | } 96 | memcpy(GlobalLock(hg), s.c_str(), std::size(s)); 97 | GlobalUnlock(hg); 98 | SetClipboardData(CF_TEXT, hg); 99 | CloseClipboard(); 100 | GlobalFree(hg); 101 | return true; 102 | } 103 | 104 | SL int PromptYesAndNo(const std::string& title, const std::string& content) { 105 | return MessageBoxA(nullptr, content.c_str(), title.c_str(), MB_OKCANCEL) == 1; 106 | } 107 | 108 | SL void PromptAnswer(const std::string& str) { 109 | std::cout << "Answer:" << std::endl << str << std::endl; 110 | if (PromptYesAndNo("Copy to clipboard?", str)) { 111 | CopyStringToClipboard(str); 112 | } 113 | } 114 | 115 | #else 116 | SL void PromptAnswer(const std::string& str) { 117 | std::cout << "Answer:" << std::endl << str << std::endl; 118 | } 119 | #endif 120 | 121 | template 122 | SL REQUIRES((is_general_integer_v)) RETURN(void) PromptAnswer(const T& v) { 123 | PromptAnswer(ToString(v)); 124 | } 125 | 126 | template 127 | SL void PromptAnswer(const std::string& format, T... 
v) { 128 | char buff[1024]; 129 | sprintf(buff, format.c_str(), v...); 130 | PromptAnswer(buff); 131 | } 132 | 133 | struct AnswerPrompter { 134 | template 135 | AnswerPrompter& operator()(const std::string& format, T... v) { 136 | char buff[1024]; 137 | sprintf(buff, format.c_str(), v...); 138 | PromptAnswer(buff); 139 | return *this; 140 | } 141 | }; 142 | 143 | template 144 | SL REQUIRES((is_general_integer_v)) 145 | RETURN(AnswerPrompter&) operator<<(AnswerPrompter& ap, T v) { 146 | PromptAnswer(ToString(v)); 147 | return ap; 148 | } 149 | 150 | template 151 | SL REQUIRES((!is_general_integer_v)) 152 | RETURN(AnswerPrompter&) operator<<(AnswerPrompter& ap, T v) { 153 | std::stringstream ss; 154 | ss << v; 155 | PromptAnswer(v); 156 | return ap; 157 | } 158 | 159 | static AnswerPrompter ap; 160 | } // namespace pe 161 | #endif 162 | -------------------------------------------------------------------------------- /pe_memory: -------------------------------------------------------------------------------- 1 | #ifndef PE_MEMORY_ 2 | #define PE_MEMORY_ 3 | 4 | #include "pe_base" 5 | 6 | namespace pe { 7 | #if OS_TYPE_WIN 8 | struct LargeMemory { 9 | public: 10 | LargeMemory() = default; 11 | 12 | ~LargeMemory() { 13 | std::vector vec; 14 | for (auto [p, _] : allocated_) { 15 | vec.push_back(p); 16 | } 17 | 18 | for (auto p : vec) Deallocate(p); 19 | } 20 | 21 | void* Allocate(int64 size) { 22 | HANDLE hMapFile = ::CreateFileMapping( 23 | INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE | SEC_COMMIT, size >> 32, 24 | size % (1LL << 32), nullptr); 25 | assert(GetLastError() == 0); 26 | 27 | void* ptr = ::MapViewOfFile(hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, 0); 28 | assert(GetLastError() == 0); 29 | allocated_.insert({ptr, hMapFile}); 30 | 31 | return ptr; 32 | } 33 | 34 | void Deallocate(void* ptr) { 35 | auto where = allocated_.find(ptr); 36 | if (where == allocated_.end()) return; 37 | 38 | ::UnmapViewOfFile(ptr); 39 | ::CloseHandle(where->second); 40 | 
allocated_.erase(where); 41 | } 42 | 43 | private: 44 | std::map allocated_; 45 | }; 46 | 47 | SL LargeMemory& LmAllocator() { 48 | static LargeMemory __lm; 49 | return __lm; 50 | } 51 | 52 | struct LmAllocator { 53 | static void* Allocate(int64 size) { return LmAllocator().Allocate(size); } 54 | 55 | static void Deallocate(void* ptr) { LmAllocator().Deallocate(ptr); } 56 | }; 57 | 58 | SL void* LmAllocate(int64 size) { return LmAllocator::Allocate(size); } 59 | 60 | SL void LmDeallocate(void* ptr) { LmAllocator::Deallocate(ptr); } 61 | #endif 62 | 63 | SL void* StdAllocate(int64 size) { return new char[size]; } 64 | 65 | SL void StdDeallocate(void* ptr) { delete[] reinterpret_cast(ptr); } 66 | 67 | struct StdAllocator { 68 | static void* Allocate(int64 size) { return new char[size]; } 69 | static void Deallocate(void* ptr) { delete[] reinterpret_cast(ptr); } 70 | }; 71 | } // namespace pe 72 | #endif 73 | -------------------------------------------------------------------------------- /pe_parallel: -------------------------------------------------------------------------------- 1 | #ifndef PE_PARALLEL_ 2 | #define PE_PARALLEL_ 3 | 4 | #include "pe_base" 5 | #include "pe_mod" 6 | #include "pe_time" 7 | #include "pe_persistance" 8 | 9 | #if OS_TYPE_WIN 10 | 11 | namespace pe { 12 | 13 | enum { 14 | PRIORITY_REALTIME = REALTIME_PRIORITY_CLASS, 15 | PRIORITY_HIGH = HIGH_PRIORITY_CLASS, 16 | PRIORITY_ABOVE_NORMAL = ABOVE_NORMAL_PRIORITY_CLASS, 17 | PRIORITY_NORMAL = NORMAL_PRIORITY_CLASS, 18 | PRIORITY_BELOW_NORMAL = BELOW_NORMAL_PRIORITY_CLASS, 19 | PRIORITY_BACKGROUND = 0x00100000, // PROCESS_MODE_BACKGROUND_BEGIN, 20 | PRIORITY_IDLE = IDLE_PRIORITY_CLASS, 21 | }; 22 | 23 | static inline void SetProcessPriority(int priority) { 24 | ::SetPriorityClass(::GetCurrentProcess(), priority); 25 | } 26 | 27 | static inline void MakeSureProcessSingleton(const char* id) { 28 | std::string mutex_name = "pe_mutex_prefix_"; 29 | mutex_name += id; 30 | HANDLE hMutex = 
::OpenMutex(MUTEX_ALL_ACCESS, FALSE, mutex_name.c_str()); 31 | if (hMutex) { 32 | fprintf(stderr, "another process is running\n"); 33 | ::CloseHandle(hMutex); 34 | exit(-1); 35 | return; 36 | } 37 | hMutex = ::CreateMutex(nullptr, TRUE, mutex_name.c_str()); 38 | if (::GetLastError() == ERROR_ALREADY_EXISTS) { 39 | fprintf(stderr, "another process is running\n"); 40 | ::CloseHandle(hMutex); 41 | exit(-1); 42 | return; 43 | } 44 | } 45 | 46 | } // namespace pe 47 | #endif // end OS_TYPE_WIN 48 | 49 | #if ENABLE_OPENMP 50 | namespace pe { 51 | class OmpLock { 52 | public: 53 | OmpLock() { omp_init_lock(&locker_); } 54 | ~OmpLock() { omp_destroy_lock(&locker_); } 55 | OmpLock(const OmpLock&) = delete; 56 | OmpLock& operator=(const OmpLock&) = delete; 57 | void lock() { omp_set_lock(&locker_); } 58 | void unlock() { omp_unset_lock(&locker_); } 59 | 60 | private: 61 | omp_lock_t locker_; 62 | }; 63 | using OmpGuard = std::lock_guard; 64 | } // namespace pe 65 | #endif 66 | #endif 67 | -------------------------------------------------------------------------------- /pe_persistance: -------------------------------------------------------------------------------- 1 | #ifndef PE_PERSISTANCE_ 2 | #define PE_PERSISTANCE_ 3 | 4 | #endif 5 | -------------------------------------------------------------------------------- /pe_poly_base_gmp: -------------------------------------------------------------------------------- 1 | #ifndef PE_POLY_BASE_GMP_ 2 | #define PE_POLY_BASE_GMP_ 3 | 4 | #include "pe_base" 5 | #include "pe_type_traits" 6 | #include "pe_poly_base_common" 7 | 8 | #if ENABLE_GMP 9 | 10 | #define HAS_POLY_MUL_GMP 1 11 | 12 | namespace pe { 13 | namespace gmp { 14 | 15 | namespace bn_poly_mul { 16 | namespace internal { 17 | template 18 | SL REQUIRES((is_builtin_integer_v)) RETURN(void) 19 | InitAsMpz(mpz_t a, const T* X, const int64 n, const int64 need_limb, 20 | int64 mod) { 21 | mpz_setbit(a, 22 | static_cast(need_limb * n * sizeof(mp_limb_t) * 8)); 23 | mpz_ptr ptr 
= a; 24 | if (mod > 0) { 25 | for (int i = 0; i < n; ++i) { 26 | ptr->_mp_d[i * need_limb] = Mod(X[i], mod); 27 | } 28 | } else { 29 | if (sizeof(T) <= sizeof(mp_limb_t)) { 30 | for (int i = 0; i < n; ++i) { 31 | ptr->_mp_d[i * need_limb] = X[i]; 32 | } 33 | } else { 34 | for (int i = 0; i < n; ++i) { 35 | *reinterpret_cast(ptr->_mp_d + i * need_limb) = X[i]; 36 | } 37 | } 38 | } 39 | mpz_clrbit(a, 40 | static_cast(need_limb * n * sizeof(mp_limb_t) * 8)); 41 | } 42 | 43 | template 44 | SL REQUIRES((is_extended_integer_v)) RETURN(void) 45 | InitAsMpz(mpz_t a, const T* X, const int64 n, const int64 need_limb, 46 | int64 mod) { 47 | mpz_setbit(a, need_limb * n * sizeof(mp_limb_t) * 8); 48 | mpz_ptr ptr = a; 49 | if (mod > 0) { 50 | for (int i = 0; i < n; ++i) { 51 | ptr->_mp_d[i * need_limb] = ToInt(Mod(X[i], mod)); 52 | } 53 | } else { 54 | if (sizeof(T) <= sizeof(mp_limb_t)) { 55 | for (int i = 0; i < n; ++i) { 56 | ptr->_mp_d[i * need_limb] = ToInt(X[i]); 57 | } 58 | } else { 59 | for (int i = 0; i < n; ++i) { 60 | *reinterpret_cast(ptr->_mp_d + i * need_limb) = X[i]; 61 | } 62 | } 63 | } 64 | mpz_clrbit(a, need_limb * n * sizeof(mp_limb_t) * 8); 65 | } 66 | 67 | template 68 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 69 | PolyMulImpl(const T* X, const int64 n, const T* Y, const int64 m, T* result, 70 | int64 mod) { 71 | const int64 mod_bit = mod == 0 ? 
sizeof(T) * 8 : HighestBitIndex(mod) + 1; 72 | const int64 need_bit = mod_bit * 2 + HighestBitIndex(std::min(n, m)) + 1; 73 | const int64 need_limb = 74 | (need_bit + sizeof(mp_limb_t) * 8 - 1) / (sizeof(mp_limb_t) * 8); 75 | 76 | mpz_t a, b; 77 | mpz_init(a); 78 | mpz_init(b); 79 | 80 | InitAsMpz(a, X, n, need_limb, mod); 81 | InitAsMpz(b, Y, m, need_limb, mod); 82 | 83 | mpz_mul(a, a, b); 84 | 85 | mpz_ptr ptr = a; 86 | const int64 has_size = ptr->_mp_size; 87 | const int64 all_size = n + m - 1; 88 | 89 | if (mod > 0) { 90 | if (need_limb == 1) { 91 | for (int64 i = 0; i < all_size; ++i) { 92 | const int64 offset = i * need_limb; 93 | const mp_limb_t v = offset >= has_size ? 0 : ptr->_mp_d[offset]; 94 | result[i] = v % mod; 95 | } 96 | } else if (need_limb == 2) { 97 | const auto magic1 = (1LL << 32) % mod; 98 | const auto magic2 = MulMod(magic1, magic1, mod); 99 | for (int64 i = 0; i < all_size; ++i) { 100 | const int64 offset = i * need_limb; 101 | const mp_limb_t hi = 102 | offset + 1 >= has_size ? 0 : ptr->_mp_d[offset + 1]; 103 | const mp_limb_t low = offset >= has_size ? 0 : ptr->_mp_d[offset]; 104 | result[i] = AddMod(MulMod(hi % mod, magic2, mod), low % mod, mod); 105 | } 106 | } else { 107 | const auto magic1 = (1LL << 32) % mod; 108 | const auto magic2 = MulMod(magic1, magic1, mod); 109 | for (int64 i = 0; i < all_size; ++i) { 110 | const int64 offset = i * need_limb; 111 | uint64 it = 0; 112 | for (int j = static_cast(need_limb - 1); j >= 0; --j) { 113 | const mp_limb_t v = 114 | offset + j >= has_size ? 0 : ptr->_mp_d[offset + j]; 115 | it = AddMod(MulMod(it, magic2, mod), v % mod, mod); 116 | } 117 | result[i] = it; 118 | } 119 | } 120 | } else { 121 | if (need_limb == 1) { 122 | for (int64 i = 0; i < all_size; ++i) { 123 | const int64 offset = i * need_limb; 124 | const mp_limb_t v = offset >= has_size ? 
0 : ptr->_mp_d[offset]; 125 | result[i] = v; 126 | } 127 | } else { 128 | for (int64 i = 0; i < all_size; ++i) { 129 | const int64 offset = i * need_limb; 130 | T it = 0; 131 | for (int j = static_cast(need_limb - 1); j >= 0; --j) { 132 | const mp_limb_t v = 133 | offset + j >= has_size ? 0 : ptr->_mp_d[offset + j]; 134 | it <<= 32; 135 | it <<= 32; 136 | it |= v; 137 | } 138 | result[i] = it; 139 | } 140 | } 141 | } 142 | 143 | mpz_clear(a); 144 | mpz_clear(b); 145 | } 146 | } // namespace internal 147 | 148 | static constexpr PolyMulCoeType kPolyMulMod = 0; 149 | 150 | POLY_MUL_IMPL(PolyMul, internal::PolyMulImpl) 151 | } // namespace bn_poly_mul 152 | 153 | } // namespace gmp 154 | } // namespace pe 155 | #else 156 | #define HAS_POLY_MUL_GMP 0 157 | #endif 158 | 159 | #endif 160 | -------------------------------------------------------------------------------- /pe_poly_base_libbf: -------------------------------------------------------------------------------- 1 | #ifndef PE_POLY_BASE_LIBBF_ 2 | #define PE_POLY_BASE_LIBBF_ 3 | 4 | #include "pe_base" 5 | #include "pe_poly_base_common" 6 | 7 | #if ENABLE_LIBBF && LIMB_BITS == 64 8 | 9 | #define HAS_POLY_LIBBF 1 10 | #define HAS_POLY_MUL_LIBBF 1 11 | 12 | namespace pe { 13 | 14 | namespace libbf { 15 | namespace internal { 16 | extern "C" { 17 | void* PeBfRealloc(void*, void* ptr, size_t size) { return realloc(ptr, size); } 18 | } 19 | 20 | template 21 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 22 | BfNttInit(const T* X, int64 n, int64 mod, bf_context_t* context, 23 | bf_t* target) { 24 | bf_init(context, target); 25 | 26 | bf_set_ui(target, 1); 27 | 28 | bf_resize(target, 2 * n); 29 | target->expn = pe_clzll(ToInt(X[n - 1])); 30 | 31 | const int64 len = target->len; 32 | limb_t* data = target->tab; 33 | if (mod == 0) { 34 | for (int64 i = 0; i < n; ++i) { 35 | T t = X[i]; 36 | PE_ASSERT((t >> 64) == 0); 37 | data[i << 1] = ToInt(X[i]); 38 | data[(i << 1) | 1] = 0; 39 | } 40 | } 
else { 41 | for (int64 i = 0; i < n; ++i) { 42 | T t = X[i]; 43 | PE_ASSERT((t >> 64) == 0); 44 | data[i << 1] = ToInt(Mod(X[i], mod)); 45 | data[(i << 1) | 1] = 0; 46 | } 47 | } 48 | } 49 | 50 | SL void BfNttDeinit(bf_t* target) { bf_delete(target); } 51 | 52 | static constexpr PolyMulCoeType kPolyMulLargeMod = 0; 53 | 54 | template 55 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 56 | PolyMulImpl(const T* X, int64 n, const T* Y, int64 m, T* result, 57 | int64 mod) { 58 | bf_t x, y; 59 | bf_context_t bf_context; 60 | bf_context_init(&bf_context, PeBfRealloc, nullptr); 61 | 62 | BfNttInit(X, n, mod, &bf_context, &x); 63 | BfNttInit(Y, m, mod, &bf_context, &y); 64 | 65 | bf_t z0, z1, toadd; 66 | bf_init(&bf_context, &z0); 67 | bf_init(&bf_context, &z1); 68 | bf_init(&bf_context, &toadd); 69 | 70 | int offset = x.expn + y.expn; 71 | bf_set_ui(&toadd, 1); 72 | 73 | toadd.expn = 128 + offset; 74 | 75 | bf_mul(&z0, &x, &y, BF_PREC_MAX, 0); 76 | bf_add(&z1, &z0, &toadd, BF_PREC_MAX, 0); 77 | 78 | const int64 size = n + m - 1; 79 | const limb_t* data = z1.tab; 80 | 81 | if (mod > 0) { 82 | const auto magic1 = (1LL << 32) % mod; 83 | const auto magic2 = MulMod(magic1, magic1, mod); 84 | for (int64 i = 0; i < size; ++i) { 85 | const uint64 low = data[i << 1]; 86 | const uint64 hi = data[(i << 1) | 1]; 87 | if (hi == 0) { 88 | result[i] = low % mod; 89 | } else { 90 | result[i] = AddMod(MulMod(hi % mod, magic2, mod), low % mod, mod); 91 | } 92 | } 93 | } else { 94 | for (int64 i = 0; i < size; ++i) { 95 | const uint64 low = data[i << 1]; 96 | const uint64 hi = data[(i << 1) | 1]; 97 | if (hi == 0) { 98 | result[i] = low; 99 | } else { 100 | T it = hi; 101 | it <<= 32; 102 | it <<= 32; 103 | it |= low; 104 | result[i] = it; 105 | } 106 | } 107 | } 108 | 109 | bf_delete(&toadd); 110 | bf_delete(&z1); 111 | bf_delete(&z0); 112 | BfNttDeinit(&y); 113 | BfNttDeinit(&x); 114 | 115 | bf_context_end(&bf_context); 116 | } 117 | } // namespace internal 
118 | 119 | static constexpr PolyMulCoeType kPolyMulMod = 120 | Prod(1LL << 63, 1LL << 63) * 2 + 1; 121 | 122 | POLY_MUL_IMPL(PolyMul, internal::PolyMulImpl) 123 | 124 | } // namespace libbf 125 | } // namespace pe 126 | #else 127 | #define HAS_POLY_LIBBF 0 128 | #define HAS_POLY_MUL_LIBBF 0 129 | #endif 130 | 131 | #endif 132 | -------------------------------------------------------------------------------- /pe_poly_base_ntl: -------------------------------------------------------------------------------- 1 | #ifndef PE_POLY_BASE_NTL_ 2 | #define PE_POLY_BASE_NTL_ 3 | 4 | #include "pe_base" 5 | #include "pe_type_traits" 6 | #include "pe_poly_base_common" 7 | 8 | #if ENABLE_NTL 9 | 10 | #define HAS_POLY_NTL 1 11 | #define HAS_POLY_MUL_NTL 1 12 | 13 | namespace pe { 14 | namespace ntl { 15 | namespace internal { 16 | template 17 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 18 | InitPoly(NTL::ZZ_pX& p, const T* x, int64 n, int64 mod) { 19 | NTL::ZZ tmp(0); 20 | p.SetLength(n); 21 | if (mod >> 32) { 22 | for (int64 i = 0; i < n; ++i) { 23 | ZZFromBytes(tmp, reinterpret_cast(&x[i]), 24 | sizeof(T)); 25 | p[i] = to_ZZ_p(tmp); 26 | } 27 | } else { 28 | for (int64 i = 0; i < n; ++i) { 29 | p[i] = ToInt(Mod(x[i], mod)); 30 | } 31 | } 32 | } 33 | 34 | template 35 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 36 | CopyPoly(NTL::ZZ_pX& p, T* x, int64 n, int64 mod) { 37 | const int64 d = deg(p); 38 | const int64 m = std::min(d, n - 1); 39 | if (mod >> 32) { 40 | for (int64 i = 0; i <= m; ++i) { 41 | T a; 42 | BytesFromZZ(reinterpret_cast(&a), 43 | reinterpret_cast(p[i]), sizeof(T)); 44 | x[i] = a; 45 | } 46 | } else { 47 | for (int64 i = 0; i <= m; ++i) { 48 | x[i] = static_cast(to_int(reinterpret_cast(p[i]))); 49 | } 50 | } 51 | for (int64 i = m + 1; i < n; ++i) { 52 | x[i] = 0; 53 | } 54 | } 55 | 56 | template 57 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 58 | InitPoly(NTL::zz_pX& 
p, const T* x, int64 n, long mod) { 59 | p.SetLength(n); 60 | for (int64 i = 0; i < n; ++i) { 61 | p[i]._zz_p__rep = ToInt(Mod(x[i], mod)); 62 | } 63 | } 64 | 65 | template 66 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 67 | CopyPoly(NTL::zz_pX& p, T* x, int64 n, long /*mod*/) { 68 | const int64 d = deg(p); 69 | const int64 m = std::min(d, n - 1); 70 | for (int64 i = 0; i <= m; ++i) { 71 | x[i] = p[i]._zz_p__rep; 72 | } 73 | for (int64 i = m + 1; i < n; ++i) { 74 | x[i] = 0; 75 | } 76 | } 77 | 78 | template 79 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 80 | PolyMulSmallImpl(const T* X, int64 n, const T* Y, int64 m, T* result, 81 | int64 mod) { 82 | PE_ASSERT(mod > 0); 83 | 84 | NTL::zz_p::init(static_cast(mod)); 85 | 86 | NTL::zz_pX x, y, z; 87 | InitPoly(x, X, n, static_cast(mod)); 88 | InitPoly(y, Y, m, static_cast(mod)); 89 | 90 | NTL::mul(z, x, y); 91 | 92 | CopyPoly(z, result, n + m - 1, static_cast(mod)); 93 | } 94 | 95 | template 96 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 97 | PolyMulLargeImpl(const T* X, int64 n, const T* Y, int64 m, T* result, 98 | int64 mod) { 99 | PE_ASSERT(mod > 0); 100 | 101 | NTL::ZZ tmp(0); 102 | ZZFromBytes(tmp, reinterpret_cast(&mod), sizeof(int64)); 103 | NTL::ZZ_p::init(tmp); 104 | 105 | NTL::ZZ_pX x, y, z; 106 | InitPoly(x, X, n, mod); 107 | InitPoly(y, Y, m, mod); 108 | 109 | NTL::mul(z, x, y); 110 | 111 | CopyPoly(z, result, n + m - 1, mod); 112 | } 113 | // Dispatcher: takes the zz_p path when PolyMulAcceptLengthAndMod(NTL_SP_BOUND, n, m, mod) holds, otherwise the ZZ_p path; exactly one multiplication runs (previously the ZZ_p path always ran and overwrote the zz_p result). 114 | template 115 | SL REQUIRES((is_builtin_integer_v || is_extended_integer_v)) RETURN(void) 116 | PolyMulImpl(const T* X, int64 n, const T* Y, int64 m, T* result, 117 | int64 mod) { 118 | if (PolyMulAcceptLengthAndMod(NTL_SP_BOUND, n, m, mod)) { 119 | PolyMulSmallImpl(X, n, Y, m, result, mod); 120 | } else { 121 | PolyMulLargeImpl(X, n, Y, m, result, mod); 122 | } 123 | } 124 | } // namespace internal 125 | static constexpr PolyMulCoeType kPolyMulSmallMod = NTL_SP_BOUND; 126 | 
static constexpr PolyMulCoeType kPolyMulLargeMod = 0; 127 | 128 | POLY_MUL_IMPL(PolyMulSmall, internal::PolyMulSmallImpl) 129 | POLY_MUL_IMPL(PolyMulLarge, internal::PolyMulLargeImpl) 130 | POLY_MUL_IMPL(PolyMul, internal::PolyMulImpl) 131 | 132 | namespace internal { 133 | template 134 | SL REQUIRES((is_builtin_integer_v)) RETURN(void) 135 | PolyDivAndModSmallModImpl(const T* X, int64 n, const T* Y, int64 m, T* q, 136 | T* r, int64 mod) { 137 | if (m > n) { 138 | if (r != nullptr) { 139 | std::copy(X, X + n, r); 140 | std::fill(r + n, r + m, 0); 141 | } 142 | if (q != nullptr) { 143 | q[0] = 0; 144 | } 145 | return; 146 | } 147 | 148 | NTL::zz_p::init(static_cast(mod)); 149 | 150 | NTL::zz_pX x, y; 151 | InitPoly(x, X, n, mod); 152 | InitPoly(y, Y, m, mod); 153 | 154 | if (q != nullptr && r != nullptr) { 155 | NTL::zz_pX u, v; 156 | 157 | NTL::DivRem(u, v, x, y); 158 | 159 | CopyPoly(u, q, n - m + 1, mod); 160 | CopyPoly(v, r, m, mod); 161 | } else if (q != nullptr) { 162 | NTL::zz_pX u; 163 | 164 | NTL::div(u, x, y); 165 | 166 | CopyPoly(u, q, n - m + 1, mod); 167 | } else if (r != nullptr) { 168 | NTL::zz_pX v; 169 | 170 | NTL::rem(v, x, y); 171 | CopyPoly(v, r, m, mod); 172 | } 173 | } 174 | 175 | template 176 | SL REQUIRES((is_builtin_integer_v)) RETURN(void) 177 | PolyDivAndModLargeModImpl(const T* X, int64 n, const T* Y, int64 m, T* q, 178 | T* r, int64 mod) { 179 | if (m > n) { 180 | if (r != nullptr) { 181 | std::copy(X, X + n, r); 182 | std::fill(r + n, r + m, 0); 183 | } 184 | if (q != nullptr) { 185 | q[0] = 0; 186 | } 187 | return; 188 | } 189 | // mod is declared int64 in this signature, so exactly sizeof(int64) bytes are read from it; sizeof(T) would truncate the modulus for narrower T and read past the object for wider T. 190 | NTL::ZZ tmp(0); 191 | ZZFromBytes(tmp, reinterpret_cast(&mod), sizeof(int64)); 192 | NTL::ZZ_p::init(tmp); 193 | 194 | NTL::ZZ_pX x, y; 195 | InitPoly(x, X, n, mod); 196 | InitPoly(y, Y, m, mod); 197 | 198 | if (q != nullptr && r != nullptr) { 199 | NTL::ZZ_pX u, v; 200 | 201 | NTL::DivRem(u, v, x, y); 202 | 203 | CopyPoly(u, q, n - m + 1, mod); 204 | CopyPoly(v, r, m, mod); 205 | } else if (q != nullptr) { 
206 | NTL::ZZ_pX u; 207 | 208 | NTL::div(u, x, y); 209 | 210 | CopyPoly(u, q, n - m + 1, mod); 211 | } else if (r != nullptr) { 212 | NTL::ZZ_pX v; 213 | 214 | NTL::rem(v, x, y); 215 | CopyPoly(v, r, m, mod); 216 | } 217 | } 218 | 219 | template 220 | SL REQUIRES((is_builtin_integer_v)) RETURN(void) 221 | PolyDivAndModImpl(const T* X, int64 n, const T* Y, int64 m, T* q, T* r, 222 | int64 mod) { 223 | if (mod < NTL_SP_BOUND) { 224 | PolyDivAndModSmallModImpl(X, n, Y, m, q, r, mod); 225 | } else { 226 | PolyDivAndModLargeModImpl(X, n, Y, m, q, r, mod); 227 | } 228 | } 229 | } // namespace internal 230 | 231 | POLY_DIV_AND_MOD_IMPL(PolyDivAndModSmallMod, 232 | internal::PolyDivAndModSmallModImpl) 233 | POLY_DIV_IMPL(PolyDivSmallMod, ntl::PolyDivAndModSmallMod) 234 | POLY_MOD_IMPL(PolyModSmallMod, ntl::PolyDivAndModSmallMod) 235 | 236 | POLY_DIV_AND_MOD_IMPL(PolyDivAndModLargeMod, 237 | internal::PolyDivAndModLargeModImpl) 238 | POLY_DIV_IMPL(PolyDivLargeMod, ntl::PolyDivAndModLargeMod) 239 | POLY_MOD_IMPL(PolyModLargeMod, ntl::PolyDivAndModLargeMod) 240 | 241 | POLY_DIV_AND_MOD_IMPL(PolyDivAndMod, internal::PolyDivAndModImpl) 242 | POLY_DIV_IMPL(PolyDiv, ntl::PolyDivAndMod) 243 | POLY_MOD_IMPL(PolyMod, ntl::PolyDivAndMod) 244 | } // namespace ntl 245 | } // namespace pe 246 | #else 247 | #define HAS_POLY_NTL 0 248 | #define HAS_POLY_MUL_NTL 0 249 | #endif 250 | 251 | #endif 252 | -------------------------------------------------------------------------------- /pe_rand: -------------------------------------------------------------------------------- 1 | #ifndef PE_RAND_ 2 | #define PE_RAND_ 3 | 4 | #include "pe_base" 5 | #include "pe_nt" 6 | 7 | namespace pe { 8 | // C-style random number generator 9 | // ANSI_ISO_9899-1999: The value of the RAND_MAX macro shall be at least 32767. 
10 | // We assume RAND_MAX is either 32767 or 2147483647 11 | #if RAND_MAX == 32767 12 | 13 | SL int CRandBit() { return rand() & 1; } 14 | SL int CRand15() { return rand(); } 15 | SL int CRand31() { return CRand15() << 16 | CRand15() << 1 | CRandBit(); } 16 | SL int64 CRand63() { 17 | return static_cast(CRand31()) << 32 | 18 | static_cast(CRand31()) << 1 | CRandBit(); 19 | } 20 | 21 | SL int CRandI() { return CRand31(); } 22 | SL double CRandD() { 23 | static constexpr double coe = 1. / (RAND_MAX + 1); 24 | return coe * rand(); 25 | } 26 | 27 | #elif RAND_MAX == 2147483647 28 | 29 | SL int CRandBit() { return rand() & 1; } 30 | SL int CRand15() { return rand() & 32767; } 31 | SL int CRand31() { return rand(); } 32 | SL int64 CRand63() { 33 | return static_cast(CRand31()) << 32 | 34 | static_cast(CRand31()) << 1 | CRandBit(); 35 | } 36 | 37 | SL int CRandI() { return CRand31(); } 38 | SL double CRandD() { 39 | static constexpr double coe = 1. / (static_cast(RAND_MAX) + 1LL); 40 | return coe * rand(); 41 | } 42 | 43 | #else 44 | 45 | #error "RAND_MAX should be either 32767 or 2147483647" 46 | 47 | #endif 48 | 49 | // C++ style random number generator 50 | template 51 | class RandomGenerator { 52 | public: 53 | RandomGenerator(const RE& engine, const DS& distribution) 54 | : random_engine_(engine), distribution_(distribution) {} 55 | int operator()() { return distribution_(random_engine_); } 56 | 57 | private: 58 | RE random_engine_; 59 | DS distribution_; 60 | }; 61 | 62 | RandomGenerator> SL 63 | MakeUniformGenerator(int seed, int min, int max) { 64 | PE_ASSERT(min <= max); 65 | return RandomGenerator>( 66 | std::mt19937(seed), std::uniform_int_distribution(min, max)); 67 | } 68 | 69 | RandomGenerator> SL 70 | MakeUniformGenerator(int min, int max) { 71 | std::random_device rd; 72 | PE_ASSERT(min <= max); 73 | return RandomGenerator>( 74 | std::mt19937(rd()), std::uniform_int_distribution(min, max)); 75 | } 76 | 77 | template 78 | SL void Halton(ET idx, const int 
n, RT* result) { 79 | std::fill(result, result + n, 0); 80 | std::vector prime_inv(n, 0); 81 | std::vector t(n, idx); 82 | for (int i = 0; i < n; ++i) prime_inv[i] = static_cast(1. / plist[i]); 83 | for (ET sum = n * idx; sum > 0;) { 84 | for (int i = 0; i < n; ++i) { 85 | const auto d = t[i] % plist[i]; 86 | result[i] += d * prime_inv[i]; 87 | prime_inv[i] /= plist[i]; 88 | sum -= t[i]; 89 | t[i] /= plist[i]; 90 | sum += t[i]; 91 | } 92 | } 93 | } 94 | 95 | template 96 | SL std::vector Halton(ET idx, const int n) { 97 | return Halton(idx, n); 98 | } 99 | 100 | // The idx_th n-dimension vector. 101 | SL std::vector Halton(int64 idx, const int n) { 102 | std::vector r(n, 0); 103 | Halton(idx, n, std::data(r)); 104 | return r; 105 | } 106 | } // namespace pe 107 | #endif 108 | -------------------------------------------------------------------------------- /pe_time: -------------------------------------------------------------------------------- 1 | #ifndef PE_TIME_ 2 | #define PE_TIME_ 3 | 4 | #include "pe_base" 5 | 6 | namespace pe { 7 | using pe_clock_t = std::chrono::high_resolution_clock; 8 | using time_point_t = pe_clock_t::time_point; 9 | using duration_t = pe_clock_t::duration; 10 | using period_t = pe_clock_t::period; 11 | 12 | constexpr int64 MILLI_SEC_CLOCKS = period_t::den / std::milli::den; 13 | constexpr int64 SEC_CLOCKS = period_t::den; 14 | constexpr int64 MIN_CLOCKS = SEC_CLOCKS * 60; 15 | constexpr int64 HOUR_CLOCKS = MIN_CLOCKS * 60; 16 | constexpr int64 DAY_CLOCKS = HOUR_CLOCKS * 24; 17 | 18 | class TimeDelta { 19 | public: 20 | TimeDelta(duration_t duration = duration_t::zero()) : duration_(duration) {} 21 | 22 | int64 NativeTime() const { return duration_.count(); } 23 | double ToMilliSeconds() const { 24 | return 1. * duration_.count() / MILLI_SEC_CLOCKS; 25 | } 26 | double ToSeconds() const { return 1. * duration_.count() / SEC_CLOCKS; } 27 | double ToMinutes() const { return 1. 
* duration_.count() / MIN_CLOCKS; } 28 | double ToHours() const { return 1. * duration_.count() / HOUR_CLOCKS; } 29 | double ToDays() const { return 1. * duration_.count() / DAY_CLOCKS; } 30 | std::string Format() const { 31 | char temp[128]; 32 | const int64 nano_seconds = duration_.count(); 33 | const int64 day = nano_seconds / DAY_CLOCKS; 34 | const int hour = static_cast(nano_seconds % DAY_CLOCKS / HOUR_CLOCKS); 35 | const int min = static_cast(nano_seconds % HOUR_CLOCKS / MIN_CLOCKS); 36 | const int sec = static_cast(nano_seconds % MIN_CLOCKS / SEC_CLOCKS); 37 | const int msec = 38 | static_cast(nano_seconds % SEC_CLOCKS / MILLI_SEC_CLOCKS); 39 | sprintf(temp, "%" PRId64 ":%02d:%02d:%02d.%03d", day, hour, min, sec, msec); 40 | return temp; 41 | } 42 | 43 | static TimeDelta FromMilliSeconds(int64 t) { 44 | return duration_t(t * MILLI_SEC_CLOCKS); 45 | } 46 | static TimeDelta FromSeconds(int64 t) { return duration_t(t * SEC_CLOCKS); } 47 | static TimeDelta FromMinutes(int64 t) { return duration_t(t * MIN_CLOCKS); } 48 | static TimeDelta FromHours(int64 t) { return duration_t(t * HOUR_CLOCKS); } 49 | static TimeDelta FromDays(int64 t) { return duration_t(t * DAY_CLOCKS); } 50 | 51 | bool operator>(const TimeDelta& o) const { return duration_ > o.duration_; } 52 | bool operator>=(const TimeDelta& o) const { return duration_ >= o.duration_; } 53 | bool operator==(const TimeDelta& o) const { return duration_ == o.duration_; } 54 | bool operator<(const TimeDelta& o) const { return duration_ < o.duration_; } 55 | bool operator<=(const TimeDelta& o) const { return duration_ <= o.duration_; } 56 | 57 | TimeDelta& Add(TimeDelta t) { 58 | duration_ += t.duration_; 59 | return *this; 60 | } 61 | 62 | TimeDelta& Sub(TimeDelta t) { 63 | duration_ -= t.duration_; 64 | return *this; 65 | } 66 | 67 | private: 68 | duration_t duration_; 69 | }; 70 | 71 | class TimeRecorder { 72 | public: 73 | TimeRecorder() : last_time_(pe_clock_t::now()) {} 74 | 75 | int64 Record() { 76 | 
last_time_ = pe_clock_t::now(); 77 | return last_time_.time_since_epoch().count(); 78 | } 79 | 80 | TimeDelta Elapsed() const { return pe_clock_t::now() - last_time_; } 81 | 82 | std::string usage() const { return Elapsed().Format(); } 83 | 84 | private: 85 | time_point_t last_time_; 86 | }; 87 | 88 | class TimeUsage { 89 | public: 90 | ~TimeUsage() { 91 | fprintf(stderr, "time usage: %s\n", tr_.Elapsed().Format().c_str()); 92 | } 93 | 94 | private: 95 | TimeRecorder tr_; 96 | }; 97 | 98 | #ifndef PE_TEST_MODE 99 | static TimeUsage __time_usage; 100 | #endif 101 | } // namespace pe 102 | #endif 103 | -------------------------------------------------------------------------------- /pe_tree: -------------------------------------------------------------------------------- 1 | #ifndef PE_TREE_ 2 | #define PE_TREE_ 3 | 4 | #include "pe_base" 5 | 6 | namespace pe { 7 | // Maps distinct elements to 1..n 8 | class IndexHelper { 9 | public: 10 | IndexHelper() = default; 11 | 12 | IndexHelper(const std::vector& elements) { Reset(elements); } 13 | 14 | IndexHelper(std::vector&& elements) { Reset(std::move(elements)); } 15 | 16 | template 17 | IndexHelper(IT s, IT e) { 18 | Reset(std::vector(s, e)); 19 | } 20 | 21 | IndexHelper& Reset(const std::vector& elements) { 22 | elements_ = elements; 23 | ResetInternal(); 24 | return *this; 25 | } 26 | 27 | IndexHelper& Reset(std::vector&& elements) { 28 | elements_ = std::move(elements); 29 | ResetInternal(); 30 | return *this; 31 | } 32 | 33 | void ResetInternal() { 34 | std::sort(elements_.begin(), elements_.end()); 35 | size_ = std::unique(elements_.begin(), elements_.end()) - elements_.begin(); 36 | elements_.resize(size_); 37 | } 38 | 39 | int64 size() const { return size_; } 40 | 41 | int64 Index(int64 v) const { return this->operator[](v); } 42 | 43 | int64 operator[](int64 v) const { 44 | const int64 idx = std::lower_bound(elements_.begin(), elements_.end(), v) - 45 | elements_.begin(); 46 | return idx + 1; 47 | } 48 | 49 | 
private: 50 | std::vector elements_; 51 | int64 size_ = 0; 52 | }; 53 | 54 | template 55 | class BitBase { 56 | public: 57 | BitBase(int size = 0) { Reset(size); } 58 | BitBase(const IndexHelper& ih) { Reset(static_cast(std::size(ih))); } 59 | 60 | Derived& Reset(int new_size) { 61 | size_ = new_size; 62 | data_.resize(size_ + 1); 63 | Clear(); 64 | return static_cast(*this); 65 | } 66 | 67 | Derived& Clear() { 68 | std::fill(data_.begin(), data_.end(), 0); 69 | return static_cast(*this); 70 | } 71 | 72 | public: 73 | std::vector data_; 74 | int size_; 75 | }; 76 | 77 | // Range update bit indexed tree. 78 | // Support range based update and query the value of a specified index. 79 | template 80 | class RUBit : public BitBase> { 81 | public: 82 | using BitBase>::BitBase; 83 | using BitBase>::data_; 84 | using BitBase>::size_; 85 | 86 | RUBit& Update(int x, T delta) { 87 | for (; x <= size_; x += x & -x) { 88 | data_[x] += delta; 89 | } 90 | return *this; 91 | } 92 | 93 | RUBit& Update(int x, int y, T delta) { 94 | return Update(x, delta).Update(y + 1, -delta); 95 | } 96 | 97 | T Query(int x) const { 98 | T ret(0); 99 | for (; x > 0; x -= x & -x) { 100 | ret += data_[x]; 101 | } 102 | return ret; 103 | } 104 | }; 105 | 106 | // Range sum query bit indexed tree. 107 | // Support index based update and value of a range. 
108 | template 109 | class RSQBit : public BitBase> { 110 | public: 111 | using BitBase>::BitBase; 112 | using BitBase>::data_; 113 | using BitBase>::size_; 114 | 115 | RSQBit& Update(int x, T delta) { 116 | for (; x <= size_; x += x & -x) { 117 | data_[x] += delta; 118 | } 119 | return *this; 120 | } 121 | 122 | T Query(int x) const { 123 | T ret = 0; 124 | for (; x > 0; x -= x & -x) { 125 | ret += data_[x]; 126 | } 127 | return ret; 128 | } 129 | 130 | T Query(int x, int y) const { return Query(y) - Query(x - 1); } 131 | }; 132 | } // namespace pe 133 | #endif 134 | -------------------------------------------------------------------------------- /pe_vector: -------------------------------------------------------------------------------- 1 | #ifndef PE_VECTOR_ 2 | #define PE_VECTOR_ 3 | 4 | #include "pe_base" 5 | 6 | namespace pe { 7 | // Element-wise a + b; the result has a.size() entries and b is indexed over that same range. 8 | template 9 | SL std::vector VectorAdd(const std::vector& a, const std::vector& b) { 10 | std::vector c(a.size()); 11 | for (size_t i = 0; i < a.size(); ++i) c[i] = a[i] + b[i]; 12 | return c; 13 | } 14 | // Element-wise a - b; the result has a.size() entries and b is indexed over that same range. 15 | template 16 | SL std::vector VectorSub(const std::vector& a, const std::vector& b) { 17 | std::vector c(a.size()); 18 | for (size_t i = 0; i < a.size(); ++i) c[i] = a[i] - b[i]; 19 | return c; 20 | } 21 | // Multiplies every element of b by the scalar t (scalar-first argument order). 22 | template 23 | SL std::vector VectorScale(T t, const std::vector& b) { 24 | std::vector c(b.size()); 25 | for (size_t i = 0; i < b.size(); ++i) c[i] = t * b[i]; 26 | return c; 27 | } 28 | // Multiplies every element of b by the scalar t (vector-first argument order). 29 | template 30 | SL std::vector VectorScale(const std::vector& b, T t) { 31 | std::vector c(b.size()); 32 | for (size_t i = 0; i < b.size(); ++i) c[i] = t * b[i]; 33 | return c; 34 | } 35 | // Sum of a[i] * b[i] for i in [0, a.size()); indices are size_t to match size() and avoid a signed/unsigned comparison. 36 | template 37 | SL T VectorDotProduct(const std::vector& a, const std::vector& b) { 38 | T ret = 0; 39 | for (size_t i = 0; i < a.size(); ++i) ret += a[i] * b[i]; 40 | return ret; 41 | } 42 | } // namespace pe 43 | #endif -------------------------------------------------------------------------------- /precompile.bat: 
-------------------------------------------------------------------------------- 1 | g++ -xc++-header pe.hpp --std=c++20 -fno-diagnostics-color -O2 -march=native -mtune=native -fopenmp -pthread -static 2 | pause -------------------------------------------------------------------------------- /test/BUILD: -------------------------------------------------------------------------------- 1 | load("//toolchain:pe_toolchain.bzl", "pe_binary", "pe_library") 2 | 3 | package( 4 | default_visibility = [ 5 | "//visibility:public", 6 | ], 7 | ) 8 | 9 | pe_binary( 10 | name = "test_perf", 11 | srcs = ["test_perf.c"], 12 | defines = [ 13 | "TEST_ALL", 14 | "ENABLE_ASSERT=0", 15 | "TRY_TO_USE_INT128=1", 16 | "ENABLE_OPENMP=1", 17 | ] 18 | ) 19 | 20 | pe_binary( 21 | name = "test", 22 | srcs = ["pe_test.c"], 23 | defines = [ 24 | "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED", 25 | "ENABLE_ASSERT=0", 26 | "TRY_TO_USE_INT128=1", 27 | "ENABLE_OPENMP=1", 28 | ] 29 | ) 30 | 31 | pe_binary( 32 | name = "test_tcmalloc", 33 | srcs = ["pe_test.c"], 34 | defines = [ 35 | "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED", 36 | "ENABLE_ASSERT=0", 37 | "TRY_TO_USE_INT128=1", 38 | "ENABLE_OPENMP=1", 39 | "ENABLE_TCMALLOC=1", 40 | ], 41 | libs = [ 42 | "tcmalloc_minimal", 43 | "synchronization", 44 | "psapi", 45 | ] 46 | ) 47 | 48 | pe_binary( 49 | name = "test_noint128_noopenmp", 50 | srcs = ["pe_test.c"], 51 | defines = [ 52 | "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED", 53 | "ENABLE_ASSERT=0", 54 | "TRY_TO_USE_INT128=0", 55 | "ENABLE_OPENMP=0", 56 | ] 57 | ) 58 | 59 | pe_binary( 60 | name = "test_int128_noopenmp", 61 | srcs = ["pe_test.c"], 62 | defines = [ 63 | "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED", 64 | "ENABLE_ASSERT=0", 65 | "TRY_TO_USE_INT128=1", 66 | "ENABLE_OPENMP=0", 67 | ] 68 | ) 69 | 70 | pe_binary( 71 | name = "test_int128_openmp", 72 | srcs = ["pe_test.c"], 73 | defines = [ 74 | "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED", 75 | "ENABLE_ASSERT=0", 76 | 
"TRY_TO_USE_INT128=1", 77 | "ENABLE_OPENMP=1", 78 | ] 79 | ) 80 | 81 | pe_binary( 82 | name = "test_noint128_openmp", 83 | srcs = ["pe_test.c"], 84 | defines = [ 85 | "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED", 86 | "ENABLE_ASSERT=0", 87 | "TRY_TO_USE_INT128=0", 88 | "ENABLE_OPENMP=1", 89 | ] 90 | ) -------------------------------------------------------------------------------- /test/array_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | // struct XYZ { 4 | // XYZ() {dbg("xyz");} 5 | // }; 6 | // struct B { 7 | // B(XYZ& xyz) : xyz(xyz) { 8 | // dbg("B constructed"); 9 | // } 10 | // XYZ& xyz; 11 | // }; 12 | // struct A : public B { 13 | // A() : B(xyz) { 14 | // dbg("A constructed"); 15 | // } 16 | // XYZ xyz; 17 | // }; 18 | 19 | namespace array_test { 20 | SL void ArrayTest() { 21 | DArray vec({5, 6}); 22 | for (int i = 0; i < 5; ++i) { 23 | for (int j = 0; j < 6; ++j) { 24 | vec[i][j] = i * j; 25 | } 26 | } 27 | 28 | vec.Reset({3, 2}); 29 | for (int i = 0; i < 3; ++i) { 30 | for (int j = 0; j < 2; ++j) vec[i][j] = i * j; 31 | } 32 | 33 | Array arr(5); 34 | for (int i = 0; i < 4; ++i) { 35 | for (int j = 0; j < 5; ++j) arr[i][j] = i * j; 36 | } 37 | 38 | AArray arr1(5); 39 | for (int i = 0; i < 4; ++i) { 40 | for (int j = 0; j < 5; ++j) arr1[i][j] = i * j; 41 | } 42 | } 43 | PE_REGISTER_TEST(&ArrayTest, "ArrayTest", SMALL); 44 | } // namespace array_test -------------------------------------------------------------------------------- /test/bi_div_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace bi_test { 4 | template 5 | SL void BiDivTestImpl(int x, int y) { 6 | for (int strategy = 0; strategy < 2; ++strategy) 7 | for (int s1 = -1; s1 <= 1; ++s1) 8 | for (int s2 = -1; s2 <= 1; ++s2) 9 | if (s2 != 0) 10 | for (int id = 0; id < x; ++id) { 11 | std::vector A, B; 12 | if (strategy == 0) { 13 | for (int i = 
0; i < y; ++i) { 14 | int t = rand() + 1; 15 | A.push_back(t); 16 | if (i & 1) { 17 | B.push_back(t); 18 | } 19 | } 20 | } else { 21 | for (int i = 0; i < y; ++i) { 22 | A.push_back(rand() + 1); 23 | if (i & 1) { 24 | B.push_back(rand() + 1); 25 | } 26 | } 27 | } 28 | std::string expected_result1; 29 | std::string expected_result2; 30 | { 31 | T a = s1; 32 | T b = s2; 33 | for (auto& iter : A) a *= iter; 34 | for (auto& iter : B) b *= iter; 35 | T c = a / b; 36 | T d = a % b; 37 | expected_result1 = ToString(c); 38 | expected_result2 = ToString(d); 39 | } 40 | std::string actual_result1; 41 | std::string actual_result2; 42 | { 43 | BigInteger a = s1; 44 | BigInteger b = s2; 45 | for (auto& iter : A) a *= iter; 46 | for (auto& iter : B) b *= iter; 47 | auto [c, d] = Div(a, b); 48 | actual_result1 = ToString(c); 49 | actual_result2 = ToString(d); 50 | } 51 | assert(expected_result1 == actual_result1); 52 | assert(expected_result2 == actual_result2); 53 | } 54 | } 55 | 56 | SL void BiDivTestMedium_BigInteger() { BiDivTestImpl(100, 500); } 57 | 58 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 59 | PE_REGISTER_TEST(&BiDivTestMedium_BigInteger, "BiDivTestMedium_BigInteger", 60 | MEDIUM); 61 | #endif 62 | 63 | SL void BiDivTestBig_BigInteger() { BiDivTestImpl(10, 2000); } 64 | 65 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 66 | PE_REGISTER_TEST(&BiDivTestBig_BigInteger, "BiDivTestBig_BigInteger", BIG); 67 | #endif 68 | 69 | #if ENABLE_GMP 70 | SL void BiDivTestMedium_MpInteger() { BiDivTestImpl(100, 500); } 71 | 72 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 73 | PE_REGISTER_TEST(&BiDivTestMedium_MpInteger, "BiDivTestMedium_MpInteger", 74 | MEDIUM); 75 | #endif 76 | 77 | SL void BiDivTestBig_MpInteger() { BiDivTestImpl(10, 2000); } 78 | 79 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 80 | PE_REGISTER_TEST(&BiDivTestBig_MpInteger, "BiDivTestBig_MpInteger", BIG); 81 | #endif 82 | #endif 83 | 84 | } // namespace bi_test 85 | 
-------------------------------------------------------------------------------- /test/bi_mul_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace bi_test { 4 | template 5 | SL void BiMulTestImpl(int x, int y) { 6 | for (int s1 = -1; s1 <= 1; ++s1) 7 | for (int s2 = -1; s2 <= 1; ++s2) 8 | for (int id = 0; id < x; ++id) { 9 | std::vector A, B; 10 | for (int i = 0; i < y; ++i) { 11 | A.push_back(rand()); 12 | B.push_back(rand()); 13 | } 14 | std::string expected_result; 15 | { 16 | T a = s1; 17 | T b = s2; 18 | for (auto& iter : A) a *= iter; 19 | for (auto& iter : B) b *= iter; 20 | 21 | T c = a * b; 22 | expected_result = ToString(c); 23 | } 24 | std::string actual_result; 25 | { 26 | BigInteger a = s1; 27 | BigInteger b = s2; 28 | for (auto& iter : A) a *= iter; 29 | for (auto& iter : B) b *= iter; 30 | BigInteger c = a * b; 31 | std::stringstream ss; 32 | ss << c; 33 | ss >> actual_result; 34 | } 35 | assert(expected_result == actual_result); 36 | } 37 | } 38 | 39 | SL void BiMulTestMedium_BigInteger() { BiMulTestImpl(1000, 500); } 40 | 41 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 42 | PE_REGISTER_TEST(&BiMulTestMedium_BigInteger, "BiMulTestMedium_BigInteger", 43 | MEDIUM); 44 | #endif 45 | 46 | SL void BiMulTestBig_BigInteger() { BiMulTestImpl(10, 10000); } 47 | 48 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 49 | PE_REGISTER_TEST(&BiMulTestBig_BigInteger, "BiMulTestBig_BigInteger", BIG); 50 | #endif 51 | 52 | #if ENABLE_GMP 53 | SL void BiMulTestMedium_MpInteger() { BiMulTestImpl(1000, 500); } 54 | 55 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 56 | PE_REGISTER_TEST(&BiMulTestMedium_MpInteger, "BiMulTestMedium_MpInteger", 57 | MEDIUM); 58 | #endif 59 | 60 | SL void BiMulTestBig_MpInteger() { BiMulTestImpl(10, 10000); } 61 | 62 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 63 | PE_REGISTER_TEST(&BiMulTestBig_MpInteger, "BiMulTestBig_MpInteger", BIG); 64 | #endif 65 | #endif 66 | } // 
namespace bi_test 67 | -------------------------------------------------------------------------------- /test/bit_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace bit_test { 4 | #if defined(COMPILER_GNU) 5 | SL void BitTest() { 6 | for (int i = 0; i < 65536; ++i) { 7 | if (i > 0) { 8 | assert(__pe_clz32(i) == __builtin_clz(i)); 9 | assert(__pe_ctz32(i) == __builtin_ctz(i)); 10 | } 11 | assert(__pe_popcount32(i) == __builtin_popcount(i)); 12 | assert(__pe_ffs32(i) == __builtin_ffs(i)); 13 | assert(__pe_parity32(i) == __builtin_parity(i)); 14 | #if defined(STL_GLIBCXX) 15 | if (i > 0) { 16 | assert(__pe_lg32(i) == std::__lg(i)); 17 | } 18 | #endif 19 | } 20 | 21 | for (int i = 0; i < 65536; ++i) { 22 | uint64 target = CRand63(); 23 | if (target > 0) { 24 | assert(__pe_clz64(target) == __builtin_clzll(target)); 25 | assert(__pe_ctz64(target) == __builtin_ctzll(target)); 26 | assert(__pe_popcount64(target) == __builtin_popcountll(target)); 27 | assert(__pe_ffs64(target) == __builtin_ffsll(target)); 28 | assert(__pe_parity64(target) == __builtin_parityll(target)); 29 | #if defined(STL_GLIBCXX) 30 | assert(__pe_lg64(target) == std::__lg(target)); 31 | #endif 32 | } 33 | } 34 | 35 | int x = 0; 36 | SetBit(x, 20); 37 | assert(x == (1 << 20)); 38 | assert(GetBit(x, 20) == 1); 39 | 40 | RevBit(x, 20); 41 | assert(x == 0); 42 | assert(GetBit(x, 20) == 0); 43 | 44 | RevBit(x, 21); 45 | assert(x == (1 << 21)); 46 | assert(GetBit(x, 21) == 1); 47 | 48 | ResetBit(x, 21); 49 | assert(x == 0); 50 | assert(GetBit(x, 21) == 0); 51 | } 52 | 53 | PE_REGISTER_TEST(&BitTest, "BitTest", SMALL); 54 | #endif 55 | } // namespace bit_test 56 | -------------------------------------------------------------------------------- /test/dva_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace dva_test { 4 | SL void TestS0() { 5 | auto orz = 
PrimePi(10000); 6 | assert(orz[10000] == 1229LL); 7 | 8 | orz = PrimeS0Ex(10000); 9 | assert(orz[10000] == 1229LL); 10 | 11 | orz = PrimePi(100000000); 12 | assert(orz[100000000] == 5761455LL); 13 | 14 | orz = PrimeS0Ex(100000000); 15 | assert(orz[100000000] == 5761455LL); 16 | } 17 | 18 | SL void TestS1() { 19 | int64 s = 0; 20 | for (int i = 2; i <= 10000; ++i) { 21 | if (IsPrime(i)) s += i; 22 | } 23 | assert(s == 5736396LL); 24 | 25 | auto orz = PrimeS1(10000); 26 | assert(orz[10000] == 5736396LL); 27 | 28 | orz = PrimeS1Ex(10000); 29 | assert(orz[10000] == 5736396LL); 30 | 31 | s = 0; 32 | for (int i = 2; i <= 1000000; ++i) { 33 | if (IsPrime(i)) s += i; 34 | } 35 | assert(s == 37550402023LL); 36 | 37 | orz = PrimeS1(1000000); 38 | assert(orz[1000000] == 37550402023LL); 39 | 40 | orz = PrimeS1Ex(1000000); 41 | assert(orz[1000000] == 37550402023LL); 42 | } 43 | 44 | SL void DvaTest() { 45 | TestS0(); 46 | TestS1(); 47 | } 48 | 49 | PE_REGISTER_TEST(&DvaTest, "DvaTest", SMALL); 50 | } // namespace dva_test 51 | -------------------------------------------------------------------------------- /test/fft_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace fft_test { 4 | #if HAS_POLY_MUL_FLINT 5 | SL void RandomTest() { 6 | srand(123456789); 7 | { 8 | // 8e13 9 | // 1e5+19 10 | const int64 mod = 100019; 11 | std::vector x, y; 12 | for (int i = 0; i < 7000; ++i) { 13 | x.push_back((uint64)CRand63() % mod); 14 | y.push_back((uint64)CRand63() % mod); 15 | } 16 | 17 | int t0 = clock(); 18 | std::vector ans0 = flint::PolyMul(x, y, mod); 19 | int t1 = clock(); 20 | std::vector ans1 = fft::PolyMulFft(x, y, mod); 21 | int t2 = clock(); 22 | std::vector ans2 = fft::PolyMulFftSmall(x, y, mod); 23 | int t3 = clock(); 24 | // std::cerr << (t1 - t0)*1e-3 << " " << (t2-t1)*1e-3 << " " << (t3-t1)*1e-3 25 | // << std::endl; 26 | 27 | assert(ans0 == ans1); 28 | assert(ans0 == ans2); 29 | } 30 | { 31 | // 
1e15 32 | // 1e9+7 33 | const int64 mod = 1000000007; 34 | std::vector x, y; 35 | for (int i = 0; i < 1020000; ++i) { 36 | x.push_back((uint64)CRand63() % mod); 37 | y.push_back((uint64)CRand63() % mod); 38 | } 39 | 40 | int t0 = clock(); 41 | std::vector ans0 = flint::PolyMul(x, y, mod); 42 | int t1 = clock(); 43 | std::vector ans1 = fft::PolyMulFft(x, y, mod); 44 | int t2 = clock(); 45 | // std::cerr << (t1 - t0)*1e-3 << " " << (t2-t1)*1e-3 << std::endl; 46 | 47 | assert(ans0 == ans1); 48 | } 49 | { 50 | // 8e14 51 | // 1e10+19 52 | const int64 mod = 10000000019; 53 | std::vector x, y; 54 | for (int i = 0; i < 80000; ++i) { 55 | x.push_back((uint64)CRand63() % mod); 56 | y.push_back((uint64)CRand63() % mod); 57 | } 58 | 59 | int t0 = clock(); 60 | std::vector ans0 = flint::PolyMul(x, y, mod); 61 | int t1 = clock(); 62 | std::vector ans1 = fft::PolyMulFft(x, y, mod); 63 | int t2 = clock(); 64 | // std::cerr << (t1 - t0)*1e-3 << " " << (t2-t1)*1e-3 << std::endl; 65 | 66 | assert(ans0 == ans1); 67 | } 68 | } 69 | 70 | SL void LimitTest() { 71 | { 72 | // 10018*10018*2048=205537943552 2.06e11 73 | // 1e5+19 74 | const int64 mod = 100019; 75 | std::vector x, y; 76 | for (int i = 0; i < 2048; ++i) { 77 | x.push_back(mod - 1); 78 | y.push_back(mod - 1); 79 | } 80 | 81 | int t0 = clock(); 82 | std::vector ans0 = flint::PolyMul(x, y, mod); 83 | int t1 = clock(); 84 | std::vector ans1 = fft::PolyMulFft(x, y, mod); 85 | int t2 = clock(); 86 | std::vector ans2 = fft::PolyMulFftSmall(x, y, mod); 87 | int t3 = clock(); 88 | // std::cerr << (t1 - t0)*1e-3 << " " << (t2-t1)*1e-3 << " " << (t3-t1)*1e-3 89 | // << std::endl; 90 | 91 | assert(ans0 == ans1); 92 | assert(ans0 == ans2); 93 | } 94 | { 95 | // 1000000007*339750=339750002378250=3.39e14 96 | // 1e9+7 97 | const int64 mod = 1000000007; 98 | std::vector x, y; 99 | for (int i = 0; i < 339750; ++i) { 100 | x.push_back(mod - 1); 101 | y.push_back(mod - 1); 102 | } 103 | 104 | int t0 = clock(); 105 | std::vector ans0 = 
flint::PolyMul(x, y, mod); 106 | int t1 = clock(); 107 | std::vector ans1 = fft::PolyMulFft(x, y, mod); 108 | int t2 = clock(); 109 | // std::cerr << (t1 - t0)*1e-3 << " " << (t2-t1)*1e-3 << std::endl; 110 | assert(ans0 == ans1); 111 | } 112 | { 113 | // 10000000019*44064=440640000837216=4.4e14 114 | // 1e10+19 115 | const int64 mod = 10000000019; 116 | std::vector x, y; 117 | for (int i = 0; i < 44064; ++i) { 118 | x.push_back(mod - 1); 119 | y.push_back(mod - 1); 120 | } 121 | 122 | int t0 = clock(); 123 | std::vector ans0 = flint::PolyMul(x, y, mod); 124 | int t1 = clock(); 125 | std::vector ans1 = fft::PolyMulFft(x, y, mod); 126 | int t2 = clock(); 127 | // std::cerr << (t1 - t0)*1e-3 << " " << (t2-t1)*1e-3 << std::endl; 128 | assert(ans0 == ans1); 129 | } 130 | } 131 | 132 | SL void FftTest() { 133 | RandomTest(); 134 | LimitTest(); 135 | } 136 | PE_REGISTER_TEST(&FftTest, "FftTest", SMALL); 137 | #endif 138 | } // namespace fft_test 139 | -------------------------------------------------------------------------------- /test/init_inv_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace init_inv_test { 4 | constexpr int64 mod = 1000000007; 5 | 6 | SL void InitInvTest() { 7 | constexpr int n = 1000000; 8 | std::vector sresult(n + 1); 9 | std::vector lresult(n + 1); 10 | InitInverse(std::data(sresult), n, mod); 11 | InitInverse(std::data(lresult), n, mod); 12 | for (int i = 1; i <= n; ++i) { 13 | assert((int64)i * sresult[i] % mod == 1); 14 | assert(sresult[i] == lresult[i]); 15 | } 16 | } 17 | 18 | PE_REGISTER_TEST(&InitInvTest, "InitInvTest", SMALL); 19 | } // namespace init_inv_test 20 | -------------------------------------------------------------------------------- /test/int128_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace print_int128_test { 4 | #if PE_HAS_INT128 5 | SL void PrintInt128Test() { 
6 | int128 x = 1; 7 | for (int i = 0; i < 127; ++i) { 8 | assert(ToString(x) == ToString(BigInteger(x))); 9 | assert(ToString(-x) == ToString(BigInteger(-x))); 10 | x <<= 1; 11 | } 12 | x = 0; 13 | assert(ToString(x) == ToString(BigInteger(x))); 14 | assert(ToString(-x) == ToString(BigInteger(-x))); 15 | } 16 | 17 | PE_REGISTER_TEST(&PrintInt128Test, "PrintInt128Test", SMALL); 18 | 19 | SL void Int128LiteralTest() { 20 | const int128 p1 = 10; 21 | const int128 p2 = p1 * p1; 22 | const int128 p4 = p2 * p2; 23 | const int128 p8 = p4 * p4; 24 | const int128 p16 = p8 * p8; 25 | const int128 p32 = p16 * p16; 26 | assert(p32 == "100000000000000000000000000000000"_i128); 27 | assert(p32 == "+100000000000000000000000000000000"_i128); 28 | assert(-p32 == "-100000000000000000000000000000000"_i128); 29 | assert(p32 == "100000000000000000000000000000000"_u128); 30 | assert(p32 == "+100000000000000000000000000000000"_u128); 31 | const int128 b62 = 1LL << 62; 32 | const int128 b63 = b62 << 1; 33 | const int128 b126 = b63 * b63; 34 | assert(b63 == "9223372036854775808"_i128); 35 | assert(b126 == "85070591730234615865843651857942052864"_i128); 36 | const uint128 b127 = (uint128)b126 << 1; 37 | assert(b127 == "170141183460469231731687303715884105728"_u128); 38 | } 39 | 40 | PE_REGISTER_TEST(&Int128LiteralTest, "Int128LiteralTest", SMALL); 41 | #endif 42 | } // namespace print_int128_test 43 | -------------------------------------------------------------------------------- /test/misc_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace misc_test { 4 | SL void MiscTest() { 5 | GaussianEliminationSolver solver; 6 | solver.Init(10, 10); 7 | for (int i = 0; i < 10; ++i) { 8 | solver.At(i, 10) = 10 - i; 9 | for (int j = i; j < 10; ++j) { 10 | solver.At(i, j) = 1; 11 | } 12 | } 13 | auto v = solver.Solve(); 14 | for (int i = 0; i < 10; ++i) { 15 | assert(FAbs(v[i] - 1) < 1e-10); 16 | } 17 | 18 | auto vtos = 
[=](const std::vector& vec) { 19 | std::stringstream ss; 20 | ss << vec; 21 | return ss.str(); 22 | }; 23 | 24 | std::vector vec; 25 | assert(vtos(vec) == "{}"); 26 | 27 | vec.push_back(1); 28 | assert(vtos(vec) == "{1}"); 29 | 30 | vec.push_back(2); 31 | assert(vtos(vec) == "{1, 2}"); 32 | 33 | vec.push_back(3); 34 | assert(vtos(vec) == "{1, 2, 3}"); 35 | } 36 | 37 | PE_REGISTER_TEST(&MiscTest, "MiscTest", SMALL); 38 | 39 | SL void CountPtInCircleTest() { 40 | for (int64 n = 0; n <= 100; ++n) { 41 | int64 u = CountPtInCircle(n); 42 | int64 v = CountPtInCircleBf(n); 43 | int64 ans = 0; 44 | const int t = (int)SqrtI(n); 45 | for (int x = -t; x <= t; ++x) { 46 | for (int y = -t; y <= t; ++y) ans += sq(x) + sq(y) <= n; 47 | } 48 | assert(u == ans); 49 | assert(v == ans); 50 | } 51 | #if 1 52 | for (int64 i = 1; i <= 10000; ++i) { 53 | int64 u = CountPtInCircleQ1(i); 54 | int64 v = CountPtInCircleQ1Bf(i); 55 | if (u != v) { 56 | std::cerr << i << " " << u << " " << v << std::endl; 57 | } 58 | assert(u == v); 59 | } 60 | #endif 61 | 62 | #if !defined(CONTINUOUS_INTEGRATION_TEST) 63 | // 9999999999999907 7853981733966909 7853981733966913 64 | for (int64 i = 10000; i <= 100000000000000000; i = i * 10) { 65 | for (int64 j = -3; j <= 3; ++j) { 66 | int64 target = i + j; 67 | int64 u = CountPtInCircleQ1(target); 68 | int64 v = CountPtInCircleQ1Bf(target); 69 | if (u != v) { 70 | std::cerr << target << " " << u << " " << v << std::endl; 71 | } 72 | assert(u == v); 73 | } 74 | } 75 | #endif 76 | } 77 | 78 | PE_REGISTER_TEST(&CountPtInCircleTest, "CountPtInCircleTest", MEDIUM); 79 | 80 | #if PE_HAS_INT128 81 | SL void SumSigma0Test() { 82 | #if 1 83 | for (int64 i = 1; i <= 10000; ++i) { 84 | int64 u = SumSigma0(i); 85 | int64 v = SumSigma0Bf(i); 86 | auto w = min25::sigma0_sum_fast(i); 87 | if (u != v || v != w || u != w) { 88 | std::cerr << i << " " << u << " " << v << " " << w << std::endl; 89 | } 90 | assert(u == v); 91 | assert(u == w); 92 | } 93 | #endif 94 | 95 | // 
// Returns floor(a / b), i.e. the quotient rounded toward negative infinity
// (C++ integer division truncates toward zero instead).
//
// Preconditions: b != 0. Negating a or b must not overflow, so neither may be
// the minimum int64 value when b is negative.
SL int64 IntDivFloor(int64 a, int64 b) {
  // Normalise to a positive divisor. Both operands must be negated together;
  // negating only the dividend would flip the sign of the quotient.
  if (b < 0) {
    a = -a;
    b = -b;
  }
  const int64 quotient = a / b;
  // Truncation already equals the floor unless the dividend is negative and
  // the division is inexact; then step one further down.
  if (a < 0 && a % b != 0) {
    return quotient - 1;
  }
  return quotient;
}
IntDivFloor(x2, 100) : IntDivFloor(x2, 100) + 1; 131 | auto ans = SolveInequatilityGE2(A, B, C); 132 | if (u == v || u + 1 == v) { 133 | assert(std::size(ans) == 1); 134 | assert(ans[0].x1 == -IntegerRange64::inf); 135 | assert(ans[0].x2 == IntegerRange64::inf); 136 | } else { 137 | assert(std::size(ans) == 2); 138 | assert(ans[0].x1 == -IntegerRange64::inf); 139 | assert(ans[0].x2 == u); 140 | assert(ans[1].x1 == v); 141 | assert(ans[1].x2 == IntegerRange64::inf); 142 | } 143 | } 144 | } 145 | } 146 | PE_REGISTER_TEST(&SolveInequatilityGE2Test, "SolveInequatilityGE2Test", SMALL); 147 | 148 | SL void SolveInequatilityG2Test() { 149 | for (int64 x1 = -1000; x1 <= 1000; ++x1) { 150 | for (int64 x2 = x1; x2 <= 1000; ++x2) { 151 | // (100 x-x1)(100 x-x2) > 0 152 | // 10000 x^2-(100 x1 + 100 x2) x + x1 x2 > 0 153 | const int64 A = 10000; 154 | const int64 B = -(100 * x1 + 100 * x2); 155 | const int64 C = x1 * x2; 156 | int64 u = x1 % 100 == 0 ? IntDivFloor(x1, 100) - 1 : IntDivFloor(x1, 100); 157 | int64 v = IntDivFloor(x2, 100) + 1; 158 | auto ans = SolveInequatilityG2(A, B, C); 159 | if (u == v || u + 1 == v) { 160 | assert(std::size(ans) == 1); 161 | assert(ans[0].x1 == -IntegerRange64::inf); 162 | assert(ans[0].x2 == IntegerRange64::inf); 163 | } else { 164 | assert(std::size(ans) == 2); 165 | assert(ans[0].x1 == -IntegerRange64::inf); 166 | assert(ans[0].x2 == u); 167 | assert(ans[1].x1 == v); 168 | assert(ans[1].x2 == IntegerRange64::inf); 169 | } 170 | } 171 | } 172 | } 173 | PE_REGISTER_TEST(&SolveInequatilityG2Test, "SolveInequatilityG2Test", SMALL); 174 | 175 | SL void SolveInequatilityLE2Test() { 176 | for (int64 x1 = -1000; x1 <= 1000; ++x1) { 177 | for (int64 x2 = x1; x2 <= 1000; ++x2) { 178 | // (100 x-x1)(100 x-x2) <= 0 179 | // 10000 x^2-(100 x1 + 100 x2) x + x1 x2 <= 0 180 | const int64 A = 10000; 181 | const int64 B = -(100 * x1 + 100 * x2); 182 | const int64 C = x1 * x2; 183 | int64 u = x1 % 100 == 0 ? 
IntDivFloor(x1, 100) : IntDivFloor(x1, 100) + 1; 184 | int64 v = IntDivFloor(x2, 100); 185 | auto ans = SolveInequatilityLE2(A, B, C); 186 | if (u > v) { 187 | assert(std::size(ans) == 0); 188 | } else { 189 | assert(std::size(ans) == 1); 190 | assert(ans[0].x1 == u); 191 | assert(ans[0].x2 == v); 192 | } 193 | } 194 | } 195 | } 196 | PE_REGISTER_TEST(&SolveInequatilityLE2Test, "SolveInequatilityLE2Test", SMALL); 197 | 198 | SL void SolveInequatilityL2Test() { 199 | for (int64 x1 = -1000; x1 <= 1000; ++x1) { 200 | for (int64 x2 = x1; x2 <= 1000; ++x2) { 201 | // (100 x-x1)(100 x-x2) < 0 202 | // 10000 x^2-(100 x1 + 100 x2) x + x1 x2 < 0 203 | const int64 A = 10000; 204 | const int64 B = -(100 * x1 + 100 * x2); 205 | const int64 C = x1 * x2; 206 | int64 u = IntDivFloor(x1, 100) + 1; 207 | int64 v = x2 % 100 == 0 ? IntDivFloor(x2, 100) - 1 : IntDivFloor(x2, 100); 208 | auto ans = SolveInequatilityL2(A, B, C); 209 | if (u > v) { 210 | assert(std::size(ans) == 0); 211 | } else { 212 | assert(std::size(ans) == 1); 213 | assert(ans[0].x1 == u); 214 | assert(ans[0].x2 == v); 215 | } 216 | } 217 | } 218 | } 219 | PE_REGISTER_TEST(&SolveInequatilityL2Test, "SolveInequatilityL2Test", SMALL); 220 | } // namespace misc_test 221 | -------------------------------------------------------------------------------- /test/mod_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace mod_test { 4 | #if PE_HAS_INT128 5 | template 6 | struct ValueHolder {}; 7 | 8 | template <> 9 | struct ValueHolder { 10 | static const int32 values[]; 11 | static const int32 mods[]; 12 | }; 13 | 14 | // we don't consider: -2147483648 15 | const int32 ValueHolder::values[] = {-2147483647, -1073741824, -1, 0, 1, 16 | 1073741824, 2147483647}; 17 | const int32 ValueHolder::mods[] = {1, 1073741824, 2147483647}; 18 | 19 | template <> 20 | struct ValueHolder { 21 | static const uint32 values[]; 22 | static const uint32 mods[]; 23 | }; 24 
| const uint32 ValueHolder::values[] = {0u, 1u, 2147483648u, 4294967295u}; 25 | const uint32 ValueHolder::mods[] = {1u, 2147483648u, 4294967295u}; 26 | 27 | template <> 28 | struct ValueHolder { 29 | static const int64 values[]; 30 | static const int64 mods[]; 31 | }; 32 | const int64 ValueHolder::values[] = {-9223372036854775807ll, 33 | -4611686018427387904ll, 34 | -2147483647ll, 35 | -1073741824ll, 36 | -1ll, 37 | 0ll, 38 | 1ll, 39 | 1073741824ll, 40 | 2147483647ll, 41 | 4611686018427387904ll, 42 | 9223372036854775807ll}; 43 | const int64 ValueHolder::mods[] = {1ll, 1073741824ll, 2147483647ll, 44 | 4611686018427387904ll, 45 | 9223372036854775807ll}; 46 | 47 | template <> 48 | struct ValueHolder { 49 | static const uint64 values[]; 50 | static const uint64 mods[]; 51 | }; 52 | const uint64 ValueHolder::values[] = {0u, 53 | 1ULL, 54 | 2147483648ULL, 55 | 2147483647ULL, 56 | 9223372036854775807ULL, 57 | 18446744073709551615ULL}; 58 | const uint64 ValueHolder::mods[] = {1ULL, 2147483648ULL, 2147483647ULL, 59 | 9223372036854775807ULL, 60 | 18446744073709551615ULL}; 61 | 62 | SL void ModTest() { 63 | #define REGULATE_MOD_TEST(T1, T2) \ 64 | for (T1 v : ValueHolder::values) \ 65 | for (T2 m : ValueHolder::mods) { \ 66 | int128 x = v; \ 67 | int128 y = m; \ 68 | x %= y; \ 69 | if (x < 0) x += y; \ 70 | auto ans = Mod(v, m); \ 71 | if (ans != x) { \ 72 | dbg(v); \ 73 | dbg(m); \ 74 | dbg(ans); \ 75 | dbg(x); \ 76 | } \ 77 | assert(ans == x); \ 78 | } 79 | REGULATE_MOD_TEST(int32, int32) 80 | REGULATE_MOD_TEST(uint32, int32) 81 | REGULATE_MOD_TEST(int64, int32) 82 | REGULATE_MOD_TEST(uint64, int32) 83 | REGULATE_MOD_TEST(int32, uint32) 84 | REGULATE_MOD_TEST(uint32, uint32) 85 | REGULATE_MOD_TEST(int64, uint32) 86 | REGULATE_MOD_TEST(uint64, uint32) 87 | 88 | REGULATE_MOD_TEST(int32, int64) 89 | REGULATE_MOD_TEST(uint32, int64) 90 | REGULATE_MOD_TEST(int64, int64) 91 | REGULATE_MOD_TEST(uint64, int64) 92 | REGULATE_MOD_TEST(int32, uint64) 93 | REGULATE_MOD_TEST(uint32, 
uint64) 94 | REGULATE_MOD_TEST(int64, uint64) 95 | REGULATE_MOD_TEST(uint64, uint64) 96 | } 97 | 98 | PE_REGISTER_TEST(&ModTest, "ModTest", SMALL); 99 | #endif 100 | 101 | #if PE_HAS_INT128 102 | SL void FracModTest() { 103 | const int mod = 1000000007; 104 | for (int64 n = 1; n <= 10; ++n) { 105 | int64 v = FracMod({n, n + 1, 2 * n + 1}, {2, 3}, mod); 106 | int128 expected = (int128)n * (n + 1) * (2 * n + 1) / 6 % mod; 107 | assert(v == expected); 108 | } 109 | 110 | for (int i = 1; i <= 10; ++i) { 111 | int64 n = 100000000000 + i; 112 | int64 v = FracMod({n, n + 1, 2 * n + 1}, {2, 3}, mod); 113 | int128 expected = (int128)n * (n + 1) * (2 * n + 1) / 6 % mod; 114 | assert(v == expected); 115 | } 116 | } 117 | 118 | PE_REGISTER_TEST(&FracModTest, "FracModTest", SMALL); 119 | #endif 120 | } // namespace mod_test 121 | -------------------------------------------------------------------------------- /test/mpf_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace mpf_test { 4 | #if HAS_MPF 5 | SL void MpfTest() { 6 | // std::cout << Mpf::getDefaultPrec() << std::endl; 7 | Mpf::SetDefaultPrec(200); 8 | 9 | Mpf x(1); 10 | x /= 10; 11 | #if 0 12 | std::cout << x.toString(20) << std::endl; 13 | x = -x.Power(10); 14 | std::cout << x.toString(20) << std::endl; 15 | std::cout << x.toLongDouble() << std::endl; 16 | std::cout << Mpf().toString(20) << std::endl; 17 | std::cout << Mpf("-.1123456789e31").toString(30) << std::endl; 18 | std::cout << Mpf(".1123456789e31").toString(30) << std::endl; 19 | std::cout << Mpf(100).toString(30) << std::endl; 20 | mpf_t tester; 21 | mpf_init(tester); 22 | mpf_set_str(tester, "112345678987654321", 10); 23 | std::cout << Mpf("12345678987654321").toString() << std::endl; 24 | std::cout << mpf_get_d(tester) << std::endl; 25 | std::cout << mpf_get_prec(tester) << std::endl; 26 | Mpf y(1); 27 | y *= 123456789; 28 | y *= 1000000000; 29 | y += 123456789; 30 | std::cout << 
// Size category attached to a test.
enum TestSize {
  SMALL = 0,
  MEDIUM = 1,
  BIG = 2,
  SUPER = 3,
  SPECIFIED = 4,
};

// Returns true when `a` and `b` have the same length and every pair of
// characters matches after std::tolower.
SL bool SameStringIgnoreCase(std::string_view a, std::string_view b) {
  if (a.size() != b.size()) {
    return false;
  }
  for (std::string_view::size_type i = 0; i < a.size(); ++i) {
    // std::tolower requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour.
    const auto lhs = static_cast<unsigned char>(a[i]);
    const auto rhs = static_cast<unsigned char>(b[i]);
    if (std::tolower(lhs) != std::tolower(rhs)) {
      return false;
    }
  }
  return true;
}

// Maps a size name (compared case-insensitively) to its TestSize value.
// Returns std::nullopt when the name is not one of the five known sizes.
SL std::optional<TestSize> ParseTestSize(std::string_view test_size) {
  struct NamedSize {
    std::string_view name;
    TestSize size;
  };
  static constexpr NamedSize kNamedSizes[] = {
      {"SMALL", SMALL}, {"MEDIUM", MEDIUM},       {"BIG", BIG},
      {"SUPER", SUPER}, {"SPECIFIED", SPECIFIED},
  };
  for (const NamedSize& entry : kNamedSizes) {
    if (SameStringIgnoreCase(test_size, entry.name)) {
      return entry.size;
    }
  }
  return std::nullopt;
}

// Parses a list of size names such as "SMALL,MEDIUM,(BIG)".
//
// Commas and parentheses are treated as separators alongside whitespace.
// Names that cannot be parsed are reported on std::cerr and skipped; the
// recognised sizes are returned in input order (duplicates are kept).
SL std::vector<TestSize> ParseTestSizeList(std::string s) {
  std::vector<TestSize> ret;

  // Turn every separator into a blank so operator>> can tokenise the list.
  for (char& ch : s) {
    if (ch == ',' || ch == '(' || ch == ')') {
      ch = ' ';
    }
  }

  std::stringstream ss(s);
  std::string item;
  while (ss >> item) {
    const std::optional<TestSize> size = ParseTestSize(item);
    if (size.has_value()) {
      ret.push_back(*size);
    } else {
      std::cerr << "Cannot parse test size: " << item << std::endl;
    }
  }
  return ret;
}
-------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace poly_algo_test { 4 | const int64 mod = 1000000007; 5 | 6 | SL void PolyMultiPointEvaluationTest() { 7 | srand(123456789); 8 | std::vector data; 9 | int n = 5000; 10 | const int64 mod = 1000000007; 11 | for (int i = 1; i <= n; ++i) data.push_back(i); 12 | std::vector v; 13 | for (int i = 1; i <= n; ++i) v.push_back(i % 10007); 14 | { 15 | TimeRecorder tr; 16 | std::vector result = PolyMultipointEvaluateNormal(data, v, mod); 17 | // std::cout << tr.Elapsed().Format() << std::endl; 18 | for (int i = 1; i <= n; ++i) { 19 | int64 value = PolyEvaluate(data, i % 10007, mod); 20 | assert(value == result[i - 1]); 21 | } 22 | } 23 | { 24 | TimeRecorder tr; 25 | std::vector result = PolyMultipointEvaluateBls(data, v, mod); 26 | // std::cout << tr.Elapsed().Format() << std::endl; 27 | for (int i = 1; i <= n; ++i) { 28 | int64 value = PolyEvaluate(data, i % 10007, mod); 29 | assert(value == result[i - 1]); 30 | } 31 | } 32 | #if HAS_POLY_FLINT 33 | { 34 | TimeRecorder tr; 35 | std::vector result = flint::PolyMultipointEvaluate(data, v, mod); 36 | // std::cout << tr.Elapsed().Format() << std::endl; 37 | for (int i = 1; i <= n; ++i) { 38 | int64 value = PolyEvaluate(data, i % 10007, mod); 39 | assert(value == result[i - 1]); 40 | } 41 | } 42 | #endif 43 | } 44 | PE_REGISTER_TEST(&PolyMultiPointEvaluationTest, "PolyMultiPointEvaluationTest", 45 | SMALL); 46 | 47 | SL void PolyBatchMulTest() { 48 | const int mod = 10007; 49 | std::vector data = {1, 1, 2, 1, 3, 1}; 50 | std::vector result = PolyBatchMul(data, mod); 51 | 52 | std::vector expected = {6, 11, 6, 1}; 53 | assert(expected == result); 54 | } 55 | PE_REGISTER_TEST(&PolyBatchMulTest, "PolyBatchMulTest", SMALL); 56 | 57 | SL void GenBernoulliNumberTest() { 58 | const int mod = 10007; 59 | assert((GenBernoulliNumber(7, mod) == 60 | std::vector{1, 5003, 1668, 0, 7672, 0, 4527, 0})); 61 | } 62 | 
PE_REGISTER_TEST(&GenBernoulliNumberTest, "GenBernoulliNumberTest", SMALL); 63 | 64 | /* Asserts pmod::GenStirling1Column(3, 10, mod) against eleven hard-coded residues modulo 10007. */ SL void GenStirling1ColumnTest() { 65 | const int mod = 10007; 66 | assert((pmod::GenStirling1Column(3, 10, mod) == 67 | std::vector{0, 0, 0, 1, 6, 35, 225, 1624, 3125, 8047, 1881})); 68 | } 69 | PE_REGISTER_TEST(&GenStirling1ColumnTest, "GenStirling1ColumnTest", SMALL); 70 | 71 | /* Asserts GenStirling1(7, mod) against eight hard-coded values. */ SL void GenStirling1Test() { 72 | const int mod = 10007; 73 | assert((GenStirling1(7, mod) == 74 | std::vector{0, 720, 1764, 1624, 735, 175, 21, 1})); 75 | } 76 | PE_REGISTER_TEST(&GenStirling1Test, "GenStirling1Test", SMALL); 77 | 78 | /* Asserts pmod::GenStirling2(7, mod) against eight hard-coded values. */ SL void GenStirling2Test() { 79 | const int mod = 10007; 80 | assert((pmod::GenStirling2(7, mod) == 81 | std::vector{0, 1, 63, 301, 350, 140, 21, 1})); 82 | } 83 | PE_REGISTER_TEST(&GenStirling2Test, "GenStirling2Test", SMALL); 84 | 85 | /* Checks GetGFCoefficientSeries and GetGFCoefficientAt on two generating functions: Fibonacci numbers and coin-change counts. */ SL void GetGFCoefficientTest() { 86 | { 87 | // Fibonacci sequence 88 | std::vector A = {1, -1, -1}; 89 | std::vector B = {0, 1}; 90 | std::vector result = {0, 1}; 91 | for (int i = 2; i <= 30; ++i) { 92 | result.push_back(AddMod(result[i - 2], result[i - 1], mod)); 93 | } 94 | std::vector x = GetGFCoefficientSeries(A, B, 30, mod); 95 | for (int i = 0; i <= 30; ++i) { 96 | assert(result[i] == x[i]); 97 | } 98 | } 99 | 100 | { 101 | // Dollar exchange. 102 | // Concrete Mathematics 103 | // 7 Generating Functions 104 | // 7.3 Solving Recurrences 105 | // Example 4: A closed form for change. 
106 | /* dp[j]: number of ways to form amount j from the denominations in can, modulo mod. */ int64 dp[10000 + 1] = {1}; 107 | int64 can[5] = {1, 5, 10, 25, 50}; 108 | for (int64 each : can) { 109 | for (int j = 0; j + each <= 10000; ++j) { 110 | if (dp[j]) { 111 | dp[j + each] = AddMod(dp[j + each], dp[j], mod); 112 | } 113 | } 114 | } 115 | 116 | /* coe: coefficients of the product of (1 - x^c) over the five denominations, built by enumerating all 32 subsets; the top degree is 1 + 5 + 10 + 25 + 50 = 91, hence the coe + 92 slices below. */ int64 coe[100] = {0}; 117 | for (int i = 0; i < 1 << 5; ++i) { 118 | int s = 0; 119 | int bc = 0; 120 | for (int j = 0; j < 5; ++j) { 121 | if (i & (1 << j)) ++bc, s += (int)can[j]; 122 | } 123 | if (bc & 1) { 124 | --coe[s]; 125 | } else { 126 | ++coe[s]; 127 | } 128 | } 129 | std::vector gfresult = GetGFCoefficientSeries( 130 | std::vector(coe, coe + 92), {1}, 10000, mod); 131 | for (int i = 0; i <= 10000; ++i) assert(dp[i] == gfresult[i]); 132 | 133 | std::string mine = ToString(GetGFCoefficientAt( 134 | std::vector(coe, coe + 92), {1}, 100000000, mod)); 135 | std::string expected = ToString("66666793333412666685000001"_bi % mod); 136 | assert(mine == expected); 137 | } 138 | } 139 | PE_REGISTER_TEST(&GetGFCoefficientTest, "GetGFCoefficientTest", SMALL); 140 | 141 | /* Recovers the Fibonacci recurrence from a short prefix with FindLinearRecurrence and checks term 38 (39088169) through two entry points. */ SL void LinearRecurrenceTest() { 142 | const int64 P = 1000000009; 143 | std::vector s = {0, 1, 1, 2, 3, 5}; 144 | std::vector v = *FindLinearRecurrence(s, P); 145 | assert(v[0] == P - 1); 146 | assert(v[1] == P - 1); 147 | assert(v[2] == 1); 148 | const int n = static_cast(std::size(v)); 149 | int64 ans = 0; 150 | for (int i = 0; i < n; ++i) ans += v[i] * s[i]; 151 | assert(ans == P); 152 | 153 | ans = LinearRecurrenceValueAt(v, s, 38, P); 154 | assert(ans == 39088169LL); 155 | 156 | std::vector t = *FindLinearRecurrence({0, 1, 1, 2, 3, 5, 8, 13}, 31); 157 | assert(t[0] == 30); 158 | assert(t[1] == 30); 159 | assert(t[2] == 1); 160 | assert(*FindLinearRecurrenceValueAt({0, 1, 1, 2, 3, 5, 8, 13}, 38, P) == 161 | 39088169); 162 | } 163 | PE_REGISTER_TEST(&LinearRecurrenceTest, "LinearRecurrenceTest", SMALL); 164 | 165 | /* Exercises the Sequence expression a[1] + a[2] with initial values {0, 1}: single terms, prefix sums and Generate with the modulus passed explicitly, then (last block) without a modulus argument, reading results through .value(). */ SL void SeqExprTest() { 166 | { 167 | Sequence a; 168 | (void)a; 169 | assert((a[1] + a[2]).ValueAt({0, 1}, 20, mod) == 
6765); 170 | assert((a[1] + a[2]).ValueAtWithCharPoly({0, 1}, 20, mod) == 6765); 171 | assert((a[1] + a[2]).SumAt({0, 1}, 20, mod) == 17710); 172 | assert((a[1] + a[2]).SumAtWithCharPoly({0, 1}, 20, mod) == 17710); 173 | assert(((a[1] + a[2]).Generate({0, 1}, 20, mod) == 174 | std::vector{0, 1, 1, 2, 3, 5, 8, 175 | 13, 21, 34, 55, 89, 144, 233, 176 | 377, 610, 987, 1597, 2584, 4181, 6765})); 177 | } 178 | { 179 | Sequence a; 180 | (void)a; 181 | assert((a[1] + a[2]).ValueAt({0, 1}, 1000, mod) == 517691607); 182 | assert((a[1] + a[2]).ValueAtWithCharPoly({0, 1}, 1000, mod) == 517691607); 183 | assert((a[1] + a[2]).SumAt({0, 1}, 1000, mod) == 625271545); 184 | assert((a[1] + a[2]).SumAtWithCharPoly({0, 1}, 1000, mod) == 625271545); 185 | } 186 | { 187 | using MT = NMod64; 188 | Sequence a; 189 | (void)a; 190 | assert((a[1] + a[2]).ValueAt({0, 1}, 1000).value() == 517691607); 191 | assert((a[1] + a[2]).SumAt({0, 1}, 1000).value() == 625271545); 192 | } 193 | } 194 | PE_REGISTER_TEST(&SeqExprTest, "SeqExprTest", SMALL); 195 | } // namespace poly_algo_test 196 | -------------------------------------------------------------------------------- /test/poly_div_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace poly_div_test { 4 | #if !defined(ONLY_RUN_PE_IMPLEMENTATION) 5 | #define ONLY_RUN_PE_IMPLEMENTATION 0 6 | #endif 7 | using poly_div_t = std::vector (*)(const std::vector&, 8 | const std::vector&, int64); 9 | /* One polynomial-division backend: entry point, size class, and the label printed in timing output. */ struct DivImpl { 10 | poly_div_t impl; 11 | int size; // 0:small, 1:large 12 | const char* name; 13 | }; 14 | 15 | /* Backends under test; index 0 (dc) supplies the reference result in TestImpl. */ DivImpl div_impl[] = { 16 | {&PolyDivDc, 1, "dc"}, 17 | {&PolyDivNormal, 0, "normal"}, 18 | #if HAS_POLY_FLINT && !ONLY_RUN_PE_IMPLEMENTATION 19 | {&flint::PolyDiv, 1, "flint"}, 20 | #endif 21 | #if HAS_POLY_NTL && !ONLY_RUN_PE_IMPLEMENTATION 22 | {&ntl::PolyDivLargeMod, 1, "ntl lm"}, 23 | {&ntl::PolyDiv, 1, "ntl"}, 24 | #endif 25 | }; 26 | 27 | /* Labels for the dp argument of TestImpl. */ const char* data_policy[3] 
= { 28 | "random", 29 | "min mod", 30 | "max mod", 31 | }; 32 | 33 | /* Divides an n-coefficient polynomial by an (n / 2)-coefficient one (both with leading coefficient 1) using every eligible backend and asserts that all results equal the first backend's. dp indexes data_policy and selects the coefficient pattern; backends after the first are skipped when their size class is below size. */ SL void TestImpl(int dp, int size, int n, int64 mod) { 34 | fprintf(stderr, "%-8s : data = %s, size = %d, n = %d, mod = %lld\n", "config", 35 | data_policy[dp], size, n, (long long)mod); 36 | 37 | std::vector x, y; 38 | srand(123456789); 39 | if (dp == 0) { 40 | for (int i = 0; i < n; ++i) x.push_back((uint64)CRand63() % mod); 41 | for (int i = 0; i < n / 2; ++i) y.push_back((uint64)CRand63() % mod); 42 | x[n - 1] = y[n / 2 - 1] = 1; 43 | } else { 44 | for (int i = 0; i < n; ++i) x.push_back(dp == 1 ? 0 : mod - 1); 45 | for (int i = 0; i < n / 2; ++i) y.push_back(dp == 1 ? 0 : mod - 1); 46 | x[n - 1] = y[n / 2 - 1] = 1; 47 | } 48 | 49 | const int M = std::size(div_impl); 50 | 51 | std::vector expected; 52 | for (int i = 0; i < M; ++i) { 53 | DivImpl who = div_impl[i]; 54 | if (i > 0) { 55 | if (who.size < size) { 56 | continue; 57 | } 58 | } 59 | clock_t start = clock(); 60 | std::vector result = who.impl(x, y, mod); 61 | clock_t end = clock(); 62 | fprintf(stderr, "%-8s : %.3f\n", who.name, 63 | 1. 
* (end - start) / CLOCKS_PER_SEC); 64 | if (i == 0) { 65 | expected = result; 66 | } else { 67 | assert(expected == result); 68 | } 69 | } 70 | } 71 | 72 | /* Runs TestImpl over every data policy, four sizes and three moduli; sizes above 2048 request large-class backends only. */ SL void PolyDivTest() { 73 | for (int dp = 0; dp < 3; ++dp) { 74 | for (int n : {128, 2048, 1000000, 1479725}) { 75 | for (int64 mod : {100019LL, 100000000003LL, 316227766016779LL}) { 76 | TestImpl(dp, n > 2048, n, mod); 77 | } 78 | } 79 | } 80 | } 81 | PE_REGISTER_TEST(&PolyDivTest, "PolyDivTest", SUPER); 82 | 83 | /* Prints one timing table (seconds) per modulus: a row per backend and a column per log2(n) from min_log2 to max_log2; size-class-0 backends print "-" above 2^14. */ SL void PolyDivPerformanceTest() { 84 | constexpr std::array mods = {100019, 1000003, 1000000007, 85 | 100000000003, 316227766016779}; 86 | constexpr int min_log2 = 10; 87 | constexpr int max_log2 = 20; 88 | for (int level = 0; level < mods.size(); ++level) { 89 | printf("mod = %llu\n", (unsigned long long)mods[level]); 90 | const uint64 mod = mods[level]; 91 | 92 | printf("log2(n) "); 93 | 94 | /* Header columns follow the measured range instead of repeating its literals. */ for (int n = min_log2; n <= max_log2; ++n) { 95 | printf("%-6d ", n); 96 | } 97 | 98 | puts(""); 99 | 100 | const int M = std::size(div_impl); 101 | 102 | std::vector expected; 103 | for (int i = 0; i < M; ++i) { 104 | DivImpl who = div_impl[i]; 105 | 106 | printf("%-8s ", who.name); 107 | srand(314159); 108 | for (int n = min_log2; n <= max_log2; ++n) { 109 | if (who.size == 0 && n > 14) { 110 | printf("%-6s ", "-"); 111 | continue; 112 | } 113 | const int size = 1 << n; 114 | std::vector x, y; 115 | /* k, not i: avoids shadowing the backend index of the enclosing loop. */ for (int k = 0; k < size; ++k) x.push_back((uint64)CRand63() % mod); 116 | for (int k = 0; k < size / 2; ++k) y.push_back((uint64)CRand63() % mod); 117 | x[size - 1] = y[size / 2 - 1] = 1; 118 | 119 | clock_t start = clock(); 120 | who.impl(x, y, mod); 121 | clock_t end = clock(); 122 | #if 1 123 | printf("%-6.3f ", 1. 
* (end - start) / CLOCKS_PER_SEC); 124 | #else 125 | uint64 a = n * (1 << n); 126 | uint64 b = end - start; 127 | printf("%-6.3f ", 1e5 * b / a); 128 | #endif 129 | } 130 | puts(""); 131 | } 132 | } 133 | } 134 | 135 | PE_REGISTER_TEST(&PolyDivPerformanceTest, "PolyDivPerformanceTest", SUPER); 136 | } // namespace poly_div_test 137 | -------------------------------------------------------------------------------- /test/prime_pi_sum_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace prime_pi_sum_test { 4 | /* Prefix tables filled by PrimePiSumTest: ps[i] is the sum of primes <= i, pc[i] the count of primes <= i. */ std::vector ps(1000001); 5 | std::vector pc(1000001); 6 | 7 | /* Asserts result against pc at every j <= sqrt(n) and at every n / j. */ SL void VerifyCnt(const int64 n, const DVA& result) { 8 | int64 v = static_cast(std::sqrt(n)); 9 | for (int j = 1; j <= v; ++j) { 10 | assert(result[j] == pc[j]); 11 | assert(result[n / j] == pc[n / j]); 12 | } 13 | } 14 | 15 | /* Asserts result against ps at every j <= sqrt(n) and at every n / j. */ SL void VerifySum(const int64 n, const DVA& result) { 16 | int64 v = static_cast(std::sqrt(n)); 17 | for (int j = 1; j <= v; ++j) { 18 | assert(result[j] == ps[j]); 19 | assert(result[n / j] == ps[n / j]); 20 | } 21 | } 22 | 23 | /* Verifies PrimePi(i) and PrimeSum(i) for every i in 1..100000. */ SL void SmallTest() { 24 | for (int i = 1; i <= 100000; ++i) { 25 | const int n = i; 26 | VerifyCnt(n, PrimePi(i)); 27 | VerifySum(n, PrimeSum(i)); 28 | } 29 | } 30 | 31 | /* Builds pc and ps from IsPrime, runs SmallTest, then checks PrimePi(10^k)[10^k] against kPrimePi[k] for k = 7..10. */ SL void PrimePiSumTest() { 32 | for (int i = 1; i <= 1000000; ++i) { 33 | pc[i] = pc[i - 1] + (IsPrime(i) ? 1 : 0); 34 | ps[i] = ps[i - 1] + (IsPrime(i) ? 
i : 0); 35 | } 36 | 37 | SmallTest(); 38 | 39 | assert((PrimePi(10000000))[10000000] == kPrimePi[7]); 40 | assert((PrimePi(100000000))[100000000] == kPrimePi[8]); 41 | assert((PrimePi(1000000000))[1000000000] == kPrimePi[9]); 42 | assert((PrimePi(10000000000))[10000000000] == kPrimePi[10]); 43 | // assert((PrimePi(100000000000))[100000000000] == kPrimePi[11]); 44 | // assert((PrimePi(1000000000000))[1000000000000] == kPrimePi[12]); 45 | } 46 | 47 | PE_REGISTER_TEST(&PrimePiSumTest, "PrimePiSumTest", BIG); 48 | 49 | /* Compares PrimeS0PMod and PrimeS1PMod (per-residue-class prime counts and prime sums for moduli 1..30, N = 100000) with brute force over plist; the last two loops repeat the comparison with the brute-force values reduced modulo M = 10007. */ SL void PrimePiSumPModTest() { 50 | const int64 N = 100000; 51 | for (int mod = 1; mod <= 30; ++mod) { 52 | int64 result[32] = {0}; 53 | for (int i = 0; i < pcnt && plist[i] <= N; ++i) ++result[plist[i] % mod]; 54 | auto v = PrimeS0PMod(N, mod); 55 | for (int j = 0; j < mod; ++j) { 56 | assert(result[j] == v[j][N]); 57 | } 58 | } 59 | for (int mod = 1; mod <= 30; ++mod) { 60 | int64 result[32] = {0}; 61 | for (int i = 0; i < pcnt && plist[i] <= N; ++i) { 62 | result[plist[i] % mod] += plist[i]; 63 | } 64 | auto v = PrimeS1PMod(N, mod); 65 | for (int j = 0; j < mod; ++j) { 66 | assert(result[j] == v[j][N]); 67 | } 68 | } 69 | const int64 M = 10007; 70 | for (int mod = 1; mod <= 30; ++mod) { 71 | int64 result[32] = {0}; 72 | for (int i = 0; i < pcnt && plist[i] <= N; ++i) ++result[plist[i] % mod]; 73 | auto v = PrimeS0PMod>>(N, mod); 74 | for (int j = 0; j < mod; ++j) { 75 | assert(result[j] % M == v[j][N].value()); 76 | } 77 | } 78 | for (int mod = 1; mod <= 30; ++mod) { 79 | int64 result[32] = {0}; 80 | for (int i = 0; i < pcnt && plist[i] <= N; ++i) { 81 | result[plist[i] % mod] += plist[i]; 82 | } 83 | auto v = PrimeS1PMod>>(N, mod); 84 | for (int j = 0; j < mod; ++j) { 85 | assert(result[j] % M == v[j][N].value()); 86 | } 87 | } 88 | } 89 | PE_REGISTER_TEST(&PrimePiSumPModTest, "PrimePiSumPModTest", SMALL); 90 | } // namespace prime_pi_sum_test 91 | -------------------------------------------------------------------------------- 
/test/test_compile_each.bat: -------------------------------------------------------------------------------- 1 | test_compile_each.py 2 | pause -------------------------------------------------------------------------------- /test/test_compile_each.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | # -*- coding: UTF-8 -*- 3 | import os 4 | import sys 5 | import shutil 6 | import subprocess 7 | import time 8 | 9 | CURRENT_DIRECTORY = os.getcwd() 10 | PARENT_DIRECTORY = os.pardir 11 | 12 | 13 | def DurationPartsFromNs(duration): 14 | min_part = duration // 1000000000 // 60 15 | sec_part = duration // 1000000000 % 60 16 | millisec_part = duration // 1000000 % 1000 17 | return (min_part, sec_part, millisec_part) 18 | 19 | 20 | def FormatNs(duration): 21 | return '%d:%02d.%03d' % DurationPartsFromNs(duration) 22 | 23 | 24 | def main(): 25 | ret = 0 26 | for file in os.listdir(PARENT_DIRECTORY): 27 | if not file.startswith('pe'): 28 | continue 29 | filename, file_ext_name = os.path.splitext(file) 30 | if file_ext_name != '': 31 | continue 32 | content = '#include <%s>\n int main(){}' % file 33 | with open('main.cc', 'wb') as tempf: 34 | tempf.write(content.encode('utf8')) 35 | print('Compile %s' % file) 36 | start_time = time.perf_counter_ns() 37 | ret = os.system('pe++.py main.cc -hc') 38 | time_usage = FormatNs(time.perf_counter_ns() - start_time) 39 | if os.path.exists('main.cc'): 40 | os.remove('main.cc') 41 | print('Done, return code = %d, time usage = %s' % (ret, time_usage)) 42 | print() 43 | if ret != 0: 44 | break 45 | if os.path.exists('a.exe'): 46 | os.remove('a.exe') 47 | return ret 48 | 49 | 50 | if __name__ == "__main__": 51 | sys.exit(main()) -------------------------------------------------------------------------------- /test/test_int128_noopenmp.bat: -------------------------------------------------------------------------------- 1 | pe++.py pe_test.c -r -- 
-DENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=0 2 | pause -------------------------------------------------------------------------------- /test/test_int128_openmp.bat: -------------------------------------------------------------------------------- 1 | pe++.py pe_test.c -r -- -DENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 2 | pause -------------------------------------------------------------------------------- /test/test_noint128_noopenmp.bat: -------------------------------------------------------------------------------- 1 | pe++.py pe_test.c -r -- -DENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=0 -DENABLE_OPENMP=0 2 | pause -------------------------------------------------------------------------------- /test/test_noint128_openmp.bat: -------------------------------------------------------------------------------- 1 | pe++.py pe_test.c -r -- -DENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED -DTRY_TO_USE_INT128=0 -DENABLE_OPENMP=1 2 | pause -------------------------------------------------------------------------------- /test/test_perf.bat: -------------------------------------------------------------------------------- 1 | pe++.py test_perf.c -DTEST_ALL -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 && a.exe -r 10 2 | pause -------------------------------------------------------------------------------- /test/test_perf.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | #define ONLY_RUN_PE_IMPLEMENTATION 1 4 | 5 | #if 1 6 | #include "poly_mul_test.c" 7 | //#include "poly_div_test.c" 8 | //#include "bi_div_test.c" 9 | //#include "bi_mul_test.c" 10 | //#include "extended_signed_int_test.c" 11 | //#include "extended_unsigned_int_test.c" 12 | #endif 13 | 14 | /* Returns the single TestRegistry instance, held in a function-local static. */ TestRegistry& GetTestRegistry() { 15 | static TestRegistry tester; 16 | return 
tester; 17 | } 18 | 19 | /* Formats a duration given in seconds with three decimals. */ static inline std::string FormatSecond(double s) { 20 | char buff[256]; 21 | /* snprintf bounds the write: "%.3f" of a very large double needs more than 256 characters. */ snprintf(buff, sizeof(buff), "%.3f", s); 22 | return buff; 23 | } 24 | 25 | /* TimeDelta overload of the formatter above; same bounded write. */ static inline std::string FormatSecond(TimeDelta d) { 26 | char buff[256]; 27 | snprintf(buff, sizeof(buff), "%.3f", d.ToSeconds()); 28 | return buff; 29 | } 30 | 31 | /* Runs every registered test run_count times (default 2; the r flag, with any number of leading dashes, takes the count from the next argument), then prints per-run timings, totals and per-run averages as a table and again with "," as separator. */ int main(int argc, char* argv[]) { 32 | int run_count = 2; 33 | 34 | for (int i = 1; i < argc;) { 35 | std::string c = argv[i]; 36 | const int size = static_cast(std::size(c)); 37 | int j = 0; 38 | while (j < size && c[j] == '-') ++j; 39 | c = c.substr(j); 40 | if (c == "r") { 41 | if (i + 1 < argc) { 42 | run_count = atoi(argv[i + 1]); 43 | i += 2; 44 | } else { 45 | fprintf(stderr, "Run count is unspecified"); 46 | exit(-1); 47 | } 48 | } else { 49 | fprintf(stderr, "Unknown flags: %s", argv[i]); 50 | exit(-1); 51 | } 52 | } 53 | 54 | std::cout << std::endl; 55 | std::cout << "run_count: " << run_count << std::endl; 56 | std::cout << std::endl; 57 | 58 | PeInitializer() 59 | .set_cal_phi() 60 | .set_cal_mu() 61 | .set_fft_k() 62 | .set_ntt32_k() 63 | .set_ntt64_k() 64 | .Init(); 65 | 66 | auto& tester = GetTestRegistry(); 67 | const int size = (int)std::size(tester.tests); 68 | 69 | TableFormatter tf; 70 | auto& header = tf.AppendLine(); 71 | /* One accumulator per test plus a final slot (index size) for the whole suite. */ std::vector total_timings; 72 | for (int i = 0; i < size; ++i) { 73 | auto& item = tester.tests[i]; 74 | header.push_back(item.description); 75 | total_timings.push_back({}); 76 | } 77 | header.push_back("Total"); 78 | total_timings.push_back({}); 79 | 80 | for (int _ = 0; _ < run_count; ++_) { 81 | std::cout << "Test run " << _ << std::endl; 82 | bool isFirstTest = true; 83 | TimeDelta test_suite_timing; 84 | auto& line = tf.AppendLine(); 85 | for (int i = 0; i < size; ++i) { 86 | auto& item = tester.tests[i]; 87 | if (!isFirstTest) { 88 | std::cout << std::endl; 89 | } 90 | std::cout << "Begin " << item.description << std::endl; 91 | TimeRecorder tr; 92 | item.test(); 93 | std::cout << "End " << item.description 
<< std::endl; 94 | auto usage = tr.Elapsed(); 95 | std::cout << "Time usage " << usage.Format() << std::endl; 96 | 97 | line.push_back(FormatSecond(usage.ToSeconds())); 98 | total_timings[i].Add(usage); 99 | test_suite_timing.Add(usage); 100 | isFirstTest = false; 101 | } 102 | line.push_back(FormatSecond(test_suite_timing.ToSeconds())); 103 | total_timings[size].Add(test_suite_timing); 104 | 105 | std::cout << std::endl << "Test run " << _ << " finished" << std::endl; 106 | std::cout << "Time usage " << test_suite_timing.Format() << std::endl 107 | << std::endl; 108 | } 109 | 110 | /* Empty spacer row between the per-run rows and the summary rows. */ { 111 | auto& line = tf.AppendLine(); 112 | for (int i = 0; i <= size; ++i) { 113 | line.push_back(""); 114 | } 115 | } 116 | 117 | /* Total seconds per column across all runs. */ { 118 | auto& line = tf.AppendLine(); 119 | for (int i = 0; i <= size; ++i) { 120 | line.push_back(FormatSecond(total_timings[i].ToSeconds())); 121 | } 122 | } 123 | 124 | /* Mean seconds per run. */ { 125 | auto& line = tf.AppendLine(); 126 | for (int i = 0; i <= size; ++i) { 127 | line.push_back(FormatSecond(total_timings[i].ToSeconds() / run_count)); 128 | } 129 | } 130 | 131 | tf.Render(std::cout); 132 | 133 | std::cout << std::endl; 134 | 135 | tf.SetDefaultFormat(TableFormatter::NoAlign()) 136 | .SetSeparator(",") 137 | .Render(std::cout); 138 | return 0; 139 | } -------------------------------------------------------------------------------- /test/tree_test.c: -------------------------------------------------------------------------------- 1 | #include "pe_test.h" 2 | 3 | namespace tree_test { 4 | /* Randomly interleaves batches of range updates on RUBit with point-query checks against a plain array. */ SL void RuBitTest() { 5 | const int n = 100; 6 | int data[n + 1] = {0}; 7 | RUBit tree(n); 8 | for (int iter = 0; iter < 100; ++iter) { 9 | if (rand() % 2 == 0) { 10 | for (int i = 0; i < 100; ++i) { 11 | int u = rand() % n + 1, v = rand() % n + 1; 12 | int w = rand() % n - 50; 13 | if (u > v) std::swap(u, v); 14 | tree.Update(u, v, w); 15 | for (int j = u; j <= v; ++j) data[j] += w; 16 | } 17 | } else { 18 | /* Every index 1..n is checked; bound tied to n rather than a repeated literal. */ for (int i = 1; i <= n; ++i) { 19 | assert(tree.Query(i) == data[i]); 20 | 
} 21 | } 22 | } 23 | } 24 | 25 | /* Randomly interleaves batches of point updates on RSQBit with range-sum checks against a plain array. */ SL void RsqBitTest() { 26 | const int n = 100; 27 | int data[n + 1] = {0}; 28 | RSQBit tree(n); 29 | for (int iter = 0; iter < 100; ++iter) { 30 | if (rand() % 2 == 0) { 31 | for (int i = 0; i < 100; ++i) { 32 | int u = rand() % n + 1, v = rand() % n + 1; 33 | if (u > v) std::swap(u, v); 34 | int s = 0; 35 | for (int j = u; j <= v; ++j) s += data[j]; 36 | assert(tree.Query(u, v) == s); 37 | } 38 | } else { 39 | /* Every index 1..n is updated; bound tied to n rather than a repeated literal. */ for (int i = 1; i <= n; ++i) { 40 | int w = rand() % n - 50; 41 | data[i] += w; 42 | tree.Update(i, w); 43 | } 44 | } 45 | } 46 | } 47 | 48 | /* Registered entry point: runs both binary-indexed-tree checks. */ SL void TreeTest() { 49 | RuBitTest(); 50 | RsqBitTest(); 51 | } 52 | 53 | PE_REGISTER_TEST(&TreeTest, "TreeTest", SMALL); 54 | } // namespace tree_test 55 | -------------------------------------------------------------------------------- /test_all.bat: -------------------------------------------------------------------------------- 1 | bazel clean 2 | bazel run test:test 3 | bazel run test:test_tcmalloc 4 | bazel run test:test_noint128_noopenmp 5 | bazel run test:test_int128_noopenmp 6 | bazel run test:test_int128_openmp 7 | bazel run test:test_noint128_openmp -------------------------------------------------------------------------------- /toolchain/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baihacker/pe/a149689695a1be4ff987b6b8c5c3297f7f1a86ba/toolchain/BUILD --------------------------------------------------------------------------------