├── .bazelrc
├── .github
    └── workflows
    │   └── ci.yml
├── .travis.yml
├── BUILD
├── README.md
├── WORKSPACE
├── benchmarks
    ├── benchmark.md
    ├── benchmark_20180629.md
    ├── benchmark_20190918.md
    ├── benchmark_20191012.md
    ├── benchmark_20191013.md
    ├── benchmark_20200628.md
    ├── benchmark_20230101.md
    ├── benchmark_20250315.md
    ├── benchmark_20250601.md
    ├── format_test_result.py
    └── perf_test_result.txt
├── build_all.bat
├── example
    ├── BUILD
    ├── bi_example_pe483.c
    ├── billion_sort.c
    ├── bit.c
    ├── build_each.bat
    ├── build_each.py
    ├── continued_fraction.c
    ├── count_carlitz_words.c
    ├── dfa_counter.c
    ├── dfa_summer.c
    ├── dva.c
    ├── example.c
    ├── linear_recurrence.c
    ├── matrix_power.c
    ├── mma_find_recurrence.c
    ├── mma_interpolating_polynomial.c
    ├── mma_to_cpp.c
    ├── mod_number.c
    ├── multiplicative_function_prefix_sum_common_function.c
    ├── multiplicative_function_prefix_sum_mavlue_base.c
    ├── parallel_cal_prime_pi.c
    ├── partition_mobius.c
    ├── pe_db.c
    ├── power_sum.c
    ├── prime_power_sum.c
    ├── random_sample.c
    ├── range.c
    └── sym_poly.c
├── format.py
├── gen_config.py
├── legacy
    ├── pe_poly.hpp
    └── pe_sym_poly.hpp
├── libraries_on_win64.md
├── pe
├── pe.hpp
├── pe_algo
├── pe_array
├── pe_base
├── pe_bi32
├── pe_bit
├── pe_config
├── pe_db
├── pe_dpe
├── pe_extended_int
├── pe_extended_signed_int
├── pe_extended_unsigned_int
├── pe_fft
├── pe_float
├── pe_fraction
├── pe_gbi
├── pe_geometry
├── pe_initializer
├── pe_int
├── pe_int_algo
├── pe_internal
├── pe_io
├── pe_mat
├── pe_memory
├── pe_misc
├── pe_mma
├── pe_mod
├── pe_mpf
├── pe_mpz
├── pe_nt
├── pe_nt_base
├── pe_ntf
├── pe_parallel
├── pe_parallel_algo
├── pe_persistance
├── pe_poly_algo
├── pe_poly_base
├── pe_poly_base_common
├── pe_poly_base_flint
├── pe_poly_base_gmp
├── pe_poly_base_libbf
├── pe_poly_base_min25
├── pe_poly_base_ntl
├── pe_rand
├── pe_range
├── pe_serialization
├── pe_span
├── pe_sym_poly
├── pe_time
├── pe_tree
├── pe_type_traits
├── pe_vector
├── precompile.bat
├── test
    ├── BUILD
    ├── algo_test.c
    ├── array_test.c
    ├── bi_div_test.c
    ├── bi_mul_test.c
    ├── bit_test.c
    ├── dva_test.c
    ├── extended_signed_int_test.c
    ├── extended_unsigned_int_test.c
    ├── fft_test.c
    ├── gbi_test.c
    ├── init_inv_test.c
    ├── int128_test.c
    ├── mat_mul_test.c
    ├── misc_test.c
    ├── mod_test.c
    ├── mpf_test.c
    ├── nt_test.c
    ├── parallel_sort_test.c
    ├── pe_test.c
    ├── pe_test.h
    ├── poly_algo_test.c
    ├── poly_div_test.c
    ├── poly_mul_test.c
    ├── prime_pi_sum_test.c
    ├── test_compile_each.bat
    ├── test_compile_each.py
    ├── test_int128_noopenmp.bat
    ├── test_int128_openmp.bat
    ├── test_noint128_noopenmp.bat
    ├── test_noint128_openmp.bat
    ├── test_perf.bat
    ├── test_perf.c
    └── tree_test.c
├── test_all.bat
└── toolchain
    ├── BUILD
    └── pe_toolchain.bzl


/.bazelrc:
--------------------------------------------------------------------------------
1 | startup --output_user_root=D:/bazel-output
2 | build --action_env=C_INCLUDE_PATH=D:/Hilbert/usr/include;D:/Hilbert/usr/include/pe;D:/Hilbert/usr/include/flint
3 | build --action_env=LIBRARY_PATH=D:/Hilbert/usr/lib
4 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: PE Tests
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   gcc-build-and-test:
 7 |     runs-on: ubuntu-latest
 8 | 
 9 |     steps:
10 |       - name: Checkout repository
11 |         uses: actions/checkout@v3
12 | 
13 |       - name: Install dependencies
14 |         run: sudo apt-get update && sudo apt-get install -y g++ libgmp-dev libflint-dev libntl-dev
15 | 
16 |       - name: Build project
17 |         run: |
18 |           export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH}
19 |           export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH}
20 |           g++ test/pe_test.c -o ./pe_test.out --std=c++20 -O3 -march=native -mtune=native -fopenmp -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 -DENABLE_EIGEN=0 -DENABLE_GMP=1 -DENABLE_FLINT=0 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_ZMQ=0 -DENABLE_LIBBF=0 -DENABLE_PRIME_COUNT=0 -DENABLE_PRIME_SIEVE=0 -DENABLED_TEST=SMALL,MEDIUM,BIG -DCONTINUOUS_INTEGRATION_TEST
21 | 
22 |       - name: Run tests
23 |         run: ./pe_test.out
24 | 
25 |   gcc-build-examples:
26 |     runs-on: ubuntu-latest
27 | 
28 |     steps:
29 |       - name: Checkout repository
30 |         uses: actions/checkout@v3
31 | 
32 |       - name: Install dependencies
33 |         run: sudo apt-get update && sudo apt-get install -y g++ libgmp-dev libflint-dev libntl-dev
34 | 
35 |       - name: Install Bazel
36 |         uses: bazel-contrib/setup-bazel@0.14.0
37 | 
38 |       - name: Build examples
39 |         run: |
40 |           export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH}
41 |           export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH}
42 |           bazel --ignore_all_rc_files build //example:gcc_builds --action_env=CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH} --action_env=LIBRARY_PATH=${LIBRARY_PATH}
43 | 
44 |   clang-build-and-test:
45 |     runs-on: ubuntu-latest
46 | 
47 |     steps:
48 |       - name: Checkout repository
49 |         uses: actions/checkout@v3
50 | 
51 |       - name: Install dependencies
52 |         run: sudo apt-get update && sudo apt-get install -y clang libgmp-dev libflint-dev libntl-dev
53 | 
54 |       - name: Build project
55 |         run: |
56 |           export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH}
57 |           export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH}
58 |           clang++ -x c++ test/pe_test.c -o ./pe_test.out --std=c++20 -O3 -march=native -mtune=native -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=0 -DENABLE_EIGEN=0 -DENABLE_GMP=1 -DENABLE_FLINT=0 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_ZMQ=0 -DENABLE_LIBBF=0 -DENABLE_PRIME_COUNT=0 -DENABLE_PRIME_SIEVE=0 -DENABLED_TEST=SMALL,MEDIUM,BIG -DCONTINUOUS_INTEGRATION_TEST
59 | 
60 |       - name: Run tests
61 |         run: ./pe_test.out
62 | 
63 |   msvc-build-and-test:
64 |     runs-on: windows-latest
65 | 
66 |     steps:
67 |       - name: Checkout repository
68 |         uses: actions/checkout@v3
69 | 
70 |       - name: Install dependencies
71 |         run: |
72 |           #Invoke-WebRequest -Uri "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip" -OutFile "eigen.zip"
73 |           #Expand-Archive -Path "eigen.zip" -DestinationPath "C:\eigen"
74 | 
75 |       - uses: ilammy/msvc-dev-cmd@v1.4.1
76 | 
77 |       - name: Build project
78 |         run: |
79 |           cl test\pe_test.c /TP /GS /GL /W3 /Gy /Zc:wchar_t /Zi /Gm- /O2 /Zc:inline /fp:precise /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /errorReport:prompt /WX- /Zc:forScope /Gd /Oi /MT /openmp /std:c++20 /FC /EHsc /nologo /diagnostics:classic  /DENABLE_ASSERT=0 /DTRY_TO_USE_INT128=1 /DENABLE_OPENMP=1 /DENABLE_EIGEN=0 /DENABLE_GMP=0 /DENABLE_FLINT=0 /DENABLE_MPFR=0 /DENABLE_NTL=0 /DENABLE_ZMQ=0 /DENABLE_LIBBF=0 /DENABLE_PRIME_COUNT=0 /DENABLE_PRIME_SIEVE=0 /DENABLED_TEST=SMALL,MEDIUM,BIG /DCONTINUOUS_INTEGRATION_TEST /I "$env:GITHUB_WORKSPACE" /I "C:\eigen\eigen-3.4.0"
80 | 
81 |       - name: Run tests
82 |         run: |
83 |           .\pe_test.exe
84 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: focal
 2 | language: cpp
 3 | env:
 4 |   - BUILD_ARGUMENTS="--std=c++17 -O3 -march=native -mtune=native -lgmpxx -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=0 -DENABLE_EIGEN=1 -DENABLE_GMP=1 -DENABLE_FLINT=1 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_LIBBF=0" BUILD_CMD_PUSH="clang++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST -DNO_SUPER_TEST" BUILD_CMD_CRON="clang++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST"
 5 |   - BUILD_ARGUMENTS="--std=c++17 -O3 -march=native -mtune=native -fopenmp -lgmpxx -lflint -lmpfr -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 -DENABLE_EIGEN=1 -DENABLE_GMP=1 -DENABLE_FLINT=1 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_LIBBF=0" BUILD_CMD_PUSH="g++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST -DNO_SUPER_TEST" BUILD_CMD_CRON="g++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST"
 6 | script:
 7 |   - sudo apt-get update
 8 |   - sudo apt-get -y install libeigen3-dev
 9 |   - sudo apt-get -y install libgmp-dev
10 |   - sudo apt-get -y install libflint-dev
11 |   - sudo apt-get -y install libntl-dev
12 |   - export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH}
13 |   - export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH}
14 |   - echo ${TRAVIS_EVENT_TYPE}
15 |   - echo && [ "${TRAVIS_EVENT_TYPE}" == "push" ] && ${BUILD_CMD_PUSH} && ./a.out || [ "${TRAVIS_EVENT_TYPE}" != "push" ] && echo "skip push build"
16 |   - echo && [ "${TRAVIS_EVENT_TYPE}" == "cron" ] && ${BUILD_CMD_CRON} && ./a.out || [ "${TRAVIS_EVENT_TYPE}" != "cron" ] && echo "skip cron build"
17 | 


--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
 1 | load("//toolchain:pe_toolchain.bzl", "pe_library")
 2 | 
 3 | package(
 4 |     default_visibility = [
 5 |         "//visibility:public",
 6 |     ],
 7 | )
 8 | 
 9 | [pe_library(name = x + "_lib", srcs = [x]) for x in glob(["pe_*"])]
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # PE: C++ Library for Project Euler
  2 | 
  3 | [![Build Status (Windows/MSVC)](https://ci.appveyor.com/api/projects/status/scaji00tde2gb7uy?svg=true)](https://ci.appveyor.com/project/baihacker/pe-win-msvc)
  4 | [![Build Status (Ubuntu/GCC)](https://ci.appveyor.com/api/projects/status/9bt606nax24anyen?svg=true)](https://ci.appveyor.com/project/baihacker/pe-ubuntu-gcc)
  5 | [![Build Status (Ubuntu/Clang)](https://ci.appveyor.com/api/projects/status/nw243uvs95i0bj85?svg=true)](https://ci.appveyor.com/project/baihacker/pe-ubuntu-clang)
  6 | [![Build Status](https://github.com/baihacker/pe/actions/workflows/ci.yml/badge.svg)](https://github.com/baihacker/pe/actions)
  7 | [![GitHub Releases](https://img.shields.io/github/release/baihacker/pe.svg)](https://github.com/baihacker/pe/releases)
  8 | <a href="https://projecteuler.net/recent" target="_blank">![Project Euler](https://projecteuler.net/profile/baihacker.png?)</a>
  9 | 
 10 | **PE** is a C++ library designed to solve problems on [Project Euler](https://projecteuler.net/recent).
 11 | 
 12 | ## Prerequisites
 13 | 
 14 | To use this library, you need a C++ development environment that supports:
 15 | * C++17 or later.
 16 | * Building `x86_64` targets.
 17 | 
 18 | ## Installation
 19 | 
 20 | 1. **Include the Library:**
 21 |    - Place all the library files into a directory of your choice.
 22 |    - Ensure that `#include <pe.hpp>` works by adding the directory to the `CPLUS_INCLUDE_PATH` environment variable.
 23 | 
 24 | 2. **Configure the Library:**
 25 |    - Run **[gen_config.py](https://github.com/baihacker/pe/blob/master/gen_config.py)** from the installation directory to generate **[pe_config](https://github.com/baihacker/pe/blob/master/pe_config)**.
 26 |      - This script generates a static configuration file with default values. You can manually edit this file after generation.
 27 |        - `ENABLE_ASSERT`: Enable assertions for certain inputs or conditions.
 28 |        - `TRY_TO_USE_INT128`: Check if the compiler supports `int128` and enable it. Set to `0` to disable `int128` even if supported.
 29 |      - The script also automatically detects the presence of third-party libraries and sets the appropriate flags:
 30 |        - `ENABLE_EIGEN`: Use [Eigen](http://eigen.tuxfamily.org/index.php?title=Main_Page).
 31 |        - `ENABLE_GMP`: Use [GMP](https://gmplib.org).
 32 |        - `ENABLE_FLINT`: Use [FLINT](http://www.flintlib.org).
 33 |        - `ENABLE_MPFR`: Use [MPFR](https://www.mpfr.org).
 34 |        - `ENABLE_LIBBF`: Use [libbf](https://bellard.org/libbf).
 35 |        - `ENABLE_NTL`: Use [NTL](https://www.shoup.net/ntl/download.html).
 36 |        - `ENABLE_ZMQ`: Use [ZeroMQ](https://zeromq.org/).
 37 |        - `ENABLE_PRIME_COUNT`: Use [PrimeCount](https://github.com/kimwalisch/primecount).
 38 |        - `ENABLE_PRIME_SIEVE`: Use [PrimeSieve](https://github.com/kimwalisch/primesieve).
 39 |        - `ENABLE_TCMALLOC`: Use [tcmalloc](https://github.com/gperftools/gperftools).
 40 |    - Manually edit **[pe_config](https://github.com/baihacker/pe/blob/master/pe_config)** to add or modify configuration items as needed:
 41 |      - `ENABLE_OPENMP`: Enable [OpenMP](http://www.openmp.org). The script doesn't generate the default config for OpenMP.
 42 | 
 43 | 3. **(Optional) Generate Precompiled Header:**
 44 |    - Run `g++ -xc++-header pe.hpp` in the installation directory to create a precompiled header (`pe.hpp.gch`).
 45 |    - You may add additional compiler options if required (e.g., `g++ -xc++-header pe.hpp --std=c++17 -O3 -march=native -fopenmp`).
 46 | 
 47 | ## Usage
 48 | 
 49 | For a quick start, refer to [example.c](https://github.com/baihacker/pe/blob/master/example/example.c).
 50 | 
 51 | ## File List
 52 | 
 53 | - **pe**: Contains all implementation files.
 54 | - **pe.hpp**: Header file for generating the precompiled header. Includes the core library.
 55 | - **pe_algo**: Contains various algorithms.
 56 | - **pe_array**: Array implementation with compile-time and runtime dimension length. Supports element counts exceeding `int32` limits and custom allocators.
 57 | - **pe_base**: Pre-included headers, macros, typedefs, and basic inline functions.
 58 | - **pe_bi32**: Big integer implementation with base `1 << 32`.
 59 | - **pe_bit**: Bit manipulation utilities.
 60 | - **pe_config**: Centralized configuration file for PE.
 61 | - **pe_db**: Load and save pre-calculated results, such as prime pi and prime sum.
 62 | - **pe_dpe**: Distributed computation.
 63 | - **pe_extended_int**: Extended integer types.
 64 | - **pe_extended_signed_int**: Extended signed integer types.
 65 | - **pe_extended_unsigned_int**: Extended unsigned integer types.
 66 | - **pe_fft**: Fast Fourier Transform and polynomial multiplication utilities.
 67 | - **pe_float**: Functions for unified float operations including `__float128`.
 68 | - **pe_fraction**: Fraction arithmetic operations.
 69 | - **pe_gbi**: General big integer operations, corresponding to `pe_nt`.
 70 | - **pe_geometry**: Support for `Point2D` and `Point3D`.
 71 | - **pe_initializer**: Helper classes and macros for library initialization.
 72 | - **pe_int**: Basic integer utilities.
 73 | - **pe_int_algo**: Integer algorithms for extended integers and general big integers.
 74 | - **pe_internal**: Includes configuration, defines necessary types/macros, and third-party libraries.
 75 | - **pe_io**: Methods and macros for simplified or accelerated I/O operations.
 76 | - **pe_mat**: Matrix operations.
 77 | - **pe_memory**: Memory management utilities (Windows only).
 78 | - **pe_misc**: Miscellaneous utility functions.
 79 | - **pe_mma**: Support for MMA: helper methods or classes for MMA code generation.
 80 | - **pe_mod**: Modular arithmetic utilities.
 81 | - **pe_mpf**: Multi-precision floating-point numbers based on GMP.
 82 | - **pe_mpz**: Multi-precision integers based on GMP.
 83 | - **pe_nt**: Core number theory utilities.
 84 | - **pe_nt_base**: Prime list generation, integer factorization, prime testing, and computations of φ and μ.
 85 | - **pe_parallel**: Simple framework for multi-threaded problem-solving (Windows only).
 86 | - **pe_parallel_algo**: Parallel algorithms.
 87 | - **pe_persistance**: Key-Value Persistence (may support Linux with adjustments).
 88 | - **pe_poly_algo**: Polynomial algorithms.
 89 | - **pe_poly_base**: Basic polynomial algorithms.
 90 | - **pe_poly_base_flint**: Polynomial algorithms based on FLINT.
 91 | - **pe_poly_base_gmp**: Polynomial algorithms based on gmp.
 92 | - **pe_poly_base_libbf**: Polynomial algorithms based on libbf.
 93 | - **pe_poly_base_min25**: Polynomial algorithms from [Min_25](https://github.com/min-25), including the fastest polynomial multiplication.
 94 | - **pe_poly_base_ntl**: Polynomial algorithms based on NTL.
 95 | - **pe_rand**: Random number generation utilities.
 96 | - **pe_range**: Range implementation.
 97 | - **pe_serialization**: Object serialization.
 98 | - **pe_span**: Implementation of `Span`.
 99 | - **pe_sym_poly**: Symbolic polynomial operations.
100 | - **pe_time**: Utilities for `TimeDelta` and `TimeRecorder`.
101 | - **pe_tree**: Tree-based data structures.
102 | - **pe_type_traits**: Type trait utilities.
103 | - **pe_vector**: Vector operations.
104 | 


--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baihacker/pe/a149689695a1be4ff987b6b8c5c3297f7f1a86ba/WORKSPACE


--------------------------------------------------------------------------------
/benchmarks/benchmark_20180629.md:
--------------------------------------------------------------------------------
  1 | # Benchmark
  2 | 
  3 | ## Ntt benchmark
  4 | 
  5 | ### Test Environment:
  6 | 
  7 | * OS:  Win10 Pro 1803
  8 | * CPU: i7-4790K (MMX, SSE, SSE2, SSE3, SSE4.1, SSE4.2, EM64T, VT-x, AES, AVX, AVX2, FMA3)
  9 | * Compiler: MinGW-x86_64-8.1.0-win32-seh-rt_v6-rev0
 10 | * Msys2: msys2-x86_64-20190524
 11 | * Test code: [Ntt test](https://github.com/baihacker/pe/blob/master/test/ntt_test.c)
 12 | * Build libbf:
 13 |   > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o libbf.avx2.o libbf.c
 14 |  
 15 |   > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o cutils.avx2.o cutils.c
 16 |  
 17 |   > gcc-ar crv libbf.avx2.a cutils.avx2.o libbf.avx2.o
 18 | * Build test target:
 19 |   > -o a.exe --std=c++11 -O3 -march=native -mtune=native -fopenmp -Wl,--stack,268435456 -lbf -lgmpxx -lflint -lgmp -lmpfr -lmpir
 20 | 
 21 | ### Test result
 22 | 
 23 | #### Openmp enabled
 24 | ```cpp
 25 | ntt test : data = random, size = 0, n = 1000000, mod = 100019
 26 | flint n  : 0.407
 27 | flint p  : 1.156
 28 | ntt32 s  : 1.031
 29 | ntt32 l  : 1.156
 30 | ntt64 s  : 1.578
 31 | ntt64 l  : 1.656
 32 | Min_25 s : 0.156
 33 | Min_25 l : 0.203
 34 | libbf    : 0.828
 35 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003
 36 | flint n  : 1.234
 37 | flint p  : 2.890
 38 | ntt32 l  : 2.375
 39 | ntt64 l  : 3.391
 40 | Min_25 l : 0.343
 41 | libbf    : 1.359
 42 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779
 43 | flint n  : 0.843
 44 | flint p  : 2.063
 45 | ntt64 l  : 1.671
 46 | Min_25 l : 0.203
 47 | libbf    : 0.859
 48 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003
 49 | flint n  : 0.422
 50 | flint p  : 1.234
 51 | ntt32 s  : 0.984
 52 | ntt32 l  : 1.156
 53 | ntt64 s  : 1.515
 54 | ntt64 l  : 1.651
 55 | Min_25 s : 0.141
 56 | Min_25 l : 0.172
 57 | libbf    : 0.813
 58 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003
 59 | flint n  : 1.234
 60 | flint p  : 2.750
 61 | ntt32 l  : 2.359
 62 | ntt64 l  : 3.422
 63 | Min_25 l : 0.359
 64 | libbf    : 1.375
 65 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779
 66 | flint n  : 0.829
 67 | flint p  : 2.077
 68 | ntt64 l  : 1.703
 69 | Min_25 l : 0.172
 70 | libbf    : 0.859
 71 | 
 72 | mod = 100019
 73 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
 74 | flint n  0.016  0.000  0.000  0.000  0.000  0.000  0.016  0.032  0.079  0.219  0.422
 75 | flint p  0.000  0.000  0.000  0.000  0.016  0.031  0.063  0.140  0.282  0.594  1.219
 76 | ntt32 s  0.000  0.000  0.016  0.000  0.016  0.047  0.062  0.109  0.235  0.484  1.000
 77 | ntt32 l  0.000  0.000  0.000  0.015  0.016  0.047  0.062  0.125  0.281  0.578  1.187
 78 | ntt64 s  0.000  0.000  0.016  0.016  0.031  0.062  0.094  0.172  0.375  0.734  1.531
 79 | ntt64 l  0.000  0.000  0.016  0.015  0.032  0.062  0.094  0.188  0.406  0.813  1.687
 80 | Min_25 s 0.000  0.015  0.000  0.000  0.000  0.016  0.016  0.016  0.032  0.062  0.141
 81 | Min_25 l 0.000  0.000  0.000  0.000  0.000  0.015  0.016  0.032  0.031  0.078  0.172
 82 | libbf    0.000  0.000  0.000  0.000  0.000  0.015  0.047  0.093  0.188  0.406  0.843
 83 | mod = 100000000003
 84 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
 85 | flint n  0.016  0.000  0.000  0.000  0.000  0.016  0.031  0.062  0.140  0.390  0.859
 86 | flint p  0.000  0.000  0.000  0.000  0.016  0.047  0.094  0.203  0.422  0.906  1.907
 87 | ntt32 l  0.000  0.000  0.000  0.015  0.031  0.047  0.078  0.141  0.297  0.594  1.187
 88 | ntt64 l  0.016  0.000  0.000  0.015  0.031  0.063  0.093  0.203  0.391  0.828  1.672
 89 | Min_25 l 0.000  0.000  0.000  0.000  0.016  0.015  0.016  0.031  0.047  0.094  0.265
 90 | libbf    0.000  0.000  0.000  0.000  0.016  0.031  0.047  0.094  0.203  0.421  0.875
 91 | mod = 316227766016779
 92 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
 93 | flint n  0.000  0.000  0.000  0.000  0.000  0.016  0.047  0.110  0.203  0.391  0.828
 94 | flint p  0.000  0.016  0.000  0.016  0.031  0.047  0.125  0.234  0.468  1.016  2.124
 95 | ntt64 l  0.000  0.000  0.016  0.016  0.031  0.062  0.094  0.204  0.406  0.813  1.672
 96 | Min_25 l 0.000  0.000  0.016  0.000  0.000  0.000  0.015  0.016  0.046  0.078  0.172
 97 | libbf    0.000  0.000  0.015  0.000  0.016  0.016  0.047  0.093  0.203  0.437  0.875
 98 | ```
 99 | 
100 | #### Openmp disabled (option "-fopenmp" removed)
101 | ```cpp
102 | ntt test : data = random, size = 0, n = 1000000, mod = 100019
103 | flint n  : 0.406
104 | flint p  : 1.172
105 | ntt32 s  : 2.968
106 | ntt32 l  : 4.531
107 | ntt64 s  : 2.500
108 | ntt64 l  : 5.015
109 | Min_25 s : 0.172
110 | Min_25 l : 0.344
111 | libbf    : 0.812
112 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003
113 | flint n  : 1.234
114 | flint p  : 2.906
115 | ntt32 l  : 9.470
116 | ntt64 l  : 10.530
117 | Min_25 l : 0.719
118 | libbf    : 1.359
119 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779
120 | flint n  : 0.828
121 | flint p  : 2.078
122 | ntt64 l  : 5.077
123 | Min_25 l : 0.391
124 | libbf    : 0.844
125 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003
126 | flint n  : 0.406
127 | flint p  : 1.250
128 | ntt32 s  : 2.970
129 | ntt32 l  : 4.531
130 | ntt64 s  : 2.469
131 | ntt64 l  : 4.952
132 | Min_25 s : 0.156
133 | Min_25 l : 0.328
134 | libbf    : 0.813
135 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003
136 | flint n  : 1.218
137 | flint p  : 2.766
138 | ntt32 l  : 9.483
139 | ntt64 l  : 10.517
140 | Min_25 l : 0.734
141 | libbf    : 1.359
142 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779
143 | flint n  : 0.828
144 | flint p  : 2.078
145 | ntt64 l  : 5.000
146 | Min_25 l : 0.422
147 | libbf    : 0.844
148 | 
149 | mod = 100019
150 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
151 | flint n  0.000  0.000  0.000  0.016  0.000  0.016  0.015  0.032  0.078  0.219  0.421
152 | flint p  0.000  0.000  0.000  0.016  0.016  0.031  0.062  0.140  0.281  0.578  1.234
153 | ntt32 s  0.000  0.000  0.015  0.016  0.031  0.078  0.141  0.297  0.641  1.390  3.000
154 | ntt32 l  0.000  0.000  0.016  0.031  0.047  0.109  0.219  0.469  0.985  2.093  4.546
155 | ntt64 s  0.000  0.000  0.016  0.000  0.015  0.047  0.125  0.266  0.562  1.188  2.500
156 | ntt64 l  0.000  0.000  0.016  0.031  0.047  0.110  0.250  0.531  1.110  2.343  4.999
157 | Min_25 s 0.000  0.000  0.000  0.000  0.016  0.000  0.000  0.031  0.032  0.062  0.171
158 | Min_25 l 0.000  0.000  0.000  0.000  0.000  0.000  0.015  0.047  0.078  0.172  0.360
159 | libbf    0.000  0.000  0.000  0.015  0.000  0.015  0.047  0.094  0.203  0.406  0.812
160 | mod = 100000000003
161 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
162 | flint n  0.000  0.000  0.000  0.000  0.015  0.016  0.031  0.046  0.125  0.391  0.875
163 | flint p  0.000  0.000  0.015  0.016  0.015  0.047  0.094  0.203  0.422  0.907  1.891
164 | ntt32 l  0.000  0.015  0.016  0.016  0.062  0.109  0.219  0.469  1.000  2.140  4.562
165 | ntt64 l  0.000  0.000  0.000  0.032  0.062  0.125  0.250  0.531  1.125  2.375  5.077
166 | Min_25 l 0.000  0.000  0.000  0.000  0.016  0.016  0.031  0.046  0.093  0.187  0.406
167 | libbf    0.000  0.000  0.000  0.000  0.015  0.031  0.078  0.093  0.203  0.422  0.859
168 | mod = 316227766016779
169 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
170 | flint n  0.000  0.000  0.016  0.000  0.016  0.015  0.047  0.093  0.203  0.391  0.828
171 | flint p  0.000  0.000  0.015  0.000  0.016  0.047  0.109  0.234  0.469  1.015  2.140
172 | ntt64 l  0.000  0.000  0.016  0.031  0.063  0.125  0.265  0.531  1.125  2.406  5.109
173 | Min_25 l 0.000  0.000  0.000  0.000  0.015  0.016  0.031  0.047  0.078  0.188  0.422
174 | libbf    0.000  0.000  0.000  0.016  0.000  0.015  0.047  0.094  0.219  0.437  0.859
175 | ```
176 | 


--------------------------------------------------------------------------------
/benchmarks/benchmark_20190918.md:
--------------------------------------------------------------------------------
  1 | # Benchmark
  2 | 
  3 | ## Ntt benchmark
  4 | 
  5 | ### Test Environment:
  6 | 
  7 | * Date: 2019.09.18
  8 | * OS:  Win10 Pro 1903 (18362.356)
  9 | * CPU: i9-9900K (MMX, SSE, SSE2, SSE3, SSE4.1, SSE4.2, EM64T, VT-x, AES, AVX, AVX2, FMA3, TSX)
 10 | * Compiler: MinGW-x86_64-8.1.0-win32-seh-rt_v6-rev0
 11 | * Msys2: msys2-x86_64-20190524
 12 | * Test code: [Ntt test](https://github.com/baihacker/pe/blob/master/test/ntt_test.c)
 13 | * Build libbf:
 14 |   > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o libbf.avx2.o libbf.c
 15 |  
 16 |   > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o cutils.avx2.o cutils.c
 17 |  
 18 |   > gcc-ar crv libbf.avx2.a cutils.avx2.o libbf.avx2.o
 19 | * Build test target:
 20 |   > -o a.exe --std=c++14 -fno-diagnostics-color -O3 -march=native -mtune=native -fopenmp -Wl,--stack,268435456 -static -s -lbf -lgmpxx -lflint -lgmp -lmpfr -lmpir
 21 | 
 22 | ### Test result
 23 | 
 24 | #### Openmp enabled
 25 | ```cpp
 26 | ntt test : data = random, size = 0, n = 1000000, mod = 100019
 27 | flint n  : 0.306
 28 | flint p  : 1.029
 29 | ntt32 s  : 0.816
 30 | ntt32 l  : 0.870
 31 | ntt64 s  : 1.376
 32 | ntt64 l  : 1.422
 33 | Min_25 s : 0.114
 34 | Min_25 l : 0.125
 35 | libbf    : 0.635
 36 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003
 37 | flint n  : 0.919
 38 | flint p  : 2.633
 39 | ntt32 l  : 1.743
 40 | ntt64 l  : 2.857
 41 | Min_25 l : 0.255
 42 | libbf    : 1.084
 43 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779
 44 | flint n  : 0.623
 45 | flint p  : 1.869
 46 | ntt64 l  : 1.429
 47 | Min_25 l : 0.133
 48 | libbf    : 0.682
 49 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003
 50 | flint n  : 0.313
 51 | flint p  : 1.087
 52 | ntt32 s  : 0.813
 53 | ntt32 l  : 0.878
 54 | ntt64 s  : 1.354
 55 | ntt64 l  : 1.419
 56 | Min_25 s : 0.111
 57 | Min_25 l : 0.125
 58 | libbf    : 0.645
 59 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003
 60 | flint n  : 0.922
 61 | flint p  : 2.502
 62 | ntt32 l  : 1.743
 63 | ntt64 l  : 2.839
 64 | Min_25 l : 0.253
 65 | libbf    : 1.088
 66 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779
 67 | flint n  : 0.624
 68 | flint p  : 1.873
 69 | ntt64 l  : 1.406
 70 | Min_25 l : 0.133
 71 | libbf    : 0.682
 72 | 
 73 | mod = 100019
 74 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
 75 | flint n  0.000  0.000  0.001  0.001  0.003  0.006  0.012  0.028  0.059  0.160  0.318
 76 | flint p  0.001  0.001  0.003  0.006  0.014  0.028  0.058  0.123  0.253  0.532  1.097
 77 | ntt32 s  0.001  0.003  0.003  0.007  0.015  0.032  0.048  0.101  0.201  0.403  0.819
 78 | ntt32 l  0.001  0.002  0.004  0.008  0.016  0.032  0.051  0.110  0.223  0.427  0.877
 79 | ntt64 s  0.001  0.002  0.006  0.012  0.025  0.055  0.081  0.165  0.338  0.677  1.376
 80 | ntt64 l  0.001  0.003  0.006  0.012  0.026  0.055  0.081  0.173  0.353  0.704  1.413
 81 | Min_25 s 0.000  0.000  0.001  0.001  0.002  0.002  0.006  0.014  0.027  0.054  0.112
 82 | Min_25 l 0.001  0.001  0.000  0.001  0.003  0.006  0.010  0.016  0.030  0.059  0.128
 83 | libbf    0.000  0.001  0.002  0.005  0.009  0.022  0.048  0.072  0.149  0.310  0.645
 84 | mod = 100000000003
 85 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
 86 | flint n  0.001  0.000  0.001  0.002  0.005  0.010  0.021  0.045  0.099  0.301  0.645
 87 | flint p  0.001  0.002  0.005  0.010  0.021  0.043  0.092  0.190  0.397  0.828  1.727
 88 | ntt32 l  0.001  0.002  0.004  0.008  0.017  0.035  0.053  0.111  0.217  0.439  0.869
 89 | ntt64 l  0.001  0.003  0.005  0.012  0.027  0.057  0.085  0.174  0.352  0.705  1.431
 90 | Min_25 l 0.001  0.001  0.001  0.002  0.004  0.008  0.013  0.020  0.034  0.062  0.133
 91 | libbf    0.001  0.001  0.002  0.004  0.010  0.024  0.045  0.077  0.159  0.330  0.682
 92 | mod = 316227766016779
 93 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
 94 | flint n  0.000  0.000  0.001  0.002  0.005  0.012  0.029  0.072  0.152  0.298  0.625
 95 | flint p  0.001  0.002  0.006  0.011  0.022  0.049  0.104  0.208  0.430  0.910  1.910
 96 | ntt64 l  0.001  0.003  0.006  0.013  0.027  0.056  0.085  0.173  0.350  0.707  1.434
 97 | Min_25 l 0.000  0.001  0.001  0.002  0.003  0.008  0.013  0.020  0.034  0.066  0.157
 98 | libbf    0.000  0.001  0.002  0.004  0.010  0.024  0.051  0.077  0.159  0.333  0.692
 99 | ```
100 | 
101 | #### Openmp disabled (option "-fopenmp" removed)
102 | ```cpp
103 | ntt test : data = random, size = 0, n = 1000000, mod = 100019
104 | flint n  : 0.303
105 | flint p  : 1.063
106 | ntt32 s  : 2.720
107 | ntt32 l  : 4.035
108 | ntt64 s  : 2.284
109 | ntt64 l  : 4.605
110 | Min_25 s : 0.121
111 | Min_25 l : 0.264
112 | libbf    : 0.638
113 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003
114 | flint n  : 0.932
115 | flint p  : 2.630
116 | ntt32 l  : 8.624
117 | ntt64 l  : 9.764
118 | Min_25 l : 0.617
119 | libbf    : 1.083
120 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779
121 | flint n  : 0.628
122 | flint p  : 1.884
123 | ntt64 l  : 4.664
124 | Min_25 l : 0.341
125 | libbf    : 0.682
126 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003
127 | flint n  : 0.312
128 | flint p  : 1.092
129 | ntt32 s  : 2.703
130 | ntt32 l  : 4.038
131 | ntt64 s  : 2.254
132 | ntt64 l  : 4.560
133 | Min_25 s : 0.121
134 | Min_25 l : 0.262
135 | libbf    : 0.650
136 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003
137 | flint n  : 0.929
138 | flint p  : 2.527
139 | ntt32 l  : 8.570
140 | ntt64 l  : 9.727
141 | Min_25 l : 0.648
142 | libbf    : 1.095
143 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779
144 | flint n  : 0.630
145 | flint p  : 1.885
146 | ntt64 l  : 4.601
147 | Min_25 l : 0.332
148 | libbf    : 0.687
149 | 
150 | mod = 100019
151 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
152 | flint n  0.000  0.001  0.000  0.001  0.002  0.005  0.011  0.028  0.060  0.159  0.317
153 | flint p  0.001  0.001  0.003  0.006  0.014  0.029  0.060  0.123  0.251  0.534  1.094
154 | ntt32 s  0.001  0.003  0.006  0.014  0.030  0.064  0.137  0.288  0.607  1.287  2.722
155 | ntt32 l  0.002  0.004  0.009  0.022  0.045  0.096  0.204  0.430  0.901  1.910  4.044
156 | ntt64 s  0.001  0.002  0.006  0.011  0.025  0.053  0.113  0.244  0.516  1.082  2.438
157 | ntt64 l  0.003  0.005  0.012  0.024  0.054  0.115  0.247  0.525  1.096  2.230  4.652
158 | Min_25 s 0.000  0.000  0.001  0.001  0.001  0.003  0.007  0.014  0.029  0.058  0.132
159 | Min_25 l 0.000  0.001  0.000  0.001  0.003  0.006  0.014  0.029  0.061  0.131  0.267
160 | libbf    0.001  0.000  0.002  0.004  0.009  0.019  0.044  0.073  0.150  0.311  0.649
161 | mod = 100000000003
162 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
163 | flint n  0.001  0.000  0.001  0.001  0.005  0.009  0.021  0.046  0.098  0.302  0.651
164 | flint p  0.001  0.002  0.005  0.010  0.022  0.045  0.092  0.190  0.400  -0.662 1.775
165 | ntt32 l  0.002  0.005  0.010  0.021  0.046  0.097  0.206  0.438  0.915  2.034  4.193
166 | ntt64 l  0.002  0.005  0.011  0.025  0.052  0.118  0.247  0.500  1.050  2.217  4.675
167 | Min_25 l 0.000  0.000  0.001  0.001  0.003  0.009  0.018  0.039  0.076  0.158  0.328
168 | libbf    0.001  0.001  0.002  0.005  0.009  0.021  0.047  0.077  0.159  0.328  0.688
169 | mod = 316227766016779
170 | log2(n)  10     11     12     13     14     15     16     17     18     19     20
171 | flint n  0.000  0.001  0.001  0.002  0.005  0.012  0.029  0.072  0.153  0.299  0.630
172 | flint p  0.002  0.003  0.005  0.011  0.023  0.048  0.105  0.213  0.437  0.933  1.912
173 | ntt64 l  0.002  0.005  0.012  0.024  0.052  0.110  0.237  0.499  1.054  2.224  4.694
174 | Min_25 l 0.001  0.001  0.001  0.002  0.004  0.009  0.018  0.038  0.079  0.166  0.344
175 | libbf    0.001  0.001  0.002  0.004  0.010  0.023  0.047  0.076  0.159  0.335  0.699
176 | ```
177 | 


--------------------------------------------------------------------------------
/benchmarks/format_test_result.py:
--------------------------------------------------------------------------------
 1 | #! python
 2 | #-*- coding: utf8 -*-
 3 | import os
 4 | import pprint
 5 | 
 6 | 
 7 | def load_db_data(file):
 8 |   file = file.decode(encoding='utf8', errors='ignore').encode(encoding='gbk',
 9 |                                                               errors='ignore')
10 |   if os.path.exists(file):
11 |     with open(file, 'rb') as tempf:
12 |       result = tempf.read()
13 |       return True, result
14 |   return False, ''
15 | 
16 | 
def load_db():
  """Load the benchmark database stored in 'perf_test_result.txt'.

  The file is looked up relative to the current working directory.

  Returns:
    The Python object obtained by evaluating the file's contents.

  Raises:
    IOError: If 'perf_test_result.txt' cannot be found. Previously the missing
      file was silently ignored and surfaced as a SyntaxError from evaluating
      an empty string.
  """
  found, data = load_db_data('perf_test_result.txt')
  if not found:
    raise IOError("'perf_test_result.txt' not found in the current directory")
  # SECURITY NOTE(review): eval() executes whatever expression the file
  # contains. Only run this on result files you produced yourself; consider
  # ast.literal_eval if the file is guaranteed to hold plain literals.
  return eval(data)
20 | 
21 | 
class MyPrettyPrinter(pprint.PrettyPrinter):
  """PrettyPrinter subclass providing a hook for custom value formatting.

  The hook currently applies no customisation: it forwards every call to the
  base class unchanged.
  """

  def format(self, object, context, maxlevels, level):
    """Return the base class' formatting result for `object` unchanged."""
    formatted = pprint.PrettyPrinter.format(self, object, context, maxlevels,
                                            level)
    return formatted
26 | 
27 | 
def export(db, file):
  """Pretty-print `db` into the text file at path `file`.

  The file is opened in 'w' mode, so existing content is replaced.
  """
  with open(file, 'w') as out:
    printer = MyPrettyPrinter(stream=out)
    printer.pprint(db)
33 | 
34 | 
if __name__ == '__main__':
  # Running the script only loads the database; the export step below is
  # left commented out.
  db = load_db()
  #export(db, "formated.txt")


--------------------------------------------------------------------------------
/build_all.bat:
--------------------------------------------------------------------------------
:: Clean the Bazel output, then (only if the clean succeeded) build every
:: target under //test, //example and the root package.
bazel clean && bazel build //test:all //example:all //:all


--------------------------------------------------------------------------------
/example/BUILD:
--------------------------------------------------------------------------------
 1 | load("//toolchain:pe_toolchain.bzl", "pe_binary")
 2 | 
 3 | package(
 4 |     default_visibility = [
 5 |         "//visibility:public",
 6 |     ],
 7 | )
 8 | 
# One pe_binary per example source; the target name is the file name with its
# last two characters (the ".c" suffix) removed.
[pe_binary(name = x[:-2], srcs = [x]) for x in glob(["*.c"])]
# A second target per example, named "<example>_gcc", built with cc_path "g++"
# and the explicit flags below instead of the default pe flags
# (enable_pe_flags = False).
[pe_binary(name = x[:-2] + "_gcc",
           srcs = [x],
           executable_suffix = ".out",
           enable_pe_flags = False,
           copts = [
             "-std=c++17",
             "-Wno-delete-incomplete",
             "-Wno-shift-count-overflow",
             "-O2",
             "-march=native",
             "-mtune=native",
             "-fopenmp"],
           # Feature switches passed as preprocessor defines. Libraries enabled
           # here (GMP, FLINT, MPFR, NTL) are the ones linked in linkopts.
           defines = [
             "ENABLE_ASSERT=0",
             "TRY_TO_USE_INT128=1",
             "ENABLE_OPENMP=1",
             "ENABLE_EIGEN=0",
             "ENABLE_GMP=1",
             "ENABLE_FLINT=1",
             "ENABLE_MPFR=1",
             "ENABLE_NTL=1",
             "ENABLE_ZMQ=0",
             "ENABLE_LIBBF=0",
             "ENABLE_PRIME_COUNT=0",
             "ENABLE_PRIME_SIEVE=0",
             "TEST_ALL",
             "CONTINUOUS_INTEGRATION_TEST",
             "NO_SUPER_TEST"],
           linkopts = [
             "-fopenmp",
             "-lflint",
             "-lmpfr",
             "-lntl",
             "-lgmp"],
           cc_path = "g++",
           ) for x in glob(["*.c"])]

# Convenience group containing every "<example>_gcc" target defined above.
filegroup(name ="gcc_builds", srcs = [x[:-2] + "_gcc" for x in glob(["*.c"])])


--------------------------------------------------------------------------------
/example/bi_example_pe483.c:
--------------------------------------------------------------------------------
 1 | 
 2 | // Project Euler 483	Repeated permutation
 3 | // reference answer:
 4 | // N = 100:           53817203945.52453
 5 | // output:            53817203945
 6 | // N = 150:           55335570173801.14
 7 | // output:            55335570173801
 8 | #include <pe.hpp>
 9 | using namespace pe;
10 | const int N = 100;
11 | BigInteger choose[505][505];
12 | BigInteger fac[501];
13 | void init() {
14 |   for (int i = 0; i <= 500; ++i)
15 |     for (int j = 0; j <= i; ++j)
16 |       choose[i][j] =
17 |           (j == i || j == 0) ? 1 : choose[i - 1][j] + choose[i - 1][j - 1];
18 |   fac[0] = 1;
19 |   for (int i = 1; i <= 500; ++i) fac[i] = fac[i - 1] * i;
20 | }
21 | BigInteger dp[N + 1];
22 | std::map<int128, BigInteger> orz[N + 1];
23 | int main() {
24 |   init();
25 |   dp[0] = 1;
26 |   orz[0][1] = 1;
27 |   for (int i = 1; i <= N; ++i) {
28 |     std::cerr << i << " ";
29 |     for (int j = N; j >= i; --j) {
30 |       BigInteger total = 0;
31 |       std::map<int128, BigInteger> inc;
32 |       const int curr_step = i;
33 |       const int n = j;
34 |       for (int x = 1; x * curr_step <= n; ++x) {
35 |         BigInteger t = 1, u = 1;
36 |         for (int i = 0, j = n; i < x; ++i) {
37 |           t = t * choose[j][curr_step];
38 |           j -= curr_step;
39 |           u = u * fac[curr_step - 1];
40 |         }
41 |         BigInteger magic = t / fac[x] * u;
42 |         total += magic * dp[n - curr_step * x];
43 |         foreach (it, orz[n - x * curr_step]) {
44 |           int128 d = Gcd((int128)curr_step, it.first);
45 |           int128 now = curr_step / d * it.first;
46 |           inc[now] += it.second * magic;
47 |         }
48 |       }
49 |       dp[n] += total;
50 |       foreach (it, inc) orz[n][it.first] += it.second;
51 |     }
52 |     std::cerr << dp[N] << std::endl;
53 |   }
54 | 
55 |   std::cerr << dp[N] << std::endl;
56 |   BigInteger s = 0;
57 |   foreach (it, orz[N])
58 |     s += BigInteger(it.first) * BigInteger(it.first) * it.second;
59 |   std::cerr << s / dp[N] << std::endl;
60 |   return 0;
61 | }
62 | 


--------------------------------------------------------------------------------
/example/billion_sort.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | #if OS_TYPE_WIN
 5 | const int64 N = 1000000000;
 6 | LargeMemory lm;
 7 | 
 8 | int main() {
 9 |   float* data = reinterpret_cast<float*>(lm.Allocate(N * sizeof(float)));
10 |   dbg("memory ready");
11 | 
12 |   for (int i = 0; i < N; ++i) data[i] = 1. * rand() / RAND_MAX;
13 |   dbg("data ready");
14 | 
15 |   TimeRecorder tr;
16 |   ParallelSort<30>(data, data + N);
17 |   // std::sort(data, data+N);
18 |   dbg("sorted");
19 | 
20 |   std::cerr << tr.Elapsed().Format() << std::endl;
21 |   return 0;
22 | }
23 | #else
24 | int main() { return 0; }
25 | #endif


--------------------------------------------------------------------------------
/example/bit.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | using namespace std;
 4 | 
 5 | int main() {
 6 |   TableFormatter tf;
 7 |   auto& line = tf.AppendLine();
 8 |   line.push_back("n");
 9 |   line.push_back("BitWidth");
10 |   line.push_back("BitFloor");
11 |   line.push_back("BitCeil");
12 |   line.push_back("HighestBitIndex");
13 |   line.push_back("HighestBit");
14 |   line.push_back("LowestBitIndex");
15 |   line.push_back("LowestBit");
16 |   for (int n = 0; n < 32; ++n) {
17 |     auto& line = tf.AppendLine();
18 |     line.push_back(ToString(n));
19 |     line.push_back(ToString(BitWidth(n)));
20 |     line.push_back(ToString(BitFloor(n)));
21 |     line.push_back(ToString(BitCeil(n)));
22 |     line.push_back(ToString(HighestBitIndex(n)));
23 |     line.push_back(ToString(HighestBit(n)));
24 |     line.push_back(ToString(LowestBitIndex(n)));
25 |     line.push_back(ToString(LowestBit(n)));
26 |   }
27 |   tf.Render(std::cout);
28 |   return 0;
29 | }
30 | 
31 | /*
32 | n     BitWidth    BitFloor    BitCeil    HighestBitIndex    HighestBit    LowestBitIndex    LowestBit
33 | 0     0           0           1          -1                 0             -1                0
34 | 1     1           1           1          0                  1             0                 1
35 | 2     2           2           2          1                  2             1                 2
36 | 3     2           2           4          1                  2             0                 1
37 | 4     3           4           4          2                  4             2                 4
38 | 5     3           4           8          2                  4             0                 1
39 | 6     3           4           8          2                  4             1                 2
40 | 7     3           4           8          2                  4             0                 1
41 | 8     4           8           8          3                  8             3                 8
42 | 9     4           8           16         3                  8             0                 1
43 | 10    4           8           16         3                  8             1                 2
44 | 11    4           8           16         3                  8             0                 1
45 | 12    4           8           16         3                  8             2                 4
46 | 13    4           8           16         3                  8             0                 1
47 | 14    4           8           16         3                  8             1                 2
48 | 15    4           8           16         3                  8             0                 1
49 | 16    5           16          16         4                  16            4                 16
50 | 17    5           16          32         4                  16            0                 1
51 | 18    5           16          32         4                  16            1                 2
52 | 19    5           16          32         4                  16            0                 1
53 | 20    5           16          32         4                  16            2                 4
54 | 21    5           16          32         4                  16            0                 1
55 | 22    5           16          32         4                  16            1                 2
56 | 23    5           16          32         4                  16            0                 1
57 | 24    5           16          32         4                  16            3                 8
58 | 25    5           16          32         4                  16            0                 1
59 | 26    5           16          32         4                  16            1                 2
60 | 27    5           16          32         4                  16            0                 1
61 | 28    5           16          32         4                  16            2                 4
62 | 29    5           16          32         4                  16            0                 1
63 | 30    5           16          32         4                  16            1                 2
64 | 31    5           16          32         4                  16            0                 1
65 | */


--------------------------------------------------------------------------------
/example/build_each.bat:
--------------------------------------------------------------------------------
:: Run build_each.py (presumably through the .py file association), then keep
:: the console window open until a key is pressed.
build_each.py
pause


--------------------------------------------------------------------------------
/example/build_each.py:
--------------------------------------------------------------------------------
 1 | #! python3
 2 | # -*- coding: UTF-8 -*-
 3 | import os
 4 | import sys
 5 | import time
 6 | 
 7 | CURRENT_DIRECTORY = os.getcwd()
 8 | 
 9 | 
def DurationPartsFromNs(duration):
  """Split a duration in nanoseconds into (minutes, seconds, milliseconds).

  Minutes are not capped at 59, and sub-millisecond precision is discarded.
  """
  whole_millis = duration // 1000000
  whole_seconds, millisec_part = divmod(whole_millis, 1000)
  min_part, sec_part = divmod(whole_seconds, 60)
  return (min_part, sec_part, millisec_part)
15 | 
16 | 
def FormatNs(duration):
  """Format a duration in nanoseconds as 'M:SS.mmm'."""
  minutes, seconds, millis = DurationPartsFromNs(duration)
  return '%d:%02d.%03d' % (minutes, seconds, millis)
19 | 
20 | 
def main():
  """Compile every '.c' file in CURRENT_DIRECTORY with 'pe++.py <file> -hc'.

  Compilation stops at the first failure. A leftover 'a.exe' in the working
  directory is removed afterwards.

  Returns:
    0 if every compilation succeeded (or there was nothing to compile),
    otherwise a non-zero value in the range 1..255 that is safe to pass to
    sys.exit.
  """
  ret = 0
  # os.listdir returns entries in arbitrary order; sort so the build order,
  # and therefore the first reported failure, is reproducible.
  for file in sorted(os.listdir(CURRENT_DIRECTORY)):
    _, file_ext_name = os.path.splitext(file)
    if file_ext_name != '.c':
      continue
    print('Compile %s' % file)
    start_time = time.perf_counter_ns()
    ret = os.system('pe++.py %s -hc' % file)
    time_usage = FormatNs(time.perf_counter_ns() - start_time)
    print('Done, return code = %d, time usage = %s' % (ret, time_usage))
    print()
    if ret != 0:
      print('Failed to compile %s' % file)
      break
  if os.path.exists('a.exe'):
    os.remove('a.exe')
  # On POSIX os.system returns a wait status (e.g. 256 for exit code 1). The
  # process exit status keeps only the low 8 bits, so sys.exit(256) would
  # report success. Map any out-of-range failure value to 1.
  if ret != 0 and not 0 < ret < 256:
    ret = 1
  return ret
39 | 
40 | 
if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  sys.exit(main())
43 | 


--------------------------------------------------------------------------------
/example/continued_fraction.c:
--------------------------------------------------------------------------------
 1 | #include "pe.hpp"
 2 | using namespace pe;
 3 | 
 4 | template <typename T>
 5 | void demo() {
 6 |   std::vector<int> data = {1, 2, 2, 2, 2, 2, 2, 2, 2, 2};
 7 |   for (int i = 0; i < 10; ++i) {
 8 |     std::cout << FromCf<T>(data, i) << std::endl;
 9 |   }
10 |   std::cout << FromCfN<T>(data) << std::endl;
11 |   for (int i = 50; i <= 50; ++i) {
12 |     std::cout << i << " " << ToCf<T>(0, 1, 6, 1, i) << std::endl;
13 |   }
14 | 
15 |   std::cout << ToCf<T>(0, 1, 2, 1, 10) << std::endl;
16 |   std::cout << FromCf<T>(ToCf<T>(0, 1, 2, 1, 50)) << std::endl;
17 | 
18 |   std::cout << ToCf<T>(123456, 654321) << std::endl;
19 |   std::cout << FromCf<T>(ToCf<T>(123456, 654321)) << std::endl;
20 | }
21 | 
// Runs the demo with BigInteger and, when GMP support is compiled in
// (ENABLE_GMP), once more with MpInteger.
int main() {
  demo<BigInteger>();
#if ENABLE_GMP
  demo<MpInteger>();
#endif
  return 0;
}
29 | 


--------------------------------------------------------------------------------
/example/count_carlitz_words.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | // Given the number of each letter, find the number of words without adjacent
 5 | // letter of the same
 6 | 
 7 | int brute_force(const std::vector<int64>& vec) {
 8 |   int size = 0;
 9 |   for (auto& iter : vec) size += iter;
10 | 
11 |   std::vector<int> data;
12 |   int id = 0;
13 |   for (auto iter : vec) {
14 |     for (int j = 0; j < iter; ++j) data.push_back(id);
15 |     ++id;
16 |   }
17 | 
18 |   int ret = 0;
19 |   do {
20 |     int ok = 1;
21 |     for (int i = 0; i < size - 1; ++i) {
22 |       int idx = (i + 1) % size;
23 |       if (i != idx && data[i] == data[idx]) {
24 |         ok = 0;
25 |         break;
26 |       }
27 |     }
28 |     ret += ok;
29 |   } while (next_permutation(data.begin(), data.end()));
30 |   return ret;
31 | }
32 | 
33 | int main() {
34 |   CarlitzWordsCounter counter(1000000007, 1000000);
35 | 
36 |   std::vector<std::vector<int64>> test_data = {
37 |       {1},    {2},          {1, 1},    {1, 3},       {2, 2},    {3, 7},
38 |       {2, 3}, {2, 2, 2},    {2, 3, 3}, {3, 3, 2, 2}, {3, 3, 3}, {2, 2, 2, 2, 2},
39 |       {4, 4}, {4, 4, 2, 2}, {5, 5, 5},
40 |   };
41 | 
42 |   for (const std::vector<int64>& iter : test_data) {
43 |     std::cout << brute_force(iter) << " " << counter.Cal(iter) << std::endl;
44 |   }
45 |   return 0;
46 | }


--------------------------------------------------------------------------------
/example/dfa_counter.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | // Count the number which contains 3.
 5 | 
 6 | int64 cal(int64 n) {
 7 |   for (; n; n /= 10)
 8 |     if (n % 10 == 3) return 1;
 9 |   return 0;
10 | }
11 | 
// Brute force: sums cal(k) over the values produced by XRange<int64>(1LL, n).
// NOTE(review): whether n itself is included depends on XRange - confirm.
int64 bf(int64 n) { return XRange<int64>(1LL, n).Map<int64>(cal).Sum(); }
13 | 
14 | int main() {
15 |   PE_INIT(maxp = 1000000);
16 | 
17 |   // state 0: initial state
18 |   // state 1: 3 is not seen
19 |   // state 2: 3 is seen
20 |   DfaCounter<int64> counter1;
21 |   counter1.Init(3, 10, 16);
22 |   for (int i = 1; i <= 9; ++i)
23 |     if (i != 3) counter1.AddTrans(0, i, 1);
24 |   counter1.AddTrans(0, 3, 2);
25 |   counter1.AddTrans(0, 0, 0);
26 |   for (int i = 0; i <= 9; ++i)
27 |     if (i != 3) counter1.AddTrans(1, i, 1);
28 |   counter1.AddTrans(1, 3, 2);
29 |   for (int i = 0; i <= 9; ++i) counter1.AddTrans(2, i, 2);
30 |   counter1.MarkTargetState(2);
31 | 
32 |   // In counter2, the dfa doesn't accept leading zeros, in other words, if it's
33 |   // initial state and 0 comes, the targe state is invalid. So we need to call
34 |   // set_count_each_len(1).
35 |   //
36 |   // state 0: initial state
37 |   // state 1: 3 is not seen
38 |   // state 2: 3 is seen
39 |   // state 3: invalid
40 |   DfaCounter<int64> counter2;
41 |   counter2.Init(4, 10, 16);
42 |   for (int i = 1; i <= 9; ++i)
43 |     if (i != 3) counter2.AddTrans(0, i, 1);
44 |   counter2.AddTrans(0, 3, 2);
45 |   for (int i = 0; i <= 9; ++i)
46 |     if (i != 3) counter2.AddTrans(1, i, 1);
47 |   counter2.AddTrans(1, 3, 2);
48 |   for (int i = 0; i <= 9; ++i) counter2.AddTrans(2, i, 2);
49 |   counter2.MarkTargetState(2);
50 |   counter2.AddTrans(0, 0, 3);
51 |   for (int i = 0; i <= 9; ++i) counter2.AddTrans(3, i, 3);
52 |   counter2.set_count_each_len(1);
53 | 
54 |   for (int64 n = 10; n <= 1000000; n *= 10) {
55 |     auto a = bf(n);
56 |     auto b = counter1.Cal(n);
57 |     auto c = counter2.Cal(n);
58 |     std::cout << n << "\t" << a << "\t" << b << "\t" << c << std::endl;
59 |   }
60 |   return 0;
61 | }


--------------------------------------------------------------------------------
/example/dfa_summer.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | // Sum, modulo `mod`, the cubes of the numbers whose decimal digits contain a 3.
 5 | const int64 mod = 1000000007;
 6 | using MT = NMod64<mod>;
 7 | 
 8 | int64 cal(int64 n) {
 9 |   const int64 m = n;
10 |   for (; n; n /= 10)
11 |     if (n % 10 == 3) return m * m % mod * m % mod;
12 |   return 0;
13 | }
14 | 
// Brute force: combines cal(k) over the values produced by
// XRange<int64>(1LL, n) with SumMod(mod).
// NOTE(review): whether n itself is included depends on XRange - confirm.
int64 bf(int64 n) { return XRange<int64>(1LL, n).Map<int64>(cal).SumMod(mod); }
16 | 
17 | int main() {
18 |   PE_INIT(maxp = 1000000);
19 | 
20 |   // state 0: initial state
21 |   // state 1: 3 is not seen
22 |   // state 2: 3 is seen
23 |   DfaSummer<MT> summer1;
24 |   summer1.Init(3, 3, 10, 16);
25 |   for (int i = 1; i <= 9; ++i)
26 |     if (i != 3) summer1.AddTrans(0, i, 1);
27 |   summer1.AddTrans(0, 3, 2);
28 |   summer1.AddTrans(0, 0, 0);
29 |   for (int i = 0; i <= 9; ++i)
30 |     if (i != 3) summer1.AddTrans(1, i, 1);
31 |   summer1.AddTrans(1, 3, 2);
32 |   for (int i = 0; i <= 9; ++i) summer1.AddTrans(2, i, 2);
33 |   summer1.MarkTargetState(2);
34 | 
35 |   // In summer2, the dfa doesn't accept leading zeros, in other words, if it's
36 |   // initial state and 0 comes, the targe state is invalid. So we need to call
37 |   // set_count_each_len(1).
38 |   //
39 |   // state 0: initial state
40 |   // state 1: 3 is not seen
41 |   // state 2: 3 is seen
42 |   // state 3: invalid
43 |   DfaSummer<MT> summer2;
44 |   summer2.Init(4, 3, 10, 16);
45 |   for (int i = 1; i <= 9; ++i)
46 |     if (i != 3) summer2.AddTrans(0, i, 1);
47 |   summer2.AddTrans(0, 3, 2);
48 |   for (int i = 0; i <= 9; ++i)
49 |     if (i != 3) summer2.AddTrans(1, i, 1);
50 |   summer2.AddTrans(1, 3, 2);
51 |   for (int i = 0; i <= 9; ++i) summer2.AddTrans(2, i, 2);
52 |   summer2.MarkTargetState(2);
53 |   summer2.AddTrans(0, 0, 3);
54 |   for (int i = 0; i <= 9; ++i) summer2.AddTrans(3, i, 3);
55 |   summer2.set_count_each_len(1);
56 | 
57 |   for (int64 n = 10; n <= 1000000; n *= 10) {
58 |     auto a = bf(n);
59 |     auto b = summer1.Cal(n);
60 |     auto c = summer2.Cal(n);
61 |     std::cout << n << "\t" << a << "\t" << b << "\t" << c << std::endl;
62 |   }
63 |   return 0;
64 | }


--------------------------------------------------------------------------------
/example/dva.c:
--------------------------------------------------------------------------------
  1 | #include <pe.hpp>
  2 | using namespace std;
  3 | using namespace pe;
  4 | 
  5 | const int64 mod = 1000000007;
  6 | using MT = NMod64<mod>;
  7 | 
// https://en.wikipedia.org/wiki/Arithmetic_function#Relations_among_the_functions
//
// Checks several convolution identities between arithmetic functions: each
// identity is evaluated with DVAConv and compared entry by entry (PE_ASSERT)
// against a reference table that is either produced by a library helper or
// computed by brute force for every i in 1..n.
void ConvolutionExamples() {
  const int64 n = 10000000;

  // Reference tables from the library's MakePrefixSum* helpers.
  auto eps = MakePrefixSumEpsilon<MT>(n);
  auto one = MakePrefixSumOne<MT>(n);
  auto mu = MakePrefixSumMu<MT>(n);
  auto id = MakePrefixSumId<MT>(n);
  auto phi = MakePrefixSumPhi<MT>(n);
  {
    // eps = one * mu
    auto t = DVAConv<MT>(one, mu);
    // Entries are compared from index 1 on; index 0 is skipped.
    for (int i = 1; i < t.key_size; ++i) {
      PE_ASSERT(t.values[i] == eps.values[i]);
      // cout << t.values[i] << " " << eps.values[i] << endl;
      // cout << (t.values[i] == eps.values[i]) << endl;
    }
  }
  {
    // phi = mu * id
    auto t = DVAConv<MT>(mu, id);
    for (int i = 1; i < t.key_size; ++i) {
      PE_ASSERT(t.values[i] == phi.values[i]);
    }
  }
  {
    // id = one * phi
    auto t = DVAConv<MT>(one, phi);
    for (int i = 1; i < t.key_size; ++i) {
      PE_ASSERT(t.values[i] == id.values[i]);
    }
  }

  // Divisor count
  DVA<MT> d0(n);
  {
    // Brute force: for every i, add the product of (iter.second + 1) over the
    // entries of Factorize(i) - the divisor count, assuming `second` is the
    // prime exponent - then turn the stored values into running totals.
    for (int i = 1; i <= n; ++i) {
      int64 me = 1;
      for (auto iter : Factorize(i)) me *= iter.second + 1;
      d0[i] += me;
    }
    for (int i = 1; i < d0.key_size; ++i) d0.values[i] += d0.values[i - 1];
  }
  {
    // d0 = one * one
    // d0 = id0 * one
    auto t = DVAConv<MT>(one, one);
    for (int i = 1; i < t.key_size; ++i) {
      PE_ASSERT(t.values[i] == d0.values[i]);
    }
  }

  // Divisor sum
  DVA<MT> d1(n);
  {
    // Brute force: add the sum of the values returned by GetFactors(i), then
    // accumulate running totals.
    for (int i = 1; i <= n; ++i) {
      int64 me = 0;
      for (auto iter : GetFactors(i)) me += iter;
      d1[i] += me;
    }
    for (int i = 1; i < d1.key_size; ++i) d1.values[i] += d1.values[i - 1];
  }
  {
    // d1 = id * one
    // d1 = id1 * one
    auto t = DVAConv<MT>(id, one);
    for (int i = 1; i < t.key_size; ++i) {
      PE_ASSERT(t.values[i] == d1.values[i]);
    }
  }

  // Divisor square sum
  DVA<MT> d2(n);
  {
    // Brute force: same as above with each factor squared.
    for (int i = 1; i <= n; ++i) {
      int64 me = 0;
      for (auto iter : GetFactors(i)) me += iter * iter;
      d2[i] += me;
    }
    for (int i = 1; i < d2.key_size; ++i) d2.values[i] += d2.values[i - 1];
  }
  // id2 is filled directly: each entry is P2SumMod(key, mod).
  DVA<MT> id2(n);
  {
    for (int i = 1; i < id2.key_size; ++i) {
      id2.values[i] = P2SumMod(id2.keys[i], mod);
    }
  }
  {
    // d2 = id2 * one
    auto t = DVAConv<MT>(id2, one);
    for (int i = 1; i < t.key_size; ++i) {
      PE_ASSERT(t.values[i] == d2.values[i]);
    }
  }

  // Prime omega
  // https://en.wikipedia.org/wiki/Prime_omega_function
  DVA<MT> omega(n);
  {
    // Brute force: add the number of entries returned by Factorize(i), then
    // accumulate running totals.
    for (int i = 1; i <= n; ++i) {
      omega[i] += Factorize(i).size();
    }
    for (int i = 1; i < omega.key_size; ++i)
      omega.values[i] += omega.values[i - 1];
  }
  {
    // omega = primeq * one
    auto t = DVAConv<MT>(PrimeS0<MT>(n), one);
    for (int i = 1; i < t.key_size; ++i) {
      PE_ASSERT(t.values[i] == omega.values[i]);
    }
  }
}
121 | 
// Initialises the library with maxp equal to the n used by
// ConvolutionExamples and with the cal_phi / cal_mu options switched on, then
// runs the checks.
int main() {
  PE_INIT(maxp = 10000000, cal_phi = 1, cal_mu = 1);
  ConvolutionExamples();
  return 0;
}
127 | 


--------------------------------------------------------------------------------
/example/linear_recurrence.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | const int64 mod = 1000000007;
 5 | 
 6 | int64 solve_nth(int64 n) {
 7 |   auto ans = MatrixPowerMod<mod>(
 8 |       [=](auto& m, auto& v) {
 9 |         m(0, 0) = 1;
10 |         m(0, 1) = 1;
11 |         m(1, 0) = 1;
12 |         v[0] = 1;
13 |         v[1] = 0;
14 |       },
15 |       2, n);
16 |   return ans[1];
17 | }
18 | 
19 | int64 solve_nth_sum(int64 n) {
20 |   auto ans = MatrixPowerMod<mod>(
21 |       [=](auto& m, auto& v) {
22 |         m(0, 0) = 1;
23 |         m(0, 1) = 1;
24 |         m(1, 0) = 1;
25 |         m(2, 0) = 1;
26 |         m(2, 2) = 1;
27 |         v[0] = 1;
28 |         v[1] = 0;
29 |         v[2] = 0;
30 |       },
31 |       3, n);
32 |   return ans[2];
33 | }
34 | 
35 | int main() {
36 |   const std::vector<int64> init = {0LL, 1LL, 1LL, 2LL, 3LL, 5LL, 8LL};
37 | 
38 |   for (int64 n = 1; n <= 1000000000; n *= 10) {
39 |     // Use Berlekamp Massey algorithm to find the recurrence.
40 |     // The minimal initial element count is: 2 * order + 1
41 |     const int64 ans0 = *FindLinearRecurrenceValueAt(init, n, mod);
42 |     // Compute the nth element assuming the recurrence is known.
43 |     const int64 ans1 =
44 |         LinearRecurrenceValueAt({mod - 1, mod - 1, 1}, init, n, mod);
45 |     // Use matric multiplication to compute the nth element.
46 |     const int64 ans2 = solve_nth(n);
47 |     std::cout << "n = " << n << std::endl;
48 |     std::cout << "Ans0 = " << ans0 << std::endl;
49 |     std::cout << "Ans1 = " << ans1 << std::endl;
50 |     std::cout << "Ans2 = " << ans2 << std::endl;
51 |     std::cout << std::endl;
52 |   }
53 | 
54 |   for (int64 n = 1; n <= 1000000000; n *= 10) {
55 |     // The minimal initial element count is: 2 * (order + 1) + 1
56 |     const int64 ans1 =
57 |         LinearRecurrenceSumAt({mod - 1, mod - 1, 1}, init, n, mod);
58 |     const int64 ans2 = solve_nth_sum(n);
59 |     std::cout << "n = " << n << std::endl;
60 |     std::cout << "Ans1 = " << ans1 << std::endl;
61 |     std::cout << "Ans2 = " << ans2 << std::endl;
62 |     std::cout << std::endl;
63 |   }
64 |   return 0;
65 | }


--------------------------------------------------------------------------------
/example/matrix_power.c:
--------------------------------------------------------------------------------
  1 | #include <pe.hpp>
  2 | using namespace pe;
  3 | 
  4 | const int64 mod = 316227766016779;
  5 | using MT = NMod64<mod>;
  6 | 
  7 | // Computes
  8 | // |1 1|^n *  |1|
  9 | // |1 0|      |0|
 10 | 
 11 | // Mod is specified at compiling time.
 12 | // The element type is choosed internally.
 13 | int64 solve0(int64 n) {
 14 |   auto ans = MatrixPowerMod<mod>(
 15 |       [=](auto& m, auto& v) {
 16 |         m(0, 0) = 1;
 17 |         m(0, 1) = 1;
 18 |         m(1, 0) = 1;
 19 |         v[0] = 1;
 20 |         v[1] = 0;
 21 |       },
 22 |       2, n);
 23 |   return ans[0];
 24 | }
 25 | 
 26 | // Mod is associated with T at compiling time.
 27 | int64 solve1(int64 n) {
 28 |   auto ans = MatrixPowerMod<MT>(
 29 |       [=](auto& m, auto& v) {
 30 |         m(0, 0) = 1;
 31 |         m(0, 1) = 1;
 32 |         m(1, 0) = 1;
 33 |         v[0] = 1;
 34 |         v[1] = 0;
 35 |       },
 36 |       2, n);
 37 |   return ans[0].value();
 38 | }
 39 | 
 40 | #if ENABLE_EIGEN
 41 | // Mod is associated with T at runtime.
 42 | // Different threads use different mod.
 43 | int64 solve2(int64 n, int64 rmod) {
 44 |   SetEigenNbThreads(1);
 45 |   TLMod64::Set(rmod);
 46 |   auto ans = MatrixPowerMod<TLNMod64<>>(
 47 |       [=](auto& m, auto& v) {
 48 |         m(0, 0) = 1;
 49 |         m(0, 1) = 1;
 50 |         m(1, 0) = 1;
 51 |         v[0] = 1;
 52 |         v[1] = 0;
 53 |       },
 54 |       2, n);
 55 |   SetEigenNbThreads(0);
 56 |   return ans[0].value();
 57 | }
 58 | #endif
 59 | 
 60 | // Mod is associated with T at runtime.
 61 | // All the threads use the same mod.
 62 | int64 solve3(int64 n, int64 rmod) {
 63 |   using T = NModNumber<DefaultMod>;
 64 |   DefaultMod::Set(rmod);
 65 |   auto ans = MatrixPowerMod<T>(
 66 |       [=](auto& m, auto& v) {
 67 |         m(0, 0) = 1;
 68 |         m(0, 1) = 1;
 69 |         m(1, 0) = 1;
 70 |         v[0] = 1;
 71 |         v[1] = 0;
 72 |       },
 73 |       2, n);
 74 |   return ans[0].value();
 75 | }
 76 | 
 77 | // The modulus is passed as a runtime argument.
 78 | // When int128 is available it is used as the element type, so different
 79 | // threads may use different moduli.
 80 | // Otherwise DefaultMod is used and all threads share one modulus; that is the
 81 | // same as solve3, except that solve3 must set the default modulus explicitly
 82 | // while solve4 sets it automatically.
 83 | int64 solve4(int64 n, int64 mod) {
 84 |   // Sets m(0,0), m(0,1), m(1,0) to 1 and the start vector to (1, 0).
 85 |   auto init = [=](auto& mat, auto& vec) {
 86 |     mat(0, 0) = 1;
 87 |     mat(0, 1) = 1;
 88 |     mat(1, 0) = 1;
 89 |     vec[0] = 1;
 90 |     vec[1] = 0;
 91 |   };
 92 |   auto powered = MatrixPowerMod(init, 2, n, mod);
 93 |   return powered[0];
 94 | }
 95 | 
 96 | int main() {
 97 |   PE_INIT(maxp = 200000000);
 98 |   // Part 1: fixed modulus `mod`, exponents 1, 10, ..., 10^9.
 99 |   for (int64 n = 1; n <= 1000000000; n *= 10) {
100 |     std::cout << "n = " << n << std::endl;
101 |     const int64 r0 = solve0(n);
102 |     const int64 r1 = solve1(n);
103 | #if ENABLE_EIGEN
104 |     const int64 r2 = solve2(n, mod);
105 | #endif
106 |     const int64 r3 = solve3(n, mod);
107 |     const int64 r4 = solve4(n, mod);
108 |     std::cout << "Ans0 = " << r0 << std::endl;
109 |     std::cout << "Ans1 = " << r1 << std::endl;
110 | #if ENABLE_EIGEN
111 |     std::cout << "Ans2 = " << r2 << std::endl;
112 | #endif
113 |     std::cout << "Ans3 = " << r3 << std::endl;
114 |     std::cout << "Ans4 = " << r4 << std::endl;
115 |     std::cout << std::endl;
116 |   }
117 |   // Part 2: fixed exponent, every prime modulus in [mod, mod + 100].
118 |   for (int64 rmod = mod; rmod <= mod + 100; ++rmod) {
119 |     if (!IsPrime(rmod)) continue;
120 |     const int64 n = 1000000000;
121 |     std::cout << "n = " << n << std::endl;
122 |     std::cout << "rmod = " << rmod << std::endl;
123 | #if ENABLE_EIGEN
124 |     const int64 r2 = solve2(n, rmod);
125 | #endif
126 |     const int64 r3 = solve3(n, rmod);
127 |     const int64 r4 = solve4(n, rmod);
128 | #if ENABLE_EIGEN
129 |     std::cout << "Ans2 = " << r2 << std::endl;
130 | #endif
131 |     std::cout << "Ans3 = " << r3 << std::endl;
132 |     std::cout << "Ans4 = " << r4 << std::endl;
133 |     std::cout << std::endl;
134 |   }
135 | 
136 |   return 0;
137 | }


--------------------------------------------------------------------------------
/example/mma_find_recurrence.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | // http://oeis.org/A001499
 5 | // OEIS gives two recurrence formulas:
 6 | // 2 a[n] = 2 n (n-1) a[n-1] + n (n-1)^2 a[n-2]
 7 | // 2 a[n] = n (n-1)^2 ((2 n - 3) a[n-2] + (n-2)^2 a[n-3])
 8 | //
 9 | // This example uses some sample data to guess the formula.
10 | // Note: the result may not be unique.
11 | 
12 | int main() {
13 |   mma::FRHelper<BigInteger> fr;
14 |   // Sample terms used to guess the recurrence.
15 |   fr.set_values({1, 0, 1, 6, 90, 2040, 67950, 3110940, 187530840, 14398171200,
16 |                  1371785398200})
17 |       // Usually every available check point can be used, but a sequence
18 |       // may follow different recurrences depending on n: for example one
19 |       // formula when n is odd and a different one when n is
20 |       // even.
21 |       .set_check_points({3, 5, 7})
22 |       .set_offsets({1, 2})  // Offsets: tuned manually
23 |       .set_max_degree(5)    // Polynomial degree: tuned manually
24 |       .set_max_abs_coe(4)   // Coefficient: tuned manually
25 |       .set_leading(2);      // Leading: tuned manually
26 | 
27 |   std::cout << fr << std::endl << std::endl;
28 | 
29 |   // Output of the generated Mathematica command:
30 |   const std::string solution =
31 |       "{{x1p0 -> 0, x1p1 -> -2, x1p2 -> 2, x1p3 -> 0, x1p4 -> 0, x1p5 "
32 |       "-> 0, x2p0 -> 0, x2p1 -> 1, x2p2 -> -2, x2p3 -> 1, x2p4 -> 0, "
33 |       "x2p5 -> 0}}";
34 | 
35 |   // Check the solution against the sample data.
36 |   fr.Validate(solution);
37 | 
38 |   // Check the solution against longer sequences built from both recurrences.
39 |   std::vector<BigInteger> seq_a = {1, 0, 1, 6, 90, 2040, 67950};
40 |   for (int64 i = 7; i < 100; ++i) {
41 |     BigInteger twice = 2 * i * (i - 1) * seq_a[i - 1] + i * sq(i - 1) * seq_a[i - 2];
42 |     seq_a.push_back(twice / 2);
43 |   }
44 | 
45 |   std::vector<BigInteger> seq_b = {1, 0, 1, 6, 90, 2040, 67950};
46 |   for (int64 i = 7; i < 100; ++i) {
47 |     BigInteger inner = (2 * i - 3) * seq_b[i - 2] + sq(i - 2) * seq_b[i - 3];
48 |     seq_b.push_back(i * sq(i - 1) * inner / 2);
49 |   }
50 | 
51 |   fr.Validate(seq_a, solution);
52 |   fr.Validate(seq_b, solution);
53 | 
54 |   return 0;
55 | }


--------------------------------------------------------------------------------
/example/mma_interpolating_polynomial.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | int64 f(int64 x) { return 1 + x * (2 + x * (3 + 4 * x)); }  // Horner form
 5 | 
 6 | int64 g(int64 x, int64 y) {
 7 |   return 5 * x * x + 4 * x * y + 6 * y * y + 2 * x + 3 * y + 1;
 8 | }
 9 | 
10 | int main() {
11 |   PE_INIT(maxp = 2000000);
12 |   {  // Case 1: sample f at x = 0..10 and print the one-variable command.
13 |     mma::IntPoly ip;
14 |     ip.Reset({"x"});  // one variable x.
15 |     for (int64 x = 0; x <= 10; ++x) {
16 |       ip.Add({x}, f(x));
17 |     }
18 |     std::cout << ip << std::endl;
19 |     // output:
20 |     // InterpolatingPolynomial[{{0, 1}, {1, 10}, {2, 49}, {3, 142}, {4, 313},
21 |     // {5, 586}, {6, 985}, {7, 1534}, {8, 2257}, {9, 3178}, {10, 4321}}, x]
22 |     // Simplified mma output:
23 |     // 1 + 2 x + 3 x^2 + 4 x^3
24 |   }
25 |   {  // Case 2: sample g on the grid x, y = 0..4 and print the linear system.
26 |     mma::IntPoly2D ip(2,  // x's degree
27 |                       2,  // y's degree
28 |                       2   // max a + b of terms in the form of x^a y^b
29 |     );
30 |     for (int64 x = 0; x <= 4; ++x)
31 |       for (int64 y = 0; y <= 4; ++y) {
32 |         ip.Add({x, y}, g(x, y));
33 |       }
34 |     std::cout << ip << std::endl;
35 |     // output:
36 |     // Values[Solve[{1 x0 + 0 x1 + 0 x2 + 0 x3 + 0 x4 + 0 x5 == 1 && 1 x0 + 1 x1
37 |     // + 1 x2 + 0 x3 + 0 x4 + 0 x5 == 8 && 1 x0 + 2 x1 + 4 x2 + 0 x3 + 0 x4 + 0
38 |     // x5 == 25 && 1 x0 + 3 x1 + 9 x2 + 0 x3 + 0 x4 + 0 x5 == 52 && 1 x0 + 4 x1
39 |     // + 16 x2 + 0 x3 + 0 x4 + 0 x5 == 89 && 1 x0 + 0 x1 + 0 x2 + 1 x3 + 0 x4 +
40 |     // 1 x5 == 6 && 1 x0 + 1 x1 + 1 x2 + 1 x3 + 1 x4 + 1 x5 == 16 && 1 x0 + 2 x1
41 |     // + 4 x2 + 1 x3 + 2 x4 + 1 x5 == 36 && 1 x0 + 3 x1 + 9 x2 + 1 x3 + 3 x4 + 1
42 |     // x5
43 |     // == 66 && 1 x0 + 4 x1 + 16 x2 + 1 x3 + 4 x4 + 1 x5 == 106 && 1 x0 + 0 x1 +
44 |     // 0 x2 + 2 x3 + 0 x4 + 4 x5 == 19&& 1 x0 + 1 x1 + 1 x2 + 2 x3 + 2 x4 + 4 x5
45 |     // == 32 && 1 x0 + 2 x1 + 4 x2 + 2 x3 + 4 x4 + 4 x5 == 55 && 1 x0 + 3 x1 + 9
46 |     // x2 + 2 x3 + 6 x4 + 4 x5 == 88 && 1 x0 + 4 x1 + 16 x2 + 2 x3 + 8 x4 + 4 x5
47 |     // == 131 && 1 x0 + 0 x1 + 0 x2 + 3 x3 + 0 x4 + 9 x5 == 40 && 1 x0 + 1 x1 +
48 |     // 1 x2 + 3 x3 + 3 x4 + 9 x5 == 56 && 1 x0 + 2 x1 + 4 x2+ 3 x3 + 6 x4 + 9 x5
49 |     // == 82 && 1 x0 + 3 x1 + 9 x2 + 3 x3 + 9 x4 + 9 x5 == 118 && 1 x0 + 4 x1 +
50 |     // 16 x2 + 3 x3 + 12 x4 + 9 x5 == 164 && 1 x0 + 0 x1 + 0 x2 + 4 x3 + 0 x4 +
51 |     // 16 x5 == 69 && 1 x0 + 1 x1 + 1 x2 + 4 x3 + 4 x4 + 16 x5== 88 && 1 x0 + 2
52 |     // x1 + 4 x2 + 4 x3 + 8 x4 + 16 x5 == 117 && 1 x0 + 3 x1 + 9 x2 + 4 x3 + 12
53 |     // x4 + 16 x5 == 156 && 1 x0 + 4 x1 + 16 x2 + 4 x3 + 16 x4 + 16 x5 == 205},
54 |     // {x0, x1, x2, x3, x4, x5}]] mma output:
55 |     // {{1, 3, 6, 2, 4, 5}}
56 |     ip.Show(std::cout, {1, 3, 6, 2, 4, 5});  // feed the solved coefficients back
57 |     // output:
58 |     // 1 + 3 y + 6 y^2 + 2 x + 4 x y + 5 x^2
59 |   }
60 |   return 0;
61 | }


--------------------------------------------------------------------------------
/example/mma_to_cpp.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | int main() {
 5 |   PE_INIT(maxp = 2000000);
 6 |   // Convert a Mathematica expression to C++.
 7 |   // Sqrt is the only Mathematica function supported.
 8 |   for (auto a : mma::Compile(
 9 |            "Sqrt(-A^4 + 2 A^2 B^2 - B^4 + 2 A^2 x1^2 + 2 B^2 x1^2 - x1^4 - 4 "
10 |            "A^2 x1 x2 - 4 B^2 x1 x2 + 4 x1^3 x2 + 2 A^2 x2^2 + 2 B^2 x2^2 - "
11 |            "6 x1^2 x2^2 + 4 x1 x2^3 - x2^4)/(2 Sqrt(x1^2 - 2 x1 x2 + "
12 |            "x2^2))")) {
13 |     std::cout << a << std::endl;  // one element of the compiled result per line
14 |   }
15 |   // output:
16 |   // sqrt(-A * A * A * A + 2 * A * A * B * B - B * B * B * B + 2 * A * A * x1 *
17 |   // x1 + 2 * B * B * x1 * x1 - x1 * x1 * x1 * x1 - 4 * A * A * x1 * x2 - 4 * B
18 |   // * B * x1 * x2 + 4 * x1 * x1 * x1 * x2 + 2 * A * A * x2 * x2 + 2 * B * B *
19 |   // x2 * x2 - 6 * x1 * x1 * x2 * x2 + 4 * x1 * x2 * x2 * x2 - x2 * x2 * x2 *
20 |   // x2) / (2
21 |   // * sqrt(x1 * x1 - 2 * x1 * x2 + x2 * x2))
22 |   // Convert a Mathematica expression to C++ that uses modular arithmetic.
23 |   for (auto a : mma::CompileMod("(a^4+a b)*7/b")) {
24 |     std::cout << a << std::endl;  // one element of the compiled result per line
25 |   }
26 |   // output:
27 |   // int64 foo(int64 a, int64 b, int64 mod) {
28 |   //   const int64 t0 = a % mod;
29 |   //   const int64 t1 = 4;
30 |   //   const int64 t2 = PowerMod(t0, t1, mod) % mod;
31 |   //   const int64 t3 = b % mod;
32 |   //   const int64 t4 = (t0 * t3) % mod;
33 |   //   const int64 t5 = (t2 + t4) % mod;
34 |   //   const int64 t6 = 7 % mod;
35 |   //   const int64 t7 = (t5 * t6) % mod;
36 |   //   const int64 t8 = t7 * ModInv(t3, mod) % mod;
37 |   //   return t8;
38 |   // }
39 |   return 0;
40 | }


--------------------------------------------------------------------------------
/example/mod_number.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | const int64 mod = 1000000007;
 5 | 
 6 | int main() {
 7 |   // Prints the 10th power of `a` through each power entry point, in the same
 8 |   // fixed order for every element type below.
 9 |   auto show = [](auto& a) {
10 |     std::cout << a.Power(10) << std::endl;
11 |     std::cout << Power(a, 10) << std::endl;
12 |     std::cout << PowerMod(a, 10) << std::endl;
13 |     std::cout << PowerMod(a, 10, mod) << std::endl;
14 |     std::cout << PowerMod(a, 10) << std::endl;
15 |     std::cout << PowerMod(a, 10, mod) << std::endl;
16 |   };
17 |   {
18 |     NMod64<mod> a(5);
19 |     show(a);
20 |   }
21 |   {
22 |     TLMod<int64>::Set(mod);
23 |     TLNMod64<> a(5);
24 |     show(a);
25 |   }
26 |   {
27 |     DefaultMod::Set(mod);
28 |     NModNumber<DefaultMod> a(5);
29 |     show(a);
30 |   }
31 |   {
32 |       // NModNumber<MemMod<int64>> a(5);
33 |   }
34 | 
35 |   {
36 |     NModM64<mod> a(5);
37 |     show(a);
38 |   }
39 |   {
40 |     TLMod<int64>::Set(mod);
41 |     TLNModM64<> a(5);
42 |     show(a);
43 |   }
44 |   {
45 |     DefaultMod::Set(mod);
46 |     NModNumberM<DefaultMod> a(5);
47 |     show(a);
48 |   }
49 |   {
50 |     // Not recommended.
51 |     NModNumberM<MemMod<int64>> a(5, MemMod<int64>(mod));
52 |     show(a);
53 |   }
54 |   return 0;
55 | }


--------------------------------------------------------------------------------
/example/multiplicative_function_prefix_sum_common_function.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | const int64 mod = 1000000007;
 5 | 
 6 | int main() {
 7 |   PE_INIT(maxp = 2000000, cal_phi = 1, cal_mu = 1);
 8 |   // Helpers under demonstration; the *Moder variants are constructed with mod.
 9 |   SFCounter sf_counter;
10 |   MuSummer<int128> mu_summer;
11 |   MuPhiSummer<int128> mu_phi_summer;
12 |   Sigma0Summer<int128> sigma0_summer;
13 |   MuPhiSumModer mu_phi_sum_moder(mod);
14 |   Sigma0SumModer sigma0_sum_moder(mod);
15 |   // Evaluate every helper at n = 1, 10, ..., 10^10.
16 |   for (int64 n = 1; n <= 10000000000; n *= 10) {
17 |     std::cout << "n = " << n << std::endl;  // was printing "<n> = <n>"
18 |     std::cout << "# square free number " << sf_counter.Cal(n) << std::endl;
19 |     std::cout << "sum mu " << mu_summer.Cal(n) << std::endl;
20 |     std::cout << "sum mu " << mu_phi_summer.CalSumMu(n) << std::endl;
21 |     std::cout << "sum phi " << mu_phi_summer.CalSumPhi(n) << std::endl;
22 |     std::cout << "sum sigma0 " << sigma0_summer.Cal(n) << std::endl;
23 | 
24 |     std::cout << "sum mu mod " << mod << " " << mu_phi_sum_moder.CalSumMu(n)
25 |               << std::endl;
26 |     std::cout << "sum phi mod " << mod << " " << mu_phi_sum_moder.CalSumPhi(n)
27 |               << std::endl;
28 |     std::cout << "sum sigma0 mod " << mod << " " << sigma0_sum_moder.Cal(n)
29 |               << std::endl;
30 |     std::cout << std::endl;
31 |   }
32 |   return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/example/parallel_cal_prime_pi.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | #if 0  // Disabled: everything up to #else is compiled out.
 5 | const int TN = 8;  // thread count passed to both variants below
 6 | int64 CalPi0(int64 n) {
 7 |   int64 result = PARALLEL_RESULT(
 8 |   BEGIN_PARALLEL
 9 |     FROM 1 TO n EACH_BLOCK_IS 10000000 CACHE ""
10 |     THREADS TN
11 |     MAP {
12 |         return IsPrimeEx(key);
13 |       }
14 |     REDUCE {
15 |         result += value;
16 |         return result;
17 |       }
18 |   END_PARALLEL);
19 |   return result;
20 | }
21 | 
22 | struct CalPI : public ParallelRangeT<CalPI> {
23 |   int64 UpdateResult(int64 result, int64 value) { return result + value; }
24 |   int64 WorkOnBlock(int64 first, int64 last, int64 worker) {
25 |     int64 t = 0;
26 |     for (int64 i = first; i <= last; ++i) t += IsPrimeEx(i);  // inclusive range
27 |     return t;
28 |   }
29 | };
30 | 
31 | int64 CalPi1(int64 n) {
32 |   return CalPI()
33 |       .From(1)
34 |       .To(n)
35 |       .DividedBy(10000000)
36 |       .SetThreadsCount(TN)
37 |       .Start()
38 |       .Result();
39 | }
40 | 
41 | int main() {
42 |   PE_INIT(maxp = 2000000);
43 | 
44 |   const int m = 8;
45 |   const int n = Power(10, m);
46 | 
47 |   int64 ans0 = CalPi0(n);
48 |   int64 ans1 = CalPi1(n);
49 |   std::cout << "n = " << n << std::endl;
50 |   std::cout << "Expected: " << kPrimePi[m] << std::endl;
51 |   std::cout << "CalPi0: " << ans0 << std::endl;
52 |   std::cout << "CalPi1: " << ans1 << std::endl;
53 | 
54 |   return 0;
55 | }
56 | #else  // Active branch: an empty program.
57 | int main() {
58 |   return 0;
59 | }
60 | #endif


--------------------------------------------------------------------------------
/example/partition_mobius.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | const int S = 500;
 5 | // The number of non-negative integer solutions of
 6 | // 1 x1 + 2 x2 + 3 x3 + 4 x4 + 5 x5 = S
 7 | // where x1, x2, ..., x5 are pairwise distinct.
 8 | 
 9 | const int64 mod = 1000000007;
10 | using MT = NMod64<mod>;
11 | 
12 | char used[S + 1];
13 | int64 dfs(int now, int s) {
14 |   if (now == 6) return s == 0 ? 1 : 0;
15 |   int64 total = 0;
16 |   for (int x = 0; x <= S && now * x <= s; ++x) {
17 |     if (used[x] != 0) continue;  // value already taken by an earlier variable
18 |     used[x] = 1;
19 |     total += dfs(now + 1, s - now * x);
20 |     used[x] = 0;
21 |   }
22 |   return total;
23 | }
24 | 
25 | int64 solve0() { return dfs(/*now=*/1, /*s=*/S); }  // exhaustive search
26 | 
27 | std::map<std::vector<int>, int64> cache;
28 | int64 compute(std::vector<int> coe) {
29 |   // Counts solutions of sum(coe[i] * x_i) = S; results are memoized per coe.
30 |   auto where = cache.find(coe);
31 |   if (where != cache.end()) {
32 |     return where->second;
33 |   }
34 |   int64 dp[S + 1] = {1};
35 |   for (int iter : coe)
36 |     for (int i = 0; i + iter <= S; ++i) {
37 |       dp[i + iter] += dp[i];
38 |     }
39 |   return cache[coe] = dp[S];  // store it: the cache was previously never filled
40 | }
41 | 
42 | int64 solve1() {
43 |   PartitionMobius mobius(mod);
44 |   const int weights[5] = {1, 2, 3, 4, 5};
45 |   MT total = 0;
46 |   for (Partition part : Partition::GenPartitions(5)) {
47 |     std::map<int, int> merged;  // color -> sum of the weights sharing it
48 |     for (int i = 0; i < 5; ++i) merged[part.colors[i]] += weights[i];
49 |     std::vector<int> key;
50 |     for (const auto& entry : merged) key.push_back(entry.second);
51 |     std::sort(key.begin(), key.end());
52 |     total += compute(key) * mobius.Cal(part) % mod;
53 |   }
54 |   return total.value();
55 | }
56 | 
57 | int main() {
58 |   // Print the exhaustive-search count, then the PartitionMobius-based count.
59 |   for (auto solver : {&solve0, &solve1}) {
60 |     std::cout << solver() << std::endl;
61 |   }
62 |   return 0;
63 | }


--------------------------------------------------------------------------------
/example/pe_db.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | void MakeDb() {
 5 |   const int64 kLimit = 100000000000000;  // 10^14
 6 |   PeDb store("D:/");
 7 |   store.MergePrimePi(PrimeS0Parallel<int64>(kLimit));
 8 | #if PE_HAS_INT128
 9 |   store.MergePrimeSum(PrimeS1Parallel<int128>(kLimit));
10 | #endif
11 |   store.Save();
12 | }
13 | 
14 | int main() {
15 |   PE_INIT(maxp = 70000000);
16 |   // MakeDb();  // uncomment to build and save the database first
17 |   const int64 n = 10000000000;
18 |   PeDb db("D:/");
19 |   db.Load();
20 | 
21 |   {  // Compare PrimeS0Ex results with the values filled in from the database.
22 |     DVA<int64> dva = PrimeS0Ex<int64>(n);
23 |     DVA<int64> dva1(n);
24 |     db.FillPrimePi(dva1);
25 |     for (int i = 0; i < dva.key_size; ++i) {
26 |       if (dva.values[i] != dva1.values[i]) {  // print both sides before asserting
27 |         std::cout << dva.values[i] << std::endl;
28 |         std::cout << dva1.values[i] << std::endl;
29 |       }
30 |       assert(dva.values[i] == dva1.values[i]);
31 |     }
32 |     std::cout << dva[n] << std::endl;
33 |     std::cout << dva1[n] << std::endl;
34 |     // https://oeis.org/A006880
35 |     std::cout << db.PrimePi(Power(10LL, 14)) << std::endl;
36 |   }
37 | #if PE_HAS_INT128
38 |   {  // Same comparison for PrimeS1Ex against the database's prime sums.
39 |     DVA<int128> dva = PrimeS1Ex<int128>(n);
40 |     DVA<int128> dva1(n);
41 |     db.FillPrimeSum(dva1);
42 |     for (int i = 0; i < dva.key_size; ++i) {
43 |       if (dva.values[i] != dva1.values[i]) {  // print both sides before asserting
44 |         std::cout << dva.values[i] << std::endl;
45 |         std::cout << dva1.values[i] << std::endl;
46 |       }
47 |       assert(dva.values[i] == dva1.values[i]);
48 |     }
49 |     std::cout << dva[n] << std::endl;
50 |     std::cout << dva1[n] << std::endl;
51 |     // https://oeis.org/A046731
52 |     std::cout << db.PrimeSum(Power(10LL, 14)) << std::endl;
53 |   }
54 | #endif
55 |   return 0;
56 | }


--------------------------------------------------------------------------------
/example/power_sum.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | const int64 mod = 1000000007;
 5 | using MT = NMod64<mod>;
 6 | 
 7 | using SumfunctionType = int64 (*)(int64 n, int64 mod);
 8 | SumfunctionType sum_function[8] = {
 9 |     nullptr,   &P1SumMod, &P2SumMod, &P3SumMod,
10 |     &P4SumMod, &P5SumMod, &P6SumMod, &P7SumMod,
11 | };
12 | 
13 | int main() {
14 |   PE_INIT(maxp = 2000000);
15 |   PowerSumModer plain(mod);         // maxk = 128
16 |   PowerSumModerB variant_b(mod);    // maxk = 128
17 |   PowerSumModerB1 variant_b1(mod);  // maxk = 128
18 |   for (int k = 0; k <= 10; ++k) {
19 |     std::cout << "k = " << k << std::endl;
20 |     for (int64 n = 1; n <= 1000000000; n *= 10) {
21 |       std::cout << "n = " << n << std::endl;
22 |       const std::vector<int64> batch = PowerSumModBatch(n, k, mod);
23 |       std::cout << "InitPowerSumMod   " << batch[k] << std::endl;
24 |       std::cout << "PowerSumModer     " << plain.Cal(n, k) << std::endl;
25 |       std::cout << "PowerSumModerB    " << variant_b.Cal(n, k) << std::endl;
26 |       std::cout << "PowerSumModerB1   " << variant_b1.Cal(n, k) << std::endl;
27 |       if (1 <= k && k <= 7) {  // sum_function has entries for k = 1..7 only
28 |         const SumfunctionType direct = sum_function[k];
29 |         std::cout << "P" << k << "SumMod          " << direct(n, mod) << std::endl;
30 |       }
31 |       std::cout << std::endl;
32 |     }
33 |     std::cout << std::endl;
34 |   }
35 |   return 0;
36 | }


--------------------------------------------------------------------------------
/example/prime_power_sum.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | const int64 mod = 1000000007;
 5 | using MT = NMod64<mod>;
 6 | 
 7 | void prime_s0() {
 8 |   CachedPi cp;
 9 |   // Writes one "<label><value>" line; labels are padded to a common width.
10 |   auto emit = [](const char* label, const auto& v) { std::cout << label << v << std::endl; };
11 |   for (int64 n = 1; n <= 10000000000; n *= 10) {
12 |     emit("n = ", n);
13 |     emit("PrimeS0         ", PrimeS0<int64>(n)[n] % mod);
14 |     emit("PrimeS0Parallel ", PrimeS0Parallel<int64>(n)[n] % mod);
15 |     emit("PrimeS0Ex       ", PrimeS0Ex<int64>(n)[n] % mod);
16 |     emit("PrimeS0         ", PrimeS0<MT>(n)[n]);
17 |     emit("PrimeS0Parallel ", PrimeS0Parallel<MT>(n)[n]);
18 |     emit("PrimeS0Ex       ", PrimeS0Ex<MT>(n)[n]);
19 |     emit("PrimeSkEx       ", PrimeSkEx<mod>(n, 0)[n]);
20 |     emit("CachedPi        ", cp.Cal(n) % mod);
21 |     std::cout << std::endl;
22 |   }
23 | }
24 | 
25 | void prime_s1() {
26 |   // Writes one "<label><value>" line; labels are padded to a common width.
27 |   auto emit = [](const char* label, const auto& v) { std::cout << label << v << std::endl; };
28 |   for (int64 n = 1; n <= 10000000000; n *= 10) {
29 |     emit("n = ", n);
30 |     emit("PrimeS1         ", PrimeS1<int64>(n)[n] % mod);
31 |     emit("PrimeS1Parallel ", PrimeS1Parallel<int64>(n)[n] % mod);
32 |     emit("PrimeS1Ex       ", PrimeS1Ex<int64>(n)[n] % mod);
33 |     emit("PrimeS1         ", PrimeS1<MT>(n)[n]);
34 |     emit("PrimeS1Parallel ", PrimeS1Parallel<MT>(n)[n]);
35 |     emit("PrimeS1Ex       ", PrimeS1Ex<MT>(n)[n]);
36 |     emit("PrimeSkEx       ", PrimeSkEx<mod>(n, 1)[n]);
37 |     std::cout << std::endl;
38 |   }
39 | }
40 | 
41 | void prime_pmod_s0() {
42 |   for (int64 n = 1; n <= 1000000000; n *= 10) {
43 |     std::cout << "n = " << n << std::endl;
44 |     for (int pmod = 2; pmod <= 7; ++pmod) {
45 |       const auto typed = PrimeS0PMod<MT>(n, pmod);
46 |       const auto generic = PrimeSkPMod<mod>(n, 0, pmod);
47 |       std::cout << "pmod = " << pmod << std::endl;
48 |       for (int residue = 0; residue < pmod; ++residue) {
49 |         // Number of primes p with p % pmod == residue, from both routines.
50 |         std::cout << "p % " << pmod << " = " << residue << " "
51 |                   << typed[residue][n] << " " << generic[residue][n] << std::endl;
52 |       }
53 |       std::cout << std::endl;
54 |     }
55 |     std::cout << std::endl;
56 |   }
57 | }
58 | 
59 | void prime_pmod_s1() {
60 |   for (int64 n = 1; n <= 1000000000; n *= 10) {
61 |     std::cout << "n = " << n << std::endl;
62 |     for (int pmod = 2; pmod <= 7; ++pmod) {
63 |       const auto typed = PrimeS1PMod<MT>(n, pmod);
64 |       const auto generic = PrimeSkPMod<mod>(n, 1, pmod);
65 |       std::cout << "pmod = " << pmod << std::endl;
66 |       for (int residue = 0; residue < pmod; ++residue) {
67 |         // Sum of primes p with p % pmod == residue, from both routines.
68 |         std::cout << "p % " << pmod << " = " << residue << " "
69 |                   << typed[residue][n] << " " << generic[residue][n] << std::endl;
70 |       }
71 |       std::cout << std::endl;
72 |     }
73 |     std::cout << std::endl;
74 |   }
75 | }
76 | 
77 | int main() {
78 |   PE_INIT(maxp = 2000000);
79 | 
80 |   // Run the four demonstrations in their original order.
81 |   void (*const demos[])() = {prime_s0, prime_s1, prime_pmod_s0,
82 |                              prime_pmod_s1};
83 |   for (auto demo : demos) demo();
84 |   return 0;
85 | }


--------------------------------------------------------------------------------
/example/random_sample.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | const int sample_count = 1000000;
 5 | 
 6 | const double coe15 = 1. / (1ULL << 15);
 7 | double CalPi_CRand15() {
 8 |   // Monte Carlo: 4 * (share of sampled points with x^2 + y^2 < 1).
 9 |   int hits = 0;
10 |   for (int i = 0; i < sample_count; ++i) {
11 |     const double x = coe15 * CRand15();
12 |     const double y = coe15 * CRand15();
13 |     if (x * x + y * y < 1) ++hits;
14 |   }
15 |   return 4. * hits / sample_count;
16 | }
17 | 
18 | const double coe31 = 1. / (1ULL << 31);
19 | double CalPi_CRand31() {
20 |   // Monte Carlo: 4 * (share of sampled points with x^2 + y^2 < 1).
21 |   int hits = 0;
22 |   for (int i = 0; i < sample_count; ++i) {
23 |     const double x = coe31 * CRand31();
24 |     const double y = coe31 * CRand31();
25 |     if (x * x + y * y < 1) ++hits;
26 |   }
27 |   return 4. * hits / sample_count;
28 | }
29 | 
30 | const double coe63 = 1. / (1ULL << 63);
31 | double CalPi_CRand63() {
32 |   // Monte Carlo: 4 * (share of sampled points with x^2 + y^2 < 1).
33 |   int hits = 0;
34 |   for (int i = 0; i < sample_count; ++i) {
35 |     const double x = coe63 * CRand63();
36 |     const double y = coe63 * CRand63();
37 |     if (x * x + y * y < 1) ++hits;
38 |   }
39 |   return 4. * hits / sample_count;
40 | }
41 | 
42 | const int maxn = 2000000000;
43 | auto rand_generator = MakeUniformGenerator(0, maxn - 1);
44 | const double randcoe = 1. / maxn;
45 | double CalPi_Rand() {
46 |   // Monte Carlo: 4 * (share of sampled points with x^2 + y^2 < 1).
47 |   int hits = 0;
48 |   for (int i = 0; i < sample_count; ++i) {
49 |     const double x = randcoe * rand_generator();
50 |     const double y = randcoe * rand_generator();
51 |     if (x * x + y * y < 1) ++hits;
52 |   }
53 |   return 4. * hits / sample_count;
54 | }
55 | 
56 | double CalPi_Halton() {
57 |   // Same estimate as the random variants, with Halton(i, 2) as the i-th
58 |   // sample point.
59 |   int hits = 0;
60 |   for (int i = 0; i < sample_count; ++i) {
61 |     const std::vector<double> pt = Halton(i, 2);
62 |     if (pt[0] * pt[0] + pt[1] * pt[1] < 1) ++hits;
63 |   }
64 |   return 4. * hits / sample_count;
65 | }
66 | 
67 | #if HAS_MPF
68 | double CalPi_Mpf() {
69 |   // Monte Carlo estimate of pi using 128-bit uniform Mpf samples.
70 |   gmp_randstate_t state;
71 |   gmp_randinit_mt(state);
72 |   Mpf::SetDefaultPrec(200);
73 |   int ok = 0;
74 |   for (int i = 0; i < sample_count; ++i) {
75 |     Mpf a, b;
76 |     mpf_urandomb(a.mpf(), state, 128);
77 |     mpf_urandomb(b.mpf(), state, 128);
78 |     if (a * a + b * b < 1) ++ok;
79 |   }
80 |   // Pair gmp_randinit_mt with gmp_randclear so the state is not leaked.
81 |   gmp_randclear(state);
82 |   return 4. * ok / sample_count;
83 | }
84 | #endif
85 | 
86 | int main() {
87 |   PE_INIT(maxp = 1000000);
88 |   printf("CRand15\t%.16f\n", CalPi_CRand15());
89 |   printf("CRand31\t%.16f\n", CalPi_CRand31());
90 |   printf("CRand63\t%.16f\n", CalPi_CRand63());
91 |   printf("Rand\t%.16f\n", CalPi_Rand());
92 |   printf("Halton\t%.16f\n", CalPi_Halton());
93 | #if HAS_MPF
94 |   printf("Mpf\t%.16f\n", CalPi_Mpf());
95 | #endif
96 |   return 0;
97 | }


--------------------------------------------------------------------------------
/example/sym_poly.c:
--------------------------------------------------------------------------------
 1 | #include <pe.hpp>
 2 | using namespace pe;
 3 | 
 4 | // Guess the solution to a^2=b^2+c^2+bc where
 5 | // a = t1, b = t2, c = t3, c7 = 1, c8 = 0, c9 = -1
 6 | SymPoly<int64> t1("c1 m^2 + c2 m n + c3 n^2");
 7 | SymPoly<int64> t2("c4 m^2 + c5 m n + c6 n^2");
 8 | SymPoly<int64> t3("c7 m^2 + c8 m n + c9 n^2");
 9 | 
10 | SymPoly<int64> target = SymPoly<int64>("a^2-b^2-c^2-b c")
11 |                             .Replace("a", t1)
12 |                             .Replace("b", t2)
13 |                             .Replace("c", t3);
14 | 
15 | int his[10];
16 | void dfs(int now, const SymPoly<int64>& p) {
17 |   if (now != 10) {
18 |     const std::string var = "c" + ToString(now);
19 |     for (int value = -1; value <= 2; ++value) {
20 |       his[now] = value;
21 |       dfs(now + 1, p.Replace(var, value));
22 |     }
23 |     return;
24 |   }
25 |   // Leaf: c1..c9 are all assigned; require p == 0 and (c7, c8, c9) = (1, 0, -1).
26 |   if (!std::empty(p.terms()) || his[7] != 1 || his[8] != 0 || his[9] != -1) return;
27 |   const auto aa =
28 |       t1.Replace("c1", his[1]).Replace("c2", his[2]).Replace("c3", his[3]);
29 |   const auto bb =
30 |       t2.Replace("c4", his[4]).Replace("c5", his[5]).Replace("c6", his[6]);
31 |   const auto cc =
32 |       t3.Replace("c7", his[7]).Replace("c8", his[8]).Replace("c9", his[9]);
33 |   if (std::empty(aa.terms()) || std::empty(bb.terms()) || std::empty(cc.terms()))
34 |     return;
35 |   std::cout << "a = " << aa << std::endl;
36 |   std::cout << "b = " << bb << std::endl;
37 |   std::cout << "c = " << cc << std::endl;
38 |   std::cout << std::endl;
39 | }
40 | 
41 | int main() {
42 |   // Show the target with n = 1 and m = 1 substituted, then run the search.
43 |   const auto at_unit = target.Replace("n", 1).Replace("m", 1);
44 |   std::cout << at_unit << std::endl;
45 |   dfs(1, target);
46 |   return 0;
47 | }


--------------------------------------------------------------------------------
/format.py:
--------------------------------------------------------------------------------
 1 | #! python2
 2 | import os
 3 | import subprocess
 4 | 
 5 | CURRENT_DIRECTORY = os.getcwd()
 6 | # BINARY_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
 7 | 
 8 | compile_commands_template = """[{
 9 |     "directory":
10 |         "$(CURRENT_DIRECTORY)",
11 |     "file":
12 |         "pe",
13 |     "arguments": [
14 |         "clang++.exe", "-xc++", "pe", "--driver-mode=g++", "-c", "--std=c++17",
15 |         "-O3", "-march=native", "-mtune=native",
16 |         "--target=x86_64-w64-windows-gnu", "-fopenmp"
17 |     ]
18 | }]"""
19 | 
20 | tidy_options = [
21 |     '-checks=-*', 'google-readability-casting',
22 |     'google-readability-braces-around-statements',
23 |     'google-readability-namespace-comments', 'performance-*', 'modernize-use-*',
24 |     '-modernize-use-trailing-return-type', '-modernize-use-nodiscard',
25 |     'misc-unused-parameters'
26 | ]
27 | 
28 | tidy_cmd = [
29 |     'run-clang-tidy.py', ','.join(tidy_options), '-header-filter=pe.*',
30 |     '-export-fixes=format-fixes.yaml', '-fix', 'pe'
31 | ]
32 | 
33 | 
34 | def tidy_code():
35 |   with open('compile_commands.json', 'wb') as tempf:
36 |     tempf.write(
37 |         compile_commands_template.replace(
38 |             '$(CURRENT_DIRECTORY)', CURRENT_DIRECTORY.replace('\\', '\\\\')))
39 |   os.system(' '.join(tidy_cmd))
40 |   os.remove('compile_commands.json')
41 |   os.remove('format-fixes.yaml')
42 | 
43 | 
44 | def should_format(filename):
45 |   if filename in ['parallel_cal_prime_pi.c']:
46 |     return False
47 | 
48 |   _, file_ext_name = os.path.splitext(filename)
49 | 
50 |   return file_ext_name in ['', '.h', '.hpp', '.c', '.cxx', '.cpp']
51 | 
52 | 
53 | def format_code():
54 |   for rt, _, files in os.walk(CURRENT_DIRECTORY):
55 |     if rt.find('.git') != -1:
56 |       continue
57 |     for f in files:
58 |       if should_format(f):
59 |         fpath = os.path.join(rt, f)
60 |         print(fpath)
61 |         subprocess.call('clang-format -style=Google -sort-includes=0 -i %s' %
62 |                         fpath)
63 | 
64 | 
65 | if __name__ == '__main__':
66 |   tidy_code()
67 |   format_code()
68 | 


--------------------------------------------------------------------------------
/gen_config.py:
--------------------------------------------------------------------------------
 1 | #! python3
 2 | # -*- coding: UTF-8 -*-
 3 | import os
 4 | 
 5 | CURRENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
 6 | TARGET_FILENAME = 'pe_config'
 7 | TARGET_PATH = os.path.join(CURRENT_DIRECTORY, TARGET_FILENAME)
 8 | 
 9 | SPLITTER = ';' if os.name == 'nt' else ':'
10 | CHECKING_PATHS = os.environ.get('CPLUS_INCLUDE_PATH', '').split(SPLITTER)
11 | 
12 | RULES = [
13 |     ('ENABLE_EIGEN', ['Eigen/Dense']),
14 |     ('ENABLE_GMP', ['gmp.h']),
15 |     ('ENABLE_FLINT', ['flint.h']),
16 |     ('ENABLE_MPFR', ['mpfr.h']),
17 |     #('ENABLE_MPIR', ['mpir.h']),
18 |     ('ENABLE_LIBBF', ['libbf.h']),
19 |     ('ENABLE_NTL', ['NTL/ZZ.h']),
20 |     ('ENABLE_ZMQ', ['zmq/zmq.h']),
21 |     ('ENABLE_PRIME_COUNT', ['primecount.hpp']),
22 |     ('ENABLE_PRIME_SIEVE', ['primesieve.hpp']),
23 |     ('ENABLE_TCMALLOC', []), # Always disable
24 | ]
25 | 
26 | content = []
27 | 
28 | 
29 | def add_define(key, value):
30 |   content.append('#ifndef %s' % key)
31 |   content.append('#define %s %s' % (key, value))
32 |   content.append('#endif')
33 |   content.append('')
34 | 
35 | 
36 | def check_target(path):
37 |   for folder in CHECKING_PATHS:
38 |     if not os.path.exists(folder):
39 |       continue
40 |     for i in path:
41 |       if os.path.exists(os.path.join(folder, i)):
42 |         return True
43 |   return False
44 | 
45 | 
46 | def main():
47 |   content.append('#ifndef PE_CONFIG_')
48 |   content.append('#define PE_CONFIG_')
49 |   content.append('')
50 |   content.append('// This file provides a centralized place to configure pe')
51 |   content.append('')
52 |   content.append(
53 |       '// Auto generated by gen_config.py, and you can edit it manually')
54 |   content.append('')
55 |   content.append('// Configuration priority (first match):')
56 |   content.append('// 1. Compiling command')
57 |   content.append('// 2. The configurations in this file')
58 |   content.append('// 3. The configurations in file pe')
59 |   content.append('')
60 |   add_define('ENABLE_ASSERT', '1')
61 |   add_define('TRY_TO_USE_INT128', '1')
62 |   for (key, value) in RULES:
63 |     ok = check_target(value)
64 |     add_define(key, '1' if ok else '0')
65 |   content.append('#endif')
66 |   with open(TARGET_PATH, 'wb') as tempf:
67 |     tempf.write('\r\n'.join(content).encode('utf8'))
68 | 
69 | 
70 | if __name__ == '__main__':
71 |   main()
72 | 


--------------------------------------------------------------------------------
/legacy/pe_poly.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef PE_POLY_
  2 | #define PE_POLY_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_type_traits"
  6 | #include "pe_mod"
  7 | #include "pe_nt"
  8 | #include "pe_poly_base"
  9 | 
 10 | namespace pe {
 11 | struct NModPoly {
 12 |   int64 mod;
 13 |   std::vector<int64> data;
 14 | 
 15 |   NModPoly(int64 mod = 1) : mod(mod) {}
 16 | 
 17 |   NModPoly(const std::vector<int64>& data, int64 mod,
 18 |            int adjust_leading_zero = 1)
 19 |       : data(data), mod(mod) {
 20 |     AdjustMod();
 21 |     if (adjust_leading_zero) {
 22 |       AdjustLeadingZeros();
 23 |     }
 24 |   }
 25 | 
 26 |   NModPoly(std::vector<int64>&& data, int64 mod, int adjust_leading_zero = 1)
 27 |       : data(std::move(data)), mod(mod) {
 28 |     AdjustMod();
 29 |     if (adjust_leading_zero) {
 30 |       AdjustLeadingZeros();
 31 |     }
 32 |   }
 33 | 
 34 |   NModPoly& operator=(const std::vector<int64>& v) {
 35 |     data = v;
 36 |     return *this;
 37 |   }
 38 | 
 39 |   NModPoly& operator=(std::vector<int64>&& v) {  // non-const so it can move
 40 |     data = std::move(v);
 41 |     return *this;
 42 |   }
 43 | 
 44 |   NModPoly(const NModPoly& p) = default;
 45 |   NModPoly(NModPoly&& p) = default;
 46 |   NModPoly& operator=(const NModPoly& other) = default;
 47 |   NModPoly& operator=(NModPoly&& other) = default;
 48 | 
 49 |   NModPoly& AdjustLeadingZeros() {
 50 |     AdjustPolyLeadingZero(data);
 51 |     return *this;
 52 |   }
 53 | 
 54 |   NModPoly& AdjustMod() {
 55 |     for (auto& iter : data) {
 56 |       iter = Mod(iter, mod);
 57 |     }
 58 |     return *this;
 59 |   }
 60 | 
 61 |   int64 deg() const { return static_cast<int64>(std::size(data) - 1); }
 62 |   int64 size() const { return static_cast<int64>(std::size(data)); }
 63 | 
 64 |   NModPoly& Resize(int64 n) {
 65 |     const int64 m = static_cast<int64>(std::size(data));
 66 |     data.resize(n);
 67 |     for (int64 i = m; i < n; ++i) {
 68 |       data[i] = 0;
 69 |     }
 70 |     return *this;
 71 |   }
 72 | 
 73 |   NModPoly& Redeg(int64 n) { return Resize(n + 1); }
 74 | 
 75 |   int64& operator[](int64 idx) {
 76 |     PE_ASSERT(idx >= 0 && idx < static_cast<int64>(std::size(data)));
 77 |     return data[idx];
 78 |   }
 79 | 
 80 |   int64 operator[](int64 idx) const {
 81 |     PE_ASSERT(idx >= 0 && idx < static_cast<int64>(std::size(data)));
 82 |     return data[idx];
 83 |   }
 84 | 
 85 |   int64 At(int64 idx) const {
 86 |     return idx >= 0 && idx < static_cast<int64>(std::size(data)) ? data[idx]
 87 |                                                                  : 0;
 88 |   }
 89 | 
 90 |   bool IsZero() const { return std::size(data) == 1 && data[0] == 0; }
 91 | 
 92 |   int64 ValueAt(int64 v) const {
 93 |     int64 ret = 0;
 94 |     const int64 t = Mod(v, mod);
 95 |     for (int64 i = deg(); i >= 0; --i) {
 96 |       ret = AddMod(MulMod(ret, t, mod), data[i], mod);
 97 |     }
 98 |     return ret;
 99 |   }
100 | 
101 |   NModPoly LowerTerms(int64 n, int adjust_leading_zero = 1) const {
102 |     const int64 m = std::min(n, static_cast<int64>(std::size(data)));
103 |     return NModPoly(std::vector<int64>(data.begin(), data.begin() + m), mod,
104 |                     adjust_leading_zero);
105 |   }
106 | 
107 |   NModPoly Inv(int64 n) const;
108 | };
109 | 
110 | template <int64 M>
111 | struct NModPolyT : public NModPoly {
112 |   NModPolyT() : NModPoly(M) {}
113 | 
114 |   NModPolyT(const std::vector<int64>& data) : NModPoly(data, M) {}
115 | 
116 |   NModPolyT(std::vector<int64>&& data) : NModPoly(std::move(data), M) {}
117 | 
118 |   NModPolyT(std::initializer_list<int64> l)
119 |       : NModPolyT(std::vector<int64>(l)) {}
120 | };
121 | 
122 | SL NModPoly PolyMul(const NModPoly& X, const NModPoly& Y) {
123 |   return NModPoly{PolyMul(X.data, Y.data, X.mod), X.mod};
124 | }
125 | 
126 | SL NModPoly PolyInv(const NModPoly& x, int64 n) {
127 |   return NModPoly(PolyInv(x.data, n, x.mod), x.mod);
128 | }
129 | 
130 | #define PPOLY_DIV_AND_MOD_IMPL(PolyDivAndMod, PolyDivAndModImpl)       \
131 |   SL std::tuple<NModPoly, NModPoly> PolyDivAndMod(const NModPoly& X,   \
132 |                                                   const NModPoly& Y) { \
133 |     auto [q, r] = PolyDivAndModImpl(X.data, Y.data, X.mod);            \
134 |     return std::make_tuple(NModPoly(std::move(q), X.mod),              \
135 |                            NModPoly(std::move(r), X.mod));             \
136 |   }
137 | 
138 | #define PPOLY_DIV_IMPL(PolyDiv, PolyDivImpl)                    \
139 |   SL NModPoly PolyDiv(const NModPoly& X, const NModPoly& Y) {   \
140 |     return NModPoly(PolyDivImpl(X.data, Y.data, X.mod), X.mod); \
141 |   }
142 | 
143 | #define PPOLY_MOD_IMPL(PolyMod, PolyModImpl)                    \
144 |   SL NModPoly PolyMod(const NModPoly& X, const NModPoly& Y) {   \
145 |     return NModPoly(PolyModImpl(X.data, Y.data, X.mod), X.mod); \
146 |   }
147 | 
148 | PPOLY_DIV_AND_MOD_IMPL(PolyDivAndModDc, pe::PolyDivAndModDc)
149 | PPOLY_DIV_IMPL(PolyDivDc, pe::PolyDivDc)
150 | PPOLY_MOD_IMPL(PolyModDc, pe::PolyModDc)
151 | 
152 | PPOLY_DIV_AND_MOD_IMPL(PolyDivAndModNormal, pe::PolyDivAndModNormal)
153 | PPOLY_DIV_IMPL(PolyDivNormal, pe::PolyDivNormal)
154 | PPOLY_MOD_IMPL(PolyModNormal, pe::PolyModNormal)
155 | 
156 | PPOLY_DIV_AND_MOD_IMPL(PolyDivAndMod, pe::PolyDivAndMod)
157 | PPOLY_DIV_IMPL(PolyDiv, pe::PolyDiv)
158 | PPOLY_MOD_IMPL(PolyMod, pe::PolyMod)
159 | 
160 | inline NModPoly NModPoly::Inv(int64 n) const {
161 |   // Result of PolyInv on this polynomial's coefficients with n and mod. The
162 |   // coefficient vector is moved into the result, so nothing is copied
163 |   // element-wise and an empty vector is never indexed.
164 |   return NModPoly(PolyInv(data, n, mod), mod);
165 | }
166 | 
167 | // SL matches the sibling operators (assumed static inline; see pe_base).
168 | SL NModPoly operator<<(const NModPoly& p, int64 m) {
169 |   return NModPoly(PolyShiftLeft<int64>(p.data, m), p.mod);
170 | }
171 | SL NModPoly operator>>(const NModPoly& p, int64 m) {
172 |   return NModPoly(PolyShiftRight<int64>(p.data, m), p.mod);
173 | }
174 | 
175 | SL NModPoly operator-(const NModPoly& x, const NModPoly& y) {
176 |   return NModPoly(PolySub(x.data, y.data, x.mod), x.mod).AdjustLeadingZeros();
177 | }
178 | 
179 | SL NModPoly operator+(const NModPoly& x, const NModPoly& y) {
180 |   return NModPoly(PolyAdd(x.data, y.data, x.mod), x.mod).AdjustLeadingZeros();
181 | }
182 | 
183 | SL NModPoly operator*(const NModPoly& x, int64 v) {
184 |   // Scales every coefficient by v; MulMod on reduced operands replaces the
185 |   // plain `iter * v` product, which could overflow int64 (assumes MulMod
186 |   // behaves as in ValueAt above).
187 |   std::vector<int64> data(x.data);
188 |   for (auto& c : data) c = MulMod(Mod(c, x.mod), Mod(v, x.mod), x.mod);
189 |   return NModPoly(std::move(data), x.mod); }
190 | 
191 | SL NModPoly operator*(int64 v, const NModPoly& x) { return x * v; }
192 | 
193 | SL NModPoly operator*(const NModPoly& x, const NModPoly& y) {
194 |   return PolyMul(x, y);
195 | }
196 | 
197 | SL NModPoly operator/(const NModPoly& x, const NModPoly& y) {
198 |   return PolyDiv(x, y);
199 | }
200 | 
201 | SL NModPoly operator%(const NModPoly& x, const NModPoly& y) {
202 |   return PolyMod(x, y);
203 | }
204 | 
205 | SL int operator==(const NModPoly& x, const NModPoly& y) {
206 |   return x.mod == y.mod && x.data == y.data;
207 | }
208 | 
209 | // x^n % mod
210 | SL NModPoly operator%(int64 n, const NModPoly& mod) {
211 |   NModPoly x{{0, 1}, mod.mod};
212 |   NModPoly ret{{1}, mod.mod};
213 |   for (; n > 0; n >>= 1) {
214 |     if (n & 1) {
215 |       ret = PolyMod(x * ret, mod);
216 |     }
217 |     if (n > 1) {
218 |       x = PolyMod(x * x, mod);
219 |     }
220 |   }
221 |   return ret;
222 | }
223 | 
224 | SL std::ostream& operator<<(std::ostream& o, const NModPoly& p) {
225 |   // Coefficients joined by ", "; an empty coefficient list prints nothing.
226 |   for (size_t i = 0; i < std::size(p.data); ++i) {
227 |     o << (i == 0 ? "" : ", ") << p.data[i];
228 |   }
229 |   return o;
230 | }
231 | }  // namespace pe
232 | #endif
233 | 


--------------------------------------------------------------------------------
/legacy/pe_sym_poly.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef PE_SYM_POLY_
  2 | #define PE_SYM_POLY_
  3 | 
  4 | #include "pe_base"
  5 | 
  6 | namespace pe {
  7 | // [+- ]*
  8 | SL std::vector<int> ParseSgnList(const std::string& s, int& i) {
  9 |   // Collects a run of '+'/'-' signs (as +1/-1), skipping whitespace between
 10 |   // them; i is left on the first character that is not part of the run.
 11 |   const int size = static_cast<int>(std::size(s));
 12 |   std::vector<int> sgns;
 13 |   for (;;) {
 14 |     // <cctype> classifiers require an unsigned char value.
 15 |     while (i < size && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
 16 |     if (i >= size || (s[i] != '+' && s[i] != '-')) return sgns;
 17 |     sgns.push_back(s[i] == '+' ? 1 : -1);
 18 |     ++i;
 19 |   }
 20 | }
 21 | 
 22 | SL std::string ParseDigList(const std::string& s, int& i) {
 23 |   // Skips whitespace, then consumes a maximal run of decimal digits and
 24 |   // returns it. Returns "" (whitespace still consumed) if no digit follows.
 25 |   const int size = static_cast<int>(std::size(s));
 26 |   // <cctype> classifiers are only defined for unsigned char values (and EOF),
 27 |   // so bytes >= 0x80 must not be passed as a possibly negative plain char.
 28 |   auto at = [&s](int k) { return static_cast<unsigned char>(s[k]); };
 29 |   while (i < size && std::isspace(at(i))) ++i;
 30 |   const int start = i;
 31 |   while (i < size && std::isdigit(at(i))) ++i;
 32 |   return s.substr(start, i - start);
 33 | }
 34 | 
 35 | SL std::string ParseIdentifier(const std::string& s, int& i) {
 36 |   // Skips whitespace, then consumes a letter or '_' followed by letters,
 37 |   // digits and '_'. Returns "" if no identifier starts here.
 38 |   const int size = static_cast<int>(std::size(s));
 39 |   auto at = [&s](int k) { return static_cast<unsigned char>(s[k]); };
 40 |   while (i < size && std::isspace(at(i))) ++i;  // <cctype> needs unsigned char
 41 |   const int start = i;
 42 |   if (i < size && (s[i] == '_' || std::isalpha(at(i)))) {
 43 |     ++i;
 44 |     while (i < size && (s[i] == '_' || std::isalnum(at(i)))) ++i;
 45 |   }
 46 |   return s.substr(start, i - start);
 47 | }
 48 | 
 49 | SL std::vector<std::pair<std::string, std::string>> ParseSingleTermList(
 50 |     const std::string& s, int& i) {
 51 |   // Parses factors separated by optional '*': an identifier with an optional
 52 |   // "^digits" exponent, or a bare digit run. Yields (factor, exponent-text).
 53 |   std::vector<std::pair<std::string, std::string>> result;
 54 |   const int size = static_cast<int>(std::size(s));
 55 |   // <cctype> classifiers require an unsigned char value.
 56 |   auto skip_spaces = [&] {
 57 |     while (i < size && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
 58 |   };
 59 |   for (;;) {
 60 |     skip_spaces();
 61 |     if (i < size && s[i] == '*') ++i;  // the factor parsers skip spaces again
 62 |     std::string variable = ParseIdentifier(s, i);
 63 |     if (std::empty(variable)) variable = ParseDigList(s, i);
 64 |     if (std::empty(variable)) return result;
 65 |     skip_spaces();
 66 |     const bool is_num = std::isdigit(static_cast<unsigned char>(variable[0]));
 67 |     std::string e;  // stays "" when there is no exponent
 68 |     if (i < size && s[i] == '^' && !is_num) e = ParseDigList(s, ++i);
 69 |     result.emplace_back(variable, e);
 70 |   }
 71 | }
 72 | 
 73 | template <typename CT>
 74 | SL CT EvaluateDigs(const std::vector<int>& sgns, const std::string& digs,
 75 |                    CT defaultVal = 0) {
 76 |   // Value of the decimal digit string (defaultVal when it is empty), negated
 77 |   // once for every -1 entry in sgns.
 78 |   bool negative = false;
 79 |   for (const int sgn : sgns) negative ^= (sgn == -1);
 80 |   CT magnitude = 0;
 81 |   for (const char d : digs) magnitude = magnitude * 10 + d - '0';
 82 |   if (std::empty(digs)) magnitude = defaultVal;
 83 |   return negative ? -magnitude : magnitude;
 84 | }
 85 | 
 86 | template <typename CT>
 87 | SL std::pair<TermKey, CT> EvaluateSingleTermList(
 88 |     const std::vector<std::pair<std::string, std::string>>& singleTerms) {
 89 |   // Folds parsed factors into (name-ordered variable/exponent key, coefficient).
 90 |   std::map<std::string, int> t;
 91 |   CT c = 1;
 92 |   for (const auto& [name, exponent] : singleTerms) {
 93 |     const bool is_number = std::isdigit(static_cast<unsigned char>(name[0]));
 94 |     // Coefficients use CT (int would wrap); a missing exponent counts as 1.
 95 |     if (is_number) c *= EvaluateDigs<CT>({}, name, 1);
 96 |     if (!is_number) t[name] += EvaluateDigs<int>({}, exponent, 1);
 97 |   }
 98 |   TermKey key;
 99 |   for (const auto& iter : t) key.emplace_back(iter.first, iter.second);
100 |   return {key, c};
101 | }
102 | 
103 | template <typename CT>
104 | SL std::pair<TermKey, CT> ParseTerm(const std::string& s, int& i) {
105 |   // Reads "[signs] factors" at s[i]; the coefficient is 0 without factors.
106 |   const auto sgns = ParseSgnList(s, i);
107 |   const auto singleTerms = ParseSingleTermList(s, i);
108 |   auto [key, coeff] = EvaluateSingleTermList<CT>(singleTerms);
109 |   if (std::empty(singleTerms)) return {key, CT(0)};
110 |   return {key, EvaluateDigs<CT>(sgns, "", 1) == 1 ? coeff : -coeff};
111 | }
112 | 
113 | template <typename CT>
114 | SL std::pair<TermKey, CT> ParseTerm(const std::string& s) {
115 |   int pos = 0;  // parse from the beginning; the end position is discarded
116 |   return ParseTerm<CT>(s, pos);
117 | }
118 | 
119 | template <typename CT>
120 | SL std::map<TermKey, CT> ParseSymPolyTerms(const std::string& s) {
121 |   // Parses consecutive terms, summing coefficients of equal keys. Stops at the
122 |   // first position where no term can be read and reports any unparsed rest on
123 |   // std::cerr. Characters go to std::isspace as unsigned char, as it requires.
124 |   const int size = static_cast<int>(std::size(s));
125 |   int i = 0;
126 |   std::map<TermKey, CT> terms;
127 |   for (;;) {
128 |     while (i < size && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
129 |     const int before = i;
130 |     const auto t = ParseTerm<CT>(s, i);
131 |     if (before != i) {
132 |       terms[t.first] += t.second;
133 |       continue;
134 |     }
135 |     if (i < size) std::cerr << "Unknown: " << s.substr(i) << std::endl;
136 |     return terms;
137 |   }
138 | }
138 | 
139 | template <typename CT>
140 | SL SymPoly<CT> ParseSymPoly(const std::string& s) {
141 |   return SymPoly<CT>(ParseSymPolyTerms<CT>(s));
142 | }
143 | 
144 | SL TermKey ToTermKey(const std::string& s) {
145 |   int i = 0;
146 |   return ParseTerm<int64>(s, i).first;
147 | }
148 | }


--------------------------------------------------------------------------------
/pe:
--------------------------------------------------------------------------------
 1 | #ifndef PE_
 2 | #define PE_
 3 | 
 4 | // Base
 5 | #include "pe_base"
 6 | #include "pe_type_traits"
 7 | #include "pe_span"
 8 | #include "pe_bit"
 9 | #include "pe_mod"  // Modular arithmetic
10 | #include "pe_int"
11 | #include "pe_extended_int"
12 | #include "pe_float"
13 | #include "pe_vector"
14 | 
15 | // General util
16 | #include "pe_io"
17 | #include "pe_time"
18 | #include "pe_persistance"
19 | #include "pe_tree"
20 | #include "pe_rand"
21 | 
22 | // Range
23 | #include "pe_range"
24 | 
25 | // Matrix arithmetic
26 | #include "pe_mat"
27 | 
28 | // Number theory arithmetic
29 | #include "pe_nt_base"
30 | #include "pe_nt"
31 | 
32 | // Fraction arithmetic
33 | #include "pe_fraction"
34 | 
35 | // Parallel support
36 | #include "pe_parallel"
37 | #include "pe_parallel_algo"
38 | 
39 | // Polynomial
40 | #include "pe_poly_base"
41 | #include "pe_poly_algo"
42 | 
43 | // fft
44 | #include "pe_fft"
45 | 
46 | // Big integer
47 | #include "pe_gbi"
48 | #include "pe_bi32"
49 | #include "pe_mpz"
50 | 
51 | // Geometry
52 | #include "pe_geometry"
53 | 
54 | // Large memory support (windows)
55 | #include "pe_memory"
56 | 
57 | // MP extension
58 | #include "pe_mpf"
59 | 
60 | #include "pe_serialization"
61 | 
62 | // Misc
63 | #include "pe_misc"
64 | #include "pe_mma"
65 | 
66 | #include "pe_array"
67 | #include "pe_ntf"
68 | #include "pe_algo"
69 | #include "pe_sym_poly"
70 | #include "pe_db"
71 | #include "pe_int_algo"
72 | 
73 | #include "pe_dpe"
74 | 
75 | #include "pe_initializer"
76 | 
77 | // Undefined or unspecified behavior
78 | // https://en.cppreference.com/w/cpp/language/extending_std
79 | namespace std {
80 | // Required by
81 | // 1. operator << for std::vector<T>, std::set<T>, std::map<T>, etc.
82 | // 2. pe::int128, pe::uint128, pe::float128 which are non-class types.
83 | using pe::operator<<;
84 | }  // namespace std
85 | 
86 | #endif
87 | 


--------------------------------------------------------------------------------
/pe.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef PE_HPP_
 2 | #define PE_HPP_
 3 | 
 4 | // Use
 5 | // g++ -xc++-header pe.hpp --std=c++20 -fno-diagnostics-color -O3 -march=native
 6 | // -mtune=native -fopenmp -pthread -static
 7 | // to generate pe.hpp.gch
 8 | #include <pe>
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/pe_array:
--------------------------------------------------------------------------------
  1 | #ifndef PE_ARRAY_
  2 | #define PE_ARRAY_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_memory"
  6 | 
  7 | namespace pe {
  8 | template <typename T, int D>
  9 | struct DArrayRef;
 10 | 
 11 | // T: element type
 12 | // D: number of dimensions
 13 | // A: allocator
 14 | template <typename T, int D, typename A = StdAllocator>
 15 | struct DArray : public DArrayRef<T, D> {
 16 |   template <typename TT, int DD>
 17 |   friend struct DArrayRef;
 18 | 
 19 |   using base = DArrayRef<T, D>;
 20 |   template <class... Args>
 21 |   DArray(const std::vector<int64>& dimension, Args&&... arg)
 22 |       : dimension_(dimension), base(nullptr, 0, nullptr, nullptr) {
 23 |     Init(arg...);
 24 |     PE_ASSERT(std::size(dimension) == D);
 25 |   }
 26 | 
 27 |   DArray() : base(nullptr, 0, nullptr, nullptr) {
 28 |     dimension_ = std::vector<int64>(D, 1);
 29 |     Init();
 30 |     PE_ASSERT(std::size(dimension_) == D);
 31 |   }
 32 | 
 33 |   DArray(const DArray&) = delete;
 34 |   DArray(DArray&&) = delete;
 35 | 
 36 |   DArray& operator=(const DArray&) = delete;
 37 |   DArray& operator=(DArray&&) = delete;
 38 | 
 39 |   ~DArray() { Clear(); }
 40 | 
 41 |   template <class... Args>
 42 |   void Reset(const std::vector<int64>& dimension, Args&&... arg) {
 43 |     Clear();
 44 |     this->dimension_ = dimension;
 45 |     Init(arg...);
 46 |   }
 47 | 
 48 |   template <class... Args>
 49 |   void Init(Args&&... arg) {
 50 |     const int d = static_cast<int>(std::size(dimension_));
 51 |     element_counts_.clear();
 52 |     element_counts_.push_back(1);
 53 |     for (int i = d - 1; i >= 0; --i) {
 54 |       element_counts_.push_back(element_counts_.back() * dimension_[i]);
 55 |     }
 56 |     std::reverse(element_counts_.begin(), element_counts_.end());
 57 |     element_count_ = element_counts_[0];
 58 |     base::dimension_ = &dimension_[0];
 59 |     base::element_counts_ = &element_counts_[0];
 60 |     base::data_ = reinterpret_cast<T*>(A::Allocate(element_count_ * sizeof(T)));
 61 |     for (int64 i = 0; i < element_count_; ++i) {
 62 |       new (base::data_ + i) T(arg...);
 63 |     }
 64 |   }
 65 | 
 66 |   void Clear() {
 67 |     for (int64 i = 0; i < element_count_; ++i) {
 68 |       (base::data_ + i)->~T();
 69 |     }
 70 |     A::Deallocate(base::data_);
 71 |   }
 72 | 
 73 |   T* data() { return base::data_; }
 74 | 
 75 |   DArrayRef<T, D> Ref() {
 76 |     return DArrayRef<T, D>(base::data_, 0, base::dimension_,
 77 |                            base::element_counts_);
 78 |   }
 79 | 
 80 |  private:
 81 |   std::vector<int64> dimension_;
 82 |   std::vector<int64> element_counts_;
 83 |   int64 element_count_;
 84 | };
 85 | 
 86 | template <typename T, int D>
 87 | struct DArrayRef {
 88 |   using ValueType = DArrayRef<T, D - 1>;
 89 |   using ConstValueType = DArrayRef<T, D - 1>;
 90 |   // Non-owning view; off counts the leading dimensions already indexed away.
 91 |   DArrayRef(T* data, int off, const int64* dimension,
 92 |             const int64* element_counts)
 93 |       : data_(data),
 94 |         off_(off),
 95 |         element_counts_(element_counts),  // initializers follow member order
 96 |         dimension_(dimension) {}
 97 |   // Sub-view idx: element_counts_[off_ + 1] elements separate adjacent ones.
 98 |   DArrayRef<T, D - 1> operator[](int64 idx) const {
 99 |     return DArrayRef<T, D - 1>(data_ + idx * element_counts_[off_ + 1],
100 |                                off_ + 1, dimension_, element_counts_);
101 |   }
102 | 
103 |  protected:
104 |   T* data_;
105 |   const int off_;
106 |   const int64* element_counts_;
107 |   const int64* dimension_;
108 | };
109 | 
110 | template <typename T>
111 | struct DArrayRef<T, 1> {
112 |   using ValueType = T&;
113 |   using ConstValueType = const T&;
114 |   // Innermost (one-dimensional) view: indexing yields elements, not views.
115 |   DArrayRef(T* data, int off, const int64* dimension,
116 |             const int64* element_counts)
117 |       : data_(data),
118 |         off_(off),
119 |         element_counts_(element_counts),  // initializers follow member order
120 |         dimension_(dimension) {}
121 | 
122 |   T& operator[](int64 idx) { return data_[idx]; }
123 |   ConstValueType operator[](int64 idx) const { return data_[idx]; }
124 | 
125 |  protected:
126 |   T* data_;
127 |   const int off_;
128 |   const int64* element_counts_;
129 |   const int64* dimension_;
130 | };
131 | 
132 | template <int64... X>
133 | struct ArrayShape;
134 | 
135 | template <int64 H, int64... X>
136 | struct ArrayShape<H, X...> {
137 |   using Next = ArrayShape<X...>;
138 |   const static int64 D = 1 + Next::D;
139 |   const static int64 EC = H * Next::EC;
140 | };
141 | 
142 | template <int64 H>
143 | struct ArrayShape<H> {
144 |   const static int64 D = 1;
145 |   const static int64 EC = H;
146 | };
147 | 
148 | template <typename T, typename S>
149 | struct FArrayRef;
150 | 
151 | // T: element type
152 | // S: array shape
153 | // A: allocator
154 | template <typename T, typename S, typename A = StdAllocator>
155 | struct FArray : public FArrayRef<T, S> {
156 |   using base = FArrayRef<T, S>;
157 | 
158 |   template <class... Args>
159 |   FArray(Args&&... arg) : base(nullptr) {
160 |     Init(arg...);
161 |   }
162 | 
163 |   FArray(const FArray&) = delete;
164 |   FArray(FArray&&) = delete;
165 | 
166 |   FArray& operator=(const FArray&) = delete;
167 |   FArray& operator=(FArray&&) = delete;
168 | 
169 |   ~FArray() { Clear(); }
170 | 
171 |   template <class... Args>
172 |   void Init(Args&&... arg) {
173 |     base::data_ = reinterpret_cast<T*>(A::Allocate(S::EC * sizeof(T)));
174 |     for (int64 i = 0; i < S::EC; ++i) {
175 |       new (base::data_ + i) T(arg...);
176 |     }
177 |   }
178 | 
179 |   void Clear() {
180 |     for (int64 i = 0; i < S::EC; ++i) {
181 |       (base::data_ + i)->~T();
182 |     }
183 |     A::Deallocate(base::data_);
184 |   }
185 | 
186 |   T* data() { return base::data_; }
187 | 
188 |   FArrayRef<T, S> Ref() { return FArrayRef<T, S>(base::data_); }
189 | };
190 | 
191 | template <typename T, typename S>
192 | struct FArrayRef {
193 |   using NextShape = typename S::Next;
194 |   using ValueType = FArrayRef<T, NextShape>;
195 |   using ConstValueType = FArrayRef<T, NextShape>;
196 | 
197 |   FArrayRef(T* data) : data_(data) {}
198 | 
199 |   FArrayRef<T, NextShape> operator[](int64 idx) const {
200 |     return FArrayRef<T, NextShape>(data_ + idx * NextShape::EC);
201 |   }
202 | 
203 |  protected:
204 |   T* data_;
205 | };
206 | 
207 | template <typename T, int64 H>
208 | struct FArrayRef<T, ArrayShape<H>> {
209 |   using ValueType = T&;
210 |   using ConstValueType = const T&;
211 | 
212 |   FArrayRef(T* data) : data_(data) {}
213 | 
214 |   ValueType operator[](int64 idx) { return data_[idx]; }
215 |   ConstValueType operator[](int64 idx) const { return data_[idx]; }
216 | 
217 |  protected:
218 |   T* data_;
219 | };
220 | 
221 | // T: element type
222 | // X: the extent of each dimension
223 | template <typename T, int64... X>
224 | using Array = FArray<T, ArrayShape<X...>>;
225 | 
226 | // T: element type
227 | // A: allocator
228 | // X: the extent of each dimension
229 | template <typename T, typename A, int64... X>
230 | using AArray = FArray<T, ArrayShape<X...>, A>;
231 | }  // namespace pe
232 | #endif


--------------------------------------------------------------------------------
/pe_config:
--------------------------------------------------------------------------------
 1 | #ifndef PE_CONFIG_
 2 | #define PE_CONFIG_
 3 | 
 4 | // This file provides a centralized place to configure pe
 5 | 
 6 | // Auto generated by gen_config.py, and you can edit it manually
 7 | 
 8 | // Configuration priority (first match):
 9 | // 1. Compiling command
10 | // 2. The configurations in this file
11 | // 3. The configurations in file pe
12 | 
13 | #ifndef ENABLE_ASSERT
14 | #define ENABLE_ASSERT 1
15 | #endif
16 | 
17 | #ifndef TRY_TO_USE_INT128
18 | #define TRY_TO_USE_INT128 1
19 | #endif
20 | 
21 | #ifndef ENABLE_EIGEN
22 | #define ENABLE_EIGEN 1
23 | #endif
24 | 
25 | #ifndef ENABLE_GMP
26 | #define ENABLE_GMP 1
27 | #endif
28 | 
29 | #ifndef ENABLE_FLINT
30 | #define ENABLE_FLINT 1
31 | #endif
32 | 
33 | #ifndef ENABLE_MPFR
34 | #define ENABLE_MPFR 1
35 | #endif
36 | 
37 | #ifndef ENABLE_LIBBF
38 | #define ENABLE_LIBBF 1
39 | #endif
40 | 
41 | #ifndef ENABLE_NTL
42 | #define ENABLE_NTL 1
43 | #endif
44 | 
45 | #ifndef ENABLE_ZMQ
46 | #define ENABLE_ZMQ 1
47 | #endif
48 | 
49 | #ifndef ENABLE_PRIME_COUNT
50 | #define ENABLE_PRIME_COUNT 1
51 | #endif
52 | 
53 | #ifndef ENABLE_PRIME_SIEVE
54 | #define ENABLE_PRIME_SIEVE 1
55 | #endif
56 | 
57 | #ifndef ENABLE_TCMALLOC
58 | #define ENABLE_TCMALLOC 0
59 | #endif
60 | 
61 | #endif


--------------------------------------------------------------------------------
/pe_float:
--------------------------------------------------------------------------------
  1 | #ifndef PE_FLOAT128_
  2 | #define PE_FLOAT128_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_int"
  6 | 
  7 | namespace pe {
  8 | template <typename T>
  9 | SL int IsNAN(T v) {
 10 |   return 0;
 11 | }
 12 | }  // namespace pe
 13 | 
 14 | #if PE_HAS_FLOAT128
 15 | namespace pe {
 16 | namespace internal {
 17 | SL std::string ToStringFloat128(float128 f, const char* format_string,
 18 |                                 int dig = 20) {
 19 |   // Formats f with format_string, which must consume (precision, value).
 20 |   // Returns "" if quadmath_snprintf reports an error.
 21 |   char buff[256];
 22 |   const int buff_size = sizeof(buff);
 23 |   const int n = quadmath_snprintf(buff, buff_size, format_string, dig, f);
 24 |   if (n < 0) {
 25 |     return "";  // buff may not hold a valid string after a failure
 26 |   }
 27 |   if (n < buff_size) {
 28 |     return buff;
 29 |   }
 30 |   // Output was truncated. n is the full length (the value this function has
 31 |   // always relied on), so format again into a string of n + 1 bytes and
 32 |   // drop the terminator afterwards.
 33 |   std::string result(static_cast<size_t>(n) + 1, '\0');
 34 |   quadmath_snprintf(&result[0], result.size(), format_string, dig, f);
 35 |   result.resize(n);
 36 |   return result;
 37 | }
 38 | }  // namespace internal
 39 | 
 40 | SL std::string ToString(float128 f, int dig = 20) {
 41 |   return internal::ToStringFloat128(f, "%#.*Qe", dig);
 42 | }
 43 | 
 44 | SL std::string ToStringF(float128 f, int dig = 20) {
 45 |   return internal::ToStringFloat128(f, "%#.*Qf", dig);
 46 | }
 47 | // Alias of ToString that honours the requested digit count.
 48 | SL std::string to_string(float128 x, int dig = 20) { return ToString(x, dig); }
 49 | 
 50 | SL std::ostream& operator<<(std::ostream& o, float128 f) {
 51 |   return o << ToString(f, 20);
 52 | }
 53 | 
 54 | SL int IsNAN(float128 v) { return isnanq(v); }
 55 | 
 56 | SL float128 Abs(float128 f) { return fabsq(f); }
 57 | SL float128 FAbs(float128 f) { return fabsq(f); }
 58 | SL float128 Ceil(float128 f) { return ceilq(f); }
 59 | SL float128 Floor(float128 f) { return floorq(f); }
 60 | SL float128 Trunc(float128 f) { return truncq(f); }
 61 | SL float128 Power(float128 f, int p) {
 62 |   return powq(f, static_cast<float128>(p));
 63 | }
 64 | 
 65 | SL float128 Sqrt(float128 f) { return sqrtq(f); }
 66 | SL float128 Cos(float128 f) { return cosq(f); }
 67 | SL float128 Sin(float128 f) { return sinq(f); }
 68 | SL float128 Exp(float128 f) { return expq(f); }
 69 | SL float128 Log(float128 f) { return logq(f); }
 70 | SL float128 Log10(float128 f) { return log10q(f); }
 71 | }  // namespace pe
 72 | 
 73 | #endif
 74 | 
 75 | namespace pe {
 76 | namespace internal {
 77 | template <typename T>
 78 | SL std::string ToStringFloat(T f, const char* format_string, int dig = 20) {
 79 |   // Formats f with format_string, which must consume (precision, value).
 80 |   // Returns "" if snprintf reports an error.
 81 |   char buff[256];
 82 |   const int buff_size = sizeof(buff);
 83 |   const int n = snprintf(buff, buff_size, format_string, dig, f);
 84 |   if (n < 0) {
 85 |     return "";  // buff may not hold a valid string after a failure
 86 |   }
 87 |   if (n < buff_size) {
 88 |     return buff;
 89 |   }
 90 |   // Output was truncated. snprintf returned the full length, so format
 91 |   // again into a string of n + 1 bytes and drop the terminator afterwards;
 92 |   // no manual malloc/free is needed.
 93 |   std::string result(static_cast<size_t>(n) + 1, '\0');
 94 |   snprintf(&result[0], result.size(), format_string, dig, f);
 95 |   result.resize(n);
 96 |   return result;
 97 | }
 98 | }  // namespace internal
 99 | // ToString: "%e"-style (scientific) text with dig passed as the precision.
100 | SL std::string ToString(float f, int dig = 20) {
101 |   return internal::ToStringFloat<float>(f, "%#.*e", dig);
102 | }
103 | 
104 | SL std::string ToString(double f, int dig = 20) {
105 |   return internal::ToStringFloat<double>(f, "%#.*e", dig);
106 | }
107 | 
108 | SL std::string ToString(long double f, int dig = 20) {
109 |   return internal::ToStringFloat<long double>(f, "%#.*Le", dig);
110 | }
111 | // ToStringF: "%f"-style (fixed-point) text with dig passed as the precision.
112 | SL std::string ToStringF(float f, int dig = 20) {
113 |   return internal::ToStringFloat<float>(f, "%#.*f", dig);
114 | }
115 | 
116 | SL std::string ToStringF(double f, int dig = 20) {
117 |   return internal::ToStringFloat<double>(f, "%#.*f", dig);
118 | }
119 | 
120 | SL std::string ToStringF(long double f, int dig = 20) {
121 |   return internal::ToStringFloat<long double>(f, "%#.*Lf", dig);
122 | }
123 | // Overloads for float, double and long double; each forwards to <cmath>.
124 | template <typename T>
125 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(int)
126 |     IsNAN(T v) {
127 |   return std::isnan(v);
128 | }
129 | // Abs and FAbs are the same operation: both return std::fabs(f).
130 | template <typename T>
131 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Abs(T f) {
132 |   return std::fabs(f);
133 | }
134 | 
135 | template <typename T>
136 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) FAbs(T f) {
137 |   return std::fabs(f);
138 | }
139 | // Rounding helpers: std::floor, std::ceil and std::trunc.
140 | template <typename T>
141 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Floor(T f) {
142 |   return std::floor(f);
143 | }
144 | 
145 | template <typename T>
146 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Ceil(T f) {
147 |   return std::ceil(f);
148 | }
149 | 
150 | template <typename T>
151 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Trunc(T f) {
152 |   return std::trunc(f);
153 | }
154 | // Power: the int exponent is converted to T before std::pow is called.
155 | template <typename T>
156 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T)
157 |     Power(T f, int p) {
158 |   return std::pow(f, static_cast<T>(p));
159 | }
160 | 
161 | template <typename T>
162 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Sqrt(T f) {
163 |   return std::sqrt(f);
164 | }
165 | 
166 | template <typename T>
167 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Cos(T f) {
168 |   return std::cos(f);
169 | }
170 | 
171 | template <typename T>
172 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Sin(T f) {
173 |   return std::sin(f);
174 | }
175 | 
176 | template <typename T>
177 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Exp(T f) {
178 |   return std::exp(f);
179 | }
180 | // Log is std::log; Log10 is std::log10.
181 | template <typename T>
182 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Log(T f) {
183 |   return std::log(f);
184 | }
185 | 
186 | template <typename T>
187 | SL REQUIRES((is_one_of_v<T, float, double, long double>)) RETURN(T) Log10(T f) {
188 |   return std::log10(f);
189 | }
190 | }  // namespace pe
191 | 
192 | #if PE_HAS_CPP20
193 | namespace pe {
194 | template <class T>  // T is comparable and the listed calls are well-formed.
195 | concept PeFloatUtil = requires(T v) {
196 |   requires PeComparable<T>;
197 |   Abs(v);
198 |   FAbs(v);
199 |   Floor(v);
200 |   Ceil(v);
201 |   Trunc(v);
202 |   // Power is probed with an int exponent.
203 |   Power(v, 0);
204 |   Sqrt(v);
205 |   // Both spellings of the string conversion must exist.
206 |   ToString(v);
207 |   to_string(v);
208 |   // The value must be streamable to std::cout.
209 |   std::cout << v;
210 | };
211 | // PeRichFloatUtil: PeFloatUtil plus Cos, Sin, Exp, Log and Log10.
212 | template <class T>
213 | concept PeRichFloatUtil = requires(T v) {
214 |   requires PeFloatUtil<T>;
215 |   Cos(v);
216 |   Sin(v);
217 |   Exp(v);
218 |   Log(v);
219 |   Log10(v);
220 | };
221 | 
222 | static_assert(PeRichFloatUtil<float>);
223 | static_assert(PeRichFloatUtil<double>);
224 | static_assert(PeRichFloatUtil<long double>);
225 | 
226 | #if PE_HAS_FLOAT128
227 | static_assert(PeRichFloatUtil<float128>);
228 | #endif
229 | }  // namespace pe
230 | #endif
231 | 
232 | #endif
233 | 


--------------------------------------------------------------------------------
/pe_gbi:
--------------------------------------------------------------------------------
  1 | #ifndef PE_GBI_
  2 | #define PE_GBI_
  3 | 
  4 | // General Big integer.
  5 | 
  6 | #include "pe_base"
  7 | #include "pe_type_traits"
  8 | #include "pe_int"
  9 | #include "pe_nt"
 10 | #include "pe_fraction"
 11 | 
 12 | namespace pe {
 13 | // Free-function forms of the member functions of is_gbi_v types.
 14 | template <typename T>
 15 | SL REQUIRES((is_gbi_v<T>)) RETURN(int) BitWidth(const T& x) {
 16 |   return x.BitWidth();
 17 | }
 18 | 
 19 | template <typename T>
 20 | SL REQUIRES((is_gbi_v<T>)) RETURN(int) Popcount(const T& x) {
 21 |   return x.Popcount();
 22 | }
 23 | // SetBit / ResetBit / RevBit modify x in place; GetBit only reads it.
 24 | template <typename T>
 25 | SL REQUIRES((is_gbi_v<T>)) RETURN(void) SetBit(T& x, int idx) {
 26 |   x.SetBit(idx);
 27 | }
 28 | 
 29 | template <typename T>
 30 | SL REQUIRES((is_gbi_v<T>)) RETURN(void) ResetBit(T& x, int idx) {
 31 |   x.ResetBit(idx);
 32 | }
 33 | 
 34 | template <typename T>
 35 | SL REQUIRES((is_gbi_v<T>)) RETURN(int) GetBit(const T& x, int idx) {
 36 |   return x.GetBit(idx);
 37 | }
 38 | 
 39 | template <typename T>
 40 | SL REQUIRES((is_gbi_v<T>)) RETURN(void) RevBit(T& x, int idx) {
 41 |   x.RevBit(idx);
 42 | }
 43 | // Zero / sign / parity queries, each forwarding to the member of that name.
 44 | template <typename T>
 45 | SL REQUIRES((is_gbi_v<T>)) RETURN(int) IsZero(const T& x) {
 46 |   return x.IsZero();
 47 | }
 48 | 
 49 | template <typename T>
 50 | SL REQUIRES((is_gbi_v<T>)) RETURN(int) IntSign(const T& x) {
 51 |   return x.IntSign();
 52 | }
 53 | 
 54 | template <typename T>
 55 | SL REQUIRES((is_gbi_v<T>)) RETURN(int) IsEven(const T& x) {
 56 |   return x.IsEven();
 57 | }
 58 | 
 59 | template <typename T>
 60 | SL REQUIRES((is_gbi_v<T>)) RETURN(int) IsOdd(const T& x) {
 61 |   return x.IsOdd();
 62 | }
 63 | // SameParity: true when IsEven gives the same answer for both arguments.
 64 | template <typename T0, typename T1>
 65 | SL REQUIRES((is_gbi_v<T0> && is_gbi_v<T1>)) RETURN(int)
 66 |     SameParity(const T0& v0, const T1& v1) {
 67 |   return IsEven(v0) == IsEven(v1);
 68 | }
 69 | 
 70 | template <typename T>
 71 | SL REQUIRES((is_gbi_v<T>)) RETURN(uint32) LowerBits(const T& x) {
 72 |   return x.LowerBits();
 73 | }
 74 | // Abs and FAbs are identical: n when IntSign(n) >= 0, otherwise -n.
 75 | template <typename T>
 76 | SL REQUIRES((is_gbi_v<T>)) RETURN(T) Abs(const T& n) {
 77 |   return IntSign(n) >= 0 ? n : -n;
 78 | }
 79 | 
 80 | template <typename T>
 81 | SL REQUIRES((is_gbi_v<T>)) RETURN(T) FAbs(const T& n) {
 82 |   return IntSign(n) >= 0 ? n : -n;
 83 | }
 84 | // Power: x raised to n by repeated squaring; n is asserted to be >= 0.
 85 | template <typename T, typename TN>
 86 | SL REQUIRES((is_gbi_v<T> && is_builtin_integer_v<TN>)) RETURN(T)
 87 |     Power(T x, TN n) {
 88 |   PE_ASSERT(n >= 0);
 89 |   T ret = 1;
 90 |   for (; IntSign(n) > 0; n >>= 1) {
 91 |     if (n & 1) ret = ret * x;  // this bit of n contributes the current power
 92 |     if (n > 1) x = x * x;      // square only while higher bits remain
 93 |   }
 94 |   return ret;
 95 | }
 96 | // Div: returns the pair (a / b, a % b) as a tuple.
 97 | template <typename T>
 98 | SL REQUIRES((is_gbi_v<T>)) RETURN(std::tuple<T, T>)
 99 |     Div(const T& a, const T& b) {
100 |   return {a / b, a % b};
101 | }
102 | 
103 | template <typename T>
104 | SL REQUIRES((is_gbi_v<T>)) RETURN(T) FloorDiv(const T& a, const T& b) {
105 |   // Returns a / b, lowered by one when exactly one operand is negative and
106 |   // the division leaves a remainder.
107 |   const bool a_negative = IntSign(a) < 0;
108 |   const bool b_negative = IntSign(b) < 0;
109 |   T quotient = a / b;
110 |   if (a_negative == b_negative) {
111 |     // No operand, or both operands, negative: a / b is already the answer.
112 |     return quotient;
113 |   }
114 |   if (!IsZero(a % b)) {
115 |     --quotient;
116 |   }
117 |   return quotient;
118 | }
119 | // CeilDiv: a / b adjusted upward; the sign cases are handled separately.
120 | template <typename T>
121 | SL REQUIRES((is_gbi_v<T>)) RETURN(T) CeilDiv(const T& a, const T& b) {
122 |   const int sign_a = IntSign(a);
123 |   const int sign_b = IntSign(b);
124 |   if (sign_a > 0 && sign_b > 0) {
125 |     return 1 + (a - 1) / b;  // both operands positive
126 |   }
127 |   if (sign_a < 0 && sign_b < 0) {
128 |     return 1 + (a + 1) / b;  // both operands negative
129 |   }
130 |   T q = a / b;
131 |   if (((sign_a < 0) == (sign_b < 0)) && !IsZero(a % b)) {
132 |     ++q;  // NOTE(review): only reachable with a or b zero here - confirm
133 |   }
134 |   return q;
135 | }
136 | // Mod: a reduced by mod; a negative remainder gets mod added once.
137 | template <typename T, typename TM>
138 | SL REQUIRES((is_gbi_v<T>)) RETURN(int_promotion_t<T, TM>)
139 |     Mod(const T& a, TM mod) {
140 |   if (IntSign(a) >= 0) {
141 |     return a >= mod ? a % mod : a;  // skip the division when a < mod
142 |   } else {
143 |     T tmp = a % mod;
144 |     return IntSign(tmp) < 0 ? tmp + mod : tmp;
145 |   }
146 | }
147 | // AddMod: a + b, minus mod once if the sum is >= mod (no further reduction).
148 | template <typename T, typename TM>
149 | SL REQUIRES((is_gbi_v<T>)) RETURN(T) AddMod(T a, const T& b, TM mod) {
150 |   a += b;
151 |   if (a >= mod) {
152 |     a -= mod;
153 |   }
154 |   return a;
155 | }
156 | // SubMod: a - b, plus mod once if the difference is negative.
157 | template <typename T, typename TM>
158 | SL REQUIRES((is_gbi_v<T>)) RETURN(T) SubMod(T a, const T& b, TM mod) {
159 |   a -= b;
160 |   if (IntSign(a) < 0) {
161 |     a += mod;
162 |   }
163 |   return a;
164 | }
165 | // MulMod: a * b % mod, computed on the full product.
166 | template <typename T, typename TM>
167 | SL REQUIRES((is_gbi_v<T>)) RETURN(T) MulMod(const T& a, const T& b, TM mod) {
168 |   return a * b % mod;
169 | }
170 | // PowerMod (builtin exponent, big-integer modulus): x^n % mod by squaring.
171 | template <typename T, typename TN>
172 | SL REQUIRES((is_gbi_v<T> && is_builtin_integer_v<TN>)) RETURN(T)
173 |     PowerMod(T x, TN n, const T& mod) {
174 |   PE_ASSERT(n >= 0);
175 |   if (mod == 1) {
176 |     return 0;  // every value is 0 modulo 1
177 |   }
178 |   T ret = 1;
179 |   x %= mod;
180 |   for (; n > 0; n >>= 1) {
181 |     if (n & 1) ret = ret * x % mod;
182 |     if (n > 1) x = x * x % mod;  // square only while higher bits remain
183 |   }
184 |   return ret;
185 | }
186 | // PowerMod with builtin exponent and builtin modulus; same loop as above.
187 | template <typename T, typename TN, typename TM>
188 | SL REQUIRES((is_gbi_v<T> && is_builtin_integer_v<TN> &&
189 |              is_builtin_integer_v<TM>)) RETURN(T) PowerMod(T x, TN n, TM mod) {
190 |   PE_ASSERT(n >= 0);
191 |   if (mod == 1) {
192 |     return 0;
193 |   }
194 |   T ret = 1;
195 |   x %= mod;
196 |   for (; n > 0; n >>= 1) {
197 |     if (n & 1) ret = ret * x % mod;
198 |     if (n > 1) x = x * x % mod;
199 |   }
200 |   return ret;
201 | }
202 | // PowerMod with a big-integer exponent: walks the bits of n from bit 0 up.
203 | template <typename T>
204 | SL REQUIRES((is_gbi_v<T>)) RETURN(T) PowerMod(T x, const T& n, const T& mod) {
205 |   PE_ASSERT(IntSign(n) >= 0);
206 |   if (mod == 1) {
207 |     return 0;
208 |   }
209 |   T ret = 1;
210 |   x %= mod;
211 |   const int bit_width = BitWidth(n);
212 |   for (int i = 0; i < bit_width; ++i) {
213 |     if (GetBit(n, i)) ret = ret * x % mod;
214 |     if (i + 1 < bit_width) x = x * x % mod;  // no squaring after the top bit
215 |   }
216 |   return ret;
217 | }
218 | // ToInt<TT> / ToFloat<TT>: forward to the member templates of the same name.
219 | template <typename TT, typename T>
220 | SL REQUIRES((is_gbi_v<T>)) RETURN(TT) ToInt(const T& x) {
221 |   return x.template ToInt<TT>();
222 | }
223 | 
224 | template <typename TT, typename T>
225 | SL REQUIRES((is_gbi_v<T>)) RETURN(TT) ToFloat(const T& x) {
226 |   return x.template ToFloat<TT>();
227 | }
228 | // ToString and to_string both return x.ToString().
229 | template <typename T>
230 | SL REQUIRES((is_gbi_v<T>)) RETURN(std::string) ToString(const T& x) {
231 |   return x.ToString();
232 | }
233 | 
234 | template <typename T>
235 | SL REQUIRES((is_gbi_v<T>)) RETURN(std::string) to_string(const T& x) {
236 |   return x.ToString();
237 | }
238 | 
239 | #if PE_HAS_CPP20
240 | template <class T>  // Satisfied when is_gbi_v<T> and the four concepts hold.
241 | concept PeGbi = requires(T v) {
242 |   requires static_cast<bool>(is_gbi_v<T>);
243 |   requires PeArithmeticOperation<T>;
244 |   requires PeSelfOperation<T>;
245 |   requires PeComparable<T>;
246 |   requires PeIntegerUtil<T>;
247 | };
248 | #endif
249 | }  // namespace pe
250 | #endif
251 | 


--------------------------------------------------------------------------------
/pe_initializer:
--------------------------------------------------------------------------------
  1 | #ifndef PE_INITIALIZER_
  2 | #define PE_INITIALIZER_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_mod"
  6 | #include "pe_nt"
  7 | #include "pe_fft"
  8 | #include "pe_parallel"
  9 | 
 10 | namespace pe {
 11 | struct PeInitializer {  // Holds start-up settings; Init() applies them.
 12 |   PeInitializer& set_max_prime(int64 maxp = 1000000) {
 13 |     this->maxp = maxp;
 14 |     return *this;
 15 |   }
 16 |   // The set_cal_* flags are forwarded to InitPrimes by InitNt.
 17 |   PeInitializer& set_cal_phi(int cal_phi = 1) {
 18 |     this->cal_phi = cal_phi;
 19 |     return *this;
 20 |   }
 21 | 
 22 |   PeInitializer& set_cal_mu(int cal_mu = 1) {
 23 |     this->cal_mu = cal_mu;
 24 |     return *this;
 25 |   }
 26 | 
 27 |   PeInitializer& set_cal_rad(int cal_rad = 1) {
 28 |     this->cal_rad = cal_rad;
 29 |     return *this;
 30 |   }
 31 | 
 32 |   PeInitializer& set_cal_sigma0(int cal_sigma0 = 1) {
 33 |     this->cal_sigma0 = cal_sigma0;
 34 |     return *this;
 35 |   }
 36 | 
 37 |   PeInitializer& set_cal_sigma1(int cal_sigma1 = 1) {
 38 |     this->cal_sigma1 = cal_sigma1;
 39 |     return *this;
 40 |   }
 41 |   // Sizes handed to the FFT / NTT initialisers; a negative value skips them.
 42 |   PeInitializer& set_fft_k(int fft_k = 22) {
 43 |     this->fft_k = fft_k;
 44 |     return *this;
 45 |   }
 46 | 
 47 |   PeInitializer& set_ntt32_k(int ntt32_k = 22) {
 48 |     this->ntt32_k = ntt32_k;
 49 |     return *this;
 50 |   }
 51 | 
 52 |   PeInitializer& set_ntt64_k(int ntt64_k = 22) {
 53 |     this->ntt64_k = ntt64_k;
 54 |     return *this;
 55 |   }
 56 |   // Value later passed to DefaultMod::Set by Init.
 57 |   PeInitializer& set_mod(int64 default_mod = 1000000007) {
 58 |     this->default_mod = default_mod;
 59 |     return *this;
 60 |   }
 61 |   // Runs every initialisation step, in this order, with the stored settings.
 62 |   void Init() {
 63 |     InitNt();
 64 |     InitParallel();
 65 |     InitFft(fft_k);
 66 |     InitNtt32(ntt32_k);
 67 |     InitNtt64(ntt64_k);
 68 |     DefaultMod::Set(default_mod);
 69 |   }
 70 |   // DeinitPrimes + InitMaxp, then InitPrimes (with the flags unless all are 0).
 71 |   void InitNt() {
 72 |     DeinitPrimes();
 73 |     InitMaxp(maxp);
 74 |     if (cal_phi == 0 && cal_mu == 0 && cal_rad == 0 && cal_sigma0 == 0 &&
 75 |         cal_sigma1 == 0) {
 76 |       InitPrimes();
 77 |     } else {
 78 |       InitPrimes(cal_phi, cal_mu, cal_rad, cal_sigma0, cal_sigma1);
 79 |     }
 80 |   }
 81 |   // Applies OpenMP and FLINT threading settings when those are compiled in.
 82 |   void InitParallel() {
 83 | #if ENABLE_OPENMP
 84 |     omp_set_nested(1);
 85 |     omp_set_dynamic(1);
 86 | #if !defined(COMPILER_MSVC)
 87 |     omp_set_max_active_levels(7);
 88 | #endif
 89 | #endif
 90 | 
 91 | #if OS_TYPE_WIN
 92 |     // SetProcessPriority(PRIORITY_IDLE);
 93 | #endif
 94 | 
 95 | #if ENABLE_FLINT
 96 |     flint_set_num_threads(8);
 97 | #endif
 98 |   }
 99 |   // Calls fft::InitFftK(k) unless k is negative.
100 |   void InitFft(int k = 22) {
101 |     if (k >= 0) {
102 |       fft::InitFftK(k);
103 |     }
104 |   }
105 |   // Calls ntt32::InitNtt(k) when HAS_POLY_MUL_NTT32 is set and k >= 0.
106 |   void InitNtt32(int k = 22) {
107 | #if defined(HAS_POLY_MUL_NTT32) && HAS_POLY_MUL_NTT32
108 |     if (k >= 0) {
109 |       ntt32::InitNtt(k);
110 |     }
111 | #endif
112 |   }
113 |   // Calls ntt64::InitNtt(k) when HAS_POLY_MUL_NTT64 is set and k >= 0.
114 |   void InitNtt64(int k = 22) {
115 | #if defined(HAS_POLY_MUL_NTT64) && HAS_POLY_MUL_NTT64
116 |     if (k >= 0) {
117 |       ntt64::InitNtt(k);
118 |     }
119 | #endif
120 |   }
121 |   // Defaults: no cal_* flag set, FFT / NTT setup skipped (-1), modulus 1.
122 |   int64 maxp = 1000000;
123 |   int cal_phi = 0;
124 |   int cal_mu = 0;
125 |   int cal_rad = 0;
126 |   int cal_sigma0 = 0;
127 |   int cal_sigma1 = 0;
128 | 
129 |   int fft_k = -1;
130 |   int ntt32_k = -1;
131 |   int ntt64_k = -1;
132 | 
133 |   int64 default_mod = 1;
134 | };
135 | }  // namespace pe
136 | 
137 | #define PE_INIT(...) (pe::PeInitializer{PE_ADD_DOT(__VA_ARGS__)}).Init()
138 | #endif
139 | 


--------------------------------------------------------------------------------
/pe_io:
--------------------------------------------------------------------------------
  1 | #ifndef PE_IO_
  2 | #define PE_IO_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_type_traits"
  6 | 
  7 | namespace pe {
  8 | // Reads one decimal integer from stdin. Characters before the first digit or
  9 | // '-' are skipped, '-' counts only as a leading sign, and the character that
 10 | // ends the digit run is consumed. Returns 0 when input ends before a digit.
 11 | template <typename T>
 12 | SL T ReadInt() {
 13 |   const auto is_digit = [](int ch) { return ch >= '0' && ch <= '9'; };
 14 |   int c = getchar();
 15 |   // Stop on EOF as well: getchar() keeps returning EOF once input is
 16 |   // exhausted, so skipping past it would never terminate.
 17 |   while (c != EOF && c != '-' && !is_digit(c)) {
 18 |     c = getchar();
 19 |   }
 20 |   if (c == EOF) {
 21 |     return 0;
 22 |   }
 23 |   const bool negative = c == '-';
 24 |   T v = 0;
 25 |   if (!negative) {
 26 |     v = c ^ 48;  // c is a digit here; '0' is 48, so this is its value
 27 |   }
 28 |   for (c = getchar(); is_digit(c); c = getchar()) {
 29 |     v = v * 10 + (c ^ 48);
 30 |   }
 31 |   return negative ? -v : v;
 32 | }
 33 | 
 34 | #define Rint pe::ReadInt<int>  // use as Rint(); reads one int from stdin
 35 | // ReadValue<T>: reads one T from std::cin with operator>>.
 36 | template <typename T>
 37 | SL T ReadValue() {
 38 |   T v;
 39 |   std::cin >> v;
 40 |   return v;
 41 | }
 42 | // The specializations below route these integer types through ReadInt.
 43 | template <>
 44 | inline int ReadValue<int>() {
 45 |   return ReadInt<int>();
 46 | }
 47 | 
 48 | template <>
 49 | inline int64 ReadValue<int64>() {
 50 |   return ReadInt<int64>();
 51 | }
 52 | 
 53 | #if PE_HAS_INT128
 54 | template <>
 55 | inline int128 ReadValue<int128>() {
 56 |   return ReadInt<int128>();
 57 | }
 58 | #endif
 59 | 
 60 | #define DV_IMPL_1(I, V1) V1 = I
 61 | #define DV_IMPL_2(I, V1, ...) V1 = I, DV_IMPL_1(I, __VA_ARGS__)
 62 | #define DV_IMPL_3(I, V1, ...) V1 = I, DV_IMPL_2(I, __VA_ARGS__)
 63 | #define DV_IMPL_4(I, V1, ...) V1 = I, DV_IMPL_3(I, __VA_ARGS__)
 64 | #define DV_IMPL_5(I, V1, ...) V1 = I, DV_IMPL_4(I, __VA_ARGS__)
 65 | #define DV_IMPL_6(I, V1, ...) V1 = I, DV_IMPL_5(I, __VA_ARGS__)
 66 | #define DV_IMPL_7(I, V1, ...) V1 = I, DV_IMPL_6(I, __VA_ARGS__)
 67 | #define DV_IMPL_8(I, V1, ...) V1 = I, DV_IMPL_7(I, __VA_ARGS__)
 68 | #define DV_IMPL_9(I, V1, ...) V1 = I, DV_IMPL_8(I, __VA_ARGS__)
 69 | #define DV_IMPL_10(I, V1, ...) V1 = I, DV_IMPL_9(I, __VA_ARGS__)
 70 | #define DV_IMPL_11(I, V1, ...) V1 = I, DV_IMPL_10(I, __VA_ARGS__)
 71 | #define DV_IMPL_12(I, V1, ...) V1 = I, DV_IMPL_11(I, __VA_ARGS__)
 72 | #define DV_IMPL_13(I, V1, ...) V1 = I, DV_IMPL_12(I, __VA_ARGS__)
 73 | #define DV_IMPL_14(I, V1, ...) V1 = I, DV_IMPL_13(I, __VA_ARGS__)
 74 | #define DV_IMPL_15(I, V1, ...) V1 = I, DV_IMPL_14(I, __VA_ARGS__)
 75 | #define DV_IMPL_16(I, V1, ...) V1 = I, DV_IMPL_15(I, __VA_ARGS__)
 76 | 
 77 | #define DV_IMPL(n, input, ...) PE_CONCAT(DV_IMPL_, n)(input, __VA_ARGS__)
 78 | 
 79 | #define DV(T, ...) \
 80 |   T DV_IMPL(PE_NARG(__VA_ARGS__), pe::ReadValue<T>(), __VA_ARGS__)
 81 | #define DVC(T, ...) \
 82 |   const T DV_IMPL(PE_NARG(__VA_ARGS__), pe::ReadValue<T>(), __VA_ARGS__)
 83 | }  // namespace pe
 84 | 
 85 | namespace pe {
 86 | 
 87 | #if OS_TYPE_WIN && ENABLE_CLIPBOARD
 88 | SL bool CopyStringToClipboard(const std::string& s) {
 89 |   // Puts s on the clipboard as CF_TEXT; returns false if any step fails.
 90 |   if (!OpenClipboard(nullptr)) return false;
 91 |   EmptyClipboard();
 92 |   HGLOBAL hg = GlobalAlloc(GMEM_MOVEABLE, std::size(s) + 1);
 93 |   void* dst = hg ? GlobalLock(hg) : nullptr;
 94 |   // Copy size + 1 bytes so the CF_TEXT data carries its terminating NUL.
 95 |   if (dst) memcpy(dst, s.c_str(), std::size(s) + 1);
 96 |   if (dst) GlobalUnlock(hg);
 97 |   // A successful SetClipboardData hands hg to the system: free on failure only.
 98 |   const bool ok = dst && SetClipboardData(CF_TEXT, hg) != nullptr;
 99 |   if (!ok && hg) GlobalFree(hg);
100 |   CloseClipboard();
101 |   return ok;
102 | }
103 | // Shows an OK/Cancel box; returns 1 when MessageBoxA returns 1, else 0.
104 | SL int PromptYesAndNo(const std::string& title, const std::string& content) {
105 |   return MessageBoxA(nullptr, content.c_str(), title.c_str(), MB_OKCANCEL) == 1;
106 | }
107 | // Prints the answer, then offers to copy it to the clipboard.
108 | SL void PromptAnswer(const std::string& str) {
109 |   std::cout << "Answer:" << std::endl << str << std::endl;
110 |   if (PromptYesAndNo("Copy to clipboard?", str)) {
111 |     CopyStringToClipboard(str);
112 |   }
113 | }
114 | 
115 | #else
116 | SL void PromptAnswer(const std::string& str) {  // print-only variant
117 |   std::cout << "Answer:" << std::endl << str << std::endl;
118 | }
119 | #endif
120 | // Integer overload: converts v with ToString and prompts with that text.
121 | template <typename T>
122 | SL REQUIRES((is_general_integer_v<T>)) RETURN(void) PromptAnswer(const T& v) {
123 |   PromptAnswer(ToString(v));
124 | }
125 | // printf-style overload; text longer than the 1024-byte buffer is truncated.
126 | template <typename... T>
127 | SL void PromptAnswer(const std::string& format, T... v) {
128 |   char buff[1024];
129 |   snprintf(buff, sizeof(buff), format.c_str(), v...);  // bounded: no overflow
130 |   PromptAnswer(buff);
131 | }
132 | // Callable helper: operator() formats printf-style and calls PromptAnswer.
133 | struct AnswerPrompter {
134 |   template <typename... T>
135 |   AnswerPrompter& operator()(const std::string& format, T... v) {
136 |     char buff[1024];
137 |     snprintf(buff, sizeof(buff), format.c_str(), v...);  // bounded: truncates
138 |     PromptAnswer(buff);
139 |     return *this;
140 |   }
141 | };
142 | // Integer values: prompt with ToString(v) and return the prompter.
143 | template <typename T>
144 | SL REQUIRES((is_general_integer_v<T>))
145 |     RETURN(AnswerPrompter&) operator<<(AnswerPrompter& ap, T v) {
146 |   PromptAnswer(ToString(v));
147 |   return ap;
148 | }
149 | // Non-integer values: format through a stringstream and prompt with the text.
150 | template <typename T>
151 | SL REQUIRES((!is_general_integer_v<T>))
152 |     RETURN(AnswerPrompter&) operator<<(AnswerPrompter& ap, T v) {
153 |   std::stringstream ss;
154 |   ss << v;
155 |   PromptAnswer(ss.str());  // prompt with the formatted text, not with v itself
156 |   return ap;
157 | }
158 | 
159 | static AnswerPrompter ap;
160 | }  // namespace pe
161 | #endif
162 | 


--------------------------------------------------------------------------------
/pe_memory:
--------------------------------------------------------------------------------
 1 | #ifndef PE_MEMORY_
 2 | #define PE_MEMORY_
 3 | 
 4 | #include "pe_base"
 5 | 
 6 | namespace pe {
 7 | #if OS_TYPE_WIN
 8 | struct LargeMemory {
 9 |  public:
10 |   LargeMemory() = default;
11 | 
12 |   // Releases every view that is still mapped.
13 |   ~LargeMemory() {
14 |     while (!allocated_.empty()) Deallocate(allocated_.begin()->first);
15 |   }
16 | 
17 |   // Creates a file mapping of `size` bytes on INVALID_HANDLE_VALUE and maps
18 |   // it. Returns nullptr when the mapping or the view cannot be created.
19 |   void* Allocate(int64 size) {
20 |     HANDLE hMapFile = ::CreateFileMapping(
21 |         INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE | SEC_COMMIT, size >> 32,
22 |         size % (1LL << 32), nullptr);
23 |     // Test the handle itself: GetLastError() can hold a stale code.
24 |     if (hMapFile == nullptr) return nullptr;
25 |     void* ptr = ::MapViewOfFile(hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, 0);
26 |     if (ptr == nullptr) {
27 |       ::CloseHandle(hMapFile);  // do not leak the mapping object
28 |       return nullptr;
29 |     }
30 |     allocated_.insert({ptr, hMapFile});
31 |     return ptr;
32 |   }
33 | 
34 |   // Unmaps a pointer returned by Allocate; unknown pointers are ignored.
35 |   void Deallocate(void* ptr) {
36 |     auto where = allocated_.find(ptr);
37 |     if (where == allocated_.end()) return;
38 |     ::UnmapViewOfFile(ptr);
39 |     ::CloseHandle(where->second);
40 |     allocated_.erase(where);
41 |   }
42 | 
43 |  private:
44 |   std::map<void*, HANDLE> allocated_;
45 | };
46 | // Returns the single function-local LargeMemory instance.
47 | SL LargeMemory& LmAllocator() {
48 |   static LargeMemory instance;
49 |   return instance;
50 | }
51 | // Static facade over the LargeMemory instance returned by pe::LmAllocator().
52 | struct LmAllocator {
53 |   // Qualify the call: the bare name here is this struct, not the accessor.
54 |   static void* Allocate(int64 size) { return pe::LmAllocator().Allocate(size); }
55 |   static void Deallocate(void* ptr) { pe::LmAllocator().Deallocate(ptr); }
56 | };
57 | // Free-function shorthands for the static members of struct LmAllocator.
58 | SL void* LmAllocate(int64 size) { return LmAllocator::Allocate(size); }
59 | 
60 | SL void LmDeallocate(void* ptr) { LmAllocator::Deallocate(ptr); }
61 | #endif
62 | // Byte-buffer allocation through new[] / delete[].
63 | SL void* StdAllocate(int64 size) { return new char[size]; }
64 | 
65 | SL void StdDeallocate(void* ptr) { delete[] static_cast<char*>(ptr); }
66 | 
67 | struct StdAllocator {
68 |   static void* Allocate(int64 size) { return StdAllocate(size); }
69 |   static void Deallocate(void* ptr) { StdDeallocate(ptr); }
70 | };
71 | }  // namespace pe
72 | #endif
73 | 


--------------------------------------------------------------------------------
/pe_parallel:
--------------------------------------------------------------------------------
 1 | #ifndef PE_PARALLEL_
 2 | #define PE_PARALLEL_
 3 | 
 4 | #include "pe_base"
 5 | #include "pe_mod"
 6 | #include "pe_time"
 7 | #include "pe_persistance"
 8 | 
 9 | #if OS_TYPE_WIN
10 | 
11 | namespace pe {
12 | 
13 | enum {  // Short names for the Windows priority-class constants.
14 |   PRIORITY_REALTIME = REALTIME_PRIORITY_CLASS,
15 |   PRIORITY_HIGH = HIGH_PRIORITY_CLASS,
16 |   PRIORITY_ABOVE_NORMAL = ABOVE_NORMAL_PRIORITY_CLASS,
17 |   PRIORITY_NORMAL = NORMAL_PRIORITY_CLASS,
18 |   PRIORITY_BELOW_NORMAL = BELOW_NORMAL_PRIORITY_CLASS,
19 |   PRIORITY_BACKGROUND = 0x00100000,  // PROCESS_MODE_BACKGROUND_BEGIN,
20 |   PRIORITY_IDLE = IDLE_PRIORITY_CLASS,
21 | };
22 | // Passes priority (e.g. a PRIORITY_* value) to ::SetPriorityClass.
23 | static inline void SetProcessPriority(int priority) {
24 |   ::SetPriorityClass(::GetCurrentProcess(), priority);
25 | }
26 | 
27 | static inline void MakeSureProcessSingleton(const char* id) {
28 |   // Exits with -1 when a mutex named after id already exists, i.e. another
29 |   // process got here first. The ANSI (...A) entry points are called
30 |   // explicitly because the name is a narrow string, so this also builds
31 |   // when UNICODE is defined.
32 |   const std::string mutex_name = std::string("pe_mutex_prefix_") + id;
33 |   HANDLE hMutex = ::OpenMutexA(MUTEX_ALL_ACCESS, FALSE, mutex_name.c_str());
34 |   if (!hMutex) {
35 |     // The handle created here is left open when this function returns.
36 |     hMutex = ::CreateMutexA(nullptr, TRUE, mutex_name.c_str());
37 |     if (::GetLastError() != ERROR_ALREADY_EXISTS) {
38 |       return;
39 |     }
40 |   }
41 |   fprintf(stderr, "another process is running\n");
42 |   ::CloseHandle(hMutex);
43 |   exit(-1);
44 | }
45 | 
46 | }  // namespace pe
47 | #endif  // end OS_TYPE_WIN
48 | 
49 | #if ENABLE_OPENMP
50 | namespace pe {
51 | class OmpLock {  // Wraps one omp_lock_t behind lock() / unlock().
52 |  public:
53 |   OmpLock() { omp_init_lock(&locker_); }
54 |   ~OmpLock() { omp_destroy_lock(&locker_); }
55 |   OmpLock(const OmpLock&) = delete;  // non-copyable
56 |   OmpLock& operator=(const OmpLock&) = delete;
57 |   void lock() { omp_set_lock(&locker_); }
58 |   void unlock() { omp_unset_lock(&locker_); }
59 | 
60 |  private:
61 |   omp_lock_t locker_;
62 | };
63 | using OmpGuard = std::lock_guard<pe::OmpLock>;  // scoped lock()/unlock()
64 | }  // namespace pe
65 | #endif
66 | #endif
67 | 


--------------------------------------------------------------------------------
/pe_persistance:
--------------------------------------------------------------------------------
1 | #ifndef PE_PERSISTANCE_
2 | #define PE_PERSISTANCE_
3 | 
4 | #endif
5 | 


--------------------------------------------------------------------------------
/pe_poly_base_gmp:
--------------------------------------------------------------------------------
  1 | #ifndef PE_POLY_BASE_GMP_
  2 | #define PE_POLY_BASE_GMP_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_type_traits"
  6 | #include "pe_poly_base_common"
  7 | 
  8 | #if ENABLE_GMP
  9 | 
 10 | #define HAS_POLY_MUL_GMP 1
 11 | 
 12 | namespace pe {
 13 | namespace gmp {
 14 | 
 15 | namespace bn_poly_mul {
 16 | namespace internal {
 17 | template <typename T>  // Writes X[i] into limb i * need_limb of a, i in [0, n).
 18 | SL REQUIRES((is_builtin_integer_v<T>)) RETURN(void)
 19 |     InitAsMpz(mpz_t a, const T* X, const int64 n, const int64 need_limb,
 20 |               int64 mod) {
 21 |   mpz_setbit(a,  // bit just past the last slot; cleared again at the end
 22 |              static_cast<mp_bitcnt_t>(need_limb * n * sizeof(mp_limb_t) * 8));
 23 |   mpz_ptr ptr = a;
 24 |   if (mod > 0) {
 25 |     for (int i = 0; i < n; ++i) {
 26 |       ptr->_mp_d[i * need_limb] = Mod(X[i], mod);  // store the reduced value
 27 |     }
 28 |   } else {
 29 |     if (sizeof(T) <= sizeof(mp_limb_t)) {
 30 |       for (int i = 0; i < n; ++i) {
 31 |         ptr->_mp_d[i * need_limb] = X[i];
 32 |       }
 33 |     } else {
 34 |       for (int i = 0; i < n; ++i) {
 35 |         *reinterpret_cast<T*>(ptr->_mp_d + i * need_limb) = X[i];  // T > limb
 36 |       }
 37 |     }
 38 |   }
 39 |   mpz_clrbit(a,
 40 |              static_cast<mp_bitcnt_t>(need_limb * n * sizeof(mp_limb_t) * 8));
 41 | }
 42 | 
 43 | template <typename T>  // Extended-integer variant; values go through ToInt.
 44 | SL REQUIRES((is_extended_integer_v<T>)) RETURN(void)
 45 |     InitAsMpz(mpz_t a, const T* X, const int64 n, const int64 need_limb,
 46 |               int64 mod) {
 47 |   mpz_setbit(a, need_limb * n * sizeof(mp_limb_t) * 8);  // cleared at the end
 48 |   mpz_ptr ptr = a;
 49 |   if (mod > 0) {
 50 |     for (int i = 0; i < n; ++i) {
 51 |       ptr->_mp_d[i * need_limb] = ToInt<mp_limb_t>(Mod(X[i], mod));
 52 |     }
 53 |   } else {
 54 |     if (sizeof(T) <= sizeof(mp_limb_t)) {
 55 |       for (int i = 0; i < n; ++i) {
 56 |         ptr->_mp_d[i * need_limb] = ToInt<mp_limb_t>(X[i]);
 57 |       }
 58 |     } else {
 59 |       for (int i = 0; i < n; ++i) {
 60 |         *reinterpret_cast<T*>(ptr->_mp_d + i * need_limb) = X[i];  // T > limb
 61 |       }
 62 |     }
 63 |   }
 64 |   mpz_clrbit(a, need_limb * n * sizeof(mp_limb_t) * 8);
 65 | }
 66 | 
 67 | template <typename T>  // Fills result[0 .. n + m - 1) from X (n) and Y (m).
 68 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
 69 |     PolyMulImpl(const T* X, const int64 n, const T* Y, const int64 m, T* result,
 70 |                 int64 mod) {
 71 |   const int64 mod_bit = mod == 0 ? sizeof(T) * 8 : HighestBitIndex(mod) + 1;
 72 |   const int64 need_bit = mod_bit * 2 + HighestBitIndex(std::min(n, m)) + 1;
 73 |   const int64 need_limb =
 74 |       (need_bit + sizeof(mp_limb_t) * 8 - 1) / (sizeof(mp_limb_t) * 8);
 75 |   // need_limb: need_bit rounded up to whole limbs; one slot per coefficient.
 76 |   mpz_t a, b;
 77 |   mpz_init(a);
 78 |   mpz_init(b);
 79 |   // Pack both inputs, one coefficient every need_limb limbs.
 80 |   InitAsMpz<T>(a, X, n, need_limb, mod);
 81 |   InitAsMpz<T>(b, Y, m, need_limb, mod);
 82 |   // One big-integer product; the result overwrites a.
 83 |   mpz_mul(a, a, b);
 84 |   // Limbs at index >= has_size are read as 0 below.
 85 |   mpz_ptr ptr = a;
 86 |   const int64 has_size = ptr->_mp_size;
 87 |   const int64 all_size = n + m - 1;
 88 |   // NOTE(review): magic2 = 2^64 % mod, so limbs are assumed 64-bit - confirm.
 89 |   if (mod > 0) {
 90 |     if (need_limb == 1) {
 91 |       for (int64 i = 0; i < all_size; ++i) {
 92 |         const int64 offset = i * need_limb;
 93 |         const mp_limb_t v = offset >= has_size ? 0 : ptr->_mp_d[offset];
 94 |         result[i] = v % mod;
 95 |       }
 96 |     } else if (need_limb == 2) {
 97 |       const auto magic1 = (1LL << 32) % mod;
 98 |       const auto magic2 = MulMod(magic1, magic1, mod);
 99 |       for (int64 i = 0; i < all_size; ++i) {
100 |         const int64 offset = i * need_limb;
101 |         const mp_limb_t hi =
102 |             offset + 1 >= has_size ? 0 : ptr->_mp_d[offset + 1];
103 |         const mp_limb_t low = offset >= has_size ? 0 : ptr->_mp_d[offset];
104 |         result[i] = AddMod(MulMod(hi % mod, magic2, mod), low % mod, mod);
105 |       }
106 |     } else {
107 |       const auto magic1 = (1LL << 32) % mod;
108 |       const auto magic2 = MulMod(magic1, magic1, mod);
109 |       for (int64 i = 0; i < all_size; ++i) {
110 |         const int64 offset = i * need_limb;
111 |         uint64 it = 0;
112 |         for (int j = static_cast<int>(need_limb - 1); j >= 0; --j) {
113 |           const mp_limb_t v =
114 |               offset + j >= has_size ? 0 : ptr->_mp_d[offset + j];
115 |           it = AddMod(MulMod(it, magic2, mod), v % mod, mod);  // top limb first
116 |         }
117 |         result[i] = it;
118 |       }
119 |     }
120 |   } else {
121 |     if (need_limb == 1) {
122 |       for (int64 i = 0; i < all_size; ++i) {
123 |         const int64 offset = i * need_limb;
124 |         const mp_limb_t v = offset >= has_size ? 0 : ptr->_mp_d[offset];
125 |         result[i] = v;
126 |       }
127 |     } else {
128 |       for (int64 i = 0; i < all_size; ++i) {
129 |         const int64 offset = i * need_limb;
130 |         T it = 0;
131 |         for (int j = static_cast<int>(need_limb - 1); j >= 0; --j) {
132 |           const mp_limb_t v =
133 |               offset + j >= has_size ? 0 : ptr->_mp_d[offset + j];
134 |           it <<= 32;  // two shifts of 32 make 64 in total
135 |           it <<= 32;
136 |           it |= v;
137 |         }
138 |         result[i] = it;
139 |       }
140 |     }
141 |   }
142 |   // Release both temporaries.
143 |   mpz_clear(a);
144 |   mpz_clear(b);
145 | }
146 | }  // namespace internal
147 | 
148 | static constexpr PolyMulCoeType kPolyMulMod = 0;
149 | 
150 | POLY_MUL_IMPL(PolyMul, internal::PolyMulImpl)
151 | }  // namespace bn_poly_mul
152 | 
153 | }  // namespace gmp
154 | }  // namespace pe
155 | #else
156 | #define HAS_POLY_MUL_GMP 0
157 | #endif
158 | 
159 | #endif
160 | 


--------------------------------------------------------------------------------
/pe_poly_base_libbf:
--------------------------------------------------------------------------------
  1 | #ifndef PE_POLY_BASE_LIBBF_
  2 | #define PE_POLY_BASE_LIBBF_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_poly_base_common"
  6 | 
  7 | #if ENABLE_LIBBF && LIMB_BITS == 64
  8 | 
  9 | #define HAS_POLY_LIBBF 1
 10 | #define HAS_POLY_MUL_LIBBF 1
 11 | 
 12 | namespace pe {
 13 | 
 14 | namespace libbf {
 15 | namespace internal {
 16 | extern "C" {  // Callback given to bf_context_init; first argument is unused.
 17 | void* PeBfRealloc(void*, void* ptr, size_t size) { return realloc(ptr, size); }
 18 | }
 19 | 
 20 | template <typename T>  // Initialises target and stores X[i] in limb 2 * i.
 21 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
 22 |     BfNttInit(const T* X, int64 n, int64 mod, bf_context_t* context,
 23 |               bf_t* target) {
 24 |   bf_init(context, target);
 25 |   // NOTE(review): presumably set to 1 only so bf_resize has a value - confirm.
 26 |   bf_set_ui(target, 1);
 27 |   // Two limbs per input value.
 28 |   bf_resize(target, 2 * n);
 29 |   target->expn = pe_clzll(ToInt<limb_t>(X[n - 1]));
 30 |   // NOTE(review): X[n - 1] is read unconditionally above, so n must be >= 1.
 31 |   const int64 len = target->len;  // not used afterwards
 32 |   limb_t* data = target->tab;
 33 |   if (mod == 0) {
 34 |     for (int64 i = 0; i < n; ++i) {
 35 |       T t = X[i];
 36 |       PE_ASSERT((t >> 64) == 0);  // value must fit in 64 bits
 37 |       data[i << 1] = ToInt<limb_t>(X[i]);
 38 |       data[(i << 1) | 1] = 0;  // odd limb of each pair is zeroed
 39 |     }
 40 |   } else {
 41 |     for (int64 i = 0; i < n; ++i) {
 42 |       T t = X[i];
 43 |       PE_ASSERT((t >> 64) == 0);
 44 |       data[i << 1] = ToInt<limb_t>(Mod(X[i], mod));  // reduced before storing
 45 |       data[(i << 1) | 1] = 0;
 46 |     }
 47 |   }
 48 | }
 49 | 
    | // Releases a bf_t that was set up by BfNttInit (thin wrapper over bf_delete).
 50 | SL void BfNttDeinit(bf_t* target) { bf_delete(target); }
 51 | 
 52 | static constexpr PolyMulCoeType kPolyMulLargeMod = 0;
 53 | 
    | // Multiplies the polynomials X (n coefficients) and Y (m coefficients) and
    | // writes the n + m - 1 product coefficients to result.
    | // Both inputs are packed into bf_t values by BfNttInit (one coefficient per
    | // pair of limbs), multiplied with a single bf_mul, and the product is read
    | // back pairwise from the limb array. With mod > 0 every output is reduced
    | // modulo mod; otherwise the value low + hi * 2^64 is stored unreduced.
 54 | template <typename T>
 55 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
 56 |     PolyMulImpl(const T* X, int64 n, const T* Y, int64 m, T* result,
 57 |                 int64 mod) {
 58 |   bf_t x, y;
 59 |   bf_context_t bf_context;
 60 |   bf_context_init(&bf_context, PeBfRealloc, nullptr);
 61 | 
 62 |   BfNttInit(X, n, mod, &bf_context, &x);
 63 |   BfNttInit(Y, m, mod, &bf_context, &y);
 64 | 
 65 |   bf_t z0, z1, toadd;
 66 |   bf_init(&bf_context, &z0);
 67 |   bf_init(&bf_context, &z1);
 68 |   bf_init(&bf_context, &toadd);
 69 | 
    |   // NOTE(review): toadd is the value 1 placed at exponent 128 + offset and
    |   // added to the product below; presumably this pins the layout of z1's
    |   // limb array so coefficient i sits at limbs 2 * i and 2 * i + 1 --
    |   // confirm against libbf.
 70 |   int offset = x.expn + y.expn;
 71 |   bf_set_ui(&toadd, 1);
 72 | 
 73 |   toadd.expn = 128 + offset;
 74 | 
 75 |   bf_mul(&z0, &x, &y, BF_PREC_MAX, 0);
 76 |   bf_add(&z1, &z0, &toadd, BF_PREC_MAX, 0);
 77 | 
 78 |   const int64 size = n + m - 1;
 79 |   const limb_t* data = z1.tab;
 80 | 
 81 |   if (mod > 0) {
    |     // magic1 = 2^32 mod mod, hence magic2 = 2^64 mod mod: the weight of the
    |     // high limb when folding (hi, low) into a residue.
 82 |     const auto magic1 = (1LL << 32) % mod;
 83 |     const auto magic2 = MulMod(magic1, magic1, mod);
 84 |     for (int64 i = 0; i < size; ++i) {
 85 |       const uint64 low = data[i << 1];
 86 |       const uint64 hi = data[(i << 1) | 1];
 87 |       if (hi == 0) {
 88 |         result[i] = low % mod;
 89 |       } else {
 90 |         result[i] = AddMod(MulMod(hi % mod, magic2, mod), low % mod, mod);
 91 |       }
 92 |     }
 93 |   } else {
 94 |     for (int64 i = 0; i < size; ++i) {
 95 |       const uint64 low = data[i << 1];
 96 |       const uint64 hi = data[(i << 1) | 1];
 97 |       if (hi == 0) {
 98 |         result[i] = low;
 99 |       } else {
    |         // Two 32-bit shifts make up the 64-bit shift. NOTE(review):
    |         // presumably split so the statement stays valid when T is only
    |         // 64 bits wide -- confirm.
100 |         T it = hi;
101 |         it <<= 32;
102 |         it <<= 32;
103 |         it |= low;
104 |         result[i] = it;
105 |       }
106 |     }
107 |   }
108 | 
    |   // Release the temporaries in reverse order of creation, then the context.
109 |   bf_delete(&toadd);
110 |   bf_delete(&z1);
111 |   bf_delete(&z0);
112 |   BfNttDeinit(&y);
113 |   BfNttDeinit(&x);
114 | 
115 |   bf_context_end(&bf_context);
116 | }
117 | }  // namespace internal
118 | 
119 | static constexpr PolyMulCoeType kPolyMulMod =
120 |     Prod<PolyMulCoeType>(1LL << 63, 1LL << 63) * 2 + 1;
121 | 
122 | POLY_MUL_IMPL(PolyMul, internal::PolyMulImpl)
123 | 
124 | }  // namespace libbf
125 | }  // namespace pe
126 | #else
127 | #define HAS_POLY_LIBBF 0
128 | #define HAS_POLY_MUL_LIBBF 0
129 | #endif
130 | 
131 | #endif
132 | 


--------------------------------------------------------------------------------
/pe_poly_base_ntl:
--------------------------------------------------------------------------------
  1 | #ifndef PE_POLY_BASE_NTL_
  2 | #define PE_POLY_BASE_NTL_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_type_traits"
  6 | #include "pe_poly_base_common"
  7 | 
  8 | #if ENABLE_NTL
  9 | 
 10 | #define HAS_POLY_NTL 1
 11 | #define HAS_POLY_MUL_NTL 1
 12 | 
 13 | namespace pe {
 14 | namespace ntl {
 15 | namespace internal {
 16 | // Loads the n coefficients x[0..n) into the NTL::ZZ_pX polynomial p.
 17 | // When mod does not fit in 32 bits each coefficient is imported from its raw
 18 | // bytes via ZZFromBytes; otherwise it is reduced with Mod(x[i], mod) and
 19 | // assigned as a uint64.
 20 | template <typename T>
 21 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
 22 |     InitPoly(NTL::ZZ_pX& p, const T* x, int64 n, int64 mod) {
 23 |   p.SetLength(n);
 24 |   const bool wide_modulus = (mod >> 32) != 0;
 25 |   NTL::ZZ scratch(0);
 26 |   for (int64 idx = 0; idx < n; ++idx) {
 27 |     if (wide_modulus) {
 28 |       const auto* raw = reinterpret_cast<const unsigned char*>(&x[idx]);
 29 |       ZZFromBytes(scratch, raw, sizeof(T));
 30 |       p[idx] = to_ZZ_p(scratch);
 31 |     } else {
 32 |       p[idx] = ToInt<uint64>(Mod(x[idx], mod));
 33 |     }
 34 |   }
 35 | }
 33 | 
 34 | // Copies the coefficients of p into x[0..n): entries up to
 35 | // min(deg(p), n - 1) come from p, the remaining ones are set to 0.
 36 | // When mod does not fit in 32 bits the value is exported byte-wise with
 37 | // BytesFromZZ; otherwise it is converted with to_int.
 38 | // NOTE(review): to_int yields an int, so residues >= 2^31 may not round-trip
 39 | // in the narrow branch -- confirm.
 40 | template <typename T>
 41 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
 42 |     CopyPoly(NTL::ZZ_pX& p, T* x, int64 n, int64 mod) {
 43 |   const int64 degree = deg(p);
 44 |   const int64 last = std::min(degree, n - 1);
 45 |   const bool wide_modulus = (mod >> 32) != 0;
 46 |   for (int64 idx = 0; idx <= last; ++idx) {
 47 |     NTL::ZZ& rep = reinterpret_cast<NTL::ZZ&>(p[idx]);
 48 |     if (wide_modulus) {
 49 |       T value;
 50 |       BytesFromZZ(reinterpret_cast<unsigned char*>(&value), rep, sizeof(T));
 51 |       x[idx] = value;
 52 |     } else {
 53 |       x[idx] = static_cast<T>(to_int(rep));
 54 |     }
 55 |   }
 56 |   for (int64 idx = last + 1; idx < n; ++idx) {
 57 |     x[idx] = 0;
 58 |   }
 59 | }
 55 | 
 56 | // Loads x[0..n) into the single-precision polynomial p, storing
 57 | // Mod(x[i], mod) directly into each coefficient's representation.
 58 | template <typename T>
 59 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
 60 |     InitPoly(NTL::zz_pX& p, const T* x, int64 n, long mod) {
 61 |   p.SetLength(n);
 62 |   int64 idx = 0;
 63 |   while (idx < n) {
 64 |     p[idx]._zz_p__rep = ToInt<long>(Mod(x[idx], mod));
 65 |     ++idx;
 66 |   }
 67 | }
 64 | 
 65 | // Copies the coefficients of the single-precision polynomial p into
 66 | // x[0..n); entries beyond min(deg(p), n - 1) are set to 0. The modulus
 67 | // argument is unused.
 68 | template <typename T>
 69 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
 70 |     CopyPoly(NTL::zz_pX& p, T* x, int64 n, long /*mod*/) {
 71 |   const int64 degree = deg(p);
 72 |   const int64 last = std::min(degree, n - 1);
 73 |   int64 idx = 0;
 74 |   for (; idx <= last; ++idx) {
 75 |     x[idx] = p[idx]._zz_p__rep;
 76 |   }
 77 |   // idx == last + 1 here: zero the tail.
 78 |   for (; idx < n; ++idx) {
 79 |     x[idx] = 0;
 80 |   }
 81 | }
 77 | 
 78 | // Multiplies X (n coefficients) by Y (m coefficients) modulo mod using NTL's
 79 | // single-precision zz_pX type; writes n + m - 1 coefficients to result.
 80 | // Requires mod > 0.
 81 | template <typename T>
 82 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
 83 |     PolyMulSmallImpl(const T* X, int64 n, const T* Y, int64 m, T* result,
 84 |                      int64 mod) {
 85 |   PE_ASSERT(mod > 0);
 86 | 
 87 |   const long modulus = static_cast<long>(mod);
 88 |   NTL::zz_p::init(modulus);
 89 | 
 90 |   NTL::zz_pX lhs, rhs, product;
 91 |   InitPoly(lhs, X, n, modulus);
 92 |   InitPoly(rhs, Y, m, modulus);
 93 | 
 94 |   NTL::mul(product, lhs, rhs);
 95 | 
 96 |   CopyPoly(product, result, n + m - 1, modulus);
 97 | }
 94 | 
 95 | // Multiplies X (n coefficients) by Y (m coefficients) modulo mod using NTL's
 96 | // multi-precision ZZ_pX type; writes n + m - 1 coefficients to result.
 97 | // Requires mod > 0.
 98 | template <typename T>
 99 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
100 |     PolyMulLargeImpl(const T* X, int64 n, const T* Y, int64 m, T* result,
101 |                      int64 mod) {
102 |   PE_ASSERT(mod > 0);
103 | 
104 |   // Import the modulus from the raw bytes of the int64 argument.
105 |   NTL::ZZ modulus(0);
106 |   const auto* mod_bytes = reinterpret_cast<const unsigned char*>(&mod);
107 |   ZZFromBytes(modulus, mod_bytes, sizeof(int64));
108 |   NTL::ZZ_p::init(modulus);
109 | 
110 |   NTL::ZZ_pX lhs, rhs, product;
111 |   InitPoly(lhs, X, n, mod);
112 |   InitPoly(rhs, Y, m, mod);
113 | 
114 |   NTL::mul(product, lhs, rhs);
115 | 
116 |   CopyPoly(product, result, n + m - 1, mod);
117 | }
113 | 
114 | // Multiplies X (n coefficients) by Y (m coefficients) modulo mod and writes
115 | // n + m - 1 coefficients to result. Uses the single-precision path when
116 | // PolyMulAcceptLengthAndMod(NTL_SP_BOUND, n, m, mod) accepts the input and
117 | // the multi-precision path otherwise. Exactly one of the two runs; without
118 | // the else the large path would always run as well and redo the work.
119 | template <typename T>
120 | SL REQUIRES((is_builtin_integer_v<T> || is_extended_integer_v<T>)) RETURN(void)
121 |     PolyMulImpl(const T* X, int64 n, const T* Y, int64 m, T* result,
122 |                 int64 mod) {
123 |   if (PolyMulAcceptLengthAndMod(NTL_SP_BOUND, n, m, mod)) {
124 |     PolyMulSmallImpl(X, n, Y, m, result, mod);
125 |   } else {
126 |     PolyMulLargeImpl(X, n, Y, m, result, mod);
127 |   }
128 | }
123 | }  // namespace internal
124 | 
125 | static constexpr PolyMulCoeType kPolyMulSmallMod = NTL_SP_BOUND;
126 | static constexpr PolyMulCoeType kPolyMulLargeMod = 0;
127 | 
128 | POLY_MUL_IMPL(PolyMulSmall, internal::PolyMulSmallImpl)
129 | POLY_MUL_IMPL(PolyMulLarge, internal::PolyMulLargeImpl)
130 | POLY_MUL_IMPL(PolyMul, internal::PolyMulImpl)
131 | 
132 | namespace internal {
133 | // Divides X (n coefficients) by Y (m coefficients) modulo mod with NTL's
134 | // single-precision zz_pX type. q (n - m + 1 entries) and r (m entries) are
135 | // each optional: a null pointer skips that output. When m > n the remainder
136 | // is X zero-padded to m entries and q[0] is set to 0.
137 | template <typename T>
138 | SL REQUIRES((is_builtin_integer_v<T>)) RETURN(void)
139 |     PolyDivAndModSmallModImpl(const T* X, int64 n, const T* Y, int64 m, T* q,
140 |                               T* r, int64 mod) {
141 |   const bool want_q = q != nullptr;
142 |   const bool want_r = r != nullptr;
143 | 
144 |   if (m > n) {
145 |     if (want_r) {
146 |       T* const tail = std::copy(X, X + n, r);
147 |       std::fill(tail, r + m, 0);
148 |     }
149 |     if (want_q) {
150 |       q[0] = 0;
151 |     }
152 |     return;
153 |   }
154 | 
155 |   NTL::zz_p::init(static_cast<long>(mod));
156 | 
157 |   NTL::zz_pX dividend, divisor;
158 |   InitPoly(dividend, X, n, mod);
159 |   InitPoly(divisor, Y, m, mod);
160 | 
161 |   if (want_q && want_r) {
162 |     NTL::zz_pX quotient, remainder;
163 |     NTL::DivRem(quotient, remainder, dividend, divisor);
164 |     CopyPoly(quotient, q, n - m + 1, mod);
165 |     CopyPoly(remainder, r, m, mod);
166 |   } else if (want_q) {
167 |     NTL::zz_pX quotient;
168 |     NTL::div(quotient, dividend, divisor);
169 |     CopyPoly(quotient, q, n - m + 1, mod);
170 |   } else if (want_r) {
171 |     NTL::zz_pX remainder;
172 |     NTL::rem(remainder, dividend, divisor);
173 |     CopyPoly(remainder, r, m, mod);
174 |   }
175 | }
174 | 
175 | // Divides X (n coefficients) by Y (m coefficients) modulo mod with NTL's
176 | // multi-precision ZZ_pX type. q (n - m + 1 entries) and r (m entries) are
177 | // each optional: a null pointer skips that output. When m > n the remainder
178 | // is X zero-padded to m entries and q[0] is set to 0.
179 | template <typename T>
180 | SL REQUIRES((is_builtin_integer_v<T>)) RETURN(void)
181 |     PolyDivAndModLargeModImpl(const T* X, int64 n, const T* Y, int64 m, T* q,
182 |                               T* r, int64 mod) {
183 |   if (m > n) {
184 |     if (r != nullptr) {
185 |       std::copy(X, X + n, r);
186 |       std::fill(r + n, r + m, 0);
187 |     }
188 |     if (q != nullptr) {
189 |       q[0] = 0;
190 |     }
191 |     return;
192 |   }
193 | 
194 |   // mod is an int64 regardless of T, so exactly sizeof(int64) bytes must be
195 |   // imported. Using sizeof(T) would truncate the modulus for 32-bit T and
196 |   // read past the object for 128-bit T.
197 |   NTL::ZZ tmp(0);
198 |   ZZFromBytes(tmp, reinterpret_cast<const unsigned char*>(&mod),
199 |               sizeof(int64));
200 |   NTL::ZZ_p::init(tmp);
201 | 
202 |   NTL::ZZ_pX x, y;
203 |   InitPoly(x, X, n, mod);
204 |   InitPoly(y, Y, m, mod);
205 | 
206 |   if (q != nullptr && r != nullptr) {
207 |     NTL::ZZ_pX u, v;
208 | 
209 |     NTL::DivRem(u, v, x, y);
210 | 
211 |     CopyPoly(u, q, n - m + 1, mod);
212 |     CopyPoly(v, r, m, mod);
213 |   } else if (q != nullptr) {
214 |     NTL::ZZ_pX u;
215 | 
216 |     NTL::div(u, x, y);
217 | 
218 |     CopyPoly(u, q, n - m + 1, mod);
219 |   } else if (r != nullptr) {
220 |     NTL::ZZ_pX v;
221 | 
222 |     NTL::rem(v, x, y);
223 |     CopyPoly(v, r, m, mod);
224 |   }
225 | }
218 | 
219 | // Dispatches polynomial division: moduli below NTL_SP_BOUND take the
220 | // single-precision implementation, all others the multi-precision one.
221 | template <typename T>
222 | SL REQUIRES((is_builtin_integer_v<T>)) RETURN(void)
223 |     PolyDivAndModImpl(const T* X, int64 n, const T* Y, int64 m, T* q, T* r,
224 |                       int64 mod) {
225 |   const bool single_precision = mod < NTL_SP_BOUND;
226 |   if (!single_precision) {
227 |     PolyDivAndModLargeModImpl<T>(X, n, Y, m, q, r, mod);
228 |     return;
229 |   }
230 |   PolyDivAndModSmallModImpl<T>(X, n, Y, m, q, r, mod);
231 | }
229 | }  // namespace internal
230 | 
231 | POLY_DIV_AND_MOD_IMPL(PolyDivAndModSmallMod,
232 |                       internal::PolyDivAndModSmallModImpl)
233 | POLY_DIV_IMPL(PolyDivSmallMod, ntl::PolyDivAndModSmallMod)
234 | POLY_MOD_IMPL(PolyModSmallMod, ntl::PolyDivAndModSmallMod)
235 | 
236 | POLY_DIV_AND_MOD_IMPL(PolyDivAndModLargeMod,
237 |                       internal::PolyDivAndModLargeModImpl)
238 | POLY_DIV_IMPL(PolyDivLargeMod, ntl::PolyDivAndModLargeMod)
239 | POLY_MOD_IMPL(PolyModLargeMod, ntl::PolyDivAndModLargeMod)
240 | 
241 | POLY_DIV_AND_MOD_IMPL(PolyDivAndMod, internal::PolyDivAndModImpl)
242 | POLY_DIV_IMPL(PolyDiv, ntl::PolyDivAndMod)
243 | POLY_MOD_IMPL(PolyMod, ntl::PolyDivAndMod)
244 | }  // namespace ntl
245 | }  // namespace pe
246 | #else
247 | #define HAS_POLY_NTL 0
248 | #define HAS_POLY_MUL_NTL 0
249 | #endif
250 | 
251 | #endif
252 | 


--------------------------------------------------------------------------------
/pe_rand:
--------------------------------------------------------------------------------
  1 | #ifndef PE_RAND_
  2 | #define PE_RAND_
  3 | 
  4 | #include "pe_base"
  5 | #include "pe_nt"
  6 | 
  7 | namespace pe {
  8 | // C-style random number generator
  9 | // ANSI_ISO_9899-1999: The value of the RAND_MAX macro shall be at least 32767.
 10 | // We assume RAND_MAX is either 32767 or 2147483647
 11 | // Lowest bit of one rand() draw; the same for every supported RAND_MAX.
 12 | SL int CRandBit() { return rand() & 1; }
 13 | 
 14 | #if RAND_MAX == 32767
 15 | 
 16 | // rand() already yields 15 bits.
 17 | SL int CRand15() { return rand(); }
 18 | // 31 bits built from two 15-bit draws plus one extra bit.
 19 | SL int CRand31() { return CRand15() << 16 | CRand15() << 1 | CRandBit(); }
 20 | // One rand() draw scaled by 1 / (RAND_MAX + 1).
 21 | SL double CRandD() {
 22 |   static constexpr double coe = 1. / (RAND_MAX + 1);
 23 |   return coe * rand();
 24 | }
 25 | 
 26 | #elif RAND_MAX == 2147483647
 27 | 
 28 | // Low 15 bits of one rand() draw.
 29 | SL int CRand15() { return rand() & 32767; }
 30 | // rand() already yields 31 bits.
 31 | SL int CRand31() { return rand(); }
 32 | // One rand() draw scaled by 1 / (RAND_MAX + 1); the sum is formed in 64 bits.
 33 | SL double CRandD() {
 34 |   static constexpr double coe = 1. / (static_cast<int64>(RAND_MAX) + 1LL);
 35 |   return coe * rand();
 36 | }
 37 | 
 38 | #else
 39 | 
 40 | #error "RAND_MAX should be either 32767 or 2147483647"
 41 | 
 42 | #endif
 43 | 
 44 | // 63 bits built from two 31-bit draws plus one extra bit.
 45 | SL int64 CRand63() {
 46 |   return static_cast<int64>(CRand31()) << 32 |
 47 |          static_cast<uint32>(CRand31()) << 1 | CRandBit();
 48 | }
 49 | 
 50 | // Random non-negative int (31 bits).
 51 | SL int CRandI() { return CRand31(); }
 48 | 
 49 | // C++ style random number generator
 50 | // Bundles a random engine (RE) with a distribution (DS); each call draws one
 51 | // value from the distribution and returns it as an int.
 52 | template <typename RE, typename DS>
 53 | class RandomGenerator {
 54 |  public:
 55 |   // Stores copies of the given engine and distribution.
 56 |   RandomGenerator(const RE& engine, const DS& distribution)
 57 |       : random_engine_(engine), distribution_(distribution) {}
 58 | 
 59 |   // Draws the next sample, advancing the stored engine.
 60 |   int operator()() {
 61 |     const int sample = distribution_(random_engine_);
 62 |     return sample;
 63 |   }
 64 | 
 65 |  private:
 66 |   RE random_engine_;
 67 |   DS distribution_;
 68 | };
 61 | 
 62 | // Uniform int generator over [min, max] driven by std::mt19937 seeded with
 63 | // the given seed. Requires min <= max.
 64 | RandomGenerator<std::mt19937, std::uniform_int_distribution<int>> SL
 65 | MakeUniformGenerator(int seed, int min, int max) {
 66 |   PE_ASSERT(min <= max);
 67 |   using Distribution = std::uniform_int_distribution<int>;
 68 |   using Generator = RandomGenerator<std::mt19937, Distribution>;
 69 |   return Generator(std::mt19937(seed), Distribution(min, max));
 70 | }
 71 | 
 72 | // Same as above, but the engine is seeded from std::random_device.
 73 | RandomGenerator<std::mt19937, std::uniform_int_distribution<int>> SL
 74 | MakeUniformGenerator(int min, int max) {
 75 |   std::random_device rd;
 76 |   PE_ASSERT(min <= max);
 77 |   using Distribution = std::uniform_int_distribution<int>;
 78 |   using Generator = RandomGenerator<std::mt19937, Distribution>;
 79 |   return Generator(std::mt19937(rd()), Distribution(min, max));
 80 | }
 76 | 
 77 | // Writes the idx-th point of the n-dimensional Halton sequence to
 78 | // result[0..n). Dimension i uses plist[i] as its base: the base-plist[i]
 79 | // digits of idx are consumed from least significant upward and weighted by
 80 | // successive negative powers of the base.
 81 | template <typename ET, typename RT>
 82 | SL void Halton(ET idx, const int n, RT* result) {
 83 |   std::fill(result, result + n, 0);
 84 |   std::vector<RT> weight(n, 0);
 85 |   std::vector<ET> rest(n, idx);
 86 |   for (int dim = 0; dim < n; ++dim) {
 87 |     weight[dim] = static_cast<RT>(1. / plist[dim]);
 88 |   }
 89 |   // pending tracks the sum of rest[]; the sweep repeats until it is used up.
 90 |   ET pending = n * idx;
 91 |   while (pending > 0) {
 92 |     for (int dim = 0; dim < n; ++dim) {
 93 |       const auto digit = rest[dim] % plist[dim];
 94 |       result[dim] += digit * weight[dim];
 95 |       weight[dim] /= plist[dim];
 96 |       pending -= rest[dim];
 97 |       rest[dim] /= plist[dim];
 98 |       pending += rest[dim];
 99 |     }
100 |   }
101 | }
 94 | 
 95 | // Returns the idx-th n-dimensional Halton point as a std::vector<RT>.
 96 | // Fills a local vector through the pointer-output overload. Calling
 97 | // Halton<ET, RT>(idx, n) here would select this very function and recurse
 98 | // without end.
 99 | template <typename ET, typename RT>
100 | SL std::vector<RT> Halton(ET idx, const int n) {
101 |   std::vector<RT> r(n, 0);
102 |   Halton<ET, RT>(idx, n, std::data(r));
103 |   return r;
104 | }
 99 | 
100 | // Returns the idx-th point of the n-dimensional Halton sequence as doubles.
101 | SL std::vector<double> Halton(int64 idx, const int n) {
102 |   std::vector<double> point(n, 0);
103 |   double* const out = std::data(point);
104 |   Halton(idx, n, out);
105 |   return point;
106 | }
106 | }  // namespace pe
107 | #endif
108 | 


--------------------------------------------------------------------------------
/pe_time:
--------------------------------------------------------------------------------
  1 | #ifndef PE_TIME_
  2 | #define PE_TIME_
  3 | 
  4 | #include "pe_base"
  5 | 
  6 | namespace pe {
    | // Clock-related aliases shared by TimeDelta and TimeRecorder.
  7 | using pe_clock_t = std::chrono::high_resolution_clock;
  8 | using time_point_t = pe_clock_t::time_point;
  9 | using duration_t = pe_clock_t::duration;
 10 | using period_t = pe_clock_t::period;
 11 | 
    | // Clock ticks per millisecond / second / minute / hour / day. Only
    | // period_t::den is used, i.e. period_t::num is assumed to be 1 -- TODO
    | // confirm for the target platform's high_resolution_clock.
 12 | constexpr int64 MILLI_SEC_CLOCKS = period_t::den / std::milli::den;
 13 | constexpr int64 SEC_CLOCKS = period_t::den;
 14 | constexpr int64 MIN_CLOCKS = SEC_CLOCKS * 60;
 15 | constexpr int64 HOUR_CLOCKS = MIN_CLOCKS * 60;
 16 | constexpr int64 DAY_CLOCKS = HOUR_CLOCKS * 24;
 17 | 
 18 | // A signed span of time stored as a pe_clock_t duration.
 19 | // Implicitly constructible from duration_t. The From* factories take whole
 20 | // units; the To* accessors return fractional units as double.
 21 | class TimeDelta {
 22 |  public:
 23 |   TimeDelta(duration_t duration = duration_t::zero()) : duration_(duration) {}
 24 | 
 25 |   // Raw tick count of the stored duration.
 26 |   int64 NativeTime() const { return duration_.count(); }
 27 |   double ToMilliSeconds() const {
 28 |     return 1. * duration_.count() / MILLI_SEC_CLOCKS;
 29 |   }
 30 |   double ToSeconds() const { return 1. * duration_.count() / SEC_CLOCKS; }
 31 |   double ToMinutes() const { return 1. * duration_.count() / MIN_CLOCKS; }
 32 |   double ToHours() const { return 1. * duration_.count() / HOUR_CLOCKS; }
 33 |   double ToDays() const { return 1. * duration_.count() / DAY_CLOCKS; }
 34 | 
 35 |   // Renders the delta as "D:HH:MM:SS.mmm". A negative delta is printed as
 36 |   // '-' followed by the rendering of its magnitude, so no individual field
 37 |   // carries its own sign.
 38 |   std::string Format() const {
 39 |     const int64 ticks = duration_.count();
 40 |     // Division and remainder truncate toward zero, so for a negative delta
 41 |     // every field is <= 0. Each (small) field is flipped separately; the
 42 |     // full tick count is never negated, which could overflow.
 43 |     const int64 sign = ticks < 0 ? -1 : 1;
 44 |     const int64 day = ticks / DAY_CLOCKS * sign;
 45 |     const int hour = static_cast<int>(ticks % DAY_CLOCKS / HOUR_CLOCKS * sign);
 46 |     const int min = static_cast<int>(ticks % HOUR_CLOCKS / MIN_CLOCKS * sign);
 47 |     const int sec = static_cast<int>(ticks % MIN_CLOCKS / SEC_CLOCKS * sign);
 48 |     const int msec =
 49 |         static_cast<int>(ticks % SEC_CLOCKS / MILLI_SEC_CLOCKS * sign);
 50 |     char temp[128];
 51 |     snprintf(temp, sizeof(temp), "%s%" PRId64 ":%02d:%02d:%02d.%03d",
 52 |              ticks < 0 ? "-" : "", day, hour, min, sec, msec);
 53 |     return temp;
 54 |   }
 55 | 
 56 |   static TimeDelta FromMilliSeconds(int64 t) {
 57 |     return duration_t(t * MILLI_SEC_CLOCKS);
 58 |   }
 59 |   static TimeDelta FromSeconds(int64 t) { return duration_t(t * SEC_CLOCKS); }
 60 |   static TimeDelta FromMinutes(int64 t) { return duration_t(t * MIN_CLOCKS); }
 61 |   static TimeDelta FromHours(int64 t) { return duration_t(t * HOUR_CLOCKS); }
 62 |   static TimeDelta FromDays(int64 t) { return duration_t(t * DAY_CLOCKS); }
 63 | 
 64 |   bool operator>(const TimeDelta& o) const { return duration_ > o.duration_; }
 65 |   bool operator>=(const TimeDelta& o) const { return duration_ >= o.duration_; }
 66 |   bool operator==(const TimeDelta& o) const { return duration_ == o.duration_; }
 67 |   bool operator!=(const TimeDelta& o) const { return duration_ != o.duration_; }
 68 |   bool operator<(const TimeDelta& o) const { return duration_ < o.duration_; }
 69 |   bool operator<=(const TimeDelta& o) const { return duration_ <= o.duration_; }
 70 | 
 71 |   // In-place addition; returns *this for chaining.
 72 |   TimeDelta& Add(TimeDelta t) {
 73 |     duration_ += t.duration_;
 74 |     return *this;
 75 |   }
 76 | 
 77 |   // In-place subtraction; returns *this for chaining.
 78 |   TimeDelta& Sub(TimeDelta t) {
 79 |     duration_ -= t.duration_;
 80 |     return *this;
 81 |   }
 82 | 
 83 |  private:
 84 |   duration_t duration_;
 85 | };
 70 | 
 71 | // Stopwatch: remembers a start time point and reports the time elapsed
 72 | // since it. The start is set at construction and by every Record() call.
 73 | class TimeRecorder {
 74 |  public:
 75 |   TimeRecorder() : last_time_(pe_clock_t::now()) {}
 76 | 
 77 |   // Restarts the stopwatch and returns the new start as ticks since epoch.
 78 |   int64 Record() {
 79 |     const time_point_t now = pe_clock_t::now();
 80 |     last_time_ = now;
 81 |     return now.time_since_epoch().count();
 82 |   }
 83 | 
 84 |   // Time passed since construction or the last Record() call.
 85 |   TimeDelta Elapsed() const {
 86 |     const duration_t spent = pe_clock_t::now() - last_time_;
 87 |     return TimeDelta(spent);
 88 |   }
 89 | 
 90 |   // Elapsed time rendered by TimeDelta::Format().
 91 |   std::string usage() const {
 92 |     const TimeDelta spent = Elapsed();
 93 |     return spent.Format();
 94 |   }
 95 | 
 96 |  private:
 97 |   time_point_t last_time_;
 98 | };
 87 | 
 88 | // Prints the lifetime of the object to stderr when it is destroyed.
 89 | class TimeUsage {
 90 |  public:
 91 |   ~TimeUsage() {
 92 |     const std::string elapsed = tr_.Elapsed().Format();
 93 |     fprintf(stderr, "time usage: %s\n", elapsed.c_str());
 94 |   }
 95 | 
 96 |  private:
 97 |   // Started when this object is constructed.
 98 |   TimeRecorder tr_;
 99 | };
 97 | 
 98 | #ifndef PE_TEST_MODE
 99 | static TimeUsage __time_usage;
100 | #endif
101 | }  // namespace pe
102 | #endif
103 | 


--------------------------------------------------------------------------------
/pe_tree:
--------------------------------------------------------------------------------
  1 | #ifndef PE_TREE_
  2 | #define PE_TREE_
  3 | 
  4 | #include "pe_base"
  5 | 
  6 | namespace pe {
  7 | // Coordinate compression: maps each distinct element to its 1-based rank
  8 | // (1..n) in sorted order.
  9 | class IndexHelper {
 10 |  public:
 11 |   IndexHelper() = default;
 12 | 
 13 |   IndexHelper(const std::vector<int64>& elements) { Reset(elements); }
 14 | 
 15 |   IndexHelper(std::vector<int64>&& elements) { Reset(std::move(elements)); }
 16 | 
 17 |   // Builds the helper from the iterator range [s, e).
 18 |   template <typename IT>
 19 |   IndexHelper(IT s, IT e) {
 20 |     Reset(std::vector<int64>(s, e));
 21 |   }
 22 | 
 23 |   // Replaces the element set with a copy of elements.
 24 |   IndexHelper& Reset(const std::vector<int64>& elements) {
 25 |     elements_ = elements;
 26 |     ResetInternal();
 27 |     return *this;
 28 |   }
 29 | 
 30 |   // Replaces the element set, taking over the storage of elements.
 31 |   IndexHelper& Reset(std::vector<int64>&& elements) {
 32 |     elements_ = std::move(elements);
 33 |     ResetInternal();
 34 |     return *this;
 35 |   }
 36 | 
 37 |   // Sorts the stored elements, drops duplicates and records the new count.
 38 |   void ResetInternal() {
 39 |     std::sort(elements_.begin(), elements_.end());
 40 |     const auto unique_end = std::unique(elements_.begin(), elements_.end());
 41 |     elements_.erase(unique_end, elements_.end());
 42 |     size_ = static_cast<int64>(elements_.size());
 43 |   }
 44 | 
 45 |   // Number of distinct elements.
 46 |   int64 size() const { return size_; }
 47 | 
 48 |   // 1-based position of the first stored element that is >= v.
 49 |   int64 Index(int64 v) const {
 50 |     const auto pos = std::lower_bound(elements_.begin(), elements_.end(), v);
 51 |     const int64 idx = pos - elements_.begin();
 52 |     return idx + 1;
 53 |   }
 54 | 
 55 |   int64 operator[](int64 v) const { return Index(v); }
 56 | 
 57 |  private:
 58 |   std::vector<int64> elements_;
 59 |   int64 size_ = 0;
 60 | };
 53 | 
 54 | // Shared storage for the binary indexed trees below (CRTP base).
 55 | // Holds size_ + 1 cells so that indices 1..size_ are addressable.
 56 | template <typename T, typename Derived>
 57 | class BitBase {
 58 |  public:
 59 |   BitBase(int size = 0) { Reset(size); }
 60 |   // Sizes the tree after the number of distinct elements in ih.
 61 |   BitBase(const IndexHelper& ih) { Reset(static_cast<int>(std::size(ih))); }
 62 | 
 63 |   // Resizes to new_size and zeroes every cell.
 64 |   Derived& Reset(int new_size) {
 65 |     size_ = new_size;
 66 |     data_.resize(size_ + 1);
 67 |     return Clear();
 68 |   }
 69 | 
 70 |   // Zeroes every cell, keeping the current size.
 71 |   Derived& Clear() {
 72 |     for (auto&& cell : data_) {
 73 |       cell = 0;
 74 |     }
 75 |     return static_cast<Derived&>(*this);
 76 |   }
 77 | 
 78 |  public:
 79 |   std::vector<T> data_;
 80 |   int size_;
 81 | };
 76 | 
 77 | // Range-update binary indexed tree.
 78 | // Supports adding a delta to a range of indices and querying the value at a
 79 | // single index. Indices are 1-based: the update loop makes no progress from
 80 | // index 0.
 81 | template <typename T>
 82 | class RUBit : public BitBase<T, RUBit<T>> {
 83 |  public:
 84 |   using BitBase<T, RUBit<T>>::BitBase;
 85 |   using BitBase<T, RUBit<T>>::data_;
 86 |   using BitBase<T, RUBit<T>>::size_;
 87 | 
 88 |   // Adds delta to every index >= x.
 89 |   RUBit& Update(int x, T delta) {
 90 |     while (x <= size_) {
 91 |       data_[x] += delta;
 92 |       x += x & -x;
 93 |     }
 94 |     return *this;
 95 |   }
 96 | 
 97 |   // Adds delta to every index in [x, y].
 98 |   RUBit& Update(int x, int y, T delta) {
 99 |     Update(x, delta);
100 |     return Update(y + 1, -delta);
101 |   }
102 | 
103 |   // Current value at index x.
104 |   T Query(int x) const {
105 |     T ret(0);
106 |     while (x > 0) {
107 |       ret += data_[x];
108 |       x -= x & -x;
109 |     }
110 |     return ret;
111 |   }
112 | };
105 | 
106 | // Range-sum-query binary indexed tree.
107 | // Supports adding a delta at a single index and querying the sum over a
108 | // range. Indices are 1-based: the update loop makes no progress from
109 | // index 0.
110 | template <typename T>
111 | class RSQBit : public BitBase<T, RSQBit<T>> {
112 |  public:
113 |   using BitBase<T, RSQBit<T>>::BitBase;
114 |   using BitBase<T, RSQBit<T>>::data_;
115 |   using BitBase<T, RSQBit<T>>::size_;
116 | 
117 |   // Adds delta to the element at index x.
118 |   RSQBit& Update(int x, T delta) {
119 |     while (x <= size_) {
120 |       data_[x] += delta;
121 |       x += x & -x;
122 |     }
123 |     return *this;
124 |   }
125 | 
126 |   // Sum of the elements at indices 1..x.
127 |   T Query(int x) const {
128 |     T ret = 0;
129 |     while (x > 0) {
130 |       ret += data_[x];
131 |       x -= x & -x;
132 |     }
133 |     return ret;
134 |   }
135 | 
136 |   // Sum of the elements at indices x..y.
137 |   T Query(int x, int y) const {
138 |     const T upto_y = Query(y);
139 |     const T before_x = Query(x - 1);
140 |     return upto_y - before_x;
141 |   }
142 | };
132 | }  // namespace pe
133 | #endif
134 | 


--------------------------------------------------------------------------------
/pe_vector:
--------------------------------------------------------------------------------
 1 | #ifndef PE_VECTOR_
 2 | #define PE_VECTOR_
 3 | 
 4 | #include "pe_base"
 5 | 
 6 | namespace pe {
 7 | 
 8 | // Element-wise sum: returns c with c[i] = a[i] + b[i] for every index of a.
 9 | // b must be at least as long as a; extra elements of b are ignored.
10 | template <typename T>
11 | SL std::vector<T> VectorAdd(const std::vector<T>& a, const std::vector<T>& b) {
12 |   PE_ASSERT(b.size() >= a.size());
13 |   const std::size_t n = a.size();
14 |   std::vector<T> c(n);
15 |   for (std::size_t i = 0; i < n; ++i) c[i] = a[i] + b[i];
16 |   return c;
17 | }
14 | 
15 | // Element-wise difference: returns c with c[i] = a[i] - b[i] for every index
16 | // of a. b must be at least as long as a; extra elements of b are ignored.
17 | template <typename T>
18 | SL std::vector<T> VectorSub(const std::vector<T>& a, const std::vector<T>& b) {
19 |   PE_ASSERT(b.size() >= a.size());
20 |   const std::size_t n = a.size();
21 |   std::vector<T> c(n);
22 |   for (std::size_t i = 0; i < n; ++i) c[i] = a[i] - b[i];
23 |   return c;
24 | }
21 | 
22 | // Scalar multiple: returns c with c[i] = t * b[i].
23 | template <typename T>
24 | SL std::vector<T> VectorScale(T t, const std::vector<T>& b) {
25 |   const std::size_t n = b.size();
26 |   std::vector<T> c(n);
27 |   // size_t index: matches the type of size() and cannot overflow like int.
28 |   for (std::size_t i = 0; i < n; ++i) c[i] = t * b[i];
29 |   return c;
30 | }
28 | 
29 | // Scalar multiple with the vector given first: returns c with
30 | // c[i] = t * b[i].
31 | template <typename T>
32 | SL std::vector<T> VectorScale(const std::vector<T>& b, T t) {
33 |   const std::size_t n = b.size();
34 |   std::vector<T> c(n);
35 |   // size_t index: matches the type of size() and cannot overflow like int.
36 |   for (std::size_t i = 0; i < n; ++i) c[i] = t * b[i];
37 |   return c;
38 | }
35 | 
36 | // Dot product: returns the sum of a[i] * b[i] over every index of a
37 | // (0 for an empty a). b must be at least as long as a.
38 | template <typename T>
39 | SL T VectorDotProduct(const std::vector<T>& a, const std::vector<T>& b) {
40 |   PE_ASSERT(b.size() >= a.size());
41 |   const std::size_t n = a.size();
42 |   T ret = 0;
43 |   for (std::size_t i = 0; i < n; ++i) ret += a[i] * b[i];
44 |   return ret;
45 | }
42 | }  // namespace pe
43 | #endif


--------------------------------------------------------------------------------
/precompile.bat:
--------------------------------------------------------------------------------
  | rem Precompiles pe.hpp as a C++20 header with g++ (-O2, native tuning,
  | rem OpenMP and pthread enabled), then waits for a key press.
1 | g++ -xc++-header pe.hpp --std=c++20 -fno-diagnostics-color -O2 -march=native -mtune=native -fopenmp -pthread -static
2 | pause


--------------------------------------------------------------------------------
/test/BUILD:
--------------------------------------------------------------------------------
 1 | load("//toolchain:pe_toolchain.bzl", "pe_binary", "pe_library")
 2 | 
 3 | package(
 4 |     default_visibility = [
 5 |         "//visibility:public",
 6 |     ],
 7 | )
 8 | 
   | # Performance test binary, built from test_perf.c with TEST_ALL defined.
 9 | pe_binary(
10 |   name = "test_perf",
11 |   srcs = ["test_perf.c"],
12 |   defines = [
13 |     "TEST_ALL",
14 |     "ENABLE_ASSERT=0",
15 |     "TRY_TO_USE_INT128=1",
16 |     "ENABLE_OPENMP=1",
17 |   ]
18 | )
19 | 
   | # Default test binary: every test size enabled, int128 and OpenMP on.
20 | pe_binary(
21 |   name = "test",
22 |   srcs = ["pe_test.c"],
23 |   defines = [
24 |     "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED",
25 |     "ENABLE_ASSERT=0",
26 |     "TRY_TO_USE_INT128=1",
27 |     "ENABLE_OPENMP=1",
28 |   ]
29 | )
30 | 
   | # Same defines as "test" plus ENABLE_TCMALLOC=1; links tcmalloc_minimal,
   | # synchronization and psapi.
31 | pe_binary(
32 |   name = "test_tcmalloc",
33 |   srcs = ["pe_test.c"],
34 |   defines = [
35 |     "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED",
36 |     "ENABLE_ASSERT=0",
37 |     "TRY_TO_USE_INT128=1",
38 |     "ENABLE_OPENMP=1",
39 |     "ENABLE_TCMALLOC=1",
40 |   ],
41 |   libs = [
42 |     "tcmalloc_minimal",
43 |     "synchronization",
44 |     "psapi",
45 |   ]
46 | )
47 | 
   | # The four targets below cover every combination of TRY_TO_USE_INT128 and
   | # ENABLE_OPENMP. "test_int128_openmp" uses the same defines as "test".
48 | pe_binary(
49 |   name = "test_noint128_noopenmp",
50 |   srcs = ["pe_test.c"],
51 |   defines = [
52 |     "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED",
53 |     "ENABLE_ASSERT=0",
54 |     "TRY_TO_USE_INT128=0",
55 |     "ENABLE_OPENMP=0",
56 |   ]
57 | )
58 | 
59 | pe_binary(
60 |   name = "test_int128_noopenmp",
61 |   srcs = ["pe_test.c"],
62 |   defines = [
63 |     "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED",
64 |     "ENABLE_ASSERT=0",
65 |     "TRY_TO_USE_INT128=1",
66 |     "ENABLE_OPENMP=0",
67 |   ]
68 | )
69 | 
70 | pe_binary(
71 |   name = "test_int128_openmp",
72 |   srcs = ["pe_test.c"],
73 |   defines = [
74 |     "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED",
75 |     "ENABLE_ASSERT=0",
76 |     "TRY_TO_USE_INT128=1",
77 |     "ENABLE_OPENMP=1",
78 |   ]
79 | )
80 | 
81 | pe_binary(
82 |   name = "test_noint128_openmp",
83 |   srcs = ["pe_test.c"],
84 |   defines = [
85 |     "ENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED",
86 |     "ENABLE_ASSERT=0",
87 |     "TRY_TO_USE_INT128=0",
88 |     "ENABLE_OPENMP=1",
89 |   ]
90 | )


--------------------------------------------------------------------------------
/test/array_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | // struct XYZ {
 4 | //   XYZ() {dbg("xyz");}
 5 | // };
 6 | // struct B {
 7 | //   B(XYZ& xyz) : xyz(xyz) {
 8 | //     dbg("B constructed");
 9 | //   }
10 | //   XYZ& xyz;
11 | // };
12 | // struct A : public B {
13 | //   A() : B(xyz) {
14 | //     dbg("A constructed");
15 | //   }
16 | //   XYZ xyz;
17 | // };
18 | 
19 | namespace array_test {
20 | // Smoke test for the array containers: writes row * col into every cell of
21 | // a DArray (with both of its extents), an Array and an AArray.
22 | SL void ArrayTest() {
23 |   // Stores row * col into grid[row][col] for the given extents.
24 |   const auto fill_products = [](auto& grid, int rows, int cols) {
25 |     for (int row = 0; row < rows; ++row) {
26 |       for (int col = 0; col < cols; ++col) {
27 |         grid[row][col] = row * col;
28 |       }
29 |     }
30 |   };
31 | 
32 |   DArray<int, 2> dynamic_grid({5, 6});
33 |   fill_products(dynamic_grid, 5, 6);
34 | 
35 |   dynamic_grid.Reset({3, 2});
36 |   fill_products(dynamic_grid, 3, 2);
37 | 
38 |   Array<int, 4, 5> fixed_grid(5);
39 |   fill_products(fixed_grid, 4, 5);
40 | 
41 |   AArray<int, StdAllocator, 4, 5> allocator_grid(5);
42 |   fill_products(allocator_grid, 4, 5);
43 | }
44 | PE_REGISTER_TEST(&ArrayTest, "ArrayTest", SMALL);
45 | }  // namespace array_test


--------------------------------------------------------------------------------
/test/bi_div_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace bi_test {
 4 | // Cross-checks BigInteger division against the reference type T.
 5 | // For both strategies, every sign s1 in {-1, 0, 1} and every non-zero sign
 6 | // s2, x rounds are run. A round builds a as s1 times y random positive
 7 | // factors and b as s2 times the factors drawn at odd positions, then asserts
 8 | // that T's a / b and a % b print the same as the pair returned by
 9 | // Div(a, b) on BigInteger.
10 | //   strategy 0: b reuses the odd-indexed factors of a.
11 | //   strategy 1: b draws its own factors.
12 | template <typename T>
13 | SL void BiDivTestImpl(int x, int y) {
14 |   // Random factor in [1, RAND_MAX]. A plain rand() + 1 would overflow int
15 |   // whenever rand() returns RAND_MAX and RAND_MAX equals INT_MAX.
16 |   const auto next_factor = []() { return rand() % RAND_MAX + 1; };
17 | 
18 |   for (int strategy = 0; strategy < 2; ++strategy)
19 |     for (int s1 = -1; s1 <= 1; ++s1)
20 |       for (int s2 = -1; s2 <= 1; ++s2)
21 |         if (s2 != 0)
22 |           for (int id = 0; id < x; ++id) {
23 |             std::vector<int> A, B;
24 |             if (strategy == 0) {
25 |               for (int i = 0; i < y; ++i) {
26 |                 int t = next_factor();
27 |                 A.push_back(t);
28 |                 if (i & 1) {
29 |                   B.push_back(t);
30 |                 }
31 |               }
32 |             } else {
33 |               for (int i = 0; i < y; ++i) {
34 |                 A.push_back(next_factor());
35 |                 if (i & 1) {
36 |                   B.push_back(next_factor());
37 |                 }
38 |               }
39 |             }
40 |             // Reference result computed with T.
41 |             std::string expected_result1;
42 |             std::string expected_result2;
43 |             {
44 |               T a = s1;
45 |               T b = s2;
46 |               for (auto& iter : A) a *= iter;
47 |               for (auto& iter : B) b *= iter;
48 |               T c = a / b;
49 |               T d = a % b;
50 |               expected_result1 = ToString(c);
51 |               expected_result2 = ToString(d);
52 |             }
53 |             // Result under test, computed with BigInteger and Div.
54 |             std::string actual_result1;
55 |             std::string actual_result2;
56 |             {
57 |               BigInteger a = s1;
58 |               BigInteger b = s2;
59 |               for (auto& iter : A) a *= iter;
60 |               for (auto& iter : B) b *= iter;
61 |               auto [c, d] = Div(a, b);
62 |               actual_result1 = ToString(c);
63 |               actual_result2 = ToString(d);
64 |             }
65 |             assert(expected_result1 == actual_result1);
66 |             assert(expected_result2 == actual_result2);
67 |           }
68 | }
55 | 
   | // Medium run against BigInteger itself: x = 100 rounds, y = 500 factors.
56 | SL void BiDivTestMedium_BigInteger() { BiDivTestImpl<BigInteger>(100, 500); }
57 | 
   | // Registered only when CONTINUOUS_INTEGRATION_TEST is not defined.
58 | #if !defined(CONTINUOUS_INTEGRATION_TEST)
59 | PE_REGISTER_TEST(&BiDivTestMedium_BigInteger, "BiDivTestMedium_BigInteger",
60 |                  MEDIUM);
61 | #endif
62 | 
   | // Big run against BigInteger itself: x = 10 rounds, y = 2000 factors.
63 | SL void BiDivTestBig_BigInteger() { BiDivTestImpl<BigInteger>(10, 2000); }
64 | 
65 | #if !defined(CONTINUOUS_INTEGRATION_TEST)
66 | PE_REGISTER_TEST(&BiDivTestBig_BigInteger, "BiDivTestBig_BigInteger", BIG);
67 | #endif
68 | 
   | // The same two runs with MpInteger as the reference type; compiled only
   | // when ENABLE_GMP is set.
69 | #if ENABLE_GMP
70 | SL void BiDivTestMedium_MpInteger() { BiDivTestImpl<MpInteger>(100, 500); }
71 | 
72 | #if !defined(CONTINUOUS_INTEGRATION_TEST)
73 | PE_REGISTER_TEST(&BiDivTestMedium_MpInteger, "BiDivTestMedium_MpInteger",
74 |                  MEDIUM);
75 | #endif
76 | 
77 | SL void BiDivTestBig_MpInteger() { BiDivTestImpl<MpInteger>(10, 2000); }
78 | 
79 | #if !defined(CONTINUOUS_INTEGRATION_TEST)
80 | PE_REGISTER_TEST(&BiDivTestBig_MpInteger, "BiDivTestBig_MpInteger", BIG);
81 | #endif
82 | #endif
83 | 
84 | }  // namespace bi_test
85 | 


--------------------------------------------------------------------------------
/test/bi_mul_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace bi_test {
 4 | template <typename T>
 5 | SL void BiMulTestImpl(int x, int y) {
 6 |   // x rounds per sign pair in {-1, 0, 1}^2; T is compared with BigInteger.
 7 |   for (int sign_a = -1; sign_a <= 1; ++sign_a)
 8 |     for (int sign_b = -1; sign_b <= 1; ++sign_b)
 9 |       for (int round = 0; round < x; ++round) {
10 |         std::vector<int> lhs_factors, rhs_factors;
11 |         for (int k = 0; k < y; ++k) {
12 |           lhs_factors.push_back(rand());
13 |           rhs_factors.push_back(rand());
14 |         }
15 |         const std::string expected_result = [&]() {
16 |           T lhs = sign_a;
17 |           T rhs = sign_b;
18 |           for (auto& factor : lhs_factors) lhs *= factor;
19 |           for (auto& factor : rhs_factors) rhs *= factor;
20 |           T product = lhs * rhs;
21 |           return ToString(product);
22 |         }();
23 |         const std::string actual_result = [&]() {
24 |           BigInteger lhs = sign_a;
25 |           BigInteger rhs = sign_b;
26 |           for (auto& factor : lhs_factors) lhs *= factor;
27 |           for (auto& factor : rhs_factors) rhs *= factor;
28 |           BigInteger product = lhs * rhs;
29 |           std::stringstream stream;  // rendered through operator<<
30 |           stream << product;
31 |           std::string text;
32 |           stream >> text;
33 |           return text;
34 |         }();
35 |         assert(expected_result == actual_result);
36 |       }
37 | }
38 | 
39 | SL void BiMulTestMedium_BigInteger() { BiMulTestImpl<BigInteger>(1000, 500); }
40 | // Registered only when CONTINUOUS_INTEGRATION_TEST is not defined.
41 | #ifndef CONTINUOUS_INTEGRATION_TEST
42 | PE_REGISTER_TEST(&BiMulTestMedium_BigInteger, "BiMulTestMedium_BigInteger",
43 |                  MEDIUM);
44 | #endif  // CONTINUOUS_INTEGRATION_TEST
45 | 
46 | SL void BiMulTestBig_BigInteger() { BiMulTestImpl<BigInteger>(10, 10000); }
47 | // Registered only when CONTINUOUS_INTEGRATION_TEST is not defined.
48 | #ifndef CONTINUOUS_INTEGRATION_TEST
49 | PE_REGISTER_TEST(&BiMulTestBig_BigInteger, "BiMulTestBig_BigInteger", BIG);
50 | #endif  // CONTINUOUS_INTEGRATION_TEST
51 | 
52 | #if ENABLE_GMP
53 | SL void BiMulTestMedium_MpInteger() { BiMulTestImpl<MpInteger>(1000, 500); }
54 | // Registered only when CONTINUOUS_INTEGRATION_TEST is not defined.
55 | #ifndef CONTINUOUS_INTEGRATION_TEST
56 | PE_REGISTER_TEST(&BiMulTestMedium_MpInteger, "BiMulTestMedium_MpInteger",
57 |                  MEDIUM);
58 | #endif  // CONTINUOUS_INTEGRATION_TEST
59 | 
60 | SL void BiMulTestBig_MpInteger() { BiMulTestImpl<MpInteger>(10, 10000); }
61 | // Registered only when CONTINUOUS_INTEGRATION_TEST is not defined.
62 | #ifndef CONTINUOUS_INTEGRATION_TEST
63 | PE_REGISTER_TEST(&BiMulTestBig_MpInteger, "BiMulTestBig_MpInteger", BIG);
64 | #endif  // CONTINUOUS_INTEGRATION_TEST
65 | #endif  // ENABLE_GMP
66 | }  // namespace bi_test
67 | 


--------------------------------------------------------------------------------
/test/bit_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace bit_test {
 4 | #if defined(COMPILER_GNU)
 5 | SL void BitTest() {  // Checks the __pe_* bit helpers against GCC builtins.
 6 |   for (int value = 0; value < 65536; ++value) {
 7 |     if (value != 0) {  // clz/ctz are compared for non-zero inputs only
 8 |       assert(__pe_clz32(value) == __builtin_clz(value));
 9 |       assert(__pe_ctz32(value) == __builtin_ctz(value));
10 |     }
11 |     assert(__pe_popcount32(value) == __builtin_popcount(value));
12 |     assert(__pe_ffs32(value) == __builtin_ffs(value));
13 |     assert(__pe_parity32(value) == __builtin_parity(value));
14 | #if defined(STL_GLIBCXX)
15 |     if (value != 0) {
16 |       assert(__pe_lg32(value) == std::__lg(value));
17 |     }
18 | #endif
19 |   }
20 |   // 64-bit helpers on 65536 CRand63() samples; a zero sample is skipped.
21 |   for (int round = 0; round < 65536; ++round) {
22 |     uint64 sample = CRand63();
23 |     if (sample == 0) continue;
24 |     assert(__pe_clz64(sample) == __builtin_clzll(sample));
25 |     assert(__pe_ctz64(sample) == __builtin_ctzll(sample));
26 |     assert(__pe_popcount64(sample) == __builtin_popcountll(sample));
27 |     assert(__pe_ffs64(sample) == __builtin_ffsll(sample));
28 |     assert(__pe_parity64(sample) == __builtin_parityll(sample));
29 | #if defined(STL_GLIBCXX)
30 |     assert(__pe_lg64(sample) == std::__lg(sample));
31 | #endif
32 |   }
33 | 
34 |   // SetBit / RevBit / ResetBit / GetBit round trip on bits 20 and 21.
35 |   int bits = 0;
36 |   SetBit(bits, 20);
37 |   assert(bits == (1 << 20));
38 |   assert(GetBit(bits, 20) == 1);
39 | 
40 |   RevBit(bits, 20);  // expected to flip bit 20 back off
41 |   assert(bits == 0);
42 |   assert(GetBit(bits, 20) == 0);
43 | 
44 |   RevBit(bits, 21);  // expected to flip bit 21 on
45 |   assert(bits == (1 << 21));
46 |   assert(GetBit(bits, 21) == 1);
47 | 
48 |   ResetBit(bits, 21);
49 |   assert(bits == 0);
50 |   assert(GetBit(bits, 21) == 0);
51 | }
52 | 
53 | PE_REGISTER_TEST(&BitTest, "BitTest", SMALL);
54 | #endif
55 | }  // namespace bit_test
56 | 


--------------------------------------------------------------------------------
/test/dva_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace dva_test {
 4 | SL void TestS0() {
 5 |   // PrimePi<int64>(n)[n] and PrimeS0Ex<int64>(n)[n] must both equal the
 6 |   // hard-coded count: 1229 for n = 1e4 and 5761455 for n = 1e8.
 7 |   auto check_count = [](int n, int64 expected_count) {
 8 |     auto by_prime_pi = PrimePi<int64>(n);
 9 |     assert(by_prime_pi[n] == expected_count);
10 |     auto by_s0_ex = PrimeS0Ex<int64>(n);
11 |     assert(by_s0_ex[n] == expected_count);
12 |   };
13 | 
14 |   check_count(10000, 1229LL);
15 |   check_count(100000000, 5761455LL);
16 | }
17 | 
18 | SL void TestS1() {
19 |   // For each limit, the sum of primes <= limit is obtained three ways and
20 |   // compared with a hard-coded value:
21 |   //   1. brute force with IsPrime,
22 |   //   2. PrimeS1<int64>(limit)[limit],
23 |   //   3. PrimeS1Ex<int64>(limit)[limit].
24 | 
25 |   auto check_prime_sum = [](int limit, int64 expected_sum) {
26 |     int64 brute_force = 0;
27 |     for (int candidate = 2; candidate <= limit; ++candidate) {
28 |       if (IsPrime(candidate)) brute_force += candidate;
29 |     }
30 |     assert(brute_force == expected_sum);
31 | 
32 |     auto by_s1 = PrimeS1<int64>(limit);
33 |     assert(by_s1[limit] == expected_sum);
34 | 
35 |     auto by_s1_ex = PrimeS1Ex<int64>(limit);
36 |     assert(by_s1_ex[limit] == expected_sum);
37 |   };
38 | 
39 |   // Hard-coded sums for limits 1e4 and 1e6.
40 |   check_prime_sum(10000, 5736396LL);
41 |   check_prime_sum(1000000, 37550402023LL);
42 | }
43 | 
44 | SL void DvaTest() {  // Entry point: runs the S0 checks, then the S1 checks.
45 |   TestS0();
46 |   TestS1();
47 | }
48 | 
49 | PE_REGISTER_TEST(&DvaTest, "DvaTest", SMALL);  // registered with size SMALL
50 | }  // namespace dva_test
51 | 


--------------------------------------------------------------------------------
/test/fft_test.c:
--------------------------------------------------------------------------------
  1 | #include "pe_test.h"
  2 | 
  3 | namespace fft_test {
  4 | #if HAS_POLY_MUL_FLINT
  5 | // Multiplies x by y modulo mod and checks the FFT based implementations
  6 | // against flint::PolyMul, which serves as the reference result.
  7 | // fft::PolyMulFftSmall is only exercised when check_small is true.
  8 | SL void ExpectSameProduct(const std::vector<uint64>& x,
  9 |                           const std::vector<uint64>& y, const int64 mod,
 10 |                           const bool check_small) {
 11 |   const std::vector<uint64> expected = flint::PolyMul(x, y, mod);
 12 |   const std::vector<uint64> by_fft = fft::PolyMulFft(x, y, mod);
 13 |   assert(expected == by_fft);
 14 |   if (check_small) {
 15 |     const std::vector<uint64> by_fft_small = fft::PolyMulFftSmall(x, y, mod);
 16 |     assert(expected == by_fft_small);
 17 |   }
 18 | }
 19 | 
 20 | // Random operands: n coefficients each, drawn as CRand63() % mod. The draws
 21 | // alternate x[i], y[i] so the sequence of CRand63() calls is fixed.
 22 | SL void RandomCase(const int64 mod, const int n, const bool check_small) {
 23 |   std::vector<uint64> x, y;
 24 |   for (int i = 0; i < n; ++i) {
 25 |     x.push_back((uint64)CRand63() % mod);
 26 |     y.push_back((uint64)CRand63() % mod);
 27 |   }
 28 |   ExpectSameProduct(x, y, mod, check_small);
 29 | }
 30 | 
 31 | // Extreme operands: n coefficients each, every coefficient equal to mod - 1.
 32 | SL void LimitCase(const int64 mod, const int n, const bool check_small) {
 33 |   const std::vector<uint64> x(n, mod - 1);
 34 |   const std::vector<uint64> y(n, mod - 1);
 35 |   ExpectSameProduct(x, y, mod, check_small);
 36 | }
 37 | 
 38 | SL void RandomTest() {
 39 |   srand(123456789);
 40 |   // mod = 1e5+19, 7000 coefficients (size note from the original: 8e13).
 41 |   RandomCase(100019, 7000, true);
 42 |   // mod = 1e9+7, 1020000 coefficients (size note from the original: 1e15).
 43 |   RandomCase(1000000007, 1020000, false);
 44 |   // mod = 1e10+19, 80000 coefficients (size note from the original: 8e14).
 45 |   RandomCase(10000000019, 80000, false);
 46 | }
 47 | 
 48 | SL void LimitTest() {
 49 |   // mod = 1e5+19. Original note: 10018*10018*2048=205537943552 (2.06e11).
 50 |   // NOTE(review): mod - 1 is 100018, not 10018; with 100018 the product is
 51 |   // 100018*100018*2048 = 20487373463552 (about 2.05e13). Confirm which bound
 52 |   // was intended.
 53 |   LimitCase(100019, 2048, true);
 54 |   // mod = 1e9+7. 1000000007*339750 = 339750002378250 (about 3.39e14).
 55 |   LimitCase(1000000007, 339750, false);
 56 |   // mod = 1e10+19. 10000000019*44064 = 440640000837216 (about 4.4e14).
 57 |   LimitCase(10000000019, 44064, false);
 58 | }
 59 | 
 60 | SL void FftTest() {
 61 |   RandomTest();
 62 |   LimitTest();
 63 | }
 64 | PE_REGISTER_TEST(&FftTest, "FftTest", SMALL);
 65 | #endif
 66 | }  // namespace fft_test
 67 | 


--------------------------------------------------------------------------------
/test/init_inv_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace init_inv_test {
 4 | constexpr int64 mod = 1000000007;
 5 | 
 6 | SL void InitInvTest() {  // Inverse tables mod `mod`, int and int64 storage.
 7 |   constexpr int kLimit = 1000000;
 8 |   std::vector<int> inv32(kLimit + 1);
 9 |   std::vector<int64> inv64(kLimit + 1);
10 |   InitInverse(inv32.data(), kLimit, mod);
11 |   InitInverse(inv64.data(), kLimit, mod);
12 |   for (int value = 1; value <= kLimit; ++value) {
13 |     assert(static_cast<int64>(value) * inv32[value] % mod == 1);
14 |     assert(inv32[value] == inv64[value]);  // both element types must agree
15 |   }
16 | }
17 | 
18 | PE_REGISTER_TEST(&InitInvTest, "InitInvTest", SMALL);
19 | }  // namespace init_inv_test
20 | 


--------------------------------------------------------------------------------
/test/int128_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace print_int128_test {
 4 | #if PE_HAS_INT128
 5 | SL void PrintInt128Test() {
 6 |   // ToString on int128 versus BigInteger for +/- 2^0 .. 2^126, then zero.
 7 |   for (int shift = 0; shift < 127; ++shift) {
 8 |     int128 value = (int128)1 << shift;
 9 |     assert(ToString(value) == ToString(BigInteger(value)));
10 |     assert(ToString(-value) == ToString(BigInteger(-value)));
11 |   }
12 |   int128 zero = 0;
13 |   assert(ToString(zero) == ToString(BigInteger(zero)));
14 |   assert(ToString(-zero) == ToString(BigInteger(-zero)));
15 | }
16 | 
17 | PE_REGISTER_TEST(&PrintInt128Test, "PrintInt128Test", SMALL);
18 | 
19 | SL void Int128LiteralTest() {
20 |   // 10^32, obtained by squaring 10 five times (10 -> 1e2 -> ... -> 1e32).
21 |   int128 power = 10;
22 |   for (int step = 0; step < 5; ++step) power *= power;
23 |   const int128 ten_pow_32 = power;
24 |   // Signed literals: bare, with an explicit '+', and negative.
25 |   assert(ten_pow_32 == "100000000000000000000000000000000"_i128);
26 |   assert(ten_pow_32 == "+100000000000000000000000000000000"_i128);
27 |   assert(-ten_pow_32 == "-100000000000000000000000000000000"_i128);
28 |   // Unsigned literals: bare and with an explicit '+'.
29 |   assert(ten_pow_32 == "100000000000000000000000000000000"_u128);
30 |   assert(ten_pow_32 == "+100000000000000000000000000000000"_u128);
31 |   // Powers of two: 2^63, 2^126 and (unsigned) 2^127.
32 |   const int128 two_pow_63 = (int128)(1LL << 62) << 1;
33 |   const int128 two_pow_126 = two_pow_63 * two_pow_63;
34 |   assert(two_pow_63 == "9223372036854775808"_i128);
35 |   assert(two_pow_126 == "85070591730234615865843651857942052864"_i128);
36 |   const uint128 two_pow_127 = (uint128)two_pow_126 << 1;
37 |   assert(two_pow_127 == "170141183460469231731687303715884105728"_u128);
38 | }
39 | 
40 | PE_REGISTER_TEST(&Int128LiteralTest, "Int128LiteralTest", SMALL);
41 | #endif
42 | }  // namespace print_int128_test
43 | 


--------------------------------------------------------------------------------
/test/misc_test.c:
--------------------------------------------------------------------------------
  1 | #include "pe_test.h"
  2 | 
  3 | namespace misc_test {
  4 | SL void MiscTest() {
  5 |   // Upper triangular system of ones whose column 10 (presumably the
  6 |   // right-hand side) holds 10 - row; every unknown is expected to be 1.
  7 |   constexpr int kSize = 10;
  8 |   GaussianEliminationSolver solver;
  9 |   solver.Init(kSize, kSize);
 10 |   for (int row = 0; row < kSize; ++row) {
 11 |     solver.At(row, kSize) = kSize - row;
 12 |     for (int col = row; col < kSize; ++col) {
 13 |       solver.At(row, col) = 1;
 14 |     }
 15 |   }
 16 |   auto solution = solver.Solve();
 17 |   for (int row = 0; row < kSize; ++row) {
 18 |     assert(FAbs(solution[row] - 1) < 1e-10);
 19 |   }
 20 | 
 21 |   // Streaming a std::vector<int64> is expected to print "{a, b, c}".
 22 |   auto format = [](const std::vector<int64>& items) {
 23 |     std::stringstream out;
 24 |     out << items;
 25 |     return out.str();
 26 |   };
 27 | 
 28 |   std::vector<int64> values;
 29 |   const char* const expected[] = {"{}", "{1}", "{1, 2}", "{1, 2, 3}"};
 30 |   assert(format(values) == expected[0]);
 31 |   for (int next = 1; next <= 3; ++next) {
 32 |     values.push_back(next);
 33 |     assert(format(values) == expected[next]);
 34 |   }
 35 | }
 36 | 
 37 | PE_REGISTER_TEST(&MiscTest, "MiscTest", SMALL);
 38 | 
 39 | SL void CountPtInCircleTest() {
 40 |   // n <= 100: both counters versus a direct scan of the square [-t, t]^2
 41 |   // with t = SqrtI(n), counting lattice points with x^2 + y^2 <= n.
 42 |   for (int64 n = 0; n <= 100; ++n) {
 43 |     const int64 fast = CountPtInCircle(n);
 44 |     const int64 brute = CountPtInCircleBf(n);
 45 |     const int t = (int)SqrtI(n);
 46 |     int64 scanned = 0;
 47 |     for (int x = -t; x <= t; ++x) {
 48 |       for (int y = -t; y <= t; ++y) {
 49 |         if (sq(x) + sq(y) <= n) ++scanned;
 50 |       }
 51 |     }
 52 |     assert(fast == scanned);
 53 |     assert(brute == scanned);
 54 |   }
 55 | 
 56 |   // Q1 variant: CountPtInCircleQ1 and CountPtInCircleQ1Bf must agree; a
 57 |   // mismatch is logged as "target fast brute" before the assertion fires.
 58 |   auto check_q1 = [](int64 target) {
 59 |     const int64 fast = CountPtInCircleQ1(target);
 60 |     const int64 brute = CountPtInCircleQ1Bf(target);
 61 |     if (fast != brute) {
 62 |       std::cerr << target << " " << fast << " " << brute << std::endl;
 63 |     }
 64 |     assert(fast == brute);
 65 |   };
 66 |   for (int64 target = 1; target <= 10000; ++target) check_q1(target);
 67 | 
 68 | #if !defined(CONTINUOUS_INTEGRATION_TEST)
 69 |   // Powers of ten from 1e4 to 1e17, probing scale - 3 .. scale + 3.
 70 |   // Note kept from the original: 9999999999999907 7853981733966909
 71 |   // 7853981733966913.
 72 |   for (int64 scale = 10000; scale <= 100000000000000000; scale *= 10) {
 73 |     for (int64 delta = -3; delta <= 3; ++delta) check_q1(scale + delta);
 74 |   }
 75 | #endif
 76 | }
 77 | 
 78 | PE_REGISTER_TEST(&CountPtInCircleTest, "CountPtInCircleTest", MEDIUM);
 79 | 
 80 | #if PE_HAS_INT128
 81 | SL void SumSigma0Test() {
 82 |   // SumSigma0 (u), SumSigma0Bf (v) and min25::sigma0_sum_fast (w) must agree
 83 |   // on every target; a mismatch is logged as "target u v w" before the
 84 |   // assertions fire.
 85 |   auto check = [](int64 target) {
 86 |     int64 u = SumSigma0(target);
 87 |     int64 v = SumSigma0Bf(target);
 88 |     auto w = min25::sigma0_sum_fast(target);
 89 |     if (u != v || v != w || u != w) {
 90 |       std::cerr << target << " " << u << " " << v << " " << w << std::endl;
 91 |     }
 92 |     assert(u == v);
 93 |     assert(u == w);
 94 |   };
 95 | 
 96 |   // Every target in [1, 10000].
 97 |   for (int64 target = 1; target <= 10000; ++target) {
 98 |     check(target);
 99 |   }
100 | 
101 |   // Powers of ten from 1e4 to 1e17, probing scale - 3 .. scale + 3.
102 |   // Note kept from the original: 9999999999999907 7853981733966909
103 |   // 7853981733966913.
104 |   for (int64 scale = 10000; scale <= 100000000000000000; scale *= 10) {
105 |     for (int64 delta = -3; delta <= 3; ++delta) {
106 |       check(scale + delta);
107 |     }
108 |   }
109 | }
110 | 
111 | PE_REGISTER_TEST(&SumSigma0Test, "SumSigma0Test", SUPER);
112 | #endif
113 | 
114 | SL int64 IntDivFloor(int64 a, int64 b) {  // floor(a / b); requires b != 0
115 |   // Flip both signs so that b > 0; negating only a would change the quotient.
116 |   if (b < 0) a = -a, b = -b;
117 |   const int64 quotient = a / b;  // integer division truncates toward zero
118 |   return (a < 0 && a % b != 0) ? quotient - 1 : quotient;
119 | }
120 | 
121 | SL void SolveInequatilityGE2Test() {
122 |   // (100 x - r1)(100 x - r2) >= 0, expanded to
123 |   // 10000 x^2 - (100 r1 + 100 r2) x + r1 r2 >= 0, for -1000 <= r1 <= r2 <= 1000.
124 |   // Expected integer solutions: x <= u or x >= v, with u = floor(r1 / 100)
125 |   // and v = ceil(r2 / 100).
126 |   for (int64 r1 = -1000; r1 <= 1000; ++r1) {
127 |     for (int64 r2 = r1; r2 <= 1000; ++r2) {
128 |       const int64 a2 = 10000, a1 = -(100 * r1 + 100 * r2), a0 = r1 * r2;
129 |       const int64 u = IntDivFloor(r1, 100);
130 |       const int64 v = IntDivFloor(r2, 100) + (r2 % 100 == 0 ? 0 : 1);
131 |       auto ranges = SolveInequatilityGE2<int64>(a2, a1, a0);
132 |       // A single unbounded range is expected when the two pieces meet.
133 |       const bool merged = (u == v || u + 1 == v);
134 |       assert(std::size(ranges) == (merged ? 1u : 2u));
135 |       assert(ranges[0].x1 == -IntegerRange64::inf);
136 |       if (merged) {
137 |         assert(ranges[0].x2 == IntegerRange64::inf);
138 |       } else {
139 |         assert(ranges[0].x2 == u);
140 |         assert(ranges[1].x1 == v);
141 |         assert(ranges[1].x2 == IntegerRange64::inf);
142 |       }
143 |     }
144 |   }
145 | }
146 | PE_REGISTER_TEST(&SolveInequatilityGE2Test, "SolveInequatilityGE2Test", SMALL);
147 | 
148 | SL void SolveInequatilityG2Test() {
149 |   // (100 x - r1)(100 x - r2) > 0, expanded to
150 |   // 10000 x^2 - (100 r1 + 100 r2) x + r1 r2 > 0, for -1000 <= r1 <= r2 <= 1000.
151 |   // Expected integer solutions: x <= u or x >= v, with u the largest integer
152 |   // strictly below r1 / 100 and v the smallest strictly above r2 / 100.
153 |   for (int64 r1 = -1000; r1 <= 1000; ++r1) {
154 |     for (int64 r2 = r1; r2 <= 1000; ++r2) {
155 |       const int64 a2 = 10000, a1 = -(100 * r1 + 100 * r2), a0 = r1 * r2;
156 |       const int64 u = IntDivFloor(r1, 100) - (r1 % 100 == 0 ? 1 : 0);
157 |       const int64 v = IntDivFloor(r2, 100) + 1;
158 |       auto ranges = SolveInequatilityG2<int64>(a2, a1, a0);
159 |       // A single unbounded range is expected when the two pieces meet.
160 |       const bool merged = (u == v || u + 1 == v);
161 |       assert(std::size(ranges) == (merged ? 1u : 2u));
162 |       assert(ranges[0].x1 == -IntegerRange64::inf);
163 |       if (merged) {
164 |         assert(ranges[0].x2 == IntegerRange64::inf);
165 |       } else {
166 |         assert(ranges[0].x2 == u);
167 |         assert(ranges[1].x1 == v);
168 |         assert(ranges[1].x2 == IntegerRange64::inf);
169 |       }
170 |     }
171 |   }
172 | }
173 | PE_REGISTER_TEST(&SolveInequatilityG2Test, "SolveInequatilityG2Test", SMALL);
174 | 
175 | SL void SolveInequatilityLE2Test() {
176 |   // (100 x - r1)(100 x - r2) <= 0, expanded to
177 |   // 10000 x^2 - (100 r1 + 100 r2) x + r1 r2 <= 0, for -1000 <= r1 <= r2 <= 1000.
178 |   // Expected integer solutions: u <= x <= v with u = ceil(r1 / 100) and
179 |   // v = floor(r2 / 100); no range at all when u > v.
180 |   for (int64 r1 = -1000; r1 <= 1000; ++r1) {
181 |     for (int64 r2 = r1; r2 <= 1000; ++r2) {
182 |       const int64 a2 = 10000, a1 = -(100 * r1 + 100 * r2), a0 = r1 * r2;
183 |       const int64 u = IntDivFloor(r1, 100) + (r1 % 100 == 0 ? 0 : 1);
184 |       const int64 v = IntDivFloor(r2, 100);
185 |       auto ranges = SolveInequatilityLE2<int64>(a2, a1, a0);
186 |       if (u > v) {
187 |         assert(std::size(ranges) == 0);
188 |         continue;
189 |       }
190 |       assert(std::size(ranges) == 1);
191 |       assert(ranges[0].x1 == u);
192 |       assert(ranges[0].x2 == v);
193 |     }
194 |   }
195 | }
196 | PE_REGISTER_TEST(&SolveInequatilityLE2Test, "SolveInequatilityLE2Test", SMALL);
197 | 
198 | SL void SolveInequatilityL2Test() {
199 |   // (100 x - r1)(100 x - r2) < 0, expanded to
200 |   // 10000 x^2 - (100 r1 + 100 r2) x + r1 r2 < 0, for -1000 <= r1 <= r2 <= 1000.
201 |   // Expected integer solutions: u <= x <= v with u the smallest integer
202 |   // strictly above r1 / 100 and v the largest strictly below r2 / 100.
203 |   for (int64 r1 = -1000; r1 <= 1000; ++r1) {
204 |     for (int64 r2 = r1; r2 <= 1000; ++r2) {
205 |       const int64 a2 = 10000, a1 = -(100 * r1 + 100 * r2), a0 = r1 * r2;
206 |       const int64 u = IntDivFloor(r1, 100) + 1;
207 |       const int64 v = IntDivFloor(r2, 100) - (r2 % 100 == 0 ? 1 : 0);
208 |       auto ranges = SolveInequatilityL2<int64>(a2, a1, a0);
209 |       if (u > v) {
210 |         assert(std::size(ranges) == 0);
211 |         continue;
212 |       }
213 |       assert(std::size(ranges) == 1);
214 |       assert(ranges[0].x1 == u);
215 |       assert(ranges[0].x2 == v);
216 |     }
217 |   }
218 | }
219 | PE_REGISTER_TEST(&SolveInequatilityL2Test, "SolveInequatilityL2Test", SMALL);
220 | }  // namespace misc_test
221 | 


--------------------------------------------------------------------------------
/test/mod_test.c:
--------------------------------------------------------------------------------
  1 | #include "pe_test.h"
  2 | 
  3 | namespace mod_test {
  4 | #if PE_HAS_INT128
  5 | template <typename T>
  6 | struct ValueHolder {};  // primary template is empty; data lives in the specializations
  7 | 
  8 | template <>
  9 | struct ValueHolder<int32> {
 10 |   static const int32 values[];  // dividends fed to Mod()
 11 |   static const int32 mods[];    // moduli fed to Mod(); all entries are positive
 12 | };
 13 | 
 14 | // we don't consider: -2147483648
 15 | const int32 ValueHolder<int32>::values[] = {-2147483647, -1073741824, -1, 0, 1,
 16 |                                             1073741824,  2147483647};
 17 | const int32 ValueHolder<int32>::mods[] = {1, 1073741824, 2147483647};
 18 | 
 19 | template <>
 20 | struct ValueHolder<uint32> {
 21 |   static const uint32 values[];  // dividends fed to Mod()
 22 |   static const uint32 mods[];    // moduli fed to Mod()
 23 | };
 24 | const uint32 ValueHolder<uint32>::values[] = {0u, 1u, 2147483648u, 4294967295u};
 25 | const uint32 ValueHolder<uint32>::mods[] = {1u, 2147483648u, 4294967295u};
 26 | 
 27 | template <>
 28 | struct ValueHolder<int64> {
 29 |   static const int64 values[];  // dividends fed to Mod()
 30 |   static const int64 mods[];    // moduli fed to Mod(); all entries are positive
 31 | };
 32 | const int64 ValueHolder<int64>::values[] = {-9223372036854775807ll,
 33 |                                             -4611686018427387904ll,
 34 |                                             -2147483647ll,
 35 |                                             -1073741824ll,
 36 |                                             -1ll,
 37 |                                             0ll,
 38 |                                             1ll,
 39 |                                             1073741824ll,
 40 |                                             2147483647ll,
 41 |                                             4611686018427387904ll,
 42 |                                             9223372036854775807ll};
 43 | const int64 ValueHolder<int64>::mods[] = {1ll, 1073741824ll, 2147483647ll,
 44 |                                           4611686018427387904ll,
 45 |                                           9223372036854775807ll};
 46 | 
 47 | template <>
 48 | struct ValueHolder<uint64> {
 49 |   static const uint64 values[];  // dividends fed to Mod()
 50 |   static const uint64 mods[];    // moduli fed to Mod()
 51 | };
 52 | const uint64 ValueHolder<uint64>::values[] = {0u,
 53 |                                               1ULL,
 54 |                                               2147483648ULL,
 55 |                                               2147483647ULL,
 56 |                                               9223372036854775807ULL,
 57 |                                               18446744073709551615ULL};
 58 | const uint64 ValueHolder<uint64>::mods[] = {1ULL, 2147483648ULL, 2147483647ULL,
 59 |                                             9223372036854775807ULL,
 60 |                                             18446744073709551615ULL};
 61 | 
 62 | SL void ModTest() {  // Mod(v, m) versus an int128 reference for every (value, modulus) pair.
 63 | #define REGULATE_MOD_TEST(T1, T2)        \
 64 |   for (T1 v : ValueHolder<T1>::values)   \
 65 |     for (T2 m : ValueHolder<T2>::mods) { \
 66 |       int128 x = v;                      \
 67 |       int128 y = m;                      \
 68 |       x %= y;                            \
 69 |       if (x < 0) x += y;                 \
 70 |       auto ans = Mod(v, m);              \
 71 |       if (ans != x) {                    \
 72 |         dbg(v);                          \
 73 |         dbg(m);                          \
 74 |         dbg(ans);                        \
 75 |         dbg(x);                          \
 76 |       }                                  \
 77 |       assert(ans == x);                  \
 78 |     }
 79 |   REGULATE_MOD_TEST(int32, int32)  // reference: v % m in int128, plus m when negative
 80 |   REGULATE_MOD_TEST(uint32, int32)
 81 |   REGULATE_MOD_TEST(int64, int32)
 82 |   REGULATE_MOD_TEST(uint64, int32)
 83 |   REGULATE_MOD_TEST(int32, uint32)
 84 |   REGULATE_MOD_TEST(uint32, uint32)
 85 |   REGULATE_MOD_TEST(int64, uint32)
 86 |   REGULATE_MOD_TEST(uint64, uint32)
 87 | 
 88 |   REGULATE_MOD_TEST(int32, int64)  // same value types again, now with 64-bit moduli
 89 |   REGULATE_MOD_TEST(uint32, int64)
 90 |   REGULATE_MOD_TEST(int64, int64)
 91 |   REGULATE_MOD_TEST(uint64, int64)
 92 |   REGULATE_MOD_TEST(int32, uint64)
 93 |   REGULATE_MOD_TEST(uint32, uint64)
 94 |   REGULATE_MOD_TEST(int64, uint64)
 95 |   REGULATE_MOD_TEST(uint64, uint64)
 96 | }  // NOTE(review): REGULATE_MOD_TEST is not #undef'd after use
 97 | 
 98 | PE_REGISTER_TEST(&ModTest, "ModTest", SMALL);
 99 | #endif
100 | 
101 | #if PE_HAS_INT128
102 | SL void FracModTest() {
103 |   // n (n + 1) (2 n + 1) / 6 mod p: FracMod versus exact int128 arithmetic.
104 |   const int mod = 1000000007;
105 |   auto check = [mod](int64 n) {
106 |     int64 got = FracMod<int64, int64>({n, n + 1, 2 * n + 1}, {2, 3}, mod);
107 |     int128 exact = (int128)n * (n + 1) * (2 * n + 1) / 6 % mod;
108 |     assert(got == exact);
109 |   };
110 | 
111 |   for (int64 small = 1; small <= 10; ++small) check(small);
112 | 
113 |   for (int offset = 1; offset <= 10; ++offset) {
114 |     check(100000000000 + offset);
115 |   }
116 | }
117 | 
118 | PE_REGISTER_TEST(&FracModTest, "FracModTest", SMALL);
119 | #endif
120 | }  // namespace mod_test
121 | 


--------------------------------------------------------------------------------
/test/mpf_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace mpf_test {
 4 | #if HAS_MPF
 5 | SL void MpfTest() {  // Smoke test: the enabled code asserts nothing.
 6 |   // std::cout << Mpf::getDefaultPrec() << std::endl;
 7 |   Mpf::SetDefaultPrec(200);  // static call; presumably affects Mpf objects created afterwards
 8 | 
 9 |   Mpf x(1);
10 |   x /= 10;  // only exercises construction and operator/=
11 | #if 0
12 |   std::cout << x.toString(20) << std::endl;
13 |   x = -x.Power(10);
14 |   std::cout << x.toString(20) << std::endl;
15 |   std::cout << x.toLongDouble() << std::endl;
16 |   std::cout << Mpf().toString(20) << std::endl;
17 |   std::cout << Mpf("-.1123456789e31").toString(30) << std::endl;
18 |   std::cout << Mpf(".1123456789e31").toString(30) << std::endl;
19 |   std::cout << Mpf(100).toString(30) << std::endl;
20 |   mpf_t tester;
21 |   mpf_init(tester);
22 |   mpf_set_str(tester, "112345678987654321", 10);
23 |   std::cout << Mpf("12345678987654321").toString() << std::endl;
24 |   std::cout << mpf_get_d(tester) << std::endl;
25 |   std::cout << mpf_get_prec(tester) << std::endl;
26 |   Mpf y(1);
27 |   y *= 123456789;
28 |   y *= 1000000000;
29 |   y += 123456789;
30 |   std::cout << y.toString() << std::endl;
31 | #endif  // disabled manual printing experiments
32 | }
33 | 
34 | PE_REGISTER_TEST(&MpfTest, "MpfTest", SMALL);
35 | #endif
36 | }  // namespace mpf_test
37 | 


--------------------------------------------------------------------------------
/test/parallel_sort_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace parallel_sort_test {
 4 | constexpr int n = 1000000;
 5 | 
 6 | SL void ParallelSortTest() {
 7 |   // Sorts n rand() values with ParallelSort<8> and logs the elapsed time.
 8 |   std::vector<int> values(n);
 9 |   for (int& slot : values) slot = rand();
10 |   TimeRecorder timer;
11 |   int* const first = values.data();
12 |   ParallelSort<8>(first, first + n);
13 |   std::cerr << timer.Elapsed().Format() << std::endl;
14 |   assert(std::is_sorted(values.begin(), values.end()));
15 | }
16 | 
17 | PE_REGISTER_TEST(&ParallelSortTest, "ParallelSortTest", SMALL);
18 | }  // namespace parallel_sort_test
19 | 


--------------------------------------------------------------------------------
/test/pe_test.h:
--------------------------------------------------------------------------------
  1 | #ifndef __PE_TEST_H__
  2 | #define __PE_TEST_H__
  3 | 
  4 | #define PE_TEST_MODE
  5 | 
  6 | #include <pe>
  7 | using namespace pe;
  8 | 
  9 | enum TestSize {  // Size class stored with every registered test.
 10 |   SMALL = 0,
 11 |   MEDIUM = 1,
 12 |   BIG = 2,
 13 |   SUPER = 3,
 14 |   SPECIFIED = 4,  // each enumerator name is also accepted by ParseTestSize
 15 | };
 16 | 
 17 | SL bool SameStringIgnoreCase(std::string_view a, std::string_view b) {
 18 |   // True when a and b have equal length and match per char after tolower.
 19 |   if (a.size() != b.size()) return false;
 20 |   for (std::size_t i = 0; i < a.size(); ++i) {
 21 |     // Cast first: std::tolower(int) is undefined for negative char values.
 22 |     const int lower_a = std::tolower(static_cast<unsigned char>(a[i]));
 23 |     const int lower_b = std::tolower(static_cast<unsigned char>(b[i]));
 24 |     if (lower_a != lower_b) return false;
 25 |   }
 26 |   return true;
 27 | }
 28 | 
 29 | SL std::optional<TestSize> ParseTestSize(std::string_view test_size) {
 30 |   // Known names, matched case-insensitively; std::nullopt when none fits.
 31 |   struct NamedSize {
 32 |     std::string_view name;
 33 |     TestSize size;
 34 |   };
 35 |   const NamedSize known_sizes[] = {
 36 |       {"SMALL", SMALL}, {"MEDIUM", MEDIUM},       {"BIG", BIG},
 37 |       {"SUPER", SUPER}, {"SPECIFIED", SPECIFIED},
 38 |   };
 39 |   for (const NamedSize& candidate : known_sizes) {
 40 |     if (SameStringIgnoreCase(test_size, candidate.name)) return candidate.size;
 41 |   }
 42 |   return std::nullopt;
 43 | }
 44 | 
 45 | SL std::vector<TestSize> ParseTestSizeList(std::string s) {
 46 |   // ',', '(' and ')' act as separators: turn them into spaces first.
 47 |   for (char& ch : s) {
 48 |     if (ch == ',' || ch == '(' || ch == ')') ch = ' ';
 49 |   }
 50 | 
 51 |   std::vector<TestSize> parsed;
 52 |   std::istringstream tokens(s);
 53 |   for (std::string token; tokens >> token;) {
 54 |     const std::optional<TestSize> size = ParseTestSize(token);
 55 |     if (size.has_value()) {
 56 |       parsed.push_back(*size);
 57 |       continue;
 58 |     }
 59 |     std::cerr << "Cannot parse test size: " << token << std::endl;
 60 |   }
 61 |   return parsed;
 62 | }
 63 | 
 64 | struct TestItem {  // One registered test; PE_REGISTER_TEST brace-initializes it in field order.
 65 |   std::function<void()> test;  // callable that runs the test
 66 |   std::string file;            // file name captured at registration
 67 |   std::string description;     // description string passed to PE_REGISTER_TEST
 68 |   TestSize test_size;          // size class passed to PE_REGISTER_TEST
 69 | };
 70 | 
 71 | struct TestRegistry {  // Holds the tests appended by PE_REGISTER_TEST.
 72 |   std::vector<TestItem> tests;
 73 | };
 74 | 
 75 | TestRegistry& GetTestRegistry();  // declaration only; the definition is not in this header
 76 | 
 77 | #define MAKE_INITIALIZER_NAME_IMPL(LINE_NUMBER) __register_test_##LINE_NUMBER
 78 | #define MAKE_INITIALIZER_NAME(LINE_NUMBER) \
 79 |   MAKE_INITIALIZER_NAME_IMPL(LINE_NUMBER)
 80 | 
 81 | #if PE_HAS_CPP20  // file name taken from std::source_location
 82 | #define PE_REGISTER_TEST(test, description, test_size)                   \
 83 |   static int MAKE_INITIALIZER_NAME(__LINE__) = ([]() {                   \
 84 |     GetTestRegistry().tests.push_back(                                   \
 85 |         {test, std::source_location::current().file_name(), description, \
 86 |          test_size});                                                    \
 87 |     return 0;                                                            \
 88 |   })()
 89 | #else  // file name taken from __FILE__
 90 | #define MAKE_FILE_NAME_IMPL(FILE_NAME) std::string(FILE_NAME)
 91 | #define MAKE_FILE_NAME(FILE_NAME) MAKE_FILE_NAME_IMPL(FILE_NAME)
 92 | #define PE_REGISTER_TEST(test, description, test_size)             \
 93 |   static int MAKE_INITIALIZER_NAME(__LINE__) = ([]() {             \
 94 |     GetTestRegistry().tests.push_back(                             \
 95 |         {test, MAKE_FILE_NAME(__FILE__), description, test_size}); \
 96 |     return 0;                                                      \
 97 |   })()
 98 | #endif  // PE_HAS_CPP20; the static int is named __register_test_<__LINE__>, so two uses must not share a line number within one scope
 99 | 
100 | #endif
101 | 


--------------------------------------------------------------------------------
/test/poly_algo_test.c:
--------------------------------------------------------------------------------
  1 | #include "pe_test.h"
  2 | 
  3 | namespace poly_algo_test {
  4 | const int64 mod = 1000000007;
  5 | 
  6 | SL void PolyMultiPointEvaluationTest() {
  7 |   // Cross-checks each multipoint evaluator against PolyEvaluate per point.
  8 |   srand(123456789);
  9 |   const int n = 5000;
 10 |   const int64 mod = 1000000007;
 11 |   std::vector<int64> data;  // values 1..n
 12 |   std::vector<int64> v;     // points i % 10007
 13 |   for (int i = 1; i <= n; ++i) {
 14 |     data.push_back(i);
 15 |     v.push_back(i % 10007);
 16 |   }
 17 | 
 18 |   // Recomputes every point directly and compares with the batch result.
 19 |   auto verify = [&](const std::vector<int64>& result) {
 20 |     for (int i = 1; i <= n; ++i) {
 21 |       const int64 value = PolyEvaluate<int64>(data, i % 10007, mod);
 22 |       assert(value == result[i - 1]);
 23 |     }
 24 |   };
 25 | 
 26 |   {
 27 |     TimeRecorder tr;
 28 |     verify(PolyMultipointEvaluateNormal(data, v, mod));
 29 |     // std::cout << tr.Elapsed().Format() << std::endl;
 30 |   }
 31 |   {
 32 |     TimeRecorder tr;
 33 |     verify(PolyMultipointEvaluateBls(data, v, mod));
 34 |     // std::cout << tr.Elapsed().Format() << std::endl;
 35 |   }
 36 | #if HAS_POLY_FLINT
 37 |   {
 38 |     TimeRecorder tr;
 39 |     verify(flint::PolyMultipointEvaluate(data, v, mod));
 40 |     // std::cout << tr.Elapsed().Format() << std::endl;
 41 |   }
 42 | #endif
 43 | }
 44 | PE_REGISTER_TEST(&PolyMultiPointEvaluationTest, "PolyMultiPointEvaluationTest",
 45 |                  SMALL);
 46 | 
 47 | SL void PolyBatchMulTest() {
 48 |   // Input {1, 1, 2, 1, 3, 1} modulo 10007 must produce {6, 11, 6, 1}.
 49 |   const int mod = 10007;
 50 |   std::vector<int64> factors = {1, 1, 2, 1, 3, 1};
 51 |   const std::vector<int64> expected = {6, 11, 6, 1};
 52 | 
 53 |   std::vector<int64> product = PolyBatchMul(factors, mod);
 54 |   assert(product == expected);
 55 | }
 56 | PE_REGISTER_TEST(&PolyBatchMulTest, "PolyBatchMulTest", SMALL);
 56 | 
 57 | SL void GenBernoulliNumberTest() {
 58 |   const int mod = 10007;  // expected values are residues modulo 10007
 59 |   const std::vector<int64> want{1, 5003, 1668, 0, 7672, 0, 4527, 0};
 60 |   assert(GenBernoulliNumber(7, mod) == want);
 61 | }
 62 | PE_REGISTER_TEST(&GenBernoulliNumberTest, "GenBernoulliNumberTest", SMALL);
 63 | 
 64 | SL void GenStirling1ColumnTest() {
 65 |   const int mod = 10007;  // expected values are residues modulo 10007
 66 |   const std::vector<int64> want{0, 0, 0, 1, 6, 35, 225, 1624, 3125, 8047, 1881};
 67 |   assert(pmod::GenStirling1Column(3, 10, mod) == want);
 68 | }
 69 | PE_REGISTER_TEST(&GenStirling1ColumnTest, "GenStirling1ColumnTest", SMALL);
 70 | 
 71 | SL void GenStirling1Test() {
 72 |   const int mod = 10007;  // expected values are residues modulo 10007
 73 |   const std::vector<int64> want{0, 720, 1764, 1624, 735, 175, 21, 1};
 74 |   assert(GenStirling1(7, mod) == want);
 75 | }
 76 | PE_REGISTER_TEST(&GenStirling1Test, "GenStirling1Test", SMALL);
 77 | 
 78 | SL void GenStirling2Test() {
 79 |   const int mod = 10007;  // expected values are residues modulo 10007
 80 |   const std::vector<int64> want{0, 1, 63, 301, 350, 140, 21, 1};
 81 |   assert(pmod::GenStirling2(7, mod) == want);
 82 | }
 83 | PE_REGISTER_TEST(&GenStirling2Test, "GenStirling2Test", SMALL);
 84 | 
 85 | SL void GetGFCoefficientTest() {  // Checks GF coefficients against direct DP.
 86 |   {
 87 |     // Fibonacci sequence
 88 |     std::vector<int64> A = {1, -1, -1};  // presumably denominator 1 - x - x^2
 89 |     std::vector<int64> B = {0, 1};       // presumably numerator x; confirm
 90 |     std::vector<int64> result = {0, 1};
 91 |     for (int i = 2; i <= 30; ++i) {
 92 |       result.push_back(AddMod(result[i - 2], result[i - 1], mod));
 93 |     }
 94 |     std::vector<int64> x = GetGFCoefficientSeries(A, B, 30, mod);
 95 |     for (int i = 0; i <= 30; ++i) {
 96 |       assert(result[i] == x[i]);
 97 |     }
 98 |   }
 99 | 
100 |   {
101 |     // Dollar exchange.
102 |     // Concrete Mathematics
103 |     // 7 Generating Functions
104 |     // 7.3 Solving Recurrences
105 |     // Example 4: A closed form for change.
106 |     int64 dp[10000 + 1] = {1};  // dp[0] = 1; every other entry starts at 0
107 |     int64 can[5] = {1, 5, 10, 25, 50};  // denominations used for the change
108 |     for (int64 each : can) {
109 |       for (int j = 0; j + each <= 10000; ++j) {
110 |         if (dp[j]) {
111 |           dp[j + each] = AddMod(dp[j + each], dp[j], mod);
112 |         }
113 |       }
114 |     }
115 | 
116 |     int64 coe[100] = {0};  // coe[s]: signed count of subsets of can with sum s
117 |     for (int i = 0; i < 1 << 5; ++i) {  // every subset of the 5 denominations
118 |       int s = 0;   // sum of the chosen denominations
119 |       int bc = 0;  // number of chosen denominations
120 |       for (int j = 0; j < 5; ++j) {
121 |         if (i & (1 << j)) ++bc, s += (int)can[j];
122 |       }
123 |       if (bc & 1) {  // odd-sized subsets contribute -1, even-sized +1
124 |         --coe[s];
125 |       } else {
126 |         ++coe[s];
127 |       }
128 |     }
129 |     std::vector<int64> gfresult = GetGFCoefficientSeries(
130 |         std::vector<int64>(coe, coe + 92), {1}, 10000, mod);  // sums are 0..91
131 |     for (int i = 0; i <= 10000; ++i) assert(dp[i] == gfresult[i]);
132 | 
133 |     std::string mine = ToString(GetGFCoefficientAt(
134 |         std::vector<int64>(coe, coe + 92), {1}, 100000000, mod));
135 |     std::string expected = ToString("66666793333412666685000001"_bi % mod);
136 |     assert(mine == expected);
137 |   }
138 | }
139 | PE_REGISTER_TEST(&GetGFCoefficientTest, "GetGFCoefficientTest", SMALL);
140 | 
141 | SL void LinearRecurrenceTest() {  // Fits a recurrence to Fibonacci terms.
142 |   const int64 P = 1000000009;
143 |   std::vector<int64> s = {0, 1, 1, 2, 3, 5};
144 |   std::vector<int64> v = *FindLinearRecurrence(s, P);  // assumes a value is set
145 |   assert(v[0] == P - 1);  // -1 modulo P
146 |   assert(v[1] == P - 1);  // -1 modulo P
147 |   assert(v[2] == 1);
148 |   const int n = static_cast<int>(std::size(v));
149 |   int64 ans = 0;
150 |   for (int i = 0; i < n; ++i) ans += v[i] * s[i];  // unreduced dot product
151 |   assert(ans == P);  // (P-1)*0 + (P-1)*1 + 1*1, when v has exactly 3 entries
152 | 
153 |   ans = LinearRecurrenceValueAt(v, s, 38, P);
154 |   assert(ans == 39088169LL);  // Fibonacci number F(38)
155 | 
156 |   std::vector<int64> t = *FindLinearRecurrence({0, 1, 1, 2, 3, 5, 8, 13}, 31);
157 |   assert(t[0] == 30);  // -1 modulo 31
158 |   assert(t[1] == 30);  // -1 modulo 31
159 |   assert(t[2] == 1);
160 |   assert(*FindLinearRecurrenceValueAt({0, 1, 1, 2, 3, 5, 8, 13}, 38, P) ==
161 |          39088169);
162 | }
163 | PE_REGISTER_TEST(&LinearRecurrenceTest, "LinearRecurrenceTest", SMALL);
164 | 
165 | SL void SeqExprTest() {  // Exercises the expression a[1] + a[2] on {0, 1}.
166 |   {
167 |     Sequence a;
168 |     (void)a;  // keeps `a` referenced when assert compiles to nothing
169 |     assert((a[1] + a[2]).ValueAt({0, 1}, 20, mod) == 6765);  // F(20)
170 |     assert((a[1] + a[2]).ValueAtWithCharPoly({0, 1}, 20, mod) == 6765);
171 |     assert((a[1] + a[2]).SumAt({0, 1}, 20, mod) == 17710);  // F(0)+...+F(20)
172 |     assert((a[1] + a[2]).SumAtWithCharPoly({0, 1}, 20, mod) == 17710);
173 |     assert(((a[1] + a[2]).Generate({0, 1}, 20, mod) ==
174 |             std::vector<int64>{0,   1,   1,   2,    3,    5,    8,
175 |                                13,  21,  34,  55,   89,   144,  233,
176 |                                377, 610, 987, 1597, 2584, 4181, 6765}));
177 |   }
178 |   {
179 |     Sequence a;
180 |     (void)a;
181 |     assert((a[1] + a[2]).ValueAt({0, 1}, 1000, mod) == 517691607);
182 |     assert((a[1] + a[2]).ValueAtWithCharPoly({0, 1}, 1000, mod) == 517691607);
183 |     assert((a[1] + a[2]).SumAt({0, 1}, 1000, mod) == 625271545);
184 |     assert((a[1] + a[2]).SumAtWithCharPoly({0, 1}, 1000, mod) == 625271545);
185 |   }
186 |   {
187 |     using MT = NMod64<mod>;  // modulus is a template argument here
188 |     Sequence<MT> a;
189 |     (void)a;
190 |     assert((a[1] + a[2]).ValueAt({0, 1}, 1000).value() == 517691607);  // no mod arg
191 |     assert((a[1] + a[2]).SumAt({0, 1}, 1000).value() == 625271545);
192 |   }
193 | }
194 | PE_REGISTER_TEST(&SeqExprTest, "SeqExprTest", SMALL);
195 | }  // namespace poly_algo_test
196 | 


--------------------------------------------------------------------------------
/test/poly_div_test.c:
--------------------------------------------------------------------------------
  1 | #include "pe_test.h"
  2 | 
  3 | namespace poly_div_test {
  4 | #if !defined(ONLY_RUN_PE_IMPLEMENTATION)
  5 | #define ONLY_RUN_PE_IMPLEMENTATION 0
  6 | #endif
  7 | using poly_div_t = std::vector<uint64> (*)(const std::vector<uint64>&,
  8 |                                            const std::vector<uint64>&, int64);
  9 | struct DivImpl {  // One polynomial-division implementation under test.
 10 |   poly_div_t impl;  // Invoked as impl(x, y, mod).
 11 |   int size;  // 0:small, 1:large
 12 |   const char* name;  // Label printed next to the timing.
 13 | };
 14 | 
 15 | DivImpl div_impl[] = {  // Entry 0 provides the reference result in TestImpl.
 16 |     {&PolyDivDc<uint64>, 1, "dc"},
 17 |     {&PolyDivNormal<uint64>, 0, "normal"},
 18 | #if HAS_POLY_FLINT && !ONLY_RUN_PE_IMPLEMENTATION  // optional flint entry
 19 |     {&flint::PolyDiv<uint64>, 1, "flint"},
 20 | #endif
 21 | #if HAS_POLY_NTL && !ONLY_RUN_PE_IMPLEMENTATION  // optional ntl entries
 22 |     {&ntl::PolyDivLargeMod<uint64>, 1, "ntl lm"},
 23 |     {&ntl::PolyDiv<uint64>, 1, "ntl"},
 24 | #endif
 25 | };
 26 | 
 27 | const char* data_policy[3] = {  // Indexed by the dp argument of TestImpl.
 28 |     "random",   // dp == 0: coefficients are CRand63() % mod
 29 |     "min mod",  // dp == 1: coefficients are 0
 30 |     "max mod",  // dp == 2: coefficients are mod - 1
 31 | };
 32 | 
 33 | SL void TestImpl(int dp, int size, int n, int64 mod) {
 34 |   fprintf(stderr, "%-8s : data = %s, size = %d, n = %d, mod = %lld\n", "config",
 35 |           data_policy[dp], size, n, (long long)mod);
 36 | 
 37 |   // x gets n coefficients and y gets n / 2; dp selects how they are filled.
 38 |   std::vector<uint64> x, y;
 39 |   srand(123456789);
 40 |   const int half = n / 2;
 41 |   if (dp != 0) {
 42 |     const uint64 fill = dp == 1 ? 0 : mod - 1;
 43 |     x.assign(n, fill);
 44 |     y.assign(half, fill);
 45 |   } else {
 46 |     for (int i = 0; i < n; ++i) x.push_back((uint64)CRand63() % mod);
 47 |     for (int i = 0; i < half; ++i) y.push_back((uint64)CRand63() % mod);
 48 |   }
 49 |   x[n - 1] = y[half - 1] = 1;  // last coefficient of each is forced to 1
 50 | 
 51 |   // The first implementation supplies the reference; later ones whose size
 52 |   // class is below the requested one are skipped.
 53 |   std::vector<uint64> expected;
 54 |   const int impl_count = std::size(div_impl);
 55 |   for (int idx = 0; idx < impl_count; ++idx) {
 56 |     DivImpl who = div_impl[idx];
 57 |     if (idx > 0 && who.size < size) continue;
 58 | 
 59 |     clock_t start = clock();
 60 |     std::vector<uint64> result = who.impl(x, y, mod);
 61 |     clock_t end = clock();
 62 |     fprintf(stderr, "%-8s : %.3f\n", who.name,
 63 |             1. * (end - start) / CLOCKS_PER_SEC);
 64 |     if (idx != 0) {
 65 |       assert(expected == result);
 66 |     } else {
 67 |       expected = result;
 68 |     }
 69 |   }
 70 | }
 71 | 
 72 | SL void PolyDivTest() {
 73 |   // Every data policy x length x modulus; lengths above 2048 count as large.
 74 |   for (int dp = 0; dp < 3; ++dp)
 75 |     for (int n : {128, 2048, 1000000, 1479725})
 76 |       for (int64 mod : {100019LL, 100000000003LL, 316227766016779LL}) {
 77 |         const int size_class = n > 2048 ? 1 : 0;
 78 |         TestImpl(dp, size_class, n, mod);
 79 |       }
 80 | }
 81 | PE_REGISTER_TEST(&PolyDivTest, "PolyDivTest", SUPER);
 82 | 
 83 | SL void PolyDivPerformanceTest() {
 84 |   // For each modulus, prints one row per div_impl entry with the seconds
 85 |   // spent dividing a 2^n-term input by a 2^(n-1)-term input.
 86 |   constexpr std::array<uint64, 5> mods = {100019, 1000003, 1000000007,
 87 |                                           100000000003, 316227766016779};
 88 |   constexpr int min_log2 = 10;
 89 |   constexpr int max_log2 = 20;
 90 | 
 91 |   for (const uint64 mod : mods) {
 92 |     printf("mod = %llu\n", (unsigned long long)mod);
 93 | 
 94 |     // Header row: shares its bounds with the measurement loop below.
 95 |     printf("log2(n)  ");
 96 |     for (int n = min_log2; n <= max_log2; ++n) {
 97 |       printf("%-6d ", n);
 98 |     }
 99 |     puts("");
100 | 
101 |     for (const DivImpl& who : div_impl) {
102 |       printf("%-8s ", who.name);
103 |       // Reseed per implementation so each one sees the same inputs.
104 |       srand(314159);
105 |       for (int n = min_log2; n <= max_log2; ++n) {
106 |         // Entries marked small (size == 0) are not timed beyond 2^14.
107 |         if (who.size == 0 && n > 14) {
108 |           printf("%-6s ", "-");
109 |           continue;
110 |         }
111 |         const int size = 1 << n;
112 |         std::vector<uint64> x, y;
113 |         for (int k = 0; k < size; ++k) x.push_back((uint64)CRand63() % mod);
114 |         for (int k = 0; k < size / 2; ++k) y.push_back((uint64)CRand63() % mod);
115 |         x[size - 1] = y[size / 2 - 1] = 1;
116 | 
117 |         // Only the division call itself is timed.
118 |         clock_t start = clock();
119 |         who.impl(x, y, mod);
120 |         clock_t end = clock();
121 | #if 1
122 |         printf("%-6.3f ", 1. * (end - start) / CLOCKS_PER_SEC);
123 | #else
124 |         uint64 a = n * (1 << n);
125 |         uint64 b = end - start;
126 |         printf("%-6.3f ", 1e5 * b / a);
127 | #endif
128 |       }
129 |       puts("");
130 |     }
131 |   }
132 | }
133 | 
134 | 
135 | PE_REGISTER_TEST(&PolyDivPerformanceTest, "PolyDivPerformanceTest", SUPER);
136 | }  // namespace poly_div_test
137 | 


--------------------------------------------------------------------------------
/test/prime_pi_sum_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace prime_pi_sum_test {
 4 | std::vector<int64> ps(1000001);  // ps[i]: sum of primes <= i (PrimePiSumTest)
 5 | std::vector<int64> pc(1000001);  // pc[i]: count of primes <= i (PrimePiSumTest)
 6 | 
 7 | SL void VerifyCnt(const int64 n, const DVA<int64>& result) {
 8 |   const auto limit = static_cast<int64>(std::sqrt(n));
 9 |   for (int j = 1; j <= limit; ++j) {
10 |     assert(pc[j] == result[j]);          // small keys 1..limit
11 |     assert(pc[n / j] == result[n / j]);  // quotient keys n / j
12 |   }
13 | }
14 | 
15 | SL void VerifySum(const int64 n, const DVA<int64>& result) {
16 |   const auto limit = static_cast<int64>(std::sqrt(n));
17 |   for (int j = 1; j <= limit; ++j) {
18 |     assert(ps[j] == result[j]);          // small keys 1..limit
19 |     assert(ps[n / j] == result[n / j]);  // quotient keys n / j
20 |   }
21 | }
22 | 
23 | SL void SmallTest() {
24 |   // Checks PrimePi and PrimeSum for every n from 1 to 100000.
25 |   for (int n = 1; n <= 100000; ++n) {
26 |     VerifyCnt(n, PrimePi<int64>(n));
27 |     VerifySum(n, PrimeSum<int64>(n));
28 |   }
29 | }
30 | 
31 | SL void PrimePiSumTest() {
32 |   // Build the reference prefix tables: pc counts primes, ps sums them.
33 |   for (int i = 1; i <= 1000000; ++i) {
34 |     const bool prime = IsPrime(i);
35 |     pc[i] = pc[i - 1] + (prime ? 1 : 0);
36 |     ps[i] = ps[i - 1] + (prime ? i : 0);
37 |   }
38 |   SmallTest();
39 | 
40 |   assert((PrimePi<int64>(10000000))[10000000] == kPrimePi[7]);
41 |   assert((PrimePi<int64>(100000000))[100000000] == kPrimePi[8]);
42 |   assert((PrimePi<int64>(1000000000))[1000000000] == kPrimePi[9]);
43 |   assert((PrimePi<int64>(10000000000))[10000000000] == kPrimePi[10]);
44 |   // assert((PrimePi<int64>(100000000000))[100000000000] == kPrimePi[11]);
45 |   // assert((PrimePi<int64>(1000000000000))[1000000000000] == kPrimePi[12]);
46 | }
47 | PE_REGISTER_TEST(&PrimePiSumTest, "PrimePiSumTest", BIG);
48 | 
49 | SL void PrimePiSumPModTest() {  // Checks PrimeS0PMod/PrimeS1PMod vs. plist.
50 |   const int64 N = 100000;
51 |   for (int mod = 1; mod <= 30; ++mod) {
52 |     int64 result[32] = {0};  // result[r]: count of primes <= N with p % mod == r
53 |     for (int i = 0; i < pcnt && plist[i] <= N; ++i) ++result[plist[i] % mod];
54 |     auto v = PrimeS0PMod<int64>(N, mod);
55 |     for (int j = 0; j < mod; ++j) {
56 |       assert(result[j] == v[j][N]);
57 |     }
58 |   }
59 |   for (int mod = 1; mod <= 30; ++mod) {
60 |     int64 result[32] = {0};  // result[r]: sum of primes <= N with p % mod == r
61 |     for (int i = 0; i < pcnt && plist[i] <= N; ++i) {
62 |       result[plist[i] % mod] += plist[i];
63 |     }
64 |     auto v = PrimeS1PMod<int64>(N, mod);
65 |     for (int j = 0; j < mod; ++j) {
66 |       assert(result[j] == v[j][N]);
67 |     }
68 |   }
69 |   const int64 M = 10007;  // modulus for the NModNumber variants below
70 |   for (int mod = 1; mod <= 30; ++mod) {
71 |     int64 result[32] = {0};
72 |     for (int i = 0; i < pcnt && plist[i] <= N; ++i) ++result[plist[i] % mod];
73 |     auto v = PrimeS0PMod<NModNumber<CCMod64<M>>>(N, mod);
74 |     for (int j = 0; j < mod; ++j) {
75 |       assert(result[j] % M == v[j][N].value());  // tally reduced mod M first
76 |     }
77 |   }
78 |   for (int mod = 1; mod <= 30; ++mod) {
79 |     int64 result[32] = {0};
80 |     for (int i = 0; i < pcnt && plist[i] <= N; ++i) {
81 |       result[plist[i] % mod] += plist[i];
82 |     }
83 |     auto v = PrimeS1PMod<NModNumber<CCMod64<M>>>(N, mod);
84 |     for (int j = 0; j < mod; ++j) {
85 |       assert(result[j] % M == v[j][N].value());  // tally reduced mod M first
86 |     }
87 |   }
88 | }
89 | PE_REGISTER_TEST(&PrimePiSumPModTest, "PrimePiSumPModTest", SMALL);
90 | }  // namespace prime_pi_sum_test
91 | 


--------------------------------------------------------------------------------
/test/test_compile_each.bat:
--------------------------------------------------------------------------------
1 | test_compile_each.py
2 | pause


--------------------------------------------------------------------------------
/test/test_compile_each.py:
--------------------------------------------------------------------------------
 1 | #! python3
 2 | # -*- coding: UTF-8 -*-
 3 | import os
 4 | import sys
 5 | import shutil
 6 | import subprocess
 7 | import time
 8 | 
 9 | CURRENT_DIRECTORY = os.getcwd()
10 | PARENT_DIRECTORY = os.pardir
11 | 
12 | 
13 | def DurationPartsFromNs(duration):
14 |   min_part = duration // 1000000000 // 60
15 |   sec_part = duration // 1000000000 % 60
16 |   millisec_part = duration // 1000000 % 1000
17 |   return (min_part, sec_part, millisec_part)
18 | 
19 | 
20 | def FormatNs(duration):  # Renders a nanosecond count as M:SS.mmm text.
21 |   return '%d:%02d.%03d' % DurationPartsFromNs(duration)
22 | 
23 | 
24 | def main():
25 |   ret = 0
26 |   for file in os.listdir(PARENT_DIRECTORY):
27 |     if not file.startswith('pe'):
28 |       continue
29 |     filename, file_ext_name = os.path.splitext(file)
30 |     if file_ext_name != '':
31 |       continue
32 |     content = '#include <%s>\n int main(){}' % file
33 |     with open('main.cc', 'wb') as tempf:
34 |       tempf.write(content.encode('utf8'))
35 |     print('Compile %s' % file)
36 |     start_time = time.perf_counter_ns()
37 |     ret = os.system('pe++.py main.cc -hc')
38 |     time_usage = FormatNs(time.perf_counter_ns() - start_time)
39 |     if os.path.exists('main.cc'):
40 |       os.remove('main.cc')
41 |     print('Done, return code = %d, time usage = %s' % (ret, time_usage))
42 |     print()
43 |     if ret != 0:
44 |       break
45 |   if os.path.exists('a.exe'):
46 |     os.remove('a.exe')
47 |   return ret
48 | 
49 | 
50 | if __name__ == "__main__":
51 |   sys.exit(main())


--------------------------------------------------------------------------------
/test/test_int128_noopenmp.bat:
--------------------------------------------------------------------------------
1 | pe++.py pe_test.c -r -- -DENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=0
2 | pause


--------------------------------------------------------------------------------
/test/test_int128_openmp.bat:
--------------------------------------------------------------------------------
1 | pe++.py pe_test.c -r -- -DENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1
2 | pause


--------------------------------------------------------------------------------
/test/test_noint128_noopenmp.bat:
--------------------------------------------------------------------------------
1 | pe++.py pe_test.c -r -- -DENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=0 -DENABLE_OPENMP=0
2 | pause


--------------------------------------------------------------------------------
/test/test_noint128_openmp.bat:
--------------------------------------------------------------------------------
1 | pe++.py pe_test.c -r -- -DENABLED_TEST=SMALL,MEDIUM,BIG,SUPER,SPECIFIED -DTRY_TO_USE_INT128=0 -DENABLE_OPENMP=1
2 | pause


--------------------------------------------------------------------------------
/test/test_perf.bat:
--------------------------------------------------------------------------------
1 | pe++.py test_perf.c -DTEST_ALL -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 && a.exe -r 10
2 | pause


--------------------------------------------------------------------------------
/test/test_perf.c:
--------------------------------------------------------------------------------
  1 | #include "pe_test.h"
  2 | 
  3 | #define ONLY_RUN_PE_IMPLEMENTATION 1
  4 | 
  5 | #if 1
  6 | #include "poly_mul_test.c"
  7 | //#include "poly_div_test.c"
  8 | //#include "bi_div_test.c"
  9 | //#include "bi_mul_test.c"
 10 | //#include "extended_signed_int_test.c"
 11 | //#include "extended_unsigned_int_test.c"
 12 | #endif
 13 | 
 14 | TestRegistry& GetTestRegistry() {
 15 |   static TestRegistry registry;  // function-local, so it exists on first use
 16 |   return registry;
 17 | }
 18 | 
 19 | static inline std::string FormatSecond(double s) {
 20 |   char buff[256];  // "%.3f" of a huge double needs more; snprintf truncates
 21 |   snprintf(buff, sizeof(buff), "%.3f", s);
 22 |   return buff;
 23 | }
 24 | 
 25 | static inline std::string FormatSecond(TimeDelta d) {
 26 |   char buff[256];  // bounded write: an extreme value is truncated, not overrun
 27 |   snprintf(buff, sizeof(buff), "%.3f", d.ToSeconds());
 28 |   return buff;
 29 | }
 30 | 
 31 | int main(int argc, char* argv[]) {  // Runs every registered test run_count times.
 32 |   int run_count = 2;
 33 | 
 34 |   for (int i = 1; i < argc;) {
 35 |     std::string c = argv[i];
 36 |     const int size = static_cast<int>(std::size(c));
 37 |     int j = 0;
 38 |     while (j < size && c[j] == '-') ++j;  // accept "r", "-r", "--r", ...
 39 |     c = c.substr(j);
 40 |     if (c == "r") {
 41 |       if (i + 1 < argc) {
 42 |         run_count = atoi(argv[i + 1]);
 43 |         i += 2;
 44 |       } else {
 45 |         fprintf(stderr, "Run count is unspecified\n");
 46 |         exit(-1);
 47 |       }
 48 |     } else {
 49 |       fprintf(stderr, "Unknown flags: %s\n", argv[i]);
 50 |       exit(-1);
 51 |     }
 52 |   }
 53 |   if (run_count <= 0) { fprintf(stderr, "Invalid run count\n"); exit(-1); }
 54 |   std::cout << std::endl;
 55 |   std::cout << "run_count: " << run_count << std::endl;
 56 |   std::cout << std::endl;
 57 | 
 58 |   PeInitializer()
 59 |       .set_cal_phi()
 60 |       .set_cal_mu()
 61 |       .set_fft_k()
 62 |       .set_ntt32_k()
 63 |       .set_ntt64_k()
 64 |       .Init();
 65 | 
 66 |   auto& tester = GetTestRegistry();
 67 |   const int size = (int)std::size(tester.tests);
 68 | 
 69 |   TableFormatter tf;
 70 |   auto& header = tf.AppendLine();  // one column per test plus "Total"
 71 |   std::vector<TimeDelta> total_timings;  // index size holds the suite total
 72 |   for (int i = 0; i < size; ++i) {
 73 |     auto& item = tester.tests[i];
 74 |     header.push_back(item.description);
 75 |     total_timings.push_back({});
 76 |   }
 77 |   header.push_back("Total");
 78 |   total_timings.push_back({});
 79 | 
 80 |   for (int _ = 0; _ < run_count; ++_) {
 81 |     std::cout << "Test run " << _ << std::endl;
 82 |     bool isFirstTest = true;
 83 |     TimeDelta test_suite_timing;
 84 |     auto& line = tf.AppendLine();  // one table row per run
 85 |     for (int i = 0; i < size; ++i) {
 86 |       auto& item = tester.tests[i];
 87 |       if (!isFirstTest) {
 88 |         std::cout << std::endl;
 89 |       }
 90 |       std::cout << "Begin " << item.description << std::endl;
 91 |       TimeRecorder tr;
 92 |       item.test();
 93 |       std::cout << "End " << item.description << std::endl;
 94 |       auto usage = tr.Elapsed();
 95 |       std::cout << "Time usage " << usage.Format() << std::endl;
 96 | 
 97 |       line.push_back(FormatSecond(usage.ToSeconds()));
 98 |       total_timings[i].Add(usage);
 99 |       test_suite_timing.Add(usage);
100 |       isFirstTest = false;
101 |     }
102 |     line.push_back(FormatSecond(test_suite_timing.ToSeconds()));
103 |     total_timings[size].Add(test_suite_timing);
104 | 
105 |     std::cout << std::endl << "Test run " << _ << " finished" << std::endl;
106 |     std::cout << "Time usage " << test_suite_timing.Format() << std::endl
107 |               << std::endl;
108 |   }
109 | 
110 |   {  // empty spacer row
111 |     auto& line = tf.AppendLine();
112 |     for (int i = 0; i <= size; ++i) {
113 |       line.push_back("");
114 |     }
115 |   }
116 | 
117 |   {  // totals over all runs
118 |     auto& line = tf.AppendLine();
119 |     for (int i = 0; i <= size; ++i) {
120 |       line.push_back(FormatSecond(total_timings[i].ToSeconds()));
121 |     }
122 |   }
123 | 
124 |   {  // per-run averages; run_count was checked to be positive above
125 |     auto& line = tf.AppendLine();
126 |     for (int i = 0; i <= size; ++i) {
127 |       line.push_back(FormatSecond(total_timings[i].ToSeconds() / run_count));
128 |     }
129 |   }
130 | 
131 |   tf.Render(std::cout);
132 | 
133 |   std::cout << std::endl;
134 | 
135 |   tf.SetDefaultFormat(TableFormatter::NoAlign())
136 |       .SetSeparator(",")
137 |       .Render(std::cout);
138 |   return 0;
139 | }


--------------------------------------------------------------------------------
/test/tree_test.c:
--------------------------------------------------------------------------------
 1 | #include "pe_test.h"
 2 | 
 3 | namespace tree_test {
 4 | SL void RuBitTest() {
 5 |   // Mirrors RUBit range updates in a plain array and compares point queries.
 6 |   const int n = 100;
 7 |   int ref[n + 1] = {0};
 8 |   RUBit<int> tree(n);
 9 |   for (int round = 0; round < 100; ++round) {
10 |     if (rand() % 2 != 0) {  // query round: every position must match
11 |       for (int pos = 1; pos <= 100; ++pos) assert(tree.Query(pos) == ref[pos]);
12 |       continue;
13 |     }
14 |     // Update round: 100 random ranges, each with a delta in [-50, 49].
15 |     for (int k = 0; k < 100; ++k) {
16 |       int lo = rand() % n + 1, hi = rand() % n + 1;
17 |       int delta = rand() % n - 50;
18 |       if (lo > hi) std::swap(lo, hi);
19 |       tree.Update(lo, hi, delta);
20 |       for (int pos = lo; pos <= hi; ++pos) ref[pos] += delta;
21 |     }
22 |   }
23 | }
24 | 
25 | SL void RsqBitTest() {  // Mirrors RSQBit point updates; compares range sums.
26 |   const int n = 100;
27 |   int ref[n + 1] = {0};
28 |   RSQBit<int> tree(n);
29 |   for (int round = 0; round < 100; ++round) {
30 |     if (rand() % 2 != 0) {  // update round: delta in [-50, 49] per position
31 |       for (int pos = 1; pos <= 100; ++pos) {
32 |         int delta = rand() % n - 50;
33 |         ref[pos] += delta;
34 |         tree.Update(pos, delta);
35 |       }
36 |       continue;
37 |     }
38 |     for (int k = 0; k < 100; ++k) {  // query round: 100 random ranges
39 |       int lo = rand() % n + 1, hi = rand() % n + 1;
40 |       if (lo > hi) std::swap(lo, hi);
41 |       int sum = 0;
42 |       for (int pos = lo; pos <= hi; ++pos) sum += ref[pos];
43 |       assert(tree.Query(lo, hi) == sum);
44 |     }
45 |   }
46 | }
47 | 
48 | SL void TreeTest() {  // Registered entry point; runs both checks in order.
49 |   RuBitTest();
50 |   RsqBitTest();
51 | }
52 | 
53 | PE_REGISTER_TEST(&TreeTest, "TreeTest", SMALL);  // registered as SMALL
54 | }  // namespace tree_test
55 | 


--------------------------------------------------------------------------------
/test_all.bat:
--------------------------------------------------------------------------------
1 | bazel clean
2 | bazel run test:test
3 | bazel run test:test_tcmalloc
4 | bazel run test:test_noint128_noopenmp
5 | bazel run test:test_int128_noopenmp
6 | bazel run test:test_int128_openmp
7 | bazel run test:test_noint128_openmp


--------------------------------------------------------------------------------
/toolchain/BUILD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baihacker/pe/a149689695a1be4ff987b6b8c5c3297f7f1a86ba/toolchain/BUILD


--------------------------------------------------------------------------------