├── .bazelrc
├── .github
└── workflows
│ └── ci.yml
├── .travis.yml
├── BUILD
├── README.md
├── WORKSPACE
├── benchmarks
├── benchmark.md
├── benchmark_20180629.md
├── benchmark_20190918.md
├── benchmark_20191012.md
├── benchmark_20191013.md
├── benchmark_20200628.md
├── benchmark_20230101.md
├── benchmark_20250315.md
├── benchmark_20250601.md
├── format_test_result.py
└── perf_test_result.txt
├── build_all.bat
├── example
├── BUILD
├── bi_example_pe483.c
├── billion_sort.c
├── bit.c
├── build_each.bat
├── build_each.py
├── continued_fraction.c
├── count_carlitz_words.c
├── dfa_counter.c
├── dfa_summer.c
├── dva.c
├── example.c
├── linear_recurrence.c
├── matrix_power.c
├── mma_find_recurrence.c
├── mma_interpolating_polynomial.c
├── mma_to_cpp.c
├── mod_number.c
├── multiplicative_function_prefix_sum_common_function.c
├── multiplicative_function_prefix_sum_mavlue_base.c
├── parallel_cal_prime_pi.c
├── partition_mobius.c
├── pe_db.c
├── power_sum.c
├── prime_power_sum.c
├── random_sample.c
├── range.c
└── sym_poly.c
├── format.py
├── gen_config.py
├── legacy
├── pe_poly.hpp
└── pe_sym_poly.hpp
├── libraries_on_win64.md
├── pe
├── pe.hpp
├── pe_algo
├── pe_array
├── pe_base
├── pe_bi32
├── pe_bit
├── pe_config
├── pe_db
├── pe_dpe
├── pe_extended_int
├── pe_extended_signed_int
├── pe_extended_unsigned_int
├── pe_fft
├── pe_float
├── pe_fraction
├── pe_gbi
├── pe_geometry
├── pe_initializer
├── pe_int
├── pe_int_algo
├── pe_internal
├── pe_io
├── pe_mat
├── pe_memory
├── pe_misc
├── pe_mma
├── pe_mod
├── pe_mpf
├── pe_mpz
├── pe_nt
├── pe_nt_base
├── pe_ntf
├── pe_parallel
├── pe_parallel_algo
├── pe_persistance
├── pe_poly_algo
├── pe_poly_base
├── pe_poly_base_common
├── pe_poly_base_flint
├── pe_poly_base_gmp
├── pe_poly_base_libbf
├── pe_poly_base_min25
├── pe_poly_base_ntl
├── pe_rand
├── pe_range
├── pe_serialization
├── pe_span
├── pe_sym_poly
├── pe_time
├── pe_tree
├── pe_type_traits
├── pe_vector
├── precompile.bat
├── test
├── BUILD
├── algo_test.c
├── array_test.c
├── bi_div_test.c
├── bi_mul_test.c
├── bit_test.c
├── dva_test.c
├── extended_signed_int_test.c
├── extended_unsigned_int_test.c
├── fft_test.c
├── gbi_test.c
├── init_inv_test.c
├── int128_test.c
├── mat_mul_test.c
├── misc_test.c
├── mod_test.c
├── mpf_test.c
├── nt_test.c
├── parallel_sort_test.c
├── pe_test.c
├── pe_test.h
├── poly_algo_test.c
├── poly_div_test.c
├── poly_mul_test.c
├── prime_pi_sum_test.c
├── test_compile_each.bat
├── test_compile_each.py
├── test_int128_noopenmp.bat
├── test_int128_openmp.bat
├── test_noint128_noopenmp.bat
├── test_noint128_openmp.bat
├── test_perf.bat
├── test_perf.c
└── tree_test.c
├── test_all.bat
└── toolchain
├── BUILD
└── pe_toolchain.bzl
/.bazelrc:
--------------------------------------------------------------------------------
1 | startup --output_user_root=D:/bazel-output
2 | build --action_env=C_INCLUDE_PATH=D:/Hilbert/usr/include;D:/Hilbert/usr/include/pe;D:/Hilbert/usr/include/flint
3 | build --action_env=LIBRARY_PATH=D:/Hilbert/usr/lib
4 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: PE Tests
2 |
3 | on: [push]
4 |
5 | jobs:
6 | gcc-build-and-test:
7 | runs-on: ubuntu-latest
8 |
9 | steps:
10 | - name: Checkout repository
11 | uses: actions/checkout@v3
12 |
13 | - name: Install dependencies
14 | run: sudo apt-get update && sudo apt-get install -y g++ libgmp-dev libflint-dev libntl-dev
15 |
16 | - name: Build project
17 | run: |
18 | export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH}
19 | export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH}
20 | g++ test/pe_test.c -o ./pe_test.out --std=c++20 -O3 -march=native -mtune=native -fopenmp -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 -DENABLE_EIGEN=0 -DENABLE_GMP=1 -DENABLE_FLINT=0 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_ZMQ=0 -DENABLE_LIBBF=0 -DENABLE_PRIME_COUNT=0 -DENABLE_PRIME_SIEVE=0 -DENABLED_TEST=SMALL,MEDIUM,BIG -DCONTINUOUS_INTEGRATION_TEST
21 |
22 | - name: Run tests
23 | run: ./pe_test.out
24 |
25 | gcc-build-examples:
26 | runs-on: ubuntu-latest
27 |
28 | steps:
29 | - name: Checkout repository
30 | uses: actions/checkout@v3
31 |
32 | - name: Install dependencies
33 | run: sudo apt-get update && sudo apt-get install -y g++ libgmp-dev libflint-dev libntl-dev
34 |
35 | - name: Install Bazel
36 | uses: bazel-contrib/setup-bazel@0.14.0
37 |
38 | - name: Build examples
39 | run: |
40 | export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH}
41 | export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH}
42 | bazel --ignore_all_rc_files build //example:gcc_builds --action_env=CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH} --action_env=LIBRARY_PATH=${LIBRARY_PATH}
43 |
44 | clang-build-and-test:
45 | runs-on: ubuntu-latest
46 |
47 | steps:
48 | - name: Checkout repository
49 | uses: actions/checkout@v3
50 |
51 | - name: Install dependencies
52 | run: sudo apt-get update && sudo apt-get install -y clang libgmp-dev libflint-dev libntl-dev
53 |
54 | - name: Build project
55 | run: |
56 | export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH}
57 | export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH}
58 | clang++ -x c++ test/pe_test.c -o ./pe_test.out --std=c++20 -O3 -march=native -mtune=native -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=0 -DENABLE_EIGEN=0 -DENABLE_GMP=1 -DENABLE_FLINT=0 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_ZMQ=0 -DENABLE_LIBBF=0 -DENABLE_PRIME_COUNT=0 -DENABLE_PRIME_SIEVE=0 -DENABLED_TEST=SMALL,MEDIUM,BIG -DCONTINUOUS_INTEGRATION_TEST
59 |
60 | - name: Run tests
61 | run: ./pe_test.out
62 |
63 | msvc-build-and-test:
64 | runs-on: windows-latest
65 |
66 | steps:
67 | - name: Checkout repository
68 | uses: actions/checkout@v3
69 |
70 | - name: Install dependencies
71 | run: |
72 | #Invoke-WebRequest -Uri "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip" -OutFile "eigen.zip"
73 | #Expand-Archive -Path "eigen.zip" -DestinationPath "C:\eigen"
74 |
75 | - uses: ilammy/msvc-dev-cmd@v1.4.1
76 |
77 | - name: Build project
78 | run: |
79 | cl test\pe_test.c /TP /GS /GL /W3 /Gy /Zc:wchar_t /Zi /Gm- /O2 /Zc:inline /fp:precise /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /errorReport:prompt /WX- /Zc:forScope /Gd /Oi /MT /openmp /std:c++20 /FC /EHsc /nologo /diagnostics:classic /DENABLE_ASSERT=0 /DTRY_TO_USE_INT128=1 /DENABLE_OPENMP=1 /DENABLE_EIGEN=0 /DENABLE_GMP=0 /DENABLE_FLINT=0 /DENABLE_MPFR=0 /DENABLE_NTL=0 /DENABLE_ZMQ=0 /DENABLE_LIBBF=0 /DENABLE_PRIME_COUNT=0 /DENABLE_PRIME_SIEVE=0 /DENABLED_TEST=SMALL,MEDIUM,BIG /DCONTINUOUS_INTEGRATION_TEST /I "$env:GITHUB_WORKSPACE" /I "C:\eigen\eigen-3.4.0"
80 |
81 | - name: Run tests
82 | run: |
83 | .\pe_test.exe
84 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: focal
2 | language: cpp
3 | env:
4 | - BUILD_ARGUMENTS="--std=c++17 -O3 -march=native -mtune=native -lgmpxx -lmpfr -lflint -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=0 -DENABLE_EIGEN=1 -DENABLE_GMP=1 -DENABLE_FLINT=1 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_LIBBF=0" BUILD_CMD_PUSH="clang++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST -DNO_SUPER_TEST" BUILD_CMD_CRON="clang++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST"
5 | - BUILD_ARGUMENTS="--std=c++17 -O3 -march=native -mtune=native -fopenmp -lgmpxx -lflint -lmpfr -lntl -lgmp -DENABLE_ASSERT=0 -DTRY_TO_USE_INT128=1 -DENABLE_OPENMP=1 -DENABLE_EIGEN=1 -DENABLE_GMP=1 -DENABLE_FLINT=1 -DENABLE_MPFR=1 -DENABLE_NTL=1 -DENABLE_LIBBF=0" BUILD_CMD_PUSH="g++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST -DNO_SUPER_TEST" BUILD_CMD_CRON="g++ ./test/pe_test.c -o ./a.out ${BUILD_ARGUMENTS} -DTEST_ALL -DCONTINUOUS_INTEGRATION_TEST"
6 | script:
7 | - sudo apt-get update
8 | - sudo apt-get -y install libeigen3-dev
9 | - sudo apt-get -y install libgmp-dev
10 | - sudo apt-get -y install libflint-dev
11 | - sudo apt-get -y install libntl-dev
12 | - export CPLUS_INCLUDE_PATH="$(pwd)":"/usr/include/flint":"/usr/include/eigen3":${CPLUS_INCLUDE_PATH}
13 | - export LIBRARY_PATH="/usr/lib":${LIBRARY_PATH}
14 | - echo ${TRAVIS_EVENT_TYPE}
15 | - echo && [ "${TRAVIS_EVENT_TYPE}" == "push" ] && ${BUILD_CMD_PUSH} && ./a.out || [ "${TRAVIS_EVENT_TYPE}" != "push" ] && echo "skip push build"
16 | - echo && [ "${TRAVIS_EVENT_TYPE}" == "cron" ] && ${BUILD_CMD_CRON} && ./a.out || [ "${TRAVIS_EVENT_TYPE}" != "cron" ] && echo "skip cron build"
17 |
--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
1 | load("//toolchain:pe_toolchain.bzl", "pe_library")
2 |
3 | package(
4 | default_visibility = [
5 | "//visibility:public",
6 | ],
7 | )
8 |
9 | [pe_library(name = x + "_lib", srcs = [x]) for x in glob(["pe_*"])]
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PE: C++ Library for Project Euler
2 |
3 | [AppVeyor build status: Windows (MSVC)](https://ci.appveyor.com/project/baihacker/pe-win-msvc)
4 | [AppVeyor build status: Ubuntu (GCC)](https://ci.appveyor.com/project/baihacker/pe-ubuntu-gcc)
5 | [AppVeyor build status: Ubuntu (Clang)](https://ci.appveyor.com/project/baihacker/pe-ubuntu-clang)
6 | [GitHub Actions](https://github.com/baihacker/pe/actions)
7 | [Releases](https://github.com/baihacker/pe/releases)
8 | 
9 |
10 | **PE** is a C++ library designed to solve problems on [Project Euler](https://projecteuler.net/recent).
11 |
12 | ## Prerequisites
13 |
14 | To use this library, you need a C++ development environment that supports:
15 | * C++17 or later.
16 | * Building `x86_64` targets.
17 |
18 | ## Installation
19 |
20 | 1. **Include the Library:**
21 | - Place all the library files into a directory of your choice.
22 | - Ensure that `#include <pe.hpp>` works by adding the directory to the `CPLUS_INCLUDE_PATH` environment variable.
23 |
24 | 2. **Configure the Library:**
25 | - Run **[gen_config.py](https://github.com/baihacker/pe/blob/master/gen_config.py)** from the installation directory to generate **[pe_config](https://github.com/baihacker/pe/blob/master/pe_config)**.
26 | - This script generates a static configuration file with default values. You can manually edit this file after generation.
27 | - `ENABLE_ASSERT`: Enable assertions for certain inputs or conditions.
28 | - `TRY_TO_USE_INT128`: Check if the compiler supports `int128` and enable it. Set to `0` to disable `int128` even if supported.
29 | - The script also automatically detects the presence of third-party libraries and sets the appropriate flags:
30 | - `ENABLE_EIGEN`: Use [Eigen](http://eigen.tuxfamily.org/index.php?title=Main_Page).
31 | - `ENABLE_GMP`: Use [GMP](https://gmplib.org).
32 | - `ENABLE_FLINT`: Use [FLINT](http://www.flintlib.org).
33 | - `ENABLE_MPFR`: Use [MPFR](https://www.mpfr.org).
34 | - `ENABLE_LIBBF`: Use [libbf](https://bellard.org/libbf).
35 | - `ENABLE_NTL`: Use [NTL](https://www.shoup.net/ntl/download.html).
36 | - `ENABLE_ZMQ`: Use [ZeroMQ](https://zeromq.org/).
37 | - `ENABLE_PRIME_COUNT`: Use [PrimeCount](https://github.com/kimwalisch/primecount).
38 | - `ENABLE_PRIME_SIEVE`: Use [PrimeSieve](https://github.com/kimwalisch/primesieve).
39 | - `ENABLE_TCMALLOC`: Use [tcmalloc](https://github.com/gperftools/gperftools).
40 | - Manually edit **[pe_config](https://github.com/baihacker/pe/blob/master/pe_config)** to add or modify configuration items as needed:
41 | - `ENABLE_OPENMP`: Enable [OpenMP](http://www.openmp.org). The script doesn't generate the default config for OpenMP.
42 |
43 | 3. **(Optional) Generate Precompiled Header:**
44 | - Run `g++ -xc++-header pe.hpp` in the installation directory to create a precompiled header (`pe.hpp.gch`).
45 | - You may add additional compiler options if required (e.g., `g++ -xc++-header pe.hpp --std=c++17 -O3 -march=native -fopenmp`).
46 |
47 | ## Usage
48 |
49 | For a quick start, refer to [example.c](https://github.com/baihacker/pe/blob/master/example/example.c).
50 |
51 | ## File List
52 |
53 | - **pe**: Contains all implementation files.
54 | - **pe.hpp**: Header file for generating the precompiled header. Includes the core library.
55 | - **pe_algo**: Contains various algorithms.
56 | - **pe_array**: Array implementation with compile-time and runtime dimension length. Supports element counts exceeding `int32` limits and custom allocators.
57 | - **pe_base**: Pre-included headers, macros, typedefs, and basic inline functions.
58 | - **pe_bi32**: Big integer implementation with base `1 << 32`.
59 | - **pe_bit**: Bit manipulation utilities.
60 | - **pe_config**: Centralized configuration file for PE.
61 | - **pe_db**: Load and save pre-calculated results, such as prime pi and prime sum.
62 | - **pe_dpe**: Distributed computation.
63 | - **pe_extended_int**: Extended integer types.
64 | - **pe_extended_signed_int**: Extended signed integer types.
65 | - **pe_extended_unsigned_int**: Extended unsigned integer types.
66 | - **pe_fft**: Fast Fourier Transform and polynomial multiplication utilities.
67 | - **pe_float**: Functions for unified float operations including `__float128`.
68 | - **pe_fraction**: Fraction arithmetic operations.
69 | - **pe_gbi**: General big integer operations, corresponding to `pe_nt`.
70 | - **pe_geometry**: Support for `Point2D` and `Point3D`.
71 | - **pe_initializer**: Helper classes and macros for library initialization.
72 | - **pe_int**: Basic integer utilities.
73 | - **pe_int_algo**: Integer algorithms for extended integers and general big integers.
74 | - **pe_internal**: Includes configuration, defines necessary types/macros, and third-party libraries.
75 | - **pe_io**: Methods and macros for simplified or accelerated I/O operations.
76 | - **pe_mat**: Matrix operations.
77 | - **pe_memory**: Memory management utilities (Windows only).
78 | - **pe_misc**: Miscellaneous utility functions.
79 | - **pe_mma**: Support for MMA: helper methods or classes for MMA code generation.
80 | - **pe_mod**: Modular arithmetic utilities.
81 | - **pe_mpf**: Multi-precision floating-point numbers based on GMP.
82 | - **pe_mpz**: Multi-precision integers based on GMP.
83 | - **pe_nt**: Core number theory utilities.
84 | - **pe_nt_base**: Prime list generation, integer factorization, prime testing, and computations of φ and μ.
85 | - **pe_parallel**: Simple framework for multi-threaded problem-solving (Windows only).
86 | - **pe_parallel_algo**: Parallel algorithms.
87 | - **pe_persistance**: Key-Value Persistence (may support Linux with adjustments).
88 | - **pe_poly_algo**: Polynomial algorithms.
89 | - **pe_poly_base**: Basic polynomial algorithms.
90 | - **pe_poly_base_flint**: Polynomial algorithms based on FLINT.
91 | - **pe_poly_base_gmp**: Polynomial algorithms based on gmp.
92 | - **pe_poly_base_libbf**: Polynomial algorithms based on libbf.
93 | - **pe_poly_base_min25**: Polynomial algorithms from [Min_25](https://github.com/min-25), including the fastest polynomial multiplication.
94 | - **pe_poly_base_ntl**: Polynomial algorithms based on NTL.
95 | - **pe_rand**: Random number generation utilities.
96 | - **pe_range**: Range implementation.
97 | - **pe_serialization**: Object serialization.
98 | - **pe_span**: Implementation of `Span`.
99 | - **pe_sym_poly**: Symbolic polynomial operations.
100 | - **pe_time**: Utilities for `TimeDelta` and `TimeRecorder`.
101 | - **pe_tree**: Tree-based data structures.
102 | - **pe_type_traits**: Type trait utilities.
103 | - **pe_vector**: Vector operations.
104 |
--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baihacker/pe/a149689695a1be4ff987b6b8c5c3297f7f1a86ba/WORKSPACE
--------------------------------------------------------------------------------
/benchmarks/benchmark_20180629.md:
--------------------------------------------------------------------------------
1 | # Benchmark
2 |
3 | ## Ntt benchmark
4 |
5 | ### Test Environment:
6 |
7 | * OS: Win10 Pro 1803
8 | * CPU: i7-4790K (MMX, SSE, SSE2, SSE3, SSE4.1, SSE4.2, EM64T, VT-x, AES, AVX, AVX2, FMA3)
9 | * Compiler: MinGW-x86_64-8.1.0-win32-seh-rt_v6-rev0
10 | * Msys2: msys2-x86_64-20190524
11 | * Test code: [Ntt test](https://github.com/baihacker/pe/blob/master/test/ntt_test.c)
12 | * Build libbf:
13 | > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o libbf.avx2.o libbf.c
14 |
15 | > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o cutils.avx2.o cutils.c
16 |
17 | > gcc-ar crv libbf.avx2.a cutils.avx2.o libbf.avx2.o
18 | * Build test target:
19 | > -o a.exe --std=c++11 -O3 -march=native -mtune=native -fopenmp -Wl,--stack,268435456 -lbf -lgmpxx -lflint -lgmp -lmpfr -lmpir
20 |
21 | ### Test result
22 |
23 | #### Openmp enabled
24 | ```cpp
25 | ntt test : data = random, size = 0, n = 1000000, mod = 100019
26 | flint n : 0.407
27 | flint p : 1.156
28 | ntt32 s : 1.031
29 | ntt32 l : 1.156
30 | ntt64 s : 1.578
31 | ntt64 l : 1.656
32 | Min_25 s : 0.156
33 | Min_25 l : 0.203
34 | libbf : 0.828
35 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003
36 | flint n : 1.234
37 | flint p : 2.890
38 | ntt32 l : 2.375
39 | ntt64 l : 3.391
40 | Min_25 l : 0.343
41 | libbf : 1.359
42 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779
43 | flint n : 0.843
44 | flint p : 2.063
45 | ntt64 l : 1.671
46 | Min_25 l : 0.203
47 | libbf : 0.859
48 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003
49 | flint n : 0.422
50 | flint p : 1.234
51 | ntt32 s : 0.984
52 | ntt32 l : 1.156
53 | ntt64 s : 1.515
54 | ntt64 l : 1.651
55 | Min_25 s : 0.141
56 | Min_25 l : 0.172
57 | libbf : 0.813
58 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003
59 | flint n : 1.234
60 | flint p : 2.750
61 | ntt32 l : 2.359
62 | ntt64 l : 3.422
63 | Min_25 l : 0.359
64 | libbf : 1.375
65 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779
66 | flint n : 0.829
67 | flint p : 2.077
68 | ntt64 l : 1.703
69 | Min_25 l : 0.172
70 | libbf : 0.859
71 |
72 | mod = 100019
73 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
74 | flint n 0.016 0.000 0.000 0.000 0.000 0.000 0.016 0.032 0.079 0.219 0.422
75 | flint p 0.000 0.000 0.000 0.000 0.016 0.031 0.063 0.140 0.282 0.594 1.219
76 | ntt32 s 0.000 0.000 0.016 0.000 0.016 0.047 0.062 0.109 0.235 0.484 1.000
77 | ntt32 l 0.000 0.000 0.000 0.015 0.016 0.047 0.062 0.125 0.281 0.578 1.187
78 | ntt64 s 0.000 0.000 0.016 0.016 0.031 0.062 0.094 0.172 0.375 0.734 1.531
79 | ntt64 l 0.000 0.000 0.016 0.015 0.032 0.062 0.094 0.188 0.406 0.813 1.687
80 | Min_25 s 0.000 0.015 0.000 0.000 0.000 0.016 0.016 0.016 0.032 0.062 0.141
81 | Min_25 l 0.000 0.000 0.000 0.000 0.000 0.015 0.016 0.032 0.031 0.078 0.172
82 | libbf 0.000 0.000 0.000 0.000 0.000 0.015 0.047 0.093 0.188 0.406 0.843
83 | mod = 100000000003
84 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
85 | flint n 0.016 0.000 0.000 0.000 0.000 0.016 0.031 0.062 0.140 0.390 0.859
86 | flint p 0.000 0.000 0.000 0.000 0.016 0.047 0.094 0.203 0.422 0.906 1.907
87 | ntt32 l 0.000 0.000 0.000 0.015 0.031 0.047 0.078 0.141 0.297 0.594 1.187
88 | ntt64 l 0.016 0.000 0.000 0.015 0.031 0.063 0.093 0.203 0.391 0.828 1.672
89 | Min_25 l 0.000 0.000 0.000 0.000 0.016 0.015 0.016 0.031 0.047 0.094 0.265
90 | libbf 0.000 0.000 0.000 0.000 0.016 0.031 0.047 0.094 0.203 0.421 0.875
91 | mod = 316227766016779
92 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
93 | flint n 0.000 0.000 0.000 0.000 0.000 0.016 0.047 0.110 0.203 0.391 0.828
94 | flint p 0.000 0.016 0.000 0.016 0.031 0.047 0.125 0.234 0.468 1.016 2.124
95 | ntt64 l 0.000 0.000 0.016 0.016 0.031 0.062 0.094 0.204 0.406 0.813 1.672
96 | Min_25 l 0.000 0.000 0.016 0.000 0.000 0.000 0.015 0.016 0.046 0.078 0.172
97 | libbf 0.000 0.000 0.015 0.000 0.016 0.016 0.047 0.093 0.203 0.437 0.875
98 | ```
99 |
100 | #### Openmp disabled (option "-fopenmp" removed)
101 | ```cpp
102 | ntt test : data = random, size = 0, n = 1000000, mod = 100019
103 | flint n : 0.406
104 | flint p : 1.172
105 | ntt32 s : 2.968
106 | ntt32 l : 4.531
107 | ntt64 s : 2.500
108 | ntt64 l : 5.015
109 | Min_25 s : 0.172
110 | Min_25 l : 0.344
111 | libbf : 0.812
112 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003
113 | flint n : 1.234
114 | flint p : 2.906
115 | ntt32 l : 9.470
116 | ntt64 l : 10.530
117 | Min_25 l : 0.719
118 | libbf : 1.359
119 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779
120 | flint n : 0.828
121 | flint p : 2.078
122 | ntt64 l : 5.077
123 | Min_25 l : 0.391
124 | libbf : 0.844
125 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003
126 | flint n : 0.406
127 | flint p : 1.250
128 | ntt32 s : 2.970
129 | ntt32 l : 4.531
130 | ntt64 s : 2.469
131 | ntt64 l : 4.952
132 | Min_25 s : 0.156
133 | Min_25 l : 0.328
134 | libbf : 0.813
135 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003
136 | flint n : 1.218
137 | flint p : 2.766
138 | ntt32 l : 9.483
139 | ntt64 l : 10.517
140 | Min_25 l : 0.734
141 | libbf : 1.359
142 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779
143 | flint n : 0.828
144 | flint p : 2.078
145 | ntt64 l : 5.000
146 | Min_25 l : 0.422
147 | libbf : 0.844
148 |
149 | mod = 100019
150 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
151 | flint n 0.000 0.000 0.000 0.016 0.000 0.016 0.015 0.032 0.078 0.219 0.421
152 | flint p 0.000 0.000 0.000 0.016 0.016 0.031 0.062 0.140 0.281 0.578 1.234
153 | ntt32 s 0.000 0.000 0.015 0.016 0.031 0.078 0.141 0.297 0.641 1.390 3.000
154 | ntt32 l 0.000 0.000 0.016 0.031 0.047 0.109 0.219 0.469 0.985 2.093 4.546
155 | ntt64 s 0.000 0.000 0.016 0.000 0.015 0.047 0.125 0.266 0.562 1.188 2.500
156 | ntt64 l 0.000 0.000 0.016 0.031 0.047 0.110 0.250 0.531 1.110 2.343 4.999
157 | Min_25 s 0.000 0.000 0.000 0.000 0.016 0.000 0.000 0.031 0.032 0.062 0.171
158 | Min_25 l 0.000 0.000 0.000 0.000 0.000 0.000 0.015 0.047 0.078 0.172 0.360
159 | libbf 0.000 0.000 0.000 0.015 0.000 0.015 0.047 0.094 0.203 0.406 0.812
160 | mod = 100000000003
161 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
162 | flint n 0.000 0.000 0.000 0.000 0.015 0.016 0.031 0.046 0.125 0.391 0.875
163 | flint p 0.000 0.000 0.015 0.016 0.015 0.047 0.094 0.203 0.422 0.907 1.891
164 | ntt32 l 0.000 0.015 0.016 0.016 0.062 0.109 0.219 0.469 1.000 2.140 4.562
165 | ntt64 l 0.000 0.000 0.000 0.032 0.062 0.125 0.250 0.531 1.125 2.375 5.077
166 | Min_25 l 0.000 0.000 0.000 0.000 0.016 0.016 0.031 0.046 0.093 0.187 0.406
167 | libbf 0.000 0.000 0.000 0.000 0.015 0.031 0.078 0.093 0.203 0.422 0.859
168 | mod = 316227766016779
169 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
170 | flint n 0.000 0.000 0.016 0.000 0.016 0.015 0.047 0.093 0.203 0.391 0.828
171 | flint p 0.000 0.000 0.015 0.000 0.016 0.047 0.109 0.234 0.469 1.015 2.140
172 | ntt64 l 0.000 0.000 0.016 0.031 0.063 0.125 0.265 0.531 1.125 2.406 5.109
173 | Min_25 l 0.000 0.000 0.000 0.000 0.015 0.016 0.031 0.047 0.078 0.188 0.422
174 | libbf 0.000 0.000 0.000 0.016 0.000 0.015 0.047 0.094 0.219 0.437 0.859
175 | ```
176 |
--------------------------------------------------------------------------------
/benchmarks/benchmark_20190918.md:
--------------------------------------------------------------------------------
1 | # Benchmark
2 |
3 | ## Ntt benchmark
4 |
5 | ### Test Environment:
6 |
7 | * Date: 2019.09.18
8 | * OS: Win10 Pro 1903 (18362.356)
9 | * CPU: i9-9900K (MMX, SSE, SSE2, SSE3, SSE4.1, SSE4.2, EM64T, VT-x, AES, AVX, AVX2, FMA3, TSX)
10 | * Compiler: MinGW-x86_64-8.1.0-win32-seh-rt_v6-rev0
11 | * Msys2: msys2-x86_64-20190524
12 | * Test code: [Ntt test](https://github.com/baihacker/pe/blob/master/test/ntt_test.c)
13 | * Build libbf:
14 | > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o libbf.avx2.o libbf.c
15 |
16 | > gcc -Wall -O3 -mavx -mavx2 -mfma -mbmi2 -c -o cutils.avx2.o cutils.c
17 |
18 | > gcc-ar crv libbf.avx2.a cutils.avx2.o libbf.avx2.o
19 | * Build test target:
20 | > -o a.exe --std=c++14 -fno-diagnostics-color -O3 -march=native -mtune=native -fopenmp -Wl,--stack,268435456 -static -s -lbf -lgmpxx -lflint -lgmp -lmpfr -lmpir
21 |
22 | ### Test result
23 |
24 | #### Openmp enabled
25 | ```cpp
26 | ntt test : data = random, size = 0, n = 1000000, mod = 100019
27 | flint n : 0.306
28 | flint p : 1.029
29 | ntt32 s : 0.816
30 | ntt32 l : 0.870
31 | ntt64 s : 1.376
32 | ntt64 l : 1.422
33 | Min_25 s : 0.114
34 | Min_25 l : 0.125
35 | libbf : 0.635
36 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003
37 | flint n : 0.919
38 | flint p : 2.633
39 | ntt32 l : 1.743
40 | ntt64 l : 2.857
41 | Min_25 l : 0.255
42 | libbf : 1.084
43 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779
44 | flint n : 0.623
45 | flint p : 1.869
46 | ntt64 l : 1.429
47 | Min_25 l : 0.133
48 | libbf : 0.682
49 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003
50 | flint n : 0.313
51 | flint p : 1.087
52 | ntt32 s : 0.813
53 | ntt32 l : 0.878
54 | ntt64 s : 1.354
55 | ntt64 l : 1.419
56 | Min_25 s : 0.111
57 | Min_25 l : 0.125
58 | libbf : 0.645
59 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003
60 | flint n : 0.922
61 | flint p : 2.502
62 | ntt32 l : 1.743
63 | ntt64 l : 2.839
64 | Min_25 l : 0.253
65 | libbf : 1.088
66 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779
67 | flint n : 0.624
68 | flint p : 1.873
69 | ntt64 l : 1.406
70 | Min_25 l : 0.133
71 | libbf : 0.682
72 |
73 | mod = 100019
74 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
75 | flint n 0.000 0.000 0.001 0.001 0.003 0.006 0.012 0.028 0.059 0.160 0.318
76 | flint p 0.001 0.001 0.003 0.006 0.014 0.028 0.058 0.123 0.253 0.532 1.097
77 | ntt32 s 0.001 0.003 0.003 0.007 0.015 0.032 0.048 0.101 0.201 0.403 0.819
78 | ntt32 l 0.001 0.002 0.004 0.008 0.016 0.032 0.051 0.110 0.223 0.427 0.877
79 | ntt64 s 0.001 0.002 0.006 0.012 0.025 0.055 0.081 0.165 0.338 0.677 1.376
80 | ntt64 l 0.001 0.003 0.006 0.012 0.026 0.055 0.081 0.173 0.353 0.704 1.413
81 | Min_25 s 0.000 0.000 0.001 0.001 0.002 0.002 0.006 0.014 0.027 0.054 0.112
82 | Min_25 l 0.001 0.001 0.000 0.001 0.003 0.006 0.010 0.016 0.030 0.059 0.128
83 | libbf 0.000 0.001 0.002 0.005 0.009 0.022 0.048 0.072 0.149 0.310 0.645
84 | mod = 100000000003
85 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
86 | flint n 0.001 0.000 0.001 0.002 0.005 0.010 0.021 0.045 0.099 0.301 0.645
87 | flint p 0.001 0.002 0.005 0.010 0.021 0.043 0.092 0.190 0.397 0.828 1.727
88 | ntt32 l 0.001 0.002 0.004 0.008 0.017 0.035 0.053 0.111 0.217 0.439 0.869
89 | ntt64 l 0.001 0.003 0.005 0.012 0.027 0.057 0.085 0.174 0.352 0.705 1.431
90 | Min_25 l 0.001 0.001 0.001 0.002 0.004 0.008 0.013 0.020 0.034 0.062 0.133
91 | libbf 0.001 0.001 0.002 0.004 0.010 0.024 0.045 0.077 0.159 0.330 0.682
92 | mod = 316227766016779
93 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
94 | flint n 0.000 0.000 0.001 0.002 0.005 0.012 0.029 0.072 0.152 0.298 0.625
95 | flint p 0.001 0.002 0.006 0.011 0.022 0.049 0.104 0.208 0.430 0.910 1.910
96 | ntt64 l 0.001 0.003 0.006 0.013 0.027 0.056 0.085 0.173 0.350 0.707 1.434
97 | Min_25 l 0.000 0.001 0.001 0.002 0.003 0.008 0.013 0.020 0.034 0.066 0.157
98 | libbf 0.000 0.001 0.002 0.004 0.010 0.024 0.051 0.077 0.159 0.333 0.692
99 | ```
100 |
101 | #### Openmp disabled (option "-fopenmp" removed)
102 | ```cpp
103 | ntt test : data = random, size = 0, n = 1000000, mod = 100019
104 | flint n : 0.303
105 | flint p : 1.063
106 | ntt32 s : 2.720
107 | ntt32 l : 4.035
108 | ntt64 s : 2.284
109 | ntt64 l : 4.605
110 | Min_25 s : 0.121
111 | Min_25 l : 0.264
112 | libbf : 0.638
113 | ntt test : data = random, size = 1, n = 1479725, mod = 100000000003
114 | flint n : 0.932
115 | flint p : 2.630
116 | ntt32 l : 8.624
117 | ntt64 l : 9.764
118 | Min_25 l : 0.617
119 | libbf : 1.083
120 | ntt test : data = random, size = 2, n = 1000000, mod = 316227766016779
121 | flint n : 0.628
122 | flint p : 1.884
123 | ntt64 l : 4.664
124 | Min_25 l : 0.341
125 | libbf : 0.682
126 | ntt test : data = max mod, size = 0, n = 999996, mod = 1000003
127 | flint n : 0.312
128 | flint p : 1.092
129 | ntt32 s : 2.703
130 | ntt32 l : 4.038
131 | ntt64 s : 2.254
132 | ntt64 l : 4.560
133 | Min_25 s : 0.121
134 | Min_25 l : 0.262
135 | libbf : 0.650
136 | ntt test : data = max mod, size = 1, n = 1479725, mod = 100000000003
137 | flint n : 0.929
138 | flint p : 2.527
139 | ntt32 l : 8.570
140 | ntt64 l : 9.727
141 | Min_25 l : 0.648
142 | libbf : 1.095
143 | ntt test : data = max mod, size = 2, n = 1000000, mod = 316227766016779
144 | flint n : 0.630
145 | flint p : 1.885
146 | ntt64 l : 4.601
147 | Min_25 l : 0.332
148 | libbf : 0.687
149 |
150 | mod = 100019
151 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
152 | flint n 0.000 0.001 0.000 0.001 0.002 0.005 0.011 0.028 0.060 0.159 0.317
153 | flint p 0.001 0.001 0.003 0.006 0.014 0.029 0.060 0.123 0.251 0.534 1.094
154 | ntt32 s 0.001 0.003 0.006 0.014 0.030 0.064 0.137 0.288 0.607 1.287 2.722
155 | ntt32 l 0.002 0.004 0.009 0.022 0.045 0.096 0.204 0.430 0.901 1.910 4.044
156 | ntt64 s 0.001 0.002 0.006 0.011 0.025 0.053 0.113 0.244 0.516 1.082 2.438
157 | ntt64 l 0.003 0.005 0.012 0.024 0.054 0.115 0.247 0.525 1.096 2.230 4.652
158 | Min_25 s 0.000 0.000 0.001 0.001 0.001 0.003 0.007 0.014 0.029 0.058 0.132
159 | Min_25 l 0.000 0.001 0.000 0.001 0.003 0.006 0.014 0.029 0.061 0.131 0.267
160 | libbf 0.001 0.000 0.002 0.004 0.009 0.019 0.044 0.073 0.150 0.311 0.649
161 | mod = 100000000003
162 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
163 | flint n 0.001 0.000 0.001 0.001 0.005 0.009 0.021 0.046 0.098 0.302 0.651
164 | flint p 0.001 0.002 0.005 0.010 0.022 0.045 0.092 0.190 0.400 -0.662 1.775
165 | ntt32 l 0.002 0.005 0.010 0.021 0.046 0.097 0.206 0.438 0.915 2.034 4.193
166 | ntt64 l 0.002 0.005 0.011 0.025 0.052 0.118 0.247 0.500 1.050 2.217 4.675
167 | Min_25 l 0.000 0.000 0.001 0.001 0.003 0.009 0.018 0.039 0.076 0.158 0.328
168 | libbf 0.001 0.001 0.002 0.005 0.009 0.021 0.047 0.077 0.159 0.328 0.688
169 | mod = 316227766016779
170 | log2(n) 10 11 12 13 14 15 16 17 18 19 20
171 | flint n 0.000 0.001 0.001 0.002 0.005 0.012 0.029 0.072 0.153 0.299 0.630
172 | flint p 0.002 0.003 0.005 0.011 0.023 0.048 0.105 0.213 0.437 0.933 1.912
173 | ntt64 l 0.002 0.005 0.012 0.024 0.052 0.110 0.237 0.499 1.054 2.224 4.694
174 | Min_25 l 0.001 0.001 0.001 0.002 0.004 0.009 0.018 0.038 0.079 0.166 0.344
175 | libbf 0.001 0.001 0.002 0.004 0.010 0.023 0.047 0.076 0.159 0.335 0.699
176 | ```
177 |
--------------------------------------------------------------------------------
/benchmarks/format_test_result.py:
--------------------------------------------------------------------------------
1 | #! python
2 | #-*- coding: utf8 -*-
3 | import os
4 | import pprint
5 |
6 |
def load_db_data(file):
    """Read the raw contents of ``file`` if it exists.

    Args:
        file: Path of the file to read, given either as text or as
            UTF-8 encoded bytes.

    Returns:
        ``(True, contents)`` when the path exists, where ``contents`` is
        what a binary-mode read of the file returns; ``(False, '')``
        otherwise.
    """
    # Only byte paths need decoding: a Python 3 ``str`` has no ``decode``
    # method, so decoding unconditionally crashes there. The text path is
    # handed to the OS as-is instead of being re-encoded to GBK.
    if isinstance(file, bytes):
        file = file.decode('utf8', 'ignore')
    if os.path.exists(file):
        with open(file, 'rb') as tempf:
            return True, tempf.read()
    return False, ''
15 |
16 |
def load_db():
    """Load ``perf_test_result.txt`` and evaluate its contents.

    Returns:
        Whatever Python object the file's text evaluates to.
    """
    _found, raw = load_db_data('perf_test_result.txt')
    # NOTE(review): eval() executes whatever the file contains; this is only
    # acceptable while the file is a trusted, locally generated artifact.
    return eval(raw)
20 |
21 |
class MyPrettyPrinter(pprint.PrettyPrinter):
    """PrettyPrinter subclass whose ``format`` hook defers to the base class."""

    def format(self, object, context, maxlevels, level):
        """Format ``object`` exactly as ``pprint.PrettyPrinter.format`` does.

        The arguments are forwarded unchanged and the base-class result is
        returned as-is.
        """
        base_format = pprint.PrettyPrinter.format
        formatted = base_format(self, object, context, maxlevels, level)
        return formatted
26 |
27 |
def export(db, file):
    """Pretty-print ``db`` into the text file at path ``file``.

    The file is opened in ``'w'`` mode, so existing contents are replaced.
    """
    with open(file, 'w') as out:
        printer = MyPrettyPrinter(stream=out)
        printer.pprint(db)
33 |
34 |
if __name__ == '__main__':
    # Parse the stored perf results when run as a script; writing the
    # pretty-printed copy is currently switched off.
    db = load_db()
    # export(db, "formated.txt")
--------------------------------------------------------------------------------
/build_all.bat:
--------------------------------------------------------------------------------
1 | bazel clean && bazel build //test:all //example:all //:all
--------------------------------------------------------------------------------
/example/BUILD:
--------------------------------------------------------------------------------
1 | load("//toolchain:pe_toolchain.bzl", "pe_binary")
2 |
3 | package(
4 | default_visibility = [
5 | "//visibility:public",
6 | ],
7 | )
8 |
9 | [pe_binary(name = x[:-2], srcs = [x]) for x in glob(["*.c"])]  # one target per example source, named after the file without its ".c" suffix
10 | [pe_binary(name = x[:-2] + "_gcc",  # a second "<name>_gcc" target per source with explicit flags
11 | srcs = [x],
12 | executable_suffix = ".out",
13 | enable_pe_flags = False,  # the flags are listed explicitly below instead
14 | copts = [
15 | "-std=c++17",
16 | "-Wno-delete-incomplete",
17 | "-Wno-shift-count-overflow",
18 | "-O2",
19 | "-march=native",
20 | "-mtune=native",
21 | "-fopenmp"],
22 | defines = [
23 | "ENABLE_ASSERT=0",
24 | "TRY_TO_USE_INT128=1",
25 | "ENABLE_OPENMP=1",
26 | "ENABLE_EIGEN=0",
27 | "ENABLE_GMP=1",
28 | "ENABLE_FLINT=1",
29 | "ENABLE_MPFR=1",
30 | "ENABLE_NTL=1",
31 | "ENABLE_ZMQ=0",
32 | "ENABLE_LIBBF=0",
33 | "ENABLE_PRIME_COUNT=0",
34 | "ENABLE_PRIME_SIEVE=0",
35 | "TEST_ALL",
36 | "CONTINUOUS_INTEGRATION_TEST",
37 | "NO_SUPER_TEST"],
38 | linkopts = [
39 | "-fopenmp",
40 | "-lflint",
41 | "-lmpfr",
42 | "-lntl",
43 | "-lgmp"],
44 | cc_path = "g++",  # these targets are built with g++
45 | ) for x in glob(["*.c"])]
46 |
47 | filegroup(name ="gcc_builds", srcs = [x[:-2] + "_gcc" for x in glob(["*.c"])])  # groups every "<name>_gcc" target
--------------------------------------------------------------------------------
/example/bi_example_pe483.c:
--------------------------------------------------------------------------------
1 |
2 | // Project Euler 483 Repeated permutation
3 | // reference answer:
4 | // N = 100: 53817203945.52453
5 | // output: 53817203945
6 | // N = 150: 55335570173801.14
7 | // output: 55335570173801
8 | #include
9 | using namespace pe;
10 | const int N = 100;
11 | BigInteger choose[505][505];
12 | BigInteger fac[501];
13 | void init() {  // Fills choose[i][j] with binomial coefficients and fac[i] with factorials for i <= 500.
14 | for (int i = 0; i <= 500; ++i)
15 | for (int j = 0; j <= i; ++j)
16 | choose[i][j] =
17 | (j == i || j == 0) ? 1 : choose[i - 1][j] + choose[i - 1][j - 1];  // Pascal's rule
18 | fac[0] = 1;
19 | for (int i = 1; i <= 500; ++i) fac[i] = fac[i - 1] * i;
20 | }
21 | BigInteger dp[N + 1];
22 | std::map orz[N + 1];
23 | int main() {  // Runs the DP for step sizes i = 1..N, printing progress and the final quotient to stderr.
24 | init();
25 | dp[0] = 1;
26 | orz[0][1] = 1;
27 | for (int i = 1; i <= N; ++i) {
28 | std::cerr << i << " ";
29 | for (int j = N; j >= i; --j) {
30 | BigInteger total = 0;
31 | std::map inc;  // contributions collected here and merged into orz[n] after the x loop
32 | const int curr_step = i;  // aliases for i and j, which are shadowed in the loop below
33 | const int n = j;
34 | for (int x = 1; x * curr_step <= n; ++x) {
35 | BigInteger t = 1, u = 1;
36 | for (int i = 0, j = n; i < x; ++i) {  // this i, j shadow the outer loop variables
37 | t = t * choose[j][curr_step];
38 | j -= curr_step;
39 | u = u * fac[curr_step - 1];
40 | }
41 | BigInteger magic = t / fac[x] * u;
42 | total += magic * dp[n - curr_step * x];
43 | foreach (it, orz[n - x * curr_step]) {
44 | int128 d = Gcd((int128)curr_step, it.first);
45 | int128 now = curr_step / d * it.first;  // lcm(curr_step, it.first)
46 | inc[now] += it.second * magic;
47 | }
48 | }
49 | dp[n] += total;
50 | foreach (it, inc) orz[n][it.first] += it.second;
51 | }
52 | std::cerr << dp[N] << std::endl;
53 | }
54 |
55 | std::cerr << dp[N] << std::endl;
56 | BigInteger s = 0;
57 | foreach (it, orz[N])  // s = sum over orz[N] of key^2 * value
58 | s += BigInteger(it.first) * BigInteger(it.first) * it.second;
59 | std::cerr << s / dp[N] << std::endl;
60 | return 0;
61 | }
62 |
--------------------------------------------------------------------------------
/example/billion_sort.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | #if OS_TYPE_WIN
5 | const int64 N = 1000000000;
6 | LargeMemory lm;
7 |
8 | int main() {  // Fills N floats with pseudo-random values, sorts them with ParallelSort and prints tr.Elapsed().
9 | float* data = reinterpret_cast(lm.Allocate(N * sizeof(float)));
10 | dbg("memory ready");
11 |
12 | for (int i = 0; i < N; ++i) data[i] = 1. * rand() / RAND_MAX;  // values in [0, 1]
13 | dbg("data ready");
14 |
15 | TimeRecorder tr;  // constructed after the fill, just before the sort
16 | ParallelSort<30>(data, data + N);
17 | // std::sort(data, data+N);
18 | dbg("sorted");
19 |
20 | std::cerr << tr.Elapsed().Format() << std::endl;
21 | return 0;
22 | }
23 | #else
24 | int main() { return 0; }
25 | #endif
--------------------------------------------------------------------------------
/example/bit.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 | using namespace std;
4 |
5 | int main() {  // Prints a table of the bit helper results for n = 0..31 to stdout.
6 | TableFormatter tf;
7 | auto& line = tf.AppendLine();  // header row
8 | line.push_back("n");
9 | line.push_back("BitWidth");
10 | line.push_back("BitFloor");
11 | line.push_back("BitCeil");
12 | line.push_back("HighestBitIndex");
13 | line.push_back("HighestBit");
14 | line.push_back("LowestBitIndex");
15 | line.push_back("LowestBit");
16 | for (int n = 0; n < 32; ++n) {
17 | auto& line = tf.AppendLine();  // one row per n; shadows the header `line`
18 | line.push_back(ToString(n));
19 | line.push_back(ToString(BitWidth(n)));
20 | line.push_back(ToString(BitFloor(n)));
21 | line.push_back(ToString(BitCeil(n)));
22 | line.push_back(ToString(HighestBitIndex(n)));
23 | line.push_back(ToString(HighestBit(n)));
24 | line.push_back(ToString(LowestBitIndex(n)));
25 | line.push_back(ToString(LowestBit(n)));
26 | }
27 | tf.Render(std::cout);
28 | return 0;
29 | }
30 |
31 | /*
32 | n BitWidth BitFloor BitCeil HighestBitIndex HighestBit LowestBitIndex LowestBit
33 | 0 0 0 1 -1 0 -1 0
34 | 1 1 1 1 0 1 0 1
35 | 2 2 2 2 1 2 1 2
36 | 3 2 2 4 1 2 0 1
37 | 4 3 4 4 2 4 2 4
38 | 5 3 4 8 2 4 0 1
39 | 6 3 4 8 2 4 1 2
40 | 7 3 4 8 2 4 0 1
41 | 8 4 8 8 3 8 3 8
42 | 9 4 8 16 3 8 0 1
43 | 10 4 8 16 3 8 1 2
44 | 11 4 8 16 3 8 0 1
45 | 12 4 8 16 3 8 2 4
46 | 13 4 8 16 3 8 0 1
47 | 14 4 8 16 3 8 1 2
48 | 15 4 8 16 3 8 0 1
49 | 16 5 16 16 4 16 4 16
50 | 17 5 16 32 4 16 0 1
51 | 18 5 16 32 4 16 1 2
52 | 19 5 16 32 4 16 0 1
53 | 20 5 16 32 4 16 2 4
54 | 21 5 16 32 4 16 0 1
55 | 22 5 16 32 4 16 1 2
56 | 23 5 16 32 4 16 0 1
57 | 24 5 16 32 4 16 3 8
58 | 25 5 16 32 4 16 0 1
59 | 26 5 16 32 4 16 1 2
60 | 27 5 16 32 4 16 0 1
61 | 28 5 16 32 4 16 2 4
62 | 29 5 16 32 4 16 0 1
63 | 30 5 16 32 4 16 1 2
64 | 31 5 16 32 4 16 0 1
65 | */
--------------------------------------------------------------------------------
/example/build_each.bat:
--------------------------------------------------------------------------------
1 | build_each.py
2 | pause
--------------------------------------------------------------------------------
/example/build_each.py:
--------------------------------------------------------------------------------
1 | #! python3
2 | # -*- coding: UTF-8 -*-
3 | import os
4 | import sys
5 | import time
6 |
7 | CURRENT_DIRECTORY = os.getcwd()
8 |
9 |
10 | def DurationPartsFromNs(duration):  # Split a nanosecond count into (minutes, seconds, milliseconds).
11 | min_part = duration // 1000000000 // 60  # whole minutes; not wrapped at 60
12 | sec_part = duration // 1000000000 % 60
13 | millisec_part = duration // 1000000 % 1000
14 | return (min_part, sec_part, millisec_part)
15 |
16 |
17 | def FormatNs(duration):  # Format a nanosecond count as 'M:SS.mmm'.
18 | return '%d:%02d.%03d' % DurationPartsFromNs(duration)
19 |
20 |
21 | def main():  # Run pe++.py on every .c file in the current directory; returns the last exit status (0 if none ran).
22 | ret = 0
23 | for file in os.listdir(CURRENT_DIRECTORY):
24 | _, file_ext_name = os.path.splitext(file)
25 | if file_ext_name != '.c':  # only .c files are compiled
26 | continue
27 | print('Compile %s' % file)
28 | start_time = time.perf_counter_ns()
29 | ret = os.system('pe++.py %s -hc' % file)
30 | time_usage = FormatNs(time.perf_counter_ns() - start_time)
31 | print('Done, return code = %d, time usage = %s' % (ret, time_usage))
32 | print()
33 | if ret != 0:  # stop at the first failure
34 | print('Failed to compile %s' % file)
35 | break
36 | if os.path.exists('a.exe'):  # delete a.exe if it exists
37 | os.remove('a.exe')
38 | return ret
39 |
40 |
41 | if __name__ == '__main__':
42 | sys.exit(main())
43 |
--------------------------------------------------------------------------------
/example/continued_fraction.c:
--------------------------------------------------------------------------------
1 | #include "pe.hpp"
2 | using namespace pe;
3 |
4 | template
5 | void demo() {
6 | std::vector data = {1, 2, 2, 2, 2, 2, 2, 2, 2, 2};
7 | for (int i = 0; i < 10; ++i) {
8 | std::cout << FromCf(data, i) << std::endl;
9 | }
10 | std::cout << FromCfN(data) << std::endl;
11 | for (int i = 50; i <= 50; ++i) {
12 | std::cout << i << " " << ToCf(0, 1, 6, 1, i) << std::endl;
13 | }
14 |
15 | std::cout << ToCf(0, 1, 2, 1, 10) << std::endl;
16 | std::cout << FromCf(ToCf(0, 1, 2, 1, 50)) << std::endl;
17 |
18 | std::cout << ToCf(123456, 654321) << std::endl;
19 | std::cout << FromCf(ToCf(123456, 654321)) << std::endl;
20 | }
21 |
22 | int main() {
23 | demo();
24 | #if ENABLE_GMP
25 | demo();
26 | #endif
27 | return 0;
28 | }
29 |
--------------------------------------------------------------------------------
/example/count_carlitz_words.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | // Given the number of occurrences of each letter, count the words in which no
5 | // two adjacent letters are the same.
6 |
7 | int brute_force(const std::vector& vec) {  // Counts by enumerating permutations; vec[k] is the number of copies of letter k.
8 | int size = 0;
9 | for (auto& iter : vec) size += iter;  // total word length
10 |
11 | std::vector data;
12 | int id = 0;
13 | for (auto iter : vec) {  // sorted multiset: vec[0] copies of 0, vec[1] copies of 1, ...
14 | for (int j = 0; j < iter; ++j) data.push_back(id);
15 | ++id;
16 | }
17 |
18 | int ret = 0;
19 | do {
20 | int ok = 1;
21 | for (int i = 0; i < size - 1; ++i) {
22 | int idx = (i + 1) % size;  // i + 1 < size here, so this is just i + 1
23 | if (i != idx && data[i] == data[idx]) {
24 | ok = 0;
25 | break;
26 | }
27 | }
28 | ret += ok;
29 | } while (next_permutation(data.begin(), data.end()));  // starts from the sorted order built above
30 | return ret;
31 | }
32 |
33 | int main() {  // Prints brute_force(v) next to counter.Cal(v) for each test vector v.
34 | CarlitzWordsCounter counter(1000000007, 1000000);
35 |
36 | std::vector> test_data = {
37 | {1}, {2}, {1, 1}, {1, 3}, {2, 2}, {3, 7},
38 | {2, 3}, {2, 2, 2}, {2, 3, 3}, {3, 3, 2, 2}, {3, 3, 3}, {2, 2, 2, 2, 2},
39 | {4, 4}, {4, 4, 2, 2}, {5, 5, 5},
40 | };
41 |
42 | for (const std::vector& iter : test_data) {
43 | std::cout << brute_force(iter) << " " << counter.Cal(iter) << std::endl;
44 | }
45 | return 0;
46 | }
--------------------------------------------------------------------------------
/example/dfa_counter.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | // Count the numbers that contain the digit 3.
5 |
6 | int64 cal(int64 n) {  // Returns 1 if non-negative n contains the decimal digit 3, otherwise 0.
7 | for (; n; n /= 10)  // walk the decimal digits from least significant
8 | if (n % 10 == 3) return 1;
9 | return 0;
10 | }
11 |
12 | int64 bf(int64 n) { return XRange(1LL, n).Map(cal).Sum(); }  // Brute force: sums cal over XRange(1LL, n).
13 |
14 | int main() {  // Builds two DFA counters and prints, for n = 10..10^6, the brute-force result followed by both counters' results.
15 | PE_INIT(maxp = 1000000);
16 |
17 | // state 0: initial state
18 | // state 1: 3 is not seen
19 | // state 2: 3 is seen
20 | DfaCounter counter1;
21 | counter1.Init(3, 10, 16);
22 | for (int i = 1; i <= 9; ++i)
23 | if (i != 3) counter1.AddTrans(0, i, 1);
24 | counter1.AddTrans(0, 3, 2);
25 | counter1.AddTrans(0, 0, 0);  // a leading 0 keeps counter1 in the initial state
26 | for (int i = 0; i <= 9; ++i)
27 | if (i != 3) counter1.AddTrans(1, i, 1);
28 | counter1.AddTrans(1, 3, 2);
29 | for (int i = 0; i <= 9; ++i) counter1.AddTrans(2, i, 2);
30 | counter1.MarkTargetState(2);
31 |
32 | // In counter2, the DFA doesn't accept leading zeros. In other words, if it is
33 | // in the initial state and 0 comes, the target state is invalid. So we need
34 | // to call set_count_each_len(1).
35 | //
36 | // state 0: initial state
37 | // state 1: 3 is not seen
38 | // state 2: 3 is seen
39 | // state 3: invalid
40 | DfaCounter counter2;
41 | counter2.Init(4, 10, 16);
42 | for (int i = 1; i <= 9; ++i)
43 | if (i != 3) counter2.AddTrans(0, i, 1);
44 | counter2.AddTrans(0, 3, 2);
45 | for (int i = 0; i <= 9; ++i)
46 | if (i != 3) counter2.AddTrans(1, i, 1);
47 | counter2.AddTrans(1, 3, 2);
48 | for (int i = 0; i <= 9; ++i) counter2.AddTrans(2, i, 2);
49 | counter2.MarkTargetState(2);
50 | counter2.AddTrans(0, 0, 3);  // a leading 0 sends counter2 to the invalid state
51 | for (int i = 0; i <= 9; ++i) counter2.AddTrans(3, i, 3);  // the invalid state is never left
52 | counter2.set_count_each_len(1);
53 |
54 | for (int64 n = 10; n <= 1000000; n *= 10) {
55 | auto a = bf(n);
56 | auto b = counter1.Cal(n);
57 | auto c = counter2.Cal(n);
58 | std::cout << n << "\t" << a << "\t" << b << "\t" << c << std::endl;
59 | }
60 | return 0;
61 | }
--------------------------------------------------------------------------------
/example/dfa_summer.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | // Sum the cubes (modulo mod) of the numbers that contain the digit 3.
5 | const int64 mod = 1000000007;
6 | using MT = NMod64;
7 |
8 | int64 cal(int64 n) {  // Returns n^3 % mod if non-negative n contains the decimal digit 3, otherwise 0.
9 | const int64 m = n;  // keep the original value; n is consumed digit by digit below
10 | for (; n; n /= 10)
11 | if (n % 10 == 3) return m * m % mod * m % mod;
12 | return 0;
13 | }
14 |
15 | int64 bf(int64 n) { return XRange(1LL, n).Map(cal).SumMod(mod); }  // Brute force: sums cal over XRange(1LL, n) with SumMod(mod).
16 |
17 | int main() {  // Builds two DFA summers and prints, for n = 10..10^6, the brute-force result followed by both summers' results.
18 | PE_INIT(maxp = 1000000);
19 |
20 | // state 0: initial state
21 | // state 1: 3 is not seen
22 | // state 2: 3 is seen
23 | DfaSummer summer1;
24 | summer1.Init(3, 3, 10, 16);
25 | for (int i = 1; i <= 9; ++i)
26 | if (i != 3) summer1.AddTrans(0, i, 1);
27 | summer1.AddTrans(0, 3, 2);
28 | summer1.AddTrans(0, 0, 0);  // a leading 0 keeps summer1 in the initial state
29 | for (int i = 0; i <= 9; ++i)
30 | if (i != 3) summer1.AddTrans(1, i, 1);
31 | summer1.AddTrans(1, 3, 2);
32 | for (int i = 0; i <= 9; ++i) summer1.AddTrans(2, i, 2);
33 | summer1.MarkTargetState(2);
34 |
35 | // In summer2, the DFA doesn't accept leading zeros. In other words, if it is
36 | // in the initial state and 0 comes, the target state is invalid. So we need
37 | // to call set_count_each_len(1).
38 | //
39 | // state 0: initial state
40 | // state 1: 3 is not seen
41 | // state 2: 3 is seen
42 | // state 3: invalid
43 | DfaSummer summer2;
44 | summer2.Init(4, 3, 10, 16);
45 | for (int i = 1; i <= 9; ++i)
46 | if (i != 3) summer2.AddTrans(0, i, 1);
47 | summer2.AddTrans(0, 3, 2);
48 | for (int i = 0; i <= 9; ++i)
49 | if (i != 3) summer2.AddTrans(1, i, 1);
50 | summer2.AddTrans(1, 3, 2);
51 | for (int i = 0; i <= 9; ++i) summer2.AddTrans(2, i, 2);
52 | summer2.MarkTargetState(2);
53 | summer2.AddTrans(0, 0, 3);  // a leading 0 sends summer2 to the invalid state
54 | for (int i = 0; i <= 9; ++i) summer2.AddTrans(3, i, 3);  // the invalid state is never left
55 | summer2.set_count_each_len(1);
56 |
57 | for (int64 n = 10; n <= 1000000; n *= 10) {
58 | auto a = bf(n);
59 | auto b = summer1.Cal(n);
60 | auto c = summer2.Cal(n);
61 | std::cout << n << "\t" << a << "\t" << b << "\t" << c << std::endl;
62 | }
63 | return 0;
64 | }
--------------------------------------------------------------------------------
/example/dva.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace std;
3 | using namespace pe;
4 |
5 | const int64 mod = 1000000007;
6 | using MT = NMod64;
7 |
8 | // https://en.wikipedia.org/wiki/Arithmetic_function#Relations_among_the_functions
9 | void ConvolutionExamples() {
10 | const int64 n = 10000000;
11 |
12 | auto eps = MakePrefixSumEpsilon(n);
13 | auto one = MakePrefixSumOne(n);
14 | auto mu = MakePrefixSumMu(n);
15 | auto id = MakePrefixSumId(n);
16 | auto phi = MakePrefixSumPhi(n);
17 | {
18 | // eps = one * mu
19 | auto t = DVAConv(one, mu);
20 | for (int i = 1; i < t.key_size; ++i) {
21 | PE_ASSERT(t.values[i] == eps.values[i]);
22 | // cout << t.values[i] << " " << eps.values[i] << endl;
23 | // cout << (t.values[i] == eps.values[i]) << endl;
24 | }
25 | }
26 | {
27 | // phi = mu * id
28 | auto t = DVAConv(mu, id);
29 | for (int i = 1; i < t.key_size; ++i) {
30 | PE_ASSERT(t.values[i] == phi.values[i]);
31 | }
32 | }
33 | {
34 | // id = one * phi
35 | auto t = DVAConv(one, phi);
36 | for (int i = 1; i < t.key_size; ++i) {
37 | PE_ASSERT(t.values[i] == id.values[i]);
38 | }
39 | }
40 |
41 | // Divisor count
42 | DVA d0(n);
43 | {
44 | for (int i = 1; i <= n; ++i) {
45 | int64 me = 1;
46 | for (auto iter : Factorize(i)) me *= iter.second + 1;
47 | d0[i] += me;
48 | }
49 | for (int i = 1; i < d0.key_size; ++i) d0.values[i] += d0.values[i - 1];
50 | }
51 | {
52 | // d0 = one * one
53 | // d0 = id0 * one
54 | auto t = DVAConv(one, one);
55 | for (int i = 1; i < t.key_size; ++i) {
56 | PE_ASSERT(t.values[i] == d0.values[i]);
57 | }
58 | }
59 |
60 | // Divisor sum
61 | DVA d1(n);
62 | {
63 | for (int i = 1; i <= n; ++i) {
64 | int64 me = 0;
65 | for (auto iter : GetFactors(i)) me += iter;
66 | d1[i] += me;
67 | }
68 | for (int i = 1; i < d1.key_size; ++i) d1.values[i] += d1.values[i - 1];
69 | }
70 | {
71 | // d1 = id * one
72 | // d1 = id1 * one
73 | auto t = DVAConv(id, one);
74 | for (int i = 1; i < t.key_size; ++i) {
75 | PE_ASSERT(t.values[i] == d1.values[i]);
76 | }
77 | }
78 |
79 | // Divisor square sum
80 | DVA d2(n);
81 | {
82 | for (int i = 1; i <= n; ++i) {
83 | int64 me = 0;
84 | for (auto iter : GetFactors(i)) me += iter * iter;
85 | d2[i] += me;
86 | }
87 | for (int i = 1; i < d2.key_size; ++i) d2.values[i] += d2.values[i - 1];
88 | }
89 | DVA id2(n);
90 | {
91 | for (int i = 1; i < id2.key_size; ++i) {
92 | id2.values[i] = P2SumMod(id2.keys[i], mod);
93 | }
94 | }
95 | {
96 | // d2 = id2 * one
97 | auto t = DVAConv(id2, one);
98 | for (int i = 1; i < t.key_size; ++i) {
99 | PE_ASSERT(t.values[i] == d2.values[i]);
100 | }
101 | }
102 |
103 | // Prime omega
104 | // https://en.wikipedia.org/wiki/Prime_omega_function
105 | DVA omega(n);
106 | {
107 | for (int i = 1; i <= n; ++i) {
108 | omega[i] += Factorize(i).size();
109 | }
110 | for (int i = 1; i < omega.key_size; ++i)
111 | omega.values[i] += omega.values[i - 1];
112 | }
113 | {
114 | // omega = primeq * one
115 | auto t = DVAConv(PrimeS0(n), one);
116 | for (int i = 1; i < t.key_size; ++i) {
117 | PE_ASSERT(t.values[i] == omega.values[i]);
118 | }
119 | }
120 | }
121 |
122 | int main() {
123 | PE_INIT(maxp = 10000000, cal_phi = 1, cal_mu = 1);
124 | ConvolutionExamples();
125 | return 0;
126 | }
127 |
--------------------------------------------------------------------------------
/example/linear_recurrence.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | const int64 mod = 1000000007;
5 |
6 | int64 solve_nth(int64 n) {  // Returns component 1 of the MatrixPowerMod result for the 2x2 setup below.
7 | auto ans = MatrixPowerMod(  // presumably computes m^n * v; confirm against MatrixPowerMod
8 | [=](auto& m, auto& v) {
9 | m(0, 0) = 1;
10 | m(0, 1) = 1;
11 | m(1, 0) = 1;
12 | v[0] = 1;
13 | v[1] = 0;
14 | },
15 | 2, n);
16 | return ans[1];
17 | }
18 |
19 | int64 solve_nth_sum(int64 n) {  // Returns component 2 of the MatrixPowerMod result for the 3x3 setup below.
20 | auto ans = MatrixPowerMod(  // presumably computes m^n * v; confirm against MatrixPowerMod
21 | [=](auto& m, auto& v) {
22 | m(0, 0) = 1;
23 | m(0, 1) = 1;
24 | m(1, 0) = 1;
25 | m(2, 0) = 1;  // row 2 is (1, 0, 1): under m * v, component 2 accumulates component 0
26 | m(2, 2) = 1;
27 | v[0] = 1;
28 | v[1] = 0;
29 | v[2] = 0;
30 | },
31 | 3, n);
32 | return ans[2];
33 | }
34 |
35 | int main() {  // For n = 1, 10, ..., 10^9, prints the n-th value and then the sum computed by the different methods.
36 | const std::vector init = {0LL, 1LL, 1LL, 2LL, 3LL, 5LL, 8LL};
37 |
38 | for (int64 n = 1; n <= 1000000000; n *= 10) {
39 | // Use the Berlekamp-Massey algorithm to find the recurrence.
40 | // The minimal initial element count is: 2 * order + 1
41 | const int64 ans0 = *FindLinearRecurrenceValueAt(init, n, mod);
42 | // Compute the nth element assuming the recurrence is known.
43 | const int64 ans1 =
44 | LinearRecurrenceValueAt({mod - 1, mod - 1, 1}, init, n, mod);
45 | // Use matrix multiplication to compute the nth element.
46 | const int64 ans2 = solve_nth(n);
47 | std::cout << "n = " << n << std::endl;
48 | std::cout << "Ans0 = " << ans0 << std::endl;
49 | std::cout << "Ans1 = " << ans1 << std::endl;
50 | std::cout << "Ans2 = " << ans2 << std::endl;
51 | std::cout << std::endl;
52 | }
53 |
54 | for (int64 n = 1; n <= 1000000000; n *= 10) {
55 | // The minimal initial element count is: 2 * (order + 1) + 1
56 | const int64 ans1 =
57 | LinearRecurrenceSumAt({mod - 1, mod - 1, 1}, init, n, mod);
58 | const int64 ans2 = solve_nth_sum(n);
59 | std::cout << "n = " << n << std::endl;
60 | std::cout << "Ans1 = " << ans1 << std::endl;
61 | std::cout << "Ans2 = " << ans2 << std::endl;
62 | std::cout << std::endl;
63 | }
64 | return 0;
65 | }
--------------------------------------------------------------------------------
/example/matrix_power.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | const int64 mod = 316227766016779;
5 | using MT = NMod64;
6 |
7 | // Computes
8 | // |1 1|^n * |1|
9 | // |1 0| |0|
10 |
11 | // Mod is specified at compile time.
12 | // The element type is chosen internally.
13 | int64 solve0(int64 n) {
14 | auto ans = MatrixPowerMod(
15 | [=](auto& m, auto& v) {
16 | m(0, 0) = 1;
17 | m(0, 1) = 1;
18 | m(1, 0) = 1;
19 | v[0] = 1;
20 | v[1] = 0;
21 | },
22 | 2, n);
23 | return ans[0];
24 | }
25 |
26 | // Mod is associated with T at compile time.
27 | int64 solve1(int64 n) {
28 | auto ans = MatrixPowerMod(
29 | [=](auto& m, auto& v) {
30 | m(0, 0) = 1;
31 | m(0, 1) = 1;
32 | m(1, 0) = 1;
33 | v[0] = 1;
34 | v[1] = 0;
35 | },
36 | 2, n);
37 | return ans[0].value();
38 | }
39 |
40 | #if ENABLE_EIGEN
41 | // Mod is associated with T at runtime.
42 | // Different threads use different mod.
43 | int64 solve2(int64 n, int64 rmod) {
44 | SetEigenNbThreads(1);
45 | TLMod64::Set(rmod);
46 | auto ans = MatrixPowerMod>(
47 | [=](auto& m, auto& v) {
48 | m(0, 0) = 1;
49 | m(0, 1) = 1;
50 | m(1, 0) = 1;
51 | v[0] = 1;
52 | v[1] = 0;
53 | },
54 | 2, n);
55 | SetEigenNbThreads(0);
56 | return ans[0].value();
57 | }
58 | #endif
59 |
60 | // Mod is associated with T at runtime.
61 | // All the threads use the same mod.
62 | int64 solve3(int64 n, int64 rmod) {
63 | using T = NModNumber;
64 | DefaultMod::Set(rmod);
65 | auto ans = MatrixPowerMod(
66 | [=](auto& m, auto& v) {
67 | m(0, 0) = 1;
68 | m(0, 1) = 1;
69 | m(1, 0) = 1;
70 | v[0] = 1;
71 | v[1] = 0;
72 | },
73 | 2, n);
74 | return ans[0].value();
75 | }
76 |
77 | // Mod is specified at runtime.
78 | // If int128 is available, int128 is used as the element type and different
79 | // threads can use different mods.
80 | // Otherwise DefaultMod is used and all threads share the same mod. This is the
81 | // same as solve3, except that solve3 must set the default mod explicitly
82 | // while solve4 sets it automatically.
83 | int64 solve4(int64 n, int64 mod) {
84 | auto ans = MatrixPowerMod(
85 | [=](auto& m, auto& v) {
86 | m(0, 0) = 1;
87 | m(0, 1) = 1;
88 | m(1, 0) = 1;
89 | v[0] = 1;
90 | v[1] = 0;
91 | },
92 | 2, n, mod);
93 | return ans[0];
94 | }
95 |
96 | int main() {
97 | PE_INIT(maxp = 200000000);
98 |
99 | for (int64 n = 1; n <= 1000000000; n *= 10) {
100 | std::cout << "n = " << n << std::endl;
101 | int64 ans0 = solve0(n);
102 | int64 ans1 = solve1(n);
103 | #if ENABLE_EIGEN
104 | int64 ans2 = solve2(n, mod);
105 | #endif
106 | int64 ans3 = solve3(n, mod);
107 | int64 ans4 = solve4(n, mod);
108 | std::cout << "Ans0 = " << ans0 << std::endl;
109 | std::cout << "Ans1 = " << ans1 << std::endl;
110 | #if ENABLE_EIGEN
111 | std::cout << "Ans2 = " << ans2 << std::endl;
112 | #endif
113 | std::cout << "Ans3 = " << ans3 << std::endl;
114 | std::cout << "Ans4 = " << ans4 << std::endl;
115 | std::cout << std::endl;
116 | }
117 |
118 | for (int64 rmod = mod; rmod <= mod + 100; ++rmod)
119 | if (IsPrime(rmod)) {
120 | const int64 n = 1000000000;
121 | std::cout << "n = " << n << std::endl;
122 | std::cout << "rmod = " << rmod << std::endl;
123 | #if ENABLE_EIGEN
124 | int64 ans2 = solve2(n, rmod);
125 | #endif
126 | int64 ans3 = solve3(n, rmod);
127 | int64 ans4 = solve4(n, rmod);
128 | #if ENABLE_EIGEN
129 | std::cout << "Ans2 = " << ans2 << std::endl;
130 | #endif
131 | std::cout << "Ans3 = " << ans3 << std::endl;
132 | std::cout << "Ans4 = " << ans4 << std::endl;
133 | std::cout << std::endl;
134 | }
135 |
136 | return 0;
137 | }
--------------------------------------------------------------------------------
/example/mma_find_recurrence.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | // http://oeis.org/A001499
5 | // OEIS gives two recurrence formulas:
6 | // 2 a[n] = 2 n (n-1) a[n-1] + n (n-1)^2 a[n-2]
7 | // 2 a[n] = n (n-1)^2 ((2 n - 3) a[n-2] + (n-2)^2 a[n-3])
8 | //
9 | // This example uses some sample data to guess the formula.
10 | // Note: the result may not be unique.
11 |
12 | int main() {
13 | mma::FRHelper helper;
14 | helper
15 | .set_values({1, 0, 1, 6, 90, 2040, 67950, 3110940, 187530840, 14398171200,
16 | 1371785398200})
17 | // In most cases we can use all the available check points, but
18 | // sometimes it has different recurrence according to n. For example, when
19 | // n is odd, it has a recurrence formula. When n is even, it has a
20 | // different one.
21 | .set_check_points({3, 5, 7})
22 | .set_offsets({1, 2}) // Tune the offset manually
23 | .set_max_degree(5) // Tune the polynomial degree manually
24 | .set_max_abs_coe(4) // Tune the coefficient manually
25 | .set_leading(2); // Tune the leading manually
26 |
27 | std::cout << helper << std::endl << std::endl;
28 |
29 | // The output of the generated mathematica command is
30 | const std::string result =
31 | "{{x1p0 -> 0, x1p1 -> -2, x1p2 -> 2, x1p3 -> 0, x1p4 -> 0, x1p5 "
32 | "-> 0, x2p0 -> 0, x2p1 -> 1, x2p2 -> -2, x2p3 -> 1, x2p4 -> 0, "
33 | "x2p5 -> 0}}";
34 |
35 | // Validate the result by sample data.
36 | helper.Validate(result);
37 |
38 | // Validate the result by more data.
39 | std::vector dp0 = {1, 0, 1, 6, 90, 2040, 67950};
40 | for (int64 i = 7; i < 100; ++i) {
41 | dp0.push_back((2 * i * (i - 1) * dp0[i - 1] + i * sq(i - 1) * dp0[i - 2]) /
42 | 2);
43 | }
44 |
45 | std::vector dp1 = {1, 0, 1, 6, 90, 2040, 67950};
46 | for (int64 i = 7; i < 100; ++i) {
47 | dp1.push_back(i * sq(i - 1) *
48 | ((2 * i - 3) * dp1[i - 2] + sq(i - 2) * dp1[i - 3]) / 2);
49 | }
50 |
51 | helper.Validate(dp0, result);
52 | helper.Validate(dp1, result);
53 |
54 | return 0;
55 | }
--------------------------------------------------------------------------------
/example/mma_interpolating_polynomial.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | int64 f(int64 x) { return 1 + 2 * x + 3 * x * x + 4 * x * x * x; }
5 |
6 | int64 g(int64 x, int64 y) {
7 | return 1 + 2 * x + 3 * y + 4 * x * y + 5 * x * x + 6 * y * y;
8 | }
9 |
10 | int main() {
11 | PE_INIT(maxp = 2000000);
12 | {
13 | mma::IntPoly ip;
14 | ip.Reset({"x"}); // one variable x.
15 | for (int64 x = 0; x <= 10; ++x) {
16 | ip.Add({x}, f(x));
17 | }
18 | std::cout << ip << std::endl;
19 | // output:
20 | // InterpolatingPolynomial[{{0, 1}, {1, 10}, {2, 49}, {3, 142}, {4, 313},
21 | // {5, 586}, {6, 985}, {7, 1534}, {8, 2257}, {9, 3178}, {10, 4321}}, x]
22 | // Simplified mma output:
23 | // 1 + 2 x + 3 x^2 + 4 x^3
24 | }
25 | {
26 | mma::IntPoly2D ip(2, // x's degree
27 | 2, // y's degree
28 | 2 // max a + b of terms in the form of x^a y^b
29 | );
30 | for (int64 x = 0; x <= 4; ++x)
31 | for (int64 y = 0; y <= 4; ++y) {
32 | ip.Add({x, y}, g(x, y));
33 | }
34 | std::cout << ip << std::endl;
35 | // output:
36 | // Values[Solve[{1 x0 + 0 x1 + 0 x2 + 0 x3 + 0 x4 + 0 x5 == 1 && 1 x0 + 1 x1
37 | // + 1 x2 + 0 x3 + 0 x4 + 0 x5 == 8 && 1 x0 + 2 x1 + 4 x2 + 0 x3 + 0 x4 + 0
38 | // x5 == 25 && 1 x0 + 3 x1 + 9 x2 + 0 x3 + 0 x4 + 0 x5 == 52 && 1 x0 + 4 x1
39 | // + 16 x2 + 0 x3 + 0 x4 + 0 x5 == 89 && 1 x0 + 0 x1 + 0 x2 + 1 x3 + 0 x4 +
40 | // 1 x5 == 6 && 1 x0 + 1 x1 + 1 x2 + 1 x3 + 1 x4 + 1 x5 == 16 && 1 x0 + 2 x1
41 | // + 4 x2 + 1 x3 + 2 x4 + 1 x5 == 36 && 1 x0 + 3 x1 + 9 x2 + 1 x3 + 3 x4 + 1
42 | // x5
43 | // == 66 && 1 x0 + 4 x1 + 16 x2 + 1 x3 + 4 x4 + 1 x5 == 106 && 1 x0 + 0 x1 +
44 | // 0 x2 + 2 x3 + 0 x4 + 4 x5 == 19&& 1 x0 + 1 x1 + 1 x2 + 2 x3 + 2 x4 + 4 x5
45 | // == 32 && 1 x0 + 2 x1 + 4 x2 + 2 x3 + 4 x4 + 4 x5 == 55 && 1 x0 + 3 x1 + 9
46 | // x2 + 2 x3 + 6 x4 + 4 x5 == 88 && 1 x0 + 4 x1 + 16 x2 + 2 x3 + 8 x4 + 4 x5
47 | // == 131 && 1 x0 + 0 x1 + 0 x2 + 3 x3 + 0 x4 + 9 x5 == 40 && 1 x0 + 1 x1 +
48 | // 1 x2 + 3 x3 + 3 x4 + 9 x5 == 56 && 1 x0 + 2 x1 + 4 x2+ 3 x3 + 6 x4 + 9 x5
49 | // == 82 && 1 x0 + 3 x1 + 9 x2 + 3 x3 + 9 x4 + 9 x5 == 118 && 1 x0 + 4 x1 +
50 | // 16 x2 + 3 x3 + 12 x4 + 9 x5 == 164 && 1 x0 + 0 x1 + 0 x2 + 4 x3 + 0 x4 +
51 | // 16 x5 == 69 && 1 x0 + 1 x1 + 1 x2 + 4 x3 + 4 x4 + 16 x5== 88 && 1 x0 + 2
52 | // x1 + 4 x2 + 4 x3 + 8 x4 + 16 x5 == 117 && 1 x0 + 3 x1 + 9 x2 + 4 x3 + 12
53 | // x4 + 16 x5 == 156 && 1 x0 + 4 x1 + 16 x2 + 4 x3 + 16 x4 + 16 x5 == 205},
54 | // {x0, x1, x2, x3, x4, x5}]] mma output:
55 | // {{1, 3, 6, 2, 4, 5}}
56 | ip.Show(std::cout, {1, 3, 6, 2, 4, 5});
57 | // output:
58 | // 1 + 3 y + 6 y^2 + 2 x + 4 x y + 5 x^2
59 | }
60 | return 0;
61 | }
--------------------------------------------------------------------------------
/example/mma_to_cpp.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | int main() {
5 | PE_INIT(maxp = 2000000);
6 | // Convert mathematica expression to cpp
7 | // For mathematica function, it only supports Sqrt
8 | for (auto a : mma::Compile(
9 | "Sqrt(-A^4 + 2 A^2 B^2 - B^4 + 2 A^2 x1^2 + 2 B^2 x1^2 - x1^4 - 4 "
10 | "A^2 x1 x2 - 4 B^2 x1 x2 + 4 x1^3 x2 + 2 A^2 x2^2 + 2 B^2 x2^2 - "
11 | "6 x1^2 x2^2 + 4 x1 x2^3 - x2^4)/(2 Sqrt(x1^2 - 2 x1 x2 + "
12 | "x2^2))")) {
13 | std::cout << a << std::endl;
14 | }
15 | // output:
16 | // sqrt(-A * A * A * A + 2 * A * A * B * B - B * B * B * B + 2 * A * A * x1 *
17 | // x1 + 2 * B * B * x1 * x1 - x1 * x1 * x1 * x1 - 4 * A * A * x1 * x2 - 4 * B
18 | // * B * x1 * x2 + 4 * x1 * x1 * x1 * x2 + 2 * A * A * x2 * x2 + 2 * B * B *
19 | // x2 * x2 - 6 * x1 * x1 * x2 * x2 + 4 * x1 * x2 * x2 * x2 - x2 * x2 * x2 *
20 | // x2) / (2
21 | // * sqrt(x1 * x1 - 2 * x1 * x2 + x2 * x2))
22 | // Convert mathematica expression to cpp using modular arithmetic.
23 | for (auto a : mma::CompileMod("(a^4+a b)*7/b")) {
24 | std::cout << a << std::endl;
25 | }
26 | // output:
27 | // int64 foo(int64 a, int64 b, int64 mod) {
28 | // const int64 t0 = a % mod;
29 | // const int64 t1 = 4;
30 | // const int64 t2 = PowerMod(t0, t1, mod) % mod;
31 | // const int64 t3 = b % mod;
32 | // const int64 t4 = (t0 * t3) % mod;
33 | // const int64 t5 = (t2 + t4) % mod;
34 | // const int64 t6 = 7 % mod;
35 | // const int64 t7 = (t5 * t6) % mod;
36 | // const int64 t8 = t7 * ModInv(t3, mod) % mod;
37 | // return t8;
38 | // }
39 | return 0;
40 | }
--------------------------------------------------------------------------------
/example/mod_number.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | const int64 mod = 1000000007;
5 |
6 | int main() {
7 | {
8 | NMod64 a(5);
9 | std::cout << a.Power(10) << std::endl;
10 | std::cout << Power(a, 10) << std::endl;
11 | std::cout << PowerMod(a, 10) << std::endl;
12 | std::cout << PowerMod(a, 10, mod) << std::endl;
13 | std::cout << PowerMod(a, 10) << std::endl;
14 | std::cout << PowerMod(a, 10, mod) << std::endl;
15 | }
16 | {
17 | TLMod::Set(mod);
18 | TLNMod64<> a(5);
19 | std::cout << a.Power(10) << std::endl;
20 | std::cout << Power(a, 10) << std::endl;
21 | std::cout << PowerMod(a, 10) << std::endl;
22 | std::cout << PowerMod(a, 10, mod) << std::endl;
23 | std::cout << PowerMod(a, 10) << std::endl;
24 | std::cout << PowerMod(a, 10, mod) << std::endl;
25 | }
26 | {
27 | DefaultMod::Set(mod);
28 | NModNumber a(5);
29 | std::cout << a.Power(10) << std::endl;
30 | std::cout << Power(a, 10) << std::endl;
31 | std::cout << PowerMod(a, 10) << std::endl;
32 | std::cout << PowerMod(a, 10, mod) << std::endl;
33 | std::cout << PowerMod(a, 10) << std::endl;
34 | std::cout << PowerMod(a, 10, mod) << std::endl;
35 | }
36 | {
37 | // NModNumber> a(5);
38 | }
39 |
40 | {
41 | NModM64 a(5);
42 | std::cout << a.Power(10) << std::endl;
43 | std::cout << Power(a, 10) << std::endl;
44 | std::cout << PowerMod(a, 10) << std::endl;
45 | std::cout << PowerMod(a, 10, mod) << std::endl;
46 | std::cout << PowerMod(a, 10) << std::endl;
47 | std::cout << PowerMod(a, 10, mod) << std::endl;
48 | }
49 | {
50 | TLMod::Set(mod);
51 | TLNModM64<> a(5);
52 | std::cout << a.Power(10) << std::endl;
53 | std::cout << Power(a, 10) << std::endl;
54 | std::cout << PowerMod(a, 10) << std::endl;
55 | std::cout << PowerMod(a, 10, mod) << std::endl;
56 | std::cout << PowerMod(a, 10) << std::endl;
57 | std::cout << PowerMod(a, 10, mod) << std::endl;
58 | }
59 | {
60 | DefaultMod::Set(mod);
61 | NModNumberM a(5);
62 | std::cout << a.Power(10) << std::endl;
63 | std::cout << Power(a, 10) << std::endl;
64 | std::cout << PowerMod(a, 10) << std::endl;
65 | std::cout << PowerMod(a, 10, mod) << std::endl;
66 | std::cout << PowerMod(a, 10) << std::endl;
67 | std::cout << PowerMod(a, 10, mod) << std::endl;
68 | }
69 | {
70 | // Not recommended.
71 | NModNumberM> a(5, MemMod(mod));
72 | std::cout << a.Power(10) << std::endl;
73 | std::cout << Power(a, 10) << std::endl;
74 | std::cout << PowerMod(a, 10) << std::endl;
75 | std::cout << PowerMod(a, 10, mod) << std::endl;
76 | std::cout << PowerMod(a, 10) << std::endl;
77 | std::cout << PowerMod(a, 10, mod) << std::endl;
78 | }
79 | return 0;
80 | }
--------------------------------------------------------------------------------
/example/multiplicative_function_prefix_sum_common_function.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | const int64 mod = 1000000007;
5 |
6 | int main() {
7 |   PE_INIT(maxp = 2000000, cal_phi = 1, cal_mu = 1);
8 |   // One calculator per quantity; the *Moder variants are constructed with mod.
9 |   SFCounter sf_counter;
10 |   MuSummer mu_summer;
11 |   MuPhiSummer mu_phi_summer;
12 |   Sigma0Summer sigma0_summer;
13 |   MuPhiSumModer mu_phi_sum_moder(mod);
14 |   Sigma0SumModer sigma0_sum_moder(mod);
15 |   // Print every quantity for n = 1, 10, 100, ..., 10^10.
16 |   for (int64 n = 1; n <= 10000000000; n *= 10) {
17 |     std::cout << "n = " << n << std::endl;
18 |     std::cout << "# square free number " << sf_counter.Cal(n) << std::endl;
19 |     std::cout << "sum mu " << mu_summer.Cal(n) << std::endl;
20 |     std::cout << "sum mu " << mu_phi_summer.CalSumMu(n) << std::endl;
21 |     std::cout << "sum phi " << mu_phi_summer.CalSumPhi(n) << std::endl;
22 |     std::cout << "sum sigma0 " << sigma0_summer.Cal(n) << std::endl;
23 |
24 |     std::cout << "sum mu mod " << mod << " " << mu_phi_sum_moder.CalSumMu(n)
25 |               << std::endl;
26 |     std::cout << "sum phi mod " << mod << " " << mu_phi_sum_moder.CalSumPhi(n)
27 |               << std::endl;
28 |     std::cout << "sum sigma0 mod " << mod << " " << sigma0_sum_moder.Cal(n)
29 |               << std::endl;
30 |     std::cout << std::endl;
31 |   }
32 |   return 0;
33 | }
34 |
--------------------------------------------------------------------------------
/example/parallel_cal_prime_pi.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | #if 0
5 | const int TN = 8;
6 | int64 CalPi0(int64 n) {
7 | int64 result = PARALLEL_RESULT(
8 | BEGIN_PARALLEL
9 | FROM 1 TO n EACH_BLOCK_IS 10000000 CACHE ""
10 | THREADS TN
11 | MAP {
12 | return IsPrimeEx(key);
13 | }
14 | REDUCE {
15 | result += value;
16 | return result;
17 | }
18 | END_PARALLEL);
19 | return result;
20 | }
21 |
22 | struct CalPI : public ParallelRangeT {
23 | int64 UpdateResult(int64 result, int64 value) { return result + value; }
24 | int64 WorkOnBlock(int64 first, int64 last, int64 worker) {
25 | int64 t = 0;
26 | for (int64 i = first; i <= last; ++i) t += IsPrimeEx(i);
27 | return t;
28 | }
29 | };
30 |
31 | int64 CalPi1(int64 n) {
32 | return CalPI()
33 | .From(1)
34 | .To(n)
35 | .DividedBy(10000000)
36 | .SetThreadsCount(TN)
37 | .Start()
38 | .Result();
39 | }
40 |
41 | int main() {
42 | PE_INIT(maxp = 2000000);
43 |
44 | const int m = 8;
45 | const int n = Power(10, m);
46 |
47 | int64 ans0 = CalPi0(n);
48 | int64 ans1 = CalPi1(n);
49 | std::cout << "n = " << n << std::endl;
50 | std::cout << "Expected: " << kPrimePi[m] << std::endl;
51 | std::cout << "CalPi0: " << ans0 << std::endl;
52 | std::cout << "CalPi1: " << ans1 << std::endl;
53 |
54 | return 0;
55 | }
56 | #else
57 | int main() {
58 | return 0;
59 | }
60 | #endif
--------------------------------------------------------------------------------
/example/partition_mobius.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | const int S = 500;
5 | // The number of non-negative solution for
6 | // 1 x1 + 2 x2 + 3 x3 + 4 x4 + 5 x5 = S
7 | // x1, x2, ..., x5 are distinct.
8 |
9 | const int64 mod = 1000000007;
10 | using MT = NMod64;
11 |
12 | char used[S + 1];
13 | int64 dfs(int now, int s) {  // brute force over distinct values for x_now..x_5
14 |   if (now == 6) return s == 0 ? 1 : 0;
15 |   int64 total = 0;
16 |   for (int value = 0; value <= S && now * value <= s; ++value) {
17 |     if (used[value] != 0) continue;  // value already taken by an earlier variable
18 |     used[value] = 1;
19 |     total += dfs(now + 1, s - now * value);
20 |     used[value] = 0;
21 |   }
22 |   return total;
23 | }
24 |
25 | int64 solve0() { return dfs(1, S); }
26 |
27 | std::map, int64> cache;
28 | int64 compute(std::vector coe) {
29 |   // Counts non-negative solutions of sum(coe[i] * x_i) = S, memoized by coe.
30 |   auto where = cache.find(coe);
31 |   if (where != cache.end()) {
32 |     return where->second;
33 |   }
34 |   int64 dp[S + 1] = {1};
35 |   for (int& iter : coe)
36 |     for (int i = 0; i + iter <= S; ++i) {
37 |       dp[i + iter] += dp[i];
38 |     }
39 |   return cache[coe] = dp[S];  // store the result so later lookups actually hit
40 | }
41 |
42 | int64 solve1() {
43 | PartitionMobius pm(mod);
44 | std::vector pattern = {1, 2, 3, 4, 5};
45 | MT ret = 0;
46 | for (Partition p : Partition::GenPartitions(5)) {
47 | std::map mem;
48 | for (int i = 0; i < 5; ++i) mem[p.colors[i]] += pattern[i];
49 | std::vector key;
50 | for (auto& i : mem) key.push_back(i.second);
51 | std::sort(std::begin(key), std::end(key));
52 | ret += compute(key) * pm.Cal(p) % mod;
53 | }
54 | return ret.value();
55 | }
56 |
57 | int main() {
58 | std::cout << solve0() << std::endl;
59 | std::cout << solve1() << std::endl;
60 | return 0;
61 | }
--------------------------------------------------------------------------------
/example/pe_db.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | void MakeDb() {
5 | PeDb db("D:/");
6 | const int64 n = 100000000000000;
7 | db.MergePrimePi(PrimeS0Parallel(n));
8 | #if PE_HAS_INT128
9 | db.MergePrimeSum(PrimeS1Parallel(n));
10 | #endif
11 | db.Save();
12 | }
13 |
14 | int main() {
15 | PE_INIT(maxp = 70000000);
16 | // MakeDb();
17 | const int64 n = 10000000000;
18 | PeDb db("D:/");
19 | db.Load();
20 |
21 | {
22 | DVA dva = PrimeS0Ex(n);
23 | DVA dva1(n);
24 | db.FillPrimePi(dva1);
25 | for (int i = 0; i < dva.key_size; ++i) {
26 | if (dva.values[i] != dva1.values[i]) {
27 | std::cout << dva.values[i] << std::endl;
28 | std::cout << dva1.values[i] << std::endl;
29 | }
30 | assert(dva.values[i] == dva1.values[i]);
31 | }
32 | std::cout << dva[n] << std::endl;
33 | std::cout << dva1[n] << std::endl;
34 | // https://oeis.org/A006880
35 | std::cout << db.PrimePi(Power(10LL, 14)) << std::endl;
36 | }
37 | #if PE_HAS_INT128
38 | {
39 | DVA dva = PrimeS1Ex(n);
40 | DVA dva1(n);
41 | db.FillPrimeSum(dva1);
42 | for (int i = 0; i < dva.key_size; ++i) {
43 | if (dva.values[i] != dva1.values[i]) {
44 | std::cout << dva.values[i] << std::endl;
45 | std::cout << dva1.values[i] << std::endl;
46 | }
47 | assert(dva.values[i] == dva1.values[i]);
48 | }
49 | std::cout << dva[n] << std::endl;
50 | std::cout << dva1[n] << std::endl;
51 | // https://oeis.org/A046731
52 | std::cout << db.PrimeSum(Power(10LL, 14)) << std::endl;
53 | }
54 | #endif
55 | return 0;
56 | }
--------------------------------------------------------------------------------
/example/power_sum.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | const int64 mod = 1000000007;
5 | using MT = NMod64;
6 |
7 | using SumfunctionType = int64 (*)(int64 n, int64 mod);
8 | SumfunctionType sum_function[8] = {
9 | nullptr, &P1SumMod, &P2SumMod, &P3SumMod,
10 | &P4SumMod, &P5SumMod, &P6SumMod, &P7SumMod,
11 | };
12 |
13 | int main() {
14 | PE_INIT(maxp = 2000000);
15 | PowerSumModer moder0(mod); // maxk = 128
16 | PowerSumModerB moder1(mod); // maxk = 128
17 | PowerSumModerB1 moder2(mod); // maxk = 128
18 | for (int k = 0; k <= 10; ++k) {
19 | std::cout << "k = " << k << std::endl;
20 | for (int64 n = 1; n <= 1000000000; n *= 10) {
21 | std::cout << "n = " << n << std::endl;
22 | std::vector ans = PowerSumModBatch(n, k, mod);
23 | std::cout << "InitPowerSumMod " << ans[k] << std::endl;
24 | std::cout << "PowerSumModer " << moder0.Cal(n, k) << std::endl;
25 | std::cout << "PowerSumModerB " << moder1.Cal(n, k) << std::endl;
26 | std::cout << "PowerSumModerB1 " << moder2.Cal(n, k) << std::endl;
27 | if (k >= 1 && k <= 7) {
28 | std::cout << "P" << k << "SumMod " << sum_function[k](n, mod)
29 | << std::endl;
30 | }
31 | std::cout << std::endl;
32 | }
33 | std::cout << std::endl;
34 | }
35 | return 0;
36 | }
--------------------------------------------------------------------------------
/example/prime_power_sum.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | const int64 mod = 1000000007;
5 | using MT = NMod64;
6 |
7 | void prime_s0() {
8 | CachedPi cp;
9 | for (int64 n = 1; n <= 10000000000; n *= 10) {
10 | std::cout << "n = " << n << std::endl;
11 | std::cout << "PrimeS0 " << PrimeS0(n)[n] % mod << std::endl;
12 | std::cout << "PrimeS0Parallel " << PrimeS0Parallel(n)[n] % mod
13 | << std::endl;
14 | std::cout << "PrimeS0Ex " << PrimeS0Ex(n)[n] % mod
15 | << std::endl;
16 | std::cout << "PrimeS0 " << PrimeS0(n)[n] << std::endl;
17 | std::cout << "PrimeS0Parallel " << PrimeS0Parallel(n)[n] << std::endl;
18 | std::cout << "PrimeS0Ex " << PrimeS0Ex(n)[n] << std::endl;
19 | std::cout << "PrimeSkEx " << PrimeSkEx(n, 0)[n] << std::endl;
20 | std::cout << "CachedPi " << cp.Cal(n) % mod << std::endl;
21 | std::cout << std::endl;
22 | }
23 | }
24 |
25 | void prime_s1() {
26 | for (int64 n = 1; n <= 10000000000; n *= 10) {
27 | std::cout << "n = " << n << std::endl;
28 | std::cout << "PrimeS1 " << PrimeS1(n)[n] % mod << std::endl;
29 | std::cout << "PrimeS1Parallel " << PrimeS1Parallel(n)[n] % mod
30 | << std::endl;
31 | std::cout << "PrimeS1Ex " << PrimeS1Ex(n)[n] % mod
32 | << std::endl;
33 | std::cout << "PrimeS1 " << PrimeS1(n)[n] << std::endl;
34 | std::cout << "PrimeS1Parallel " << PrimeS1Parallel(n)[n] << std::endl;
35 | std::cout << "PrimeS1Ex " << PrimeS1Ex(n)[n] << std::endl;
36 | std::cout << "PrimeSkEx " << PrimeSkEx(n, 1)[n] << std::endl;
37 | std::cout << std::endl;
38 | }
39 | }
40 |
41 | void prime_pmod_s0() {
42 | for (int64 n = 1; n <= 1000000000; n *= 10) {
43 | std::cout << "n = " << n << std::endl;
44 | for (int pmod = 2; pmod <= 7; ++pmod) {
45 | auto ans0 = PrimeS0PMod(n, pmod);
46 | auto ans1 = PrimeSkPMod(n, 0, pmod);
47 | std::cout << "pmod = " << pmod << std::endl;
48 | for (int j = 0; j < pmod; ++j) {
49 | // number of prime such that p % pmod = j
50 | std::cout << "p % " << pmod << " = " << j << " " << ans0[j][n] << " "
51 | << ans1[j][n] << std::endl;
52 | }
53 | std::cout << std::endl;
54 | }
55 | std::cout << std::endl;
56 | }
57 | }
58 |
59 | void prime_pmod_s1() {
60 | for (int64 n = 1; n <= 1000000000; n *= 10) {
61 | std::cout << "n = " << n << std::endl;
62 | for (int pmod = 2; pmod <= 7; ++pmod) {
63 | auto ans0 = PrimeS1PMod(n, pmod);
64 | auto ans1 = PrimeSkPMod(n, 1, pmod);
65 | std::cout << "pmod = " << pmod << std::endl;
66 | for (int j = 0; j < pmod; ++j) {
67 | // sum of prime such that p % pmod = j
68 | std::cout << "p % " << pmod << " = " << j << " " << ans0[j][n] << " "
69 | << ans1[j][n] << std::endl;
70 | }
71 | std::cout << std::endl;
72 | }
73 | std::cout << std::endl;
74 | }
75 | }
76 |
77 | int main() {
78 | PE_INIT(maxp = 2000000);
79 |
80 | prime_s0();
81 | prime_s1();
82 | prime_pmod_s0();
83 | prime_pmod_s1();
84 | return 0;
85 | }
--------------------------------------------------------------------------------
/example/random_sample.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | const int sample_count = 1000000;
5 |
6 | const double coe15 = 1. / (1ULL << 15);
7 | double CalPi_CRand15() {
8 |   // Estimates pi as 4 * (share of sampled points with x^2 + y^2 < 1).
9 |   int hits = 0;
10 |   for (int trial = 0; trial < sample_count; ++trial) {
11 |     const double x = coe15 * CRand15();  // x is drawn before y
12 |     const double y = coe15 * CRand15();
13 |     if (x * x + y * y < 1) ++hits;
14 |   }
15 |   return 4. * hits / sample_count;
16 | }
17 |
18 | const double coe31 = 1. / (1ULL << 31);
19 | double CalPi_CRand31() {
20 |   // Estimates pi as 4 * (share of sampled points with x^2 + y^2 < 1).
21 |   int hits = 0;
22 |   for (int trial = 0; trial < sample_count; ++trial) {
23 |     const double x = coe31 * CRand31();  // x is drawn before y
24 |     const double y = coe31 * CRand31();
25 |     if (x * x + y * y < 1) ++hits;
26 |   }
27 |   return 4. * hits / sample_count;
28 | }
29 |
30 | const double coe63 = 1. / (1ULL << 63);
31 | double CalPi_CRand63() {
32 |   // Estimates pi as 4 * (share of sampled points with x^2 + y^2 < 1).
33 |   int hits = 0;
34 |   for (int trial = 0; trial < sample_count; ++trial) {
35 |     const double x = coe63 * CRand63();  // x is drawn before y
36 |     const double y = coe63 * CRand63();
37 |     if (x * x + y * y < 1) ++hits;
38 |   }
39 |   return 4. * hits / sample_count;
40 | }
41 |
42 | const int maxn = 2000000000;
43 | auto rand_generator = MakeUniformGenerator(0, maxn - 1);
44 | const double randcoe = 1. / maxn;
45 | double CalPi_Rand() {
46 |   // Estimates pi as 4 * (share of sampled points with x^2 + y^2 < 1).
47 |   int hits = 0;
48 |   for (int trial = 0; trial < sample_count; ++trial) {
49 |     const double x = randcoe * rand_generator();  // x is drawn before y
50 |     const double y = randcoe * rand_generator();
51 |     if (x * x + y * y < 1) ++hits;
52 |   }
53 |   return 4. * hits / sample_count;
54 | }
55 |
56 | double CalPi_Halton() {
57 | int ok = 0;
58 | for (int i = 0; i < sample_count; ++i) {
59 | std::vector p = Halton(i, 2);
60 | if (p[0] * p[0] + p[1] * p[1] < 1) {
61 | ++ok;
62 | }
63 | }
64 | return 4. * ok / sample_count;
65 | }
66 |
67 | #if HAS_MPF
68 | double CalPi_Mpf() {  // same pi estimate as above, with 128-bit Mpf samples
69 |   gmp_randstate_t state;
70 |   gmp_randinit_mt(state);
71 |   Mpf::SetDefaultPrec(200);
72 |   int ok = 0;
73 |   for (int i = 0; i < sample_count; ++i) {
74 |     Mpf a, b;
75 |     mpf_urandomb(a.mpf(), state, 128);
76 |     mpf_urandomb(b.mpf(), state, 128);
77 |     if (a * a + b * b < 1) {
78 |       ++ok;
79 |     }
80 |   }
81 |   gmp_randclear(state);  // release what gmp_randinit_mt allocated
82 |   return 4. * ok / sample_count;
83 | }
84 | #endif
85 |
86 | int main() {
87 | PE_INIT(maxp = 1000000);
88 | printf("CRand15\t%.16f\n", CalPi_CRand15());
89 | printf("CRand31\t%.16f\n", CalPi_CRand31());
90 | printf("CRand63\t%.16f\n", CalPi_CRand63());
91 | printf("Rand\t%.16f\n", CalPi_Rand());
92 | printf("Halton\t%.16f\n", CalPi_Halton());
93 | #if HAS_MPF
94 | printf("Mpf\t%.16f\n", CalPi_Mpf());
95 | #endif
96 | return 0;
97 | }
--------------------------------------------------------------------------------
/example/sym_poly.c:
--------------------------------------------------------------------------------
1 | #include
2 | using namespace pe;
3 |
4 | // Guess the solution to a^2=b^2+c^2+bc where
5 | // a = t1, b = t2, c = t3, c7 = 1, c8 = 0, c9 = -1
6 | SymPoly t1("c1 m^2 + c2 m n + c3 n^2");
7 | SymPoly t2("c4 m^2 + c5 m n + c6 n^2");
8 | SymPoly t3("c7 m^2 + c8 m n + c9 n^2");
9 |
10 | SymPoly target = SymPoly("a^2-b^2-c^2-b c")
11 | .Replace("a", t1)
12 | .Replace("b", t2)
13 | .Replace("c", t3);
14 |
15 | int his[10];
16 | void dfs(int now, const SymPoly& p) {
17 | if (now == 10) {
18 | if (std::empty(p.terms()) && his[7] == 1 && his[8] == 0 && his[9] == -1) {
19 | auto aa =
20 | t1.Replace("c1", his[1]).Replace("c2", his[2]).Replace("c3", his[3]);
21 | auto bb =
22 | t2.Replace("c4", his[4]).Replace("c5", his[5]).Replace("c6", his[6]);
23 | auto cc =
24 | t3.Replace("c7", his[7]).Replace("c8", his[8]).Replace("c9", his[9]);
25 | if (std::empty(aa.terms()) || std::empty(bb.terms()) || std::empty(cc.terms()))
26 | return;
27 | std::cout << "a = " << aa << std::endl;
28 | std::cout << "b = " << bb << std::endl;
29 | std::cout << "c = " << cc << std::endl;
30 | std::cout << std::endl;
31 | }
32 | } else {
33 | std::string me = "c" + ToString(now);
34 | for (int i = -1; i <= 2; ++i) {
35 | his[now] = i;
36 | dfs(now + 1, p.Replace(me, i));
37 | }
38 | }
39 | }
40 |
41 | int main() {
42 | std::cout << target.Replace("n", 1).Replace("m", 1) << std::endl;
43 | dfs(1, target);
44 | return 0;
45 | }
--------------------------------------------------------------------------------
/format.py:
--------------------------------------------------------------------------------
1 | #! python2
2 | import os
3 | import subprocess
4 |
5 | CURRENT_DIRECTORY = os.getcwd()
6 | # BINARY_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
7 |
8 | compile_commands_template = """[{
9 | "directory":
10 | "$(CURRENT_DIRECTORY)",
11 | "file":
12 | "pe",
13 | "arguments": [
14 | "clang++.exe", "-xc++", "pe", "--driver-mode=g++", "-c", "--std=c++17",
15 | "-O3", "-march=native", "-mtune=native",
16 | "--target=x86_64-w64-windows-gnu", "-fopenmp"
17 | ]
18 | }]"""
19 |
20 | tidy_options = [
21 | '-checks=-*', 'google-readability-casting',
22 | 'google-readability-braces-around-statements',
23 | 'google-readability-namespace-comments', 'performance-*', 'modernize-use-*',
24 | '-modernize-use-trailing-return-type', '-modernize-use-nodiscard',
25 | 'misc-unused-parameters'
26 | ]
27 |
28 | tidy_cmd = [
29 | 'run-clang-tidy.py', ','.join(tidy_options), '-header-filter=pe.*',
30 | '-export-fixes=format-fixes.yaml', '-fix', 'pe'
31 | ]
32 |
33 |
34 | def tidy_code():
35 | with open('compile_commands.json', 'wb') as tempf:
36 | tempf.write(
37 | compile_commands_template.replace(
38 | '$(CURRENT_DIRECTORY)', CURRENT_DIRECTORY.replace('\\', '\\\\')))
39 | os.system(' '.join(tidy_cmd))
40 | os.remove('compile_commands.json')
41 | os.remove('format-fixes.yaml')
42 |
43 |
44 | def should_format(filename):
45 | if filename in ['parallel_cal_prime_pi.c']:
46 | return False
47 |
48 | _, file_ext_name = os.path.splitext(filename)
49 |
50 | return file_ext_name in ['', '.h', '.hpp', '.c', '.cxx', '.cpp']
51 |
52 |
53 | def format_code():
54 | for rt, _, files in os.walk(CURRENT_DIRECTORY):
55 | if rt.find('.git') != -1:
56 | continue
57 | for f in files:
58 | if should_format(f):
59 | fpath = os.path.join(rt, f)
60 | print(fpath)
61 | subprocess.call('clang-format -style=Google -sort-includes=0 -i %s' %
62 | fpath)
63 |
64 |
65 | if __name__ == '__main__':
66 | tidy_code()
67 | format_code()
68 |
--------------------------------------------------------------------------------
/gen_config.py:
--------------------------------------------------------------------------------
1 | #! python3
2 | # -*- coding: UTF-8 -*-
3 | import os
4 |
5 | CURRENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
6 | TARGET_FILENAME = 'pe_config'
7 | TARGET_PATH = os.path.join(CURRENT_DIRECTORY, TARGET_FILENAME)
8 |
9 | SPLITTER = ';' if os.name == 'nt' else ':'
10 | CHECKING_PATHS = os.environ.get('CPLUS_INCLUDE_PATH', '').split(SPLITTER)
11 |
12 | RULES = [
13 | ('ENABLE_EIGEN', ['Eigen/Dense']),
14 | ('ENABLE_GMP', ['gmp.h']),
15 | ('ENABLE_FLINT', ['flint.h']),
16 | ('ENABLE_MPFR', ['mpfr.h']),
17 | #('ENABLE_MPIR', ['mpir.h']),
18 | ('ENABLE_LIBBF', ['libbf.h']),
19 | ('ENABLE_NTL', ['NTL/ZZ.h']),
20 | ('ENABLE_ZMQ', ['zmq/zmq.h']),
21 | ('ENABLE_PRIME_COUNT', ['primecount.hpp']),
22 | ('ENABLE_PRIME_SIEVE', ['primesieve.hpp']),
23 | ('ENABLE_TCMALLOC', []), # Always disable
24 | ]
25 |
26 | content = []
27 |
28 |
29 | def add_define(key, value):
30 | content.append('#ifndef %s' % key)
31 | content.append('#define %s %s' % (key, value))
32 | content.append('#endif')
33 | content.append('')
34 |
35 |
36 | def check_target(path):
37 | for folder in CHECKING_PATHS:
38 | if not os.path.exists(folder):
39 | continue
40 | for i in path:
41 | if os.path.exists(os.path.join(folder, i)):
42 | return True
43 | return False
44 |
45 |
46 | def main():
47 | content.append('#ifndef PE_CONFIG_')
48 | content.append('#define PE_CONFIG_')
49 | content.append('')
50 | content.append('// This file provides a centralized place to configure pe')
51 | content.append('')
52 | content.append(
53 | '// Auto generated by gen_config.py, and you can edit it manually')
54 | content.append('')
55 | content.append('// Configuration priority (first match):')
56 | content.append('// 1. Compiling command')
57 | content.append('// 2. The configurations in this file')
58 | content.append('// 3. The configurations in file pe')
59 | content.append('')
60 | add_define('ENABLE_ASSERT', '1')
61 | add_define('TRY_TO_USE_INT128', '1')
62 | for (key, value) in RULES:
63 | ok = check_target(value)
64 | add_define(key, '1' if ok else '0')
65 | content.append('#endif')
66 | with open(TARGET_PATH, 'wb') as tempf:
67 | tempf.write('\r\n'.join(content).encode('utf8'))
68 |
69 |
70 | if __name__ == '__main__':
71 | main()
72 |
--------------------------------------------------------------------------------
/legacy/pe_poly.hpp:
--------------------------------------------------------------------------------
1 | #ifndef PE_POLY_
2 | #define PE_POLY_
3 |
4 | #include "pe_base"
5 | #include "pe_type_traits"
6 | #include "pe_mod"
7 | #include "pe_nt"
8 | #include "pe_poly_base"
9 |
10 | namespace pe {
11 | struct NModPoly {
12 |   int64 mod;  // modulus applied to the coefficients
13 |   std::vector data;  // data[i] is the coefficient of x^i
14 |
15 |   NModPoly(int64 mod = 1) : mod(mod) {}
16 |   // Builds from coefficients: reduces them by mod, then optionally calls AdjustLeadingZeros().
17 |   NModPoly(const std::vector& data, int64 mod,
18 |            int adjust_leading_zero = 1)
19 |       : mod(mod), data(data) {
20 |     AdjustMod();
21 |     if (adjust_leading_zero) {
22 |       AdjustLeadingZeros();
23 |     }
24 |   }
25 |
26 |   NModPoly(std::vector&& data, int64 mod, int adjust_leading_zero = 1)
27 |       : mod(mod), data(std::move(data)) {
28 |     AdjustMod();
29 |     if (adjust_leading_zero) {
30 |       AdjustLeadingZeros();
31 |     }
32 |   }
33 |   // Replaces the coefficients only: mod is unchanged and no reduction is applied.
34 |   NModPoly& operator=(const std::vector& v) {
35 |     data = v;
36 |     return *this;
37 |   }
38 |   // Rvalue overload: takes a non-const rvalue so the buffer is moved, not copied.
39 |   NModPoly& operator=(std::vector&& v) {
40 |     data = std::move(v);
41 |     return *this;
42 |   }
43 |
44 |   NModPoly(const NModPoly& p) = default;
45 |   NModPoly(NModPoly&& p) = default;
46 |   NModPoly& operator=(const NModPoly& other) = default;
47 |   NModPoly& operator=(NModPoly&& other) = default;
48 |
49 |   NModPoly& AdjustLeadingZeros() {
50 |     AdjustPolyLeadingZero(data);
51 |     return *this;
52 |   }
53 |   // Reduces every coefficient with Mod(coefficient, mod).
54 |   NModPoly& AdjustMod() {
55 |     for (auto& iter : data) {
56 |       iter = Mod(iter, mod);
57 |     }
58 |     return *this;
59 |   }
60 |
61 |   int64 deg() const { return static_cast(std::size(data) - 1); }
62 |   int64 size() const { return static_cast(std::size(data)); }
63 |   // Resizes to n coefficients; coefficients added by growth are set to 0.
64 |   NModPoly& Resize(int64 n) {
65 |     const int64 m = static_cast(std::size(data));
66 |     data.resize(n);
67 |     for (int64 i = m; i < n; ++i) {
68 |       data[i] = 0;
69 |     }
70 |     return *this;
71 |   }
72 |
73 |   NModPoly& Redeg(int64 n) { return Resize(n + 1); }
74 |
75 |   int64& operator[](int64 idx) {
76 |     PE_ASSERT(idx >= 0 && idx < static_cast(std::size(data)));
77 |     return data[idx];
78 |   }
79 |
80 |   int64 operator[](int64 idx) const {
81 |     PE_ASSERT(idx >= 0 && idx < static_cast(std::size(data)));
82 |     return data[idx];
83 |   }
84 |   // Like operator[] const, but returns 0 for an out-of-range idx.
85 |   int64 At(int64 idx) const {
86 |     return idx >= 0 && idx < static_cast(std::size(data)) ? data[idx]
87 |                                                           : 0;
88 |   }
89 |
90 |   bool IsZero() const { return std::size(data) == 1 && data[0] == 0; }
91 |   // Evaluates the polynomial at v modulo mod using Horner's rule.
92 |   int64 ValueAt(int64 v) const {
93 |     int64 ret = 0;
94 |     const int64 t = Mod(v, mod);
95 |     for (int64 i = deg(); i >= 0; --i) {
96 |       ret = AddMod(MulMod(ret, t, mod), data[i], mod);
97 |     }
98 |     return ret;
99 |   }
100 |   // Copy of the lowest min(n, size()) coefficients.
101 |   NModPoly LowerTerms(int64 n, int adjust_leading_zero = 1) const {
102 |     const int64 m = std::min(n, static_cast(std::size(data)));
103 |     return NModPoly(std::vector(data.begin(), data.begin() + m), mod,
104 |                     adjust_leading_zero);
105 |   }
106 |
107 |   NModPoly Inv(int64 n) const;
108 | };
109 |
110 | template
111 | struct NModPolyT : public NModPoly {
112 | NModPolyT() : NModPoly(M) {}
113 |
114 | NModPolyT(const std::vector& data) : NModPoly(data, M) {}
115 |
116 | NModPolyT(std::vector&& data) : NModPoly(std::move(data), M) {}
117 |
118 | NModPolyT(std::initializer_list l)
119 | : NModPolyT(std::vector(l)) {}
120 | };
121 |
122 | SL NModPoly PolyMul(const NModPoly& X, const NModPoly& Y) {
123 | return NModPoly{PolyMul(X.data, Y.data, X.mod), X.mod};
124 | }
125 |
126 | SL NModPoly PolyInv(const NModPoly& x, int64 n) {
127 | return NModPoly(PolyInv(x.data, n, x.mod), x.mod);
128 | }
129 |
130 | #define PPOLY_DIV_AND_MOD_IMPL(PolyDivAndMod, PolyDivAndModImpl) \
131 | SL std::tuple PolyDivAndMod(const NModPoly& X, \
132 | const NModPoly& Y) { \
133 | auto [q, r] = PolyDivAndModImpl(X.data, Y.data, X.mod); \
134 | return std::make_tuple(NModPoly(std::move(q), X.mod), \
135 | NModPoly(std::move(r), X.mod)); \
136 | }
137 |
138 | #define PPOLY_DIV_IMPL(PolyDiv, PolyDivImpl) \
139 | SL NModPoly PolyDiv(const NModPoly& X, const NModPoly& Y) { \
140 | return NModPoly(PolyDivImpl(X.data, Y.data, X.mod), X.mod); \
141 | }
142 |
143 | #define PPOLY_MOD_IMPL(PolyMod, PolyModImpl) \
144 | SL NModPoly PolyMod(const NModPoly& X, const NModPoly& Y) { \
145 | return NModPoly(PolyModImpl(X.data, Y.data, X.mod), X.mod); \
146 | }
147 |
148 | PPOLY_DIV_AND_MOD_IMPL(PolyDivAndModDc, pe::PolyDivAndModDc)
149 | PPOLY_DIV_IMPL(PolyDivDc, pe::PolyDivDc)
150 | PPOLY_MOD_IMPL(PolyModDc, pe::PolyModDc)
151 |
152 | PPOLY_DIV_AND_MOD_IMPL(PolyDivAndModNormal, pe::PolyDivAndModNormal)
153 | PPOLY_DIV_IMPL(PolyDivNormal, pe::PolyDivNormal)
154 | PPOLY_MOD_IMPL(PolyModNormal, pe::PolyModNormal)
155 |
156 | PPOLY_DIV_AND_MOD_IMPL(PolyDivAndMod, pe::PolyDivAndMod)
157 | PPOLY_DIV_IMPL(PolyDiv, pe::PolyDiv)
158 | PPOLY_MOD_IMPL(PolyMod, pe::PolyMod)
159 |
160 | inline NModPoly NModPoly::Inv(int64 n) const {
161 | auto t = PolyInv(data, n, mod);
162 | return NModPoly(std::vector(static_cast(&t[0]),
163 | static_cast(&t[0]) + std::size(t)),
164 | mod);
165 | }
166 |
167 | SL NModPoly operator<<(const NModPoly& p, int64 m) {  // SL, like the other free functions in this header
168 |   return NModPoly(PolyShiftLeft(p.data, m), p.mod);
169 | }
170 |
171 | SL NModPoly operator>>(const NModPoly& p, int64 m) {  // SL, like the other free functions in this header
172 |   return NModPoly(PolyShiftRight(p.data, m), p.mod);
173 | }
174 |
175 | SL NModPoly operator-(const NModPoly& x, const NModPoly& y) {
176 | return NModPoly(PolySub(x.data, y.data, x.mod), x.mod).AdjustLeadingZeros();
177 | }
178 |
179 | SL NModPoly operator+(const NModPoly& x, const NModPoly& y) {
180 | return NModPoly(PolyAdd(x.data, y.data, x.mod), x.mod).AdjustLeadingZeros();
181 | }
182 |
183 | SL NModPoly operator*(const NModPoly& x, int64 v) {  // scales every coefficient by v
184 |   std::vector data(x.data);
185 |   const int64 t = Mod(v, x.mod);  // reduce v once, as ValueAt does
186 |   // MulMod on reduced operands instead of iter * v % mod, which can overflow int64.
187 |   for (auto& iter : data) iter = MulMod(Mod(iter, x.mod), t, x.mod);
188 |   return NModPoly(std::move(data), x.mod);
189 | }
190 |
191 | SL NModPoly operator*(int64 v, const NModPoly& x) { return x * v; }
192 |
193 | SL NModPoly operator*(const NModPoly& x, const NModPoly& y) {
194 | return PolyMul(x, y);
195 | }
196 |
197 | SL NModPoly operator/(const NModPoly& x, const NModPoly& y) {
198 | return PolyDiv(x, y);
199 | }
200 |
201 | SL NModPoly operator%(const NModPoly& x, const NModPoly& y) {
202 | return PolyMod(x, y);
203 | }
204 |
205 | SL int operator==(const NModPoly& x, const NModPoly& y) {
206 | return x.mod == y.mod && x.data == y.data;
207 | }
208 |
209 | // x^n % mod
210 | SL NModPoly operator%(int64 n, const NModPoly& mod) {
211 | NModPoly x{{0, 1}, mod.mod};
212 | NModPoly ret{{1}, mod.mod};
213 | for (; n > 0; n >>= 1) {
214 | if (n & 1) {
215 | ret = PolyMod(x * ret, mod);
216 | }
217 | if (n > 1) {
218 | x = PolyMod(x * x, mod);
219 | }
220 | }
221 | return ret;
222 | }
223 |
224 | SL std::ostream& operator<<(std::ostream& o, const NModPoly& p) {
225 |   // Prints the coefficients in index order, separated by ", ".
226 |   const int64 n = static_cast(std::size(p.data));
227 |   if (n == 0) return o;  // NModPoly(mod) leaves data empty; data[n - 1] would be out of range
228 |   for (int64 i = 0; i < n - 1; ++i) o << p.data[i] << ", ";
229 |   return o << p.data[n - 1];
230 | }
231 | } // namespace pe
232 | #endif
233 |
--------------------------------------------------------------------------------
/legacy/pe_sym_poly.hpp:
--------------------------------------------------------------------------------
1 | #ifndef PE_SYM_POLY_
2 | #define PE_SYM_POLY_
3 |
4 | #include "pe_base"
5 |
6 | namespace pe {
7 | // [+- ]*
8 | SL std::vector ParseSgnList(const std::string& s, int& i) {
9 |   // Consumes '+' / '-' signs and the whitespace before each of them, starting
10 |   // at s[i]; returns +1 / -1 per sign and leaves i at the first unconsumed char.
11 |   const int size = static_cast(std::size(s));
12 |   std::vector sgns;
13 |   for (;;) {
14 |     // <cctype> functions need an unsigned char value; plain char may be negative.
15 |     while (i < size && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
16 |     if (i >= size || (s[i] != '+' && s[i] != '-')) return sgns;
17 |     sgns.push_back(s[i] == '+' ? 1 : -1);
18 |     ++i;
19 |   }
20 | }
21 |
22 | SL std::string ParseDigList(const std::string& s, int& i) {
23 |   // Skips whitespace at s[i], then consumes and returns the run of decimal
24 |   // digits that follows ("" if there is none). The unsigned char casts keep
25 |   // the <cctype> calls defined when plain char is negative.
26 |   const int size = static_cast(std::size(s));
27 |   while (i < size && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
28 |   const int start = i;
29 |   while (i < size && std::isdigit(static_cast<unsigned char>(s[i]))) {
30 |     ++i;
31 |   }
32 |   return s.substr(start, i - start);  // empty when no digit was found
33 | }
34 |
35 | SL std::string ParseIdentifier(const std::string& s, int& i) {
36 |   // Skips whitespace at s[i], then consumes and returns an identifier: a letter
37 |   // or '_' followed by letters, digits or '_' ("" if none starts there).
38 |   // <cctype> functions need an unsigned char value; plain char may be negative.
39 |   const int size = static_cast(std::size(s));
40 |   const auto at = [&s](int k) { return static_cast<unsigned char>(s[k]); };
41 |   while (i < size && std::isspace(at(i))) ++i;
42 |   if (i >= size || (s[i] != '_' && !std::isalpha(at(i)))) return "";
43 |   const int start = i;
44 |   while (i < size &&
45 |          (s[i] == '_' || std::isdigit(at(i)) || std::isalpha(at(i)))) ++i;
46 |   return s.substr(start, i - start);
47 | }
48 |
49 | SL std::vector> ParseSingleTermList(
50 |     const std::string& s, int& i) {
51 |   // Parses factors (identifier or digit run, optional '*' before each) from s[i] on; an identifier may carry "^digits".
52 |   std::vector> result;
53 |   const int size = static_cast(std::size(s));
54 |   for (;;) {
55 |     while (i < size && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
56 |     if (i < size && s[i] == '*') {  // bound checked like every other read of s[i]
57 |       ++i;
58 |       while (i < size && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
59 |     }
60 |     std::string variable = ParseIdentifier(s, i);
61 |     if (std::empty(variable)) variable = ParseDigList(s, i);
62 |     if (std::empty(variable)) return result;
63 |     while (i < size && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
64 |     if (i < size && s[i] == '^' &&
65 |         !std::isdigit(static_cast<unsigned char>(variable[0]))) {
66 |       ++i;
67 |       result.emplace_back(variable, ParseDigList(s, i));
68 |     } else {
69 |       result.emplace_back(variable, "");
70 |     }
71 |   }
72 | }
72 |
73 | template
74 | SL CT EvaluateDigs(const std::vector& sgns, const std::string& digs,
75 | CT defaultVal = 0) {
76 | int s = 1;
77 | for (const auto& iter : sgns) {
78 | if (iter == -1) s = -s;
79 | }
80 | CT v = 0;
81 | for (const auto& iter : digs) v = v * 10 + iter - '0';
82 | if (std::empty(digs)) v = defaultVal;
83 | return s == 1 ? v : -v;
84 | }
85 |
86 | template
87 | SL std::pair EvaluateSingleTermList(
88 | const std::vector>& singleTerms) {
89 | std::map t;
90 | CT c = 1;
91 | for (const auto& iter : singleTerms) {
92 | if (std::isdigit(iter.first[0])) {
93 | c *= EvaluateDigs({}, iter.first, 1);
94 | } else {
95 | t[iter.first] += EvaluateDigs({}, iter.second, 1);
96 | }
97 | }
98 | TermKey key;
99 | for (const auto& iter : t) key.emplace_back(iter.first, iter.second);
100 | return {key, c};
101 | }
102 |
103 | template
104 | SL std::pair ParseTerm(const std::string& s, int& i) {
105 | auto sgns = ParseSgnList(s, i);
106 | auto singleTerms = ParseSingleTermList(s, i);
107 | auto t = EvaluateSingleTermList(singleTerms);
108 | auto sgn = EvaluateDigs(sgns, "", 1);
109 | return {t.first,
110 | std::empty(singleTerms) ? CT(0) : (sgn == 1 ? t.second : -t.second)};
111 | }
112 |
113 | template
114 | SL std::pair ParseTerm(const std::string& s) {
115 | int i = 0;
116 | return ParseTerm(s, i);
117 | }
118 |
119 | template
120 | SL std::map ParseSymPolyTerms(const std::string& s) {
121 | const int size = static_cast(std::size(s));
122 | int i = 0;
123 | std::map terms;
124 | for (;;) {
125 | while (i < size && std::isspace(s[i])) ++i;
126 | int j = i;
127 | auto t = ParseTerm(s, i);
128 | if (j == i) {
129 | if (i < size) {
130 | std::cerr << "Unknown: " << s.substr(i) << std::endl;
131 | }
132 | break;
133 | }
134 | terms[t.first] += t.second;
135 | }
136 | return terms;
137 | }
138 |
139 | template
140 | SL SymPoly ParseSymPoly(const std::string& s) {
141 | return SymPoly(ParseSymPolyTerms(s));
142 | }
143 |
144 | SL TermKey ToTermKey(const std::string& s) {
145 | int i = 0;
146 | return ParseTerm(s, i).first;
147 | }
148 | }
--------------------------------------------------------------------------------
/pe:
--------------------------------------------------------------------------------
1 | #ifndef PE_
2 | #define PE_
3 |
4 | // Base
5 | #include "pe_base"
6 | #include "pe_type_traits"
7 | #include "pe_span"
8 | #include "pe_bit"
9 | #include "pe_mod" // Modular arithmetic
10 | #include "pe_int"
11 | #include "pe_extended_int"
12 | #include "pe_float"
13 | #include "pe_vector"
14 |
15 | // General util
16 | #include "pe_io"
17 | #include "pe_time"
18 | #include "pe_persistance"
19 | #include "pe_tree"
20 | #include "pe_rand"
21 |
22 | // Range
23 | #include "pe_range"
24 |
25 | // Matrix arithmetic
26 | #include "pe_mat"
27 |
28 | // Number theory arithmetic
29 | #include "pe_nt_base"
30 | #include "pe_nt"
31 |
32 | // Fraction arithmetic
33 | #include "pe_fraction"
34 |
35 | // Parallel support
36 | #include "pe_parallel"
37 | #include "pe_parallel_algo"
38 |
39 | // Polynomial
40 | #include "pe_poly_base"
41 | #include "pe_poly_algo"
42 |
43 | // fft
44 | #include "pe_fft"
45 |
46 | // Big integer
47 | #include "pe_gbi"
48 | #include "pe_bi32"
49 | #include "pe_mpz"
50 |
51 | // Geometry
52 | #include "pe_geometry"
53 |
54 | // Large memory support (windows)
55 | #include "pe_memory"
56 |
57 | // MP extension
58 | #include "pe_mpf"
59 |
60 | #include "pe_serialization"
61 |
62 | // Misc
63 | #include "pe_misc"
64 | #include "pe_mma"
65 |
66 | #include "pe_array"
67 | #include "pe_ntf"
68 | #include "pe_algo"
69 | #include "pe_sym_poly"
70 | #include "pe_db"
71 | #include "pe_int_algo"
72 |
73 | #include "pe_dpe"
74 |
75 | #include "pe_initializer"
76 |
77 | // NOTE: adding declarations to namespace std is undefined or unspecified behavior, see
78 | // https://en.cppreference.com/w/cpp/language/extending_std
79 | namespace std {
80 | // It is nevertheless required by
81 | // 1. operator << for std::vector, std::set, std::map, etc.
82 | // 2. pe::int128, pe::uint128, pe::float128 which are non-class types.
83 | using pe::operator<<;  // makes pe's operator<< overloads visible inside namespace std
84 | } // namespace std
85 |
86 | #endif
87 |
--------------------------------------------------------------------------------
/pe.hpp:
--------------------------------------------------------------------------------
1 | #ifndef PE_HPP_
2 | #define PE_HPP_
3 |
4 | // Use
5 | // g++ -xc++-header pe.hpp --std=c++20 -fno-diagnostics-color -O3 -march=native
6 | // -mtune=native -fopenmp -pthread -static
7 | // to generate pe.hpp.gch
8 | #include
9 |
10 | #endif
11 |
--------------------------------------------------------------------------------
/pe_array:
--------------------------------------------------------------------------------
1 | #ifndef PE_ARRAY_
2 | #define PE_ARRAY_
3 |
4 | #include "pe_base"
5 | #include "pe_memory"
6 |
7 | namespace pe {
8 | template
9 | struct DArrayRef;
10 |
11 | // T: element type
12 | // D: number of dimensions
13 | // A: allocator
14 | template
15 | struct DArray : public DArrayRef {  // Multi-dimensional array that owns storage obtained from A; the extents are given at run time.
16 | template
17 | friend struct DArrayRef;
18 |
19 | using base = DArrayRef;
20 | template
21 | DArray(const std::vector& dimension, Args&&... arg)  // dimension: extent of each dimension; arg...: constructor arguments used for every element
22 | : dimension_(dimension), base(nullptr, 0, nullptr, nullptr) {  // the base class is initialized before dimension_ regardless of the order written here
23 | Init(arg...);
24 | PE_ASSERT(std::size(dimension) == D);  // checked only after Init has already run
25 | }
26 |
27 | DArray() : base(nullptr, 0, nullptr, nullptr) {  // default: every dimension has extent 1
28 | dimension_ = std::vector(D, 1);
29 | Init();
30 | PE_ASSERT(std::size(dimension_) == D);
31 | }
32 |
33 | DArray(const DArray&) = delete;  // neither copyable nor movable
34 | DArray(DArray&&) = delete;
35 |
36 | DArray& operator=(const DArray&) = delete;
37 | DArray& operator=(DArray&&) = delete;
38 |
39 | ~DArray() { Clear(); }
40 |
41 | template
42 | void Reset(const std::vector& dimension, Args&&... arg) {  // Destroys the current contents and re-creates the array with new extents.
43 | Clear();
44 | this->dimension_ = dimension;
45 | Init(arg...);
46 | }
47 |
48 | template
49 | void Init(Args&&... arg) {  // Allocates storage and constructs every element; does not release a previous allocation.
50 | const int d = static_cast(std::size(dimension_));
51 | element_counts_.clear();
52 | element_counts_.push_back(1);
53 | for (int i = d - 1; i >= 0; --i) {
54 | element_counts_.push_back(element_counts_.back() * dimension_[i]);
55 | }
56 | std::reverse(element_counts_.begin(), element_counts_.end());  // now element_counts_[k] is the product of dimension_[k..d-1] and element_counts_[d] is 1
57 | element_count_ = element_counts_[0];  // total number of elements
58 | base::dimension_ = &dimension_[0];  // the base view points into the vectors owned by this object
59 | base::element_counts_ = &element_counts_[0];
60 | base::data_ = reinterpret_cast(A::Allocate(element_count_ * sizeof(T)));
61 | for (int64 i = 0; i < element_count_; ++i) {
62 | new (base::data_ + i) T(arg...);  // placement-new; the same arguments are passed as lvalues for every element
63 | }
64 | }
65 |
66 | void Clear() {  // Runs every element's destructor, then returns the storage to A.
67 | for (int64 i = 0; i < element_count_; ++i) {
68 | (base::data_ + i)->~T();
69 | }
70 | A::Deallocate(base::data_);  // NOTE(review): data_ and element_count_ are left unchanged, so calling Clear() again (e.g. the destructor after a manual Clear()) repeats the teardown
71 | }
72 |
73 | T* data() { return base::data_; }  // pointer to the first element of the flat storage
74 |
75 | DArrayRef Ref() {  // Returns a view built from the same data and metadata pointers, at offset 0.
76 | return DArrayRef(base::data_, 0, base::dimension_,
77 | base::element_counts_);
78 | }
79 |
80 | private:
81 | std::vector dimension_;  // extent of each dimension
82 | std::vector element_counts_;  // suffix products of dimension_, see Init
83 | int64 element_count_;  // total element count, set by Init
84 | };
85 |
86 | template
87 | struct DArrayRef {  // View over part of an array's flat storage; holds raw pointers and defines no destructor.
88 | using ValueType = DArrayRef;
89 | using ConstValueType = DArrayRef;
90 |
91 | DArrayRef(T* data, int off, const int64* dimension,  // off: index of the dimension this view starts at
92 | const int64* element_counts)
93 | : data_(data),
94 | off_(off),
95 | dimension_(dimension),  // members are initialized in declaration order (element_counts_ before dimension_), not in the order written here
96 | element_counts_(element_counts) {}
97 |
98 | DArrayRef operator[](int64 idx) const {  // Returns the sub-view for index idx of the current dimension.
99 | return DArrayRef(data_ + idx * element_counts_[off_ + 1],  // element_counts_[off_ + 1] is the stride of one step in this dimension
100 | off_ + 1, dimension_, element_counts_);
101 | }
102 |
103 | protected:
104 | T* data_;  // first element covered by this view
105 | const int off_;
106 | const int64* element_counts_;
107 | const int64* dimension_;
108 | };
109 |
110 | template
111 | struct DArrayRef {  // Specialization whose operator[] yields elements (T&) instead of further sub-views.
112 | using ValueType = T&;
113 | using ConstValueType = const T&;
114 |
115 | DArrayRef(T* data, int off, const int64* dimension,
116 | const int64* element_counts)
117 | : data_(data),
118 | off_(off),
119 | dimension_(dimension),  // members are initialized in declaration order (element_counts_ before dimension_), not in the order written here
120 | element_counts_(element_counts) {}
121 |
122 | T& operator[](int64 idx) { return data_[idx]; }  // no bounds check is performed
123 | ConstValueType operator[](int64 idx) const { return data_[idx]; }
124 |
125 | protected:
126 | T* data_;  // first element covered by this view
127 | const int off_;
128 | const int64* element_counts_;
129 | const int64* dimension_;
130 | };
131 |
132 | template
133 | struct ArrayShape;  // Compile-time array shape: a list of extents.
134 |
135 | template
136 | struct ArrayShape {  // Recursive case: first extent H followed by the remaining shape Next.
137 | using Next = ArrayShape;
138 | const static int64 D = 1 + Next::D;  // number of dimensions
139 | const static int64 EC = H * Next::EC;  // total element count (product of all extents)
140 | };
141 |
142 | template
143 | struct ArrayShape {  // Base case: a single extent H.
144 | const static int64 D = 1;
145 | const static int64 EC = H;
146 | };
147 |
148 | template
149 | struct FArrayRef;
150 |
151 | // T: element type
152 | // S: array shape
153 | // A: allocator
154 | template
155 | struct FArray : public FArrayRef {  // Array with a compile-time shape S that owns storage obtained from A.
156 | using base = FArrayRef;
157 |
158 | template
159 | FArray(Args&&... arg) : base(nullptr) {  // arg...: constructor arguments used for every element
160 | Init(arg...);
161 | }
162 |
163 | FArray(const FArray&) = delete;  // neither copyable nor movable
164 | FArray(FArray&&) = delete;
165 |
166 | FArray& operator=(const FArray&) = delete;
167 | FArray& operator=(FArray&&) = delete;
168 |
169 | ~FArray() { Clear(); }
170 |
171 | template
172 | void Init(Args&&... arg) {  // Allocates S::EC elements and constructs each one; does not release a previous allocation.
173 | base::data_ = reinterpret_cast(A::Allocate(S::EC * sizeof(T)));
174 | for (int64 i = 0; i < S::EC; ++i) {
175 | new (base::data_ + i) T(arg...);  // placement-new; the same arguments are passed as lvalues for every element
176 | }
177 | }
178 |
179 | void Clear() {  // Runs every element's destructor, then returns the storage to A.
180 | for (int64 i = 0; i < S::EC; ++i) {
181 | (base::data_ + i)->~T();
182 | }
183 | A::Deallocate(base::data_);  // NOTE(review): data_ is left unchanged, so calling Clear() again (e.g. the destructor after a manual Clear()) repeats the teardown
184 | }
185 |
186 | T* data() { return base::data_; }  // pointer to the first element of the flat storage
187 |
188 | FArrayRef Ref() { return FArrayRef(base::data_); }  // view built from the same data pointer
189 | };
190 |
191 | template
192 | struct FArrayRef {  // View over storage laid out according to shape S; holds only a raw pointer.
193 | using NextShape = typename S::Next;
194 | using ValueType = FArrayRef;
195 | using ConstValueType = FArrayRef;
196 |
197 | FArrayRef(T* data) : data_(data) {}
198 |
199 | FArrayRef operator[](int64 idx) const {  // Returns the sub-view for index idx of the first dimension.
200 | return FArrayRef(data_ + idx * NextShape::EC);  // NextShape::EC is the element count of one sub-array
201 | }
202 |
203 | protected:
204 | T* data_;  // first element covered by this view
205 | };
206 |
207 | template
208 | struct FArrayRef> {  // Specialization whose operator[] yields elements (T&) instead of further sub-views.
209 | using ValueType = T&;
210 | using ConstValueType = const T&;
211 |
212 | FArrayRef(T* data) : data_(data) {}
213 |
214 | ValueType operator[](int64 idx) { return data_[idx]; }  // no bounds check is performed
215 | ConstValueType operator[](int64 idx) const { return data_[idx]; }
216 |
217 | protected:
218 | T* data_;  // first element covered by this view
219 | };
220 |
221 | // T: element type
222 | // X: the extent of each dimension
223 | template
224 | using Array = FArray>;
225 |
226 | // T: element type
227 | // A: allocator
228 | // X: the extent of each dimension
229 | template
230 | using AArray = FArray, A>;
231 | } // namespace pe
232 | #endif
--------------------------------------------------------------------------------
/pe_config:
--------------------------------------------------------------------------------
1 | #ifndef PE_CONFIG_
2 | #define PE_CONFIG_
3 |
4 | // This file provides a centralized place to configure pe
5 |
6 | // Auto generated by gen_config.py, and you can edit it manually
7 |
8 | // Configuration priority (first match):
9 | // 1. Macros defined on the compiler command line
10 | // 2. The configurations in this file
11 | // 3. The configurations in file pe
12 |
13 | #ifndef ENABLE_ASSERT
14 | #define ENABLE_ASSERT 1
15 | #endif
16 |
17 | #ifndef TRY_TO_USE_INT128
18 | #define TRY_TO_USE_INT128 1
19 | #endif
20 |
21 | #ifndef ENABLE_EIGEN
22 | #define ENABLE_EIGEN 1
23 | #endif
24 |
25 | #ifndef ENABLE_GMP
26 | #define ENABLE_GMP 1
27 | #endif
28 |
29 | #ifndef ENABLE_FLINT
30 | #define ENABLE_FLINT 1
31 | #endif
32 |
33 | #ifndef ENABLE_MPFR
34 | #define ENABLE_MPFR 1
35 | #endif
36 |
37 | #ifndef ENABLE_LIBBF
38 | #define ENABLE_LIBBF 1
39 | #endif
40 |
41 | #ifndef ENABLE_NTL
42 | #define ENABLE_NTL 1
43 | #endif
44 |
45 | #ifndef ENABLE_ZMQ
46 | #define ENABLE_ZMQ 1
47 | #endif
48 |
49 | #ifndef ENABLE_PRIME_COUNT
50 | #define ENABLE_PRIME_COUNT 1
51 | #endif
52 |
53 | #ifndef ENABLE_PRIME_SIEVE
54 | #define ENABLE_PRIME_SIEVE 1
55 | #endif
56 |
57 | #ifndef ENABLE_TCMALLOC
58 | #define ENABLE_TCMALLOC 0
59 | #endif
60 |
61 | #endif
--------------------------------------------------------------------------------
/pe_float:
--------------------------------------------------------------------------------
1 | #ifndef PE_FLOAT128_
2 | #define PE_FLOAT128_
3 |
4 | #include "pe_base"
5 | #include "pe_int"
6 |
7 | namespace pe {
8 | template
9 | SL int IsNAN(T v) {  // Generic fallback: reports "not NaN" (0) for every value without inspecting v.
10 | return 0;
11 | }
12 | } // namespace pe
13 |
14 | #if PE_HAS_FLOAT128
15 | namespace pe {
16 | namespace internal {
17 | SL std::string ToStringFloat128(float128 f, const char* format_string,  // Formats f through quadmath_snprintf and returns the text, or "" on failure.
18 | int dig = 20) {  // dig is passed ahead of f, so format_string must consume an int (e.g. a '*' precision) before the float128
19 | char buff[256];  // fast path: fixed-size stack buffer
20 | const int buff_size = sizeof(buff);
21 | int n = quadmath_snprintf(buff, buff_size, format_string, dig, f);
22 | if (n >= 0 && n < buff_size) {  // a negative n is an error return; the uninitialized buffer must not be returned then
23 | return buff;
24 | }
25 | n = quadmath_snprintf(NULL, 0, format_string, dig, f);  // ask for the required length only
26 | if (n <= -1) {
27 | return "";
28 | }
29 | char* str = static_cast(malloc(n + 1));  // slow path: heap buffer of exactly the required size
30 | std::string result;
31 | if (str) {
32 | quadmath_snprintf(str, n + 1, format_string, dig, f);
33 | result = str;
34 | }
35 | free(str);  // free(NULL) is a no-op, so this is safe when malloc failed
36 | return result;  // empty when the allocation failed
37 | }
38 | } // namespace internal
39 |
40 | SL std::string ToString(float128 f, int dig = 20) {  // Formats f with the "%#.*Qe" format, passing dig for the '*' precision.
41 | return internal::ToStringFloat128(f, "%#.*Qe", dig);
42 | }
43 |
44 | SL std::string ToStringF(float128 f, int dig = 20) {  // Formats f with the "%#.*Qf" format, passing dig for the '*' precision.
45 | return internal::ToStringFloat128(f, "%#.*Qf", dig);
46 | }
47 |
48 | SL std::string to_string(float128 x, int dig = 20) { return ToString(x, dig); }  // Same as ToString(x, dig); dig is forwarded so the requested precision is honoured.
49 |
50 | SL std::ostream& operator<<(std::ostream& o, float128 f) {  // Streams f as the text produced by ToString with dig = 20.
51 | return o << ToString(f, 20);
52 | }
53 |
54 | SL int IsNAN(float128 v) { return isnanq(v); }  // float128 overload: returns the result of isnanq(v)
55 |
56 | SL float128 Abs(float128 f) { return fabsq(f); }  // float128 math wrappers: each forwards to the q-suffixed function named in its body
57 | SL float128 FAbs(float128 f) { return fabsq(f); }  // same implementation as Abs
58 | SL float128 Ceil(float128 f) { return ceilq(f); }
59 | SL float128 Floor(float128 f) { return floorq(f); }
60 | SL float128 Trunc(float128 f) { return truncq(f); }
61 | SL float128 Power(float128 f, int p) {  // integer exponent, converted by the cast before calling powq
62 | return powq(f, static_cast(p));
63 | }
64 |
65 | SL float128 Sqrt(float128 f) { return sqrtq(f); }
66 | SL float128 Cos(float128 f) { return cosq(f); }
67 | SL float128 Sin(float128 f) { return sinq(f); }
68 | SL float128 Exp(float128 f) { return expq(f); }
69 | SL float128 Log(float128 f) { return logq(f); }
70 | SL float128 Log10(float128 f) { return log10q(f); }
71 | } // namespace pe
72 |
73 | #endif
74 |
75 | namespace pe {
76 | namespace internal {
77 | template
78 | SL std::string ToStringFloat(T f, const char* format_string, int dig = 20) {
79 | char buff[256];
80 | const int buff_size = sizeof(buff);
81 | int n = snprintf(buff, buff_size, format_string, dig, f);
82 | if (n < buff_size) {
83 | return buff;
84 | }
85 | n = snprintf(NULL, 0, format_string, dig, f);
86 | if (n <= -1) {
87 | return "";
88 | }
89 | char* str = static_cast