├── .gitignore ├── Python ├── __pycache__ │ ├── Jacobian.cpython-310.pyc │ ├── linear_phi.cpython-310.pyc │ ├── Eigenvalues.cpython-310.pyc │ ├── Phi_functions.cpython-310.pyc │ ├── imag_Leja_phi.cpython-310.pyc │ ├── real_Leja_phi.cpython-310.pyc │ ├── Divided_Difference.cpython-310.pyc │ └── real_Leja_linear_exp.cpython-310.pyc ├── Constant │ ├── __pycache__ │ │ ├── EPI3.cpython-310.pyc │ │ ├── EPI4.cpython-310.pyc │ │ ├── EPIRK4s3.cpython-310.pyc │ │ ├── EPIRK4s3A.cpython-310.pyc │ │ ├── EPIRK4s3B.cpython-310.pyc │ │ ├── EPIRK5P1.cpython-310.pyc │ │ ├── EXPRB32.cpython-310.pyc │ │ ├── EXPRB42.cpython-310.pyc │ │ ├── EXPRB43.cpython-310.pyc │ │ ├── EXPRB53s3.cpython-310.pyc │ │ ├── EXPRB54s4.cpython-310.pyc │ │ ├── Cons_ExpInt.cpython-310.pyc │ │ └── Rosenbrock_Euler.cpython-310.pyc │ ├── Cons_ExpInt.py │ ├── Rosenbrock_Euler.py │ ├── EPI3.py │ ├── EXPRB42.py │ ├── EXPRB32.py │ ├── EPI4.py │ ├── EPIRK4s3B.py │ ├── EPIRK4s3A.py │ ├── EXPRB43.py │ ├── EPIRK4s3.py │ ├── EXPRB53s3.py │ └── EPIRK5P1.py ├── Variable │ ├── __pycache__ │ │ ├── EPIRK4s3.cpython-310.pyc │ │ ├── EPIRK4s3A.cpython-310.pyc │ │ ├── EPIRK5P1.cpython-310.pyc │ │ ├── EXPRB32.cpython-310.pyc │ │ ├── EXPRB43.cpython-310.pyc │ │ ├── EXPRB53s3.cpython-310.pyc │ │ ├── EXPRB54s4.cpython-310.pyc │ │ └── Var_ExpInt.cpython-310.pyc │ ├── Var_ExpInt.py │ ├── EXPRB32.py │ ├── EPIRK4s3A.py │ ├── EXPRB43.py │ ├── EPIRK4s3.py │ ├── EXPRB53s3.py │ └── EPIRK5P1.py ├── Test │ └── Test_data │ │ └── Constant │ │ ├── Burgers │ │ ├── T_final_0.001 │ │ │ ├── N_400_eta_10 │ │ │ │ └── EXPRB32 │ │ │ │ │ ├── N_cfl_8.00 │ │ │ │ │ └── Results.txt │ │ │ │ │ ├── N_cfl_0.10 │ │ │ │ │ └── Results.txt │ │ │ │ │ ├── N_cfl_1.00 │ │ │ │ │ └── Results.txt │ │ │ │ │ ├── N_cfl_10.00 │ │ │ │ │ └── Results.txt │ │ │ │ │ ├── N_cfl_2.00 │ │ │ │ │ └── Results.txt │ │ │ │ │ └── N_cfl_4.00 │ │ │ │ │ └── Results.txt │ │ │ └── N_300_eta_10 │ │ │ │ ├── EXPRB32 │ │ │ │ ├── N_cfl_1.00 │ │ │ │ │ ├── Results.txt │ │ │ │ │ └── Final_data.txt │ │ │ │ 
├── N_cfl_10.00 │ │ │ │ │ └── Results.txt │ │ │ │ ├── N_cfl_2.00 │ │ │ │ │ └── Results.txt │ │ │ │ ├── N_cfl_4.00 │ │ │ │ │ └── Results.txt │ │ │ │ └── N_cfl_8.00 │ │ │ │ │ └── Results.txt │ │ │ │ ├── EXPRB43 │ │ │ │ ├── N_cfl_1.00 │ │ │ │ │ ├── Results.txt │ │ │ │ │ └── Final_data.txt │ │ │ │ ├── N_cfl_2.00 │ │ │ │ │ ├── Results.txt │ │ │ │ │ └── Final_data.txt │ │ │ │ ├── N_cfl_4.00 │ │ │ │ │ ├── Results.txt │ │ │ │ │ └── Final_data.txt │ │ │ │ ├── N_cfl_8.00 │ │ │ │ │ └── Results.txt │ │ │ │ ├── N_cfl_0.10 │ │ │ │ │ ├── Results.txt │ │ │ │ │ └── Final_data.txt │ │ │ │ └── N_cfl_10.00 │ │ │ │ │ └── Results.txt │ │ │ │ └── EXPRB53s3 │ │ │ │ └── N_cfl_0.10 │ │ │ │ ├── Results.txt │ │ │ │ └── Final_data.txt │ │ └── T_final_0.005 │ │ │ └── N_400_eta_10 │ │ │ ├── EPI3 │ │ │ ├── N_cfl_1000.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_20.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_30.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_4.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_40.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_60.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_8.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_10.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_50.00 │ │ │ │ └── Results.txt │ │ │ └── N_cfl_300.00 │ │ │ │ └── Results.txt │ │ │ ├── EPI4 │ │ │ ├── N_cfl_1000.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_20.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_30.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_40.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_60.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_10.00 │ │ │ │ └── Results.txt │ │ │ └── N_cfl_50.00 │ │ │ │ └── Results.txt │ │ │ ├── EXPRB32 │ │ │ ├── N_cfl_1.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_10.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_2.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_20.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_30.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_4.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_40.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_50.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_60.00 │ │ │ │ └── 
Results.txt │ │ │ ├── N_cfl_8.00 │ │ │ │ └── Results.txt │ │ │ └── N_cfl_0.10 │ │ │ │ └── Results.txt │ │ │ ├── EXPRB42 │ │ │ ├── N_cfl_1.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_10.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_20.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_30.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_4.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_40.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_50.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_60.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_8.00 │ │ │ │ └── Results.txt │ │ │ ├── N_cfl_0.05 │ │ │ │ └── Results.txt │ │ │ └── N_cfl_0.10 │ │ │ │ └── Results.txt │ │ │ └── EXPRB43 │ │ │ ├── N_cfl_10.00 │ │ │ └── Results.txt │ │ │ ├── N_cfl_20.00 │ │ │ └── Results.txt │ │ │ ├── N_cfl_30.00 │ │ │ └── Results.txt │ │ │ ├── N_cfl_4.00 │ │ │ └── Results.txt │ │ │ ├── N_cfl_40.00 │ │ │ └── Results.txt │ │ │ ├── N_cfl_50.00 │ │ │ └── Results.txt │ │ │ ├── N_cfl_60.00 │ │ │ └── Results.txt │ │ │ ├── N_cfl_8.00 │ │ │ └── Results.txt │ │ │ └── N_cfl_0.10 │ │ │ └── Results.txt │ │ └── Time_order_plots.py ├── Jacobian.py ├── Divided_Difference.py ├── Readme.md ├── Eigenvalues.py ├── linear_phi.py ├── imag_Leja_exp.py ├── real_Leja_exp.py ├── real_Leja_phi_nl.py ├── imag_Leja_phi_nl.py ├── Phi_functions.py ├── real_Leja_phi.py ├── imag_Leja_phi.py └── real_Leja_linear_exp.py ├── CUDA ├── error_check.hpp ├── Test │ ├── Problems.hpp │ ├── Dif_Adv_2D.hpp │ └── Burgers_2D.hpp ├── Integrators │ ├── Readme.md │ ├── Rosenbrock_Euler.hpp │ ├── EXPRB32.hpp │ ├── EXPRB42.hpp │ └── EPIRK4s3B.hpp ├── Divided_Differences.hpp ├── Readme.md ├── Timer.hpp ├── Eigenvalues.hpp ├── Phi_functions.hpp ├── Jacobian_vector.hpp ├── Kernels.hpp ├── functions.hpp ├── real_Leja_exp.hpp └── real_Leja_phi_nl.hpp ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | CUDA/.vscode/ 3 | CUDA/Test/build/ 4 | 5 | Python/Constant/__pycache__/ 6 | Python/Test/Test_data/ 7 | 
-------------------------------------------------------------------------------- /Python/__pycache__/Jacobian.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/Jacobian.cpython-310.pyc -------------------------------------------------------------------------------- /Python/__pycache__/linear_phi.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/linear_phi.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EPI3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPI3.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EPI4.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPI4.cpython-310.pyc -------------------------------------------------------------------------------- /Python/__pycache__/Eigenvalues.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/Eigenvalues.cpython-310.pyc -------------------------------------------------------------------------------- /Python/__pycache__/Phi_functions.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/Phi_functions.cpython-310.pyc -------------------------------------------------------------------------------- 
/Python/__pycache__/imag_Leja_phi.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/imag_Leja_phi.cpython-310.pyc -------------------------------------------------------------------------------- /Python/__pycache__/real_Leja_phi.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/real_Leja_phi.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EPIRK4s3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPIRK4s3.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EPIRK4s3A.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPIRK4s3A.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EPIRK4s3B.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPIRK4s3B.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EPIRK5P1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPIRK5P1.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EXPRB32.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB32.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EXPRB42.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB42.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EXPRB43.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB43.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EXPRB53s3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB53s3.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/EXPRB54s4.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB54s4.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Variable/__pycache__/EPIRK4s3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EPIRK4s3.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Variable/__pycache__/EPIRK4s3A.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EPIRK4s3A.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Variable/__pycache__/EPIRK5P1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EPIRK5P1.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Variable/__pycache__/EXPRB32.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EXPRB32.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Variable/__pycache__/EXPRB43.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EXPRB43.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Variable/__pycache__/EXPRB53s3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EXPRB53s3.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Variable/__pycache__/EXPRB54s4.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EXPRB54s4.cpython-310.pyc -------------------------------------------------------------------------------- /Python/__pycache__/Divided_Difference.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/Divided_Difference.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/Cons_ExpInt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/Cons_ExpInt.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Variable/__pycache__/Var_ExpInt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/Var_ExpInt.cpython-310.pyc -------------------------------------------------------------------------------- /Python/__pycache__/real_Leja_linear_exp.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/real_Leja_linear_exp.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Constant/__pycache__/Rosenbrock_Euler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/Rosenbrock_Euler.cpython-310.pyc -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_8.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.387047 2 | 3 | Number of matrix-vector products = 1967 4 | 5 | Step size 6 | 2.5e-05 7 | 8 | Time steps 9 | 41 10 | 
-------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_1000.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.189571 2 | 3 | Number of matrix-vector products = 1578 4 | 5 | Step size 6 | 0.003125 7 | 8 | Time steps 9 | 2 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_20.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.661831 2 | 3 | Number of matrix-vector products = 2914 4 | 5 | Step size 6 | 6.25e-05 7 | 8 | Time steps 9 | 80 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_30.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.479964 2 | 3 | Number of matrix-vector products = 2348 4 | 5 | Step size 6 | 9.375e-05 7 | 8 | Time steps 9 | 54 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_4.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:02.943203 2 | 3 | Number of matrix-vector products = 9220 4 | 5 | Step size 6 | 1.25e-05 7 | 8 | Time steps 9 | 400 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_40.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.399840 2 | 3 | Number of matrix-vector products = 2290 4 | 5 | Step 
size 6 | 0.000125 7 | 8 | Time steps 9 | 40 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_60.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.292836 2 | 3 | Number of matrix-vector products = 1914 4 | 5 | Step size 6 | 0.0001875 7 | 8 | Time steps 9 | 27 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_8.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.595718 2 | 3 | Number of matrix-vector products = 6031 4 | 5 | Step size 6 | 2.5e-05 7 | 8 | Time steps 9 | 201 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_1000.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.232522 2 | 3 | Number of matrix-vector products = 1756 4 | 5 | Step size 6 | 0.003125 7 | 8 | Time steps 9 | 2 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_20.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.690699 2 | 3 | Number of matrix-vector products = 3412 4 | 5 | Step size 6 | 6.25e-05 7 | 8 | Time steps 9 | 80 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_30.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.493130 2 | 3 | Number 
of matrix-vector products = 2636 4 | 5 | Step size 6 | 9.375e-05 7 | 8 | Time steps 9 | 54 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_40.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.388064 2 | 3 | Number of matrix-vector products = 2188 4 | 5 | Step size 6 | 0.000125 7 | 8 | Time steps 9 | 40 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_60.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.282080 2 | 3 | Number of matrix-vector products = 1781 4 | 5 | Step size 6 | 0.0001875 7 | 8 | Time steps 9 | 27 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_0.10/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:21.486156 2 | 3 | Number of matrix-vector products = 57607 4 | 5 | Step size 6 | 3.125e-07 7 | 8 | Time steps 9 | 3201 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_1.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:02.251305 2 | 3 | Number of matrix-vector products = 8753 4 | 5 | Step size 6 | 3.125e-06 7 | 8 | Time steps 9 | 321 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_10.00/Results.txt: 
-------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.294953 2 | 3 | Number of matrix-vector products = 1664 4 | 5 | Step size 6 | 3.125e-05 7 | 8 | Time steps 9 | 32 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_2.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.216480 2 | 3 | Number of matrix-vector products = 4967 4 | 5 | Step size 6 | 6.25e-06 7 | 8 | Time steps 9 | 161 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_4.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.640484 2 | 3 | Number of matrix-vector products = 3054 4 | 5 | Step size 6 | 1.25e-05 7 | 8 | Time steps 9 | 80 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_10.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.315484 2 | 3 | Number of matrix-vector products = 5307 4 | 5 | Step size 6 | 3.125e-05 7 | 8 | Time steps 9 | 160 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_50.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.321794 2 | 3 | Number of matrix-vector products = 1849 4 | 5 | Step size 6 | 0.00015625 7 | 8 | Time steps 9 | 32 10 | -------------------------------------------------------------------------------- 
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_10.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.252215 2 | 3 | Number of matrix-vector products = 5158 4 | 5 | Step size 6 | 3.125e-05 7 | 8 | Time steps 9 | 160 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_50.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.335634 2 | 3 | Number of matrix-vector products = 1934 4 | 5 | Step size 6 | 0.00015625 7 | 8 | Time steps 9 | 32 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_1.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:21.945567 2 | 3 | Number of matrix-vector products = 48008 4 | 5 | Step size 6 | 3.125e-06 7 | 8 | Time steps 9 | 1601 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_10.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:02.522143 2 | 3 | Number of matrix-vector products = 9600 4 | 5 | Step size 6 | 3.125e-05 7 | 8 | Time steps 9 | 160 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_2.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:11.431219 2 | 3 | Number of matrix-vector products = 28885 4 | 5 | Step size 6 | 6.25e-06 7 | 8 | Time steps 9 | 800 10 | 
-------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_20.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.338655 2 | 3 | Number of matrix-vector products = 6160 4 | 5 | Step size 6 | 6.25e-05 7 | 8 | Time steps 9 | 80 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_30.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.962021 2 | 3 | Number of matrix-vector products = 4937 4 | 5 | Step size 6 | 9.375e-05 7 | 8 | Time steps 9 | 54 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_4.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:06.107865 2 | 3 | Number of matrix-vector products = 17200 4 | 5 | Step size 6 | 1.25e-05 7 | 8 | Time steps 9 | 400 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_40.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.763579 2 | 3 | Number of matrix-vector products = 4198 4 | 5 | Step size 6 | 0.000125 7 | 8 | Time steps 9 | 40 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_50.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.627861 2 | 3 | Number of matrix-vector products = 3616 
4 | 5 | Step size 6 | 0.00015625 7 | 8 | Time steps 9 | 32 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_60.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.546479 2 | 3 | Number of matrix-vector products = 3233 4 | 5 | Step size 6 | 0.0001875 7 | 8 | Time steps 9 | 27 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_8.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:03.125937 2 | 3 | Number of matrix-vector products = 10820 4 | 5 | Step size 6 | 2.5e-05 7 | 8 | Time steps 9 | 201 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_1.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:22.839835 2 | 3 | Number of matrix-vector products = 54408 4 | 5 | Step size 6 | 3.125e-06 7 | 8 | Time steps 9 | 1601 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_10.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:02.384218 2 | 3 | Number of matrix-vector products = 7849 4 | 5 | Step size 6 | 3.125e-05 7 | 8 | Time steps 9 | 160 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_20.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed 
(secs): 0:00:01.367789 2 | 3 | Number of matrix-vector products = 5840 4 | 5 | Step size 6 | 6.25e-05 7 | 8 | Time steps 9 | 80 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_30.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.960106 2 | 3 | Number of matrix-vector products = 4667 4 | 5 | Step size 6 | 9.375e-05 7 | 8 | Time steps 9 | 54 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_4.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:06.008707 2 | 3 | Number of matrix-vector products = 16799 4 | 5 | Step size 6 | 1.25e-05 7 | 8 | Time steps 9 | 400 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_40.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.728547 2 | 3 | Number of matrix-vector products = 3881 4 | 5 | Step size 6 | 0.000125 7 | 8 | Time steps 9 | 40 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_50.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.613602 2 | 3 | Number of matrix-vector products = 3456 4 | 5 | Step size 6 | 0.00015625 7 | 8 | Time steps 9 | 32 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_60.00/Results.txt: 
-------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.534762 2 | 3 | Number of matrix-vector products = 3139 4 | 5 | Step size 6 | 0.0001875 7 | 8 | Time steps 9 | 27 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_8.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:03.096457 2 | 3 | Number of matrix-vector products = 10608 4 | 5 | Step size 6 | 2.5e-05 7 | 8 | Time steps 9 | 201 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_10.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:03.706325 2 | 3 | Number of matrix-vector products = 14400 4 | 5 | Step size 6 | 3.125e-05 7 | 8 | Time steps 9 | 160 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_20.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.982020 2 | 3 | Number of matrix-vector products = 9121 4 | 5 | Step size 6 | 6.25e-05 7 | 8 | Time steps 9 | 80 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_30.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.459127 2 | 3 | Number of matrix-vector products = 7616 4 | 5 | Step size 6 | 9.375e-05 7 | 8 | Time steps 9 | 54 10 | -------------------------------------------------------------------------------- 
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_4.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:08.574330 2 | 3 | Number of matrix-vector products = 24043 4 | 5 | Step size 6 | 1.25e-05 7 | 8 | Time steps 9 | 400 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_40.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.135677 2 | 3 | Number of matrix-vector products = 6120 4 | 5 | Step size 6 | 0.000125 7 | 8 | Time steps 9 | 40 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_50.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.913770 2 | 3 | Number of matrix-vector products = 5440 4 | 5 | Step size 6 | 0.00015625 7 | 8 | Time steps 9 | 32 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_60.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.826547 2 | 3 | Number of matrix-vector products = 5327 4 | 5 | Step size 6 | 0.0001875 7 | 8 | Time steps 9 | 27 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_8.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:04.623961 2 | 3 | Number of matrix-vector products = 15024 4 | 5 | Step size 6 | 2.5e-05 7 | 8 | Time steps 9 | 201 10 | 
-------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_0.10/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:01:48.492381 2 | 3 | Number of matrix-vector products = 320002 4 | 5 | Step size 6 | 3.125e-07 7 | 8 | Time steps 9 | 16000 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_0.05/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:06:46.518278 2 | 3 | Number of matrix-vector products = 512000 4 | 5 | Step size 6 | 1.5625e-07 7 | 8 | Time steps 9 | 32000 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_0.10/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:03:24.095980 2 | 3 | Number of matrix-vector products = 305060 4 | 5 | Step size 6 | 3.125e-07 7 | 8 | Time steps 9 | 16000 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_0.10/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:05:10.953690 2 | 3 | Number of matrix-vector products = 403723 4 | 5 | Step size 6 | 3.125e-07 7 | 8 | Time steps 9 | 16000 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_1.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.351668 2 | 3 | Number of 
matrix-vector products = 5513 4 | 5 | Step size 6 | 5.555555555555557e-06 7 | 8 | Time steps 9 | 181 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_10.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.169843 2 | 3 | Number of matrix-vector products = 936 4 | 5 | Step size 6 | 5.5555555555555565e-05 7 | 8 | Time steps 9 | 18 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_2.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.664692 2 | 3 | Number of matrix-vector products = 2790 4 | 5 | Step size 6 | 1.1111111111111113e-05 7 | 8 | Time steps 9 | 90 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_4.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.407233 2 | 3 | Number of matrix-vector products = 2234 4 | 5 | Step size 6 | 2.2222222222222227e-05 7 | 8 | Time steps 9 | 46 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_8.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.206144 2 | 3 | Number of matrix-vector products = 1116 4 | 5 | Step size 6 | 4.444444444444445e-05 7 | 8 | Time steps 9 | 23 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_1.00/Results.txt: 
-------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.910957 2 | 3 | Number of matrix-vector products = 7496 4 | 5 | Step size 6 | 5.555555555555557e-06 7 | 8 | Time steps 9 | 181 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_2.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:01.011472 2 | 3 | Number of matrix-vector products = 4125 4 | 5 | Step size 6 | 1.1111111111111113e-05 7 | 8 | Time steps 9 | 90 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_4.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.569080 2 | 3 | Number of matrix-vector products = 2957 4 | 5 | Step size 6 | 2.2222222222222227e-05 7 | 8 | Time steps 9 | 46 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_8.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.300638 2 | 3 | Number of matrix-vector products = 1582 4 | 5 | Step size 6 | 4.444444444444445e-05 7 | 8 | Time steps 9 | 23 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_300.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.111079 2 | 3 | Number of matrix-vector products = 915 4 | 5 | Step size 6 | 0.0009375000000000001 7 | 8 | Time steps 9 | 6 10 | 
-------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_0.10/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:17.245968 2 | 3 | Number of matrix-vector products = 47307 4 | 5 | Step size 6 | 5.555555555555557e-07 7 | 8 | Time steps 9 | 1801 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_10.00/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:00.242730 2 | 3 | Number of matrix-vector products = 1332 4 | 5 | Step size 6 | 5.5555555555555565e-05 7 | 8 | Time steps 9 | 18 10 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB53s3/N_cfl_0.10/Results.txt: -------------------------------------------------------------------------------- 1 | Time elapsed (secs): 0:00:24.081329 2 | 3 | Number of matrix-vector products = 64007 4 | 5 | Step size 6 | 5.555555555555557e-07 7 | 8 | Time steps 9 | 1801 10 | -------------------------------------------------------------------------------- /Python/Jacobian.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ###? 
def Jacobian(RHS, u, v, rhs_u, *args):
    """
    Approximate the Jacobian-vector product J(u) * v without forming J(u),
    using a first-order forward finite difference of the RHS function:

        J(u) * v ~ (RHS(u + epsilon*v) - RHS(u))/epsilon

    Parameters
    ----------
    RHS         : user-defined function
                    RHS function, called here as RHS(x, *args)
    u           : numpy array
                    State at which the Jacobian is evaluated
    v           : numpy array
                    Vector to which the Jacobian is applied
    rhs_u       : numpy array
                    Value used for RHS(u) in the difference quotient; it is
                    passed in (not recomputed), so this function evaluates
                    RHS exactly once per call
    *args       : additional positional arguments forwarded to RHS

    Returns
    -------
    Jacobian_vector : numpy array
                    Finite-difference approximation of J(u) * v

    """

    ###* epsilon is normalised to norm(u). If norm(u) is 0 (e.g. a zero initial state),
    ###* the scaled epsilon would be 0 and the quotient below would be 0/0 = NaN,
    ###* so fall back to the unscaled step size in that case.
    norm_u = np.linalg.norm(u)
    epsilon = 1e-7 * norm_u if norm_u > 0 else 1e-7

    ###* J(u) * v = (RHS(u + epsilon*v) - RHS(u))/epsilon
    Jacobian_vector = (RHS(u + (epsilon * v), *args) - rhs_u)/epsilon

    return Jacobian_vector
//? Periodic BC
//* Map the 2D grid index (ii, jj) of an N x N periodic grid to the flattened
//* index N*ii + jj.
//*
//* Each index is wrapped at most once, so the result is only inside
//* [0, N*N) when ii and jj lie in [-N, 2N); this covers stencils that
//* reach less than one full period beyond the boundary.
//*
//* The function is defined in a header, so it is marked 'inline' to avoid
//* multiple-definition link errors when the header is included from more
//* than one translation unit.
#ifdef __CUDACC__
__host__ __device__
#endif
inline int PBC(int ii, int jj, int N)
{
    //* Wrap the first index
    if (ii < 0)
        ii = ii + N;
    if (ii >= N)
        ii = ii - N;

    //* Wrap the second index
    if (jj < 0)
        jj = jj + N;
    if (jj >= N)
        jj = jj - N;

    return N*ii + jj;
}
8 | 9 | """ 10 | 11 | import sys 12 | sys.path.insert(1, "../") 13 | 14 | from Rosenbrock_Euler import Rosenbrock_Euler #! 2nd order 15 | from EXPRB32 import EXPRB32 #! 3rd order 16 | from EXPRB43 import EXPRB43 #! 4th order 17 | from EXPRB42 import EXPRB42 #! 4th order 18 | from EXPRB53s3 import EXPRB53s3 #! 5th order 19 | 20 | from EPIRK4s3 import EPIRK4s3 #! 4th order 21 | from EPIRK4s3A import EPIRK4s3A #! 4th order 22 | from EPIRK4s3B import EPIRK4s3B #! 4th order 23 | # from EPIRK5P1 import EPIRK5P1 #! 5th order 24 | 25 | from EPI3 import EPI3 #! 3rd order 26 | from EPI4 import EPI4 #! 4th order -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Pranab J. Deka 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_1.00/Final_data.txt: -------------------------------------------------------------------------------- 1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_0.10/Final_data.txt: -------------------------------------------------------------------------------- 1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_1.00/Final_data.txt: -------------------------------------------------------------------------------- 1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_4.00/Final_data.txt: -------------------------------------------------------------------------------- 1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan -------------------------------------------------------------------------------- 
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB53s3/N_cfl_0.10/Final_data.txt: -------------------------------------------------------------------------------- 1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan -------------------------------------------------------------------------------- /CUDA/Integrators/Readme.md: -------------------------------------------------------------------------------- 1 | # [LeXInt::CUDA::Integrators](#) 2 | 3 | Here, we have a collection of exponential integrators. Rosenbrock-Euler and EPIRK4s3B do not have an embedded error estimate, whilst the rest do. Exponential integrators call the ``real_Leja_phi`` function to approximate $\varphi_l(z)$ functions applied to the relevant vectors. 
namespace LeXInt
{
    //* 'inline' because this non-template function is defined in a header;
    //* without it, including the header from two translation units produces
    //* multiple-definition link errors.
    inline std::vector<double> Divided_Differences(const std::vector<double>& X, std::vector<double> coeffs)
    {
        //* -------------------------------------------------------------------------
        //* Compute the coefficients for polynomial interpolation
        //* (Newton divided differences).
        //*
        //*    Parameters
        //*    -----------
        //*    X       : vector
        //*                Set of Leja (interpolation) points; they have to be
        //*                pairwise distinct, otherwise a division by zero occurs
        //*
        //*    coeffs  : vector
        //*                Function values at the points X; taken by value, so the
        //*                caller's vector is not modified
        //*
        //*    Returns
        //*    ----------
        //*    coeffs  : vector
        //*                Divided differences; entry ii depends on X[0..ii] and
        //*                the input values coeffs[0..ii]
        //* -------------------------------------------------------------------------

        using size_type = std::vector<double>::size_type;

        //* Number of interpolation (Leja) points; never index past the end of 'coeffs'
        const size_type N = (coeffs.size() < X.size()) ? coeffs.size() : X.size();

        //* Compute the divided differences: after the inner loop, coeffs[ii] has been
        //* differenced against every (already final) coefficient with a smaller index
        for (size_type ii = 1; ii < N; ii++)
        {
            for (size_type jj = 0; jj < ii; jj++)
            {
                coeffs[ii] = (coeffs[ii] - coeffs[jj])/(X[ii] - X[jj]);
            }
        }

        return coeffs;
    }
}
8 | 9 | ## Requirements 10 | - gcc and nvcc compilers 11 | - NVIDIA GPU 12 | - CUDA 11.2 (or later) 13 | ## Remarks 14 | 1. Before running the test files, please select (comment or uncomment) the desired problem and integrator (lines 79 - 89) in *Test_2D.cpp* or *Test_2D.cu*. 15 | 2. If you get the error *"Warning!! Max. number of Leja points reached without convergence!!"*, consider reducing the time step size (dt) or increasing the number of Leja points (line 130 in *Leja.hpp*). 16 | 3. For multidimensional problems, the (input/output) data containers are expected to lie contiguous in memory. 17 | 4. If the user-specified RHS function consists of additional parameters, one could potentially construct a ***class*** and have these supplementary parameters localised to the ***class***: 18 | ```cpp 19 | struct RHS 20 | { 21 | RHS(*args) 22 | void operator(input, output) 23 | { 24 | rhs(input, output, *args) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /Python/Readme.md: -------------------------------------------------------------------------------- 1 | # Python 2 | 3 | ![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue) 4 | ![VS Code](https://img.shields.io/badge/VSCode-0078D4?style=for-the-badge&logo=visual%20studio%20code&logoColor=white) 5 | ![Github](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white) 6 | 7 | Test examples for constant and adaptive (or variable) step size implementation for the Leja-based exponential integrators can be found in *Test → Constant_test.py* or *Adaptive_test.py*. Problems considered include the Burgers' equation and the Allen-Cahn equation. To run scripts, use the following commands: `python3 Constant_test.py` or `python3 Adaptive_test.py`. To add other problems, simply define the relevant **RHS_function** and the desired initial condition(s). 
8 | 9 | ## Requirements 10 | - Python 3.10 (or later) 11 | 12 | ## Remarks 13 | 1. It is expected that the rhs function is defined in the following way: 14 | 15 | ```python 16 | def RHS_function(u): 17 | 18 | ### stencil_applied_to_u = *Apply stencil to 'u'* or *Compute matrix-vector products* 19 | 20 | return stencil_applied_to_u 21 | ``` 22 | If different stencils are used for different physical phenomena (e.g. centered differences for diffusion and upwind for advection), the two stencils applied to 'u' vector are to be combined together. 23 | 24 | 2. LeXInt can be used for multidimensional problems, once the state variable(s) is(are) vectorised or flattened. 25 | 26 | 3. RHS function calls are expected to be the most expensive part of any computation. However, if the RHS function is relatively simple, or if the problem size is small, the computation of the polynomial coefficients using divided differences may become substantial. To avoid unnecessary computation of polynomial coefficients, we set the default number of Leja points (to be used) to 500 in the test problems. If you get the warning *"Warning!! Max. # of Leja points reached without convergence!!"*, consider increasing the number of Leja points to 1000, 2000, etc. or reducing the time step size (dt). 27 | -------------------------------------------------------------------------------- /CUDA/Timer.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #ifdef _OPENMP 9 | #include 10 | #endif 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | namespace LeXInt 17 | { 18 | /// This timer class measures the elapsed time between two events. Timers can be 19 | /// started and stopped repeatedly. The total time as well as the average time 20 | /// between two events can be queried using the total() and average() methods, 21 | /// respectively. 
/// Accumulating stopwatch: every start()/stop() pair adds one measurement.
/// total() is the sum of all measurements, count() their number and
/// average() their mean.
struct timer {
    timespec t_start{};     // Time stamp taken by the most recent start()
    bool running;           // true between start() and the matching stop()
    double elapsed = 0.0;   // Sum of all measured intervals (seconds)
    unsigned counter;       // Number of completed measurements

    timer() {
        counter = 0;
        running = false;
    }

    /// Begin a measurement.
    void start() {
        // A monotonic clock is not affected by wall-clock adjustments,
        // so measured intervals can never come out negative.
        clock_gettime(CLOCK_MONOTONIC, &t_start);
        running = true;
    }

    /// Discard all accumulated measurements.
    void restart() {
        elapsed = 0.0;
        counter = 0;
    }

    /// End the current measurement and return its duration in seconds.
    /// Returns 0.0 (and prints a warning) if no measurement is in progress.
    double stop() {
        if(running == false) {
            ::std::cout << "WARNING: timer::stop() has been called without calling timer::start() first." << ::std::endl;
            return 0.0;
        } else {
            timespec t_end;
            clock_gettime(CLOCK_MONOTONIC, &t_end);

            // Convert both differences to double directly; this avoids
            // narrowing time_t to int and handles a negative nanosecond
            // difference without an explicit borrow.
            double sec  = (double)(t_end.tv_sec - t_start.tv_sec);
            double nsec = (double)(t_end.tv_nsec - t_start.tv_nsec);
            double t = sec + nsec/1e9;

            counter++;
            elapsed += t;

            // The measurement is finished; a further stop() without a new
            // start() must not re-measure from the old time stamp.
            running = false;
            return t;
        }
    }

    /// Sum of all measured intervals in seconds.
    double total() {
        return elapsed;
    }

    /// Mean duration of a measurement in seconds; 0.0 if nothing has been
    /// measured yet (instead of 0/0 = NaN).
    double average() {
        if(counter == 0)
            return 0.0;
        return elapsed/double(counter);
    }

    /// Number of completed measurements.
    unsigned count() {
        return counter;
    }
};
LeXInt functions 4 | from Jacobian import Jacobian 5 | from linear_phi import linear_phi 6 | 7 | ################################################################################################ 8 | 9 | def Rosenbrock_Euler(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag): 10 | """ 11 | Parameters 12 | ---------- 13 | u : numpy array 14 | State variable(s) 15 | T_final : double 16 | Step size 17 | RHS_function : user-defined function 18 | RHS function 19 | c : double 20 | Shifting factor 21 | Gamma : double 22 | Scaling factor 23 | Leja_X : numpy array 24 | Array of Leja points 25 | tol : double 26 | Accuracy of the polynomial so formed 27 | Real_Imag : int 28 | 0 - Real, 1 - Imaginary 29 | 30 | Returns 31 | ------- 32 | u_roseu : numpy array 33 | Output state variable(s) after time dt (2nd order) 34 | num_rhs_calls : int 35 | # of RHS calls 36 | 37 | Reference: 38 | 39 | D. A. Pope 40 | An exponential method of numerical integration of ordinary differential equations, Commun. ACM 6 (8) (1963) 491-493. 41 | doi:10.1145/366707.367592 42 | 43 | """ 44 | 45 | ###? RHS evaluated at 'u' 46 | rhs_u = RHS_function(u) 47 | 48 | ###? Array of zeros vectors 49 | zero_vec = np.zeros(np.shape(u)) 50 | 51 | ###? dt * J(u).z 52 | Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u) 53 | 54 | ###? Interpolation of RHS(u) at 1 55 | u_flux, rhs_calls, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol) 56 | 57 | ###? 2nd order solution; u_roseu = u + phi_1(J(u) dt) f(u) dt 58 | u_roseu = u + u_flux 59 | 60 | ###? 
Proxy of computational cost 61 | num_rhs_calls = rhs_calls + 2 62 | 63 | return u_roseu, num_rhs_calls, substeps -------------------------------------------------------------------------------- /Python/Eigenvalues.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Thu Aug 8 20:22 2022 3 | 4 | @author: Pranab JD 5 | 6 | Description: - 7 | Functions to determine the largest eigenvalue of a 8 | matrix/related matrix. 9 | 10 | Gershgorin's disks can be used only if the matrix is 11 | explicitly available. For matrix-free implementation, 12 | choose power iterations. 13 | """ 14 | 15 | import sys 16 | import numpy as np 17 | 18 | sys.path.insert(1, "./LeXInt/Python/") 19 | from Jacobian import Jacobian 20 | 21 | def Gershgorin(A): 22 | """ 23 | Parameters 24 | ---------- 25 | A : N x N matrix 26 | 27 | Returns 28 | ------- 29 | eig_real : Largest real eigenvalue (negative magnitude) 30 | eig_imag : Largest imaginary eigenvalue 31 | 32 | """ 33 | 34 | ###? Divide matrix 'A' into Hermitian and skew-Hermitian 35 | A_Herm = (A + A.T.conj())/2 36 | A_SkewHerm = (A - A.T.conj())/2 37 | 38 | eig_real = - np.max(np.sum(abs(A_Herm), 1)) # Has to be NEGATIVE 39 | eig_imag = np.max(np.sum(abs(A_SkewHerm), 1)) 40 | 41 | return eig_real, eig_imag 42 | 43 | def Power_iteration(u, RHS_function): 44 | """ 45 | Parameters 46 | ---------- 47 | u : Input state variable(s) 48 | RHS_function : RHS function 49 | 50 | Returns 51 | ------- 52 | largest_eigen_value : Largest eigenvalue (within 2% accuracy) 53 | 3*ii : Number of RHS calls 54 | 55 | """ 56 | 57 | tol = 0.02 #? 2% tolerance 58 | niters = 1000 #? Max. number of iterations 59 | eigenvalue_ii_1 = 0 #? Eigenvalue at ii-1 60 | vector = np.ones(np.shape(u)) #? Initial estimate of eigenvector 61 | rhs_u = RHS_function(u) #? RHS evaluated at 'u' 62 | 63 | for ii in range(niters): 64 | 65 | ###? 
Compute new eigenvector 66 | eigenvector = Jacobian(RHS_function, u, vector, rhs_u) 67 | 68 | ###? Norm of eigenvector = eigenvalue 69 | eigenvalue = np.linalg.norm(eigenvector) 70 | 71 | ###? Normalize eigenvector to eigenvalue; new estimate of eigenvector 72 | vector = eigenvector/eigenvalue 73 | 74 | ###? Check convergence for eigenvalues (eigenvalues converge faster than eigenvectors) 75 | if (abs(eigenvalue - eigenvalue_ii_1) <= (tol*eigenvalue + tol)): 76 | largest_eigen_value = eigenvalue 77 | break 78 | 79 | ###? This value becomes the previous one 80 | eigenvalue_ii_1 = eigenvalue 81 | 82 | return largest_eigen_value, ii+1 83 | -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Time_order_plots.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Thu Nov 2 11:55 2023 3 | 4 | @author: PJD 5 | """ 6 | 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | ### ======================================================= ### 11 | 12 | ### Reference Solution 13 | path = "./Burgers/T_final_0.005/N_400_eta_10/" 14 | file_ref = path + "/EXPRB42/N_cfl_0.10/Final_data.txt" 15 | data_set_ref = np.loadtxt(file_ref) 16 | N = len(data_set_ref) 17 | 18 | def l1_error(data_set): 19 | return np.mean(abs(data_set - data_set_ref)) 20 | 21 | def compute_error(solver): 22 | 23 | file_1 = path + solver + "/N_cfl_10.00/Final_data.txt" 24 | file_2 = path + solver + "/N_cfl_20.00/Final_data.txt" 25 | file_3 = path + solver + "/N_cfl_30.00/Final_data.txt" 26 | file_4 = path + solver + "/N_cfl_40.00/Final_data.txt" 27 | file_5 = path + solver + "/N_cfl_50.00/Final_data.txt" 28 | file_6 = path + solver + "/N_cfl_60.00/Final_data.txt" 29 | 30 | data_set_1 = np.loadtxt(file_1) 31 | data_set_2 = np.loadtxt(file_2) 32 | data_set_3 = np.loadtxt(file_3) 33 | data_set_4 = np.loadtxt(file_4) 34 | data_set_5 = np.loadtxt(file_5) 35 | data_set_6 = np.loadtxt(file_6) 36 | 
37 | error_A = l1_error(data_set_1) 38 | error_B = l1_error(data_set_2) 39 | error_C = l1_error(data_set_3) 40 | error_D = l1_error(data_set_4) 41 | error_E = l1_error(data_set_5) 42 | error_F = l1_error(data_set_6) 43 | 44 | error_array = np.array([error_A, error_B, error_C, error_D, error_E, error_F]) 45 | 46 | return error_array 47 | 48 | ### ======================================================= ### 49 | 50 | ### Given Data Sets 51 | solver_1 = "EXPRB32" 52 | solver_2 = "EXPRB42" 53 | solver_3 = "EPI3" 54 | solver_4 = "EPI4" 55 | 56 | error_1 = compute_error(solver_1) 57 | error_2 = compute_error(solver_2) 58 | error_3 = compute_error(solver_3) 59 | error_4 = compute_error(solver_4) 60 | 61 | ### ======================================================= ### 62 | 63 | ### Plots 64 | X = np.array([10, 20, 30, 40, 50, 60]) 65 | 66 | plt.figure(figsize = (8, 6), dpi = 200) 67 | 68 | plt.loglog(X, error_1, 'bo-', label = "EXPRB32") 69 | plt.loglog(X, error_2, 'rd-', label = "EXPRB42") 70 | plt.loglog(X, error_3, 'gH-', label = "EPI3") 71 | plt.loglog(X, error_4, 'mP-', label = "EPI4") 72 | 73 | 74 | plt.loglog(X, 6e-12*X**3, "c-", label = "O(3)") 75 | plt.loglog(X, 1e-14*X**4, "k-", label = "O(4)") 76 | 77 | plt.tick_params(axis = 'x', which = 'major', labelsize = 16) 78 | plt.tick_params(axis = 'y', which = 'major', labelsize = 16) 79 | 80 | plt.title("Temporal Order", fontsize = 20) 81 | plt.xlabel("Step size", fontsize = 16) 82 | plt.ylabel("l1 error", fontsize = 16) 83 | 84 | plt.legend(prop = {'size': 14}, ncol = 2) 85 | 86 | plt.savefig(path + "/Temporal_order.eps") 87 | 88 | ### ======================================================= ### -------------------------------------------------------------------------------- /CUDA/Integrators/Rosenbrock_Euler.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "../Leja.hpp" 4 | #include "../Phi_functions.hpp" 5 | 6 | namespace LeXInt 7 | { 8 | //? 
Phi functions interpolated on real Leja points 9 | template 10 | void Ros_Eu(rhs& RHS, //? RHS function 11 | double* u, //? Input state variable(s) 12 | double* u_exprb2, //? Output state variable(s) 13 | double* auxiliary_expint, //? Internal auxiliary variables 14 | double* auxiliary_Leja, //? Internal auxiliary variables (Leja) 15 | size_t N, //? Number of grid points 16 | vector& Leja_X, //? Array of Leja points 17 | double c, //? Shifting factor 18 | double Gamma, //? Scaling factor 19 | double rtol, //? Relative tolerance (normalised desired accuracy) 20 | double atol, //? Absolute tolerance 21 | double dt, //? Step size 22 | int& iters, //? # of iterations needed to converge (iteration variable) 23 | bool GPU, //? false (0) --> CPU; true (1) --> GPU 24 | GPU_handle& cublas_handle //? CuBLAS handle 25 | ) 26 | { 27 | //* ------------------------------------------------------------------------- 28 | 29 | //! u, u_exprb2, auxiliary_expint, auxiliary_Leja, and auxiliary_NL 30 | //! are device vectors if GPU support is activated. 31 | 32 | //* Returns 33 | //* ---------- 34 | //* u_exprb2 : double* 35 | //* 2nd order solution after time dt 36 | //* 37 | //* 38 | //* Reference: 39 | //* D. A. Pope, An exponential method of numerical integration of ordinary differential equations, Commun. ACM 6 (8) (1963) 491-493. 40 | //* doi:10.1145/366707.367592 41 | 42 | //* ------------------------------------------------------------------------- 43 | 44 | //? Assign names and variables 45 | double* f_u = &auxiliary_expint[0]; 46 | 47 | //? RHS evaluated at 'u' multiplied by 'dt'; f_u = RHS(u)*dt 48 | RHS(u, f_u); 49 | axpby(dt, f_u, f_u, N, GPU); 50 | 51 | //? Interpolation of RHS(u) at 1; phi_1(J(u) dt) f(u) dt 52 | real_Leja_phi(RHS, u, f_u, u_exprb2, auxiliary_Leja, N, {1.0}, 53 | phi_1, Leja_X, c, Gamma, rtol, atol, dt, iters, GPU, cublas_handle); 54 | 55 | //? 
2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt 56 | axpby(1.0, u, 1.0, u_exprb2, u_exprb2, N, GPU); 57 | } 58 | } -------------------------------------------------------------------------------- /CUDA/Eigenvalues.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Kernels_CUDA_Cpp.hpp" 4 | #include "Jacobian_vector.hpp" 5 | 6 | namespace LeXInt 7 | { 8 | //! Power Iterations 9 | template 10 | void Power_iterations(rhs& RHS, //? RHS function 11 | double* u, //? Input state variable(s) 12 | size_t N, //? Number of grid points 13 | double& largest_eigenvalue, //? Largest eigenvalue (output) 14 | double* auxiliary, //? Internal auxiliary variables (Jv) 15 | bool GPU, //? false (0) --> CPU; true (1) --> GPU 16 | GPU_handle& cublas_handle //? CuBLAS handle 17 | ) 18 | { 19 | double tol = 0.01; //? 1% tolerance 20 | double eigenvalue_ii = 0.0; //? Eigenvalue at ii 21 | double eigenvalue_ii_1 = 0.0; //? Eigenvalue at ii-1 22 | int niters = 1000; //? Max. number of iterations 23 | 24 | //? Internal vectors are slices of 'auxiliary' at offsets 0, N and 2*N (nothing is allocated here) 25 | double* init_vector = &auxiliary[0]; 26 | double* eigenvector = &auxiliary[N]; 27 | double* auxiliary_Jv = &auxiliary[2*N]; 28 | 29 | //? Set initial estimate of eigenvector = 1.0 30 | eigen_ones(init_vector, N, GPU); 31 | 32 | //? Iterate until convergence is reached (at most 'niters' iterations) 33 | for (int ii = 0; ii < niters; ii++) 34 | { 35 | //? Compute new eigenvector 36 | Jacobian_vector(RHS, u, init_vector, eigenvector, auxiliary_Jv, N, GPU, cublas_handle); 37 | 38 | //? Norm of eigenvector = eigenvalue 39 | eigenvalue_ii = l2norm(eigenvector, N, GPU, cublas_handle)/sqrt(N); 40 | 41 | //? Normalize eigenvector to eigenvalue; new estimate of eigenvector 42 | axpby(1.0/eigenvalue_ii, eigenvector, init_vector, N, GPU); 43 | 44 | //? 
Check convergence for eigenvalues (eigenvalues converge faster than eigenvectors) 45 | if (abs(eigenvalue_ii - eigenvalue_ii_1) <= (tol * eigenvalue_ii) + tol) 46 | { 47 | #ifdef __CUDACC__ 48 | //! Error Check 49 | cudaDeviceSynchronize(); 50 | gpuErrchk(cudaPeekAtLastError()); 51 | #endif 52 | 53 | //! Returns the largest eigenvalue in magnitude (needs to be multiplied by a safety factor). NOTE(review): assigned only on convergence; if 'niters' is exhausted, 'largest_eigenvalue' is left unchanged 54 | largest_eigenvalue = eigenvalue_ii; 55 | break; 56 | } 57 | 58 | //? This value becomes the previous one 59 | eigenvalue_ii_1 = eigenvalue_ii; 60 | } 61 | } 62 | } -------------------------------------------------------------------------------- /Python/Constant/EPI3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ###! LeXInt functions 4 | from Jacobian import Jacobian 5 | from linear_phi import linear_phi 6 | 7 | ################################################################################################ 8 | 9 | def EPI3(u, u_prev, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag): 10 | """ 11 | Parameters 12 | ---------- 13 | u : numpy array 14 | State variable(s) at the current time step (n) 15 | u_prev : numpy array 16 | State variable(s) at the previous time step (n - 1) 17 | T_final : double 18 | Step size 19 | RHS_function : user-defined function 20 | RHS function 21 | c : double 22 | Shifting factor 23 | Gamma : double 24 | Scaling factor 25 | Leja_X : numpy array 26 | Array of Leja points 27 | tol : double 28 | Accuracy of the polynomial so formed 29 | Real_Imag : int 30 | 0 - Real, 1 - Imaginary 31 | 32 | Returns 33 | ------- 34 | u_epi3 : numpy array 35 | Output state variable(s) after time T_final (3rd order) 36 | num_rhs_calls : int 37 | # of RHS calls 38 | 39 | Reference: 40 | 41 | M. Tokman 42 | Efficient integration of large stiff systems of ODEs with exponential propagation iterative (EPI) methods, J. Comput. Phys. 
213 (2) (2006) 748-776 43 | doi:10.1016/j.jcp.2005.08.032 44 | 45 | """ 46 | 47 | ###? RHS evaluated at 'u' 48 | rhs_u = RHS_function(u) 49 | 50 | ###? Array of zeros vectors 51 | zero_vec = np.zeros(np.shape(u)) 52 | 53 | ###? dt * J(u).z 54 | Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u) 55 | 56 | ###? J(u) . u 57 | Jacobian_u = Jacobian(RHS_function, u, u, rhs_u) 58 | 59 | ###? Difference of nonlinear remainders at u^{n-1} 60 | R_1 = (RHS_function(u_prev) - Jacobian(RHS_function, u, u_prev, rhs_u)) - (rhs_u - Jacobian_u) 61 | 62 | ###? Interpolation 1; phi_1(J(u) dt) f(u) dt + 2/3 phi_2(J(u) dt) R(u^{n-1}) dt 63 | u_flux, rhs_calls, substeps = linear_phi([zero_vec, rhs_u*T_final, 2/3*R_1*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol) 64 | 65 | ###? Internal stage; 3rd order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + 2/3 phi_2(J(u) dt) R(u^{n-1}) dt 66 | u_epi3 = u + u_flux 67 | 68 | ###? Proxy of computational cost 69 | num_rhs_calls = rhs_calls + 4 70 | 71 | return u_epi3, num_rhs_calls, substeps -------------------------------------------------------------------------------- /Python/Variable/EXPRB32.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ###! 
LeXInt functions 4 | from Jacobian import Jacobian 5 | from linear_phi import linear_phi 6 | 7 | ################################################################################################ 8 | 9 | def EXPRB32(u, T_final, RHS_function, c, Gamma, Leja_X, tol, Real_Imag): 10 | """ 11 | Parameters 12 | ---------- 13 | u : numpy array 14 | State variable(s) 15 | T_final : double 16 | Step size 17 | RHS_function : user-defined function 18 | RHS function 19 | c : double 20 | Shifting factor 21 | Gamma : double 22 | Scaling factor 23 | Leja_X : numpy array 24 | Array of Leja points 25 | tol : double 26 | Accuracy of the polynomial so formed 27 | Real_Imag : int 28 | 0 - Real, 1 - Imaginary 29 | 30 | Returns 31 | ------- 32 | u_exprb2 : numpy array 33 | Output state variable(s) after time dt (2nd order) 34 | u_exprb3 : numpy array 35 | Output state variable(s) after time dt (3rd order) 36 | num_rhs_calls : int 37 | # of RHS calls 38 | 39 | Reference: 40 | 41 | M. Hochbruck, A. Ostermann, Exponential Integrators, Acta Numer. 19 (2010) 209-286. 42 | doi:10.1017/S0962492910000048 43 | 44 | """ 45 | 46 | ###? RHS evaluated at 'u' 47 | rhs_u = RHS_function(u) 48 | 49 | ###? Array of zeros vectors 50 | zero_vec = np.zeros(np.shape(u)) 51 | 52 | ###? dt * J(u).z 53 | Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u) 54 | 55 | ###? Interpolation 1; phi_1(J(u) dt) f(u) dt 56 | u_flux, rhs_calls_1 = linear_phi([zero_vec, rhs_u*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol) 57 | 58 | ###? Internal stage; 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt 59 | u_exprb2 = u + u_flux 60 | 61 | ###? Difference of nonlinear remainders at u_exprb2 62 | R_a = (RHS_function(u_exprb2) - Jacobian(RHS_function, u, u_exprb2, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u)) 63 | 64 | ###? 
Interpolation 2; phi_3(J(u) dt) R(a) dt 65 | u_nl, rhs_calls_2 = linear_phi([zero_vec, zero_vec, zero_vec, 2*R_a*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol) 66 | 67 | ###? 3rd order solution; u_3 = u_2 + 2 phi_3(J(u) dt) R(a) dt 68 | u_exprb3 = u_exprb2 + u_nl 69 | 70 | ###? Proxy of computational cost 71 | num_rhs_calls = rhs_calls_1 + rhs_calls_2 + 5 72 | 73 | return u_exprb2, u_exprb3, num_rhs_calls -------------------------------------------------------------------------------- /Python/Constant/EXPRB42.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ###! LeXInt functions 4 | from Jacobian import Jacobian 5 | from linear_phi import linear_phi 6 | 7 | ################################################################################################ 8 | 9 | def EXPRB42(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag): 10 | """ 11 | Parameters 12 | ---------- 13 | u : numpy array 14 | State variable(s) 15 | T_final : double 16 | Step size 17 | RHS_function : user-defined function 18 | RHS function 19 | c : double 20 | Shifting factor 21 | Gamma : double 22 | Scaling factor 23 | Leja_X : numpy array 24 | Array of Leja points 25 | tol : double 26 | Accuracy of the polynomial so formed 27 | Real_Imag : int 28 | 0 - Real, 1 - Imaginary 29 | 30 | Returns 31 | ------- 32 | u_exprb4 : numpy array 33 | Output state variable(s) after time dt (4th order) 34 | num_rhs_calls : int 35 | # of RHS calls 36 | 37 | Reference: 38 | 39 | V. T. Luan 40 | Fourth-order two-stage explicit exponential integrators for time-dependent PDEs, Appl. Numer. Math. 112 (2017) 91-103 41 | doi:10.1016/j.apnum.2016.10.008 42 | 43 | """ 44 | 45 | ###? RHS evaluated at 'u' 46 | rhs_u = RHS_function(u) 47 | 48 | ###? Array of zeros vectors 49 | zero_vec = np.zeros(np.shape(u)) 50 | 51 | ###? dt * J(u).z 52 | Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u) 53 | 54 | ###? J(u) . 
u 55 | Jacobian_u = Jacobian(RHS_function, u, u, rhs_u) 56 | 57 | ###? Interpolation of RHS(u) at 3/4; 3/4 phi_1(3/4 J(u) dt) f(u) dt 58 | u_flux_1, rhs_calls_1, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 3/4, c, Gamma, Leja_X, tol) 59 | 60 | ###? Internal stage 1; a = u + 3/4 phi_1(3/4 J(u) dt) f(u) dt 61 | a = u + u_flux_1 62 | 63 | ###? Difference of nonlinear remainders at a 64 | R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian_u) 65 | 66 | ###? Interpolation 2: phi_1(J(u) dt) f(u) dt + 32/9 phi_3(J(u) dt) R(a) dt 67 | u_flux, rhs_calls_2, substeps = linear_phi([zero_vec, rhs_u*T_final, zero_vec, 32/9*R_a*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol) 68 | 69 | ###? 4th order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + 32/9 phi_3(J(u) dt) R(a) dt 70 | u_exprb4 = u + u_flux 71 | 72 | ###? Proxy of computational cost 73 | num_rhs_calls = rhs_calls_1 + rhs_calls_2 + 4 74 | 75 | return u_exprb4, num_rhs_calls, substeps -------------------------------------------------------------------------------- /Python/linear_phi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from real_Leja_linear_exp import real_Leja_linear_exp 3 | 4 | def A_tilde(A, B, v): 5 | """ 6 | Form the augmented matrix. 7 | 8 | Args: 9 | A (function handle R^n -> R^n) 10 | B (matrix, p*n) 11 | v (vector, n+p) 12 | 13 | Returns: 14 | y (vector) : A_tilde . v, where A_tilde = [A B; 0 K] and K = [0 I; 0 0] 15 | 16 | 17 | Reference: 18 | 19 | R.B. Sidje, Expokit: A Software Package for Computing Matrix Exponentials, ACM Trans. Math. Softw. 24 (1) (1998) 130 - 156. 
20 | doi:10.1145/285861.285868 21 | 22 | """ 23 | 24 | [p, n] = np.shape(B) 25 | 26 | y = np.concatenate([A(v[0:n]).reshape(1, n) + np.dot(v[n:n+p].reshape(1, p), B), 27 | [v[n+1:n+p]], 28 | np.array([0]).reshape(1, 1)], axis = 1) 29 | 30 | return y.reshape(np.shape(y)[1]) 31 | 32 | 33 | def linear_phi(interp_vector, T_final, substeps, Jacobian_vector, integrator_coeff, c, Gamma, Leja_X, tol): 34 | """ 35 | Evaluates a linear combination of the phi functions as the 36 | exponential of an augmented matrix. 37 | 38 | polynomial[0:n] = phi_0(A) u(:, 1) + phi_1(A) u(:, 2) + ... + phi_p(A) u(:, p+1) 39 | 40 | Args: 41 | interp_vector (vector n*(p+1)) : Vectors to be evaluated/interpolated 42 | T_final (double) : Step size 43 | Jacobian_vector (function handle) : Jacobian-vector product (multiplied by dt) 44 | c (double) : Shifting factor 45 | Gamma (double) : Scaling factor 46 | Leja_X (vector) : Array of Leja points 47 | tol (double) : Accuracy of the polynomial so formed 48 | 49 | Returns: 50 | polynomial[0:n] (vector) : Linear combination of the phi functions (returned as a tuple together with rhs_calls and substeps) 51 | 52 | 53 | Reference: 54 | 55 | R.B. Sidje, Expokit: A Software Package for Computing Matrix Exponentials, ACM Trans. Math. Softw. 24 (1) (1998) 130 - 156. 56 | doi:10.1145/285861.285868 57 | 58 | """ 59 | 60 | ############## --------------------- ############## 61 | 62 | ###TODO: Interpolate on either real Leja or imaginary Leja points 63 | # if Real_Imag == 0: 64 | # Leja_phi = real_Leja_phi 65 | # elif Real_Imag == 1: 66 | # Leja_phi = imag_Leja_phi 67 | # else: 68 | # print("Error!! 
Choose 0 for real or 1 for imaginary Leja points.") 69 | 70 | ############## --------------------- ############## 71 | 72 | [m, n] = np.shape(interp_vector) 73 | B = np.flipud(interp_vector[1:m]) 74 | p = m - 1 75 | 76 | Atx = lambda x: A_tilde(Jacobian_vector, B, x) 77 | 78 | v = np.concatenate([interp_vector[0], np.zeros(p-1), [1]]) 79 | 80 | polynomial, rhs_calls, substeps = real_Leja_linear_exp(v, T_final, substeps, Atx, integrator_coeff, c, Gamma, Leja_X, tol) 81 | 82 | return polynomial[0:n], rhs_calls, substeps -------------------------------------------------------------------------------- /Python/Constant/EXPRB32.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ###! LeXInt functions 4 | from Jacobian import Jacobian 5 | from linear_phi import linear_phi 6 | 7 | ################################################################################################ 8 | 9 | def EXPRB32(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag): 10 | """ 11 | Parameters 12 | ---------- 13 | u : numpy array 14 | State variable(s) 15 | T_final : double 16 | Step size 17 | RHS_function : user-defined function 18 | RHS function 19 | c : double 20 | Shifting factor 21 | Gamma : double 22 | Scaling factor 23 | Leja_X : numpy array 24 | Array of Leja points 25 | tol : double 26 | Accuracy of the polynomial so formed 27 | Real_Imag : int 28 | 0 - Real, 1 - Imaginary 29 | 30 | Returns 31 | ------- 32 | u_exprb2 : numpy array 33 | Output state variable(s) after time dt (2nd order) 34 | u_exprb3 : numpy array 35 | Output state variable(s) after time dt (3rd order) 36 | num_rhs_calls : int 37 | # of RHS calls 38 | 39 | Reference: 40 | 41 | M. Hochbruck and A. Ostermann 42 | Exponential Integrators, Acta Numer. 19 (2010) 209-286 43 | doi:10.1017/S0962492910000048 44 | 45 | """ 46 | 47 | ###? RHS evaluated at 'u' 48 | rhs_u = RHS_function(u) 49 | 50 | ###? 
Array of zeros vectors 51 | zero_vec = np.zeros(np.shape(u)) 52 | 53 | ###? dt * J(u).z 54 | Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u) 55 | 56 | ###? J(u) . u 57 | Jacobian_u = Jacobian(RHS_function, u, u, rhs_u) 58 | 59 | ###? Interpolation 1; phi_1(J(u) dt) f(u) dt 60 | u_flux, rhs_calls_1, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol) 61 | 62 | ###? Internal stage; 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt 63 | u_exprb2 = u + u_flux 64 | 65 | ###? Difference of nonlinear remainders at u_exprb2 66 | R_a = (RHS_function(u_exprb2) - Jacobian(RHS_function, u, u_exprb2, rhs_u)) - (rhs_u - Jacobian_u) 67 | 68 | ###? Interpolation 2; phi_3(J(u) dt) R(a) dt 69 | u_nl, rhs_calls_2, substeps = linear_phi([zero_vec, zero_vec, zero_vec, 2*R_a*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol) 70 | 71 | ###? 3rd order solution; u_3 = u_2 + 2 phi_3(J(u) dt) R(a) dt 72 | u_exprb3 = u_exprb2 + u_nl 73 | 74 | ###? Proxy of computational cost 75 | num_rhs_calls = rhs_calls_1 + rhs_calls_2 + 4 76 | 77 | return u_exprb3, num_rhs_calls, substeps -------------------------------------------------------------------------------- /Python/imag_Leja_exp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Divided_Difference import Divided_Difference 3 | 4 | def imag_Leja_exp(u, dt, RHS_function, c, Gamma, Leja_X, tol): 5 | """ 6 | Computes the polynomial interpolation of matrix exponential applied to 'u' at imaginary Leja points. 
7 | 8 | 9 | Parameters 10 | ---------- 11 | u : numpy array 12 | State variable(s) 13 | dt : double 14 | Step size 15 | RHS_function : user-defined function 16 | RHS function 17 | c : double 18 | Shifting factor 19 | Gamma : double 20 | Scaling factor 21 | Leja_X : numpy array 22 | Array of Leja points 23 | tol : double 24 | Accuracy of the polynomial so formed 25 | 26 | Returns 27 | ---------- 28 | polynomial : numpy array 29 | Polynomial interpolation of 'u' multiplied 30 | by the matrix exponential at imaginary Leja points 31 | ii : int 32 | # of RHS calls (one per loop iteration) 33 | 34 | """ 35 | 36 | ### Matrix exponential (scaled and shifted) 37 | matrix_exponential = np.exp(dt * (c + Gamma*Leja_X) * 1j) 38 | 39 | ### Compute polynomial coefficients 40 | coeffs = Divided_Difference(Leja_X, matrix_exponential) 41 | 42 | ### Form the polynomial: p_0 term 43 | polynomial = coeffs[0] * u + 0*1j 44 | 45 | ### p_1, p_2, ...., p_n terms 46 | max_Leja_pts = len(Leja_X) # Max # of Leja points 47 | y = u.copy() + 0*1j # To avoid changing 'u' 48 | 49 | ### Iterate until convergence is reached 50 | for ii in range(1, max_Leja_pts): 51 | 52 | ### Compute numerical Jacobian (for linear eqs., this is the RHS evaluation at y) 53 | Jacobian_function = RHS_function(y) 54 | 55 | ### y = y * ((z - c)/Gamma - Leja_X) 56 | y = (-1j * Jacobian_function/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1])) 57 | 58 | ### Error estimate 59 | poly_error = np.linalg.norm(y) * abs(coeffs[ii]) 60 | 61 | ### Add the new term to the polynomial 62 | polynomial = polynomial + (coeffs[ii] * y) 63 | 64 | ### If new term to be added < tol, break loop; safety factor = 0.25 65 | if poly_error < 0.25*tol*np.linalg.norm(polynomial): 66 | break 67 | 68 | ### Warning flags 69 | if ii == max_Leja_pts - 1: 70 | print("Warning!! Max. # of Leja points reached without convergence!! Try increasing the number of Leja points. 
Max available: 10000.") 71 | break 72 | 73 | return np.real(polynomial), ii 74 | -------------------------------------------------------------------------------- /Python/real_Leja_exp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Divided_Difference import Divided_Difference 3 | 4 | def real_Leja_exp(u, dt, RHS_function, c, Gamma, Leja_X, tol): 5 | """ 6 | Computes the polynomial interpolation of matrix exponential applied to 'u' at real Leja points. 7 | 8 | 9 | Parameters 10 | ---------- 11 | u : numpy array 12 | State variable(s) 13 | dt : double 14 | Step size 15 | RHS_function : user-defined function 16 | RHS function 17 | c : double 18 | Shifting factor 19 | Gamma : double 20 | Scaling factor 21 | Leja_X : numpy array 22 | Array of Leja points 23 | tol : double 24 | Accuracy of the polynomial so formed 25 | 26 | Returns 27 | ---------- 28 | polynomial : numpy array 29 | Polynomial interpolation of 'u' multiplied 30 | by the matrix exponential at real Leja points 31 | ii : int 32 | # of Leja points used 33 | 34 | """ 35 | 36 | ###? Initialize parameters and arrays 37 | max_Leja_pts = len(Leja_X) #* Max number of Leja points 38 | y = u.copy() #* To avoid changing 'interp_function' 39 | 40 | ###? Matrix exponential (scaled and shifted) 41 | matrix_exponential = np.exp(dt * (c + Gamma*Leja_X)) 42 | 43 | ###? Compute polynomial coefficients 44 | poly_coeffs = Divided_Difference(Leja_X, matrix_exponential) 45 | 46 | ###? Form the polynomial: 1st term (p_0) 47 | polynomial = poly_coeffs[0] * u 48 | 49 | ###? p_1, p_2, ...., p_n terms; iterate until converges 50 | for ii in range(1, max_Leja_pts): 51 | 52 | ###? y = y * ((z - c)/Gamma - Leja_X) 53 | y = (RHS_function(y)/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1])) 54 | 55 | ###? Error estimate; poly_error = |coeffs[nn]| ||y|| 56 | poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii]) 57 | 58 | ###? 
Add the new term to the polynomial 59 | polynomial = polynomial + (poly_coeffs[ii] * y) 60 | 61 | ###? If new term to be added < tol, break loop 62 | if poly_error < tol*np.linalg.norm(polynomial): 63 | # print("Converged! # of Leja points used (exp): ", ii) 64 | break 65 | 66 | ###! Warning flags 67 | if ii == max_Leja_pts - 1: 68 | print("Warning!! Max. # of Leja points reached without convergence!!") 69 | print("Max. Leja points currently set to", max_Leja_pts) 70 | print("Try increasing the number of Leja points. Max available: 10000.\n") 71 | break 72 | 73 | return polynomial, ii -------------------------------------------------------------------------------- /Python/real_Leja_phi_nl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Divided_Difference import Divided_Difference 3 | 4 | def real_Leja_phi_nl(u, dt, RHS_function, c, Gamma, Leja_X, phi_function, tol): 5 | """ 6 | Computes the polynomial interpolation of phi_function applied to 'u' at real Leja points. 7 | 8 | 9 | Parameters 10 | ---------- 11 | u : numpy array 12 | Vector multiplied to phi function 13 | dt : double 14 | Step size 15 | RHS_function : user-defined function 16 | RHS function 17 | 18 | c : double 19 | Shifting factor 20 | Gamma : double 21 | Scaling factor 22 | Leja_X : numpy array 23 | Array of Leja points 24 | phi_function : function 25 | Phi function (typically phi_1) 26 | tol : double 27 | Accuracy of the polynomial so formed 28 | 29 | Returns 30 | ---------- 31 | polynomial : numpy array 32 | Polynomial interpolation of 'u' multiplied 33 | to phi_function at real Leja points 34 | ii : int 35 | # of Leja points used 36 | 37 | """ 38 | 39 | ###? Initialize parameters and arrays 40 | max_Leja_pts = len(Leja_X) #* Max number of Leja points 41 | y = u.copy() #* To avoid changing 'interp_function' 42 | 43 | ###? 
Phi function applied to 'interp_function' (scaled and shifted) 44 | phi_function_array = phi_function(dt * (c + Gamma*Leja_X)) 45 | 46 | ###? Compute polynomial coefficients 47 | poly_coeffs = Divided_Difference(Leja_X, phi_function_array) 48 | 49 | ###? Form the polynomial: 1st term (p_0) 50 | polynomial = poly_coeffs[0] * u 51 | 52 | ###? p_1, p_2, ...., p_n terms; iterate until converges 53 | for ii in range(1, max_Leja_pts): 54 | 55 | ###? y = y * ((z - c)/Gamma - Leja_X) 56 | y = (RHS_function(y)/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1])) 57 | 58 | ###? Error estimate; poly_error = |coeffs[nn]| ||y|| 59 | poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii]) 60 | 61 | ###? Add the new term to the polynomial 62 | polynomial = polynomial + (poly_coeffs[ii] * y) 63 | 64 | ###? If new term to be added < tol, break loop 65 | if poly_error < tol*np.linalg.norm(polynomial): 66 | # print("Converged! # of Leja points used (phi nl): ", ii) 67 | break 68 | 69 | ###! Warning flags 70 | if ii == max_Leja_pts - 1: 71 | print("Warning!! Max. # of Leja points reached without convergence!!") 72 | print("Max. Leja points currently set to", max_Leja_pts) 73 | print("Try increasing the number of Leja points. Max available: 10000.\n") 74 | break 75 | 76 | return polynomial, ii -------------------------------------------------------------------------------- /Python/Constant/EPI4.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ###! 
def EPI4(u, u_prev, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Advance 'u' by one step of size 'T_final' with the 4th-order multistep EPI4 scheme.

    Parameters
    ----------
    u : numpy array
        State variable(s) at the current time step (n)
    u_prev : numpy array
        State variable(s) at the 2 previous time steps; column 0 is used as
        u^{n-1} and column 1 as u^{n-2}
    T_final : double
        Step size
    substeps :
        Forwarded unchanged to 'linear_phi'; the value 'linear_phi' hands back is returned
    RHS_function : user-defined function
        RHS function
    c : double
        Shifting factor
    Gamma : double
        Scaling factor
    Leja_X : numpy array
        Array of Leja points
    tol : double
        Accuracy of the polynomial so formed
    Real_Imag : int
        0 - Real, 1 - Imaginary (not read inside this function)

    Returns
    -------
    u_epi4 : numpy array
        Output state variable(s) after time T_final (4th order)
    num_rhs_calls : int
        # of RHS calls (count reported by 'linear_phi' + 6)
    substeps :
        Value returned by 'linear_phi'

    Reference:

        S. Gaudreault, M. Charron, V. Dallerit, and M. Tokman
        High-order numerical solutions to the shallow-water equations on the rotated cubed-sphere grid, J. Comput. Phys. 449 (2022) 110792.
        doi:10.1016/j.jcp.2021.110792

    """

    ###? RHS evaluated at 'u'
    rhs_u = RHS_function(u)

    ###? Array of zeros vectors
    zero_vec = np.zeros(np.shape(u))

    ###? dt * J(u).z
    def Jac_vec(z):
        return T_final * Jacobian(RHS_function, u, z, rhs_u)

    ###? J(u) . u
    Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)

    ###? EPI4 coefficients
    a21 = -3/10
    a22 = 3/40
    a31 = 32/5
    a32 = -11/10

    ###? Difference of nonlinear remainders at u^{n-1} and u^{n-2}
    R_1 = (RHS_function(u_prev[:, 0]) - Jacobian(RHS_function, u, u_prev[:, 0], rhs_u)) - (rhs_u - Jacobian_u)
    R_2 = (RHS_function(u_prev[:, 1]) - Jacobian(RHS_function, u, u_prev[:, 1], rhs_u)) - (rhs_u - Jacobian_u)

    ###? Interpolation 1; phi_1(J(u) dt) f(u) dt + phi_2(J(u) dt) (a21 R(u^{n-1}) + a22 R(u^{n-2})) dt + phi_3(J(u) dt) (a31 R(u^{n-1}) + a32 R(u^{n-2})) dt
    ###? NOTE: 'substeps' is passed as the 3rd argument, exactly as the other constant-step integrators do;
    ###?       the original call left it out although three values are unpacked from the result.
    u_flux, rhs_calls, substeps = linear_phi(
        [zero_vec, rhs_u*T_final, (a21*R_1 + a22*R_2)*T_final, (a31*R_1 + a32*R_2)*T_final],
        T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 4th order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + phi_2(J(u) dt) (a21 R(u^{n-1}) + a22 R(u^{n-2})) dt + phi_3(J(u) dt) (a31 R(u^{n-1}) + a32 R(u^{n-2})) dt
    u_epi4 = u + u_flux

    ###? Proxy of computational cost
    num_rhs_calls = rhs_calls + 6

    return u_epi4, num_rhs_calls, substeps
phi_2_array; 48 | } 49 | 50 | 51 | double phi_3(double z) 52 | { 53 | double phi_3_array; 54 | 55 | if (abs(z) <= 1e-5) 56 | { 57 | phi_3_array = 1./factorial(3) + z * (1./factorial(4) + z * (1./factorial(5) + \ 58 | z * (1./factorial(6) + z * (1./factorial(7) + \ 59 | z * (1./factorial(8) + z * (1./factorial(9) + \ 60 | z * (1./factorial(10) + z * (1./factorial(11) + \ 61 | z * (1./factorial(12) + z * (1./factorial(13))))))))))); 62 | } 63 | else 64 | { 65 | phi_3_array = (exp(z) - (z*z)/2 - z - 1)/(z*z*z); 66 | } 67 | 68 | return phi_3_array; 69 | } 70 | 71 | 72 | double phi_4(double z) 73 | { 74 | double phi_4_array; 75 | 76 | if (abs(z) <= 1e-4) 77 | { 78 | phi_4_array = 1./factorial(4) + z * (1./factorial(5) + z * (1./factorial(6) + \ 79 | z * (1./factorial(7) + z * (1./factorial(8) + \ 80 | z * (1./factorial(9) + z * (1./factorial(10) + \ 81 | z * (1./factorial(11) + z * (1./factorial(12) + \ 82 | z * (1./factorial(13) + z * (1./factorial(14))))))))))); 83 | } 84 | else 85 | { 86 | phi_4_array = (exp(z) - (z*z*z)/6 - (z*z)/2 - z - 1)/(z*z*z*z); 87 | } 88 | 89 | return phi_4_array; 90 | } 91 | } -------------------------------------------------------------------------------- /Python/Constant/EPIRK4s3B.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ###! 
def EPIRK4s3B(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Take one step of size 'T_final' with the 4th-order, 3-stage EPIRK4s3B integrator.

    Parameters
    ----------
    u : numpy array
        State variable(s)
    T_final : double
        Step size
    substeps :
        Handed to every 'linear_phi' call and replaced by what that call returns
    RHS_function : user-defined function
        RHS function
    c : double
        Shifting factor
    Gamma : double
        Scaling factor
    Leja_X : numpy array
        Array of Leja points
    tol : double
        Accuracy of the polynomial so formed
    Real_Imag : int
        0 - Real, 1 - Imaginary (not read inside this function)

    Returns
    -------
    u_epirk4 : numpy array
        Output state variable(s) after time dt (4th order)
    num_rhs_calls : int
        # of RHS calls (sum of the three 'linear_phi' counts + 8)
    substeps :
        Value returned by the last 'linear_phi' call

    Reference:

        G. Rainwater and M. Tokman
        A new approach to constructing efficient stiffly accurate EPIRK methods, J. Comput. Phys. 323 (2016) 283-309
        doi:10.1016/j.jcp.2016.07.026

    """

    ###? f(u)
    f_u = RHS_function(u)

    ###? Zero vector with the shape of 'u'
    zeros = np.zeros(np.shape(u))

    ###? z -> dt * J(u).z
    def Jac_vec(z):
        return T_final * Jacobian(RHS_function, u, z, f_u)

    ###? [F(stage) - J(u).stage] - [f(u) - J(u).u]
    def remainder_difference(stage):
        at_stage = RHS_function(stage) - Jacobian(RHS_function, u, stage, f_u)
        at_u = f_u - Jacobian(RHS_function, u, u, f_u)
        return at_stage - at_u

    ###? phi_2 interpolations of f(u) dt at the nodes 1/2 and 3/4
    flux_half, calls_half, substeps = linear_phi(
        [zeros, zeros, f_u*T_final],
        T_final, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
    flux_3q, calls_3q, substeps = linear_phi(
        [zeros, zeros, f_u*T_final],
        T_final, substeps, Jac_vec, 3/4, c, Gamma, Leja_X, tol)

    ###? Stage 1; a = u + 2/3 phi_2(1/2 J(u) dt) f(u) dt
    stage_a = u + (2/3 * 2 * flux_half)

    ###? Stage 2; b = u + phi_2(3/4 J(u) dt) f(u) dt
    stage_b = u + (4/3 * flux_3q)

    ###? Differences of nonlinear remainders at the two stages
    R_a = remainder_difference(stage_a)
    R_b = remainder_difference(stage_b)

    ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (54R(a) - 16R(b)) dt + phi_4(J(u) dt) (-324R(a) + 144R(b)) dt
    final_vectors = [
        zeros,
        f_u*T_final,
        zeros,
        (54*R_a - 16*R_b)*T_final,
        (-324*R_a + 144*R_b)*T_final,
    ]
    flux_final, calls_final, substeps = linear_phi(
        final_vectors, T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 4th order solution
    u_epirk4 = u + flux_final

    ###? Proxy of computational cost
    num_rhs_calls = calls_half + calls_3q + calls_final + 8

    return u_epirk4, num_rhs_calls, substeps
def EPIRK4s3A(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Take one step of size 'T_final' with the 4th-order, 3-stage EPIRK4s3A integrator.

    Parameters
    ----------
    u : numpy array
        State variable(s)
    T_final : double
        Step size
    substeps :
        Handed to every 'linear_phi' call and replaced by what that call returns
    RHS_function : user-defined function
        RHS function
    c : double
        Shifting factor
    Gamma : double
        Scaling factor
    Leja_X : numpy array
        Array of Leja points
    tol : double
        Accuracy of the polynomial so formed
    Real_Imag : int
        0 - Real, 1 - Imaginary (not read inside this function)

    Returns
    -------
    u_epirk4 : numpy array
        Output state variable(s) after time dt (4th order)
    num_rhs_calls : int
        # of RHS calls (sum of the three 'linear_phi' counts + 6)
    substeps :
        Value returned by the last 'linear_phi' call

    Reference:

        G. Rainwater and M. Tokman
        A new approach to constructing efficient stiffly accurate EPIRK methods, J. Comput. Phys. 323 (2016) 283-309
        doi:10.1016/j.jcp.2016.07.026

    """

    ###? f(u)
    f_u = RHS_function(u)

    ###? Zero vector with the shape of 'u'
    zeros = np.zeros(np.shape(u))

    ###? z -> dt * J(u).z
    def Jac_vec(z):
        return T_final * Jacobian(RHS_function, u, z, f_u)

    ###? J(u).u, shared by both remainder differences
    Jacobian_u = Jacobian(RHS_function, u, u, f_u)

    ###? [F(stage) - J(u).stage] - [f(u) - J(u).u]
    def remainder_difference(stage):
        at_stage = RHS_function(stage) - Jacobian(RHS_function, u, stage, f_u)
        return at_stage - (f_u - Jacobian_u)

    ###? {1/2, 2/3} phi_1({1/2, 2/3} J(u) dt) f(u) dt
    flux_a, calls_a, substeps = linear_phi(
        [zeros, f_u*T_final],
        T_final, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
    flux_b, calls_b, substeps = linear_phi(
        [zeros, f_u*T_final],
        T_final, substeps, Jac_vec, 2/3, c, Gamma, Leja_X, tol)

    ###? Stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
    stage_a = u + flux_a

    ###? Stage 2; b = u + 2/3 phi_1(2/3 J(u) dt) f(u) dt
    stage_b = u + flux_b

    ###? Differences of nonlinear remainders at the two stages
    R_a = remainder_difference(stage_a)
    R_b = remainder_difference(stage_b)

    ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (32R(a) - (27/2)R(b)) dt + phi_4(J(u) dt) (-144R(a) + 81R(b)) dt
    final_vectors = [
        zeros,
        f_u*T_final,
        zeros,
        (32*R_a - 27/2*R_b)*T_final,
        (-144*R_a + 81*R_b)*T_final,
    ]
    flux_final, calls_final, substeps = linear_phi(
        final_vectors, T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 4th order solution
    u_epirk4 = u + flux_final

    ###? Proxy of computational cost
    num_rhs_calls = calls_a + calls_b + calls_final + 6

    return u_epirk4, num_rhs_calls, substeps
def EXPRB43(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Take one step of size 'T_final' with the 4th-order exponential Rosenbrock scheme EXPRB43.

    Parameters
    ----------
    u : numpy array
        State variable(s)
    T_final : double
        Step size
    substeps :
        Handed to every 'linear_phi' call and replaced by what that call returns
    RHS_function : user-defined function
        RHS function
    c : double
        Shifting factor
    Gamma : double
        Scaling factor
    Leja_X : numpy array
        Array of Leja points
    tol : double
        Accuracy of the polynomial so formed
    Real_Imag : int
        0 - Real, 1 - Imaginary (not read inside this function)

    Returns
    -------
    u_exprb4 : numpy array
        Output state variable(s) after time dt (4th order)
    num_rhs_calls : int
        # of RHS calls (sum of the three 'linear_phi' counts + 6)
    substeps :
        Value returned by the last 'linear_phi' call

    Reference:

        M. Hochbruck and A. Ostermann
        Exponential Integrators, Acta Numer. 19 (2010) 209-286
        doi:10.1017/S0962492910000048

    """

    ###? f(u)
    f_u = RHS_function(u)

    ###? Zero vector with the shape of 'u'
    zeros = np.zeros(np.shape(u))

    ###? z -> dt * J(u).z
    def Jac_vec(z):
        return T_final * Jacobian(RHS_function, u, z, f_u)

    ###? J(u).u, shared by both remainder differences
    Jacobian_u = Jacobian(RHS_function, u, u, f_u)

    ###? [F(stage) - J(u).stage] - [f(u) - J(u).u]
    def remainder_difference(stage):
        at_stage = RHS_function(stage) - Jacobian(RHS_function, u, stage, f_u)
        return at_stage - (f_u - Jacobian_u)

    ###? 1/2 phi_1(1/2 J(u) dt) f(u) dt
    flux_a, calls_a, substeps = linear_phi(
        [zeros, f_u*T_final],
        T_final, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)

    ###? Stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
    stage_a = u + flux_a
    R_a = remainder_difference(stage_a)

    ###? phi_1(J(u) dt) f(u) dt + phi_1(J(u) dt) R(a) dt
    flux_b, calls_b, substeps = linear_phi(
        [zeros, (f_u + R_a)*T_final],
        T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? Stage 2; b = u + phi_1(J(u) dt) (f(u) + R(a)) dt
    stage_b = u + flux_b
    R_b = remainder_difference(stage_b)

    ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (16R(a) - 2R(b)) dt + phi_4(J(u) dt) (-48R(a) + 12R(b)) dt
    final_vectors = [
        zeros,
        f_u*T_final,
        zeros,
        (16*R_a - 2*R_b)*T_final,
        (-48*R_a + 12*R_b)*T_final,
    ]
    flux_final, calls_final, substeps = linear_phi(
        final_vectors, T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 4th order solution
    u_exprb4 = u + flux_final

    ###? Proxy of computational cost
    num_rhs_calls = calls_a + calls_b + calls_final + 6

    return u_exprb4, num_rhs_calls, substeps
21 | 22 | //? Assign names and variables 23 | double* f_u = &Jac_vec[0]; double* u_eps = &Jac_vec[0]; 24 | double* rhs_u_eps_1 = &auxiliary_Jv[0]; 25 | double* rhs_u_eps_2 = &auxiliary_Jv[N]; 26 | 27 | //? RHS evaluated at 'u'; f_u = RHS(u) 28 | RHS(u, f_u); 29 | 30 | //? epsilon ~ 1e-7 (normalised) 31 | double rhs_norm = l2norm(f_u, N, GPU, cublas_handle)/sqrt(N); 32 | double epsilon = 1e-7*rhs_norm; 33 | 34 | //? u_eps = u + epsilon*y 35 | axpby(1.0, u, epsilon, y, u_eps, N, GPU); 36 | 37 | //? rhs_u_eps_1 = RHS(u + epsilon*y) 38 | RHS(u_eps, rhs_u_eps_1); 39 | 40 | //? u_eps = u - epsilon*y 41 | axpby(1.0, u, -epsilon, y, u_eps, N, GPU); 42 | 43 | //? rhs_u_eps_2 = RHS(u - epsilon*y) 44 | RHS(u_eps, rhs_u_eps_2); 45 | 46 | //? Jac_vec = J(u) * y = (RHS(u + epsilon*y) - RHS(u - epsilon*y))/(2*epsilon) 47 | axpby(1.0/(2.0*epsilon), rhs_u_eps_1, -1.0/(2.0*epsilon), rhs_u_eps_2, Jac_vec, N, GPU); 48 | } 49 | 50 | //? F(y) = f(y) - (J(u) * y) 51 | template 52 | void Nonlinear_remainder(rhs& RHS, //? RHS function 53 | double* u, //? Input state variable(s) 54 | double* y, //? Vector to be multiplied to Jacobian 55 | double* Nonlinear_y, //? Output nonlinear remainder 56 | double* auxiliary_Jv, //? Internal auxiliary variables for Jacobian-vector 57 | size_t N, //? Number of grid points 58 | bool GPU, //? false (0) --> CPU; true (1) --> GPU 59 | GPU_handle& cublas_handle //? CuBLAS handle 60 | ) 61 | { 62 | //! This function has 13 + (4 * RHS_read_writes) vector reads and writes. 63 | 64 | //? Assign names and variables 65 | double* Linear_y = &auxiliary_Jv[0]; 66 | double* Jv = &auxiliary_Jv[N]; 67 | double* f_y = &auxiliary_Jv[3*N]; 68 | 69 | //? J(u) * y = (F(u + epsilon*y) - F(u - epsilon*y))/(2*epsilon) 70 | Jacobian_vector(RHS, u, y, Linear_y, Jv, N, GPU, cublas_handle); 71 | 72 | //? RHS evaluated at 'y'; f_y = RHS(y) 73 | RHS(y, f_y); 74 | 75 | //? 
def EPIRK4s3A(u, T_final, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Take one step of size 'T_final' with the embedded 3rd/4th-order EPIRK4s3A integrator.

    Parameters
    ----------
    u : numpy array
        State variable(s)
    T_final : double
        Step size
    RHS_function : user-defined function
        RHS function
    c : double
        Shifting factor
    Gamma : double
        Scaling factor
    Leja_X : numpy array
        Array of Leja points
    tol : double
        Accuracy of the polynomial so formed
    Real_Imag : int
        0 - Real, 1 - Imaginary (not read inside this function)

    Returns
    -------
    u_epirk3 : numpy array
        Output state variable(s) after time dt (3rd order)
    u_epirk4 : numpy array
        Output state variable(s) after time dt (4th order)
    num_rhs_calls : int
        # of RHS calls (sum of the four 'linear_phi' counts + 8)

    Reference:

        G. Rainwater, M. Tokman, A new approach to constructing efficient stiffly accurate EPIRK methods, J. Comput. Phys. 323 (2016) 283-309.
        doi:10.1016/j.jcp.2016.07.026.

    """

    ###? f(u)
    f_u = RHS_function(u)

    ###? Zero vector with the shape of 'u'
    zeros = np.zeros(np.shape(u))

    ###? z -> dt * J(u).z
    def Jac_vec(z):
        return T_final * Jacobian(RHS_function, u, z, f_u)

    ###? [F(stage) - J(u).stage] - [f(u) - J(u).u]
    def remainder_difference(stage):
        at_stage = RHS_function(stage) - Jacobian(RHS_function, u, stage, f_u)
        at_u = f_u - Jacobian(RHS_function, u, u, f_u)
        return at_stage - at_u

    ###? {1/2, 2/3} phi_1({1/2, 2/3} J(u) dt) f(u) dt
    flux_a, calls_a = linear_phi(
        [zeros, f_u*T_final],
        T_final, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
    flux_b, calls_b = linear_phi(
        [zeros, f_u*T_final],
        T_final, Jac_vec, 2/3, c, Gamma, Leja_X, tol)

    ###? Stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
    stage_a = u + flux_a

    ###? Stage 2; b = u + 2/3 phi_1(2/3 J(u) dt) f(u) dt
    stage_b = u + flux_b

    ###? Differences of nonlinear remainders at the two stages
    R_a = remainder_difference(stage_a)
    R_b = remainder_difference(stage_b)

    ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (32R(a) - (27/2)R(b)) dt
    flux_low, calls_low = linear_phi(
        [zeros, f_u*T_final, zeros, (32*R_a - 27/2*R_b)*T_final],
        T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? phi_4(J(u) dt) (-144R(a) + 81R(b)) dt
    flux_corr, calls_corr = linear_phi(
        [zeros, zeros, zeros, zeros, (-144*R_a + 81*R_b)*T_final],
        T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 3rd order solution
    u_epirk3 = u + flux_low

    ###? 4th order solution; u_4 = u_3 + phi_4 correction
    u_epirk4 = u_epirk3 + flux_corr

    ###? Proxy of computational cost
    num_rhs_calls = calls_a + calls_b + calls_low + calls_corr + 8

    return u_epirk3, u_epirk4, num_rhs_calls
def EXPRB43(u, T_final, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Take one step of size 'T_final' with the embedded 3rd/4th-order exponential Rosenbrock scheme EXPRB43.

    Parameters
    ----------
    u : numpy array
        State variable(s)
    T_final : double
        Step size
    RHS_function : user-defined function
        RHS function
    c : double
        Shifting factor
    Gamma : double
        Scaling factor
    Leja_X : numpy array
        Array of Leja points
    tol : double
        Accuracy of the polynomial so formed
    Real_Imag : int
        0 - Real, 1 - Imaginary (not read inside this function)

    Returns
    -------
    u_exprb3 : numpy array
        Output state variable(s) after time dt (3rd order)
    u_exprb4 : numpy array
        Output state variable(s) after time dt (4th order)
    num_rhs_calls : int
        # of RHS calls (sum of the four 'linear_phi' counts + 8)

    Reference:

        M. Hochbruck, A. Ostermann, Exponential Integrators, Acta Numer. 19 (2010) 209-286.
        doi:10.1017/S0962492910000048.

    """

    ###? RHS evaluated at 'u'
    rhs_u = RHS_function(u)

    ###? Array of zeros vectors
    zero_vec = np.zeros(np.shape(u))

    ###? dt * J(u).z
    def Jac_vec(z):
        return T_final * Jacobian(RHS_function, u, z, rhs_u)

    ###? Interpolation 1; 1/2 phi_1(1/2 J(u) dt) f(u) dt
    u_flux_1, rhs_calls_1 = linear_phi([zero_vec, rhs_u*T_final], T_final, Jac_vec, 1/2, c, Gamma, Leja_X, tol)

    ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
    a = u + u_flux_1

    ###? Difference of nonlinear remainder at a
    R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))

    ###? Interpolation 2; phi_1(J(u) dt) f(u) dt + phi_1(J(u) dt) R(a) dt
    u_flux_2, rhs_calls_2 = linear_phi([zero_vec, (rhs_u + R_a)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? Internal stage 2; b = u + phi_1(J(u) dt) f(u) dt + phi_1(J(u) dt) R(a) dt
    b = u + u_flux_2

    ###? Difference of nonlinear remainder at b
    R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))

    ###? Interpolation 3; phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (16R(a) - 2R(b)) dt
    u_flux, rhs_calls_3 = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (16*R_a - 2*R_b)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? Interpolation 4; phi_4(J(u) dt) (-48R(a) + 12R(b)) dt
    u_nl, rhs_calls_4 = linear_phi([zero_vec, zero_vec, zero_vec, zero_vec, (-48*R_a + 12*R_b)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 3rd order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (16R(a) - 2R(b)) dt
    u_exprb3 = u + u_flux

    ###? 4th order solution; u_4 = u_3 + phi_4(J(u) dt) (-48R(a) + 12R(b)) dt
    ###? (the phi_4 term is a correction on top of u_3, not on top of u)
    u_exprb4 = u_exprb3 + u_nl

    ###? Proxy of computational cost
    num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + rhs_calls_4 + 8

    return u_exprb3, u_exprb4, num_rhs_calls
---------------------------------------------------------- 10 | 11 | 12 | #include "error_check.hpp" 13 | #include "Timer.hpp" 14 | 15 | #ifdef __CUDACC__ 16 | #include 17 | #include 18 | #include 19 | #endif 20 | 21 | struct GPU_handle 22 | { 23 | #ifdef __CUDACC__ 24 | cublasHandle_t cublas_handle; 25 | #endif 26 | 27 | GPU_handle() 28 | { 29 | #ifdef __CUDACC__ 30 | cublasCreate_v2(&cublas_handle); 31 | #endif 32 | } 33 | 34 | ~GPU_handle() 35 | { 36 | #ifdef __CUDACC__ 37 | cublasDestroy(cublas_handle); 38 | #endif 39 | } 40 | }; 41 | 42 | namespace LeXInt 43 | { 44 | #ifdef __CUDACC__ 45 | 46 | //? Set y = x 47 | __global__ void copy_CUDA(double *x, double *y, size_t N) 48 | { 49 | int ii = blockDim.x * blockIdx.x + threadIdx.x; 50 | 51 | if(ii < N) 52 | { 53 | y[ii] = x[ii]; 54 | } 55 | } 56 | 57 | //? ones(y) = (y[0:N] =) 1.0 58 | __global__ void ones_CUDA(double *x, size_t N) 59 | { 60 | int ii = blockDim.x * blockIdx.x + threadIdx.x; 61 | 62 | if(ii < N) 63 | { 64 | x[ii] = 1.0; 65 | } 66 | } 67 | 68 | //? ones(y) = (y[0:N] =) 1.0 69 | __global__ void eigen_ones_CUDA(double *x, size_t N) 70 | { 71 | int ii = blockDim.x * blockIdx.x + threadIdx.x; 72 | 73 | if(ii < N) 74 | { 75 | x[ii] = 0.0; 76 | } 77 | 78 | x[0] = 1.0; 79 | } 80 | 81 | //? y = ax 82 | __global__ void axpby_CUDA(double a, double *x, 83 | double *y, size_t N) 84 | { 85 | int ii = blockDim.x * blockIdx.x + threadIdx.x; 86 | 87 | if(ii < N) 88 | { 89 | y[ii] = (a * x[ii]); 90 | } 91 | } 92 | 93 | //? z = ax + by 94 | __global__ void axpby_CUDA(double a, double *x, 95 | double b, double *y, 96 | double *z, size_t N) 97 | { 98 | int ii = blockDim.x * blockIdx.x + threadIdx.x; 99 | 100 | if(ii < N) 101 | { 102 | z[ii] = (a * x[ii]) + (b * y[ii]); 103 | } 104 | } 105 | 106 | //? 
w = ax + by + cz 107 | __global__ void axpby_CUDA(double a, double *x, 108 | double b, double *y, 109 | double c, double *z, 110 | double *w, size_t N) 111 | { 112 | int ii = blockDim.x * blockIdx.x + threadIdx.x; 113 | 114 | if(ii < N) 115 | { 116 | w[ii] = (a * x[ii]) + (b * y[ii]) + (c * z[ii]); 117 | } 118 | } 119 | 120 | //? v = ax + by + cz + dw 121 | __global__ void axpby_CUDA(double a, double *x, 122 | double b, double *y, 123 | double c, double *z, 124 | double d, double *w, 125 | double *v, size_t N) 126 | { 127 | int ii = blockDim.x * blockIdx.x + threadIdx.x; 128 | 129 | if(ii < N) 130 | { 131 | v[ii] = (a * x[ii]) + (b * y[ii]) + (c * z[ii]) + (d * w[ii]); 132 | } 133 | } 134 | 135 | #endif 136 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [LeXInt](#) 2 | 3 | 4 | ![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue) 5 | ![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=for-the-badge&logo=c%2B%2B&logoColor=white) 6 | ![nVIDIA](https://img.shields.io/badge/nVIDIA-%2376B900.svg?style=for-the-badge&logo=nVIDIA&logoColor=white) 7 | ![VS Code](https://img.shields.io/badge/VSCode-0078D4?style=for-the-badge&logo=visual%20studio%20code&logoColor=white) 8 | ![Github](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white) 9 | 10 | ascl:2208.009 11 | 12 | 13 | [**Le**](#)ja interpolation for e[**X**](#)ponential [**Int**](#)egrators is a temporal integration package that comprises of a compilation of exponential integrators, specifically, the Exponential Rosenbrock (EXPRB) and Exponential Propagation Iterative Runge-Kutta (EPIRK) solvers. 14 | 15 | The action of the matrix exponential or the $\varphi_l(z)$ functions on a vector is computed using the method of polynomial interpolation at Leja points. 
For homogeneous linear PDEs, one can obtain the ***exact*** solution (in time) by directly computing the matrix exponential using the functions ``real_Leja_exp`` and/or ``imag_Leja_exp``, whereas for nonhomogeneous linear PDEs, one can use ``real_Leja_phi_nl`` and/or ``imag_Leja_phi_nl``. The algorithmic details can be found in the cited literature. 16 | 17 | ## Requirements 18 | - For Python: 19 | - Python 3.10 (or later) 20 | 21 | - For C++: 22 | - gcc compiler 23 | 24 | - For CUDA: 25 | - NVIDIA GPU 26 | - CUDA 11.2 (or later) 27 | - nvcc compiler 28 | 29 | ## Literature 30 | The publications associated with this code: 31 | 32 | - Deka, Moriggl, and Einkemmer (2025), *LeXInt: GPU-accelerated Exponential Integrators package*, SoftwareX, 29, 101949
33 | [[DOI]](https://doi.org/10.1016/j.softx.2024.101949) [[arXiv:2310.08344]](https://doi.org/10.48550/arXiv.2310.08344) 34 | 35 | - Deka, Einkemmer, and Tokman (2023), *LeXInt: Package for Exponential Integrators employing Leja interpolation*, SoftwareX, 21, 101302
36 | [[DOI]](https://doi.org/10.1016/j.softx.2022.101302) [[arXiv:2208.08269]](https://doi.org/10.48550/arXiv.2208.08269) 37 | 38 | Other related publications: 39 | - Caliari et al. (2014), *Comparison of software for computing the action of the matrix exponential*, BIT Numer. Math., 54, 113
40 | [[DOI]](https://doi.org/10.1007/s10543-013-0446-0) 41 | 42 | - Deka \& Einkemmer (2022), *Efficient adaptive step size control for exponential integrators*, Comput. Math. Appl., 123, 59
43 | [[DOI]](https://doi.org/10.1016/j.camwa.2022.07.011) [[arXiv:2102.02524]](https://doi.org/10.48550/arXiv.2102.02524) 44 | 45 | - Deka \& Einkemmer (2022), *Exponential Integrators for Resistive Magnetohydrodynamics: Matrix-free Leja Interpolation and Efficient Adaptive Time Stepping*, ApJS, 259, 57
46 | [[DOI]](https://doi.org/10.3847/1538-4365/ac5177) [[arXiv:2108.13622]](https://doi.org/10.48550/arXiv.2108.13622) 47 | 48 | - Hochbruck \& Ostermann (2010), *Exponential integrators*, Acta Numer., 19, 209
49 | [[DOI]](https://doi.org/10.1017/S0962492910000048) 50 | 51 | ## Future Prospects 52 | We will MPI-parallelise the CUDA/C++ code. 53 | 54 | ## Contact 55 | Pranab J. Deka ()
56 | Lukas Einkemmer ()
def EPIRK4s3(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Take one step of size 'T_final' with the 4th-order, 3-stage EPIRK4s3 integrator.

    Parameters
    ----------
    u : numpy array
        State variable(s)
    T_final : double
        Step size
    substeps :
        Handed to every 'linear_phi' call and replaced by what that call returns
    RHS_function : user-defined function
        RHS function
    c : double
        Shifting factor
    Gamma : double
        Scaling factor
    Leja_X : numpy array
        Array of Leja points
    tol : double
        Accuracy of the polynomial so formed
    Real_Imag : int
        0 - Real, 1 - Imaginary (not read inside this function)

    Returns
    -------
    u_epirk4 : numpy array
        Output state variable(s) after time dt (4th order)
    num_rhs_calls : int
        # of RHS calls (sum of the three 'linear_phi' counts + 6)
    substeps :
        Value returned by the last 'linear_phi' call

    References:

        1. D. L. Michels, V. T. Luan, M. Tokman
           A stiffly accurate integrator for elastodynamic problems, ACM Trans. Graph. 36 (4) (2017)
           doi:10.1145/3072959.3073706.

        2. G. Rainwater and M. Tokman
           Designing efficient exponential integrators with EPIRK framework,
           in: International Conference of Numerical Analysis and Applied Mathematics (ICNAAM 2016), Vol. 1863 of American Institute of Physics Conference Series, 2017, p. 020007
           doi:10.1063/1.4992153

    """

    ###? f(u)
    f_u = RHS_function(u)

    ###? Zero vector with the shape of 'u'
    zeros = np.zeros(np.shape(u))

    ###? z -> dt * J(u).z
    def Jac_vec(z):
        return T_final * Jacobian(RHS_function, u, z, f_u)

    ###? J(u).u, shared by both remainder differences
    Jacobian_u = Jacobian(RHS_function, u, u, f_u)

    ###? [F(stage) - J(u).stage] - [f(u) - J(u).u]
    def remainder_difference(stage):
        at_stage = RHS_function(stage) - Jacobian(RHS_function, u, stage, f_u)
        return at_stage - (f_u - Jacobian_u)

    ###? {1/8, 1/9} phi_1({1/8, 1/9} J(u) dt) f(u) dt
    flux_a, calls_a, substeps = linear_phi(
        [zeros, f_u*T_final],
        T_final, substeps, Jac_vec, 1/8, c, Gamma, Leja_X, tol)
    flux_b, calls_b, substeps = linear_phi(
        [zeros, f_u*T_final],
        T_final, substeps, Jac_vec, 1/9, c, Gamma, Leja_X, tol)

    ###? Stage 1; a = u + 1/8 phi_1(1/8 J(u) dt) f(u) dt
    stage_a = u + flux_a

    ###? Stage 2; b = u + 1/9 phi_1(1/9 J(u) dt) f(u) dt
    stage_b = u + flux_b

    ###? Differences of nonlinear remainders at the two stages
    R_a = remainder_difference(stage_a)
    R_b = remainder_difference(stage_b)

    ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (1892R(a) + 1458(R(b) - 2R(a))) dt + phi_4(J(u) dt) (-42336R(a) - 34992(R(b) - 2R(a))) dt
    final_vectors = [
        zeros,
        f_u*T_final,
        zeros,
        (1892*R_a + 1458*(R_b - 2*R_a))*T_final,
        (-42336*R_a - 34992*(R_b - 2*R_a))*T_final,
    ]
    flux_final, calls_final, substeps = linear_phi(
        final_vectors, T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 4th order solution; equivalently phi_3 acts on (-1024R(a) + 1458R(b)) dt and phi_4 on (27648R(a) - 34992R(b)) dt
    u_epirk4 = u + flux_final

    ###? Proxy of computational cost
    num_rhs_calls = calls_a + calls_b + calls_final + 6

    return u_epirk4, num_rhs_calls, substeps
# File: Python/Constant/EXPRB53s3.py
import numpy as np

###! LeXInt functions
from Jacobian import Jacobian
from linear_phi import linear_phi

################################################################################################

def EXPRB53s3(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    One step of the 5th-order EXPRB53s3 exponential Rosenbrock integrator (constant step size).

    Parameters
    ----------
    u               : numpy array
                        State variable(s)
    T_final         : double
                        Step size
    substeps        : forwarded to every 'linear_phi' call; each call returns an
                        updated value that is passed on to the next call and
                        finally returned to the caller
    RHS_function    : user-defined function
                        RHS function
    c               : double
                        Shifting factor
    Gamma           : double
                        Scaling factor
    Leja_X          : numpy array
                        Array of Leja points
    tol             : double
                        Accuracy of the polynomial so formed
    Real_Imag       : int
                        0 - Real, 1 - Imaginary (accepted but not read here)

    Returns
    -------
    u_exprb5        : numpy array
                        Output state variable(s) after time dt (5th order)
    num_rhs_calls   : int
                        # of RHS calls (sum reported by linear_phi + 6)
    substeps        : value returned by the last 'linear_phi' call

    Reference:

        V. T. Luan and A. Ostermann
        Exponential Rosenbrock methods of order five - construction, analysis and numerical comparisons,
        J. Comput. Appl. Math. 255 (2014) 417-431
        doi:10.1016/j.cam.2013.04.041

    """

    dt = T_final

    ###? f(u) and a zero vector used as placeholder for unused phi slots
    f_u = RHS_function(u)
    zeros = np.zeros(np.shape(u))

    ###? z -> dt * J(u).z
    def Jac_vec(z):
        return dt * Jacobian(RHS_function, u, z, f_u)

    ###? J(u).u, evaluated once and reused in both remainders
    Jac_u = Jacobian(RHS_function, u, u, f_u)

    ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
    flux_a, calls_a, substeps = linear_phi([zeros, f_u*dt], dt, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
    a = u + flux_a

    ###? Difference of nonlinear remainder at a
    R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, f_u)) - (f_u - Jac_u)

    ###? Stage 2, first part; evaluated at 9/10 with f(u) dt in the phi_1 slot and R(a) dt in the phi_3 slot
    flux_b1, calls_b1, substeps = linear_phi([zeros, f_u*dt, zeros, 10/9*729/125*R_a*T_final],
                                             dt, substeps, Jac_vec, 9/10, c, Gamma, Leja_X, tol)

    ###? Stage 2, second part; evaluated at 1/2 with only the phi_3 slot populated
    flux_b2, calls_b2, substeps = linear_phi([zeros, zeros, zeros, 2*27/25*R_a*T_final],
                                             dt, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)

    ###? Internal stage 2
    b = u + flux_b1 + flux_b2

    ###? Difference of nonlinear remainder at b
    R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, f_u)) - (f_u - Jac_u)

    ###? Vectors multiplying phi_3 and phi_4 in the final combination
    phi_3_vec = (18*R_a - (250/81)*R_b)*dt
    phi_4_vec = (-60*R_a + (500/27)*R_b)*dt

    ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) phi_3_vec + phi_4(J(u) dt) phi_4_vec
    flux, calls_final, substeps = linear_phi([zeros, f_u*dt, zeros, phi_3_vec, phi_4_vec],
                                             dt, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 5th order solution
    u_exprb5 = u + flux

    ###? Proxy of computational cost
    num_rhs_calls = calls_a + calls_b1 + calls_b2 + calls_final + 6

    return u_exprb5, num_rhs_calls, substeps
# File: Python/imag_Leja_phi_nl.py
import numpy as np
from Divided_Difference import Divided_Difference

def imag_Leja_phi_nl(u, dt, RHS_function, interp_function, c, Gamma, Leja_X, phi_function, tol):
    """
    Computes the polynomial interpolation of 'phi_function' applied to 'interp_function' at imaginary Leja points.
    To be used when computation of Jacobian is not needed, i.e. "interp_function" is (or explicitly treated
    as) a nonlinear remainder; 'RHS_function' is applied directly to the iterate.


    Parameters
    ----------
    u                       : numpy array
                                State variable(s); only returned unchanged when the iteration diverges
    dt                      : double
                                Step size
    RHS_function            : user-defined function
                                RHS function
    interp_function         : numpy array
                                Vector to be interpolated (not modified; a copy is iterated on)
    c                       : double
                                Shifting factor
    Gamma                   : double
                                Scaling factor
    Leja_X                  : numpy array
                                Array of Leja points
    phi_function            : function
                                phi function
    tol                     : double
                                Accuracy of the polynomial so formed

    Returns
    ----------
    polynomial              : numpy array(s)
                                Real part of the polynomial interpolation of 'interp_function'
                                multiplied by 'phi_function' at imaginary Leja points
                                ('u' itself if the iteration diverged)
    ii                      : int
                                # of RHS calls (0 if no iteration was possible)
    convergence             : int
                                0 -> did not converge, 1 -> converged

    """

    ### Initialize parameters and arrays
    convergence = 0                                                             # 0 -> did not converge, 1 -> converged
    max_Leja_pts = len(Leja_X)                                                  # Max number of Leja points
    y = interp_function.copy()                                                  # To avoid changing 'interp_function'

    ### Phi function evaluated at the (scaled and shifted) imaginary Leja points
    phi_function_array = phi_function((c + Gamma*Leja_X) * dt * 1j)

    ### Compute polynomial coefficients
    poly_coeffs = Divided_Difference(Leja_X, phi_function_array)

    ### p_0 term
    polynomial = interp_function * poly_coeffs[0] + 0*1j

    ### Defined up front so the final 'return' is valid even when the loop
    ### body never runs (fewer than two Leja points)
    ii = 0

    ### p_1, p_2, ...., p_n terms; iterate until converges
    for ii in range(1, max_Leja_pts):

        ### y = y * ((z - c)/Gamma - Leja_X)
        y = (-1j * RHS_function(y)/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))

        ### Error estimate
        poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii])

        ### To prevent diverging, restart simulations with smaller dt.
        ### NaN compares False against every threshold, so it is tested explicitly;
        ### otherwise a NaN iterate would run through all remaining Leja points.
        if poly_error > 1e17 or np.isnan(poly_error):
            convergence = 0
            print("Step size too large!! Did not converge.")
            return u, ii, convergence

        ### Add the new term to the polynomial
        polynomial = polynomial + (poly_coeffs[ii] * y)

        ### If new term to be added < tol, break loop; safety factor = 0.25
        if poly_error < 0.25*tol*np.linalg.norm(polynomial):
            convergence = 1
            break

        ### Warning flags
        if ii == max_Leja_pts - 1:
            print("Warning!! Max. # of Leja points reached without convergence!! Try increasing the number of Leja points. Max available: 10000.")
            break

    return np.real(polynomial), ii, convergence
# File: Python/Variable/EPIRK4s3.py
import numpy as np

###! LeXInt functions
from Jacobian import Jacobian
from linear_phi import linear_phi

################################################################################################

def EPIRK4s3(u, T_final, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    One step of the embedded EPIRK4s3 exponential integrator; returns both the
    3rd- and the 4th-order solution.

    Parameters
    ----------
    u               : numpy array
                        State variable(s)
    T_final         : double
                        Step size
    RHS_function    : user-defined function
                        RHS function
    c               : double
                        Shifting factor
    Gamma           : double
                        Scaling factor
    Leja_X          : numpy array
                        Array of Leja points
    tol             : double
                        Accuracy of the polynomial so formed
    Real_Imag       : int
                        0 - Real, 1 - Imaginary (accepted but not read here)

    Returns
    -------
    u_epirk3        : numpy array
                        Output state variable(s) after time dt (3rd order)
    u_epirk4        : numpy array
                        Output state variable(s) after time dt (4th order)
    num_rhs_calls   : int
                        # of RHS calls (sum reported by linear_phi + 8)

    References:

        1. D. L. Michels, V. T. Luan, M. Tokman, A stiffly accurate integrator for elastodynamic problems,
           ACM Trans. Graph. 36 (4) (2017).
           doi:10.1145/3072959.3073706.

        2. G. Rainwater, M. Tokman, Designing efficient exponential integrators with EPIRK framework,
           in: International Conference of Numerical Analysis and Applied Mathematics (ICNAAM 2016),
           Vol. 1863 of American Institute of Physics Conference Series, 2017, p. 020007.
           doi:10.1063/1.4992153.

    """

    dt = T_final

    ###? f(u) and a zero vector used as placeholder for unused phi slots
    f_u = RHS_function(u)
    zeros = np.zeros(np.shape(u))

    ###? z -> dt * J(u).z
    def Jac_vec(z):
        return dt * Jacobian(RHS_function, u, z, f_u)

    ###? Difference between the nonlinear remainder at 'stage' and the one at u.
    ###? NOTE(review): J(u).u is recomputed on every call (twice per step), exactly
    ###? as before; caching it would also require revisiting the '+ 8' cost constant.
    def remainder_difference(stage):
        remainder_stage = RHS_function(stage) - Jacobian(RHS_function, u, stage, f_u)
        remainder_u = f_u - Jacobian(RHS_function, u, u, f_u)
        return remainder_stage - remainder_u

    ###? Stage increments; {1/8, 1/9} phi_1({1/8, 1/9} J(u) dt) f(u) dt
    flux_a, calls_a = linear_phi([zeros, f_u*dt], dt, Jac_vec, 1/8, c, Gamma, Leja_X, tol)
    flux_b, calls_b = linear_phi([zeros, f_u*dt], dt, Jac_vec, 1/9, c, Gamma, Leja_X, tol)

    ###? Internal stages a and b
    a = u + flux_a
    b = u + flux_b

    ###? Nonlinear remainder differences at a and b
    R_a = remainder_difference(a)
    R_b = remainder_difference(b)

    ###? Vectors multiplying phi_3 and phi_4; algebraically these equal
    ###? (-1024 R(a) + 1458 R(b)) dt and (27648 R(a) - 34992 R(b)) dt
    phi_3_vec = (1892*R_a + 1458*(R_b - 2*R_a))*dt
    phi_4_vec = (-42336*R_a - 34992*(R_b - 2*R_a))*dt

    ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) phi_3_vec
    flux_low, calls_low = linear_phi([zeros, f_u*dt, zeros, phi_3_vec], dt, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? phi_4(J(u) dt) phi_4_vec; the correction separating the two orders
    flux_high, calls_high = linear_phi([zeros, zeros, zeros, zeros, phi_4_vec], dt, Jac_vec, 1, c, Gamma, Leja_X, tol)

    ###? 3rd order solution
    u_epirk3 = u + flux_low

    ###? 4th order solution
    u_epirk4 = u_epirk3 + flux_high

    ###? Proxy of computational cost
    num_rhs_calls = calls_a + calls_b + calls_low + calls_high + 8

    return u_epirk3, u_epirk4, num_rhs_calls
// File: CUDA/functions.hpp
#pragma once

#include <cmath>
#include <cstddef>
#include <cstdlib>

//? ----------------------------------------------------------
//?
//? Description:
//?     A plethora of functions are defined here that
//?     are used throughout the code.
//?
//? ----------------------------------------------------------

//! ======================================================================================== !//

namespace LeXInt
{
    //* All functions are 'inline': they are defined in a header, and without
    //* it every translation unit including this file would emit its own
    //* external definition (multiple-definition errors at link time).

    //! Return double !//

    //? Sum of absolute values of vector[0:N]
    inline double l1norm_Cpp(double* vector, std::size_t N)
    {
        double norm = 0.0;

        #pragma omp parallel for reduction(+:norm)
        for (std::size_t ii = 0; ii < N; ii++)
        {
            //* std::fabs: an unqualified abs() may resolve to the int overload
            //* and silently truncate the value
            norm = norm + std::fabs(vector[ii]);
        }

        return norm;
    }

    //? Euclidean norm of vector[0:N]
    inline double l2norm_Cpp(double* vector, std::size_t N)
    {
        double norm = 0.0;

        #pragma omp parallel for reduction(+:norm)
        for (std::size_t ii = 0; ii < N; ii++)
        {
            norm = norm + (vector[ii] * vector[ii]);
        }

        return std::sqrt(norm);
    }

    //? |number|! as a double (the sign of 'number' is ignored; 0! = 1)
    inline double factorial(int number)
    {
        double fact = 1.0;

        for (int ii = 1; ii <= std::abs(number); ii++)
        {
            fact = fact*ii;
        }

        return fact;
    }

    //! ======================================================================================== !//

    //! Vector operations; results are written through the output pointer !//

    //? x[0:N] = 1.0
    inline void ones_Cpp(double *x, std::size_t N)
    {
        #pragma omp parallel for
        for (std::size_t ii = 0; ii < N; ii++)
        {
            x[ii] = 1.0;
        }
    }

    //? x[0:N] = 0.0, then x[0] = 1.0 (x must hold at least one element)
    inline void eigen_ones_Cpp(double *x, std::size_t N)
    {
        #pragma omp parallel for
        for (std::size_t ii = 0; ii < N; ii++)
        {
            x[ii] = 0.0;
        }

        x[0] = 1.0;
    }

    //? y = x
    inline void copy_Cpp(double *x, double *y, std::size_t N)
    {
        #pragma omp parallel for
        for (std::size_t ii = 0; ii < N; ii++)
        {
            y[ii] = x[ii];
        }
    }

    //? y = ax
    inline void axpby_Cpp(double a, double *x,
                                    double *y, std::size_t N)
    {
        #pragma omp parallel for
        for (std::size_t ii = 0; ii < N; ii++)
        {
            y[ii] = (a * x[ii]);
        }
    }

    //? z = ax + by
    inline void axpby_Cpp(double a, double *x,
                          double b, double *y,
                                    double *z, std::size_t N)
    {
        #pragma omp parallel for
        for (std::size_t ii = 0; ii < N; ii++)
        {
            z[ii] = (a * x[ii]) + (b * y[ii]);
        }
    }

    //? w = ax + by + cz
    inline void axpby_Cpp(double a, double *x,
                          double b, double *y,
                          double c, double *z,
                                    double *w, std::size_t N)
    {
        #pragma omp parallel for
        for (std::size_t ii = 0; ii < N; ii++)
        {
            w[ii] = (a * x[ii]) + (b * y[ii]) + (c * z[ii]);
        }
    }

    //? v = ax + by + cz + dw
    inline void axpby_Cpp(double a, double *x,
                          double b, double *y,
                          double c, double *z,
                          double d, double *w,
                                    double *v, std::size_t N)
    {
        #pragma omp parallel for
        for (std::size_t ii = 0; ii < N; ii++)
        {
            v[ii] = (a * x[ii]) + (b * y[ii]) + (c * z[ii]) + (d * w[ii]);
        }
    }
}

//! ======================================================================================== !//
# File: Python/Phi_functions.py
import math

import numpy as np

### Phi Functions ('z' is assumed to be an array of doubles or complex doubles)

def _phi_setup(z, threshold):
    """
    Normalise the input of a phi function.

    Returns the working copy of 'z' (complex only if some entry has a nonzero
    imaginary part, float otherwise), a zero-filled output array of the same
    shape and dtype, and the boolean mask of entries with |z| <= threshold.
    """

    z = np.asarray(z)

    ### Decide the dtype from *all* entries; looking at z[0] alone drops the
    ### imaginary parts whenever the first entry happens to be real, and fails on empty input
    if np.iscomplexobj(z) and np.any(z.imag != 0.0):
        z = z.astype(complex)
    else:
        z = z.real.astype(float)

    return z, np.zeros(z.shape, dtype = z.dtype), np.abs(z) <= threshold


def _phi_taylor(z, order):
    """
    11-term Taylor expansion of phi_order about 0 in Horner form:
    1/order! + z (1/(order+1)! + z (... + z/(order+10)!))
    """

    ### math.factorial replaces np.math.factorial, which no longer exists in NumPy >= 2.0
    series = 1./math.factorial(order + 10)
    for k in range(order + 9, order - 1, -1):
        series = 1./math.factorial(k) + z * series

    return series


def phi_1(z):
    """
    phi_1(z) = (exp(z) - 1)/z, evaluated elementwise.

    Entries with |z| <= 1e-7 use the Taylor expansion, all others the closed form.
    Returns an array with the shape of 'z'.
    """

    z, phi_1_array, small = _phi_setup(z, 1e-7)
    large = ~small

    phi_1_array[small] = _phi_taylor(z[small], 1)
    phi_1_array[large] = (np.exp(z[large]) - 1)/z[large]

    return phi_1_array


def phi_2(z):
    """
    phi_2(z) = (exp(z) - z - 1)/z^2, evaluated elementwise.

    Entries with |z| <= 1e-6 use the Taylor expansion, all others the closed form.
    Returns an array with the shape of 'z'.
    """

    z, phi_2_array, small = _phi_setup(z, 1e-6)
    large = ~small

    phi_2_array[small] = _phi_taylor(z[small], 2)
    phi_2_array[large] = (np.exp(z[large]) - z[large] - 1)/z[large]**2

    return phi_2_array


def phi_3(z):
    """
    phi_3(z) = (exp(z) - z^2/2 - z - 1)/z^3, evaluated elementwise.

    Entries with |z| <= 1e-5 use the Taylor expansion, all others the closed form.
    Returns an array with the shape of 'z'.

    NOTE(review): just above the threshold the closed form subtracts nearly
    equal numbers; the threshold is kept as it was and its accuracy has not
    been re-verified here.
    """

    z, phi_3_array, small = _phi_setup(z, 1e-5)
    large = ~small

    phi_3_array[small] = _phi_taylor(z[small], 3)
    phi_3_array[large] = (np.exp(z[large]) - z[large]**2/2 - z[large] - 1)/z[large]**3

    return phi_3_array


def phi_4(z):
    """
    phi_4(z) = (exp(z) - z^3/6 - z^2/2 - z - 1)/z^4, evaluated elementwise.

    Entries with |z| <= 1e-3 use the Taylor expansion, all others the closed form.
    Returns an array with the shape of 'z'.

    NOTE(review): just above the threshold the closed form subtracts nearly
    equal numbers; the threshold is kept as it was and its accuracy has not
    been re-verified here.
    """

    z, phi_4_array, small = _phi_setup(z, 1e-3)
    large = ~small

    phi_4_array[small] = _phi_taylor(z[small], 4)
    phi_4_array[large] = (np.exp(z[large]) - z[large]**3/6 - z[large]**2/2 - z[large] - 1)/z[large]**4

    return phi_4_array
np.zeros(len(z), dtype = "complex") 49 | else: 50 | phi_3_array = np.zeros(len(z)) 51 | 52 | for ii in range(len(z)): 53 | if abs(z[ii]) <= 1e-5: 54 | phi_3_array[ii] = 1./np.math.factorial(3) + z[ii] * (1./np.math.factorial(4) + z[ii] * (1./np.math.factorial(5) + \ 55 | z[ii] * (1./np.math.factorial(6) + z[ii] * (1./np.math.factorial(7) + \ 56 | z[ii] * (1./np.math.factorial(8) + z[ii] * (1./np.math.factorial(9) + \ 57 | z[ii] * (1./np.math.factorial(10) + z[ii] * (1./np.math.factorial(11) + \ 58 | z[ii] * (1./np.math.factorial(12) + z[ii] * (1./np.math.factorial(13))))))))))) 59 | else: 60 | phi_3_array[ii] = (np.exp(z[ii]) - z[ii]**2/2 - z[ii] - 1)/z[ii]**3 61 | 62 | return phi_3_array 63 | 64 | 65 | def phi_4(z): 66 | 67 | if np.imag(z[0]) != 0.0: 68 | phi_4_array = np.zeros(len(z), dtype = "complex") 69 | else: 70 | phi_4_array = np.zeros(len(z)) 71 | 72 | for ii in range(len(z)): 73 | if abs(z[ii]) <= 1e-3: 74 | phi_4_array[ii] = 1./np.math.factorial(4) + z[ii] * (1./np.math.factorial(5) + z[ii] * (1./np.math.factorial(6) + \ 75 | z[ii] * (1./np.math.factorial(7) + z[ii] * (1./np.math.factorial(8) + \ 76 | z[ii] * (1./np.math.factorial(9) + z[ii] * (1./np.math.factorial(10) + \ 77 | z[ii] * (1./np.math.factorial(11) + z[ii] * (1./np.math.factorial(12) + \ 78 | z[ii] * (1./np.math.factorial(13) + z[ii] * (1./np.math.factorial(14))))))))))) 79 | else: 80 | phi_4_array[ii] = (np.exp(z[ii]) - z[ii]**3/6 - z[ii]**2/2 - z[ii] - 1)/z[ii]**4 81 | 82 | return phi_4_array 83 | -------------------------------------------------------------------------------- /CUDA/Integrators/EXPRB32.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "../Leja.hpp" 4 | #include "../Phi_functions.hpp" 5 | 6 | namespace LeXInt 7 | { 8 | //? Phi functions interpolated on real Leja points 9 | template 10 | void EXPRB32(rhs& RHS, //? RHS function 11 | double* u, //? Input state variable(s) 12 | double* u_exprb2, //? 
Output state variable(s) (lower order) 13 | double* u_exprb3, //? Output state variable(s) (higher order) 14 | double& error, //? Embedded error estimate 15 | double* auxiliary_expint, //? Internal auxiliary variables (EXPRB32) 16 | double* auxiliary_Leja, //? Internal auxiliary variables (Leja and NL remainders) 17 | size_t N, //? Number of grid points 18 | vector& Leja_X, //? Array of Leja points 19 | double c, //? Shifting factor 20 | double Gamma, //? Scaling factor 21 | double rtol, //? Relative tolerance (normalised desired accuracy) 22 | double atol, //? Absolute tolerance 23 | double dt, //? Step size 24 | int& iters, //? # of iterations needed to converge (iteration variable) 25 | bool GPU, //? false (0) --> CPU; true (1) --> GPU 26 | GPU_handle& cublas_handle //? CuBLAS handle 27 | ) 28 | { 29 | //* ------------------------------------------------------------------------- 30 | 31 | //! u, u_exprb2, u_exprb3, auxiliary_expint, and auxiliary_Leja, 32 | //! are device vectors if GPU support is activated. 33 | 34 | //* Returns 35 | //* ---------- 36 | //* u_exprb2 : double* 37 | //* 2nd order solution after time dt 38 | //* 39 | //* u_exprb3 : double* 40 | //* 3rd order solution after time dt 41 | //* 42 | //* 43 | //* Reference: 44 | //* M. Hochbruck, A. Ostermann, Exponential Integrators, Acta Numer. 19 (2010) 209-286. 45 | //* doi:10.1017/S0962492910000048 46 | 47 | //* ------------------------------------------------------------------------- 48 | 49 | //? Counters for Leja iterations 50 | int iters_1 = 0, iters_2 = 0; 51 | 52 | //? Assign names and variables 53 | double* f_u = &auxiliary_expint[0]; double* u_flux = &u_exprb2[0]; 54 | double* NL_u = &auxiliary_expint[0]; double* NL_a = &u_exprb3[0]; double* R_a = &u_exprb3[0]; 55 | double* u_nl_3 = &auxiliary_expint[0]; double* error_vector = &auxiliary_expint[0]; 56 | 57 | //? RHS evaluated at 'u' multiplied by 'dt'; f_u = RHS(u)*dt 58 | RHS(u, f_u); 59 | axpby(dt, f_u, f_u, N, GPU); 60 | 61 | //? 
Interpolation of RHS(u) at 1; u_flux = phi_1(J(u) dt) f(u) dt 62 | real_Leja_phi(RHS, u, f_u, u_flux, auxiliary_Leja, N, {1.0}, 63 | phi_1, Leja_X, c, Gamma, rtol, atol, dt, iters_1, GPU, cublas_handle); 64 | 65 | //! Internal stage 1; 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt 66 | axpby(1.0, u, 1.0, u_flux, u_exprb2, N, GPU); 67 | 68 | //? R_a = (NL_a - NL_u) * dt 69 | Nonlinear_remainder(RHS, u, u, NL_u, auxiliary_Leja, N, GPU, cublas_handle); 70 | Nonlinear_remainder(RHS, u, u_exprb2, NL_a, auxiliary_Leja, N, GPU, cublas_handle); 71 | axpby(dt, NL_a, -dt, NL_u, R_a, N, GPU); 72 | 73 | //? u_nl_3 = phi_3(J(u) dt) R(a) dt 74 | real_Leja_phi(RHS, u, R_a, u_nl_3, auxiliary_Leja, N, {1.0}, 75 | phi_3, Leja_X, c, Gamma, rtol, atol, dt, iters_2, GPU, cublas_handle); 76 | 77 | //! 3rd order solution; u_3 = u_2 + 2 phi_3(J(u) dt) R(a) dt 78 | axpby(1.0, u_exprb2, 2.0, u_nl_3, u_exprb3, N, GPU); 79 | 80 | //? Error estimate 81 | axpby(2.0, u_nl_3, error_vector, N, GPU); 82 | error = l2norm(error_vector, N, GPU, cublas_handle)/sqrt(N); 83 | 84 | //? Total number of Leja iterations 85 | iters = iters_1 + iters_2; 86 | } 87 | } -------------------------------------------------------------------------------- /CUDA/Integrators/EXPRB42.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "../Leja.hpp" 4 | #include "../Phi_functions.hpp" 5 | 6 | namespace LeXInt 7 | { 8 | //? Phi functions interpolated on real Leja points 9 | template 10 | void EXPRB42(rhs& RHS, //? RHS function 11 | double* u, //? Input state variable(s) 12 | double* u_exprb2, //? Output state variable(s) (lower order) 13 | double* u_exprb4, //? Output state variable(s) (higher order) 14 | double& error, //? Embedded error estimate 15 | double* auxiliary_expint, //? Internal auxiliary variables (EXPRB42) 16 | double* auxiliary_Leja, //? Internal auxiliary variables (Leja) 17 | size_t N, //? 
Number of grid points 18 | vector& Leja_X, //? Array of Leja points 19 | double c, //? Shifting factor 20 | double Gamma, //? Scaling factor 21 | double rtol, //? Relative tolerance (normalised desired accuracy) 22 | double atol, //? Absolute tolerance 23 | double dt, //? Step size 24 | int& iters, //? # of iterations needed to converge (iteration variable) 25 | bool GPU, //? false (0) --> CPU; true (1) --> GPU 26 | GPU_handle& cublas_handle //? CuBLAS handle 27 | ) 28 | { 29 | //* ------------------------------------------------------------------------- 30 | 31 | //! u, u_exprb2, u_exprb4, auxiliary_expint, and auxiliary_Leja, 32 | //! are device vectors if GPU support is activated. 33 | 34 | //* Returns 35 | //* ---------- 36 | //* u_exprb2 : double* 37 | //* 2nd order solution after time dt 38 | //* 39 | //* u_exprb4 : double* 40 | //* 4th order solution after time dt 41 | //* 42 | //* 43 | //* Reference: 44 | //* V. T. Luan, Fourth-order two-stage explicit exponential integrators for time-dependent PDEs, Appl. Numer. Math. 112 (2017) 91-103. 45 | //* doi:10.1016/j.apnum.2016.10.008 46 | 47 | //* ------------------------------------------------------------------------- 48 | 49 | //? Counters for Leja iterations 50 | int iters_1 = 0, iters_2 = 0; 51 | 52 | //? Assign names and variables 53 | double* u_flux = &auxiliary_expint[0]; double* f_u = &u_exprb2[0]; double* a = &u_flux[0]; 54 | double* NL_u = &u_exprb2[0]; double* NL_a = &u_exprb4[0]; double* R_a = &u_exprb2[0]; 55 | double* u_nl_3 = &u_flux[0]; double* error_vector = &u_flux[N]; 56 | 57 | //? RHS evaluated at 'u' multiplied by 'dt'; f_u = RHS(u)*dt 58 | RHS(u, f_u); 59 | axpby(dt, f_u, f_u, N, GPU); 60 | 61 | //? Vertical interpolation of RHS(u) at 3/4 and 1; u_flux = phi_1({3/4, 1.0} J(u) dt) f_u dt 62 | real_Leja_phi(RHS, u, f_u, u_flux, auxiliary_Leja, N, {3./4., 1.0}, 63 | phi_1, Leja_X, c, Gamma, rtol, atol, dt, iters_1, GPU, cublas_handle); 64 | 65 | //? 
Internal stage 1; a = u + 3/4 phi_1(3/4 J(u) dt) f(u) dt 66 | axpby(1.0, u, 3./4., &u_flux[0], a, N, GPU); 67 | 68 | //? R_a = (NL_a - NL_u) * dt 69 | Nonlinear_remainder(RHS, u, u, NL_u, auxiliary_Leja, N, GPU, cublas_handle); 70 | Nonlinear_remainder(RHS, u, a, NL_a, auxiliary_Leja, N, GPU, cublas_handle); 71 | axpby(dt, NL_a, -dt, NL_u, R_a, N, GPU); 72 | 73 | //? u_nl_3 = phi_3(J(u) dt) R(a) dt 74 | real_Leja_phi(RHS, u, R_a, u_nl_3, auxiliary_Leja, N, {1.0}, 75 | phi_3, Leja_X, c, Gamma, rtol, atol, dt, iters_2, GPU, cublas_handle); 76 | 77 | //! 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt 78 | axpby(1.0, u, 1.0, &u_flux[N], u_exprb2, N, GPU); 79 | 80 | //! 4th order solution; u_4 = u_2 + 32/9 phi_3(J(u) dt) R(a) dt 81 | axpby(1.0, u_exprb2, 32./9., u_nl_3, u_exprb4, N, GPU); 82 | 83 | //? Error estimate 84 | axpby(32./9., u_nl_3, error_vector, N, GPU); 85 | error = l2norm(error_vector, N, GPU, cublas_handle)/sqrt(N); 86 | 87 | //? Total number of Leja iterations 88 | iters = iters_1 + iters_2; 89 | } 90 | } -------------------------------------------------------------------------------- /CUDA/real_Leja_exp.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Leja.hpp" 4 | #include "Divided_Differences.hpp" 5 | 6 | namespace LeXInt 7 | { 8 | //? Matrix exponential interpolated on real Leja points 9 | template 10 | void real_Leja_exp(rhs& RHS, //? RHS function 11 | double* u, //? Input state variable(s) 12 | double* polynomial, //? Output matrix exponential multiplied by 'u' 13 | double* auxiliary_Leja, //? Internal auxiliary variables (Leja) 14 | size_t N, //? Number of grid points 15 | vector& Leja_X, //? Array of Leja points 16 | double c, //? Shifting factor 17 | double Gamma, //? Scaling factor 18 | double rtol, //? Relative tolerance (normalised desired accuracy) 19 | double atol, //? Absolute tolerance 20 | double dt, //? Step size 21 | int& iters, //? 
# of iterations needed to converge (iteration variable) 22 | bool GPU, //? false (0) --> CPU; true (1) --> GPU 23 | GPU_handle& cublas_handle //? CuBLAS handle 24 | ) 25 | { 26 | //* ------------------------------------------------------------------------- 27 | 28 | //* Computes the polynomial interpolation of matrix exponential applied to 'u' at real Leja points. 29 | //* 30 | //* Returns 31 | //* ---------- 32 | //* polynomial : double* 33 | //* Polynomial interpolation of 'u' multiplied 34 | //* by the matrix exponential at real Leja points 35 | 36 | //* ------------------------------------------------------------------------- 37 | 38 | int max_Leja_pts = Leja_X.size(); //? Max. # of Leja points 39 | double* Jac_vec = &auxiliary_Leja[0]; //? auxiliary variable for Jacobian-vector product 40 | 41 | //* Matrix exponential (scaled and shifted) 42 | vector matrix_exponential(max_Leja_pts); 43 | 44 | for (int ii = 0; ii < max_Leja_pts; ii++) 45 | { 46 | matrix_exponential[ii] = exp(dt * (c + (Gamma * Leja_X[ii]))); 47 | } 48 | 49 | //* Compute polynomial coefficients 50 | vector coeffs = Divided_Differences(Leja_X, matrix_exponential); 51 | 52 | //* Form the polynomial (first term): polynomial = coeffs[0] * u 53 | axpby(coeffs[0], u, polynomial, N, GPU); 54 | 55 | //? 
Iterate until converges 56 | for (iters = 1; iters < max_Leja_pts - 1; iters++) 57 | { 58 | //* Compute numerical Jacobian (for linear eqs., this is the RHS evaluation at u) 59 | RHS(u, Jac_vec); 60 | 61 | //* u = u * ((z - c)/Gamma - Leja_X) 62 | axpby(1./Gamma, Jac_vec, (-c/Gamma - Leja_X[iters - 1]), u, u, N, GPU); 63 | 64 | //* Add the new term to the polynomial (polynomial = polynomial + (coeffs[iters] * u)) 65 | axpby(coeffs[iters], u, 1.0, polynomial, polynomial, N, GPU); 66 | 67 | //* Error estimate: poly_error = |coeffs[iters]| ||u|| at every iteration 68 | double poly_error = l2norm(u, N, GPU, cublas_handle)/sqrt(N); 69 | poly_error = abs(coeffs[iters]) * poly_error; 70 | 71 | //* Norm of the polynomial 72 | double poly_norm = l2norm(polynomial, N, GPU, cublas_handle)/sqrt(N); 73 | 74 | //? If new term to be added < tol, break loop 75 | if (poly_error < ((rtol*poly_norm) + atol)) 76 | { 77 | // ::std::cout << "Converged! Iterations: " << iters << ::std::endl; 78 | break; 79 | } 80 | 81 | //! Warning flags 82 | if (iters == max_Leja_pts - 2) 83 | { 84 | ::std::cout << "Warning!! Max. number of Leja points reached without convergence!!" << ::std::endl; 85 | ::std::cout << "Max. Leja points currently set to " << max_Leja_pts << ::std::endl; 86 | ::std::cout << "Try increasing the number of Leja points. Max available: 10000." 
# File: Python/Variable/EXPRB53s3.py
import sys
sys.path.insert(1, "../")

from Jacobian import *
from Phi_functions import *
from real_Leja_phi import *
from imag_Leja_phi import *

################################################################################################

def EXPRB53s3(u, dt, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    One step of the embedded EXPRB53s3 exponential Rosenbrock integrator;
    returns both the 3rd- and the 5th-order solution.

    Parameters
    ----------
    u               : numpy array
                        State variable(s)
    dt              : double
                        Step size
    RHS_function    : user-defined function
                        RHS function
    c               : double
                        Shifting factor
    Gamma           : double
                        Scaling factor
    Leja_X          : numpy array
                        Array of Leja points
    tol             : double
                        Accuracy of the polynomial so formed
    Real_Imag       : int
                        0 - Real, 1 - Imaginary

    Returns
    -------
    u_exprb3        : numpy array
                        Output state variable(s) after time dt (3rd order)
    u_exprb5        : numpy array
                        Output state variable(s) after time dt (5th order)
    num_rhs_calls   : int
                        # of RHS calls

    If the first interpolation does not converge, (u, 2.1*u, rhs_calls_1) is
    returned instead so that the caller can retry with a smaller dt.

    Raises
    ------
    ValueError
        If 'Real_Imag' is neither 0 nor 1.

    Reference:

        V. T. Luan, A. Ostermann, Exponential Rosenbrock methods of order five - construction, analysis and numerical comparisons,
        J. Comput. Appl. Math. 255 (2014) 417-431.
        doi:10.1016/j.cam.2013.04.041.

    """

    ############## --------------------- ##############

    ###? Interpolate on either real Leja or imaginary Leja points
    if Real_Imag == 0:
        Leja_phi = real_Leja_phi
    elif Real_Imag == 1:
        Leja_phi = imag_Leja_phi
    else:
        ###? Fail here with a clear message; printing and carrying on only
        ###? deferred the failure to the first use of the unset 'Leja_phi'
        raise ValueError("Error!! Choose 0 for real or 1 for imaginary Leja points.")

    ############## --------------------- ##############

    ###? Vertical interpolation of f(u) at 1/2, 9/10, and 1; phi_1({1/2, 9/10, 1} J(u) dt) f(u) dt
    u_flux, rhs_calls_1, convergence = Leja_phi(u, dt, RHS_function, RHS_function(u)*dt, [1/2, 9/10, 1], c, Gamma, Leja_X, phi_1, tol)

    ###? If it does not converge, return (try with smaller dt)
    if convergence == 0:
        return u, 2.1*u, rhs_calls_1

    ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
    a = u + (1/2 * u_flux[:, 0])

    ###? Nonlinear remainder at u and a
    Nonlinear_u = RHS_function(u) - Jacobian(RHS_function, u, u)
    Nonlinear_a = RHS_function(a) - Jacobian(RHS_function, u, a)
    R_a = Nonlinear_a - Nonlinear_u

    ###? Vertical interpolation of R(a) at 1/2 and 9/10
    ###? NOTE(review): the convergence flags of this and the following interpolations are discarded
    b_n_nl, rhs_calls_2, _ = Leja_phi(u, dt, RHS_function, R_a*dt, [1/2, 9/10], c, Gamma, Leja_X, phi_3, tol)

    ###? b = u + 9/10 phi_1(9/10 J(u) dt) f(u) dt + (27/25 phi_3(1/2 J(u) dt) + 729/125 phi_3(9/10 J(u) dt)) R(a) dt
    b = u + (9/10 * u_flux[:, 1]) + (27/25 * b_n_nl[:, 0]) + (729/125 * b_n_nl[:, 1])

    ###? Nonlinear remainder at b
    Nonlinear_b = RHS_function(b) - Jacobian(RHS_function, u, b)
    R_b = Nonlinear_b - Nonlinear_u

    ###? phi_3(J(u) dt) (2R(a) + (150/81)R(b)) dt
    u_nl_4_3, rhs_calls_3, _ = Leja_phi(u, dt, RHS_function, (2*R_a + (150/81)*R_b)*dt, [1], c, Gamma, Leja_X, phi_3, tol)

    ###? phi_3(J(u) dt) (18R(a) - (250/81)R(b)) dt
    u_nl_5_3, rhs_calls_4, _ = Leja_phi(u, dt, RHS_function, (18*R_a - (250/81)*R_b)*dt, [1], c, Gamma, Leja_X, phi_3, tol)

    ###? phi_4(J(u) dt) (-60R(a) + (500/27)R(b)) dt
    u_nl_5_4, rhs_calls_5, _ = Leja_phi(u, dt, RHS_function, (-60*R_a + (500/27)*R_b)*dt, [1], c, Gamma, Leja_X, phi_4, tol)

    ###? 3rd order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (2R(a) + (150/81)R(b)) dt
    u_exprb3 = u + u_flux[:, 2] + u_nl_4_3[:, 0]

    ###? 5th order solution; u_5 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (18R(a) - (250/81)R(b)) dt + phi_4(J(u) dt) (-60R(a) + (500/27)R(b)) dt
    u_exprb5 = u + u_flux[:, 2] + u_nl_5_3[:, 0] + u_nl_5_4[:, 0]

    ###? Proxy of computational cost
    num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + rhs_calls_4 + rhs_calls_5 + 13

    return u_exprb3, u_exprb5, num_rhs_calls
5th order solution; u_5 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (18R(a) - (250/81)R(b)) dt + phi_4(J(u) dt) (-60R(a) + (500/27)R(b)) dt 98 | u_exprb5 = u + u_flux[:, 2] + u_nl_5_3[:, 0] + u_nl_5_4[:, 0] 99 | 100 | ###? Proxy of computational cost 101 | num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + rhs_calls_4 + rhs_calls_5 + 13 102 | 103 | return u_exprb3, u_exprb5, num_rhs_calls -------------------------------------------------------------------------------- /CUDA/Test/Dif_Adv_2D.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Problems.hpp" 4 | #include "../error_check.hpp" 5 | 6 | using namespace std; 7 | 8 | //! This function has 2 vector reads and writes. 9 | 10 | //? ====================================================================================== ?// 11 | 12 | #ifdef __CUDACC__ 13 | 14 | __global__ void Dif_Adv_2D(int N, double dx, double dy, double velocity, double* input, double* output) 15 | { 16 | int ii = threadIdx.y + blockIdx.y * blockDim.y; 17 | int jj = threadIdx.x + blockIdx.x * blockDim.x; 18 | 19 | if ((ii >= N) || (jj >= N)) 20 | return; 21 | //? Diffusion 22 | output[N*ii + jj] = (input[PBC(ii, jj + 1, N)] - (4.0 * input[PBC(ii, jj, N)]) + input[PBC(ii, jj - 1, N)])/(dx*dx) 23 | + (input[PBC(ii + 1, jj, N)] + input[PBC(ii - 1, jj, N)])/(dy*dy) 24 | 25 | //? Advection 26 | + velocity/dx 27 | * (- 2.0/6.0 * input[PBC(ii, jj - 1, N)] 28 | - 3.0/6.0 * input[PBC(ii, jj, N)] 29 | + 6.0/6.0 * input[PBC(ii, jj + 1, N)] 30 | - 1.0/6.0 * input[PBC(ii, jj + 2, N)]) 31 | + velocity/dy 32 | * (- 2.0/6.0 * input[PBC(ii - 1, jj, N)] 33 | - 3.0/6.0 * input[PBC(ii, jj, N)] 34 | + 6.0/6.0 * input[PBC(ii + 1, jj, N)] 35 | - 1.0/6.0 * input[PBC(ii + 2, jj, N)]); 36 | } 37 | 38 | #endif 39 | 40 | struct RHS_Dif_Adv_2D:public Problems_2D 41 | { 42 | //? RHS = A_adv.u^2/2.0 + A_dif.u 43 | 44 | //! 
Constructor 45 | RHS_Dif_Adv_2D(int _N, double _dx, double _dy, double _velocity) : Problems_2D(_N, _dx, _dy, _velocity) {} 46 | 47 | void operator()(double* input, double* output) 48 | { 49 | #ifdef __CUDACC__ 50 | 51 | int num_threads = 16; 52 | dim3 threads(num_threads, num_threads); 53 | dim3 blocks((N + num_threads - 1)/num_threads, (N + num_threads - 1)/num_threads); 54 | 55 | Dif_Adv_2D<<>>(N, dx, dy, velocity, input, output); 56 | 57 | #else 58 | 59 | int num_threads = 32; 60 | 61 | #pragma omp parallel for collapse(2) 62 | for (int blockIdxx = 0; blockIdxx < (N + num_threads - 1)/num_threads; blockIdxx++) 63 | { 64 | for (int blockIdxy = 0; blockIdxy < (N + num_threads - 1)/num_threads; blockIdxy++) 65 | { 66 | for (int threadIdxx = 0; threadIdxx < num_threads; threadIdxx++) 67 | { 68 | for (int threadIdxy = 0; threadIdxy < num_threads; threadIdxy++) 69 | { 70 | int ii = (blockIdxx * num_threads) + threadIdxx; 71 | int jj = (blockIdxy * num_threads) + threadIdxy; 72 | 73 | if ((ii < N) && (jj < N)) 74 | { 75 | //? Diffusion 76 | output[N*ii + jj] = (input[PBC(ii, jj + 1, N)] - (4.0 * input[PBC(ii, jj, N)]) + input[PBC(ii, jj - 1, N)])/(dx*dx) 77 | + (input[PBC(ii + 1, jj, N)] + input[PBC(ii - 1, jj, N)])/(dy*dy) 78 | 79 | //? Advection 80 | + velocity/dx 81 | * (- 2.0/6.0 * input[PBC(ii, jj - 1, N)] 82 | - 3.0/6.0 * input[PBC(ii, jj, N)] 83 | + 6.0/6.0 * input[PBC(ii, jj + 1, N)] 84 | - 1.0/6.0 * input[PBC(ii, jj + 2, N)]) 85 | + velocity/dy 86 | * (- 2.0/6.0 * input[PBC(ii - 1, jj, N)] 87 | - 3.0/6.0 * input[PBC(ii, jj, N)] 88 | + 6.0/6.0 * input[PBC(ii + 1, jj, N)] 89 | - 1.0/6.0 * input[PBC(ii + 2, jj, N)]); 90 | } 91 | } 92 | } 93 | } 94 | } 95 | 96 | #endif 97 | } 98 | 99 | //! Destructor 100 | ~RHS_Dif_Adv_2D() {} 101 | }; 102 | 103 | //? 
def real_Leja_phi(u, dt, RHS_function, interp_vector, integrator_coeffs, c, Gamma, Leja_X, phi_function, tol):
    """
    Computes the polynomial interpolation of 'phi_function' applied to 'interp_vector' at real Leja points.
    One polynomial is built per entry of 'integrator_coeffs' (vertical implementation); all of
    them share the same sequence of Jacobian-vector products.


    Parameters
    ----------
    u                       : numpy array
                                State variable(s)
    dt                      : double
                                Step size
    RHS_function            : user-defined function
                                RHS function
    interp_vector           : numpy array (1D)
                                Vector to be interpolated (not modified)
    integrator_coeffs       : list/array of doubles
                                Points where the phi function is to be evaluated
    c                       : double
                                Shifting factor
    Gamma                   : double
                                Scaling factor
    Leja_X                  : numpy array
                                Array of Leja points
    phi_function            : function
                                phi function
    tol                     : double
                                Accuracy of the polynomial so formed

    Returns
    ----------
    polynomial              : numpy array, shape (len(interp_vector), len(integrator_coeffs))
                                Polynomial interpolation of 'interp_vector'
                                multiplied by 'phi_function' at real Leja points
    ii                      : int
                                # of Leja points used (3*ii is reported if the iteration diverges;
                                0 if fewer than 2 Leja points are supplied)
    convergence             : int
                                0 -> did not converge, 1 -> converged

    """

    ###? Initialize parameters and arrays
    convergence = 0                                                             #* 0 -> did not converge, 1 -> converged
    num_interpolations = len(integrator_coeffs)                                 #* Number of interpolations in vertical
    max_Leja_pts = len(Leja_X)                                                  #* Max number of Leja points
    phi_function_array = np.zeros((max_Leja_pts, num_interpolations))           #* Phi function applied to 'interp_vector'
    poly_coeffs = np.zeros((max_Leja_pts, num_interpolations))                  #* Polynomial coefficients
    polynomial = np.zeros((len(interp_vector), num_interpolations))             #* Polynomial output
    y = interp_vector.copy()                                                    #* To avoid changing 'interp_vector'
    ii = 0                                                                      #* Stays 0 if the iteration loop never runs

    ###? The error estimate monitors the column with the largest integrator coefficient
    error_column = np.argmax(integrator_coeffs)

    ###? Loop for vertical implementation
    for ij in range(num_interpolations):

        ###? Phi function applied to 'interp_vector' (scaled and shifted)
        phi_function_array[:, ij] = phi_function(integrator_coeffs[ij] * dt * (c + Gamma*Leja_X))

        ###? Compute polynomial coefficients
        poly_coeffs[:, ij] = Divided_Difference(Leja_X, phi_function_array[:, ij])

        ###? Form the polynomial: 1st term (p_0)
        polynomial[:, ij] = interp_vector * poly_coeffs[0, ij]

    ###? p_1, p_2, ...., p_n terms; iterate until converges
    for ii in range(1, max_Leja_pts):

        ###? y = y * ((z - c)/Gamma - Leja_X)
        y = (Jacobian(RHS_function, u, y)/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))

        ###? Error estimate; poly_error = |coeffs[nn]| ||y||
        poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii, error_column])

        ###! To prevent diverging, restart simulations with smaller dt.
        ###! The estimate does not depend on the column, so it is tested once per iteration;
        ###! as before, only the first column is reset and 3*ii is reported.
        ###! NOTE(review): callers are expected to discard 'polynomial' when convergence == 0.
        if poly_error > 1e7:
            convergence = 0
            polynomial[:, 0] = interp_vector
            return polynomial, 3*ii, convergence

        ###? Add the new term to every polynomial
        for ij in range(num_interpolations):
            polynomial[:, ij] = polynomial[:, ij] + (poly_coeffs[ii, ij] * y)

        ###? If new term to be added < tol, break loop
        if poly_error < (tol*np.linalg.norm(polynomial) + tol):
            convergence = 1
            # print("Converged! # of Leja points used (phi): ", ii)
            break

        ###! Warning flags
        if ii == max_Leja_pts - 1:
            print("Warning!! Max. # of Leja points reached without convergence!!")
            print("Reduce dt.")
            break

    return polynomial, ii, convergence
If new term to be added < tol, break loop 88 | if poly_error < (tol*np.linalg.norm(polynomial) + tol): 89 | convergence = 1 90 | # print("Converged! # of Leja points used (phi): ", ii) 91 | break 92 | 93 | ###! Warning flags 94 | if ii == max_Leja_pts - 1: 95 | print("Warning!! Max. # of Leja points reached without convergence!!") 96 | print("Reduce dt.") 97 | break 98 | 99 | return polynomial, ii, convergence -------------------------------------------------------------------------------- /CUDA/real_Leja_phi_nl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Leja.hpp" 4 | #include "Phi_functions.hpp" 5 | #include "Divided_Differences.hpp" 6 | 7 | namespace LeXInt 8 | { 9 | //? Phi function interpolated on real Leja points 10 | template 11 | void real_Leja_phi_nl(rhs& RHS, //? RHS function 12 | double* interp_vector, //? Input vector multiplied to phi function 13 | double* polynomial, //? Output vector multiplied to phi function 14 | double* auxiliary_Leja, //? Internal auxiliary variables (Leja) 15 | size_t N, //? Number of grid points 16 | double (* phi_function) (double), //? Phi function 17 | vector& Leja_X, //? Array of Leja points 18 | double c, //? Shifting factor 19 | double Gamma, //? Scaling factor 20 | double rtol, //? Relative tolerance (normalised desired accuracy) 21 | double atol, //? Absolute tolerance 22 | double dt, //? Step size 23 | int& iters, //? # of iterations needed to converge (iteration variable) 24 | bool GPU, //? false (0) --> CPU; true (1) --> GPU 25 | GPU_handle& cublas_handle //? CuBLAS handle 26 | ) 27 | { 28 | //* ------------------------------------------------------------------------- 29 | //* 30 | //* Computes the polynomial interpolation of phi function applied to 'interp_vector' at real Leja points. 
31 | //* 32 | //* Returns 33 | //* ---------- 34 | //* polynomial : double* 35 | //* Polynomial interpolation of 'interp_vector', applied to 36 | //* phi function, at real Leja points 37 | //* 38 | //* ------------------------------------------------------------------------- 39 | 40 | int max_Leja_pts = Leja_X.size(); //? Max. # of Leja points 41 | double* Jac_vec = &auxiliary_Leja[0]; //? auxiliary variable for Jacobian-vector product 42 | 43 | //* Phi function applied to 'interp_vector' (scaled and shifted) 44 | vector phi_function_array(max_Leja_pts); 45 | 46 | for (int ii = 0; ii < max_Leja_pts; ii++) 47 | { 48 | phi_function_array[ii] = phi_function(dt * (c + (Gamma * Leja_X[ii]))); 49 | } 50 | 51 | //* Compute polynomial coefficients 52 | vector coeffs = Divided_Differences(Leja_X, phi_function_array); 53 | 54 | //* Form the polynomial (first term): polynomial = coeffs[0] * interp_vector 55 | axpby(coeffs[0], interp_vector, polynomial, N, GPU); 56 | 57 | //? Iterate until converges 58 | for (iters = 1; iters < max_Leja_pts - 1; iters++) 59 | { 60 | //* Compute numerical Jacobian (for linear eqs., this is the RHS evaluation at y) 61 | RHS(interp_vector, Jac_vec); 62 | 63 | //* y = y * ((z - c)/Gamma - Leja_X) 64 | axpby(1./Gamma, Jac_vec, (-c/Gamma - Leja_X[iters - 1]), interp_vector, interp_vector, N, GPU); 65 | 66 | //* Add the new term to the polynomial (polynomial = polynomial + (coeffs[iters] * y)) 67 | axpby(coeffs[iters], interp_vector, 1.0, polynomial, polynomial, N, GPU); 68 | 69 | //* Error estimate: poly_error = |coeffs[iters]| ||interp_vector|| at every iteration 70 | double poly_error = l2norm(interp_vector, N, GPU, cublas_handle)/sqrt(N); 71 | poly_error = abs(coeffs[iters]) * poly_error; 72 | 73 | //* Norm of the polynomial 74 | double poly_norm = l2norm(polynomial, N, GPU, cublas_handle)/sqrt(N); 75 | 76 | //? If new term to be added < tol, break loop 77 | if (poly_error < ((rtol*poly_norm) + atol)) 78 | { 79 | // ::std::cout << "Converged! 
def EPIRK5P1(u, dt, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Advances 'u' by one step of size 'dt' with the embedded EPIRK5P1 scheme
    (5th order solution with a 4th order companion that reuses the same stages).

    Parameters
    ----------
    u               : numpy array
                        State variable(s)
    dt              : double
                        Step size
    RHS_function    : user-defined function
                        RHS function
    c               : double
                        Shifting factor
    Gamma           : double
                        Scaling factor
    Leja_X          : numpy array
                        Array of Leja points
    tol             : double
                        Accuracy of the polynomial so formed
    Real_Imag       : int
                        0 - Real, 1 - Imaginary

    Returns
    -------
    u_epirk4        : numpy array
                        Output state variable(s) after time dt (4th order)
    u_epirk5        : numpy array
                        Output state variable(s) after time dt (5th order)
    num_rhs_calls   : int
                        # of RHS calls

    If the first interpolation does not converge, (u, 2.1*u, rhs_calls) is returned
    so that the caller can retry with a smaller dt.

    Raises
    ------
    ValueError
        If 'Real_Imag' is neither 0 nor 1.

    Reference:

        M. Tokman, J. Loffeld, P. Tranquilli, New Adaptive Exponential Propagation Iterative Methods of Runge-Kutta Type, SIAM J. Sci. Comput. 34 (5) (2012) A2650-A2669.
        doi:10.1137/110849961.

    """

    ############## --------------------- ##############

    ###? Interpolate on either real Leja or imaginary Leja points
    if Real_Imag == 0:
        Leja_phi = real_Leja_phi
    elif Real_Imag == 1:
        Leja_phi = imag_Leja_phi
    else:
        ###! Fail here with a clear message instead of an UnboundLocalError further down
        raise ValueError("Error!! Choose 0 for real or 1 for imaginary Leja points.")

    ############## --------------------- ##############

    ###! Parameters of EPIRK5P1 (5th order)
    a11 = 0.35129592695058193092
    a21 = 0.84405472011657126298
    a22 = 1.6905891609568963624

    b1 = 1.0
    b2 = 1.2727127317356892397
    b3 = 2.2714599265422622275

    g11 = 0.35129592695058193092
    g21 = 0.84405472011657126298
    g22 = 1.0
    g31 = 1.0
    g32 = 0.71111095364366870359
    g33 = 0.62378111953371494809

    ###! 4th order
    g32_4 = 0.5
    g33_4 = 1.0

    ############## --------------------- ##############

    ###? Vertical interpolation of RHS_function(u) at g11, g21, and g31; phi_1({g11, g21, g31} J(u) dt) f(u) dt
    u_flux, rhs_calls_1, convergence = Leja_phi(u, dt, RHS_function, RHS_function(u)*dt, [g11, g21, g31], c, Gamma, Leja_X, phi_1, tol)

    ###? If it does not converge, return (try with smaller dt)
    if convergence == 0:
        return u, 2.1*u, rhs_calls_1

    ###? Internal stage 1; a = u + a11 phi_1(g11 J(u) dt) f(u) dt
    a = u + (a11 * u_flux[:, 0])

    ###? Nonlinear remainder at u and a
    Nonlinear_u = RHS_function(u) - Jacobian(RHS_function, u, u)
    Nonlinear_a = RHS_function(a) - Jacobian(RHS_function, u, a)
    R_a = Nonlinear_a - Nonlinear_u

    ###? Vertical interpolation of R_a at g32_4, g32, and g22; phi_1({g32_4, g32, g22} J(u) dt) R(a) dt
    u_nl_1, rhs_calls_2, _ = Leja_phi(u, dt, RHS_function, R_a*dt, [g32_4, g32, g22], c, Gamma, Leja_X, phi_1, tol)

    ###? b = u + a21 phi_1(g21 J(u) dt) f(u) dt + a22 phi_1(g22 J(u) dt) R_a dt
    b = u + (a21 * u_flux[:, 1]) + (a22 * u_nl_1[:, 2])

    ###? Nonlinear remainder at b
    Nonlinear_b = RHS_function(b) - Jacobian(RHS_function, u, b)
    R_b = Nonlinear_b - Nonlinear_u

    ###? Vertical interpolation of (-2*R(a) + R(b)) at g33 and g33_4; phi_3({g33, g33_4} J(u) dt) (-2*R(a) + R(b)) dt
    u_nl_2, rhs_calls_3, _ = Leja_phi(u, dt, RHS_function, (-2*R_a + R_b)*dt, [g33, g33_4], c, Gamma, Leja_X, phi_3, tol)

    ###? 4th order solution; u_4 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32_4 J(u) dt) R(a) dt + b3 phi_3(g33_4 J(u) dt) (-2*R(a) + R(b)) dt
    ###? (column 0 of u_nl_1 is g32_4, column 1 of u_nl_2 is g33_4; b1 = 1.0, so the product is exact)
    u_epirk4 = u + (b1 * u_flux[:, 2]) + (b2 * u_nl_1[:, 0]) + (b3 * u_nl_2[:, 1])

    ###? 5th order solution; u_5 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32 J(u) dt) R(a) dt + b3 phi_3(g33 J(u) dt) (-2*R(a) + R(b)) dt
    ###? (column 1 of u_nl_1 is g32, column 0 of u_nl_2 is g33)
    u_epirk5 = u + (b1 * u_flux[:, 2]) + (b2 * u_nl_1[:, 1]) + (b3 * u_nl_2[:, 0])

    ###? Proxy of computational cost
    num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 13

    return u_epirk4, u_epirk5, num_rhs_calls
def EPIRK5P1(u, dt, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
    """
    Advances 'u' by one step of size 'dt' with the embedded EPIRK5P1 scheme
    (5th order solution with a 4th order companion that reuses the same stages).

    Parameters
    ----------
    u               : numpy array
                        State variable(s)
    dt              : double
                        Step size
    RHS_function    : user-defined function
                        RHS function
    c               : double
                        Shifting factor
    Gamma           : double
                        Scaling factor
    Leja_X          : numpy array
                        Array of Leja points
    tol             : double
                        Accuracy of the polynomial so formed
    Real_Imag       : int
                        0 - Real, 1 - Imaginary

    Returns
    -------
    u_epirk4        : numpy array
                        Output state variable(s) after time dt (4th order)
    u_epirk5        : numpy array
                        Output state variable(s) after time dt (5th order)
    num_rhs_calls   : int
                        # of RHS calls

    If the first interpolation does not converge, (u, 2.1*u, rhs_calls) is returned
    so that the caller can retry with a smaller dt.

    Raises
    ------
    ValueError
        If 'Real_Imag' is neither 0 nor 1.

    Reference:

        M. Tokman, J. Loffeld, and P. Tranquilli
        New Adaptive Exponential Propagation Iterative Methods of Runge-Kutta Type, SIAM J. Sci. Comput. 34 (5) (2012) A2650-A2669
        doi:10.1137/110849961

    """

    ############## --------------------- ##############

    ###? Interpolate on either real Leja or imaginary Leja points
    if Real_Imag == 0:
        Leja_phi = real_Leja_phi
    elif Real_Imag == 1:
        Leja_phi = imag_Leja_phi
    else:
        ###! Fail here with a clear message instead of an UnboundLocalError further down
        raise ValueError("Error!! Choose 0 for real or 1 for imaginary Leja points.")

    ############## --------------------- ##############

    ###! Parameters of EPIRK5P1 (5th order)
    a11 = 0.35129592695058193092
    a21 = 0.84405472011657126298
    a22 = 1.6905891609568963624

    b1 = 1.0
    b2 = 1.2727127317356892397
    b3 = 2.2714599265422622275

    g11 = 0.35129592695058193092
    g21 = 0.84405472011657126298
    g22 = 1.0
    g31 = 1.0
    g32 = 0.71111095364366870359
    g33 = 0.62378111953371494809

    ###! 4th order
    g32_4 = 0.5
    g33_4 = 1.0

    ############## --------------------- ##############

    ###? Vertical interpolation of RHS_function(u) at g11, g21, and g31; phi_1({g11, g21, g31} J(u) dt) f(u) dt
    u_flux, rhs_calls_1, convergence = Leja_phi(u, dt, RHS_function, RHS_function(u)*dt, [g11, g21, g31], c, Gamma, Leja_X, phi_1, tol)

    ###? If it does not converge, return (try with smaller dt)
    if convergence == 0:
        return u, 2.1*u, rhs_calls_1

    ###? Internal stage 1; a = u + a11 phi_1(g11 J(u) dt) f(u) dt
    a = u + (a11 * u_flux[:, 0])

    ###? Nonlinear remainder at u and a
    Nonlinear_u = RHS_function(u) - Jacobian(RHS_function, u, u)
    Nonlinear_a = RHS_function(a) - Jacobian(RHS_function, u, a)
    R_a = Nonlinear_a - Nonlinear_u

    ###? Vertical interpolation of R_a at g32_4, g32, and g22; phi_1({g32_4, g32, g22} J(u) dt) R(a) dt
    u_nl_1, rhs_calls_2, _ = Leja_phi(u, dt, RHS_function, R_a*dt, [g32_4, g32, g22], c, Gamma, Leja_X, phi_1, tol)

    ###? b = u + a21 phi_1(g21 J(u) dt) f(u) dt + a22 phi_1(g22 J(u) dt) R_a dt
    b = u + (a21 * u_flux[:, 1]) + (a22 * u_nl_1[:, 2])

    ###? Nonlinear remainder at b
    Nonlinear_b = RHS_function(b) - Jacobian(RHS_function, u, b)
    R_b = Nonlinear_b - Nonlinear_u

    ###? Vertical interpolation of (-2*R(a) + R(b)) at g33 and g33_4; phi_3({g33, g33_4} J(u) dt) (-2*R(a) + R(b)) dt
    u_nl_2, rhs_calls_3, _ = Leja_phi(u, dt, RHS_function, (-2*R_a + R_b)*dt, [g33, g33_4], c, Gamma, Leja_X, phi_3, tol)

    ###? 4th order solution; u_4 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32_4 J(u) dt) R(a) dt + b3 phi_3(g33_4 J(u) dt) (-2*R(a) + R(b)) dt
    ###? (column 0 of u_nl_1 is g32_4, column 1 of u_nl_2 is g33_4; b1 = 1.0, so the product is exact)
    u_epirk4 = u + (b1 * u_flux[:, 2]) + (b2 * u_nl_1[:, 0]) + (b3 * u_nl_2[:, 1])

    ###? 5th order solution; u_5 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32 J(u) dt) R(a) dt + b3 phi_3(g33 J(u) dt) (-2*R(a) + R(b)) dt
    ###? (column 1 of u_nl_1 is g32, column 0 of u_nl_2 is g33)
    u_epirk5 = u + (b1 * u_flux[:, 2]) + (b2 * u_nl_1[:, 1]) + (b3 * u_nl_2[:, 0])

    ###? Proxy of computational cost
    num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 13

    return u_epirk4, u_epirk5, num_rhs_calls
Nonlinear remainder at u and a 94 | Nonlinear_u = RHS_function(u) - Jacobian(RHS_function, u, u) 95 | Nonlinear_a = RHS_function(a) - Jacobian(RHS_function, u, a) 96 | R_a = Nonlinear_a - Nonlinear_u 97 | 98 | ###? Vertical interpolation of R_a at g32_4, g32, and g22; phi_1({g11, g21, g31} J(u) dt) R(a) dt 99 | u_nl_1, rhs_calls_2, _ = Leja_phi(u, dt, RHS_function, R_a*dt, [g32_4, g32, g22], c, Gamma, Leja_X, phi_1, tol) 100 | 101 | ###? b = u + a21 phi_1(g21 J(u) dt) f(u) dt + a22 phi_1(g22 J(u) dt) R_a dt 102 | b = u + (a21 * u_flux[:, 1]) + (a22 * u_nl_1[:, 2]) 103 | 104 | ###? Nonlinear remainder at b 105 | Nonlinear_b = RHS_function(b) - Jacobian(RHS_function, u, b) 106 | R_b = Nonlinear_b - Nonlinear_u 107 | 108 | ###? Vertical interpolation of (-2*R(a) + R(b)) at g33 and g33_4; phi_3({g33, g33_4} J(u) dt) (-2*R(a) + R(b)) dt 109 | u_nl_2, rhs_calls_3, _ = Leja_phi(u, dt, RHS_function, (-2*R_a + R_b)*dt, [g33, g33_4], c, Gamma, Leja_X, phi_3, tol) 110 | 111 | ###? 4th order solution; u_4 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32 J(u) dt) R(a) dt + b3 phi_3(g33 J(u) dt) (-2*R(a) + R(b)) dt 112 | u_epirk4 = u + u_flux[:, 2] + (b2 * u_nl_1[:, 0]) + (b3 * u_nl_2[:, 1]) 113 | 114 | ###? 5th order solution; u_5 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32_4 J(u) dt) R(a) dt + b3 phi_3(g33_4 J(u) dt) (-2*R(a) + R(b)) dt 115 | u_epirk5 = u + u_flux[:, 2] + (b2 * u_nl_1[:, 1]) + (b3 * u_nl_2[:, 0]) 116 | 117 | ###? Proxy of computational cost 118 | num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 13 119 | 120 | return u_epirk4, u_epirk5, num_rhs_calls -------------------------------------------------------------------------------- /CUDA/Test/Burgers_2D.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Problems.hpp" 4 | #include "../error_check.hpp" 5 | 6 | using namespace std; 7 | 8 | //? 
====================================================================================== ?// 9 | 10 | #ifdef __CUDACC__ 11 | 12 | __global__ void Burgers_2D(int N, double dx, double dy, double velocity, double* input, double* output) 13 | { 14 | int ii = threadIdx.y + blockIdx.y * blockDim.y; 15 | int jj = threadIdx.x + blockIdx.x * blockDim.x; 16 | 17 | if ((ii >= N) || (jj >= N)) 18 | return; 19 | 20 | //? Diffusion 21 | output[N*ii + jj] = (input[PBC(ii, jj + 1, N)] - (4.0 * input[PBC(ii, jj, N)]) + input[PBC(ii, jj - 1, N)])/(dx*dx) 22 | + (input[PBC(ii + 1, jj, N)] + input[PBC(ii - 1, jj, N)])/(dy*dy) 23 | 24 | //? Advection (nonlinear) 25 | + velocity/dx 26 | * (- 2.0/6.0 * input[PBC(ii, jj - 1, N)] * input[PBC(ii, jj - 1, N)]/2 27 | - 3.0/6.0 * input[PBC(ii, jj, N)] * input[PBC(ii, jj, N)]/2 28 | + 6.0/6.0 * input[PBC(ii, jj + 1, N)] * input[PBC(ii, jj + 1, N)] 29 | - 1.0/6.0 * input[PBC(ii, jj + 2, N)] * input[PBC(ii, jj + 2, N)]) 30 | + velocity/dy 31 | * (- 2.0/6.0 * input[PBC(ii - 1, jj, N)] * input[PBC(ii - 1, jj, N)]/2 32 | - 3.0/6.0 * input[PBC(ii, jj, N)] * input[PBC(ii, jj, N)]/2 33 | + 6.0/6.0 * input[PBC(ii + 1, jj, N)] * input[PBC(ii + 1, jj, N)]/2 34 | - 1.0/6.0 * input[PBC(ii + 2, jj, N)] * input[PBC(ii + 2, jj, N)]/2); 35 | } 36 | 37 | #endif 38 | 39 | struct RHS_Burgers_2D:public Problems_2D 40 | { 41 | //? RHS = A_adv.u^2/2.0 + A_dif.u 42 | 43 | //! 
Constructor 44 | RHS_Burgers_2D(int _N, double _dx, double _dy, double _velocity) : Problems_2D(_N, _dx, _dy, _velocity) {} 45 | 46 | void operator()(double* input, double* output) 47 | { 48 | #ifdef __CUDACC__ 49 | 50 | int num_threads = 16; 51 | dim3 threads(num_threads, num_threads ); 52 | dim3 blocks((N + num_threads - 1)/num_threads, (N + num_threads - 1)/num_threads); 53 | 54 | Burgers_2D<<>>(N, dx, dy, velocity, input, output); 55 | 56 | #else 57 | 58 | int num_threads = 32; 59 | 60 | #pragma omp parallel for collapse(2) 61 | for (int blockIdxx = 0; blockIdxx < (N + num_threads - 1)/num_threads; blockIdxx++) 62 | { 63 | for (int blockIdxy = 0; blockIdxy < (N + num_threads - 1)/num_threads; blockIdxy++) 64 | { 65 | for (int threadIdxx = 0; threadIdxx < num_threads; threadIdxx++) 66 | { 67 | for (int threadIdxy = 0; threadIdxy < num_threads; threadIdxy++) 68 | { 69 | int ii = (blockIdxx * num_threads) + threadIdxx; 70 | int jj = (blockIdxy * num_threads) + threadIdxy; 71 | 72 | if ((ii < N) && (jj < N)) 73 | { 74 | //? Diffusion 75 | output[N*ii + jj] = (input[PBC(ii, jj + 1, N)] - (4.0 * input[PBC(ii, jj, N)]) + input[PBC(ii, jj - 1, N)])/(dx*dx) 76 | + (input[PBC(ii + 1, jj, N)] + input[PBC(ii - 1, jj, N)])/(dy*dy) 77 | 78 | //? Advection (nonlinear) 79 | + velocity/dx 80 | * (- 2.0/6.0 * input[PBC(ii, jj - 1, N)] * input[PBC(ii, jj - 1, N)]/2 81 | - 3.0/6.0 * input[PBC(ii, jj, N)] * input[PBC(ii, jj, N)]/2 82 | + 6.0/6.0 * input[PBC(ii, jj + 1, N)] * input[PBC(ii, jj + 1, N)]/2 83 | - 1.0/6.0 * input[PBC(ii, jj + 2, N)] * input[PBC(ii, jj + 2, N)]/2) 84 | + velocity/dy 85 | * (- 2.0/6.0 * input[PBC(ii - 1, jj, N)] * input[PBC(ii - 1, jj, N)]/2 86 | - 3.0/6.0 * input[PBC(ii, jj, N)] * input[PBC(ii, jj, N)]/2 87 | + 6.0/6.0 * input[PBC(ii + 1, jj, N)] * input[PBC(ii + 1, jj, N)]/2 88 | - 1.0/6.0 * input[PBC(ii + 2, jj, N)] * input[PBC(ii + 2, jj, N)]/2); 89 | } 90 | } 91 | } 92 | } 93 | } 94 | 95 | #endif 96 | } 97 | 98 | //! 
def imag_Leja_phi(u, dt, RHS_function, interp_vector, integrator_coeffs, c, Gamma, Leja_X, phi_function, tol):
    """
    To be used for computing "phi_function" applied to "interp_vector" (at imaginary Leja points) for
    exponential integrators that computes the Jacobian at every time step. One polynomial is built
    per entry of 'integrator_coeffs'; all of them share the same Jacobian-vector products.


    Parameters
    ----------
    u                       : numpy array
                                State variable(s)
    dt                      : double
                                Step size
    RHS_function            : user-defined function
                                RHS function (it is called with a complex-valued argument here)
    interp_vector           : numpy array (1D)
                                Vector to be interpolated (not modified)
    integrator_coeffs       : list/array of doubles
                                Points where the phi function is to be evaluated
    c                       : double
                                Shifting factor
    Gamma                   : double
                                Scaling factor
    Leja_X                  : numpy array
                                Array of Leja points
    phi_function            : function
                                phi function
    tol                     : double
                                Accuracy of the polynomial so formed

    Returns
    ----------
    polynomial              : numpy array, shape (len(interp_vector), len(integrator_coeffs))
                                Real part of the polynomial interpolation of 'interp_vector'
                                multiplied by 'phi_function' at imaginary Leja points
    ii+1                    : int
                                # of RHS calls (1 if fewer than 2 Leja points are supplied)
    convergence             : int
                                0 -> did not converge, 1 -> converged

    """

    ### Initialize parameters and arrays
    epsilon = 1e-7                                                                          # Perturbation for the finite-difference Jacobian
    convergence = 0                                                                         # 0 -> did not converge, 1 -> converged
    num_interpolations = len(integrator_coeffs)                                             # Number of interpolations in vertical
    max_Leja_pts = len(Leja_X)                                                              # Max number of Leja points
    phi_function_array = np.zeros((max_Leja_pts, num_interpolations), dtype = "complex")    # Phi function applied to 'interp_vector'
    poly_coeffs = np.zeros((max_Leja_pts, num_interpolations), dtype = "complex")           # Polynomial coefficients
    polynomial = np.zeros((len(interp_vector), num_interpolations), dtype = "complex")      # Polynomial array
    rhs_u = RHS_function(u)                                                                 # RHS evaluated at 'u'
    y = interp_vector.copy() + 0*1j                                                         # To avoid changing 'interp_vector'
    ii = 0                                                                                  # Stays 0 if the iteration loop never runs

    ### The error estimate monitors the column with the largest integrator coefficient
    error_column = np.argmax(integrator_coeffs)

    ### Loop for vertical implementation
    for ij in range(num_interpolations):

        ### Phi function applied to 'interp_vector' (scaled and shifted)
        phi_function_array[:, ij] = phi_function(integrator_coeffs[ij] * dt * (c + Gamma*Leja_X) * 1j)

        ### Compute polynomial coefficients
        poly_coeffs[:, ij] = Divided_Difference(Leja_X, phi_function_array[:, ij])

        ### p_0 term
        polynomial[:, ij] = interp_vector * poly_coeffs[0, ij] + 0*1j

    ### p_1, p_2, ...., p_n terms; iterate until converges
    for ii in range(1, max_Leja_pts):

        ### Compute numerical Jacobian (forward difference of the RHS around 'u' in direction 'y')
        Jacobian_function = (RHS_function(u + (epsilon * y)) - rhs_u)/epsilon

        ### y = y * ((z - c)/Gamma - Leja_X)
        y = (-1j * Jacobian_function/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))

        ### Error estimate
        poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii, error_column])

        ### To prevent diverging, restart simulations with smaller dt.
        ### The estimate does not depend on the column, so it is tested once per iteration;
        ### as before, only the first column is reset (to 'u').
        ### NOTE(review): callers are expected to discard 'polynomial' when convergence == 0.
        if poly_error > 1e17:
            convergence = 0
            polynomial[:, 0] = u
            return np.real(polynomial), ii+1, convergence

        ### Add the new term to every polynomial
        for ij in range(num_interpolations):
            polynomial[:, ij] = polynomial[:, ij] + (poly_coeffs[ii, ij] * y)

        ### If new term to be added < tol, break loop; safety factor = 0.25
        if poly_error < 0.25*tol*np.linalg.norm(polynomial):
            convergence = 1
            # print("Leja points used: ", ii)
            break

        ### Warning flags
        if ii == max_Leja_pts - 1:
            print("Warning!! Max. # of Leja points reached without convergence!! Try increasing the number of Leja points. Max available: 10000.")
            break

    return np.real(polynomial), ii+1, convergence
def real_Leja_linear_exp(u, T_f, substeps, RHS_function, integrator_coeff, c, Gamma, Leja_X, tol):
    """
    Computes the polynomial interpolation of matrix exponential applied to 'u' at real Leja points.
    The interval [0, T_f] is covered by substeps: a substep that diverges (or exhausts the Leja
    points) is retried with half the step size, a converged one is followed by a 10% larger step.


    Parameters
    ----------
    u                       : numpy array
                                State variable(s) (not modified)
    T_f                     : double
                                Total time to be covered by the substeps
    substeps                : int
                                Initial number of substeps (initial substep size = T_f/substeps)
    RHS_function            : user-defined function
                                RHS function
    integrator_coeff        : int
                                Point where phi function is to be evaluated
    c                       : double
                                Shifting factor
    Gamma                   : double
                                Scaling factor
    Leja_X                  : numpy array
                                Array of Leja points
    tol                     : double
                                Accuracy of the polynomial so formed

    Returns
    ----------
    polynomial              : numpy array
                                Polynomial interpolation of 'u' multiplied
                                by the matrix exponential at real Leja points
                                (a copy of 'u' if T_f <= 0, i.e. no substep is taken)
    total_iters             : int
                                Total number of Leja points used
    subs                    : int or float
                                Number of substeps: max(substeps, ceil(T_f/dt)) after step-size reductions

    """

    ###? Initialize parameters and arrays
    y = u.copy()                                                    #* To avoid changing 'u'
    y_backup = u.copy()                                             #* Backup y - To avoid changing 'u'
    polynomial = u.copy()                                           #* Result if the time loop never runs (exp(0) u = u)

    max_Leja_pts = len(Leja_X)                                      #* Max number of Leja points
    dt = T_f/substeps                                               #* Initial substep size
    time_elapsed = 0                                                #* Counter for time elapsed
    subs = 1                                                        #* Counter for number of substeps
    convergence = 0                                                 #* Check for convergence
    total_iters = 0                                                 #* Counter for Leja iterations

    ###! Time loop
    while time_elapsed < T_f:

        ###* Adjust final time substep
        if abs(T_f - time_elapsed) < 1e-12:
            break
        elif time_elapsed + dt > T_f:
            dt = T_f - time_elapsed

        ###? Compute polynomial coefficients
        poly_coeffs = Divided_Difference(Leja_X, np.exp(integrator_coeff * dt * (c + Gamma*Leja_X)))

        ###? Set y = polynomial; save y_backup (same dt)
        if convergence == 1:

            y = polynomial
            y_backup = polynomial

        ###? Set 'y' to previous value (reduce dt)
        elif convergence == 0:

            y = y_backup

        ###? Form the first term of the s^{th} polynomial: p_0 = d_0 * y_0
        polynomial = poly_coeffs[0] * y

        ###? p_1, p_2, ...., p_n terms; iterate until converges
        for ii in range(1, max_Leja_pts):

            ###? y = y * ((z - c)/Gamma - Leja_X)
            ###! NOTE(review): the RHS evaluation is divided by T_f*Gamma while the shift uses c/Gamma;
            ###! presumably c and Gamma are supplied for a T_f-scaled operator - confirm with the caller.
            y = (RHS_function(y)/(T_f*Gamma)) + (y * (-c/Gamma - Leja_X[ii - 1]))

            ###? Keep adding terms to the polynomial
            polynomial = polynomial + (poly_coeffs[ii] * y)

            ###? Error estimate; poly_error = |coeffs[nn]| ||y|| (only the current value is needed)
            poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii])

            ###! Warning: Check for diverging values, if so, restart iteration with smaller dt
            if ii == max_Leja_pts - 1 or poly_error > 1e3:

                print("Step size: ", dt)
                print("Computations wasted: ", ii)

                ###* Update parameters
                dt = 0.5 * dt
                subs = np.ceil(T_f/dt)
                convergence = 0
                total_iters = total_iters + ii

                break

            ###? If new term to be added < tol, break loop
            if poly_error < (tol*np.linalg.norm(polynomial) + tol):

                print()
                print("Converged! # of Leja points used (exp): ", ii)
                print()

                time_elapsed = time_elapsed + dt
                total_iters = total_iters + ii
                subs = max(substeps, subs)
                convergence = 1

                dt = 1.1*dt

                break

    return polynomial, total_iters, subs
# of Leja points used (exp): ", ii) 109 | print() 110 | 111 | time_elapsed = time_elapsed + dt 112 | total_iters = total_iters + ii 113 | subs = max(substeps, subs) 114 | convergence = 1 115 | 116 | dt = 1.1*dt 117 | 118 | break 119 | 120 | return polynomial, total_iters, subs -------------------------------------------------------------------------------- /CUDA/Integrators/EPIRK4s3B.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "../Leja.hpp" 4 | #include "../Phi_functions.hpp" 5 | 6 | namespace LeXInt 7 | { 8 | //? Phi functions interpolated on real Leja points 9 | template 10 | void EPIRK4s3B(rhs& RHS, //? RHS function 11 | double* u, //? Input state variable(s) 12 | double* u_epirk4, //? Output state variable(s) 13 | double* auxiliary_expint, //? Internal auxiliary variables (EPIRK4s3B) 14 | double* auxiliary_Leja, //? Internal auxiliary variables (Leja) 15 | size_t N, //? Number of grid points 16 | vector& Leja_X, //? Array of Leja points 17 | double c, //? Shifting factor 18 | double Gamma, //? Scaling factor 19 | double rtol, //? Relative tolerance (normalised desired accuracy) 20 | double atol, //? Absolute tolerance 21 | double dt, //? Step size 22 | int& iters, //? # of iterations needed to converge (iteration variable) 23 | bool GPU, //? false (0) --> CPU; true (1) --> GPU 24 | GPU_handle& cublas_handle //? CuBLAS handle 25 | ) 26 | { 27 | //* ------------------------------------------------------------------------- 28 | 29 | //! u, u_epirk4, auxiliary_expint, and auxiliary_Leja 30 | //! are device vectors if GPU support is activated. 31 | 32 | //* Returns 33 | //* ---------- 34 | //* 35 | //* u_epirk4 : double* 36 | //* 4th order solution after time dt 37 | //* 38 | //* 39 | //* Reference: 40 | //* G. Rainwater, M. Tokman, A new approach to constructing efficient stiffly accurate EPIRK methods, J. Comput. Phys. 323 (2016) 283-309. 
41 | //* doi:10.1016/j.jcp.2016.07.026 42 | 43 | //* ------------------------------------------------------------------------- 44 | 45 | //? Counters for Leja iterations 46 | int iters_1 = 0, iters_2 = 0, iters_3 = 0, iters_4 = 0; 47 | 48 | //? Assign names and variables 49 | double* u_flux = &auxiliary_expint[0]; double* f_u = &auxiliary_expint[3*N]; 50 | double* a = &u_flux[0]; double* b = &u_flux[N]; double* NL_u = &u_epirk4[0]; 51 | double* NL_a = &auxiliary_expint[3*N]; double* NL_b = &auxiliary_expint[4*N]; 52 | double* R_a = &auxiliary_expint[3*N]; double* R_b = &auxiliary_expint[4*N]; 53 | double* R_3 = &u_flux[0]; double* R_4 = &u_flux[N]; 54 | double* u_nl_3 = &auxiliary_expint[3*N]; double* u_nl_4 = &auxiliary_expint[4*N]; 55 | 56 | //? RHS evaluated at 'u' multiplied by 'dt'; f_u = RHS(u)*dt 57 | RHS(u, f_u); 58 | axpby(dt, f_u, f_u, N, GPU); 59 | 60 | //? Vertical interpolation of RHS(u) at 1/2 and 3/4; u_flux[0, 1] = phi_2({1/2, 3/4} J(u) dt) f(u) dt 61 | real_Leja_phi(RHS, u, f_u, u_flux, auxiliary_Leja, N, {1./2., 3./4.}, 62 | phi_2, Leja_X, c, Gamma, rtol, atol, dt, iters_1, GPU, cublas_handle); 63 | 64 | //? Interpolation of RHS(u) at 1; u_flux[2] = phi_1(J(u) dt) f(u) dt 65 | real_Leja_phi(RHS, u, f_u, &u_flux[2*N], auxiliary_Leja, N, {1.0}, 66 | phi_1, Leja_X, c, Gamma, rtol, atol, dt, iters_2, GPU, cublas_handle); 67 | 68 | //? Internal stage 1; a = u + 2/3 phi_2(1/2 J(u) dt) f(u) dt 69 | axpby(1.0, u, 2./3., &u_flux[0], a, N, GPU); 70 | 71 | //? Internal stage 2; b = u + phi_2(3/4 J(u) dt) f(u) dt 72 | axpby(1.0, u, 1.0, &u_flux[N], b, N, GPU); 73 | 74 | //? 
R_a = (NL_a - NL_u) * dt; R_b = (NL_b - NL_u) * dt 75 | Nonlinear_remainder(RHS, u, u, NL_u, auxiliary_Leja, N, GPU, cublas_handle); 76 | Nonlinear_remainder(RHS, u, a, NL_a, auxiliary_Leja, N, GPU, cublas_handle); 77 | Nonlinear_remainder(RHS, u, b, NL_b, auxiliary_Leja, N, GPU, cublas_handle); 78 | axpby(dt, NL_a, -dt, NL_u, R_a, N, GPU); 79 | axpby(dt, NL_b, -dt, NL_u, R_b, N, GPU); 80 | 81 | //? R_3 = (54R(a) - 16R(b)) dt 82 | axpby(54.0, R_a, -16.0, R_b, R_3, N, GPU); 83 | 84 | //? R_4 = (-324R(a) + 144R(b)) dt 85 | axpby(-324.0, R_a, 144.0, R_b, R_4, N, GPU); 86 | 87 | //? u_nl_3 = phi_3(J(u) dt) (54R(a) - 16R(b)) dt 88 | real_Leja_phi(RHS, u, R_3, u_nl_3, auxiliary_Leja, N, {1.0}, 89 | phi_3, Leja_X, c, Gamma, rtol, atol, dt, iters_3, GPU, cublas_handle); 90 | 91 | //? u_nl_4 = phi_4(J(u) dt) (-324R(a) + 144R(b)) dt 92 | real_Leja_phi(RHS, u, R_4, u_nl_4, auxiliary_Leja, N, {1.0}, 93 | phi_4, Leja_X, c, Gamma, rtol, atol, dt, iters_4, GPU, cublas_handle); 94 | 95 | //! 4th order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (54R(a) - 16R(b)) dt + phi_4(J(u) dt) (-324R(a) + 144R(b)) dt 96 | axpby(1.0, u, 1.0, &u_flux[2*N], 1.0, u_nl_3, 1.0, u_nl_4, u_epirk4, N, GPU); 97 | 98 | //? 
Total number of Leja iterations 99 | iters = iters_1 + iters_2 + iters_3 + iters_4; 100 | } 101 | } -------------------------------------------------------------------------------- /Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_2.00/Final_data.txt: -------------------------------------------------------------------------------- 1 | 1.0382541662553444 1.037384527544248 1.03715264161397 1.037541471809854 1.038534721026023 1.040116995787637 1.0422738945728103 1.0449920221496682 1.0482589422992037 1.052063074583342 1.0563935491485978 1.061240026561423 1.0665924967247629 1.0724410642583175 1.0787757330925314 1.0855861961457358 1.0928616397420206 1.1005905686814397 1.1087606576532312 1.1173586307904895 1.1263701751838846 1.1357798868846514 1.1455712496597168 1.1557266477956982 1.166227407622735 1.1770538675854545 1.188185472843623 1.199600889385598 1.2112781361833358 1.223194728499155 1.235327829447992 1.2476544072575206 1.2601513898060803 1.2727958201598562 1.2855650016671114 1.2984366384751578 1.3113889614994398 1.324400844520841 1.337451904928437 1.3505225908245961 1.363594253240574 1.376649202854625 1.3896707531105543 1.4026432497171584 1.41555208613157 1.4283837085472162 1.4411256095000375 1.4537663116363568 1.4662953446695146 1.4787032137732852 1.490981363779453 1.5031221373576311 1.5151187318194042 1.5269651507332553 1.5386561572192883 1.5501872237153542 1.5615544836017081 1.5727546824272132 1.5837851310351492 1.5946436586089072 1.6053285697680144 1.6158386017367945 1.626172884083105 1.636330902038385 1.6463124602485322 1.656117650199406 1.665746819352391 1.6752005435139692 1.6844795991342003 1.6935849409841572 1.7025176788758771 1.7112790582065032 1.71987044134136 1.7282932909500455 1.7365491548555125 1.7446396526726076 1.752566462953836 1.760331312626906 1.7679359663451184 1.7753822183245613 1.7826718835971438 1.789806791136884 1.7967887776215177 1.8036196814389693 1.810301337788668 1.8168355743418398 1.8232242067004563 
1.82946903598149 1.8355718447047005 1.841534394478185 1.8473584240819427 1.8530456469147476 1.8585977493905776 1.8640163894803394 1.8693031955423833 1.8744597650039636 1.8794876637625806 1.8843884251549599 1.8891635494339223 1.8938145035164524 1.8983427202415983 1.9027495984688807 1.9070365027819214 1.9112047628327946 1.915255674487956 1.9191904988901034 1.9230104626258218 1.9267167583108038 1.9303105444046624 1.93379294533826 1.9371650519722894 1.9404279216500353 1.943582578962767 1.9466300147268552 1.9495711881227655 1.9524070255138817 1.9551384211911642 1.9577662383338477 1.9602913079752067 1.9627144310333637 1.9650363769612234 1.9672578852237617 1.9693796652858777 1.9714023967409424 1.9733267296669093 1.9751532850895772 1.9768826555015147 1.9785154044418138 1.98005206728568 1.9814931519379924 1.982839137781033 1.9840904776017962 1.98524759626945 1.986310892290945 1.9872807370517227 1.9881574757688916 1.9889414269517192 1.9896328837634236 1.9902321124984712 1.9907393547679513 1.9911548257520517 1.9914787158168958 1.9917111900664988 1.991852387976284 1.9919024249765058 1.9918613907723317 1.9917293511266305 1.991506346380492 1.9911923929139712 1.9907874825335954 1.9902915825069278 1.989704635827661 1.9890265612881435 1.9882572534286063 1.9873965827123783 1.9864443949724897 1.9854005126353373 1.9842647331746721 1.983036830742115 1.9817165547497702 1.9803036306654636 1.9787977600172006 1.9771986198465459 1.975505862993539 1.973719118519058 1.971837990752608 1.969862059826093 1.9677908817344645 1.9656239880714983 1.963360885638188 1.9610010575012422 1.9585439615352316 1.955989031583479 1.9533356767334165 1.9505832815631021 1.947731206157098 1.9447787857168055 1.941725331506396 1.9385701293937307 1.935312441360356 1.9319515047608202 1.9284865328817467 1.9249167142895445 1.921241213917281 1.9174591731215709 1.9135697094172373 1.9095719177610484 1.9054648701343329 1.9012476172024988 1.8969191882257939 1.89247859315663 1.8879248228969403 1.8832568516633625 
1.8784736390756922 1.8735741328218032 1.868557271645375 1.8634219897126219 1.8581672217821583 1.852791909377699 1.8472950085782598 1.8416754988705104 1.8359323960872322 1.830064764321562 1.824071734106491 1.8179525218570902 1.8117064536458636 1.8053329931125783 1.7988317744472972 1.7922026406882146 1.7854456880496234 1.778561315973352 1.771550285423891 1.7644137828694393 1.757153492406529 1.7497716757106818 1.7422712574044503 1.7346559178362513 1.7269301925992897 1.7190995722543754 1.7111706084496088 1.703151016525113 1.6950497759836656 1.6868772229690505 1.6786451322740064 1.6703667818261974 1.6620569964342617 1.6537321630680106 1.6454102123639385 1.637110561347647 1.6288540082760186 1.6206625763477809 1.6125593003066796 1.6045679510232385 1.5967126972984553 1.5890177014242166 1.5815066510217508 1.5742022311407262 1.5671255419092003 1.5602954709973595 1.553728034627974 1.5474357010707007 1.5414267163290076 1.5357044522581702 1.5302668004995958 1.5251056384318655 1.5202063899057372 1.5155477074600403 1.5111013010864889 1.5068319288416896 1.5026975741857749 1.4986498144547085 1.4946343903575074 1.4905919759230886 1.4864591363565898 1.4821694621885115 1.477654853032192 1.472846921938394 1.4676784830100316 1.462085081274502 1.4560065206467445 1.4493883450071863 1.442183224537922 1.434352208917451 1.4258658039216543 1.4167048426927817 1.4068611227222128 1.396337791197699 1.3851494701492448 1.373322117423665 1.3608926330996856 1.3479082243035363 1.3344255505571212 1.3205096774128722 1.3062328700830257 1.291673262926 1.2769134421070716 1.2620389808775414 1.2471369637550713 1.2322945389255755 1.2175975310922336 1.2031291462176716 1.1889687958268977 1.1751910631151188 1.1618648277005694 1.1490525630355344 1.136809811673925 1.1251848423679132 1.1142184848109382 1.1039441342046252 1.0943879143626771 1.0855689837294882 1.077499963776257 1.070187472030145 1.063632733347547 1.0578322487986065 1.0527784979601293 1.048460650485077 1.0448652678878465 1.0419769752737182 
1.039779086110058 --------------------------------------------------------------------------------