├── .gitignore
├── Python
    ├── __pycache__
    │   ├── Jacobian.cpython-310.pyc
    │   ├── linear_phi.cpython-310.pyc
    │   ├── Eigenvalues.cpython-310.pyc
    │   ├── Phi_functions.cpython-310.pyc
    │   ├── imag_Leja_phi.cpython-310.pyc
    │   ├── real_Leja_phi.cpython-310.pyc
    │   ├── Divided_Difference.cpython-310.pyc
    │   └── real_Leja_linear_exp.cpython-310.pyc
    ├── Constant
    │   ├── __pycache__
    │   │   ├── EPI3.cpython-310.pyc
    │   │   ├── EPI4.cpython-310.pyc
    │   │   ├── EPIRK4s3.cpython-310.pyc
    │   │   ├── EPIRK4s3A.cpython-310.pyc
    │   │   ├── EPIRK4s3B.cpython-310.pyc
    │   │   ├── EPIRK5P1.cpython-310.pyc
    │   │   ├── EXPRB32.cpython-310.pyc
    │   │   ├── EXPRB42.cpython-310.pyc
    │   │   ├── EXPRB43.cpython-310.pyc
    │   │   ├── EXPRB53s3.cpython-310.pyc
    │   │   ├── EXPRB54s4.cpython-310.pyc
    │   │   ├── Cons_ExpInt.cpython-310.pyc
    │   │   └── Rosenbrock_Euler.cpython-310.pyc
    │   ├── Cons_ExpInt.py
    │   ├── Rosenbrock_Euler.py
    │   ├── EPI3.py
    │   ├── EXPRB42.py
    │   ├── EXPRB32.py
    │   ├── EPI4.py
    │   ├── EPIRK4s3B.py
    │   ├── EPIRK4s3A.py
    │   ├── EXPRB43.py
    │   ├── EPIRK4s3.py
    │   ├── EXPRB53s3.py
    │   └── EPIRK5P1.py
    ├── Variable
    │   ├── __pycache__
    │   │   ├── EPIRK4s3.cpython-310.pyc
    │   │   ├── EPIRK4s3A.cpython-310.pyc
    │   │   ├── EPIRK5P1.cpython-310.pyc
    │   │   ├── EXPRB32.cpython-310.pyc
    │   │   ├── EXPRB43.cpython-310.pyc
    │   │   ├── EXPRB53s3.cpython-310.pyc
    │   │   ├── EXPRB54s4.cpython-310.pyc
    │   │   └── Var_ExpInt.cpython-310.pyc
    │   ├── Var_ExpInt.py
    │   ├── EXPRB32.py
    │   ├── EPIRK4s3A.py
    │   ├── EXPRB43.py
    │   ├── EPIRK4s3.py
    │   ├── EXPRB53s3.py
    │   └── EPIRK5P1.py
    ├── Test
    │   └── Test_data
    │   │   └── Constant
    │   │       ├── Burgers
    │   │           ├── T_final_0.001
    │   │           │   ├── N_400_eta_10
    │   │           │   │   └── EXPRB32
    │   │           │   │   │   ├── N_cfl_8.00
    │   │           │   │   │       └── Results.txt
    │   │           │   │   │   ├── N_cfl_0.10
    │   │           │   │   │       └── Results.txt
    │   │           │   │   │   ├── N_cfl_1.00
    │   │           │   │   │       └── Results.txt
    │   │           │   │   │   ├── N_cfl_10.00
    │   │           │   │   │       └── Results.txt
    │   │           │   │   │   ├── N_cfl_2.00
    │   │           │   │   │       └── Results.txt
    │   │           │   │   │   └── N_cfl_4.00
    │   │           │   │   │       └── Results.txt
    │   │           │   └── N_300_eta_10
    │   │           │   │   ├── EXPRB32
    │   │           │   │       ├── N_cfl_1.00
    │   │           │   │       │   ├── Results.txt
    │   │           │   │       │   └── Final_data.txt
    │   │           │   │       ├── N_cfl_10.00
    │   │           │   │       │   └── Results.txt
    │   │           │   │       ├── N_cfl_2.00
    │   │           │   │       │   └── Results.txt
    │   │           │   │       ├── N_cfl_4.00
    │   │           │   │       │   └── Results.txt
    │   │           │   │       └── N_cfl_8.00
    │   │           │   │       │   └── Results.txt
    │   │           │   │   ├── EXPRB43
    │   │           │   │       ├── N_cfl_1.00
    │   │           │   │       │   ├── Results.txt
    │   │           │   │       │   └── Final_data.txt
    │   │           │   │       ├── N_cfl_2.00
    │   │           │   │       │   ├── Results.txt
    │   │           │   │       │   └── Final_data.txt
    │   │           │   │       ├── N_cfl_4.00
    │   │           │   │       │   ├── Results.txt
    │   │           │   │       │   └── Final_data.txt
    │   │           │   │       ├── N_cfl_8.00
    │   │           │   │       │   └── Results.txt
    │   │           │   │       ├── N_cfl_0.10
    │   │           │   │       │   ├── Results.txt
    │   │           │   │       │   └── Final_data.txt
    │   │           │   │       └── N_cfl_10.00
    │   │           │   │       │   └── Results.txt
    │   │           │   │   └── EXPRB53s3
    │   │           │   │       └── N_cfl_0.10
    │   │           │   │           ├── Results.txt
    │   │           │   │           └── Final_data.txt
    │   │           └── T_final_0.005
    │   │           │   └── N_400_eta_10
    │   │           │       ├── EPI3
    │   │           │           ├── N_cfl_1000.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_20.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_30.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_4.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_40.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_60.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_8.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_10.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_50.00
    │   │           │           │   └── Results.txt
    │   │           │           └── N_cfl_300.00
    │   │           │           │   └── Results.txt
    │   │           │       ├── EPI4
    │   │           │           ├── N_cfl_1000.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_20.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_30.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_40.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_60.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_10.00
    │   │           │           │   └── Results.txt
    │   │           │           └── N_cfl_50.00
    │   │           │           │   └── Results.txt
    │   │           │       ├── EXPRB32
    │   │           │           ├── N_cfl_1.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_10.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_2.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_20.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_30.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_4.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_40.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_50.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_60.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_8.00
    │   │           │           │   └── Results.txt
    │   │           │           └── N_cfl_0.10
    │   │           │           │   └── Results.txt
    │   │           │       ├── EXPRB42
    │   │           │           ├── N_cfl_1.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_10.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_20.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_30.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_4.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_40.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_50.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_60.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_8.00
    │   │           │           │   └── Results.txt
    │   │           │           ├── N_cfl_0.05
    │   │           │           │   └── Results.txt
    │   │           │           └── N_cfl_0.10
    │   │           │           │   └── Results.txt
    │   │           │       └── EXPRB43
    │   │           │           ├── N_cfl_10.00
    │   │           │               └── Results.txt
    │   │           │           ├── N_cfl_20.00
    │   │           │               └── Results.txt
    │   │           │           ├── N_cfl_30.00
    │   │           │               └── Results.txt
    │   │           │           ├── N_cfl_4.00
    │   │           │               └── Results.txt
    │   │           │           ├── N_cfl_40.00
    │   │           │               └── Results.txt
    │   │           │           ├── N_cfl_50.00
    │   │           │               └── Results.txt
    │   │           │           ├── N_cfl_60.00
    │   │           │               └── Results.txt
    │   │           │           ├── N_cfl_8.00
    │   │           │               └── Results.txt
    │   │           │           └── N_cfl_0.10
    │   │           │               └── Results.txt
    │   │       └── Time_order_plots.py
    ├── Jacobian.py
    ├── Divided_Difference.py
    ├── Readme.md
    ├── Eigenvalues.py
    ├── linear_phi.py
    ├── imag_Leja_exp.py
    ├── real_Leja_exp.py
    ├── real_Leja_phi_nl.py
    ├── imag_Leja_phi_nl.py
    ├── Phi_functions.py
    ├── real_Leja_phi.py
    ├── imag_Leja_phi.py
    └── real_Leja_linear_exp.py
├── CUDA
    ├── error_check.hpp
    ├── Test
    │   ├── Problems.hpp
    │   ├── Dif_Adv_2D.hpp
    │   └── Burgers_2D.hpp
    ├── Integrators
    │   ├── Readme.md
    │   ├── Rosenbrock_Euler.hpp
    │   ├── EXPRB32.hpp
    │   ├── EXPRB42.hpp
    │   └── EPIRK4s3B.hpp
    ├── Divided_Differences.hpp
    ├── Readme.md
    ├── Timer.hpp
    ├── Eigenvalues.hpp
    ├── Phi_functions.hpp
    ├── Jacobian_vector.hpp
    ├── Kernels.hpp
    ├── functions.hpp
    ├── real_Leja_exp.hpp
    └── real_Leja_phi_nl.hpp
├── LICENSE
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/
2 | CUDA/.vscode/
3 | CUDA/Test/build/
4 | 
5 | Python/Constant/__pycache__/
6 | Python/Test/Test_data/
7 | 


--------------------------------------------------------------------------------
/Python/__pycache__/Jacobian.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/Jacobian.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/__pycache__/linear_phi.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/linear_phi.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EPI3.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPI3.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EPI4.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPI4.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/__pycache__/Eigenvalues.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/Eigenvalues.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/__pycache__/Phi_functions.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/Phi_functions.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/__pycache__/imag_Leja_phi.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/imag_Leja_phi.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/__pycache__/real_Leja_phi.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/real_Leja_phi.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EPIRK4s3.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPIRK4s3.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EPIRK4s3A.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPIRK4s3A.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EPIRK4s3B.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPIRK4s3B.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EPIRK5P1.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EPIRK5P1.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EXPRB32.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB32.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EXPRB42.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB42.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EXPRB43.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB43.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EXPRB53s3.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB53s3.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/EXPRB54s4.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/EXPRB54s4.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Variable/__pycache__/EPIRK4s3.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EPIRK4s3.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Variable/__pycache__/EPIRK4s3A.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EPIRK4s3A.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Variable/__pycache__/EPIRK5P1.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EPIRK5P1.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Variable/__pycache__/EXPRB32.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EXPRB32.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Variable/__pycache__/EXPRB43.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EXPRB43.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Variable/__pycache__/EXPRB53s3.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EXPRB53s3.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Variable/__pycache__/EXPRB54s4.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/EXPRB54s4.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/__pycache__/Divided_Difference.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/Divided_Difference.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/Cons_ExpInt.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/Cons_ExpInt.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Variable/__pycache__/Var_ExpInt.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Variable/__pycache__/Var_ExpInt.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/__pycache__/real_Leja_linear_exp.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/__pycache__/real_Leja_linear_exp.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Constant/__pycache__/Rosenbrock_Euler.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pranab-JD/LeXInt/HEAD/Python/Constant/__pycache__/Rosenbrock_Euler.cpython-310.pyc


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_8.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.387047
 2 | 
 3 | Number of matrix-vector products = 1967
 4 | 
 5 | Step size
 6 | 2.5e-05
 7 | 
 8 | Time steps
 9 | 41
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_1000.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.189571
 2 | 
 3 | Number of matrix-vector products = 1578
 4 | 
 5 | Step size
 6 | 0.003125
 7 | 
 8 | Time steps
 9 | 2
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_20.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.661831
 2 | 
 3 | Number of matrix-vector products = 2914
 4 | 
 5 | Step size
 6 | 6.25e-05
 7 | 
 8 | Time steps
 9 | 80
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_30.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.479964
 2 | 
 3 | Number of matrix-vector products = 2348
 4 | 
 5 | Step size
 6 | 9.375e-05
 7 | 
 8 | Time steps
 9 | 54
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_4.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:02.943203
 2 | 
 3 | Number of matrix-vector products = 9220
 4 | 
 5 | Step size
 6 | 1.25e-05
 7 | 
 8 | Time steps
 9 | 400
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_40.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.399840
 2 | 
 3 | Number of matrix-vector products = 2290
 4 | 
 5 | Step size
 6 | 0.000125
 7 | 
 8 | Time steps
 9 | 40
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_60.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.292836
 2 | 
 3 | Number of matrix-vector products = 1914
 4 | 
 5 | Step size
 6 | 0.0001875
 7 | 
 8 | Time steps
 9 | 27
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_8.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.595718
 2 | 
 3 | Number of matrix-vector products = 6031
 4 | 
 5 | Step size
 6 | 2.5e-05
 7 | 
 8 | Time steps
 9 | 201
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_1000.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.232522
 2 | 
 3 | Number of matrix-vector products = 1756
 4 | 
 5 | Step size
 6 | 0.003125
 7 | 
 8 | Time steps
 9 | 2
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_20.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.690699
 2 | 
 3 | Number of matrix-vector products = 3412
 4 | 
 5 | Step size
 6 | 6.25e-05
 7 | 
 8 | Time steps
 9 | 80
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_30.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.493130
 2 | 
 3 | Number of matrix-vector products = 2636
 4 | 
 5 | Step size
 6 | 9.375e-05
 7 | 
 8 | Time steps
 9 | 54
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_40.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.388064
 2 | 
 3 | Number of matrix-vector products = 2188
 4 | 
 5 | Step size
 6 | 0.000125
 7 | 
 8 | Time steps
 9 | 40
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_60.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.282080
 2 | 
 3 | Number of matrix-vector products = 1781
 4 | 
 5 | Step size
 6 | 0.0001875
 7 | 
 8 | Time steps
 9 | 27
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_0.10/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:21.486156
 2 | 
 3 | Number of matrix-vector products = 57607
 4 | 
 5 | Step size
 6 | 3.125e-07
 7 | 
 8 | Time steps
 9 | 3201
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_1.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:02.251305
 2 | 
 3 | Number of matrix-vector products = 8753
 4 | 
 5 | Step size
 6 | 3.125e-06
 7 | 
 8 | Time steps
 9 | 321
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_10.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.294953
 2 | 
 3 | Number of matrix-vector products = 1664
 4 | 
 5 | Step size
 6 | 3.125e-05
 7 | 
 8 | Time steps
 9 | 32
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_2.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.216480
 2 | 
 3 | Number of matrix-vector products = 4967
 4 | 
 5 | Step size
 6 | 6.25e-06
 7 | 
 8 | Time steps
 9 | 161
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_400_eta_10/EXPRB32/N_cfl_4.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.640484
 2 | 
 3 | Number of matrix-vector products = 3054
 4 | 
 5 | Step size
 6 | 1.25e-05
 7 | 
 8 | Time steps
 9 | 80
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_10.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.315484
 2 | 
 3 | Number of matrix-vector products = 5307
 4 | 
 5 | Step size
 6 | 3.125e-05
 7 | 
 8 | Time steps
 9 | 160
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_50.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.321794
 2 | 
 3 | Number of matrix-vector products = 1849
 4 | 
 5 | Step size
 6 | 0.00015625
 7 | 
 8 | Time steps
 9 | 32
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_10.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.252215
 2 | 
 3 | Number of matrix-vector products = 5158
 4 | 
 5 | Step size
 6 | 3.125e-05
 7 | 
 8 | Time steps
 9 | 160
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI4/N_cfl_50.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.335634
 2 | 
 3 | Number of matrix-vector products = 1934
 4 | 
 5 | Step size
 6 | 0.00015625
 7 | 
 8 | Time steps
 9 | 32
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_1.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:21.945567
 2 | 
 3 | Number of matrix-vector products = 48008
 4 | 
 5 | Step size
 6 | 3.125e-06
 7 | 
 8 | Time steps
 9 | 1601
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_10.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:02.522143
 2 | 
 3 | Number of matrix-vector products = 9600
 4 | 
 5 | Step size
 6 | 3.125e-05
 7 | 
 8 | Time steps
 9 | 160
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_2.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:11.431219
 2 | 
 3 | Number of matrix-vector products = 28885
 4 | 
 5 | Step size
 6 | 6.25e-06
 7 | 
 8 | Time steps
 9 | 800
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_20.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.338655
 2 | 
 3 | Number of matrix-vector products = 6160
 4 | 
 5 | Step size
 6 | 6.25e-05
 7 | 
 8 | Time steps
 9 | 80
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_30.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.962021
 2 | 
 3 | Number of matrix-vector products = 4937
 4 | 
 5 | Step size
 6 | 9.375e-05
 7 | 
 8 | Time steps
 9 | 54
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_4.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:06.107865
 2 | 
 3 | Number of matrix-vector products = 17200
 4 | 
 5 | Step size
 6 | 1.25e-05
 7 | 
 8 | Time steps
 9 | 400
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_40.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.763579
 2 | 
 3 | Number of matrix-vector products = 4198
 4 | 
 5 | Step size
 6 | 0.000125
 7 | 
 8 | Time steps
 9 | 40
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_50.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.627861
 2 | 
 3 | Number of matrix-vector products = 3616
 4 | 
 5 | Step size
 6 | 0.00015625
 7 | 
 8 | Time steps
 9 | 32
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_60.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.546479
 2 | 
 3 | Number of matrix-vector products = 3233
 4 | 
 5 | Step size
 6 | 0.0001875
 7 | 
 8 | Time steps
 9 | 27
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_8.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:03.125937
 2 | 
 3 | Number of matrix-vector products = 10820
 4 | 
 5 | Step size
 6 | 2.5e-05
 7 | 
 8 | Time steps
 9 | 201
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_1.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:22.839835
 2 | 
 3 | Number of matrix-vector products = 54408
 4 | 
 5 | Step size
 6 | 3.125e-06
 7 | 
 8 | Time steps
 9 | 1601
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_10.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:02.384218
 2 | 
 3 | Number of matrix-vector products = 7849
 4 | 
 5 | Step size
 6 | 3.125e-05
 7 | 
 8 | Time steps
 9 | 160
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_20.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.367789
 2 | 
 3 | Number of matrix-vector products = 5840
 4 | 
 5 | Step size
 6 | 6.25e-05
 7 | 
 8 | Time steps
 9 | 80
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_30.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.960106
 2 | 
 3 | Number of matrix-vector products = 4667
 4 | 
 5 | Step size
 6 | 9.375e-05
 7 | 
 8 | Time steps
 9 | 54
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_4.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:06.008707
 2 | 
 3 | Number of matrix-vector products = 16799
 4 | 
 5 | Step size
 6 | 1.25e-05
 7 | 
 8 | Time steps
 9 | 400
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_40.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.728547
 2 | 
 3 | Number of matrix-vector products = 3881
 4 | 
 5 | Step size
 6 | 0.000125
 7 | 
 8 | Time steps
 9 | 40
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_50.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.613602
 2 | 
 3 | Number of matrix-vector products = 3456
 4 | 
 5 | Step size
 6 | 0.00015625
 7 | 
 8 | Time steps
 9 | 32
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_60.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.534762
 2 | 
 3 | Number of matrix-vector products = 3139
 4 | 
 5 | Step size
 6 | 0.0001875
 7 | 
 8 | Time steps
 9 | 27
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_8.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:03.096457
 2 | 
 3 | Number of matrix-vector products = 10608
 4 | 
 5 | Step size
 6 | 2.5e-05
 7 | 
 8 | Time steps
 9 | 201
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_10.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:03.706325
 2 | 
 3 | Number of matrix-vector products = 14400
 4 | 
 5 | Step size
 6 | 3.125e-05
 7 | 
 8 | Time steps
 9 | 160
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_20.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.982020
 2 | 
 3 | Number of matrix-vector products = 9121
 4 | 
 5 | Step size
 6 | 6.25e-05
 7 | 
 8 | Time steps
 9 | 80
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_30.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.459127
 2 | 
 3 | Number of matrix-vector products = 7616
 4 | 
 5 | Step size
 6 | 9.375e-05
 7 | 
 8 | Time steps
 9 | 54
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_4.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:08.574330
 2 | 
 3 | Number of matrix-vector products = 24043
 4 | 
 5 | Step size
 6 | 1.25e-05
 7 | 
 8 | Time steps
 9 | 400
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_40.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.135677
 2 | 
 3 | Number of matrix-vector products = 6120
 4 | 
 5 | Step size
 6 | 0.000125
 7 | 
 8 | Time steps
 9 | 40
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_50.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.913770
 2 | 
 3 | Number of matrix-vector products = 5440
 4 | 
 5 | Step size
 6 | 0.00015625
 7 | 
 8 | Time steps
 9 | 32
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_60.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.826547
 2 | 
 3 | Number of matrix-vector products = 5327
 4 | 
 5 | Step size
 6 | 0.0001875
 7 | 
 8 | Time steps
 9 | 27
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_8.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:04.623961
 2 | 
 3 | Number of matrix-vector products = 15024
 4 | 
 5 | Step size
 6 | 2.5e-05
 7 | 
 8 | Time steps
 9 | 201
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB32/N_cfl_0.10/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:01:48.492381
 2 | 
 3 | Number of matrix-vector products = 320002
 4 | 
 5 | Step size
 6 | 3.125e-07
 7 | 
 8 | Time steps
 9 | 16000
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_0.05/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:06:46.518278
 2 | 
 3 | Number of matrix-vector products = 512000
 4 | 
 5 | Step size
 6 | 1.5625e-07
 7 | 
 8 | Time steps
 9 | 32000
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB42/N_cfl_0.10/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:03:24.095980
 2 | 
 3 | Number of matrix-vector products = 305060
 4 | 
 5 | Step size
 6 | 3.125e-07
 7 | 
 8 | Time steps
 9 | 16000
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EXPRB43/N_cfl_0.10/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:05:10.953690
 2 | 
 3 | Number of matrix-vector products = 403723
 4 | 
 5 | Step size
 6 | 3.125e-07
 7 | 
 8 | Time steps
 9 | 16000
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_1.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.351668
 2 | 
 3 | Number of matrix-vector products = 5513
 4 | 
 5 | Step size
 6 | 5.555555555555557e-06
 7 | 
 8 | Time steps
 9 | 181
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_10.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.169843
 2 | 
 3 | Number of matrix-vector products = 936
 4 | 
 5 | Step size
 6 | 5.5555555555555565e-05
 7 | 
 8 | Time steps
 9 | 18
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_2.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.664692
 2 | 
 3 | Number of matrix-vector products = 2790
 4 | 
 5 | Step size
 6 | 1.1111111111111113e-05
 7 | 
 8 | Time steps
 9 | 90
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_4.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.407233
 2 | 
 3 | Number of matrix-vector products = 2234
 4 | 
 5 | Step size
 6 | 2.2222222222222227e-05
 7 | 
 8 | Time steps
 9 | 46
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_8.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.206144
 2 | 
 3 | Number of matrix-vector products = 1116
 4 | 
 5 | Step size
 6 | 4.444444444444445e-05
 7 | 
 8 | Time steps
 9 | 23
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_1.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.910957
 2 | 
 3 | Number of matrix-vector products = 7496
 4 | 
 5 | Step size
 6 | 5.555555555555557e-06
 7 | 
 8 | Time steps
 9 | 181
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_2.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:01.011472
 2 | 
 3 | Number of matrix-vector products = 4125
 4 | 
 5 | Step size
 6 | 1.1111111111111113e-05
 7 | 
 8 | Time steps
 9 | 90
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_4.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.569080
 2 | 
 3 | Number of matrix-vector products = 2957
 4 | 
 5 | Step size
 6 | 2.2222222222222227e-05
 7 | 
 8 | Time steps
 9 | 46
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_8.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.300638
 2 | 
 3 | Number of matrix-vector products = 1582
 4 | 
 5 | Step size
 6 | 4.444444444444445e-05
 7 | 
 8 | Time steps
 9 | 23
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.005/N_400_eta_10/EPI3/N_cfl_300.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.111079
 2 | 
 3 | Number of matrix-vector products = 915
 4 | 
 5 | Step size
 6 | 0.0009375000000000001
 7 | 
 8 | Time steps
 9 | 6
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_0.10/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:17.245968
 2 | 
 3 | Number of matrix-vector products = 47307
 4 | 
 5 | Step size
 6 | 5.555555555555557e-07
 7 | 
 8 | Time steps
 9 | 1801
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_10.00/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:00.242730
 2 | 
 3 | Number of matrix-vector products = 1332
 4 | 
 5 | Step size
 6 | 5.5555555555555565e-05
 7 | 
 8 | Time steps
 9 | 18
10 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB53s3/N_cfl_0.10/Results.txt:
--------------------------------------------------------------------------------
 1 | Time elapsed (secs): 0:00:24.081329
 2 | 
 3 | Number of matrix-vector products = 64007
 4 | 
 5 | Step size
 6 | 5.555555555555557e-07
 7 | 
 8 | Time steps
 9 | 1801
10 | 


--------------------------------------------------------------------------------
/Python/Jacobian.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###? Jacobian_vector = (RHS(u + epsilon*v) - RHS(u))/epsilon
 4 | def Jacobian(RHS, u, v, rhs_u, *args):
 5 |    
 6 |     ###* epsilon is normalised to norm(u)
 7 |     epsilon = 1e-7 * np.linalg.norm(u)
 8 |     
 9 |     ###* J(u) * v = (RHS(u + epsilon*v) - RHS(u))/epsilon
10 |     Jacobian_vector = (RHS(u + (epsilon * v), *args) - rhs_u)/epsilon
11 |     
12 |     return Jacobian_vector


--------------------------------------------------------------------------------
/Python/Divided_Difference.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def Divided_Difference(X, diffs):
 4 |     """
 5 |     Parameters
 6 |     ----------
 7 |     X       : Leja points
 8 |     diffs   : Phi function array
 9 | 
10 |     Returns
11 |     -------
12 |     div_diff : Polynomial coefficients
13 | 
14 |     """
15 | 
16 |     N = len(X)
17 |     div_diff = diffs
18 |     
19 |     for ii in range(1, N):
20 |         div_diff[ii:N] = (div_diff[ii:N] - div_diff[ii - 1])/(X[ii:N] - X[ii - 1])
21 | 
22 |     return div_diff
23 | 


--------------------------------------------------------------------------------
/CUDA/error_check.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #ifdef __CUDACC__
 4 | 
 5 | using namespace std;
 6 | 
 7 | //* This piece of code has been taken from 
 8 | //* https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
 9 | 
10 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
11 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
12 | {
13 |    if (code != cudaSuccess) 
14 |    {
15 |       fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
16 |       if (abort) exit(code);
17 |    }  
18 | }
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/Python/Variable/Var_ExpInt.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Fri Aug 13 15:31:46 2021
 3 | 
 4 | @author: Pranab JD
 5 | 
 6 | Description: -
 7 |         Contains several EXPRB and EPIRK methods.
 8 | 
 9 | """
10 | 
11 | import sys
12 | sys.path.insert(1, "../")
13 | 
14 | from EXPRB32 import EXPRB32                             #! 2nd and 3rd order
15 | from EXPRB43 import EXPRB43                             #! 3rd and 4th order
16 | from EXPRB53s3 import EXPRB53s3                         #! 3rd and 5th order
17 | from EXPRB54s4 import EXPRB54s4                         #! 4th and 5th order
18 | 
19 | from EPIRK4s3 import EPIRK4s3                           #! 3rd and 4th order
20 | from EPIRK4s3A import EPIRK4s3A                         #! 3rd and 4th order
21 | from EPIRK5P1 import EPIRK5P1                           #! 4th and 5th order


--------------------------------------------------------------------------------
/CUDA/Test/Problems.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | using namespace std;
 4 | 
 5 | //? ====================================================================================== ?//
 6 | 
 7 | struct Problems_2D
 8 | {
 9 |     int N;
10 |     double dx;
11 |     double dy;
12 |     double velocity;
13 | 
14 |     //! Constructor
15 |     Problems_2D(int _N, double _dx, double _dy, double _velocity) : N{_N}, dx{_dx}, dy{_dy}, velocity{_velocity} {}
16 | 
17 |     //! Destructor
18 |     ~Problems_2D() {}
19 | };
20 | 
//? Periodic BC
//? Wraps the index pair (ii, jj) periodically with period N and returns the
//? flattened index N*ii + jj.
//? Only a single period is added/subtracted, i.e., each index is expected to
//? lie in [-N, 2N); indices further out are NOT brought back into [0, N).
//? Declared 'inline' as this function is defined in a header: without it,
//? including the header in more than one translation unit results in
//? multiple-definition errors at link time.
#ifdef __CUDACC__
    __host__ __device__
#endif
inline int PBC(int ii, int jj, int N)
{
    //* Wrap the first index
    if(ii < 0)
        ii = ii + N;
    if(ii >= N)
        ii = ii - N;

    //* Wrap the second index
    if(jj < 0)
        jj = jj + N;
    if(jj >= N)
        jj = jj - N;

    return N*ii + jj;
}
37 | 
38 | //? ====================================================================================== ?//


--------------------------------------------------------------------------------
/Python/Constant/Cons_ExpInt.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Fri Aug 13 15:31:46 2021
 3 | 
 4 | @author: Pranab JD
 5 | 
 6 | Description: -
 7 |         Contains several EXPRB and EPIRK methods.
 8 | 
 9 | """
10 | 
11 | import sys
12 | sys.path.insert(1, "../")
13 | 
14 | from Rosenbrock_Euler import Rosenbrock_Euler           #! 2nd order
15 | from EXPRB32 import EXPRB32                             #! 3rd order
16 | from EXPRB43 import EXPRB43                             #! 4th order
17 | from EXPRB42 import EXPRB42                             #! 4th order
18 | from EXPRB53s3 import EXPRB53s3                         #! 5th order
19 | 
20 | from EPIRK4s3 import EPIRK4s3                           #! 4th order
21 | from EPIRK4s3A import EPIRK4s3A                         #! 4th order
22 | from EPIRK4s3B import EPIRK4s3B                         #! 4th order
23 | # from EPIRK5P1 import EPIRK5P1                           #! 5th order
24 | 
25 | from EPI3 import EPI3                                   #! 3rd order
26 | from EPI4 import EPI4                                   #! 4th order


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Pranab J. Deka
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB32/N_cfl_1.00/Final_data.txt:
--------------------------------------------------------------------------------
1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_0.10/Final_data.txt:
--------------------------------------------------------------------------------
1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_1.00/Final_data.txt:
--------------------------------------------------------------------------------
1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_4.00/Final_data.txt:
--------------------------------------------------------------------------------
1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB53s3/N_cfl_0.10/Final_data.txt:
--------------------------------------------------------------------------------
1 | nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan


--------------------------------------------------------------------------------
/CUDA/Integrators/Readme.md:
--------------------------------------------------------------------------------
 1 | #  [LeXInt::CUDA::Integrators](#)
 2 | 
 3 | Here, we have a collection of exponential integrators. Rosenbrock-Euler and EPIRK4s3B do not have an embedded error estimate, whilst the rest do. Exponential integrators call the ``real_Leja_phi`` function to approximate $\varphi_l(z)$ functions applied to the relevant vectors.
 4 | 
 5 | ## Invoking the exponential integrators
 6 | 
 7 | - Add ``#include "./LeXInt/CUDA/Leja.hpp"`` in the main file (main.cpp or main.cu).
 8 |     
 9 | - Create an object of the class as ``Leja(N, integrator_name)``, where 'N' is the total number of grid points and 'integrator_name' corresponds to the desired exponential integrator. E.g., ``Leja<RHS> leja_gpu{N, EXPRB32}``, where ``RHS`` is the class that contains the RHS operator.
10 | 
11 | - Invoke the object of the class ``Leja`` as ``leja_gpu.embed_exp_int`` for embedded exponential integrators or ``leja_gpu.exp_int`` for non-embedded exponential integrators. For more info, see `Test -> test_2D.cu (lines 231 and 250)`.
12 | 
13 | ## Technical Aspects
14 | 
15 | * `c` and `Gamma` have to be determined prior to invoking an exponential integrator. See `Test -> test_2D.cu (lines 167 to 172)`.
16 |   
17 | * `iters` determines the number of Leja iterations per time step. This may be considered as a proxy for the computational cost.
18 | 


--------------------------------------------------------------------------------
/CUDA/Divided_Differences.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | 
 5 | namespace LeXInt
 6 | {
 7 |     vector<double> Divided_Differences(const vector<double>& X, vector<double> coeffs)
 8 |     {
 9 |         //* -------------------------------------------------------------------------
10 |         //* Compute the coefficients for polynomial interpolation.
11 |         //*
12 |         //* Parameters
13 |         //* -----------
14 |         //* X                     : vector <double>
15 |         //*                           Set of Leja points
16 |         //* 
17 |         //* coeffs                 : vector <double>
18 |         //*                           Vector of which coeffs are to be computed
19 |         //*
20 |         //* Returns
21 |         //* ----------
22 |         //* coeffs                : vector <double>
23 |         //*                           Coefficients
24 |         //* -------------------------------------------------------------------------
25 | 
26 |         //* Number of interpolation (Leja) points
27 |         int N = X.size();
28 | 
29 |         //* Compute the divided differences
30 |         for (int ii = 1; ii < N; ii++)
31 |         {
32 |             for (int jj = 0; jj < ii; jj++)
33 |             {
34 |                 coeffs[ii] = (coeffs[ii] - coeffs[jj])/(X[ii] - X[jj]);
35 |             }
36 |         }
37 |         
38 |         return coeffs;
39 |     }
40 | }


--------------------------------------------------------------------------------
/CUDA/Readme.md:
--------------------------------------------------------------------------------
 1 | # CUDA
 2 | ![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=for-the-badge&logo=c%2B%2B&logoColor=white)
 3 | ![nVIDIA](https://img.shields.io/badge/nVIDIA-%2376B900.svg?style=for-the-badge&logo=nVIDIA&logoColor=white)
 4 | ![VS Code](https://img.shields.io/badge/VSCode-0078D4?style=for-the-badge&logo=visual%20studio%20code&logoColor=white)
 5 | ![Github](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)
 6 | 
 7 | Test examples for C++ and CUDA implementations can be found in *Test &rarr; Test_2D.cpp* and *Test &rarr; Test_2D.cu*, respectively.  To run the codes, use `bash run_cpp.sh` or `bash run_cuda.sh`. Alternatively, you could also use *sbatch* instead of *bash* if you have *slurm* installed on your computer. Problems considered include the linear diffusion-advection equation and the nonlinear Burgers' equation. To add other problems, simply define the relevant RHS function (as defined in *Burgers_2D.hpp* or *Dif_Adv_2D.hpp*) and the initial condition(s) in the test files.
 8 | 
 9 | ## Requirements
10 | - gcc and nvcc compilers
11 | - NVIDIA GPU
12 | - CUDA 11.2 (or later)
13 | ## Remarks
14 | 1. Before running the test files, please select (comment or uncomment) the desired problem and integrator (lines 79 - 89) in *Test_2D.cpp* or *Test_2D.cu*. 
15 | 2. If you get the error *"Warning!! Max. number of Leja points reached without convergence!!"*, consider reducing the time step size (dt) or increasing the number of Leja points (line 130 in *Leja.hpp*).
16 | 3. For multidimensional problems, the (input/output) data containers are expected to lie contiguous in memory. 
17 | 4. If the user-specified RHS function consists of additional parameters, one could potentially construct a ***class*** and have these supplementary parameters localised to the ***class***:
18 | ```cpp
19 | struct RHS
20 | {
21 |     RHS(*args)
22 |     void operator(input, output)
23 |     {
24 |         rhs(input, output, *args)
25 |     }
26 | }
27 | ```


--------------------------------------------------------------------------------
/Python/Readme.md:
--------------------------------------------------------------------------------
 1 | # Python
 2 | 
 3 | ![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)
 4 | ![VS Code](https://img.shields.io/badge/VSCode-0078D4?style=for-the-badge&logo=visual%20studio%20code&logoColor=white)
 5 | ![Github](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)
 6 | 
 7 | Test examples for constant and adaptive (or variable) step size implementation for the Leja-based exponential integrators can be found in *Test &rarr; Constant_test.py* or *Adaptive_test.py*. Problems considered include the Burgers' equation and the Allen-Cahn equation. To run scripts, use the following commands: `python3 Constant_test.py` or `python3 Adaptive_test.py`. To add other problems, simply define the relevant **RHS_function** and the desired initial condition(s).
 8 | 
 9 | ## Requirements
10 | - Python 3.10 (or later)
11 | 
12 | ## Remarks
13 | 1. It is expected that the rhs function is defined in the following way:
14 | 
15 | ```python
16 | def RHS_function(u):
17 | 
18 | 	### stencil_applied_to_u = *Apply stencil to 'u'* or *Compute matrix-vector products*
19 | 
20 | 	return stencil_applied_to_u
21 | ```
22 |    If different stencils are used for different physical phenomena (e.g. centered differences for diffusion and upwind for advection), the two stencils applied to 'u' vector are to be combined together.
23 | 
24 | 2. LeXInt can be used for multidimensional problems, once the state variable(s) is(are) vectorised or flattened.
25 | 
26 | 3. RHS function calls are expected to be the most expensive part of any computation. However, if the RHS function is relatively simple, or if the problem size is small, the computation of the polynomial coefficients using divided differences may become substantial. To avoid unnecessary computation of polynomial coefficients, we set the default number of Leja points (to be used) to 500 in the test problems. If you get the warning *"Warning!! Max. # of Leja points reached without convergence!!"*, consider increasing the number of Leja points to 1000, 2000, etc. or reducing the time step size (dt).
27 | 


--------------------------------------------------------------------------------
/CUDA/Timer.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <map>
 4 | #include <set>
 5 | 
 6 | #include <string.h>
 7 | 
 8 | #ifdef _OPENMP
 9 | #include <omp.h>
10 | #endif
11 | 
12 | #include <iostream>
13 | #include <iomanip>
14 | #include <sys/time.h>
15 | 
namespace LeXInt
{
    /// This timer class measures the elapsed time between two events. Timers can be
    /// started and stopped repeatedly. The total time as well as the average time
    /// between two events can be queried using the total() and average() methods,
    /// respectively.
    ///
    /// Every successful start()/stop() pair adds one measurement. Calling stop()
    /// without a matching start() prints a warning, returns 0.0, and leaves the
    /// accumulated statistics untouched.
    struct timer {
        timespec t_start;       // time stamp taken by the most recent start()
        bool running;           // true between start() and the matching stop()
        double elapsed = 0.0;   // sum of all measured intervals (seconds)
        unsigned counter;       // number of measured intervals

        timer() {
            t_start.tv_sec = 0;
            t_start.tv_nsec = 0;
            counter = 0;
            running = false;
        }

        /// Begin (or re-begin) a measurement.
        void start() {
            // A monotonic clock is used so that wall-clock adjustments
            // cannot produce negative or distorted intervals.
            clock_gettime(CLOCK_MONOTONIC, &t_start);
            running = true;
        }

        /// Discard all accumulated measurements (does not affect a measurement in progress).
        void restart() {
            elapsed = 0.0;
            counter = 0;
        }

        /// End the current measurement and return its duration in seconds.
        /// Returns 0.0 (after printing a warning) if no measurement is in progress.
        double stop() {
            if(running == false) {
                ::std::cout << "WARNING: timer::stop() has been called without calling timer::start() first." << ::std::endl;
                return 0.0;
            }

            timespec t_end;
            clock_gettime(CLOCK_MONOTONIC, &t_end);

            // The measurement is finished; a further stop() needs a new start()
            running = false;

            // Differences are formed in the native integer types first and only
            // then converted, so no narrowing of the seconds field takes place.
            double t = double(t_end.tv_sec - t_start.tv_sec)
                     + double(t_end.tv_nsec - t_start.tv_nsec)/1e9;
            counter++;
            elapsed += t;
            return t;
        }

        /// Sum of all measured intervals in seconds.
        double total() const {
            return elapsed;
        }

        /// Mean duration of the measured intervals; 0.0 if nothing has been measured yet.
        double average() const {
            if(counter == 0) {
                return 0.0;
            }
            return elapsed/double(counter);
        }

        /// Number of measured intervals.
        unsigned count() const {
            return counter;
        }
    };
}
76 | 
77 | 


--------------------------------------------------------------------------------
/Python/Constant/Rosenbrock_Euler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def Rosenbrock_Euler(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_roseu                 : numpy array
33 |                                 Output state variable(s) after time dt (2nd order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     Reference:
38 |     
39 |         D. A. Pope 
40 |         An exponential method of numerical integration of ordinary differential equations, Commun. ACM 6 (8) (1963) 491-493.
41 |         doi:10.1145/366707.367592
42 | 
43 |     """
44 |     
45 |     ###? RHS evaluated at 'u'
46 |     rhs_u = RHS_function(u)
47 |     
48 |     ###? Array of zeros vectors
49 |     zero_vec = np.zeros(np.shape(u))
50 |     
51 |     ###? dt * J(u).z
52 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
53 |     
54 |     ###? Interpolation of RHS(u) at 1
55 |     u_flux, rhs_calls, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
56 | 
57 |     ###? 2nd order solution; u_roseu = u + phi_1(J(u) dt) f(u) dt
58 |     u_roseu = u + u_flux
59 | 
60 |     ###? Proxy of computational cost
61 |     num_rhs_calls = rhs_calls + 2
62 | 
63 |     return u_roseu, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/Eigenvalues.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Thu Aug 8 20:22 2022
 3 | 
 4 | @author: Pranab JD
 5 | 
 6 | Description: -
 7 |         Functions to determine the largest eigenvalue of a 
 8 |         matrix/related matrix.
 9 |         
10 |         Gershgorin's disks can be used only if the matrix is 
11 |         explicitly available. For matrix-free implementation, 
12 |         choose power iterations.
13 | """
14 | 
15 | import sys
16 | import numpy as np
17 | 
18 | sys.path.insert(1, "./LeXInt/Python/")
19 | from Jacobian import Jacobian
20 | 
def Gershgorin(A):
    """
    Bound the spectrum of 'A' from the absolute row sums of its
    Hermitian and skew-Hermitian parts.

    Parameters
    ----------
    A        : N x N matrix

    Returns
    -------
    eig_real : Largest real eigenvalue (negative magnitude);
               minus the largest absolute row sum of the Hermitian part
    eig_imag : Largest imaginary eigenvalue;
               largest absolute row sum of the skew-Hermitian part

    """

    ###? Conjugate transpose, shared by both parts
    A_dagger = A.T.conj()

    ###? Hermitian and skew-Hermitian parts of 'A'
    hermitian_part = (A + A_dagger)/2
    skew_hermitian_part = (A - A_dagger)/2

    ###? Largest absolute row sums
    real_bound = np.max(np.sum(abs(hermitian_part), axis = 1))
    imag_bound = np.max(np.sum(abs(skew_hermitian_part), axis = 1))

    ###? The real bound has to be NEGATIVE
    return - real_bound, imag_bound
42 | 
def Power_iteration(u, RHS_function):
    """
    Estimate the magnitude of the largest eigenvalue of the Jacobian of
    'RHS_function' at 'u' with power iterations (matrix-free).

    Parameters
    ----------
    u                       : Input state variable(s)
    RHS_function	        : RHS function

    Returns
    -------
    largest_eigen_value     : Largest eigenvalue (within 2% accuracy). If the
                              iterations do not converge within the maximum
                              number of iterations, the most recent estimate is
                              returned and a warning is printed.
    ii+1                    : Number of power iterations performed
                              (one Jacobian-vector product per iteration)

    """

    tol = 0.02                                  #? 2% tolerance
    niters = 1000                               #? Max. number of iterations                    
    eigenvalue_ii_1 = 0                         #? Eigenvalue at ii-1
    vector = np.ones(np.shape(u))               #? Initial estimate of eigenvector
    rhs_u = RHS_function(u)                     #? RHS evaluated at 'u'

    ###? Always bound, even if the loop never reports convergence
    largest_eigen_value = eigenvalue_ii_1
    converged = False

    for ii in range(niters):

        ###? Compute new eigenvector
        eigenvector = Jacobian(RHS_function, u, vector, rhs_u)

        ###? Norm of eigenvector = eigenvalue
        eigenvalue = np.linalg.norm(eigenvector)

        ###? Keep the most recent estimate
        largest_eigen_value = eigenvalue
        
        ###? Normalize eigenvector to eigenvalue; new estimate of eigenvector
        vector = eigenvector/eigenvalue

        ###? Check convergence for eigenvalues (eigenvalues converge faster than eigenvectors)
        if (abs(eigenvalue - eigenvalue_ii_1) <= (tol*eigenvalue + tol)):
            converged = True
            break
        
        ###? This value becomes the previous one
        eigenvalue_ii_1 = eigenvalue

    if not converged:
        print("Warning!! Power iterations did not converge in", niters, "iterations; returning the last estimate!!")

    return largest_eigen_value, ii+1
83 | 


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Time_order_plots.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Thu Nov 2 11:55 2023
 3 | 
 4 | @author: PJD
 5 | """
 6 | 
 7 | import numpy as np
 8 | import matplotlib.pyplot as plt
 9 | 
10 | ### ======================================================= ###
11 | 
12 | ### Reference Solution
# Directory holding one sub-directory per solver for this test case
path = "./Burgers/T_final_0.005/N_400_eta_10/"

# The EXPRB42 run with N_cfl = 0.10 serves as the reference solution
file_ref = "".join([path, "/EXPRB42/N_cfl_0.10/Final_data.txt"])
data_set_ref = np.loadtxt(file_ref)

# Number of entries along the first axis of the reference data
N = len(data_set_ref)
17 | 
def l1_error(data_set):
    """Mean absolute deviation of 'data_set' from the module-level reference 'data_set_ref'."""
    deviation = data_set - data_set_ref
    return np.mean(abs(deviation))
20 | 
def compute_error(solver):
    """
    l1 errors of 'solver' with respect to the reference solution.

    Reads 'Final_data.txt' from the sub-directories N_cfl_10.00 ... N_cfl_60.00
    of 'path + solver' and returns a numpy array holding the six l1 errors,
    ordered by increasing N_cfl.
    """

    cfl_labels = ("10.00", "20.00", "30.00", "40.00", "50.00", "60.00")

    # Load all data sets first, then evaluate the errors
    file_names = [path + solver + "/N_cfl_" + label + "/Final_data.txt" for label in cfl_labels]
    data_sets = [np.loadtxt(file_name) for file_name in file_names]

    error_array = np.array([l1_error(data_set) for data_set in data_sets])

    return error_array
47 | 
48 | ### ======================================================= ###
49 | 
50 | ### Given Data Sets
# (solver directory name, which doubles as the legend label; matplotlib format string)
solver_styles = [
    ("EXPRB32", 'bo-'),
    ("EXPRB42", 'rd-'),
    ("EPI3", 'gH-'),
    ("EPI4", 'mP-'),
]

# All data sets are loaded before the figure is created
solver_errors = [compute_error(solver) for solver, _ in solver_styles]

### ======================================================= ###

### Plots

# Abscissa: the N_cfl values of the runs read by compute_error
X = np.array([10, 20, 30, 40, 50, 60])

plt.figure(figsize = (8, 6), dpi = 200)

for (solver, fmt), error in zip(solver_styles, solver_errors):
    plt.loglog(X, error, fmt, label = solver)

# Reference slopes of order 3 and 4
for prefactor, order, fmt in ((6e-12, 3, "c-"), (1e-14, 4, "k-")):
    plt.loglog(X, prefactor*X**order, fmt, label = "O(%d)" % order)

for axis in ('x', 'y'):
    plt.tick_params(axis = axis, which = 'major', labelsize = 16)

plt.title("Temporal Order", fontsize = 20)
plt.xlabel("Step size", fontsize = 16)
plt.ylabel("l1 error", fontsize = 16)

plt.legend(prop = {'size': 14}, ncol = 2)

plt.savefig(path + "/Temporal_order.eps")
87 | 
88 | ### ======================================================= ###


--------------------------------------------------------------------------------
/CUDA/Integrators/Rosenbrock_Euler.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "../Leja.hpp"
 4 | #include "../Phi_functions.hpp"
 5 | 
 6 | namespace LeXInt
 7 | {
 8 |     //? Phi functions interpolated on real Leja points
 9 |     template <typename rhs>
10 |     void Ros_Eu(rhs& RHS,                   //? RHS function
11 |                 double* u,                  //? Input state variable(s)
12 |                 double* u_exprb2,           //? Output state variable(s)
13 |                 double* auxiliary_expint,   //? Internal auxiliary variables
14 |                 double* auxiliary_Leja,     //? Internal auxiliary variables (Leja)
15 |                 size_t N,                   //? Number of grid points
16 |                 vector<double>& Leja_X,     //? Array of Leja points
17 |                 double c,                   //? Shifting factor
18 |                 double Gamma,               //? Scaling factor
19 |                 double rtol,                //? Relative tolerance (normalised desired accuracy)
20 |                 double atol,                //? Absolute tolerance
21 |                 double dt,                  //? Step size
22 |                 int& iters,                 //? # of iterations needed to converge (iteration variable)
23 |                 bool GPU,                   //? false (0) --> CPU; true (1) --> GPU
24 |                 GPU_handle& cublas_handle   //? CuBLAS handle
25 |                 )
26 |     {
27 |         //* -------------------------------------------------------------------------
28 | 
29 |         //! u, u_exprb2, auxiliary_expint, auxiliary_Leja, and auxiliary_NL
30 |         //! are device vectors if GPU support is activated.
31 | 
32 |         //*    Returns
33 |         //*    ----------
34 |         //*     u_exprb2                : double*
35 |         //*                                 2nd order solution after time dt
36 |         //*
37 |         //*
38 |         //*    Reference:
39 |         //*         D. A. Pope, An exponential method of numerical integration of ordinary differential equations, Commun. ACM 6 (8) (1963) 491-493.
40 |         //*         doi:10.1145/366707.367592
41 | 
42 |         //* -------------------------------------------------------------------------
43 | 
44 |         //? Assign names and variables
45 |         double* f_u = &auxiliary_expint[0];
46 | 
47 |         //? RHS evaluated at 'u' multiplied by 'dt'; f_u = RHS(u)*dt
48 |         RHS(u, f_u);
49 |         axpby(dt, f_u, f_u, N, GPU);
50 | 
51 |         //? Interpolation of RHS(u) at 1; phi_1(J(u) dt) f(u) dt
52 |         real_Leja_phi(RHS, u, f_u, u_exprb2, auxiliary_Leja, N, {1.0}, 
53 |                       phi_1, Leja_X, c, Gamma, rtol, atol, dt, iters, GPU, cublas_handle);
54 | 
55 |         //? 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt
56 |         axpby(1.0, u, 1.0, u_exprb2, u_exprb2, N, GPU);
57 |     }
58 | }


--------------------------------------------------------------------------------
/CUDA/Eigenvalues.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Kernels_CUDA_Cpp.hpp"
 4 | #include "Jacobian_vector.hpp"
 5 | 
 6 | namespace LeXInt
 7 | {
 8 |     //! Power Iterations
 9 |     template <typename rhs>
10 |     void Power_iterations(rhs& RHS,                     //? RHS function
11 |                           double* u,                    //? Input state variable(s)
12 |                           size_t N,                     //? Number of grid points
13 |                           double& largest_eigenvalue,   //? Largest eigenvalue (output)
14 |                           double* auxiliary,            //? Internal auxiliary variables (Jv)
15 |                           bool GPU,                     //? false (0) --> CPU; true (1) --> GPU
16 |                           GPU_handle& cublas_handle     //? CuBLAS handle
17 |                           )
18 |     {
19 |         double tol = 0.01;                              //? 1% tolerance
20 |         double eigenvalue_ii = 0.0;                     //? Eigenvalue at ii
21 |         double eigenvalue_ii_1 = 0.0;                   //? Eigenvalue at ii-1
22 |         int niters = 1000;                              //? Max. number of iterations
23 | 
24 |         //? Allocate memory for internal vectors
25 |         double* init_vector = &auxiliary[0];
26 |         double* eigenvector = &auxiliary[N];
27 |         double* auxiliary_Jv = &auxiliary[2*N];
28 | 
29 |         //? Set initial estimate of eigenvector = 1.0
30 |         eigen_ones(init_vector, N, GPU);
31 | 
32 |         //? Iterate untill convergence is reached
33 |         for (int ii = 0; ii < niters; ii++)
34 |         {
35 |             //? Compute new eigenvector
36 |             Jacobian_vector(RHS, u, init_vector, eigenvector, auxiliary_Jv, N, GPU, cublas_handle);
37 | 
38 |             //? Norm of eigenvector = eigenvalue
39 |             eigenvalue_ii = l2norm(eigenvector, N, GPU, cublas_handle)/sqrt(N);
40 | 
41 |             //? Normalize eigenvector to eigenvalue; new estimate of eigenvector
42 |             axpby(1.0/eigenvalue_ii, eigenvector, init_vector, N, GPU);
43 | 
44 |             //? Check convergence for eigenvalues (eigenvalues converge faster than eigenvectors)
45 |             if (abs(eigenvalue_ii - eigenvalue_ii_1) <= (tol * eigenvalue_ii) + tol)
46 |             {
47 |                 #ifdef __CUDACC__
48 |                     //! Error Check
49 |                     cudaDeviceSynchronize();
50 |                     gpuErrchk(cudaPeekAtLastError());
51 |                 #endif
52 | 
53 |                 //! Returns the largest eigenvalue in magnitude (needs to multiplied to a safety factor)
54 |                 largest_eigenvalue = eigenvalue_ii;
55 |                 break;
56 |             }
57 | 
58 |             //? This value becomes the previous one
59 |             eigenvalue_ii_1 = eigenvalue_ii;
60 |         }
61 |     }
62 | }


--------------------------------------------------------------------------------
/Python/Constant/EPI3.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EPI3(u, u_prev, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s) at the current time step (n)
15 |     u_prev                  : numpy array
16 |                                 State variable(s) at the previous time step (n - 1)
17 |     T_final                 : double
18 |                                 Step size
19 |     RHS_function            : user-defined function 
20 |                                 RHS function
21 |     c                       : double
22 |                                 Shifting factor
23 |     Gamma                   : double
24 |                                 Scaling factor
25 |     Leja_X                  : numpy array
26 |                                 Array of Leja points
27 |     tol                     : double
28 |                                 Accuracy of the polynomial so formed
29 |     Real_Imag               : int
30 |                                 0 - Real, 1 - Imaginary
31 | 
32 |     Returns
33 |     -------
34 |     u_epi3                  : numpy array
35 |                                 Output state variable(s) after time T_final (3rd order)
36 |     num_rhs_calls           : int
37 |                                 # of RHS calls
38 |     
39 |     Reference:
40 |     
41 |         M. Tokman
42 |         Eﬃcient integration of large stiff systems of ODEs with exponential propagation iterative (EPI) methods, J. Comput. Phys. 213 (2) (2006) 748-776
43 |         doi:10.1016/j.jcp.2005.08.032
44 | 
45 |     """
46 |     
47 |     ###? RHS evaluated at 'u'
48 |     rhs_u = RHS_function(u)
49 | 
50 |     ###? Array of zeros vectors
51 |     zero_vec = np.zeros(np.shape(u))
52 |     
53 |     ###? dt * J(u).z
54 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
55 |     
56 |     ###? J(u) . u
57 |     Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)
58 |     
59 |     ###? Difference of nonlinear remainders at u^{n-1}
60 |     R_1 = (RHS_function(u_prev) - Jacobian(RHS_function, u, u_prev, rhs_u)) - (rhs_u - Jacobian_u)
61 |     
62 |     ###? Interpolation 1; phi_1(J(u) dt) f(u) dt + 2/3 phi_2(J(u) dt) R(u^{n-1}) dt
63 |     u_flux, rhs_calls, substeps = linear_phi([zero_vec, rhs_u*T_final, 2/3*R_1*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
64 | 
65 |     ###? Internal stage; 3rd order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + 2/3 phi_2(J(u) dt) R(u^{n-1}) dt
66 |     u_epi3 = u + u_flux
67 | 
68 |     ###? Proxy of computational cost
69 |     num_rhs_calls = rhs_calls + 4
70 | 
71 |     return u_epi3, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/Variable/EXPRB32.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EXPRB32(u, T_final, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_exprb2                : numpy array
33 |                                 Output state variable(s) after time dt (2nd order)
34 |     u_exprb3                : numpy array
35 |                                 Output state variable(s) after time dt (3rd order)
36 |     num_rhs_calls           : int
37 |                                 # of RHS calls
38 |     
39 |     Reference:
40 |     
41 |         M. Hochbruck, A. Ostermann, Exponential Integrators, Acta Numer. 19 (2010) 209-286. 
42 |         doi:10.1017/S0962492910000048
43 | 
44 |     """
45 |     
46 |     ###? RHS evaluated at 'u'
47 |     rhs_u = RHS_function(u)
48 | 
49 |     ###? Array of zeros vectors
50 |     zero_vec = np.zeros(np.shape(u))
51 |     
52 |     ###? dt * J(u).z
53 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
54 |     
55 |     ###? Interpolation 1; phi_1(J(u) dt) f(u) dt
56 |     u_flux, rhs_calls_1 = linear_phi([zero_vec, rhs_u*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
57 | 
58 |     ###? Internal stage; 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt
59 |     u_exprb2 = u + u_flux
60 | 
61 |     ###? Difference of nonlinear remainders at u_exprb2
62 |     R_a = (RHS_function(u_exprb2) - Jacobian(RHS_function, u, u_exprb2, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
63 | 
64 |     ###? Interpolation 2; phi_3(J(u) dt) R(a) dt
65 |     u_nl, rhs_calls_2 = linear_phi([zero_vec, zero_vec, zero_vec, 2*R_a*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
66 |     
67 |     ###? 3rd order solution; u_3 = u_2 + 2 phi_3(J(u) dt) R(a) dt
68 |     u_exprb3 = u_exprb2 + u_nl
69 | 
70 |     ###? Proxy of computational cost
71 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + 5
72 | 
73 |     return u_exprb2, u_exprb3, num_rhs_calls


--------------------------------------------------------------------------------
/Python/Constant/EXPRB42.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EXPRB42(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_exprb4                : numpy array
33 |                                 Output state variable(s) after time dt (4th order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     Reference:
38 |     
39 |         V. T. Luan
40 |         Fourth-order two-stage explicit exponential integrators for time-dependent PDEs, Appl. Numer. Math. 112 (2017) 91-103
41 |         doi:10.1016/j.apnum.2016.10.008
42 | 
43 |     """
44 | 
45 |     ###? RHS evaluated at 'u'
46 |     rhs_u = RHS_function(u)
47 | 
48 |     ###? Array of zeros vectors
49 |     zero_vec = np.zeros(np.shape(u))
50 |     
51 |     ###? dt * J(u).z
52 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
53 |     
54 |     ###? J(u) . u
55 |     Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)
56 |     
57 |     ###? Interpolation of RHS(u) at 3/4; 3/4 phi_1(3/4 J(u) dt) f(u) dt
58 |     u_flux_1, rhs_calls_1, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 3/4, c, Gamma, Leja_X, tol)
59 | 
60 |     ###? Internal stage 1; a = u + 3/4 phi_1(3/4 J(u) dt) f(u) dt
61 |     a = u + u_flux_1
62 | 
63 |     ###? Difference of nonlinear remainders at a
64 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian_u)
65 | 
66 |     ###? Interpolation 2: phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) R(a) dt
67 |     u_flux, rhs_calls_2, substeps = linear_phi([zero_vec, rhs_u*T_final, zero_vec, 32/9*R_a*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
68 |     
69 |     ###? 3rd order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + 32/9 phi_3(J(u) dt) R(a) dt
70 |     u_exprb4 = u + u_flux
71 | 
72 |     ###? Proxy of computational cost
73 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + 4
74 | 
75 |     return u_exprb4, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/linear_phi.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from real_Leja_linear_exp import real_Leja_linear_exp
 3 | 
 4 | def A_tilde(A, B, v):
 5 |     """
 6 |     Apply the augmented matrix A_tilde = [A B; 0 K], with K = [0 I; 0 0], to the vector 'v'.
 7 | 
 8 |     Args:
 9 |         A (function handle R^n -> R^n) : Called on the first n entries of 'v'
10 |         B (array, p rows x n columns)  : Enters as the row-vector product v[n:n+p] . B
11 |         v (vector, n+p)                : Vector the augmented matrix acts on
12 | 
13 |     Returns:
14 |         y (vector, n+p) : A_tilde . v, where A_tilde = [A B; 0 K] and K = [0 I; 0 0]
15 |     
16 |         
17 |     Reference: 
18 | 
19 |         R.B. Sidje, Expokit: A Software Package for Computing Matrix Exponentials, ACM Trans. Math. Softw. 24 (1) (1998) 130 - 156.
20 |         doi:10.1145/285861.285868
21 | 
22 |     """
23 | 
24 |     [p, n] = np.shape(B)    #* p = number of rows of B, n = length of each row
25 |     
26 |     y = np.concatenate([A(v[0:n]).reshape(1, n) + np.dot(v[n:n+p].reshape(1, p), B),    #* first n entries: A(v[0:n]) + v[n:n+p] . B
27 |                         [v[n+1:n+p]],                                                  #* K block: last p entries of 'v' shifted up by one
28 |                         np.array([0]).reshape(1, 1)], axis = 1)                         #* final entry is always zero
29 |     
30 |     return y.reshape(np.shape(y)[1])    #* flatten the (1, n+p) row back to a 1D vector
31 | 
32 | 
33 | def linear_phi(interp_vector, T_final, substeps, Jacobian_vector, integrator_coeff, c, Gamma, Leja_X, tol):
34 |     """
35 |     Evaluates a linear combinaton of the phi functions as the 
36 |     exponential of an augmented matrix.
37 |     
38 |      polynomial[0:n] = phi_0(A) u(:, 1) + phi_1(A) u(:, 2) + ... + phi_p(A) u(:, p+1)
39 | 
40 |     Args:
41 |         interp_vector (vector n*(p+1))      : Vector to evaluated/interpolated
42 |         dt (double)                         : Step size
43 |         Jacobian_vector (function handle)   : Jacobian-vector product (multiplied by dt)
44 |         c (double)                          : Shifting factor
45 |         Gamma (double)                      : Scaling factor
46 |         Leja_X (vector)                     : Array of Leja points
47 |         tol (double)                        : Accuracy of the polynomial so formed
48 | 
49 |     Returns:
50 |          polynomial[0:n] (vector)           : Linear combinaton of the phi functions
51 |     
52 | 
53 |     Reference: 
54 | 
55 |         R.B. Sidje, Expokit: A Software Package for Computing Matrix Exponentials, ACM Trans. Math. Softw. 24 (1) (1998) 130 - 156.
56 |         doi:10.1145/285861.285868
57 | 
58 |     """
59 |     
60 |     ############## --------------------- ##############
61 | 
62 |     ###TODO: Interpolate on either real Leja or imaginary Leja points
63 |     # if Real_Imag == 0:
64 |     #     Leja_phi = real_Leja_phi
65 |     # elif Real_Imag == 1:
66 |     #     Leja_phi = imag_Leja_phi
67 |     # else:
68 |     #     print("Error!! Choose 0 for real or 1 for imaginary Leja points.")
69 |     
70 |     ############## --------------------- ##############
71 |     
72 |     [m, n] = np.shape(interp_vector)
73 |     B = np.flipud(interp_vector[1:m])
74 |     p = m - 1
75 | 
76 |     Atx = lambda x: A_tilde(Jacobian_vector, B, x)
77 |     
78 |     v = np.concatenate([interp_vector[0], np.zeros(p-1), [1]])
79 | 
80 |     polynomial, rhs_calls, substeps = real_Leja_linear_exp(v, T_final, substeps, Atx, integrator_coeff, c, Gamma, Leja_X, tol)
81 |     
82 |     return polynomial[0:n], rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/Constant/EXPRB32.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EXPRB32(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_exprb2                : numpy array
33 |                                 Output state variable(s) after time dt (2nd order)
34 |     u_exprb3                : numpy array
35 |                                 Output state variable(s) after time dt (3rd order)
36 |     num_rhs_calls           : int
37 |                                 # of RHS calls
38 |     
39 |     Reference:
40 |     
41 |         M. Hochbruck and A. Ostermann
42 |         Exponential Integrators, Acta Numer. 19 (2010) 209-286
43 |         doi:10.1017/S0962492910000048
44 | 
45 |     """
46 |     
47 |     ###? RHS evaluated at 'u'
48 |     rhs_u = RHS_function(u)
49 | 
50 |     ###? Array of zeros vectors
51 |     zero_vec = np.zeros(np.shape(u))
52 |     
53 |     ###? dt * J(u).z
54 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
55 |     
56 |     ###? J(u) . u
57 |     Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)
58 |     
59 |     ###? Interpolation 1; phi_1(J(u) dt) f(u) dt
60 |     u_flux, rhs_calls_1, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
61 | 
62 |     ###? Internal stage; 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt
63 |     u_exprb2 = u + u_flux
64 | 
65 |     ###? Difference of nonlinear remainders at u_exprb2
66 |     R_a = (RHS_function(u_exprb2) - Jacobian(RHS_function, u, u_exprb2, rhs_u)) - (rhs_u - Jacobian_u)
67 | 
68 |     ###? Interpolation 2; phi_3(J(u) dt) R(a) dt
69 |     u_nl, rhs_calls_2, substeps = linear_phi([zero_vec, zero_vec, zero_vec, 2*R_a*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
70 |     
71 |     ###? 3rd order solution; u_3 = u_2 + 2 phi_3(J(u) dt) R(a) dt
72 |     u_exprb3 = u_exprb2 + u_nl
73 | 
74 |     ###? Proxy of computational cost
75 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + 4
76 | 
77 |     return u_exprb3, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/imag_Leja_exp.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Divided_Difference import Divided_Difference
 3 | 
 4 | def imag_Leja_exp(u, dt, RHS_function, c, Gamma, Leja_X, tol):
 5 |     """
 6 |     Computes the polynomial interpolation of matrix exponential applied to 'u' at imaginary Leja points.
 7 | 
 8 | 
 9 |         Parameters
10 |         ----------
11 |         u                       : numpy array
12 |                                     State variable(s)
13 |         dt                      : double
14 |                                     Step size
15 |         RHS_function            : user-defined function 
16 |                                     RHS function
17 |         c                       : double
18 |                                     Shifting factor
19 |         Gamma                   : double
20 |                                     Scaling factor
21 |         Leja_X                  : numpy array
22 |                                     Array of Leja points
23 |         tol                     : double
24 |                                     Accuracy of the polynomial so formed
25 |     
26 |         Returns
27 |         ----------
28 |         polynomial              : numpy array
29 |                                     Polynomial interpolation of 'u' multiplied 
30 |                                     by the matrix exponential at imaginary Leja points
31 |         ii+1                    : int
32 |                                     # of RHS calls
33 | 
34 |     """
35 | 
36 |     ### Matrix exponential (scaled and shifted)
37 |     matrix_exponential = np.exp(dt * (c + Gamma*Leja_X) * 1j)
38 | 
39 |     ### Compute polynomial coefficients
40 |     coeffs = Divided_Difference(Leja_X, matrix_exponential) 
41 | 
42 |     ### Form the polynomial: p_0 term
43 |     polynomial = coeffs[0] * u + 0*1j
44 | 
45 |     ### p_1, p_2, ...., p_n terms
46 |     max_Leja_pts = len(Leja_X)                              # Max # of Leja points    
47 |     y = u.copy() + 0*1j                                     # To avoid changing 'u'
48 | 
49 |     ### Iterate until convergence is reached
50 |     for ii in range(1, max_Leja_pts):
51 |         
52 |         ### Compute numerical Jacobian (for linear eqs., this is the RHS evaluation at y)
53 |         Jacobian_function = RHS_function(y)
54 |         
55 |         ### y = y * ((z - c)/Gamma - Leja_X)
56 |         y = (-1j * Jacobian_function/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))
57 | 
58 |         ### Error estimate
59 |         poly_error = np.linalg.norm(y) * abs(coeffs[ii])
60 |         
61 |         ### Add the new term to the polynomial
62 |         polynomial = polynomial + (coeffs[ii] * y)
63 | 
64 |         ### If new term to be added < tol, break loop; safety factor = 0.25
65 |         if  poly_error < 0.25*tol*np.linalg.norm(polynomial):
66 |             break
67 | 
68 |         ### Warning flags
69 |         if ii == max_Leja_pts - 1:
70 |             print("Warning!! Max. # of Leja points reached without convergence!! Try increasing the number of Leja points. Max available: 10000.")
71 |             break
72 | 
73 |     return np.real(polynomial), ii
74 | 


--------------------------------------------------------------------------------
/Python/real_Leja_exp.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Divided_Difference import Divided_Difference
 3 | 
 4 | def real_Leja_exp(u, dt, RHS_function, c, Gamma, Leja_X, tol):
 5 |     """
 6 |     Computes the polynomial interpolation of matrix exponential applied to 'u' at real Leja points.
 7 | 
 8 | 
 9 |         Parameters
10 |         ----------
11 |         u                       : numpy array
12 |                                     State variable(s)
13 |         dt                      : double
14 |                                     Step size
15 |         RHS_function            : user-defined function 
16 |                                     RHS function
17 |         c                       : double
18 |                                     Shifting factor
19 |         Gamma                   : double
20 |                                     Scaling factor
21 |         Leja_X                  : numpy array
22 |                                     Array of Leja points
23 |         tol                     : double
24 |                                     Accuracy of the polynomial so formed
25 |     
26 |         Returns
27 |         ----------
28 |         polynomial              : numpy array
29 |                                     Polynomial interpolation of 'u' multiplied 
30 |                                     by the matrix exponential at real Leja points
31 |         ii                      : int
32 |                                     # of Leja points used
33 | 
34 |     """
35 |     
36 |     ###? Initialize parameters and arrays
37 |     max_Leja_pts = len(Leja_X)                                    #* Max number of Leja points  
38 |     y = u.copy()                                                  #* To avoid changing 'interp_function'
39 | 
40 |     ###? Matrix exponential (scaled and shifted)
41 |     matrix_exponential = np.exp(dt * (c + Gamma*Leja_X))
42 | 
43 |     ###? Compute polynomial coefficients
44 |     poly_coeffs = Divided_Difference(Leja_X, matrix_exponential) 
45 | 
46 |     ###? Form the polynomial: 1st term (p_0)
47 |     polynomial = poly_coeffs[0] * u
48 | 
49 |     ###? p_1, p_2, ...., p_n terms; iterate until converges
50 |     for ii in range(1, max_Leja_pts):
51 |         
52 |         ###? y = y * ((z - c)/Gamma - Leja_X)
53 |         y = (RHS_function(y)/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))
54 | 
55 |         ###? Error estimate; poly_error = |coeffs[nn]| ||y||
56 |         poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii])
57 |         
58 |         ###? Add the new term to the polynomial
59 |         polynomial = polynomial + (poly_coeffs[ii] * y)
60 | 
61 |         ###? If new term to be added < tol, break loop
62 |         if  poly_error < tol*np.linalg.norm(polynomial):
63 |             # print("Converged! # of Leja points used (exp): ", ii)
64 |             break
65 | 
66 |         ###! Warning flags
67 |         if ii == max_Leja_pts - 1:
68 |             print("Warning!! Max. # of Leja points reached without convergence!!")
69 |             print("Max. Leja points currently set to", max_Leja_pts)
70 |             print("Try increasing the number of Leja points. Max available: 10000.\n")
71 |             break
72 | 
73 |     return polynomial, ii


--------------------------------------------------------------------------------
/Python/real_Leja_phi_nl.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Divided_Difference import Divided_Difference
 3 | 
 4 | def real_Leja_phi_nl(u, dt, RHS_function, c, Gamma, Leja_X, phi_function, tol):
 5 |     """
 6 |     Computes the polynomial interpolation of phi_function applied to 'u' at real Leja points.
 7 |     
 8 |     
 9 |         Parameters
10 |         ----------
11 |         u                       : numpy array
12 |                                     Vector multiplied to phi function
13 |         dt                      : double
14 |                                     Step size
15 |         RHS_function            : user-defined function 
16 |                                     RHS function
17 | 
18 |         c                       : double
19 |                                     Shifting factor
20 |         Gamma                   : double
21 |                                     Scaling factor
22 |         Leja_X                  : numpy array
23 |                                     Array of Leja points
24 |         phi_function            : function
25 |                                     Phi function (typically phi_1)
26 |         tol                     : double
27 |                                     Accuracy of the polynomial so formed
28 | 
29 |         Returns
30 |         ----------
31 |         polynomial              : numpy array
32 |                                     Polynomial interpolation of 'u' multiplied
33 |                                     to phi_function at real Leja points
34 |         ii                      : int
35 |                                     # of Leja points used
36 | 
37 |     """
38 | 
39 |     ###? Initialize parameters and arrays
40 |     max_Leja_pts = len(Leja_X)                                    #* Max number of Leja points  
41 |     y = u.copy()                                                  #* To avoid changing 'interp_function'
42 |         
43 |     ###? Phi function applied to 'interp_function' (scaled and shifted)
44 |     phi_function_array = phi_function(dt * (c + Gamma*Leja_X))
45 |     
46 |     ###? Compute polynomial coefficients
47 |     poly_coeffs = Divided_Difference(Leja_X, phi_function_array) 
48 | 
49 |     ###? Form the polynomial: 1st term (p_0)
50 |     polynomial = poly_coeffs[0] * u
51 |     
52 |     ###? p_1, p_2, ...., p_n terms; iterate until converges
53 |     for ii in range(1, max_Leja_pts):
54 | 
55 |         ###? y = y * ((z - c)/Gamma - Leja_X)
56 |         y = (RHS_function(y)/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))
57 | 
58 |         ###? Error estimate; poly_error = |coeffs[nn]| ||y||
59 |         poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii])
60 | 
61 |         ###? Add the new term to the polynomial
62 |         polynomial = polynomial + (poly_coeffs[ii] * y)
63 |         
64 |         ###? If new term to be added < tol, break loop
65 |         if  poly_error < tol*np.linalg.norm(polynomial):
66 |             # print("Converged! # of Leja points used (phi nl): ", ii)
67 |             break
68 |         
69 |         ###! Warning flags
70 |         if ii == max_Leja_pts - 1:
71 |             print("Warning!! Max. # of Leja points reached without convergence!!")
72 |             print("Max. Leja points currently set to", max_Leja_pts)
73 |             print("Try increasing the number of Leja points. Max available: 10000.\n")
74 |             break
75 | 
76 |     return polynomial, ii


--------------------------------------------------------------------------------
/Python/Constant/EPI4.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EPI4(u, u_prev, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s) at the current time step (n)
15 |     u_prev                  : numpy array
16 |                                 State variable(s) at the 2 previous time steps (n - 1, n - 2)
17 |     T_final                 : double
18 |                                 Step size
19 |     RHS_function            : user-defined function 
20 |                                 RHS function
21 |     c                       : double
22 |                                 Shifting factor
23 |     Gamma                   : double
24 |                                 Scaling factor
25 |     Leja_X                  : numpy array
26 |                                 Array of Leja points
27 |     tol                     : double
28 |                                 Accuracy of the polynomial so formed
29 |     Real_Imag               : int
30 |                                 0 - Real, 1 - Imaginary
31 | 
32 |     Returns
33 |     -------
34 |     u_epi4                  : numpy array
35 |                                 Output state variable(s) after time T_final (4th order)
36 |     num_rhs_calls           : int
37 |                                 # of RHS calls
38 |     
39 |     Reference:
40 |     
41 |         S. Gaudreault, M. Charron, V. Dallerit, and M. Tokman
42 |         High-order numerical solutions to the shallow-water equations on the rotated cubed-sphere grid, J. Comput. Phys. 449 (2022) 110792. 
43 |         doi:10.1016/j.jcp.2021.110792
44 | 
45 |     """
46 |     
47 |     ###? RHS evaluated at 'u'
48 |     rhs_u = RHS_function(u)
49 | 
50 |     ###? Array of zeros vectors
51 |     zero_vec = np.zeros(np.shape(u))
52 |     
53 |     ###? dt * J(u).z
54 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
55 |     
56 |     ###? J(u) . u
57 |     Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)
58 |     
59 |     ###? EPI4 coefficients
60 |     a21 = -3/10; a22 = 3/40
61 |     a31 = 32/5;  a32 = -11/10
62 |     
63 |     ###? Difference of nonlinear remainders at u^{n-1} and u^{n-2}
64 |     R_1 = (RHS_function(u_prev[:, 0]) - Jacobian(RHS_function, u, u_prev[:, 0], rhs_u)) - (rhs_u - Jacobian_u)
65 |     R_2 = (RHS_function(u_prev[:, 1]) - Jacobian(RHS_function, u, u_prev[:, 1], rhs_u)) - (rhs_u - Jacobian_u)
66 |     
67 |     ###? Interpolation 1; phi_1(J(u) dt) f(u) dt + phi_2(J(u) dt) (a21 R(u^{n-1}) + a22 R(u^{n-2})) dt + phi_3(J(u) dt) (a31 R(u^{n-1}) + a32 R(u^{n-2})) dt
68 |     u_flux, rhs_calls, substeps = linear_phi([zero_vec, rhs_u*T_final, (a21*R_1+a22*R_2)*T_final, (a31*R_1+a32*R_2)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
69 | 
70 |     ###? Internal stage; 4th order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + phi_2(J(u) dt) (a21 R(u^{n-1}) + a22 R(u^{n-2})) dt + phi_3(J(u) dt) (a31 R(u^{n-1}) + a32 R(u^{n-2})) dt
71 |     u_epi4 = u + u_flux
72 | 
73 |     ###? Proxy of computational cost
74 |     num_rhs_calls = rhs_calls + 6
75 | 
76 |     return u_epi4, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/CUDA/Phi_functions.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "functions.hpp"
 4 | 
 5 | //? Phi Functions ('z' is assumed to a double)
 6 | 
 7 | namespace LeXInt
 8 | {
 9 |     //? phi_k(z) = (exp(z) - sum_{j<k} z^j/j!) / z^k, with phi_k(0) = 1/k!.
10 |     //?
11 |     //? The closed form subtracts nearly equal numbers when |z| is small and loses
12 |     //? roughly eps/|z|^k of relative accuracy, so inside |z| <= taylor_radius an
13 |     //? 11-term Taylor series (Horner form) is used instead. With a radius of 0.2
14 |     //? the first neglected term is below 1e-16 relative to phi_k(0) for k = 1..4.
15 |     //?
16 |     //? The functions are 'inline' because they are defined in a header: without it,
17 |     //? including this file from two translation units gives multiple definitions.
18 |     //? 'fabs' is used rather than unqualified 'abs' so the argument can never be
19 |     //? routed to the integer overload.
20 |     //?
21 |     //? NOTE(review): factorial(13) and factorial(14) do not fit in a 32-bit int;
22 |     //? confirm that factorial() in functions.hpp returns a wide enough type.
23 | 
24 |     //? phi_1(z) = (exp(z) - 1)/z
25 |     inline double phi_1(double z)
26 |     {
27 |         const double taylor_radius = 0.2;
28 | 
29 |         if (fabs(z) <= taylor_radius)
30 |         {
31 |             return 1./factorial(1) + z * (1./factorial(2)  + z * (1./factorial(3) +
32 |                                      z * (1./factorial(4)  + z * (1./factorial(5) +
33 |                                      z * (1./factorial(6)  + z * (1./factorial(7) +
34 |                                      z * (1./factorial(8)  + z * (1./factorial(9) +
35 |                                      z * (1./factorial(10) + z * (1./factorial(11)))))))))));
36 |         }
37 | 
38 |         return (exp(z) - 1)/z;
39 |     }
40 | 
41 | 
42 |     //? phi_2(z) = (exp(z) - z - 1)/z^2
43 |     inline double phi_2(double z)
44 |     {
45 |         const double taylor_radius = 0.2;
46 | 
47 |         if (fabs(z) <= taylor_radius)
48 |         {
49 |             return 1./factorial(2) + z * (1./factorial(3)  + z * (1./factorial(4)  +
50 |                                      z * (1./factorial(5)  + z * (1./factorial(6)  +
51 |                                      z * (1./factorial(7)  + z * (1./factorial(8)  +
52 |                                      z * (1./factorial(9)  + z * (1./factorial(10) +
53 |                                      z * (1./factorial(11) + z * (1./factorial(12)))))))))));
54 |         }
55 | 
56 |         return (exp(z) - z - 1)/(z*z);
57 |     }
58 | 
59 | 
60 |     //? phi_3(z) = (exp(z) - z^2/2 - z - 1)/z^3
61 |     inline double phi_3(double z)
62 |     {
63 |         const double taylor_radius = 0.2;
64 | 
65 |         if (fabs(z) <= taylor_radius)
66 |         {
67 |             return 1./factorial(3) + z * (1./factorial(4)  + z * (1./factorial(5) +
68 |                                      z * (1./factorial(6)  + z * (1./factorial(7) +
69 |                                      z * (1./factorial(8)  + z * (1./factorial(9) +
70 |                                      z * (1./factorial(10) + z * (1./factorial(11) +
71 |                                      z * (1./factorial(12) + z * (1./factorial(13)))))))))));
72 |         }
73 | 
74 |         return (exp(z) - (z*z)/2 - z - 1)/(z*z*z);
75 |     }
76 | 
77 | 
78 |     //? phi_4(z) = (exp(z) - z^3/6 - z^2/2 - z - 1)/z^4
79 |     inline double phi_4(double z)
80 |     {
81 |         const double taylor_radius = 0.2;
82 | 
83 |         if (fabs(z) <= taylor_radius)
84 |         {
85 |             return 1./factorial(4) + z * (1./factorial(5)  + z * (1./factorial(6)  +
86 |                                      z * (1./factorial(7)  + z * (1./factorial(8)  +
87 |                                      z * (1./factorial(9)  + z * (1./factorial(10) +
88 |                                      z * (1./factorial(11) + z * (1./factorial(12) +
89 |                                      z * (1./factorial(13) + z * (1./factorial(14)))))))))));
90 |         }
91 | 
92 |         return (exp(z) - (z*z*z)/6 - (z*z)/2 - z - 1)/(z*z*z*z);
93 |     }
94 | }


--------------------------------------------------------------------------------
/Python/Constant/EPIRK4s3B.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EPIRK4s3B(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_epirk4                : numpy array
33 |                                 Output state variable(s) after time dt (4th order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     Reference:
38 |     
39 |         G. Rainwater and M. Tokman
40 |         A new approach to constructing efficient stiffly accurate EPIRK methods, J. Comput. Phys. 323 (2016) 283-309
41 |         doi:10.1016/j.jcp.2016.07.026
42 | 
43 |     """
44 | 
45 |     ###? RHS evaluated at 'u'
46 |     rhs_u = RHS_function(u)
47 | 
48 |     ###? Array of zeros vectors
49 |     zero_vec = np.zeros(np.shape(u))
50 |     
51 |     ###? dt * J(u).z
52 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
53 |     
54 |     ###? Interpolation of RHS(u) at 1/2 and 3/4; phi_2({1/2, 3/4} J(u) dt) f(u) dt
55 |     u_flux_1, rhs_calls_1, substeps = linear_phi([zero_vec, zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
56 |     u_flux_2, rhs_calls_2, substeps = linear_phi([zero_vec, zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 3/4, c, Gamma, Leja_X, tol)
57 | 
58 |     ###? Internal stage 1; a = u + 2/3 phi_2(1/2 J(u) dt) f(u) dt
59 |     a = u + (2/3 * 2 * u_flux_1)
60 |     
61 |     ###? Internal stage 2; b = u + phi_2(3/4 J(u) dt) f(u) dt
62 |     b = u + (4/3 * u_flux_2)
63 | 
64 |     ###? Difference of nonlinear remainders at a and b
65 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
66 |     R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
67 |     
68 |     ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (32*R(a) - 27/2*R(b)) dt + phi_4(J(u) dt) (-144*R(a) + 81*R(b)) dt
69 |     u_flux, rhs_calls_3, substeps = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (54*R_a - 16*R_b)*T_final, (-324*R_a + 144*R_b)*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
70 |      
71 |     ###? 4th order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (54R(a) - 16R(b)) dt + phi_4(J(u) dt) (-324R(a) + 144R(b)) dt
72 |     u_epirk4 = u + u_flux
73 | 
74 |     ###? Proxy of computational cost
75 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 8
76 | 
77 |     return u_epirk4, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/Constant/EPIRK4s3A.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EPIRK4s3A(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_epirk4                : numpy array
33 |                                 Output state variable(s) after time dt (4th order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     Reference: 
38 |     
39 |         G. Rainwater and M. Tokman
40 |         A new approach to constructing efficient stiffly accurate EPIRK methods, J. Comput. Phys. 323 (2016) 283-309
41 |         doi:10.1016/j.jcp.2016.07.026
42 | 
43 |     """
44 |     
45 |     ###? RHS evaluated at 'u'
46 |     rhs_u = RHS_function(u)
47 | 
48 |     ###? Array of zeros vectors
49 |     zero_vec = np.zeros(np.shape(u))
50 |     
51 |     ###? dt * J(u).z
52 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
53 |     
54 |     ###? J(u) . u
55 |     Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)
56 | 
57 |     ###? Interpolations 1 & 2; {1/2, 2/3} phi_1({1/2, 2/3} J(u) dt) f(u) dt
58 |     u_flux_1, rhs_calls_1, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
59 |     u_flux_2, rhs_calls_2, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 2/3, c, Gamma, Leja_X, tol)
60 | 
61 |     ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
62 |     a = u + u_flux_1
63 | 
64 |     ###? Internal stage 2; b = u + 2/3 phi_1(2/3 J(u) dt) f(u) dt
65 |     b = u + u_flux_2
66 | 
67 |     ###? Difference of nonlinear remainders at a and b
68 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian_u)
69 |     R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian_u)
70 | 
71 |     ###? Interpolation 2; phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (32*R(a) - 27/2*R(b)) dt + phi_4(J(u) dt) (-144*R(a) + 81*R(b)) dt
72 |     u_flux, rhs_calls_3, substeps = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (32*R_a-27/2*R_b)*T_final, (-144*R_a+81*R_b)*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
73 |     
74 |     ###? 4th order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (32R(a) - (27/2)R(b)) dt + phi_4(J(u) dt) (-144R(a) + 81R(b)) dt
75 |     u_epirk4 = u + u_flux
76 | 
77 |     ###? Proxy of computational cost
78 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 6
79 | 
80 |     return u_epirk4, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/Constant/EXPRB43.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EXPRB43(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_exprb4                : numpy array
33 |                                 Output state variable(s) after time dt (4th order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     Reference:
38 |     
39 |         M. Hochbruck and A. Ostermann
40 |         Exponential Integrators, Acta Numer. 19 (2010) 209-286
41 |         doi:10.1017/S0962492910000048
42 | 
43 |     """
44 |     
45 |     ###? RHS evaluated at 'u'
46 |     rhs_u = RHS_function(u)
47 | 
48 |     ###? Array of zeros vectors
49 |     zero_vec = np.zeros(np.shape(u))
50 |     
51 |     ###? dt * J(u).z
52 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
53 |     
54 |     ###? J(u) . u
55 |     Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)
56 |     
57 |     ###? Interpolation 1; 1/2 phi_1(1/2 J(u) dt) f(u) dt
58 |     u_flux_1, rhs_calls_1, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
59 | 
60 |     ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
61 |     a = u + u_flux_1
62 | 
63 |     ###? Difference of nonlinear remainder at a
64 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian_u)
65 | 
66 |     ###? Interpolation 2; phi_1(J(u) dt) f(u) dt + phi_1(J(u) dt) R(a) dt
67 |     u_flux_2, rhs_calls_2, substeps = linear_phi([zero_vec, (rhs_u + R_a)*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
68 | 
69 |     ###? Internal stage 2; b = u + phi_1(J(u) dt) f(u) dt + phi_1(J(u) dt) R(a) dt
70 |     b = u + u_flux_2
71 | 
72 |     ###? Nonlinear remainder at b
73 |     R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian_u)
74 | 
75 |     ###? Interpolation 3; phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (16R(a) - 2R(b)) dt + phi_4(J(u) dt) (-48R(a) + 12R(b)) dt
76 |     u_flux, rhs_calls_3, substeps = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (16*R_a-2*R_b)*T_final, (-48*R_a+12*R_b)*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
77 |     
78 |     ###? 4th order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (16R(a) - 2R(b)) dt + phi_4(J(u) dt) (-48R(a) + 12R(b)) dt
79 |     u_exprb4 = u + u_flux
80 | 
81 |     ###? Proxy of computational cost
82 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 6
83 | 
84 |     return u_exprb4, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/CUDA/Jacobian_vector.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Timer.hpp"
 4 | #include "Kernels_CUDA_Cpp.hpp"
 5 | 
 6 | namespace LeXInt
 7 | {
 8 |     //? J(u) * y = (F(u + epsilon*y) - F(u - epsilon*y))/(2*epsilon)
 9 |     //?
10 |     //? Matrix-free, central-difference approximation of the Jacobian-vector product.
11 |     //? 'Jac_vec' doubles as scratch space: it first receives RHS(u), then the perturbed
12 |     //? states u +/- epsilon*y, and finally the result. Because it is overwritten before
13 |     //? 'y' is read, 'y' should not share storage with 'Jac_vec'.
14 |     //? 'auxiliary_Jv' provides two buffers that start at offsets 0 and N.
15 |     template <typename rhs>
16 |     void Jacobian_vector(rhs& RHS,                      //? RHS function
17 |                          double* u,                     //? Input state variable(s)
18 |                          double* y,                     //? Vector to be multiplied to Jacobian 
19 |                          double* Jac_vec,               //? Output Jacobian-vector product
20 |                          double* auxiliary_Jv,          //? Internal auxiliary variables
21 |                          size_t N,                      //? Number of grid points
22 |                          bool GPU,                      //? false (0) --> CPU; true (1) --> GPU
23 |                          GPU_handle& cublas_handle      //? CuBLAS handle
24 |                          )
25 |     {
26 |         //! This function has 10 + (3 * RHS_read_writes) vector reads and writes.
27 | 
28 |         //? Assign names and variables (f_u and u_eps deliberately share the storage of Jac_vec)
29 |         double* f_u = &Jac_vec[0]; double* u_eps = &Jac_vec[0];
30 |         double* rhs_u_eps_1 = &auxiliary_Jv[0];
31 |         double* rhs_u_eps_2 = &auxiliary_Jv[N];
32 | 
33 |         //? RHS evaluated at 'u'; f_u = RHS(u)
34 |         RHS(u, f_u);
35 | 
36 |         //? epsilon ~ 1e-7 (normalised)
37 |         double rhs_norm = l2norm(f_u, N, GPU, cublas_handle)/sqrt(N);
38 |         double epsilon = 1e-7*rhs_norm;
39 | 
40 |         //? RHS(u) = 0 gives epsilon = 0, which would turn the difference quotient below into 0/0;
41 |         //? fall back to an absolute perturbation in that case
42 |         if (epsilon == 0.0)
43 |         {
44 |             epsilon = 1e-7;
45 |         }
46 |         
47 |         //? u_eps = u + epsilon*y
48 |         axpby(1.0, u, epsilon, y, u_eps, N, GPU); 
49 | 
50 |         //? rhs_u_eps_1 = RHS(u + epsilon*y)
51 |         RHS(u_eps, rhs_u_eps_1);
52 | 
53 |         //? u_eps = u - epsilon*y
54 |         axpby(1.0, u, -epsilon, y, u_eps, N, GPU); 
55 | 
56 |         //? rhs_u_eps_2 = RHS(u - epsilon*y)
57 |         RHS(u_eps, rhs_u_eps_2);
58 | 
59 |         //? Jac_vec = J(u) * y = (RHS(u + epsilon*y) - RHS(u - epsilon*y))/(2*epsilon)
60 |         axpby(1.0/(2.0*epsilon), rhs_u_eps_1, -1.0/(2.0*epsilon), rhs_u_eps_2, Jac_vec, N, GPU);
61 |     }
62 | 
63 |     //? F(y) = f(y) - (J(u) * y)
64 |     //?
65 |     //? Nonlinear remainder of the RHS at 'y', linearised about 'u'.
66 |     //? 'auxiliary_Jv' is addressed at offsets 0 (J(u)*y), N (scratch handed on to
67 |     //? Jacobian_vector, which itself uses two buffers from there), and 3N (f(y)).
68 |     template <typename rhs>
69 |     void Nonlinear_remainder(rhs& RHS,                      //? RHS function
70 |                              double* u,                     //? Input state variable(s)
71 |                              double* y,                     //? Vector to be multiplied to Jacobian 
72 |                              double* Nonlinear_y,           //? Output nonlinear remainder       
73 |                              double* auxiliary_Jv,          //? Internal auxiliary variables for Jacobian-vector
74 |                              size_t N,                      //? Number of grid points
75 |                              bool GPU,                      //? false (0) --> CPU; true (1) --> GPU
76 |                              GPU_handle& cublas_handle      //? CuBLAS handle
77 |                              )
78 |     {
79 |         //! This function has 13 + (4 * RHS_read_writes) vector reads and writes.
80 | 
81 |         //? Assign names and variables
82 |         double* Linear_y = &auxiliary_Jv[0];
83 |         double* Jv = &auxiliary_Jv[N]; 
84 |         double* f_y = &auxiliary_Jv[3*N];
85 | 
86 |         //? J(u) * y = (F(u + epsilon*y) - F(u - epsilon*y))/(2*epsilon)
87 |         Jacobian_vector(RHS, u, y, Linear_y, Jv, N, GPU, cublas_handle);
88 | 
89 |         //? RHS evaluated at 'y'; f_y = RHS(y)
90 |         RHS(y, f_y);
91 | 
92 |         //? F(y) = f(y) - (J(u) * y)
93 |         axpby(1.0, f_y, -1.0, Linear_y, Nonlinear_y, N, GPU);
94 |     }
95 | }


--------------------------------------------------------------------------------
/Python/Variable/EPIRK4s3A.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EPIRK4s3A(u, T_final, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_epirk4                : numpy array
33 |                                 Output state variable(s) after time dt (4th order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     Reference: 
38 |     
39 |         G. Rainwater, M. Tokman, A new approach to constructing efficient stiffly accurate EPIRK methods, J. Comput. Phys. 323 (2016) 283-309.
40 |         doi:10.1016/j.jcp.2016.07.026.
41 | 
42 |     """
43 |     
44 |     ###? RHS evaluated at 'u'
45 |     rhs_u = RHS_function(u)
46 | 
47 |     ###? Array of zeros vectors
48 |     zero_vec = np.zeros(np.shape(u))
49 |     
50 |     ###? dt * J(u).z
51 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
52 | 
53 |     ###? Interpolations 1 & 2; {1/2, 2/3} phi_1({1/2, 2/3} J(u) dt) f(u) dt
54 |     u_flux_1, rhs_calls_1 = linear_phi([zero_vec, rhs_u*T_final], T_final, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
55 |     u_flux_2, rhs_calls_2 = linear_phi([zero_vec, rhs_u*T_final], T_final, Jac_vec, 2/3, c, Gamma, Leja_X, tol)
56 | 
57 |     ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
58 |     a = u + u_flux_1
59 | 
60 |     ###? Internal stage 2; b = u + 2/3 phi_1(2/3 J(u) dt) f(u) dt
61 |     b = u + u_flux_2
62 | 
63 |     ###? Difference of nonlinear remainders at a and b
64 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
65 |     R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
66 | 
67 |     ###? Interpolation 3; phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (32*R(a) - 27/2*R(b)) dt
68 |     u_flux, rhs_calls_3 = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (32*R_a - 27/2*R_b)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
69 |     
70 |     ###? Interpolation 4; phi_4(J(u) dt) (-144*R(a) + 81*R(b)) dt
71 |     u_nl, rhs_calls_4 = linear_phi([zero_vec, zero_vec, zero_vec, zero_vec, (-144*R_a + 81*R_b)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
72 |     
73 |     ###? 3rd order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (32R(a) - (27/2)R(b)) dt
74 |     u_epirk3 = u + u_flux
75 |     
76 |     ###? 4th order solution; u_4 = u_3 + phi_4(J(u) dt) (-144R(a) + 81R(b)) dt
77 |     u_epirk4 = u_epirk3 + u_nl
78 | 
79 |     ###? Proxy of computational cost
80 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + rhs_calls_4 + 8
81 | 
82 |     return u_epirk3, u_epirk4, num_rhs_calls


--------------------------------------------------------------------------------
/Python/Variable/EXPRB43.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EXPRB43(u, T_final, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_exprb4                : numpy array
33 |                                 Output state variable(s) after time dt (4th order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     Reference:
38 |     
39 |         M. Hochbruck, A. Ostermann, Exponential Integrators, Acta Numer. 19 (2010) 209-286. 
40 |         doi:10.1017/S0962492910000048.
41 | 
42 |     """
43 |     
44 |     ###? RHS evaluated at 'u'
45 |     rhs_u = RHS_function(u)
46 | 
47 |     ###? Array of zeros vectors
48 |     zero_vec = np.zeros(np.shape(u))
49 |     
50 |     ###? dt * J(u).z
51 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
52 |     
53 |     ###? Interpolation 1; 1/2 phi_1(1/2 J(u) dt) f(u) dt
54 |     u_flux_1, rhs_calls_1 = linear_phi([zero_vec, rhs_u*T_final], T_final, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
55 | 
56 |     ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
57 |     a = u + u_flux_1
58 | 
59 |     ###? Difference of nonlinear remainder at a
60 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
61 | 
62 |     ###? Interpolation 2; phi_1(J(u) dt) f(u) dt + phi_1(J(u) dt) R(a) dt
63 |     u_flux_2, rhs_calls_2 = linear_phi([zero_vec, (rhs_u + R_a)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
64 | 
65 |     ###? Internal stage 2; b = u + phi_1(J(u) dt) f(u) dt + phi_1(J(u) dt) R(a) dt
66 |     b = u + u_flux_2
67 | 
68 |     ###? Nonlinear remainder at b
69 |     R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
70 | 
71 |     ###? Interpolation 3; phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (16R(a) - 2R(b)) dt
72 |     u_flux, rhs_calls_3 = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (16*R_a - 2*R_b)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
73 |     
74 |     ###? Interpolation 4; phi_4(J(u) dt) (-48R(a) + 12R(b)) dt
75 |     u_nl, rhs_calls_4 = linear_phi([zero_vec, zero_vec, zero_vec, zero_vec, (-48*R_a + 12*R_b)*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
76 |     
77 |     ###? 3rd order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (16R(a) - 2R(b)) dt
78 |     u_exprb3 = u + u_flux
79 |     
80 |     ###? 4th order solution; u_4 = u_3 + phi_4(J(u) dt) (-48R(a) + 12R(b)) dt
81 |     u_exprb4 = u + u_nl
82 | 
83 |     ###? Proxy of computational cost
84 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + rhs_calls_4 + 8
85 | 
86 |     return u_exprb3, u_exprb4, num_rhs_calls


--------------------------------------------------------------------------------
/CUDA/Kernels.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | //? ----------------------------------------------------------
  4 | //?
  5 | //? Description:
  6 | //?     A plethora of kernels that are used throughout
  7 | //?     the code are defined here.
  8 | //?
  9 | //? ----------------------------------------------------------
 10 | 
 11 | 
 12 | #include "error_check.hpp"
 13 | #include "Timer.hpp"
 14 | 
 15 | #ifdef __CUDACC__
 16 |     #include <cublas_v2.h>
 17 |     #include <cuda_runtime.h>
 18 |     #include <cuda.h>
 19 | #endif
 20 | 
 21 | struct GPU_handle                                //? Wraps a cuBLAS handle when __CUDACC__ is defined; empty otherwise
 22 | {
 23 |     #if defined(__CUDACC__)
 24 |         cublasHandle_t cublas_handle;            //? Created in the constructor, destroyed in the destructor
 25 |     #endif
 26 |     //? Constructor: creates the cuBLAS handle (no-op when __CUDACC__ is not defined)
 27 |     GPU_handle()
 28 |     {
 29 |         #if defined(__CUDACC__)
 30 |             cublasCreate_v2(&cublas_handle);
 31 |         #endif
 32 |     }
 33 |     //? Destructor: destroys the cuBLAS handle (no-op when __CUDACC__ is not defined)
 34 |     ~GPU_handle()
 35 |     {
 36 |         #if defined(__CUDACC__)
 37 |             cublasDestroy(cublas_handle);
 38 |         #endif
 39 |     }
 40 | };
 41 | 
 42 | namespace LeXInt
 43 | {
 44 |     #ifdef __CUDACC__
 45 | 
 46 |     //? Every kernel below maps one thread to one entry: thread 'ii' handles entry 'ii'
 47 |     //? and threads with ii >= N do nothing. The index is computed in size_t so that it
 48 |     //? is compared with N without a signed/unsigned conversion and does not wrap for large grids.
 49 | 
 50 |     //? Set y = x
 51 |     __global__ void copy_CUDA(double *x, double *y, size_t N)
 52 |     {
 53 |         size_t ii = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
 54 | 
 55 |         if(ii < N)
 56 |         {
 57 |             y[ii] = x[ii];
 58 |         }
 59 |     }
 60 | 
 61 |     //? ones(x) = (x[0:N] =) 1.0
 62 |     __global__ void ones_CUDA(double *x, size_t N)
 63 |     {
 64 |         size_t ii = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
 65 | 
 66 |         if(ii < N)
 67 |         {
 68 |             x[ii] = 1.0;
 69 |         }
 70 |     }
 71 | 
 72 |     //? x = (1, 0, 0, ..., 0); x[0] = 1.0 and x[1:N] = 0.0
 73 |     //? Nothing is written when N = 0.
 74 |     __global__ void eigen_ones_CUDA(double *x, size_t N)
 75 |     {
 76 |         size_t ii = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
 77 | 
 78 |         //? Each entry is written by exactly one thread, and only if it lies inside [0, N)
 79 |         if(ii < N)
 80 |         {
 81 |             x[ii] = (ii == 0) ? 1.0 : 0.0;
 82 |         }
 83 |     }
 84 | 
 85 |     //? y = ax
 86 |     __global__ void axpby_CUDA(double a, double *x, 
 87 |                                          double *y, size_t N)
 88 |     {
 89 |         size_t ii = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
 90 | 
 91 |         if(ii < N)
 92 |         {
 93 |             y[ii] = (a * x[ii]);
 94 |         }
 95 |     }
 96 | 
 97 |     //? z = ax + by
 98 |     __global__ void axpby_CUDA(double a, double *x, 
 99 |                                double b, double *y, 
100 |                                          double *z, size_t N)
101 |     {
102 |         size_t ii = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
103 | 
104 |         if(ii < N) 
105 |         {
106 |             z[ii] = (a * x[ii]) + (b * y[ii]);
107 |         }
108 |     }
109 | 
110 |     //? w = ax + by + cz
111 |     __global__ void axpby_CUDA(double a, double *x, 
112 |                                double b, double *y,
113 |                                double c, double *z, 
114 |                                          double *w, size_t N)
115 |     {
116 |         size_t ii = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
117 | 
118 |         if(ii < N) 
119 |         {
120 |             w[ii] = (a * x[ii]) + (b * y[ii]) + (c * z[ii]);
121 |         }
122 |     }
123 | 
124 |     //? v = ax + by + cz + dw
125 |     __global__ void axpby_CUDA(double a, double *x,
126 |                                double b, double *y,
127 |                                double c, double *z,
128 |                                double d, double *w,
129 |                                          double *v, size_t N)
130 |     {
131 |         size_t ii = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
132 | 
133 |         if(ii < N) 
134 |         {
135 |             v[ii] = (a * x[ii]) + (b * y[ii]) + (c * z[ii]) + (d * w[ii]);
136 |         }
137 |     }
138 | 
139 |     #endif
140 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | #  [LeXInt](#)
 2 | 
 3 | 
 4 | ![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)
 5 | ![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=for-the-badge&logo=c%2B%2B&logoColor=white)
 6 | ![nVIDIA](https://img.shields.io/badge/nVIDIA-%2376B900.svg?style=for-the-badge&logo=nVIDIA&logoColor=white)
 7 | ![VS Code](https://img.shields.io/badge/VSCode-0078D4?style=for-the-badge&logo=visual%20studio%20code&logoColor=white)
 8 | ![Github](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)
 9 | 
10 | <a href="https://ascl.net/2208.009"><img src="https://img.shields.io/badge/ascl-2208.009-blue.svg?colorY=262255" alt="ascl:2208.009" /></a>
11 | <a href="https://opensource.org/license/mit/"><img src="https://img.shields.io/badge/License-MIT-blue.svg" /></a>
12 | 
13 | [**Le**](#)ja interpolation for e[**X**](#)ponential [**Int**](#)egrators is a temporal integration package that comprises a collection of exponential integrators, specifically, the Exponential Rosenbrock (EXPRB) and Exponential Propagation Iterative Runge-Kutta (EPIRK) solvers. 
14 | 
15 | The action of the matrix exponential or the $\varphi_l(z)$ functions on a vector is computed using the method of polynomial interpolation at Leja points. For homogeneous linear PDEs, one can obtain the ***exact*** solution (in time) by directly computing the matrix exponential using the functions ``real_Leja_exp`` and/or ``imag_Leja_exp``, whereas for nonhomogeneous linear PDEs, one can use ``real_Leja_phi_nl`` and/or ``imag_Leja_phi_nl``. The algorithmic details can be found in the cited literature. 
16 | 
17 | ## Requirements
18 | - For Python:
19 |   - Python 3.10 (or later)
20 | 
21 | - For C++:
22 |   - gcc compiler
23 | 
24 | - For CUDA:
25 |   - NVIDIA GPU
26 |   - CUDA 11.2 (or later)
27 |   - nvcc compiler
28 | 
29 | ## Literature
30 | The publications associated with this code:
31 | 
32 | - Deka, Moriggl, and Einkemmer (2025), *LeXInt: GPU-accelerated Exponential Integrators package*, SoftwareX, 29, 101949 <br />
33 | [[DOI]](https://doi.org/10.1016/j.softx.2024.101949) [[arXiv:2310.08344]](https://doi.org/10.48550/arXiv.2310.08344)
34 | 
35 | - Deka, Einkemmer, and Tokman (2023), *LeXInt: Package for Exponential Integrators employing Leja interpolation*, SoftwareX, 21, 101302 <br />
36 | [[DOI]](https://doi.org/10.1016/j.softx.2022.101302) [[arXiv:2208.08269]](https://doi.org/10.48550/arXiv.2208.08269)
37 | 
38 | Other related publications:
39 | - Caliari et al. (2014), *Comparison of software for computing the action of the matrix exponential*, BIT Numer. Math., 54, 113 <br />
40 | [[DOI]](https://doi.org/10.1007/s10543-013-0446-0)
41 | 
42 | - Deka \& Einkemmer (2022), *Efficient adaptive step size control for exponential integrators*, Comput. Math. Appl., 123, 59 <br />
43 | [[DOI]](https://doi.org/10.1016/j.camwa.2022.07.011) [[arXiv:2102.02524]](https://doi.org/10.48550/arXiv.2102.02524)
44 | 
45 | - Deka \& Einkemmer (2022), *Exponential Integrators for Resistive Magnetohydrodynamics: Matrix-free Leja Interpolation and Efficient Adaptive Time Stepping*, ApJS, 259, 57 <br />
46 | [[DOI]](https://doi.org/10.3847/1538-4365/ac5177) [[arXiv:2108.13622]](https://doi.org/10.48550/arXiv.2108.13622)
47 | 
48 | - Hochbruck \& Ostermann (2010), *Exponential integrators*, Acta Numer., 19, 209 <br />
49 | [[DOI]](https://doi.org/10.1017/S0962492910000048)
50 | 
51 | ## Future Prospects
52 | We will MPI-parallelise the CUDA/C++ code.
53 | 
54 | ## Contact
55 | Pranab J. Deka  (<pranab.deka@kuleuven.be>) <br />
56 | Lukas Einkemmer (<lukas.einkemmer@uibk.ac.at>) <br />
57 | Mayya Tokman  (<mtokman@ucmerced.edu>)
58 | 
59 | In case you face issues using LeXInt, kindly contact Pranab J. Deka.
60 | 
61 | ## Acknowledgements
62 | Alexander Moriggl contributed to the development of the CUDA version.
63 | 


--------------------------------------------------------------------------------
/Python/Constant/EPIRK4s3.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EPIRK4s3(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_epirk4                : numpy array
33 |                                 Output state variable(s) after time dt (4th order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     References:
38 |     
39 |         1. D. L. Michels, V. T. Luan, M. Tokman
40 |         A stiffly accurate integrator for elastodynamic problems, ACM Trans. Graph. 36 (4) (2017)
41 |         doi:10.1145/3072959.3073706.
42 |         
43 |         2. G. Rainwater and M. Tokman
44 |         Designing efficient exponential integrators with EPIRK framework,
45 |         in: International Conference of Numerical Analysis and Applied Mathematics (ICNAAM 2016), Vol. 1863 of American Institute of Physics Conference Series, 2017, p. 020007
46 |         doi:10.1063/1.4992153
47 | 
48 |     """
49 |     
50 |     ###? RHS evaluated at 'u'
51 |     rhs_u = RHS_function(u)
52 | 
53 |     ###? Array of zeros vectors
54 |     zero_vec = np.zeros(np.shape(u))
55 |     
56 |     ###? dt * J(u).z
57 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
58 |     
59 |     ###? J(u) . u
60 |     Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)
61 |     
62 |     ###? Interpolations 1 & 2; {1/8, 1/9} phi_1({1/8, 1/9} J(u) dt) f(u) dt
63 |     u_flux_1, rhs_calls_1, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1/8, c, Gamma, Leja_X, tol)
64 |     u_flux_2, rhs_calls_2, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1/9, c, Gamma, Leja_X, tol)
65 | 
66 |     ###? Internal stage 1; a = u + 1/8 phi_1(1/8 J(u) dt) f(u) dt
67 |     a = u + u_flux_1
68 |     
69 |     ###? Internal stage 2; b = u + 1/9 phi_1(1/9 J(u) dt) f(u) dt
70 |     b = u + u_flux_2
71 | 
72 |     ###? Difference of nonlinear remainders at a and b
73 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian_u)
74 |     R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian_u)
75 | 
76 |     ###? phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (1892*R(a) + 1458*(R(b) - 2*R(a))) dt + phi_4(J(u) dt) (-42336*R(a) - 34992*(R(b) - 2*R(a))) dt
77 |     u_flux, rhs_calls_3, substeps = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (1892*R_a + 1458*(R_b - 2*R_a))*T_final, (-42336*R_a - 34992*(R_b - 2*R_a))*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
78 |     
79 |     ###? 4th order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (-1024R(a) + 1458R(b)) dt + phi_4(J(u) dt) (27648R(a) - 34992R(b)) dt
80 |     u_epirk4 = u + u_flux
81 | 
82 |     ###? Proxy of computational cost
83 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 6
84 | 
85 |     return u_epirk4, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/Constant/EXPRB53s3.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EXPRB53s3(u, T_final, substeps, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_exprb5                : numpy array
33 |                                 Output state variable(s) after time dt (5th order)
34 |     num_rhs_calls           : int
35 |                                 # of RHS calls
36 |     
37 |     Reference:
38 |     
39 |         V. T. Luan and A. Ostermann
40 |         Exponential Rosenbrock methods of order five - construction, analysis and numerical comparisons, J. Comput. Appl. Math. 255 (2014) 417-431
41 |         doi:10.1016/j.cam.2013.04.041
42 | 
43 |     """
44 | 
45 |     ###? RHS evaluated at 'u'
46 |     rhs_u = RHS_function(u)
47 | 
48 |     ###? Array of zeros vectors
49 |     zero_vec = np.zeros(np.shape(u))
50 |     
51 |     ###? dt * J(u).z
52 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
53 |     
54 |     ###? J(u) . u
55 |     Jacobian_u = Jacobian(RHS_function, u, u, rhs_u)
56 |     
57 |     ###? Interpolation 1; 1/2 phi_1(1/2 J(u) dt) f(u) dt
58 |     u_flux_1, rhs_calls_1, substeps = linear_phi([zero_vec, rhs_u*T_final], T_final, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
59 | 
60 |     ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
61 |     a = u + u_flux_1
62 |     
63 |     ###? Difference of nonlinear remainder at a
64 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian_u)
65 |     
66 |     ###? Interpolation 2a; 9/10 phi_1(9/10 J(u) dt) f(u) dt + 729/125 phi_3(9/10 J(u) dt)) R(a) dt
67 |     u_flux_2a, rhs_calls_2a, substeps = linear_phi([zero_vec, rhs_u*T_final, zero_vec, 10/9*729/125*R_a*T_final], T_final, substeps, Jac_vec, 9/10, c, Gamma, Leja_X, tol)
68 |     
69 |     ###? Interpolation 2b; 27/25 phi_3(1/2 J(u) dt
70 |     u_flux_2b, rhs_calls_2b, substeps = linear_phi([zero_vec, zero_vec, zero_vec, 2*27/25*R_a*T_final], T_final, substeps, Jac_vec, 1/2, c, Gamma, Leja_X, tol)
71 | 
72 |     ###? b = u + 9/10 phi_1(9/10 J(u) dt) f(u) dt + 27/25 phi_3(1/2 J(u) dt + 729/125 phi_3(9/10 J(u) dt)) R(a) dt
73 |     b = u + u_flux_2a + u_flux_2b
74 |     
75 |     ###? Nonlinear remainder at b
76 |     R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian_u)
77 |     
78 |     ###? Interpolation 3; phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (18R(a) - (250/81)R(b)) dt + phi_4(J(u) dt) (-60R(a) + (500/27)R(b)) dt
79 |     u_flux, rhs_calls_3, substeps = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (18*R_a - (250/81)*R_b)*T_final, (-60*R_a + (500/27)*R_b)*T_final], T_final, substeps, Jac_vec, 1, c, Gamma, Leja_X, tol)
80 |     
81 |     ###? 5th order solution; u_5 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (18R(a) - (250/81)R(b)) dt + phi_4(J(u) dt) (-60R(a) + (500/27)R(b)) dt
82 |     u_exprb5 = u + u_flux
83 | 
84 |     ###? Proxy of computational cost
85 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2a + rhs_calls_2b + rhs_calls_3 + 6
86 | 
87 |     return u_exprb5, num_rhs_calls, substeps


--------------------------------------------------------------------------------
/Python/imag_Leja_phi_nl.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Divided_Difference import Divided_Difference
 3 | 
 4 | def imag_Leja_phi_nl(u, dt, RHS_function, interp_function, c, Gamma, Leja_X, phi_function, tol):
 5 |     """
 6 |     Computes the polynomial interpolation of 'phi_function' applied to 'interp_vector' at imaginary Leja points.
 7 |     To be used when computation of Jacobian is not needed, i.e. "interp_function" is (or explicitly treated
 8 |     as) a nonlinear remainder. 
 9 | 
10 | 
11 |         Parameters
12 |         ----------
13 |         u                       : numpy array
14 |                                     State variable(s)
15 |         dt                      : double
16 |                                     Step size
17 |         RHS_function            : user-defined function 
18 |                                     RHS function
19 |         interp_vector           : numpy array
20 |                                     Vector to be interpolated
21 |         c                       : double
22 |                                     Shifting factor
23 |         Gamma                   : double
24 |                                     Scaling factor
25 |         Leja_X                  : numpy array
26 |                                     Array of Leja points
27 |         phi_function            : function
28 |                                     phi function
29 |         tol                     : double
30 |                                     Accuracy of the polynomial so formed
31 | 
32 |         Returns
33 |         ----------
34 |         polynomial              : numpy array(s)
35 |                                     Polynomial interpolation of 'interp_vector' 
36 |                                     multiplied by 'phi_function' at real Leja points
37 |         ii                      : int
38 |                                     # of RHS calls
39 |         convergence             : int
40 |                                     0 -> did not converge, 1 -> converged
41 | 
42 |     """
43 | 
44 |     ### Initialize paramters and arrays
45 |     convergence = 0                                                             # 0 -> did not converge, 1 -> converged
46 |     max_Leja_pts = len(Leja_X)                                                  # Max number of Leja points  
47 |     y = interp_function.copy()                                                  # To avoid changing 'interp_function'
48 |         
49 |     ### Phi function applied to 'interp_function' (scaled and shifted)
50 |     phi_function_array = phi_function((c + Gamma*Leja_X) * dt * 1j)
51 |     
52 |     ### Compute polynomial coefficients
53 |     poly_coeffs = Divided_Difference(Leja_X, phi_function_array) 
54 |     
55 |     ### p_0 term
56 |     polynomial = interp_function * poly_coeffs[0] + 0*1j
57 |     
58 |     ### p_1, p_2, ...., p_n terms; iterate until converges
59 |     for ii in range(1, max_Leja_pts):
60 | 
61 |         ### y = y * ((z - c)/Gamma - Leja_X)
62 |         y = (-1j * RHS_function(y)/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))
63 | 
64 |         ### Error estimate
65 |         poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii])
66 |         
67 |         ### To prevent diverging, restart simulations with smaller dt
68 |         if poly_error > 1e17:
69 |             convergence = 0
70 |             print("Step size too large!! Did not converge.")
71 |             return u, ii, convergence
72 | 
73 |         ### Add the new term to the polynomial
74 |         polynomial = polynomial + (poly_coeffs[ii] * y)
75 |         
76 |         ### If new term to be added < tol, break loop; safety factor = 0.25
77 |         if  poly_error < 0.25*tol*np.linalg.norm(polynomial):
78 |             convergence = 1
79 |             # print("# Leja points (phi): ", ii)
80 |             break
81 |         
82 |         ### Warning flags
83 |         if ii == max_Leja_pts - 1:
84 |             print("Warning!! Max. # of Leja points reached without convergence!! Try increasing the number of Leja points. Max available: 10000.")
85 |             break
86 | 
87 |     return np.real(polynomial), ii, convergence
88 | 


--------------------------------------------------------------------------------
/Python/Variable/EPIRK4s3.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ###! LeXInt functions
 4 | from Jacobian import Jacobian
 5 | from linear_phi import linear_phi
 6 | 
 7 | ################################################################################################
 8 | 
 9 | def EPIRK4s3(u, T_final, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
10 |     """
11 |     Parameters
12 |     ----------
13 |     u                       : numpy array
14 |                                 State variable(s)
15 |     T_final                 : double
16 |                                 Step size
17 |     RHS_function            : user-defined function 
18 |                                 RHS function
19 |     c                       : double
20 |                                 Shifting factor
21 |     Gamma                   : double
22 |                                 Scaling factor
23 |     Leja_X                  : numpy array
24 |                                 Array of Leja points
25 |     tol                     : double
26 |                                 Accuracy of the polynomial so formed
27 |     Real_Imag               : int
28 |                                 0 - Real, 1 - Imaginary
29 | 
30 |     Returns
31 |     -------
32 |     u_epirk3                : numpy array
33 |                                 Output state variable(s) after time dt (3rd order)
34 |     u_epirk4                : numpy array
35 |                                 Output state variable(s) after time dt (4th order)
36 |     num_rhs_calls           : int
37 |                                 # of RHS calls
38 |     
39 |     References:
40 |     
41 |         1. D. L. Michels, V. T. Luan, M. Tokman, A stiffly accurate integrator for elastodynamic problems, ACM Trans. Graph. 36 (4) (2017). 
42 |         doi:10.1145/3072959.3073706.
43 |         
44 |         2. G. Rainwater, M. Tokman, Designing efficient exponential integrators with EPIRK framework, in: International Conference of Numerical
45 |         Analysis and Applied Mathematics (ICNAAM 2016), Vol. 1863 of American Institute of Physics Conference Series, 2017, p. 020007.
46 |         doi:10.1063/1.4992153.
47 | 
48 |     """
49 |     
50 |     ###? RHS evaluated at 'u'
51 |     rhs_u = RHS_function(u)
52 | 
53 |     ###? Array of zeros vectors
54 |     zero_vec = np.zeros(np.shape(u))
55 |     
56 |     ###? dt * J(u).z
57 |     Jac_vec = lambda z: T_final * Jacobian(RHS_function, u, z, rhs_u)
58 |     
59 |     ###? Interpolations 1 & 2; {1/8, 1/9} phi_1({1/8, 1/9} J(u) dt) f(u) dt
60 |     u_flux_1, rhs_calls_1 = linear_phi([zero_vec, rhs_u*T_final], T_final, Jac_vec, 1/8, c, Gamma, Leja_X, tol)
61 |     u_flux_2, rhs_calls_2 = linear_phi([zero_vec, rhs_u*T_final], T_final, Jac_vec, 1/9, c, Gamma, Leja_X, tol)
62 | 
63 |     ###? Internal stage 1; a = u + 1/8 phi_1(1/8 J(u) dt) f(u) dt
64 |     a = u + u_flux_1
65 |     
66 |     ###? Internal stage 2; b = u + 1/9 phi_1(1/9 J(u) dt) f(u) dt
67 |     b = u + u_flux_2
68 | 
69 |     ###? Difference of nonlinear remainders at a and b
70 |     R_a = (RHS_function(a) - Jacobian(RHS_function, u, a, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
71 |     R_b = (RHS_function(b) - Jacobian(RHS_function, u, b, rhs_u)) - (rhs_u - Jacobian(RHS_function, u, u, rhs_u))
72 | 
73 |     ###? Interpolation 3; phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (1892*R(a) + 1458*(R(b) - 2*R(a))) dt
74 |     u_flux, rhs_calls_3 = linear_phi([zero_vec, rhs_u*T_final, zero_vec, (1892*R_a + 1458*(R_b - 2*R_a))*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
75 |     
76 |     ###? Interpolation 4; phi_4(J(u) dt) (-42336*R(a) - 34992*(R(b) - 2*R(a))) dt
77 |     u_nl, rhs_calls_4 = linear_phi([zero_vec, zero_vec, zero_vec, zero_vec, (-42336*R_a - 34992*(R_b - 2*R_a))*T_final], T_final, Jac_vec, 1, c, Gamma, Leja_X, tol)
78 |     
79 |     ###? 3rd order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (-1024R(a) + 1458R(b)) dt
80 |     u_epirk3 = u + u_flux
81 |     
82 |     ###? 4th order solution; u_4 = u_3 + phi_4(J(u) dt) (27648R(a) - 34992R(b)) dt
83 |     u_epirk4 = u_epirk3 + u_nl
84 | 
85 |     ###? Proxy of computational cost
86 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + rhs_calls_4 + 8
87 | 
88 |     return u_epirk3, u_epirk4, num_rhs_calls


--------------------------------------------------------------------------------
/CUDA/functions.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <cmath>
  3 | //? ----------------------------------------------------------
  4 | //?
  5 | //? Description:
  6 | //?     A plethora of functions are defined here that
  7 | //?     are used throughout the code.
  8 | //?
  9 | //? ----------------------------------------------------------
 10 | 
 11 | //! ======================================================================================== !//
 12 | 
 13 | namespace LeXInt
 14 | {
 15 |     //! All functions are 'inline' so that this header can be included in several translation units !//
 16 |     //! Return double !//
 17 | 
 18 |     //? l1 norm: sum of |vector[ii]| for ii in [0, N)
 19 |     inline double l1norm_Cpp(double* vector, size_t N)
 20 |     {
 21 |         double norm = 0.0;
 22 | 
 23 |         #pragma omp parallel for reduction(+:norm)
 24 |         for (size_t ii = 0; ii < N; ii++)
 25 |         {
 26 |             //? std::fabs: an unqualified 'abs' may resolve to the integer overload and truncate
 27 |             norm = norm + std::fabs(vector[ii]);
 28 |         }
 29 | 
 30 |         return norm;
 31 |     }
 32 | 
 33 |     //? l2 norm: sqrt of the sum of vector[ii]^2 for ii in [0, N)
 34 |     inline double l2norm_Cpp(double* vector, size_t N)
 35 |     {
 36 |         double norm = 0.0;
 37 | 
 38 |         #pragma omp parallel for reduction(+:norm)
 39 |         for (size_t ii = 0; ii < N; ii++)
 40 |         {
 41 |             norm = norm + (vector[ii] * vector[ii]);
 42 |         }
 43 | 
 44 |         return std::sqrt(norm);
 45 |     }
 46 | 
 47 |     //? |number|! accumulated in double precision (0! = 1; the sign of 'number' is ignored)
 48 |     inline double factorial(int number)
 49 |     {
 50 |         const int n = (number < 0) ? -number : number;
 51 |         double fact = 1.0;
 52 | 
 53 |         for (int ii = 2; ii <= n; ii++)
 54 |         {
 55 |             fact = fact*ii;
 56 |         }
 57 | 
 58 |         return fact;
 59 |     }
 60 | 
 61 |     //! ======================================================================================== !//
 62 | 
 63 |     //! Return double* !//
 64 | 
 65 |     //? ones(x) = (x[0:N] =) 1.0
 66 |     inline void ones_Cpp(double *x, size_t N)
 67 |     {
 68 |         #pragma omp parallel for
 69 |         for (size_t ii = 0; ii < N; ii++)
 70 |         {
 71 |             x[ii] = 1.0;
 72 |         }
 73 |     }
 74 | 
 75 |     //? x = (1, 0, ..., 0): x[0] = 1.0 and x[1:N] = 0.0; nothing is written if N = 0
 76 |     inline void eigen_ones_Cpp(double *x, size_t N)
 77 |     {
 78 |         #pragma omp parallel for
 79 |         for (size_t ii = 0; ii < N; ii++)
 80 |         {
 81 |             x[ii] = 0.0;
 82 |         }
 83 | 
 84 |         //? Guard: x[0] does not exist when N = 0
 85 |         if (N > 0)
 86 |         {
 87 |             x[0] = 1.0;
 88 |         }
 89 |     }
 90 | 
 91 |     //? y = x
 92 |     inline void copy_Cpp(double *x, double *y, size_t N)
 93 |     {
 94 |         #pragma omp parallel for
 95 |         for (size_t ii = 0; ii < N; ii++)
 96 |         {
 97 |             y[ii] = x[ii];
 98 |         }
 99 |     }
100 | 
101 |     //? y = ax
102 |     inline void axpby_Cpp(double a, double *x,
103 |                                     double *y, size_t N)
104 |     {
105 |         #pragma omp parallel for
106 |         for (size_t ii = 0; ii < N; ii++)
107 |         {
108 |             y[ii] = (a * x[ii]);
109 |         }
110 |     }
111 | 
112 |     //? z = ax + by
113 |     inline void axpby_Cpp(double a, double *x,
114 |                           double b, double *y,
115 |                                     double *z, size_t N)
116 |     {
117 |         #pragma omp parallel for
118 |         for (size_t ii = 0; ii < N; ii++)
119 |         {
120 |             z[ii] = (a * x[ii]) + (b * y[ii]);
121 |         }
122 | 
123 |     }
124 | 
125 |     //? w = ax + by + cz
126 |     inline void axpby_Cpp(double a, double *x,
127 |                           double b, double *y,
128 |                           double c, double *z,
129 |                                     double *w, size_t N)
130 |     {
131 |         #pragma omp parallel for
132 |         for (size_t ii = 0; ii < N; ii++)
133 |         {
134 |             w[ii] = (a * x[ii]) + (b * y[ii]) + (c * z[ii]);
135 |         }
136 |     }
137 | 
138 |     //? v = ax + by + cz + dw
139 |     inline void axpby_Cpp(double a, double *x,
140 |                           double b, double *y,
141 |                           double c, double *z,
142 |                           double d, double *w,
143 |                                     double *v, size_t N)
144 |     {
145 |         #pragma omp parallel for
146 |         for (size_t ii = 0; ii < N; ii++)
147 |         {
148 |             v[ii] = (a * x[ii]) + (b * y[ii]) + (c * z[ii]) + (d * w[ii]);
149 |         }
150 |     }
151 | }
152 | 
153 | //! ======================================================================================== !//


--------------------------------------------------------------------------------
/Python/Phi_functions.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ### Phi Functions ('z' is assumed to be an array of doubles or complex doubles)
 4 | 
 5 | def phi_1(z):
 6 |     
 7 |     if np.imag(z[0]) != 0.0:
 8 |         phi_1_array = np.zeros(len(z), dtype = "complex")
 9 |     else:
10 |         phi_1_array = np.zeros(len(z))
11 |     
12 |     for ii in range(len(z)):
13 |         if abs(z[ii]) <= 1e-7:
14 |             phi_1_array[ii] = 1./np.math.factorial(1) + z[ii] * (1./np.math.factorial(2)  + z[ii] * (1./np.math.factorial(3) + \
15 |                                                         z[ii] * (1./np.math.factorial(4)  + z[ii] * (1./np.math.factorial(5) + \
16 |                                                         z[ii] * (1./np.math.factorial(6)  + z[ii] * (1./np.math.factorial(7) + \
17 |                                                         z[ii] * (1./np.math.factorial(8)  + z[ii] * (1./np.math.factorial(9) + \
18 |                                                         z[ii] * (1./np.math.factorial(10) + z[ii] * (1./np.math.factorial(11)))))))))))     
19 |         else:
20 |             phi_1_array[ii] = (np.exp(z[ii]) - 1)/z[ii]
21 |             
22 |     return phi_1_array
23 | 
24 | 
25 | def phi_2(z):
26 |     
27 |     if np.imag(z[0]) != 0.0:
28 |         phi_2_array = np.zeros(len(z), dtype = "complex")
29 |     else:
30 |         phi_2_array = np.zeros(len(z))
31 |     
32 |     for ii in range(len(z)):
33 |         if abs(z[ii]) <= 1e-6:
34 |             phi_2_array[ii] = 1./np.math.factorial(2) + z[ii] * (1./np.math.factorial(3)  + z[ii] * (1./np.math.factorial(4)  + \
35 |                                                         z[ii] * (1./np.math.factorial(5)  + z[ii] * (1./np.math.factorial(6)  + \
36 |                                                         z[ii] * (1./np.math.factorial(7)  + z[ii] * (1./np.math.factorial(8)  + \
37 |                                                         z[ii] * (1./np.math.factorial(9)  + z[ii] * (1./np.math.factorial(10) + \
38 |                                                         z[ii] * (1./np.math.factorial(11) + z[ii] * (1./np.math.factorial(12)))))))))))     
39 |         else:
40 |             phi_2_array[ii] = (np.exp(z[ii]) - z[ii] - 1)/z[ii]**2
41 |         
42 |     return phi_2_array
43 | 
44 | 
45 | def phi_3(z):
46 |     
47 |     if np.imag(z[0]) != 0.0:
48 |         phi_3_array = np.zeros(len(z), dtype = "complex")
49 |     else:
50 |         phi_3_array = np.zeros(len(z))
51 |     
52 |     for ii in range(len(z)):
53 |         if abs(z[ii]) <= 1e-5:
54 |             phi_3_array[ii] = 1./np.math.factorial(3) + z[ii] * (1./np.math.factorial(4)  + z[ii] * (1./np.math.factorial(5)  + \
55 |                                                         z[ii] * (1./np.math.factorial(6)  + z[ii] * (1./np.math.factorial(7)  + \
56 |                                                         z[ii] * (1./np.math.factorial(8)  + z[ii] * (1./np.math.factorial(9)  + \
57 |                                                         z[ii] * (1./np.math.factorial(10) + z[ii] * (1./np.math.factorial(11) + \
58 |                                                         z[ii] * (1./np.math.factorial(12) + z[ii] * (1./np.math.factorial(13)))))))))))     
59 |         else:
60 |             phi_3_array[ii] = (np.exp(z[ii]) - z[ii]**2/2 - z[ii] - 1)/z[ii]**3
61 |     
62 |     return phi_3_array
63 | 
64 | 
65 | def phi_4(z):
66 |     
67 |     if np.imag(z[0]) != 0.0:
68 |         phi_4_array = np.zeros(len(z), dtype = "complex")
69 |     else:
70 |         phi_4_array = np.zeros(len(z))
71 |     
72 |     for ii in range(len(z)):
73 |         if abs(z[ii]) <= 1e-3:
74 |             phi_4_array[ii] = 1./np.math.factorial(4) + z[ii] * (1./np.math.factorial(5)  + z[ii] * (1./np.math.factorial(6)  + \
75 |                                                         z[ii] * (1./np.math.factorial(7)  + z[ii] * (1./np.math.factorial(8)  + \
76 |                                                         z[ii] * (1./np.math.factorial(9)  + z[ii] * (1./np.math.factorial(10) + \
77 |                                                         z[ii] * (1./np.math.factorial(11) + z[ii] * (1./np.math.factorial(12) + \
78 |                                                         z[ii] * (1./np.math.factorial(13) + z[ii] * (1./np.math.factorial(14)))))))))))        
79 |         else:
80 |             phi_4_array[ii] = (np.exp(z[ii]) - z[ii]**3/6 - z[ii]**2/2 - z[ii] - 1)/z[ii]**4
81 |         
82 |     return phi_4_array
83 | 


--------------------------------------------------------------------------------
/CUDA/Integrators/EXPRB32.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "../Leja.hpp"
 4 | #include "../Phi_functions.hpp"
 5 | 
 6 | namespace LeXInt
 7 | {
 8 |     //? EXPRB32: embedded 2nd/3rd order exponential Rosenbrock step; phi functions interpolated on real Leja points
 9 |     template <typename rhs>
10 |     void EXPRB32(rhs& RHS,                   //? RHS function; called below as RHS(input, output)
11 |                  double* u,                  //? Input state variable(s)
12 |                  double* u_exprb2,           //? Output state variable(s) (lower order); also holds u_flux
13 |                  double* u_exprb3,           //? Output state variable(s) (higher order); also holds NL_a and R_a
14 |                  double& error,              //? Embedded error estimate (output)
15 |                  double* auxiliary_expint,   //? Internal auxiliary variables (EXPRB32); holds f_u, NL_u, u_nl_3, error_vector in turn
16 |                  double* auxiliary_Leja,     //? Internal auxiliary variables (Leja and NL remainders)
17 |                  size_t N,                   //? Number of grid points
18 |                  vector<double>& Leja_X,     //? Array of Leja points
19 |                  double c,                   //? Shifting factor
20 |                  double Gamma,               //? Scaling factor
21 |                  double rtol,                //? Relative tolerance (normalised desired accuracy)
22 |                  double atol,                //? Absolute tolerance
23 |                  double dt,                  //? Step size
24 |                  int& iters,                 //? Output: total # of Leja iterations (iters_1 + iters_2)
25 |                  bool GPU,                   //? false (0) --> CPU; true (1) --> GPU
26 |                  GPU_handle& cublas_handle   //? CuBLAS handle
27 |                  )
28 |     {
29 |         //* -------------------------------------------------------------------------
30 |         //* One step of size dt; fills u_exprb2, u_exprb3, error, and iters.
31 |         //! u, u_exprb2, u_exprb3, auxiliary_expint, and auxiliary_Leja,
32 |         //! are device vectors if GPU support is activated.
33 | 
34 |         //*    Returns
35 |         //*    ----------
36 |         //*     u_exprb2                : double*
37 |         //*                                 2nd order solution after time dt
38 |         //*     
39 |         //*     u_exprb3                : double* 
40 |         //*                                 3rd order solution after time dt
41 |         //*     error                   : l2 norm of error_vector (= 2 phi_3(J(u) dt) R(a) dt per the comments below) divided by sqrt(N)
42 |         //*
43 |         //*    Reference:
44 |         //*         M. Hochbruck, A. Ostermann, Exponential Integrators, Acta Numer. 19 (2010) 209-286. 
45 |         //*         doi:10.1017/S0962492910000048
46 | 
47 |         //* -------------------------------------------------------------------------
48 | 
49 |         //? Counters for Leja iterations
50 |         int iters_1 = 0, iters_2 = 0;
51 |         //? Named views into the caller's buffers. Several names share storage, so the order of the statements
52 |         //? below matters: auxiliary_expint <- f_u, NL_u, u_nl_3, error_vector; u_exprb2 <- u_flux; u_exprb3 <- NL_a, R_a
53 |         double* f_u = &auxiliary_expint[0]; double* u_flux = &u_exprb2[0]; 
54 |         double* NL_u = &auxiliary_expint[0]; double* NL_a = &u_exprb3[0]; double* R_a = &u_exprb3[0]; 
55 |         double* u_nl_3 = &auxiliary_expint[0]; double* error_vector = &auxiliary_expint[0];
56 | 
57 |         //? RHS evaluated at 'u' multiplied by 'dt'; f_u = RHS(u)*dt (input and output of axpby are the same buffer)
58 |         RHS(u, f_u);
59 |         axpby(dt, f_u, f_u, N, GPU);
60 | 
61 |         //? Interpolation of RHS(u) at 1; u_flux = phi_1(J(u) dt) f(u) dt
62 |         real_Leja_phi(RHS, u, f_u, u_flux, auxiliary_Leja, N, {1.0}, 
63 |                       phi_1, Leja_X, c, Gamma, rtol, atol, dt, iters_1, GPU, cublas_handle);
64 | 
65 |         //! Internal stage 1; 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt (u_flux and u_exprb2 are the same buffer)
66 |         axpby(1.0, u, 1.0, u_flux, u_exprb2, N, GPU);
67 | 
68 |         //? R_a = (NL_a - NL_u) * dt; NL_u reuses the storage of f_u, R_a reuses the storage of NL_a
69 |         Nonlinear_remainder(RHS, u, u,        NL_u, auxiliary_Leja, N, GPU, cublas_handle);
70 |         Nonlinear_remainder(RHS, u, u_exprb2, NL_a, auxiliary_Leja, N, GPU, cublas_handle);
71 |         axpby(dt, NL_a, -dt, NL_u, R_a, N, GPU);
72 | 
73 |         //? u_nl_3 = phi_3(J(u) dt) R(a) dt; u_nl_3 reuses the storage of NL_u
74 |         real_Leja_phi(RHS, u, R_a, u_nl_3, auxiliary_Leja, N, {1.0}, 
75 |                       phi_3, Leja_X, c, Gamma, rtol, atol, dt, iters_2, GPU, cublas_handle);
76 | 
77 |         //! 3rd order solution; u_3 = u_2 + 2 phi_3(J(u) dt) R(a) dt; written over R_a (same buffer as u_exprb3)
78 |         axpby(1.0, u_exprb2, 2.0, u_nl_3, u_exprb3, N, GPU);
79 | 
80 |         //? Error estimate; error_vector and u_nl_3 are the same buffer, so u_nl_3 is not preserved after this point
81 |         axpby(2.0, u_nl_3, error_vector, N, GPU);
82 |         error = l2norm(error_vector, N, GPU, cublas_handle)/sqrt(N);
83 | 
84 |         //? Total number of Leja iterations
85 |         iters = iters_1 + iters_2;
86 |     }
87 | }


--------------------------------------------------------------------------------
/CUDA/Integrators/EXPRB42.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "../Leja.hpp"
 4 | #include "../Phi_functions.hpp"
 5 | 
 6 | namespace LeXInt
 7 | {
 8 |     //? EXPRB42: embedded 2nd/4th order exponential Rosenbrock step; phi functions interpolated on real Leja points
 9 |     template <typename rhs>
10 |     void EXPRB42(rhs& RHS,                   //? RHS function; called below as RHS(input, output)
11 |                  double* u,                  //? Input state variable(s)
12 |                  double* u_exprb2,           //? Output state variable(s) (lower order); also holds f_u, NL_u, R_a
13 |                  double* u_exprb4,           //? Output state variable(s) (higher order); also holds NL_a
14 |                  double& error,              //? Embedded error estimate (output)
15 |                  double* auxiliary_expint,   //? Internal auxiliary variables (EXPRB42); indexed at [0] and [N] below -- presumably >= 2N doubles, TODO confirm
16 |                  double* auxiliary_Leja,     //? Internal auxiliary variables (Leja)
17 |                  size_t N,                   //? Number of grid points
18 |                  vector<double>& Leja_X,     //? Array of Leja points
19 |                  double c,                   //? Shifting factor
20 |                  double Gamma,               //? Scaling factor
21 |                  double rtol,                //? Relative tolerance (normalised desired accuracy)
22 |                  double atol,                //? Absolute tolerance
23 |                  double dt,                  //? Step size
24 |                  int& iters,                 //? Output: total # of Leja iterations (iters_1 + iters_2)
25 |                  bool GPU,                   //? false (0) --> CPU; true (1) --> GPU
26 |                  GPU_handle& cublas_handle   //? CuBLAS handle
27 |                  )
28 |     {
29 |         //* -------------------------------------------------------------------------
30 |         //* One step of size dt; fills u_exprb2, u_exprb4, error, and iters.
31 |         //! u, u_exprb2, u_exprb4, auxiliary_expint, and auxiliary_Leja,
32 |         //! are device vectors if GPU support is activated.
33 | 
34 |         //*    Returns
35 |         //*    ----------
36 |         //*     u_exprb2                : double*
37 |         //*                                 2nd order solution after time dt
38 |         //*     
39 |         //*     u_exprb4                : double* 
40 |         //*                                 4th order solution after time dt
41 |         //*     error                   : l2 norm of error_vector (= 32/9 phi_3(J(u) dt) R(a) dt per the comments below) divided by sqrt(N)
42 |         //*
43 |         //*    Reference:
44 |         //*         V. T. Luan, Fourth-order two-stage explicit exponential integrators for time-dependent PDEs, Appl. Numer. Math. 112 (2017) 91-103. 
45 |         //*         doi:10.1016/j.apnum.2016.10.008
46 | 
47 |         //* ------------------------------------------------------------------------- 
48 | 
49 |         //? Counters for Leja iterations
50 |         int iters_1 = 0, iters_2 = 0;
51 |         //? Named views into the caller's buffers. Several names share storage, so the order of the statements below
52 |         //? matters: u_exprb2 <- f_u, NL_u, R_a; u_exprb4 <- NL_a; u_flux[0] <- a, u_nl_3; u_flux[N] <- error_vector
53 |         double* u_flux = &auxiliary_expint[0]; double* f_u = &u_exprb2[0]; double* a = &u_flux[0];
54 |         double* NL_u = &u_exprb2[0]; double* NL_a = &u_exprb4[0]; double* R_a = &u_exprb2[0]; 
55 |         double* u_nl_3 = &u_flux[0]; double* error_vector = &u_flux[N];
56 | 
57 |         //? RHS evaluated at 'u' multiplied by 'dt'; f_u = RHS(u)*dt (input and output of axpby are the same buffer)
58 |         RHS(u, f_u);
59 |         axpby(dt, f_u, f_u, N, GPU);
60 | 
61 |         //? Vertical interpolation of RHS(u) at 3/4 and 1; u_flux = phi_1({3/4, 1.0} J(u) dt) f_u dt
62 |         real_Leja_phi(RHS, u, f_u, u_flux, auxiliary_Leja, N, {3./4., 1.0}, 
63 |                       phi_1, Leja_X, c, Gamma, rtol, atol, dt, iters_1, GPU, cublas_handle);
64 | 
65 |         //? Internal stage 1; a = u + 3/4 phi_1(3/4 J(u) dt) f(u) dt ('a' and &u_flux[0] are the same buffer)
66 |         axpby(1.0, u, 3./4., &u_flux[0], a, N, GPU);
67 | 
68 |         //? R_a = (NL_a - NL_u) * dt; NL_u reuses the storage of f_u, R_a reuses the storage of NL_u
69 |         Nonlinear_remainder(RHS, u, u, NL_u, auxiliary_Leja, N, GPU, cublas_handle);
70 |         Nonlinear_remainder(RHS, u, a, NL_a, auxiliary_Leja, N, GPU, cublas_handle);
71 |         axpby(dt, NL_a, -dt, NL_u, R_a, N, GPU);
72 | 
73 |         //? u_nl_3 = phi_3(J(u) dt) R(a) dt; u_nl_3 reuses the storage of 'a'
74 |         real_Leja_phi(RHS, u, R_a, u_nl_3, auxiliary_Leja, N, {1.0}, 
75 |                       phi_3, Leja_X, c, Gamma, rtol, atol, dt, iters_2, GPU, cublas_handle);
76 | 
77 |         //! 2nd order solution; u_2 = u + phi_1(J(u) dt) f(u) dt; reads &u_flux[N], written over R_a (same buffer as u_exprb2)
78 |         axpby(1.0, u, 1.0, &u_flux[N], u_exprb2, N, GPU);
79 | 
80 |         //! 4th order solution; u_4 = u_2 + 32/9 phi_3(J(u) dt) R(a) dt; written over NL_a (same buffer as u_exprb4)
81 |         axpby(1.0, u_exprb2, 32./9., u_nl_3, u_exprb4, N, GPU);
82 | 
83 |         //? Error estimate; error_vector is &u_flux[N], which was last read when forming u_exprb2 above
84 |         axpby(32./9., u_nl_3, error_vector, N, GPU);
85 |         error = l2norm(error_vector, N, GPU, cublas_handle)/sqrt(N);
86 | 
87 |         //? Total number of Leja iterations
88 |         iters = iters_1 + iters_2;
89 |     }
90 | }


--------------------------------------------------------------------------------
/CUDA/real_Leja_exp.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Leja.hpp"
 4 | #include "Divided_Differences.hpp"
 5 | 
 6 | namespace LeXInt
 7 | {
 8 |     //? Matrix exponential interpolated on real Leja points
 9 |     template <typename rhs>
10 |     void real_Leja_exp(rhs& RHS,                       //? RHS function
11 |                        double* u,                      //? Input state variable(s)
12 |                        double* polynomial,             //? Output matrix exponential multiplied by 'u'
13 |                        double* auxiliary_Leja,         //? Internal auxiliary variables (Leja)
14 |                        size_t N,                       //? Number of grid points
15 |                        vector<double>& Leja_X,         //? Array of Leja points
16 |                        double c,                       //? Shifting factor
17 |                        double Gamma,                   //? Scaling factor
18 |                        double rtol,                    //? Relative tolerance (normalised desired accuracy)
19 |                        double atol,                    //? Absolute tolerance
20 |                        double dt,                      //? Step size
21 |                        int& iters,                     //? # of iterations needed to converge (iteration variable)
22 |                        bool GPU,                       //? false (0) --> CPU; true (1) --> GPU
23 |                        GPU_handle& cublas_handle       //? CuBLAS handle
24 |                        )
25 |     {
26 |         //* -------------------------------------------------------------------------
27 | 
28 |         //* Computes the polynomial interpolation of matrix exponential applied to 'u' at real Leja points.
29 |         //*
30 |         //*    Returns
31 |         //*    ----------
32 |         //*    polynomial        : double*
33 |         //*                             Polynomial interpolation of 'u' multiplied 
34 |         //*                             by the matrix exponential at real Leja points
35 | 
36 |         //* -------------------------------------------------------------------------
37 |         
38 |         int max_Leja_pts = Leja_X.size();                               //? Max. # of Leja points
39 |         double* Jac_vec = &auxiliary_Leja[0];                           //? auxiliary variable for Jacobian-vector product
40 | 
41 |         //* Matrix exponential (scaled and shifted)
42 |         vector<double> matrix_exponential(max_Leja_pts);
43 | 
44 |         for (int ii = 0; ii < max_Leja_pts; ii++)
45 |         {
46 |             matrix_exponential[ii] = exp(dt * (c + (Gamma * Leja_X[ii])));
47 |         }
48 |         
49 |         //* Compute polynomial coefficients
50 |         vector<double> coeffs = Divided_Differences(Leja_X, matrix_exponential);
51 | 
52 |         //* Form the polynomial (first term): polynomial = coeffs[0] * u
53 |         axpby(coeffs[0], u, polynomial, N, GPU);
54 | 
55 |         //? Iterate until converges
56 |         for (iters = 1; iters < max_Leja_pts - 1; iters++)
57 |         {
58 |             //* Compute numerical Jacobian (for linear eqs., this is the RHS evaluation at u)
59 |             RHS(u, Jac_vec);
60 | 
61 |             //* u = u * ((z - c)/Gamma - Leja_X)
62 |             axpby(1./Gamma, Jac_vec, (-c/Gamma - Leja_X[iters - 1]), u, u, N, GPU);
63 | 
64 |             //* Add the new term to the polynomial (polynomial = polynomial + (coeffs[iters] * u))
65 |             axpby(coeffs[iters], u, 1.0, polynomial, polynomial, N, GPU);
66 | 
67 |             //* Error estimate: poly_error = |coeffs[iters]| ||u|| at every iteration
68 |             double poly_error = l2norm(u, N, GPU, cublas_handle)/sqrt(N);
69 |             poly_error = abs(coeffs[iters]) * poly_error;
70 | 
71 |             //* Norm of the polynomial
72 |             double poly_norm = l2norm(polynomial, N, GPU, cublas_handle)/sqrt(N);
73 | 
74 |             //? If new term to be added < tol, break loop
75 |             if (poly_error < ((rtol*poly_norm) + atol))
76 |             {
77 |                 // ::std::cout << "Converged! Iterations: " << iters << ::std::endl;
78 |                 break;
79 |             }
80 | 
81 |             //! Warning flags
82 |             if (iters == max_Leja_pts - 2)
83 |             {
84 |                 ::std::cout << "Warning!! Max. number of Leja points reached without convergence!!" << ::std::endl; 
85 |                 ::std::cout << "Max. Leja points currently set to " << max_Leja_pts << ::std::endl;
86 |                 ::std::cout << "Try increasing the number of Leja points. Max available: 10000." << ::std::endl;
87 |                 break;
88 |             }
89 |         }
90 |     }
91 | }


--------------------------------------------------------------------------------
/Python/Variable/EXPRB53s3.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.insert(1, "../")
  3 | 
  4 | from Jacobian import *
  5 | from Phi_functions import *
  6 | from real_Leja_phi import *
  7 | from imag_Leja_phi import *
  8 | 
  9 | ################################################################################################
 10 | 
 11 | def EXPRB53s3(u, dt, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
 12 |     """
 13 |     Parameters
 14 |     ----------
 15 |     u                       : numpy array
 16 |                                 State variable(s)
 17 |     dt                      : double
 18 |                                 Step size
 19 |     RHS_function            : user-defined function 
 20 |                                 RHS function
 21 |     c                       : double
 22 |                                 Shifting factor
 23 |     Gamma                   : double
 24 |                                 Scaling factor
 25 |     Leja_X                  : numpy array
 26 |                                 Array of Leja points
 27 |     tol                     : double
 28 |                                 Accuracy of the polynomial so formed
 29 |     Real_Imag               : int
 30 |                                 0 - Real, 1 - Imaginary
 31 | 
 32 |     Returns
 33 |     -------
 34 |     u_exprb3                : numpy array
 35 |                                 Output state variable(s) after time dt (3rd order)
 36 |     u_exprb5                : numpy array
 37 |                                 Output state variable(s) after time dt (5th order)
 38 |     num_rhs_calls           : int
 39 |                                 # of RHS calls
 40 |     
 41 |     Reference:
 42 |     
 43 |         V. T. Luan, A. Ostermann, Exponential Rosenbrock methods of order five - construction, analysis and numerical comparisons, J. Comput. Appl. Math. 255 (2014) 417-431. 
 44 |         doi:10.1016/j.cam.2013.04.041.
 45 | 
 46 |     """
 47 | 
 48 |     ############## --------------------- ##############
 49 | 
 50 |     ###? Interpolate on either real Leja or imaginary Leja points
 51 |     if Real_Imag == 0:
 52 |         Leja_phi = real_Leja_phi
 53 |     elif Real_Imag == 1:
 54 |         Leja_phi = imag_Leja_phi
 55 |     else:
 56 |         print("Error!! Choose 0 for real or 1 for imaginary Leja points.")
 57 |     
 58 |     ############## --------------------- ##############
 59 |     
 60 |     ###? Vertical interpolation of f(u) at 1/2, 9/10, and 1; phi_1({1/2, 9/10, 1} J(u) dt) f(u) dt
 61 |     u_flux, rhs_calls_1, convergence = Leja_phi(u, dt, RHS_function, RHS_function(u)*dt, [1/2, 9/10, 1], c, Gamma, Leja_X, phi_1, tol)
 62 | 
 63 |     ###? If it does not converge, return (try with smaller dt)
 64 |     if convergence == 0:
 65 |         return u, 2.1*u, rhs_calls_1
 66 | 
 67 |     ###? Internal stage 1; a = u + 1/2 phi_1(1/2 J(u) dt) f(u) dt
 68 |     a = u + (1/2 * u_flux[:, 0])
 69 | 
 70 |     ###? Nonlinear remainder at u and a
 71 |     Nonlinear_u = RHS_function(u) - Jacobian(RHS_function, u, u)
 72 |     Nonlinear_a = RHS_function(a) - Jacobian(RHS_function, u, a)
 73 |     R_a = Nonlinear_a - Nonlinear_u
 74 | 
 75 |     ###? Vertical interpolation of R(a) at 1/2 and 9/10
 76 |     b_n_nl, rhs_calls_2, _ = Leja_phi(u, dt, RHS_function, R_a*dt, [1/2, 9/10], c, Gamma, Leja_X, phi_3, tol)
 77 | 
 78 |     ###? b = u + 9/10 phi_1(9/10 J(u) dt) f(u) dt + (27/25 phi_3(1/2 J(u) dt) + 729/125 phi_3(9/10 J(u) dt)) R(a) dt
 79 |     b = u + (9/10 * u_flux[:, 1]) + (27/25 * b_n_nl[:, 0]) + (729/125 * b_n_nl[:, 1])
 80 |     
 81 |     ###? Nonlinear remainder at b
 82 |     Nonlinear_b = RHS_function(b) - Jacobian(RHS_function, u, b)
 83 |     R_b = Nonlinear_b - Nonlinear_u
 84 |     
 85 |     ###? phi_3(J(u) dt) (2R(a) + (150/81)R(b)) dt
 86 |     u_nl_4_3, rhs_calls_3, _ = Leja_phi(u, dt, RHS_function, (2*R_a + (150/81)*R_b)*dt, [1], c, Gamma, Leja_X, phi_3, tol)
 87 |     
 88 |     ###? phi_3(J(u) dt) (18R(a) - (250/81)R(b)) dt
 89 |     u_nl_5_3, rhs_calls_4, _ = Leja_phi(u, dt, RHS_function, (18*R_a - (250/81)*R_b)*dt, [1], c, Gamma, Leja_X, phi_3, tol)
 90 |     
 91 |     ###? phi_3(J(u) dt) (-60R(a) + (500/27)R(b)) dt
 92 |     u_nl_5_4, rhs_calls_5, _ = Leja_phi(u, dt, RHS_function, (-60*R_a + (500/27)*R_b)*dt, [1], c, Gamma, Leja_X, phi_4, tol)
 93 | 
 94 |     ###? 3rd order solution; u_3 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (2R(a) + (150/81)R(b)) dt
 95 |     u_exprb3 = u + u_flux[:, 2] + u_nl_4_3[:, 0]
 96 |     
 97 |     ###? 5th order solution; u_5 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (18R(a) - (250/81)R(b)) dt + phi_4(J(u) dt) (-60R(a) + (500/27)R(b)) dt
 98 |     u_exprb5 = u + u_flux[:, 2] + u_nl_5_3[:, 0] + u_nl_5_4[:, 0]
 99 | 
100 |     ###? Proxy of computational cost
101 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + rhs_calls_4 + rhs_calls_5 + 13
102 | 
103 |     return u_exprb3, u_exprb5, num_rhs_calls


--------------------------------------------------------------------------------
/CUDA/Test/Dif_Adv_2D.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "Problems.hpp"
  4 | #include "../error_check.hpp"
  5 | 
  6 | using namespace std;
  7 | 
  8 | //! This function has 2 vector reads and writes.
  9 | 
 10 | //? ====================================================================================== ?//
 11 | 
 12 | #ifdef __CUDACC__
 13 | 
 14 | __global__ void Dif_Adv_2D(int N, double dx, double dy, double velocity, double* input, double* output)
 15 | {
 16 |     int ii = threadIdx.y + blockIdx.y * blockDim.y;
 17 |     int jj = threadIdx.x + blockIdx.x * blockDim.x;
 18 | 
 19 |     if ((ii >= N) || (jj >= N))
 20 |         return;
 21 |                         //? Diffusion
 22 |     output[N*ii + jj] =   (input[PBC(ii, jj + 1, N)] - (4.0 * input[PBC(ii, jj, N)]) + input[PBC(ii, jj - 1, N)])/(dx*dx)
 23 |                         + (input[PBC(ii + 1, jj, N)] + input[PBC(ii - 1, jj, N)])/(dy*dy)
 24 |                         
 25 |                         //? Advection
 26 |                         + velocity/dx 
 27 |                         * (- 2.0/6.0 * input[PBC(ii, jj - 1, N)]
 28 |                         - 3.0/6.0 * input[PBC(ii, jj, N)]
 29 |                         + 6.0/6.0 * input[PBC(ii, jj + 1, N)]
 30 |                         - 1.0/6.0 * input[PBC(ii, jj + 2, N)])
 31 |                         + velocity/dy
 32 |                         * (- 2.0/6.0 * input[PBC(ii - 1, jj, N)]
 33 |                         - 3.0/6.0 * input[PBC(ii, jj, N)]
 34 |                         + 6.0/6.0 * input[PBC(ii + 1, jj, N)]
 35 |                         - 1.0/6.0 * input[PBC(ii + 2, jj, N)]);
 36 | }
 37 | 
 38 | #endif
 39 | 
 40 | struct RHS_Dif_Adv_2D:public Problems_2D
 41 | {
 42 |     //? RHS = A_adv.u^2/2.0 + A_dif.u
 43 | 
 44 |     //! Constructor
 45 |     RHS_Dif_Adv_2D(int _N, double _dx, double _dy, double _velocity) : Problems_2D(_N, _dx, _dy, _velocity) {}
 46 | 
 47 |     void operator()(double* input, double* output)
 48 |     {
 49 |         #ifdef __CUDACC__
 50 | 
 51 |             int num_threads = 16;
 52 |             dim3 threads(num_threads, num_threads);
 53 |             dim3 blocks((N + num_threads - 1)/num_threads, (N + num_threads - 1)/num_threads);
 54 |             
 55 |             Dif_Adv_2D<<<blocks, threads>>>(N, dx, dy, velocity, input, output);
 56 |         
 57 |         #else
 58 | 
 59 |             int num_threads = 32;
 60 | 
 61 |             #pragma omp parallel for collapse(2)
 62 |             for (int blockIdxx = 0; blockIdxx < (N + num_threads - 1)/num_threads; blockIdxx++)
 63 |             {
 64 |                 for (int blockIdxy = 0; blockIdxy < (N + num_threads - 1)/num_threads; blockIdxy++)
 65 |                 {
 66 |                     for (int threadIdxx = 0; threadIdxx < num_threads; threadIdxx++)
 67 |                     {
 68 |                         for (int threadIdxy = 0; threadIdxy < num_threads; threadIdxy++)
 69 |                         {
 70 |                             int ii = (blockIdxx * num_threads) + threadIdxx;
 71 |                             int jj = (blockIdxy * num_threads) + threadIdxy;
 72 | 
 73 |                             if ((ii < N) && (jj < N))
 74 |                             {
 75 |                                                     //? Diffusion
 76 |                                 output[N*ii + jj] =   (input[PBC(ii, jj + 1, N)] - (4.0 * input[PBC(ii, jj, N)]) + input[PBC(ii, jj - 1, N)])/(dx*dx)
 77 |                                                     + (input[PBC(ii + 1, jj, N)] + input[PBC(ii - 1, jj, N)])/(dy*dy)
 78 |                                                     
 79 |                                                     //? Advection
 80 |                                                     + velocity/dx 
 81 |                                                     * (- 2.0/6.0 * input[PBC(ii, jj - 1, N)]
 82 |                                                     - 3.0/6.0 * input[PBC(ii, jj, N)]
 83 |                                                     + 6.0/6.0 * input[PBC(ii, jj + 1, N)]
 84 |                                                     - 1.0/6.0 * input[PBC(ii, jj + 2, N)])
 85 |                                                     + velocity/dy
 86 |                                                     * (- 2.0/6.0 * input[PBC(ii - 1, jj, N)]
 87 |                                                     - 3.0/6.0 * input[PBC(ii, jj, N)]
 88 |                                                     + 6.0/6.0 * input[PBC(ii + 1, jj, N)]
 89 |                                                     - 1.0/6.0 * input[PBC(ii + 2, jj, N)]);
 90 |                             }
 91 |                         }
 92 |                     }
 93 |                 }
 94 |             }
 95 |             
 96 |         #endif
 97 |     }
 98 | 
 99 |     //! Destructor
100 |     ~RHS_Dif_Adv_2D() {}
101 | };
102 | 
103 | //? ====================================================================================== ?//


--------------------------------------------------------------------------------
/Python/real_Leja_phi.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Jacobian import Jacobian
 3 | from Divided_Difference import Divided_Difference
 4 | 
 5 | def real_Leja_phi(u, dt, RHS_function, interp_vector, integrator_coeffs, c, Gamma, Leja_X, phi_function, tol):
 6 |     """
 7 |     Computes the polynomial interpolation of 'phi_function' applied to 'interp_vector' at real Leja points.
 8 | 
 9 | 
10 |         Parameters
11 |         ----------
12 |         u                       : numpy array
13 |                                     State variable(s)
14 |         dt                      : double
15 |                                     Step size
16 |         RHS_function            : user-defined function 
17 |                                     RHS function
18 |         interp_vector           : numpy array
19 |                                     Vector to be interpolated
20 |         integrator_coeff        : int
21 |                                     Point where the matrix exponential is to be evaluated
22 |         c                       : double
23 |                                     Shifting factor
24 |         Gamma                   : double
25 |                                     Scaling factor
26 |         Leja_X                  : numpy array
27 |                                     Array of Leja points
28 |         phi_function            : function
29 |                                     phi function
30 |         tol                     : double
31 |                                     Accuracy of the polynomial so formed
32 | 
33 |         Returns
34 |         ----------
35 |         polynomial              : numpy array(s)
36 |                                     Polynomial interpolation of 'interp_vector' 
37 |                                     multiplied by 'phi_function' at real Leja points
38 |         ii                      : int
39 |                                     # of Leja points used
40 |         convergence             : int
41 |                                     0 -> did not converge, 1 -> converged
42 | 
43 |     """
44 | 
45 |     ###? Initialize parameters and arrays
46 |     convergence = 0                                                             #* 0 -> did not converge, 1 -> converged
47 |     num_interpolations = len(integrator_coeffs)                                 #* Number of interpolations in vertical
48 |     max_Leja_pts = len(Leja_X)                                                  #* Max number of Leja points  
49 |     phi_function_array = np.zeros((len(Leja_X), num_interpolations))            #* Phi function applied to 'interp_vector'
50 |     poly_coeffs = np.zeros((len(Leja_X), num_interpolations))                   #* Polynomial coefficients
51 |     polynomial = np.zeros((len(interp_vector), num_interpolations))             #* Polynomial output
52 |     y = interp_vector.copy()                                                    #* To avoid changing 'interp_vector'
53 |     
54 |     ###? Loop for vertical implementation
55 |     for ij in range(0, num_interpolations):
56 |         
57 |         ###? Phi function applied to 'interp_vector' (scaled and shifted)
58 |         phi_function_array[:, ij] = phi_function(integrator_coeffs[ij] * dt * (c + Gamma*Leja_X))
59 |         
60 |         ###? Compute polynomial coefficients
61 |         poly_coeffs[:, ij] = Divided_Difference(Leja_X, phi_function_array[:, ij]) 
62 |         
63 |         ###? Form the polynomial: 1st term (p_0)
64 |         polynomial[:, ij] = interp_vector * poly_coeffs[0, ij]
65 |     
66 |     ###? p_1, p_2, ...., p_n terms; iterate until converges
67 |     for ii in range(1, max_Leja_pts):
68 | 
69 |         ###? y = y * ((z - c)/Gamma - Leja_X)
70 |         y = (Jacobian(RHS_function, u, y)/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))
71 | 
72 |         ###? Error estimate; poly_error = |coeffs[nn]| ||y||
73 |         poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii, np.argmax(integrator_coeffs)])
74 |         
75 |         ###? Keep adding terms to the polynomial
76 |         for ij in range(0, num_interpolations):
77 | 
78 |             ###! To prevent diverging, restart simulations with smaller dt
79 |             if poly_error > 1e7:
80 |                 convergence = 0
81 |                 polynomial[:, ij] = interp_vector
82 |                 return polynomial, 3*ii, convergence
83 | 
84 |             ###? Add the new term to the polynomial
85 |             polynomial[:, ij] = polynomial[:, ij] + (poly_coeffs[ii, ij] * y)
86 |             
87 |         ###? If new term to be added < tol, break loop
88 |         if  poly_error < (tol*np.linalg.norm(polynomial) + tol):
89 |             convergence = 1
90 |             # print("Converged! # of Leja points used (phi): ", ii)
91 |             break
92 | 
93 |         ###! Warning flags
94 |         if ii == max_Leja_pts - 1:
95 |             print("Warning!! Max. # of Leja points reached without convergence!!")
96 |             print("Reduce dt.")
97 |             break
98 | 
99 |     return polynomial, ii, convergence


--------------------------------------------------------------------------------
/CUDA/real_Leja_phi_nl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Leja.hpp"
 4 | #include "Phi_functions.hpp"
 5 | #include "Divided_Differences.hpp"
 6 | 
 7 | namespace LeXInt
 8 | {
 9 |     //? Phi function interpolated on real Leja points
10 |     template <typename rhs>
11 |     void real_Leja_phi_nl(rhs& RHS,                           //? RHS function
12 |                           double* interp_vector,              //? Input vector multiplied to phi function
13 |                           double* polynomial,                 //? Output vector multiplied to phi function
14 |                           double* auxiliary_Leja,             //? Internal auxiliary variables (Leja)
15 |                           size_t N,                           //? Number of grid points
16 |                           double (* phi_function) (double),   //? Phi function
17 |                           vector<double>& Leja_X,             //? Array of Leja points
18 |                           double c,                           //? Shifting factor
19 |                           double Gamma,                       //? Scaling factor
20 |                           double rtol,                        //? Relative tolerance (normalised desired accuracy)
21 |                           double atol,                        //? Absolute tolerance
22 |                           double dt,                          //? Step size
23 |                           int& iters,                         //? # of iterations needed to converge (iteration variable)
24 |                           bool GPU,                           //? false (0) --> CPU; true (1) --> GPU
25 |                           GPU_handle& cublas_handle           //? CuBLAS handle
26 |                           )
27 |     {
28 |         //* -------------------------------------------------------------------------
29 |         //*
30 |         //* Computes the polynomial interpolation of phi function applied to 'interp_vector' at real Leja points.
31 |         //*
32 |         //*    Returns
33 |         //*    ----------
34 |         //*    polynomial          : double*
35 |         //*                             Polynomial interpolation of 'interp_vector', applied to
36 |         //*                             phi function, at real Leja points
37 |         //*
38 |         //* -------------------------------------------------------------------------
39 | 
40 |         int max_Leja_pts = Leja_X.size();                               //? Max. # of Leja points
41 |         double* Jac_vec = &auxiliary_Leja[0];                           //? auxiliary variable for Jacobian-vector product
42 |         
43 |         //* Phi function applied to 'interp_vector' (scaled and shifted)
44 |         vector<double> phi_function_array(max_Leja_pts);
45 |         
46 |         for (int ii = 0; ii < max_Leja_pts; ii++)
47 |         {
48 |             phi_function_array[ii] = phi_function(dt * (c + (Gamma * Leja_X[ii])));
49 |         }
50 | 
51 |         //* Compute polynomial coefficients
52 |         vector<double> coeffs = Divided_Differences(Leja_X, phi_function_array);
53 | 
54 |         //* Form the polynomial (first term): polynomial = coeffs[0] * interp_vector
55 |         axpby(coeffs[0], interp_vector, polynomial, N, GPU);
56 | 
57 |         //? Iterate until converges
58 |         for (iters = 1; iters < max_Leja_pts - 1; iters++)
59 |         {
60 |             //* Compute numerical Jacobian (for linear eqs., this is the RHS evaluation at y)
61 |             RHS(interp_vector, Jac_vec);
62 | 
63 |             //* y = y * ((z - c)/Gamma - Leja_X)
64 |             axpby(1./Gamma, Jac_vec, (-c/Gamma - Leja_X[iters - 1]), interp_vector, interp_vector, N, GPU);
65 | 
66 |             //* Add the new term to the polynomial (polynomial = polynomial + (coeffs[iters] * y))
67 |             axpby(coeffs[iters], interp_vector, 1.0, polynomial, polynomial, N, GPU);
68 | 
69 |             //* Error estimate: poly_error = |coeffs[iters]| ||interp_vector|| at every iteration
70 |             double poly_error = l2norm(interp_vector, N, GPU, cublas_handle)/sqrt(N);
71 |             poly_error = abs(coeffs[iters]) * poly_error;
72 | 
73 |             //* Norm of the polynomial
74 |             double poly_norm = l2norm(polynomial, N, GPU, cublas_handle)/sqrt(N);
75 | 
76 |             //? If new term to be added < tol, break loop
77 |             if (poly_error < ((rtol*poly_norm) + atol))
78 |             {
79 |                 // ::std::cout << "Converged! Iterations: " << iters << ::std::endl;
80 |                 break;
81 |             }
82 | 
83 |             //! Warning flags
84 |             if (iters == max_Leja_pts - 2)
85 |             {
86 |                 ::std::cout << "Warning!! Max. number of Leja points reached without convergence!!" << ::std::endl; 
87 |                 ::std::cout << "Max. Leja points currently set to " << max_Leja_pts << ::std::endl;
88 |                 ::std::cout << "Try increasing the number of Leja points. Max available: 10000." << ::std::endl;
89 |                 break;
90 |             }
91 |         }
92 |     }
93 | }


--------------------------------------------------------------------------------
/Python/Variable/EPIRK5P1.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.insert(1, "../")
  3 | 
  4 | from Jacobian import *
  5 | from Phi_functions import *
  6 | from real_Leja_phi import *
  7 | from imag_Leja_phi import *
  8 | 
  9 | ################################################################################################
 10 | 
 11 | def EPIRK5P1(u, dt, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
 12 |     """
 13 |     Parameters
 14 |     ----------
 15 |     u                       : numpy array
 16 |                                 State variable(s)
 17 |     dt                      : double
 18 |                                 Step size
 19 |     RHS_function            : user-defined function 
 20 |                                 RHS function
 21 |     c                       : double
 22 |                                 Shifting factor
 23 |     Gamma                   : double
 24 |                                 Scaling factor
 25 |     Leja_X                  : numpy array
 26 |                                 Array of Leja points
 27 |     tol                     : double
 28 |                                 Accuracy of the polynomial so formed
 29 |     Real_Imag               : int
 30 |                                 0 - Real, 1 - Imaginary
 31 | 
 32 |     Returns
 33 |     -------
 34 |     u_epirk4                : numpy array
 35 |                                 Output state variable(s) after time dt (4th order)
 36 |     u_epirk5                : numpy array
 37 |                                 Output state variable(s) after time dt (5th order)
 38 |     num_rhs_calls           : int
 39 |                                 # of RHS calls
 40 |     
 41 |     Reference:
 42 |     
 43 |         M. Tokman, J. Loffeld, P. Tranquilli, New Adaptive Exponential Propagation Iterative Methods of Runge-Kutta Type, SIAM J. Sci. Comput. 34 (5) (2012) A2650-A2669. 
 44 |         doi:10.1137/110849961.
 45 | 
 46 |     """
 47 | 
 48 |     ############## --------------------- ##############
 49 | 
 50 |     ###? Interpolate on either real Leja or imaginary Leja points
 51 |     if Real_Imag == 0:
 52 |         Leja_phi = real_Leja_phi
 53 |     elif Real_Imag == 1:
 54 |         Leja_phi = imag_Leja_phi
 55 |     else:
 56 |         print("Error!! Choose 0 for real or 1 for imaginary Leja points.")
 57 |     
 58 |     ############## --------------------- ##############
 59 |     
 60 |     ###! Parameters of EPIRK5P1 (5th order)
 61 |     a11 = 0.35129592695058193092
 62 |     a21 = 0.84405472011657126298
 63 |     a22 = 1.6905891609568963624
 64 | 
 65 |     b1  = 1.0
 66 |     b2  = 1.2727127317356892397
 67 |     b3  = 2.2714599265422622275
 68 | 
 69 |     g11 = 0.35129592695058193092
 70 |     g21 = 0.84405472011657126298
 71 |     g22 = 1.0
 72 |     g31 = 1.0
 73 |     g32 = 0.71111095364366870359
 74 |     g33 = 0.62378111953371494809
 75 |     
 76 |     ###! 4th order
 77 |     g32_4 = 0.5
 78 |     g33_4 = 1.0
 79 |     
 80 |     ############## --------------------- ##############
 81 | 
 82 |     ###? Vertical interpolation of RHS_function(u) at g11, g21, and g31; phi_1({g11, g21, g31} J(u) dt) f(u) dt
 83 |     u_flux, rhs_calls_1, convergence = Leja_phi(u, dt, RHS_function, RHS_function(u)*dt, [g11, g21, g31], c, Gamma, Leja_X, phi_1, tol)
 84 |     
 85 |     ###? If it does not converge, return (try with smaller dt)
 86 |     if convergence == 0:
 87 |         return u, 2.1*u, rhs_calls_1
 88 | 
 89 |     ###? Internal stage 1; a = u + a11 phi_1(g11 J(u) dt) f(u) dt
 90 |     a = u + (a11 * u_flux[:, 0])
 91 |     
 92 |     ###? Nonlinear remainder at u and a
 93 |     Nonlinear_u = RHS_function(u) - Jacobian(RHS_function, u, u)
 94 |     Nonlinear_a = RHS_function(a) - Jacobian(RHS_function, u, a)
 95 |     R_a = Nonlinear_a - Nonlinear_u
 96 | 
 97 |     ###? Vertical interpolation of R_a at g32_4, g32, and g22; phi_1({g11, g21, g31} J(u) dt) R(a) dt
 98 |     u_nl_1, rhs_calls_2, _ = Leja_phi(u, dt, RHS_function, R_a*dt, [g32_4, g32, g22], c, Gamma, Leja_X, phi_1, tol)
 99 | 
100 |     ###? b = u + a21 phi_1(g21 J(u) dt) f(u) dt + a22 phi_1(g22 J(u) dt) R_a dt
101 |     b = u + (a21 * u_flux[:, 1]) + (a22 * u_nl_1[:, 2])
102 | 
103 |     ###? Nonlinear remainder at b
104 |     Nonlinear_b = RHS_function(b) - Jacobian(RHS_function, u, b)
105 |     R_b = Nonlinear_b - Nonlinear_u
106 |     
107 |     ###? Vertical interpolation of (-2*R(a) + R(b)) at g33 and g33_4; phi_3({g33, g33_4} J(u) dt) (-2*R(a) + R(b)) dt
108 |     u_nl_2, rhs_calls_3, _ = Leja_phi(u, dt, RHS_function, (-2*R_a + R_b)*dt, [g33, g33_4], c, Gamma, Leja_X, phi_3, tol)
109 |  
110 |     ###? 4th order solution; u_4 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32 J(u) dt) R(a) dt + b3 phi_3(g33 J(u) dt) (-2*R(a) + R(b)) dt
111 |     u_epirk4 = u + u_flux[:, 2] + (b2 * u_nl_1[:, 0]) + (b3 * u_nl_2[:, 1])
112 |     
113 |     ###? 5th order solution; u_5 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32_4 J(u) dt) R(a) dt + b3 phi_3(g33_4 J(u) dt) (-2*R(a) + R(b)) dt
114 |     u_epirk5 = u + u_flux[:, 2] + (b2 * u_nl_1[:, 1]) + (b3 * u_nl_2[:, 0])
115 | 
116 |     ###? Proxy of computational cost
117 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 13
118 | 
119 |     return u_epirk4, u_epirk5, num_rhs_calls


--------------------------------------------------------------------------------
/Python/Constant/EPIRK5P1.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.insert(1, "../")
  3 | 
  4 | from Jacobian import *
  5 | from Phi_functions import *
  6 | from real_Leja_phi import *
  7 | from imag_Leja_phi import *
  8 | 
  9 | ################################################################################################
 10 | 
 11 | def EPIRK5P1(u, dt, RHS_function, c, Gamma, Leja_X, tol, Real_Imag):
 12 |     """
 13 |     Parameters
 14 |     ----------
 15 |     u                       : numpy array
 16 |                                 State variable(s)
 17 |     dt                      : double
 18 |                                 Step size
 19 |     RHS_function            : user-defined function 
 20 |                                 RHS function
 21 |     c                       : double
 22 |                                 Shifting factor
 23 |     Gamma                   : double
 24 |                                 Scaling factor
 25 |     Leja_X                  : numpy array
 26 |                                 Array of Leja points
 27 |     tol                     : double
 28 |                                 Accuracy of the polynomial so formed
 29 |     Real_Imag               : int
 30 |                                 0 - Real, 1 - Imaginary
 31 | 
 32 |     Returns
 33 |     -------
 34 |     u_epirk4                : numpy array
 35 |                                 Output state variable(s) after time dt (4th order)
 36 |     u_epirk5                : numpy array
 37 |                                 Output state variable(s) after time dt (5th order)
 38 |     num_rhs_calls           : int
 39 |                                 # of RHS calls
 40 |     
 41 |     Reference:
 42 |     
 43 |         M. Tokman, J. Loffeld, and P. Tranquilli
 44 |         New Adaptive Exponential Propagation Iterative Methods of Runge-Kutta Type, SIAM J. Sci. Comput. 34 (5) (2012) A2650-A2669
 45 |         doi:10.1137/110849961
 46 | 
 47 |     """
 48 | 
 49 |     ############## --------------------- ##############
 50 | 
 51 |     ###? Interpolate on either real Leja or imaginary Leja points
 52 |     if Real_Imag == 0:
 53 |         Leja_phi = real_Leja_phi
 54 |     elif Real_Imag == 1:
 55 |         Leja_phi = imag_Leja_phi
 56 |     else:
 57 |         print("Error!! Choose 0 for real or 1 for imaginary Leja points.")
 58 |     
 59 |     ############## --------------------- ##############
 60 |     
 61 |     ###! Parameters of EPIRK5P1 (5th order)
 62 |     a11 = 0.35129592695058193092
 63 |     a21 = 0.84405472011657126298
 64 |     a22 = 1.6905891609568963624
 65 | 
 66 |     b1  = 1.0
 67 |     b2  = 1.2727127317356892397
 68 |     b3  = 2.2714599265422622275
 69 | 
 70 |     g11 = 0.35129592695058193092
 71 |     g21 = 0.84405472011657126298
 72 |     g22 = 1.0
 73 |     g31 = 1.0
 74 |     g32 = 0.71111095364366870359
 75 |     g33 = 0.62378111953371494809
 76 |     
 77 |     ###! 4th order
 78 |     g32_4 = 0.5
 79 |     g33_4 = 1.0
 80 |     
 81 |     ############## --------------------- ##############
 82 | 
 83 |     ###? Vertical interpolation of RHS_function(u) at g11, g21, and g31; phi_1({g11, g21, g31} J(u) dt) f(u) dt
 84 |     u_flux, rhs_calls_1, convergence = Leja_phi(u, dt, RHS_function, RHS_function(u)*dt, [g11, g21, g31], c, Gamma, Leja_X, phi_1, tol)
 85 |     
 86 |     ###? If it does not converge, return (try with smaller dt)
 87 |     if convergence == 0:
 88 |         return u, 2.1*u, rhs_calls_1
 89 | 
 90 |     ###? Internal stage 1; a = u + a11 phi_1(g11 J(u) dt) f(u) dt
 91 |     a = u + (a11 * u_flux[:, 0])
 92 |     
 93 |     ###? Nonlinear remainder at u and a
 94 |     Nonlinear_u = RHS_function(u) - Jacobian(RHS_function, u, u)
 95 |     Nonlinear_a = RHS_function(a) - Jacobian(RHS_function, u, a)
 96 |     R_a = Nonlinear_a - Nonlinear_u
 97 | 
 98 |     ###? Vertical interpolation of R_a at g32_4, g32, and g22; phi_1({g32_4, g32, g22} J(u) dt) R(a) dt
 99 |     u_nl_1, rhs_calls_2, _ = Leja_phi(u, dt, RHS_function, R_a*dt, [g32_4, g32, g22], c, Gamma, Leja_X, phi_1, tol)
100 | 
101 |     ###? b = u + a21 phi_1(g21 J(u) dt) f(u) dt + a22 phi_1(g22 J(u) dt) R_a dt
102 |     b = u + (a21 * u_flux[:, 1]) + (a22 * u_nl_1[:, 2])
103 | 
104 |     ###? Nonlinear remainder at b
105 |     Nonlinear_b = RHS_function(b) - Jacobian(RHS_function, u, b)
106 |     R_b = Nonlinear_b - Nonlinear_u
107 |     
108 |     ###? Vertical interpolation of (-2*R(a) + R(b)) at g33 and g33_4; phi_3({g33, g33_4} J(u) dt) (-2*R(a) + R(b)) dt
109 |     u_nl_2, rhs_calls_3, _ = Leja_phi(u, dt, RHS_function, (-2*R_a + R_b)*dt, [g33, g33_4], c, Gamma, Leja_X, phi_3, tol)
110 |  
111 |     ###? 4th order solution; u_4 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32_4 J(u) dt) R(a) dt + b3 phi_3(g33_4 J(u) dt) (-2*R(a) + R(b)) dt
112 |     u_epirk4 = u + u_flux[:, 2] + (b2 * u_nl_1[:, 0]) + (b3 * u_nl_2[:, 1])
113 |     
114 |     ###? 5th order solution; u_5 = u + b1 phi_1(g31 J(u) dt) f(u) dt + b2 phi_1(g32 J(u) dt) R(a) dt + b3 phi_3(g33 J(u) dt) (-2*R(a) + R(b)) dt
115 |     u_epirk5 = u + u_flux[:, 2] + (b2 * u_nl_1[:, 1]) + (b3 * u_nl_2[:, 0])
116 | 
117 |     ###? Proxy of computational cost
118 |     num_rhs_calls = rhs_calls_1 + rhs_calls_2 + rhs_calls_3 + 13
119 | 
120 |     return u_epirk4, u_epirk5, num_rhs_calls


--------------------------------------------------------------------------------
/CUDA/Test/Burgers_2D.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "Problems.hpp"
  4 | #include "../error_check.hpp"
  5 | 
  6 | using namespace std;
  7 | 
  8 | //? ====================================================================================== ?//
  9 | 
 10 | #ifdef __CUDACC__
 11 | 
 12 | __global__ void Burgers_2D(int N, double dx, double dy, double velocity, double* input, double* output)
 13 | {
 14 |     //? RHS of the 2D viscous Burgers' equation; each thread evaluates one point (ii, jj) of the N x N grid
 15 |     int ii = threadIdx.y + blockIdx.y * blockDim.y;
 16 |     int jj = threadIdx.x + blockIdx.x * blockDim.x;
 17 | 
 18 |     if ((ii >= N) || (jj >= N)) return;     //? Surplus threads outside the grid do nothing
 19 |                         //? PBC(row, column, N) is defined outside this file; presumably a periodic index wrap (TODO confirm)
 20 |                         //? Diffusion (NOTE(review): the centre weight 4/dx^2 equals the 5-point Laplacian only if dx == dy)
 21 |     output[N*ii + jj] =   (input[PBC(ii, jj + 1, N)] - (4.0 * input[PBC(ii, jj, N)]) + input[PBC(ii, jj - 1, N)])/(dx*dx)
 22 |                         + (input[PBC(ii + 1, jj, N)] + input[PBC(ii - 1, jj, N)])/(dy*dy)
 23 |                         //? The flux is u^2/2, so every term of both advection stencils carries the factor 1/2
 24 |                         //? Advection (nonlinear)
 25 |                         + velocity/dx 
 26 |                         * (- 2.0/6.0 * input[PBC(ii, jj - 1, N)] * input[PBC(ii, jj - 1, N)]/2
 27 |                         - 3.0/6.0 * input[PBC(ii, jj, N)] * input[PBC(ii, jj, N)]/2
 28 |                         + 6.0/6.0 * input[PBC(ii, jj + 1, N)] * input[PBC(ii, jj + 1, N)]/2
 29 |                         - 1.0/6.0 * input[PBC(ii, jj + 2, N)] * input[PBC(ii, jj + 2, N)]/2)
 30 |                         + velocity/dy
 31 |                         * (- 2.0/6.0 * input[PBC(ii - 1, jj, N)] * input[PBC(ii - 1, jj, N)]/2
 32 |                         - 3.0/6.0 * input[PBC(ii, jj, N)] * input[PBC(ii, jj, N)]/2
 33 |                         + 6.0/6.0 * input[PBC(ii + 1, jj, N)] * input[PBC(ii + 1, jj, N)]/2
 34 |                         - 1.0/6.0 * input[PBC(ii + 2, jj, N)] * input[PBC(ii + 2, jj, N)]/2);
 35 | }
 36 | 
 37 | #endif
 38 | 
 39 | struct RHS_Burgers_2D:public Problems_2D
 40 | {
 41 |     //? Right-hand side of the 2D viscous Burgers' equation: RHS = A_adv.u^2/2.0 + A_dif.u
 42 |     //? N, dx, dy, and velocity are not declared here (presumably inherited from Problems_2D -- TODO confirm).
 43 |     //! Constructor: forwards its arguments to Problems_2D
 44 |     RHS_Burgers_2D(int _N, double _dx, double _dy, double _velocity) : Problems_2D(_N, _dx, _dy, _velocity) {}
 45 | 
 46 |     //? Writes the RHS evaluated at 'input' into 'output' (kernel launch under nvcc, OpenMP loops otherwise)
 47 |     void operator()(double* input, double* output)
 48 |     {
 49 |         #ifdef __CUDACC__
 50 | 
 51 |             const int tile = 16;
 52 |             const int num_blocks = (N + tile - 1)/tile;
 53 |             dim3 threads(tile, tile);
 54 |             dim3 blocks(num_blocks, num_blocks);
 55 |             Burgers_2D<<<blocks, threads>>>(N, dx, dy, velocity, input, output);
 56 |         
 57 |         #else
 58 | 
 59 |             //? Tiled traversal of the N x N grid (32 x 32 tiles); the two tile loops are collapsed for OpenMP
 60 |             const int tile = 32;
 61 |             const int num_blocks = (N + tile - 1)/tile;
 62 | 
 63 |             #pragma omp parallel for collapse(2)
 64 |             for (int tile_row = 0; tile_row < num_blocks; tile_row++)
 65 |             {
 66 |                 for (int tile_col = 0; tile_col < num_blocks; tile_col++)
 67 |                 {
 68 |                     for (int row = 0; row < tile; row++)
 69 |                     {
 70 |                         for (int col = 0; col < tile; col++)
 71 |                         {
 72 |                             const int ii = (tile_row * tile) + row;
 73 |                             const int jj = (tile_col * tile) + col;
 74 |                             if ((ii >= N) || (jj >= N))
 75 |                                 continue;
 76 | 
 77 |                             //? Grid values: centre, x-neighbours (jj - 1, jj + 1, jj + 2), y-neighbours (ii - 1, ii + 1, ii + 2)
 78 |                             const double u_c   = input[PBC(ii, jj, N)];
 79 |                             const double u_xm  = input[PBC(ii, jj - 1, N)];
 80 |                             const double u_xp  = input[PBC(ii, jj + 1, N)];
 81 |                             const double u_xpp = input[PBC(ii, jj + 2, N)];
 82 |                             const double u_ym  = input[PBC(ii - 1, jj, N)];
 83 |                             const double u_yp  = input[PBC(ii + 1, jj, N)];
 84 |                             const double u_ypp = input[PBC(ii + 2, jj, N)];
 85 | 
 86 |                             //? Diffusion, then nonlinear advection along x (velocity/dx) and along y (velocity/dy)
 87 |                             output[N*ii + jj] =   (u_xp - (4.0 * u_c) + u_xm)/(dx*dx) + (u_yp + u_ym)/(dy*dy)
 88 |                                                 + velocity/dx * (- 2.0/6.0 * u_xm * u_xm/2 - 3.0/6.0 * u_c * u_c/2 + 6.0/6.0 * u_xp * u_xp/2 - 1.0/6.0 * u_xpp * u_xpp/2)
 89 |                                                 + velocity/dy * (- 2.0/6.0 * u_ym * u_ym/2 - 3.0/6.0 * u_c * u_c/2 + 6.0/6.0 * u_yp * u_yp/2 - 1.0/6.0 * u_ypp * u_ypp/2);
 90 |                         }
 91 |                     }
 92 |                 }
 93 |             }
 94 |             
 95 |         #endif
 96 |     }
 97 | 
 98 |     //! Destructor
 99 |     ~RHS_Burgers_2D() {}
100 | };
101 | 
102 | //? ====================================================================================== ?//


--------------------------------------------------------------------------------
/Python/imag_Leja_phi.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from Divided_Difference import Divided_Difference
  3 | 
  4 | def imag_Leja_phi(u, dt, RHS_function, interp_vector, integrator_coeffs, c, Gamma, Leja_X, phi_function, tol):
  5 |     """
  6 |     To be used for computing "phi_function" applied to "interp_vector" (at imaginary Leja points) for
  7 |     exponential integrators that computes the Jacobian at every time step.
  8 | 
  9 | 
 10 |         Parameters
 11 |         ----------
 12 |         u                       : numpy array
 13 |                                     State variable(s)
 14 |         dt                      : double
 15 |                                     Step size
 16 |         RHS_function            : user-defined function 
 17 |                                     RHS function
 18 |         interp_vector           : numpy array
 19 |                                     Vector to be interpolated
 20 |         c                       : double
 21 |                                     Shifting factor
 22 |         Gamma                   : double
 23 |                                     Scaling factor
 24 |         Leja_X                  : numpy array
 25 |                                     Array of Leja points
 26 |         phi_function            : function
 27 |                                     phi function
 28 |         tol                     : double
 29 |                                     Accuracy of the polynomial so formed
 30 | 
 31 |         Returns
 32 |         ----------
 33 |         polynomial              : numpy array(s)
 34 |                                     Polynomial interpolation of 'interp_vector' 
 35 |                                     multiplied by 'phi_function' at real Leja points
 36 |         ii+1                    : int
 37 |                                     # of RHS calls
 38 |         convergence             : int
 39 |                                     0 -> did not converge, 1 -> converged
 40 | 
 41 |     """
 42 |     
 43 |     ### Initialize parameters and arrays
 44 |     epsilon = 1e-7
 45 |     convergence = 0                                                                             # 0 -> did not converge, 1 -> converged
 46 |     num_interpolations = len(integrator_coeffs)                                                 # Number of interpolations in vertical
 47 |     max_Leja_pts = len(Leja_X)                                                                  # Max number of Leja points
 48 |     phi_function_array = np.zeros((len(Leja_X), num_interpolations), dtype = "complex")         # Phi function applied to 'interp_vector'
 49 |     poly_coeffs = np.zeros((len(Leja_X), num_interpolations), dtype = "complex")                # Polynomial coefficients
 50 |     polynomial = np.zeros((len(interp_vector), num_interpolations), dtype = "complex")          # Polynomial array
 51 |     rhs_u = RHS_function(u)                                                                     # RHS evaluated at 'u'
 52 |     y = interp_vector.copy() + 0*1j                                                             # To avoid changing 'interp_vector'
 53 |     
 54 |     ### Loop for vertical implementation
 55 |     for ij in range(0, num_interpolations):
 56 |         
 57 |         ### Phi function applied to 'interp_vector' (scaled and shifted)
 58 |         phi_function_array[:, ij] = phi_function(integrator_coeffs[ij] * dt * (c + Gamma*Leja_X) * 1j)
 59 | 
 60 |         ### Compute polynomial coefficients
 61 |         poly_coeffs[:, ij] = Divided_Difference(Leja_X, phi_function_array[:, ij]) 
 62 | 
 63 |         ### p_0 term
 64 |         polynomial[:, ij] = interp_vector * poly_coeffs[0, ij] + 0*1j
 65 | 
 66 |     ### p_1, p_2, ...., p_n terms; iterate until converges
 67 |     for ii in range(1, max_Leja_pts):
 68 |         
 69 |         ### Compute numerical Jacobian
 70 |         Jacobian_function = (RHS_function(u + (epsilon * y)) - rhs_u)/epsilon
 71 |         
 72 |         ### y = y * ((z - c)/Gamma - Leja_X)
 73 |         y = (-1j * Jacobian_function/Gamma) + (y * (-c/Gamma - Leja_X[ii - 1]))
 74 | 
 75 |         ### Error estimate
 76 |         poly_error = np.linalg.norm(y) * abs(poly_coeffs[ii, np.argmax(integrator_coeffs)])     
 77 | 
 78 |         ### Keep adding terms to the polynomial
 79 |         for ij in range(0, num_interpolations):
 80 |             
 81 |             ### To prevent diverging, restart simulations with smaller dt
 82 |             if poly_error > 1e17:
 83 |                 convergence = 0
 84 |                 polynomial[:, ij] = u
 85 |                 return np.real(polynomial), ii+1, convergence
 86 |             
 87 |             ### Add the new term to the polynomial
 88 |             polynomial[:, ij] = polynomial[:, ij] + (poly_coeffs[ii, ij] * y)
 89 |             
 90 |         ### If new term to be added < tol, break loop; safety factor = 0.25
 91 |         if  poly_error < 0.25*tol*np.linalg.norm(polynomial):
 92 |             convergence = 1
 93 |             # print("Leja points used: ", ii)
 94 |             break
 95 |         
 96 |         ### Warning flags
 97 |         if ii == max_Leja_pts - 1:
 98 |             print("Warning!! Max. # of Leja points reached without convergence!! Try increasing the number of Leja points. Max available: 10000.")
 99 |             break
100 | 
101 |     return np.real(polynomial), ii+1, convergence
102 | 


--------------------------------------------------------------------------------
/Python/real_Leja_linear_exp.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from Divided_Difference import Divided_Difference
  3 | 
  4 | def real_Leja_linear_exp(u, T_f, substeps, RHS_function, integrator_coeff, c, Gamma, Leja_X, tol):
  5 |     """
  6 |     Computes the polynomial interpolation of matrix exponential applied to 'u' at real Leja points.
  7 | 
  8 | 
  9 |         Parameters
 10 |         ----------
 11 |         u                       : numpy array
 12 |                                     State variable(s)
 13 |         T_f                     : double
 14 |                                     Step size
 15 |         RHS_function            : user-defined function 
 16 |                                     RHS function
 17 |         integrator_coeff        : int
 18 |                                     Point where phi function is to be evaluated
 19 |         c                       : double
 20 |                                     Shifting factor
 21 |         Gamma                   : double
 22 |                                     Scaling factor
 23 |         Leja_X                  : numpy array
 24 |                                     Array of Leja points
 25 |         tol                     : double
 26 |                                     Accuracy of the polynomial so formed
 27 |     
 28 |         Returns
 29 |         ----------
 30 |         polynomial              : numpy array
 31 |                                     Polynomial interpolation of 'u' multiplied 
 32 |                                     by the matrix exponential at real Leja points
 33 |         total_iters             : int
 34 |                                     Total number of Leja points used
 35 | 
 36 |     """
 37 |     
 38 |     ###? Initialize parameters and arrays
 39 |     y = u.copy()                                                  #* To avoid changing 'u'
 40 |     y_backup = u.copy()                                           #* Backup y - To avoid changing 'u'
 41 |     
 42 |     max_Leja_pts = len(Leja_X)                                    #* Max number of Leja points
 43 |     dt = T_f/substeps                                             #* Initial substep size
 44 |     time_elapsed = 0                                              #* Counter for time elapsed
 45 |     subs = 1                                                      #* Counter for number of substeps
 46 |     convergence = 0                                               #* Check for convergence
 47 |     total_iters = 0                                               #* Counter for Leja iterations
 48 |     
 49 |     ###! Time loop
 50 |     while time_elapsed < T_f:
 51 |         
 52 |         ###? Array to store error incurred (needs to be set to zeros for every substep)
 53 |         poly_error = np.zeros(max_Leja_pts);                            
 54 |         
 55 |         ###* Adjust final time substep
 56 |         if abs(T_f - time_elapsed) < 1e-12:
 57 |             break
 58 |         elif time_elapsed + dt > T_f:
 59 |             dt = T_f - time_elapsed
 60 |         
 61 |         ###? Compute polynomial coefficients
 62 |         poly_coeffs = Divided_Difference(Leja_X, np.exp(integrator_coeff * dt * (c + Gamma*Leja_X)))
 63 |         
 64 |         ###? Set y = polynomial; save y_backup (same dt)
 65 |         if convergence == 1:
 66 |             
 67 |             y = polynomial;
 68 |             y_backup = polynomial;
 69 | 
 70 |         ###? Set 'y' to previous value (reduce dt)
 71 |         elif convergence == 0:
 72 | 
 73 |             y = y_backup;
 74 |             
 75 |         ###? Form the first term of the s^{th} polynomial: p_0 = d_0 * y_0
 76 |         polynomial = poly_coeffs[0] * y
 77 | 
 78 |         ###? p_1, p_2, ...., p_n terms; iterate until converges
 79 |         for ii in range(1, max_Leja_pts):
 80 | 
 81 |             ###? y = y * ((z - c)/Gamma - Leja_X)
 82 |             y = (RHS_function(y)/(T_f*Gamma)) + (y * (-c/Gamma - Leja_X[ii - 1]))
 83 |             
 84 |             ###? Keep adding terms to the polynomial
 85 |             polynomial = polynomial + (poly_coeffs[ii] * y)
 86 | 
 87 |             ###? Error estimate; poly_error = |coeffs[nn]| ||y||
 88 |             poly_error[ii] = np.linalg.norm(y) * abs(poly_coeffs[ii])
 89 |             
 90 |             ###! Warning: Check for diverging values, if so, restart iteration with smaller dt
 91 |             if ii == max_Leja_pts - 1 or poly_error[ii] > 1e3:
 92 | 
 93 |                 print("Step size: ", dt)
 94 |                 print("Computations wasted: ", ii)
 95 | 
 96 |                 ###* Update parameters
 97 |                 dt = 0.5 * dt
 98 |                 subs = np.ceil(T_f/dt)
 99 |                 convergence = 0
100 |                 total_iters = total_iters + ii
101 | 
102 |                 break
103 | 
104 |             ###? If new term to be added < tol, break loop
105 |             if  poly_error[ii] < (tol*np.linalg.norm(polynomial) + tol):
106 |                 
107 |                 print()
108 |                 print("Converged! # of Leja points used (exp): ", ii)
109 |                 print()
110 |                 
111 |                 time_elapsed = time_elapsed + dt
112 |                 total_iters = total_iters + ii
113 |                 subs = max(substeps, subs)
114 |                 convergence = 1
115 |                 
116 |                 dt = 1.1*dt
117 | 
118 |                 break
119 | 
120 |     return polynomial, total_iters, subs


--------------------------------------------------------------------------------
/CUDA/Integrators/EPIRK4s3B.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "../Leja.hpp"
  4 | #include "../Phi_functions.hpp"
  5 | 
  6 | namespace LeXInt
  7 | {
  8 |     //? One step of EPIRK4s3B; the phi functions are interpolated on real Leja points
  9 |     template <typename rhs>
 10 |     void EPIRK4s3B(rhs& RHS,                   //? RHS function
 11 |                    double* u,                  //? Input state variable(s)
 12 |                    double* u_epirk4,           //? Output state variable(s)
 13 |                    double* auxiliary_expint,   //? Internal auxiliary variables (EPIRK4s3B)
 14 |                    double* auxiliary_Leja,     //? Internal auxiliary variables (Leja)
 15 |                    size_t N,                   //? Number of grid points
 16 |                    vector<double>& Leja_X,     //? Array of Leja points
 17 |                    double c,                   //? Shifting factor
 18 |                    double Gamma,               //? Scaling factor
 19 |                    double rtol,                //? Relative tolerance (normalised desired accuracy)
 20 |                    double atol,                //? Absolute tolerance
 21 |                    double dt,                  //? Step size
 22 |                    int& iters,                 //? # of iterations needed to converge (iteration variable)
 23 |                    bool GPU,                   //? false (0) --> CPU; true (1) --> GPU
 24 |                    GPU_handle& cublas_handle   //? CuBLAS handle
 25 |                    )
 26 |     {
 27 |         //* -------------------------------------------------------------------------
 28 | 
 29 |         //! u, u_epirk4, auxiliary_expint, and auxiliary_Leja
 30 |         //! are device vectors if GPU support is activated.
 31 |         //! auxiliary_expint is addressed below as five consecutive blocks of N entries (offsets 0 to 4*N).
 32 | 
 33 |         //*    Returns
 34 |         //*    ----------
 35 |         //*     
 36 |         //*     u_epirk4                : double* 
 37 |         //*                                  4th order solution after time dt
 38 |         //*
 39 |         //*
 40 |         //*    Reference:
 41 |         //*         G. Rainwater, M. Tokman, A new approach to constructing efficient stiffly accurate EPIRK methods, J. Comput. Phys. 323 (2016) 283-309.
 42 |         //*         doi:10.1016/j.jcp.2016.07.026
 43 | 
 44 |         //* -------------------------------------------------------------------------
 45 | 
 46 |         //? Leja iterations spent in each of the four interpolation calls
 47 |         int leja_iters[4] = {0, 0, 0, 0};
 48 | 
 49 |         //? Names for the blocks of auxiliary_expint; names sharing a block are used one after another
 50 |         double* u_flux = auxiliary_expint;                              //? Blocks 0, 1, and 2
 51 |         double* a = u_flux;                 double* R_3 = a;            //? Block 0
 52 |         double* b = u_flux + N;             double* R_4 = b;            //? Block 1
 53 |         double* u_flux_phi_1 = u_flux + 2*N;                            //? Block 2
 54 |         double* f_u = auxiliary_expint + 3*N;                           //? Block 3
 55 |         double* NL_a = f_u; double* R_a = f_u; double* u_nl_3 = f_u;    //? Block 3
 56 |         double* NL_b = auxiliary_expint + 4*N;                          //? Block 4
 57 |         double* R_b = NL_b; double* u_nl_4 = NL_b;                      //? Block 4
 58 |         double* NL_u = u_epirk4;                                        //? The output vector holds NL_u until the final sum
 59 | 
 60 |         //? f_u = RHS(u)*dt
 61 |         RHS(u, f_u);
 62 |         axpby(dt, f_u, f_u, N, GPU);
 63 | 
 64 |         //? Vertical interpolation of RHS(u) at 1/2 and 3/4; u_flux[0, 1] = phi_2({1/2, 3/4} J(u) dt) f(u) dt
 65 |         real_Leja_phi(RHS, u, f_u, u_flux, auxiliary_Leja, N, {1./2., 3./4.}, 
 66 |                       phi_2, Leja_X, c, Gamma, rtol, atol, dt, leja_iters[0], GPU, cublas_handle);
 67 | 
 68 |         //? Interpolation of RHS(u) at 1; u_flux[2] = phi_1(J(u) dt) f(u) dt
 69 |         real_Leja_phi(RHS, u, f_u, u_flux_phi_1, auxiliary_Leja, N, {1.0}, 
 70 |                       phi_1, Leja_X, c, Gamma, rtol, atol, dt, leja_iters[1], GPU, cublas_handle);
 71 | 
 72 |         //? Internal stages, written over the phi_2 results they are built from
 73 |         //?     a = u + 2/3 phi_2(1/2 J(u) dt) f(u) dt
 74 |         //?     b = u + phi_2(3/4 J(u) dt) f(u) dt
 75 |         axpby(1.0, u, 2./3., u_flux, a, N, GPU);
 76 |         axpby(1.0, u, 1.0, u_flux + N, b, N, GPU);
 77 | 
 78 |         //? Nonlinear remainders at u, a, and b
 79 |         Nonlinear_remainder(RHS, u, u, NL_u, auxiliary_Leja, N, GPU, cublas_handle);
 80 |         Nonlinear_remainder(RHS, u, a, NL_a, auxiliary_Leja, N, GPU, cublas_handle);
 81 |         Nonlinear_remainder(RHS, u, b, NL_b, auxiliary_Leja, N, GPU, cublas_handle);
 82 | 
 83 |         //? R_a = (NL_a - NL_u) * dt; R_b = (NL_b - NL_u) * dt
 84 |         axpby(dt, NL_a, -dt, NL_u, R_a, N, GPU);
 85 |         axpby(dt, NL_b, -dt, NL_u, R_b, N, GPU);
 86 | 
 87 |         //? R_3 = (54R(a) - 16R(b)) dt; R_4 = (-324R(a) + 144R(b)) dt
 88 |         axpby(54.0, R_a, -16.0, R_b, R_3, N, GPU);
 89 |         axpby(-324.0, R_a, 144.0, R_b, R_4, N, GPU);
 90 | 
 91 |         //? u_nl_3 = phi_3(J(u) dt) (54R(a) - 16R(b)) dt
 92 |         real_Leja_phi(RHS, u, R_3, u_nl_3, auxiliary_Leja, N, {1.0}, 
 93 |                       phi_3, Leja_X, c, Gamma, rtol, atol, dt, leja_iters[2], GPU, cublas_handle);
 94 | 
 95 |         //? u_nl_4 = phi_4(J(u) dt) (-324R(a) + 144R(b)) dt
 96 |         real_Leja_phi(RHS, u, R_4, u_nl_4, auxiliary_Leja, N, {1.0}, 
 97 |                       phi_4, Leja_X, c, Gamma, rtol, atol, dt, leja_iters[3], GPU, cublas_handle);
 98 | 
 99 |         //! 4th order solution; u_4 = u + phi_1(J(u) dt) f(u) dt + phi_3(J(u) dt) (54R(a) - 16R(b)) dt + phi_4(J(u) dt) (-324R(a) + 144R(b)) dt
100 |         axpby(1.0, u, 1.0, u_flux_phi_1, 1.0, u_nl_3, 1.0, u_nl_4, u_epirk4, N, GPU);
101 | 
102 |         //? Total number of Leja iterations
103 |         iters = leja_iters[0] + leja_iters[1] + leja_iters[2] + leja_iters[3];
104 |     }
105 | }


--------------------------------------------------------------------------------
/Python/Test/Test_data/Constant/Burgers/T_final_0.001/N_300_eta_10/EXPRB43/N_cfl_2.00/Final_data.txt:
--------------------------------------------------------------------------------
1 | 1.0382541662553444 1.037384527544248 1.03715264161397 1.037541471809854 1.038534721026023 1.040116995787637 1.0422738945728103 1.0449920221496682 1.0482589422992037 1.052063074583342 1.0563935491485978 1.061240026561423 1.0665924967247629 1.0724410642583175 1.0787757330925314 1.0855861961457358 1.0928616397420206 1.1005905686814397 1.1087606576532312 1.1173586307904895 1.1263701751838846 1.1357798868846514 1.1455712496597168 1.1557266477956982 1.166227407622735 1.1770538675854545 1.188185472843623 1.199600889385598 1.2112781361833358 1.223194728499155 1.235327829447992 1.2476544072575206 1.2601513898060803 1.2727958201598562 1.2855650016671114 1.2984366384751578 1.3113889614994398 1.324400844520841 1.337451904928437 1.3505225908245961 1.363594253240574 1.376649202854625 1.3896707531105543 1.4026432497171584 1.41555208613157 1.4283837085472162 1.4411256095000375 1.4537663116363568 1.4662953446695146 1.4787032137732852 1.490981363779453 1.5031221373576311 1.5151187318194042 1.5269651507332553 1.5386561572192883 1.5501872237153542 1.5615544836017081 1.5727546824272132 1.5837851310351492 1.5946436586089072 1.6053285697680144 1.6158386017367945 1.626172884083105 1.636330902038385 1.6463124602485322 1.656117650199406 1.665746819352391 1.6752005435139692 1.6844795991342003 1.6935849409841572 1.7025176788758771 1.7112790582065032 1.71987044134136 1.7282932909500455 1.7365491548555125 1.7446396526726076 1.752566462953836 1.760331312626906 1.7679359663451184 1.7753822183245613 1.7826718835971438 1.789806791136884 1.7967887776215177 1.8036196814389693 1.810301337788668 1.8168355743418398 1.8232242067004563 1.82946903598149 1.8355718447047005 1.841534394478185 1.8473584240819427 1.8530456469147476 1.8585977493905776 1.8640163894803394 1.8693031955423833 1.8744597650039636 1.8794876637625806 1.8843884251549599 1.8891635494339223 1.8938145035164524 1.8983427202415983 1.9027495984688807 1.9070365027819214 1.9112047628327946 1.915255674487956 1.9191904988901034 
1.9230104626258218 1.9267167583108038 1.9303105444046624 1.93379294533826 1.9371650519722894 1.9404279216500353 1.943582578962767 1.9466300147268552 1.9495711881227655 1.9524070255138817 1.9551384211911642 1.9577662383338477 1.9602913079752067 1.9627144310333637 1.9650363769612234 1.9672578852237617 1.9693796652858777 1.9714023967409424 1.9733267296669093 1.9751532850895772 1.9768826555015147 1.9785154044418138 1.98005206728568 1.9814931519379924 1.982839137781033 1.9840904776017962 1.98524759626945 1.986310892290945 1.9872807370517227 1.9881574757688916 1.9889414269517192 1.9896328837634236 1.9902321124984712 1.9907393547679513 1.9911548257520517 1.9914787158168958 1.9917111900664988 1.991852387976284 1.9919024249765058 1.9918613907723317 1.9917293511266305 1.991506346380492 1.9911923929139712 1.9907874825335954 1.9902915825069278 1.989704635827661 1.9890265612881435 1.9882572534286063 1.9873965827123783 1.9864443949724897 1.9854005126353373 1.9842647331746721 1.983036830742115 1.9817165547497702 1.9803036306654636 1.9787977600172006 1.9771986198465459 1.975505862993539 1.973719118519058 1.971837990752608 1.969862059826093 1.9677908817344645 1.9656239880714983 1.963360885638188 1.9610010575012422 1.9585439615352316 1.955989031583479 1.9533356767334165 1.9505832815631021 1.947731206157098 1.9447787857168055 1.941725331506396 1.9385701293937307 1.935312441360356 1.9319515047608202 1.9284865328817467 1.9249167142895445 1.921241213917281 1.9174591731215709 1.9135697094172373 1.9095719177610484 1.9054648701343329 1.9012476172024988 1.8969191882257939 1.89247859315663 1.8879248228969403 1.8832568516633625 1.8784736390756922 1.8735741328218032 1.868557271645375 1.8634219897126219 1.8581672217821583 1.852791909377699 1.8472950085782598 1.8416754988705104 1.8359323960872322 1.830064764321562 1.824071734106491 1.8179525218570902 1.8117064536458636 1.8053329931125783 1.7988317744472972 1.7922026406882146 1.7854456880496234 1.778561315973352 1.771550285423891 
1.7644137828694393 1.757153492406529 1.7497716757106818 1.7422712574044503 1.7346559178362513 1.7269301925992897 1.7190995722543754 1.7111706084496088 1.703151016525113 1.6950497759836656 1.6868772229690505 1.6786451322740064 1.6703667818261974 1.6620569964342617 1.6537321630680106 1.6454102123639385 1.637110561347647 1.6288540082760186 1.6206625763477809 1.6125593003066796 1.6045679510232385 1.5967126972984553 1.5890177014242166 1.5815066510217508 1.5742022311407262 1.5671255419092003 1.5602954709973595 1.553728034627974 1.5474357010707007 1.5414267163290076 1.5357044522581702 1.5302668004995958 1.5251056384318655 1.5202063899057372 1.5155477074600403 1.5111013010864889 1.5068319288416896 1.5026975741857749 1.4986498144547085 1.4946343903575074 1.4905919759230886 1.4864591363565898 1.4821694621885115 1.477654853032192 1.472846921938394 1.4676784830100316 1.462085081274502 1.4560065206467445 1.4493883450071863 1.442183224537922 1.434352208917451 1.4258658039216543 1.4167048426927817 1.4068611227222128 1.396337791197699 1.3851494701492448 1.373322117423665 1.3608926330996856 1.3479082243035363 1.3344255505571212 1.3205096774128722 1.3062328700830257 1.291673262926 1.2769134421070716 1.2620389808775414 1.2471369637550713 1.2322945389255755 1.2175975310922336 1.2031291462176716 1.1889687958268977 1.1751910631151188 1.1618648277005694 1.1490525630355344 1.136809811673925 1.1251848423679132 1.1142184848109382 1.1039441342046252 1.0943879143626771 1.0855689837294882 1.077499963776257 1.070187472030145 1.063632733347547 1.0578322487986065 1.0527784979601293 1.048460650485077 1.0448652678878465 1.0419769752737182 1.039779086110058


--------------------------------------------------------------------------------