├── .gitignore ├── CallBack ├── C++-Fonctor │ ├── CMakeLists.txt │ ├── README.md │ └── main.cpp ├── C++-Pointer │ ├── CMakeLists.txt │ ├── README.md │ └── main.cpp ├── C++-lambda │ ├── CMakeLists.txt │ ├── README.md │ └── main.cpp ├── Ju │ ├── README.md │ ├── main.jl │ ├── script │ └── script-m ├── Numba │ ├── README.md │ ├── main.py │ └── script ├── Py │ ├── README.md │ └── main.py ├── Pythran │ ├── README.md │ ├── f.py │ ├── g.py │ ├── implicit.py │ ├── main.py │ ├── script │ └── trapz.py ├── README.md ├── Results │ ├── Look.py │ └── README.md └── runAllTests.sh ├── FeStiff ├── C++ │ ├── CMakeLists.txt │ ├── README.md │ ├── Stiffness.hpp │ ├── main.cpp │ └── rando.hpp ├── Ju │ ├── README.md │ ├── Rando.jl │ ├── Stiffness.jl │ ├── main.jl │ ├── script │ └── script-m ├── Numba │ ├── README.md │ ├── RandomTriangle.py │ ├── Stiffness.py │ ├── main.py │ ├── rando.py │ └── script ├── Py │ ├── README.md │ ├── RandomTriangle.py │ ├── Stiffness.py │ ├── main.py │ └── rando.py ├── Pythran │ ├── README.md │ ├── RandomTriangle.py │ ├── StiffOut.py │ ├── Stiffness.py │ ├── main.py │ ├── rando.py │ └── script ├── README.md ├── Results │ ├── Look.py │ └── README.md ├── runAllTests.sh └── sage │ ├── README.md │ └── Stiff.ipynb ├── Gaussian ├── C++ │ ├── ARRAY │ │ ├── .gitignore │ │ ├── Doc │ │ │ └── doxygen_sqlite3.db │ │ ├── Doxyfile │ │ ├── README │ │ ├── include │ │ │ ├── ApplyFonc.hpp │ │ │ ├── Array.hpp │ │ │ ├── ArrayException.hpp │ │ │ ├── Array_Access_Operators.hpp │ │ │ ├── Array_Array_Operators.hpp │ │ │ ├── Array_Constructors_RangeBased.hpp │ │ │ ├── Array_Constructors_RangeBased_NotSafe.hpp │ │ │ ├── Array_Output.hpp │ │ │ ├── Array_Scalar_Operators.hpp │ │ │ ├── Array_iterators.hpp │ │ │ ├── Array_resize.hpp │ │ │ ├── CEngine.hpp │ │ │ ├── FEngine.hpp │ │ │ ├── MacroRestrict.hpp │ │ │ ├── Range.hpp │ │ │ ├── Slices.hpp │ │ │ └── mainpage.h │ │ └── try │ │ │ ├── CMakeLists.txt │ │ │ └── main.cpp │ ├── CMakeLists.txt │ ├── README.md │ ├── RandomFeedMatrix.hpp │ ├── 
factorMatrix.hpp │ ├── main.cpp │ └── rando.hpp ├── C++Lib │ ├── CMakeLists.txt │ ├── README.md │ ├── RandomFeedMatrix.hpp │ ├── factorMatrix.hpp │ ├── main.cpp │ ├── protos_lapack.hpp │ └── rando.hpp ├── Ju │ ├── README.md │ ├── Rando.jl │ ├── main.jl │ ├── script │ └── script-m ├── JuLib │ ├── README.md │ ├── main.jl │ └── script ├── Numba │ ├── README.md │ ├── RandomFeedMatrix.py │ ├── factorMatrix.py │ ├── main.py │ ├── rando.py │ └── script ├── Py │ ├── README.md │ ├── RandomFeedMatrix.py │ ├── factorMatrix.py │ ├── main.py │ └── rando.py ├── PyScipy │ ├── README.md │ ├── RandomFeedMatrix.py │ ├── main.py │ ├── plot.pdf │ └── rando.py ├── PyVec │ ├── README.md │ ├── RandomFeedMatrix.py │ ├── factorMatrix.py │ ├── main.py │ ├── plot.pdf │ └── rando.py ├── Pythran │ ├── README.md │ ├── RandomFeedMatrix.py │ ├── factorMatrix.py │ ├── main.py │ ├── rando.py │ └── script ├── PythranVec │ ├── README.md │ ├── RandomFeedMatrix.py │ ├── factorMatrix.py │ ├── main.py │ ├── plot.pdf │ ├── rando.py │ └── script ├── README.md ├── Results │ ├── Benchmarks │ │ ├── kepler-nolibs.png │ │ ├── kepler-only-libs.png │ │ └── kepler.png │ ├── README.md │ ├── gpc │ ├── gpc-nolibs │ └── gr.py └── runAllTests.sh ├── LICENSE ├── MicroBenchmarks ├── C++-xtensor │ ├── CMakeLists.txt │ ├── README.md │ ├── get_time.hpp │ ├── main_cl.cpp │ ├── main_lapl_1.cpp │ └── main_lapl_2.cpp ├── C++ │ ├── CMakeLists.txt │ ├── README.md │ ├── get_time.hpp │ ├── main_cl.cpp │ ├── main_lapl_1.cpp │ └── main_lapl_2.cpp ├── Ju │ ├── README.md │ ├── main_cl.jl │ ├── main_lapl_1d.jl │ ├── main_lapl_2d.jl │ ├── script │ └── script-m ├── Numba │ ├── README.md │ ├── main_cl.py │ ├── main_lapl_1d.py │ ├── main_lapl_2d.py │ └── script ├── Py │ ├── README.md │ ├── main_cl.py │ ├── main_lapl_1d.py │ └── main_lapl_2d.py ├── Pythran │ ├── README.md │ ├── cl_1.py │ ├── cl_2.py │ ├── lapl1d_1.py │ ├── lapl1d_2.py │ ├── lapl2d_1.py │ ├── lapl2d_2.py │ ├── main_cl.py │ ├── main_lapl_1d.py │ ├── main_lapl_2d.py │ └── 
script ├── README.md ├── Results │ ├── Benchmarks │ │ ├── kepler-cl.png │ │ ├── kepler-lapl_1.png │ │ └── kepler-lapl_2.png │ ├── README.md │ ├── gr.py │ ├── vis_cl │ ├── vis_lapl_1 │ └── vis_lapl_2 └── runAllTests.sh ├── README.md ├── SaintVenant ├── C │ ├── compile-cpu.sh │ ├── compile-gpu.sh │ ├── main1d-gpu-kernels.cu │ ├── main1d-gpu-kernels.hpp │ ├── main1d-gpu.cu │ └── main1d.cpp └── Ju │ ├── main1d-gpu-kernels.jl │ ├── main1d-gpu.jl │ ├── main1d.jl │ ├── main2d.jl │ ├── run_cpu.sh │ └── run_gpu.sh ├── Sparse ├── C++ │ ├── CMakeLists.txt │ ├── Csr.hpp │ ├── PreLapl.hpp │ ├── PreSparse.hpp │ ├── README.md │ └── main.cpp ├── Ju │ ├── README.md │ ├── Sparse23.jl │ ├── Sparse23push.jl │ ├── Sparse23raw.jl │ ├── main.jl │ ├── script │ └── script-m ├── Numba │ ├── README.md │ ├── build2.py │ ├── build3.py │ ├── main.py │ └── script ├── Py │ ├── README.md │ ├── build.py │ └── main.py ├── Pythran │ ├── README.md │ ├── build2.py │ ├── build3.py │ ├── main.py │ └── script ├── README.md ├── Results │ ├── Arithmetic-Intensity.md │ ├── Benchmarks │ │ ├── gpc-2-b.png │ │ ├── gpc-2-p.png │ │ ├── gpc-3-b.png │ │ └── gpc-3-p.png │ ├── README.md │ └── gr.py └── runAllTests.sh ├── TODO └── Weno ├── C++-Modulo ├── Burghers.hpp ├── CMakeLists.txt ├── Convection.hpp ├── GodunovFlux.hpp ├── LaxFriedrichsFlux.hpp ├── README.md ├── RK3TVD.hpp ├── Weno.hpp └── main.cpp ├── C++-Pointers ├── Burghers.hpp ├── CMakeLists.txt ├── Convection.hpp ├── GodunovFlux.hpp ├── LaxFriedrichsFlux.hpp ├── README.md ├── RK3TVD.hpp ├── Weno.hpp └── main.cpp ├── C++ ├── Burghers.hpp ├── CMakeLists.txt ├── Convection.hpp ├── GodunovFlux.hpp ├── LaxFriedrichsFlux.hpp ├── README.md ├── RK3TVD.hpp ├── Weno.hpp └── main.cpp ├── C++NoCopy ├── Burghers.hpp ├── CMakeLists.txt ├── Convection.hpp ├── GodunovFlux.hpp ├── LaxFriedrichsFlux.hpp ├── README.md ├── RK3TVD.hpp ├── Weno.hpp └── main.cpp ├── Fortran ├── CMakeLists.txt ├── m_RK3TVDData.F90 ├── m_burghers.F90 ├── m_godunov.F90 ├── m_weno.F90 └── main.F90 
├── Ju ├── Burghers.jl ├── Convection.jl ├── Godunov.jl ├── LaxFriedrichs.jl ├── README.md ├── RK3TVD.jl ├── Weno.jl ├── main.jl ├── script └── script-m ├── Numba ├── Burghers.py ├── Convection.py ├── GodunovFlux.py ├── LaxFriedrichs.py ├── Numfluxes.py ├── README.md ├── RK3TVD.py ├── Weno.py ├── main.py └── script ├── Py ├── Burghers.py ├── Convection.py ├── GodunovFlux.py ├── LaxFriedrichs.py ├── README.md ├── RK3TVD.py ├── Weno.py ├── main.py └── profile ├── PyVec ├── Burghers.py ├── Convection.py ├── GodunovFlux.py ├── LaxFriedrichs.py ├── README.md ├── RK3TVD.py ├── Weno.py └── main.py ├── README.md ├── Results ├── Look.py └── README.md └── runAllTests.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | _*_.tex 3 | *.pyc 4 | Build* 5 | *~ 6 | __tmpe 7 | *_temp 8 | lib*a 9 | __pycache__ 10 | *.so 11 | Running* 12 | gp* 13 | *mem 14 | .ipynb_checkpoints 15 | *.sage.py 16 | TAGS 17 | *.pdf 18 | *.orig -------------------------------------------------------------------------------- /CallBack/C++-Fonctor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(Bench) 3 | enable_language(CXX) 4 | # Go to Build directory. Then: 5 | # To use intel compiler 6 | # CXX=icpc cmake .. 7 | # for clang++: 8 | # CXX=clang++ cmake .. 9 | # otherwise, to use g++: 10 | # cmake .. 
11 | # 12 | 13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 15 | 16 | 17 | else () 18 | set (USING_GNU TRUE) 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -DGCC -std=c++14 -march=native") 20 | 21 | endif () 22 | include_directories( 23 | ${CMAKE_SOURCE_DIR}/ARRAY/include 24 | ) 25 | add_executable( 26 | run 27 | ../main.cpp 28 | ) 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /CallBack/C++-Fonctor/README.md: -------------------------------------------------------------------------------- 1 | You need cmake 2 | 3 | Compilation: 4 | ----------- 5 | 6 | ``` 7 | mkdir Build 8 | cd Build 9 | cmake .. 10 | make 11 | ``` 12 | a file "run" is created 13 | 14 | Run the code: 15 | ------------ 16 | from Build/ directory, type: 17 | ``` 18 | run 19 | ``` 20 | 21 | By default, we use g++. You can change the compiler to use, for 22 | example clang++. 23 | For this just replace: 24 | 25 | ``` 26 | cmake .. 27 | ``` 28 | by: 29 | ``` 30 | CXX=clang++ cmake .. 31 | ``` 32 | -------------------------------------------------------------------------------- /CallBack/C++-Pointer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(Bench) 3 | enable_language(CXX) 4 | # Go to Build directory. Then: 5 | # To use intel compiler 6 | # CXX=icpc cmake .. 7 | # for clang++: 8 | # CXX=clang++ cmake .. 9 | # otherwise, to use g++: 10 | # cmake .. 
11 | # 12 | 13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 15 | 16 | 17 | else () 18 | set (USING_GNU TRUE) 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -DGCC -std=c++14 -march=native") 20 | 21 | endif () 22 | include_directories( 23 | ${CMAKE_SOURCE_DIR}/ARRAY/include 24 | ) 25 | add_executable( 26 | run 27 | ../main.cpp 28 | ) 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /CallBack/C++-Pointer/README.md: -------------------------------------------------------------------------------- 1 | You need cmake 2 | 3 | Compilation: 4 | ----------- 5 | 6 | ``` 7 | mkdir Build 8 | cd Build 9 | cmake .. 10 | make 11 | ``` 12 | a file "run" is created 13 | 14 | Run the code: 15 | ------------ 16 | from Build/ directory, type: 17 | ``` 18 | run 19 | ``` 20 | 21 | By default, we use g++. You can change the compiler to use, for 22 | example clang++. 23 | For this just replace: 24 | 25 | ``` 26 | cmake .. 27 | ``` 28 | by: 29 | ``` 30 | CXX=clang++ cmake .. 31 | ``` 32 | -------------------------------------------------------------------------------- /CallBack/C++-lambda/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(Bench) 3 | enable_language(CXX) 4 | # Go to Build directory. Then: 5 | # To use intel compiler 6 | # CXX=icpc cmake .. 7 | # for clang++: 8 | # CXX=clang++ cmake .. 9 | # otherwise, to use g++: 10 | # cmake .. 
11 | # 12 | 13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 15 | 16 | 17 | else () 18 | set (USING_GNU TRUE) 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -DGCC -std=c++14 -march=native") 20 | 21 | endif () 22 | include_directories( 23 | ${CMAKE_SOURCE_DIR}/ARRAY/include 24 | ) 25 | add_executable( 26 | run 27 | ../main.cpp 28 | ) 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /CallBack/C++-lambda/README.md: -------------------------------------------------------------------------------- 1 | You need cmake 2 | 3 | Compilation: 4 | ----------- 5 | 6 | ``` 7 | mkdir Build 8 | cd Build 9 | cmake .. 10 | make 11 | ``` 12 | a file "run" is created 13 | 14 | Run the code: 15 | ------------ 16 | from Build/ directory, type: 17 | ``` 18 | run 19 | ``` 20 | 21 | By default, we use g++. You can change the compiler to use, for 22 | example clang++. 23 | For this just replace: 24 | 25 | ``` 26 | cmake .. 27 | ``` 28 | by: 29 | ``` 30 | CXX=clang++ cmake .. 31 | ``` 32 | -------------------------------------------------------------------------------- /CallBack/C++-lambda/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | using namespace std; 12 | using namespace std::chrono; 13 | 14 | // Clock! 15 | class Mtime 16 | { 17 | high_resolution_clock::time_point t1 ; 18 | public: 19 | // Initialize (start time!) 20 | void start() 21 | { 22 | t1= high_resolution_clock::now(); 23 | } 24 | // Get duration since timer was started, in seconds. 
25 | double sec() const 26 | { 27 | high_resolution_clock::time_point t2= high_resolution_clock::now(); 28 | return 1.e-9* 29 | static_cast(duration_cast(t2 - t1 ).count()); 30 | } 31 | }; 32 | string host() 33 | { 34 | char hostnameC[HOST_NAME_MAX]; 35 | gethostname(hostnameC, HOST_NAME_MAX); 36 | return string(hostnameC); 37 | } 38 | 39 | template double trapz(Fonc &F,double a, double b, int n) 40 | { 41 | auto h=(b-a)/n; 42 | auto sum=0.5*(F(a)+F(b)); 43 | for(int i=1;i<=n;i++) 44 | sum+=F(i*h); 45 | 46 | return sum*h; 47 | } 48 | int main() 49 | { 50 | auto hostname = host(); 51 | cout<<"hostname: "< 1.e-15 35 | # 36 | x-= F/(4*cos(x) - exp(x)) 37 | F= 4*sin(x)-exp(x)+t 38 | # 39 | end 40 | x 41 | end 42 | 43 | io = IOContext(stdout, :compact => false) 44 | 45 | fw=open("RunningOn"*gethostname(),"w") 46 | # Note: it seems that a loop like: 47 | # 48 | # for F in [f,g,implicit] 49 | # bench_res = @benchmark trapz(0.,1.,1000,F) 50 | # end 51 | # 52 | # is incompatible with @benchmark... who knows why? 
53 | 54 | f(0.5) 55 | println("f:") 56 | bench_res = @benchmark trapz(0.,1.,1000,f); 57 | show(io, bench_res) 58 | write(fw,"\nf: "*string(bench_res)*"\n") 59 | 60 | g(0.5) 61 | println("\ng:") 62 | bench_res = @benchmark trapz(0.,1.,1000,g); 63 | show(io, bench_res) 64 | write(fw,"g: "*string(bench_res)*"\n") 65 | 66 | implicit(0.5) 67 | println("\nimplicit:") 68 | bench_res = @benchmark trapz(0.,1.,1000,implicit) 69 | show(io, bench_res) 70 | write(fw,"implicit: "*string(bench_res)*"\n") 71 | 72 | println("\nend.") 73 | close(fw) 74 | -------------------------------------------------------------------------------- /CallBack/Ju/script: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | julia --check-bounds=no -O3 main.jl 3 | -------------------------------------------------------------------------------- /CallBack/Ju/script-m: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | julia --check-bounds=no -O3 --track-allocation=user main.jl 3 | -------------------------------------------------------------------------------- /CallBack/Numba/README.md: -------------------------------------------------------------------------------- 1 | To run the code, just type: 2 | 3 | ``` 4 | ./script 5 | ``` 6 | 7 | If you do not have a machine with AVX instructions, comment out the 8 | line 9 | ``` 10 | export NUMBA_ENABLE_AVX=1 11 | ``` 12 | in ./script . 
13 | -------------------------------------------------------------------------------- /CallBack/Numba/main.py: -------------------------------------------------------------------------------- 1 | from math import exp,sin,cos 2 | from numba import jit,float64,int32 3 | import time 4 | import socket 5 | 6 | @jit(nopython=True) 7 | def trapz(F,a,b,n): 8 | h=(b-a)/n 9 | sum=0.5*(F(a)+F(b)) 10 | for i in range(1,n): 11 | sum+=F(i*h) 12 | return sum*h 13 | 14 | @jit(float64(float64),nopython=True) 15 | def f(x): 16 | return exp(-x)*x*x 17 | 18 | @jit(float64(float64),nopython=True) 19 | def g(x): 20 | if x<0.5: 21 | h=-exp(-x) 22 | else: 23 | h= exp(x) 24 | return h*x*x 25 | 26 | @jit(float64(float64),nopython=True) 27 | def implicit(t): 28 | # implicit = root of 4*sin(x)-exp(x)+t 29 | # Newton iterations, starting from zero: 30 | x=0.0 31 | F= 4*sin(x)-exp(x)+t 32 | while abs(F)> 1.e-15: 33 | x-= F/(4*cos(x) - exp(x)) 34 | F= 4*sin(x)-exp(x)+t 35 | return x 36 | 37 | #----------------------main program starts here ------------------ 38 | loops=10000 39 | n=1000 40 | 41 | fic=open("RunningOn"+socket.gethostname(),"w") 42 | 43 | for F in [f,g,implicit]: 44 | # running once seems to improve performances (just in time compilation !) 
45 | sum=trapz(F,0.0,1.0,n) 46 | # 47 | t1 = time.time() 48 | for i in range(0,loops): 49 | sum=trapz(F,0.0,1.0,n) 50 | t=(time.time()-t1)/loops 51 | print(F.__name__," ",t," ",sum) 52 | fic.write(F.__name__+": "+str(t)+"\n") 53 | 54 | fic.close() 55 | print("end.") 56 | -------------------------------------------------------------------------------- /CallBack/Numba/script: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export NUMBA_ENABLE_AVX=1 4 | python3 main.py 5 | -------------------------------------------------------------------------------- /CallBack/Py/README.md: -------------------------------------------------------------------------------- 1 | To run the code, just type: 2 | 3 | ``` 4 | python3 main.py 5 | ``` 6 | -------------------------------------------------------------------------------- /CallBack/Py/main.py: -------------------------------------------------------------------------------- 1 | from math import exp,sin,cos 2 | import time 3 | import socket 4 | 5 | def trapz(F,a,b,n): 6 | h=(b-a)/n 7 | sum=0.5*(F(a)+F(b)) 8 | for i in range(1,n): 9 | sum+=F(i*h) 10 | return sum*h 11 | 12 | def f(x): 13 | return exp(-x)*x*x 14 | 15 | def g(x): 16 | if x<0.5: 17 | h=-exp(-x) 18 | else: 19 | h= exp(x) 20 | return h*x*x 21 | 22 | def implicit(t): 23 | # implicit = root of 4*sin(x)-exp(x)+t 24 | # Newton iterations, starting from zero: 25 | x=0.0 26 | F= 4*sin(x)-exp(x)+t 27 | while abs(F)> 1.e-15: 28 | x-= F/(4*cos(x) - exp(x)) 29 | F= 4*sin(x)-exp(x)+t 30 | return x 31 | 32 | #----------------------main program starts here ------------------ 33 | loops=10000 34 | n=1000 35 | 36 | fic=open("RunningOn"+socket.gethostname(),"w") 37 | 38 | for F in [f,g,implicit]: 39 | t1 = time.time() 40 | for i in range(0,loops): 41 | sum=trapz(F,0.0,1.0,n) 42 | t=(time.time()-t1)/loops 43 | print(F.__name__," ",t," ",sum) 44 | fic.write(F.__name__+": "+str(t)+"\n") 45 | fic.close() 46 | print("end.") 47 | 
-------------------------------------------------------------------------------- /CallBack/Pythran/README.md: -------------------------------------------------------------------------------- 1 | Compile what should be just _pythranized_ and run the code; just type: 2 | ``` 3 | ./script 4 | ``` 5 | 6 | !Z! if you are using OpenBlas, you probably must create a .pythranrc in your home directory like this: 7 | 8 | ``` 9 | [compiler] 10 | blas=openblas 11 | ``` -------------------------------------------------------------------------------- /CallBack/Pythran/f.py: -------------------------------------------------------------------------------- 1 | from math import exp 2 | #pythran export capsule f(float) 3 | def f(x): 4 | return exp(-x)*x*x 5 | -------------------------------------------------------------------------------- /CallBack/Pythran/g.py: -------------------------------------------------------------------------------- 1 | from math import exp 2 | #pythran export capsule g(float) 3 | def g(x): 4 | if x<0.5: 5 | h=-exp(-x) 6 | else: 7 | h= exp(x) 8 | return h*x*x 9 | -------------------------------------------------------------------------------- /CallBack/Pythran/implicit.py: -------------------------------------------------------------------------------- 1 | from math import exp,sin,cos 2 | #pythran export capsule implicit(float) 3 | def implicit(t): 4 | # implicit = root of 4*sin(x)-exp(x)+t 5 | # Newton iterations, starting from zero: 6 | x=0.0 7 | F= 4*sin(x)-exp(x)+t 8 | while abs(F)> 1.e-15: 9 | x-= F/(4*cos(x) - exp(x)) 10 | F= 4*sin(x)-exp(x)+t 11 | return x 12 | -------------------------------------------------------------------------------- /CallBack/Pythran/main.py: -------------------------------------------------------------------------------- 1 | from math import exp 2 | import time 3 | import socket 4 | from trapz import trapz 5 | from f import f 6 | from g import g 7 | from implicit import implicit 8 | 9 | 10 | 11 | #----------------------main 
program starts here ------------------ 12 | loops=10000 13 | n=1000 14 | 15 | fic=open("RunningOn"+socket.gethostname(),"w") 16 | 17 | # workaround: PyCapsule' object has no attribute '__name__' (see ../Py/main.py) 18 | name={f:"f",g:"g",implicit:"implicit"} 19 | 20 | for F in [f,g,implicit]: 21 | t1 = time.time() 22 | for i in range(0,loops): 23 | sum=trapz(F,0.0,1.0,n) 24 | t=(time.time()-t1)/loops 25 | #print(F.__name__," ",t," ",sum) 26 | #fic.write(F.__name__+": "+str(t)+"\n") 27 | print(name[F]+": ",t," ",sum) 28 | fic.write(name[F]+": "+str(t)+"\n") 29 | fic.close() 30 | print("end.") 31 | -------------------------------------------------------------------------------- /CallBack/Pythran/script: -------------------------------------------------------------------------------- 1 | pythran -march=native -O3 trapz.py 2 | pythran -march=native -O3 f.py 3 | pythran -march=native -O3 g.py 4 | pythran -march=native -O3 implicit.py 5 | echo "run test:" 6 | python3 main.py -------------------------------------------------------------------------------- /CallBack/Pythran/trapz.py: -------------------------------------------------------------------------------- 1 | #pythran export trapz(float(float),float,float,int) 2 | def trapz(F,a,b,n): 3 | h=(b-a)/n 4 | sum=0.5*(F(a)+F(b)) 5 | for i in range(1,n): 6 | sum+=F(i*h) 7 | return sum*h 8 | -------------------------------------------------------------------------------- /CallBack/README.md: -------------------------------------------------------------------------------- 1 | ### Description: 2 | 3 | We test some examples of callbacks: 4 | 5 | - a very simple function (one line of code). 6 | 7 | - a function with a conditional. 8 | 9 | - a more computationally expensive function (implicitly defined, we use Newton method to evaluate it). 10 | 11 | All these functions are integrated using the trapezoidal rule. 
12 | 13 | ### Motivation: 14 | 15 | It is often said that, for "small", inexpensive functions, the performance of a callback depends on the way the callback is passed: 16 | 17 | - in C++, classical C-like function passing (with pointers) is generally described as inefficient; using function objects (functors) is supposed to allow inlining and should greatly improve performance. For computationally expensive functions, the difference should become negligible. 18 | 19 | - How does Julia perform? And Python, Pythran, Numba? 20 | 21 | ### Author: 22 | 23 | Thierry Dumont tdumont@math.univ-lyon1.fr 24 | 25 | ### The directories contain: 26 | 27 | - **C++-Pointer**: computation in C++, "C"-like method (pointers). 28 | 29 | - **C++-lambda**: computation in C++, passing a lambda function, when it is possible. 30 | 31 | - **C++-Fonctor**: computation in C++, using function objects (functors). 32 | 33 | - **Py**: Pure Python computation. 34 | 35 | - **Pythran**: Python + Pythran computation. 36 | 37 | - **Numba**: Python + Numba computation. 38 | 39 | - **Ju**: Julia computation. 40 | 41 | 42 | ### Running the benchmarks 43 | 44 | cd successively into C++-xxx, Py, Pythran, Numba, Ju; then look at the documentation in each directory. 45 | 46 | Once you have run the benchmark in **all** directories, go to Results/ 47 | and look at the documentation to learn how to use the results. 48 | 49 | 50 | -------------------------------------------------------------------------------- /CallBack/Results/README.md: -------------------------------------------------------------------------------- 1 | Just run: 2 | 3 | ``` 4 | ./Look.py 5 | ``` 6 | This will give you the computing time divided by the C++-Fonctor computing time. 7 | 8 | Beforehand, you must have run the benchmark in all directories 9 | (C++-Fonctor, C++-lambda, Ju, Py and Numba). 
10 | 11 | If you do not run all the benchmarks (or if you add one), just modify the list 12 | "directories to explore" in Look.py -------------------------------------------------------------------------------- /CallBack/runAllTests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script is supposed to run *all* the test, and then compute 4 | # the final "report" in Results/ 5 | # Not sure it works everywhere. If it does not, improve it, or enter each 6 | # directory and look at README.md to know what to do. 7 | # 8 | for i in C++-Fonctor C++-lambda C++-Pointer; do 9 | echo "--- " 10 | echo "Test: "$i 11 | echo "--- " 12 | (cd $i; mkdir -p Build; cd Build; cmake ..; make; ./run) 13 | done 14 | for i in Ju Numba Pythran ; do 15 | echo "--- " 16 | echo "Test: "$i 17 | echo "--- " 18 | (cd $i; ./script) 19 | done 20 | 21 | echo "--- " 22 | echo "Test: Py" 23 | echo "--- " 24 | (cd Py; python3 ./main.py) 25 | 26 | 27 | echo " " 28 | echo "Make the report:" 29 | (cd Results; ./Look.py) 30 | echo " " 31 | echo "To replay the report, cd Results/ and run ./Look.py " 32 | echo " " 33 | -------------------------------------------------------------------------------- /FeStiff/C++/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(Benchmarks) 3 | enable_language(CXX) 4 | # Go to Build directory. Then: 5 | # To use intel compiler 6 | # CXX=icpc cmake .. 7 | # for clang++: 8 | # CXX=clang++ cmake .. 9 | # otherwise, to use g++: 10 | # cmake .. 
11 | # 12 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 13 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 14 | 15 | else () 16 | set (USING_GNU TRUE) 17 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -std=c++14 -march=native") 18 | 19 | endif () 20 | 21 | add_executable( 22 | run 23 | ../main.cpp 24 | ) 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /FeStiff/C++/README.md: -------------------------------------------------------------------------------- 1 | You need cmake 2 | 3 | Compilation: 4 | ----------- 5 | 6 | ``` 7 | mkdir Build 8 | cd Build 9 | cmake .. 10 | make 11 | ``` 12 | a file "run" is created 13 | 14 | Run the code: 15 | ------------ 16 | from Build/ directory, type: 17 | ``` 18 | run 19 | ``` 20 | 21 | By default, we use g++. You can change the compiler to use, for 22 | example clang++. 23 | For this just replace: 24 | 25 | ``` 26 | cmake .. 27 | ``` 28 | by: 29 | ``` 30 | CXX=clang++ cmake .. 31 | ``` 32 | -------------------------------------------------------------------------------- /FeStiff/C++/Stiffness.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | using namespace std; 4 | //////////////////////////////////////////////////////////////////////// 5 | /// Stiffness matrix for P2 finite element in dimension 2 (Laplacian). 6 | //////////////////////////////////////////////////////////////////////// 7 | class Stiffness 8 | { 9 | double ji[4]; 10 | double grads[36]; 11 | // gradients of basis functions on the reference element, at the 12 | // middle of the edges. 13 | double gq[36]={-1, -1, 1, 1, -1, -1, 1, 0, 1, 0, -1, 0, 0, -1, 14 | 0, 1, 0, 1, 0, -2, -2, -2, 2, 0, 0, 2, 2, 2, 2, 15 | 0, 0, 2, -2, -2, -2, 0}; 16 | // we will compute only an half part (with diagonal) of the matrix (symetry!) 
17 | inline int ind(int i,int j){return i*(i+1)/2+j;} 18 | // transformation current element -> reference lement (x by determinant)/ 19 | inline void JinvDetTrans(double x[],double y[]) 20 | { 21 | ji[0]=-y[0] + y[2] ; ji[1]= y[0] - y[1]; 22 | ji[2] = x[0] - x[2]; ji[3]= -x[0] + x[1]; 23 | // 4 flops 24 | } 25 | public: 26 | Stiffness()//empty constructor 27 | { 28 | } 29 | ~Stiffness()//empty destructor 30 | { 31 | } 32 | // compute the stiffness matrix. 33 | // x[3], y[3]: IN, the trangle. 34 | // m[21] : OUT, the computed matrix. 35 | void operator()(double x[],double y[],double m[]) 36 | { 37 | 38 | JinvDetTrans(x,y); 39 | for(int f=0;f<6;f++) 40 | for(int p=0;p<3;p++) 41 | { 42 | int d=6*f+2*p; 43 | grads[d] = ji[0]*gq[d]+ji[1]*gq[d+1]; 44 | grads[d+1]= ji[2]*gq[d]+ji[3]*gq[d+1]; 45 | } //18 * 6 = 108 flops. 46 | double det= -(x[1] - x[2])*y[0] + (x[0] - x[2])*y[1] - (x[0] - x[1])*y[2]; 47 | // det: 8 flops. 48 | double dv=1.0/(6.0*det); // 1 flop 49 | for(int i=0;i<6;i++) 50 | for(int j=0;j<=i;j++) 51 | { 52 | double s=0; 53 | for(int k=0;k<3;k++) 54 | s+=grads[6*i+2*k]*grads[6*j+2*k]+grads[6*i+2*k+1]*grads[6*j+2*k+1]; 55 | m[ind(i,j)]=s; 56 | }// 21* 4 = 84 flops. 57 | 58 | for(int i=0;i<21;i++) m[i]*=dv; //21 flops 59 | 60 | //total: 4+18*6+ 8 + 84 +1+21 = 226 flops. 61 | 62 | } 63 | static const int flops = 226; 64 | }; 65 | -------------------------------------------------------------------------------- /FeStiff/C++/rando.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | class rando{ 3 | const long int a,c,m; 4 | int long seed; 5 | public: 6 | rando():a(1103515245),c(12345),m(4294967296) 7 | { 8 | seed=123456789; 9 | } 10 | long int get(){ 11 | seed= (a * seed + c) % m; 12 | return seed; 13 | } 14 | double fv(double vmax=1.) 
15 | { 16 | return vmax*(double) get()/m; 17 | } 18 | }; 19 | 20 | -------------------------------------------------------------------------------- /FeStiff/Ju/README.md: -------------------------------------------------------------------------------- 1 | To run the code, just type: 2 | ``` 3 | ./script 4 | ``` 5 | if you want to profile memory usage (beware, it will slow down the 6 | code!): 7 | ``` 8 | ./script-m 9 | ``` 10 | -------------------------------------------------------------------------------- /FeStiff/Ju/Rando.jl: -------------------------------------------------------------------------------- 1 | module Rando 2 | 3 | export RandoData, fv 4 | 5 | #= 6 | Coefficients for a simple, but reproductible random number generator. 7 | =# 8 | mutable struct RandoData 9 | seed::Int64 10 | a::Int64 11 | c::Int64 12 | m::Int64 13 | RandoData() = new(123456789, 1103515245, 12345, 4294967296) 14 | end 15 | 16 | #= 17 | return a random float in [0,vmax] 18 | =# 19 | function fv!(R::RandoData, vmax=1.) 20 | R.seed = (R.a * R.seed + R.c) % R.m 21 | vmax * Float64(R.seed) / R.m 22 | end 23 | 24 | end 25 | -------------------------------------------------------------------------------- /FeStiff/Ju/Stiffness.jl: -------------------------------------------------------------------------------- 1 | module Stiffness 2 | 3 | export StiffnessData 4 | 5 | struct StiffnessData 6 | grads::Array{Float64, 3} 7 | gq::Array{Float64, 3} #grads of 6 basis functions at 3 quadrature points. 
8 | 9 | StiffnessData() = new(zeros(2, 3, 6), 10 | reshape([-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0,0.0, 11 | -1.0,0.0,1.0,0.0,1.0,0.0,-2.0,-2.0,-2.0,2.0,0.0,0.0,2.0,2.0, 12 | 2.0,2.0,0.0,0.0,2.0,-2.0,-2.0,-2.0,0], 2, 3, 6) 13 | ) 14 | end 15 | 16 | function op!(S::StiffnessData, x::Array{Float64, 1}, y::Array{Float64, 1}, m::Array{Float64, 1}) 17 | a11 = -y[1] + y[3] 18 | a12 = y[1] - y[2] 19 | a21 = x[1] - x[3] 20 | a22 = -x[1] + x[2] 21 | 22 | for f=1:6 23 | @simd for p=1:3 24 | S.grads[1, p, f] = a11 * S.gq[1, p, f] + a12 * S.gq[2, p, f] 25 | S.grads[2, p, f] = a21 * S.gq[1, p, f] + a22 * S.gq[2, p, f] 26 | end 27 | 28 | # this seems slower: 29 | # S.grads[1, 1:3, f] = a11 * S.gq[1, 1:3, f] + a12 * S.gq[2, 1:3, f] 30 | # S.grads[2, 1:3, f] = a21 * S.gq[1, 1:3, f] + a22 * S.gq[2, 1:3, f] 31 | end 32 | 33 | # This seems slower than the for loop above 34 | #@. @views S.grads[1, :, :] = a11 * S.gq[1, :, :] + a12 * S.gq[2, :, :] 35 | #@. @views S.grads[2, :, :] = a21 * S.gq[2, :, :] + a22 * S.gq[2, :, :] 36 | 37 | det = -(x[2] - x[3])*y[1] + (x[1] - x[3])*y[2] - (x[1] - x[2])*y[3] 38 | dv = 1.0 / (6.0 * det) 39 | ii = 1 40 | for i=1:6 41 | for j=1:i 42 | s = 0.0 43 | @simd for k=1:3 44 | s += S.grads[1, k, i] * S.grads[1, k, j] + S.grads[2, k, i] * S.grads[2, k, j] 45 | end 46 | m[ii] = dv * s 47 | 48 | # this seems slower: 49 | # m[ii] = dv * (dot(S.grads[1, :, i], S.grads[1, :, j]) + 50 | # dot(S.grads[2, :, i], S.grads[2, :, j])) 51 | 52 | ii += 1 53 | end 54 | end 55 | 56 | return m 57 | end 58 | 59 | end 60 | -------------------------------------------------------------------------------- /FeStiff/Ju/main.jl: -------------------------------------------------------------------------------- 1 | push!(LOAD_PATH, "./") 2 | 3 | using Rando 4 | using Stiffness 5 | using BenchmarkTools 6 | 7 | 8 | function prSubDiag(m) 9 | 10 | for i=0:5 11 | for j=0:i 12 | print(m[div(i*(i+1), 2) + j+1], " ") 13 | end 14 | 15 | println() 16 | end 17 | end 18 | 19 | 
#=
Overwrite x[1:3] and then y[1:3] with successive draws of the generator `R`
scaled by 10 (the triangle lives in the square [0,10]x[0,10]).
The three abscissae are drawn before the three ordinates.
=#
function RandomTriangle!(R::RandoData, x::Array{Float64, 1}, y::Array{Float64, 1})
    for coords in (x, y), k in 1:3
        coords[k] = Rando.fv!(R, 10.)
    end
end
@jitclass(spec)
class Stiffness:
    # Numba-compiled computation of the elementary matrix of one triangle.
    #
    # Fields (declared in `spec`):
    #   ji    : 2x2 work array holding the coefficients built from the
    #           vertex coordinates.
    #   grads : 18x2 work array, overwritten by every call to op().
    #   gq    : 18x2 constant table; rows 3*i .. 3*i+2 are used together
    #           as the block attached to index i (i = 0..5) in op().
    def __init__(self):
        self.ji=np.empty((2,2))
        self.grads=np.empty((18,2))
        # gq is built directly from its values: the former preliminary
        # np.empty((18,2)) was overwritten immediately and has been dropped.
        self.gq=np.array([-1., -1., 1., 1., -1., -1., 1., 0., 1., 0.,
                          -1., 0., 0., -1., 0., 1., 0., 1., 0., -2., -2.,
                          -2., 2., 0., 0., 2., 2., 2., 2.,0., 0., 2., -2.,
                          -2., -2., 0.]).reshape(18,2)


    def op(self,x,y,m):
        # Fill m[0:21] with the packed lower triangle (row by row) of the
        # 6x6 elementary matrix of the triangle with vertices (x[k], y[k]),
        # k = 0..2. Nothing is returned; m, self.ji and self.grads are
        # modified in place.
        self.ji[0,0]=-y[0] + y[2]
        self.ji[0,1]= y[0] - y[1]
        self.ji[1,0]= x[0] - x[2]
        self.ji[1,1]=-x[0] + x[1]

        # Column 0 / column 1 of grads: the two rows of ji applied to gq.
        c1=self.ji[0,0]
        c2= self.ji[0,1]
        self.grads[:,0] = c1*self.gq[:,0] + c2*self.gq[:,1]
        c1=self.ji[1,0]
        c2= self.ji[1,1]
        self.grads[:,1] = c1*self.gq[:,0] + c2*self.gq[:,1]

        det= -(x[1] - x[2])*y[0] + (x[0] - x[2])*y[1] - (x[0] - x[1])*y[2]
        dv=1.0/(6.0*det)
        # ii walks through the packed storage: entry (i,j), j<=i.
        ii=0
        for i in range(0,6):
            for j in range(0,i+1):
                m[ii]=dv*(np.dot(self.grads[3*i:3*i+3,0],self.grads[3*j:3*j+3,0])
                          +np.dot(self.grads[3*i:3*i+3,1],
                                  self.grads[3*j:3*j+3,1]) )
                ii+=1
from numba import float64,int64
# jitclass lives in numba.experimental (as already imported in the sibling
# Stiffness.py); the top-level `from numba import jitclass` was deprecated
# and later removed from Numba.
from numba.experimental import jitclass
specrando=[
    ("seed",int64), ("a",int64),("c",int64),("m",int64)]
@jitclass(specrando)
class rando:
    # Simple, reproducible linear congruential generator:
    #     seed <- (a * seed + c) mod m
    def __init__(self):
        self.seed=123456789
        self.a=1103515245
        self.c=12345
        self.m=2**32
    def get(self):
        # Advance the generator by one step and return the new integer state.
        self.seed= (self.a * self.seed + self.c) % self.m
        return self.seed
    def fv(self,vmax=1.):
        # Next draw, rescaled: vmax * state / m.
        return vmax*float(self.get())/self.m
if __name__ == "__main__":
    R=rando()
    for i in range(0,100):
        print(R.fv(10.))
class Stiffness:
    """Elementary matrix of one triangle, in plain Python + NumPy.

    Attributes:
        ji: 2x2 work array holding the coefficients built from the vertex
            coordinates.
        grads: 18x2 work array, overwritten by every call to ``op``.
        gq: 18x2 constant table; rows ``3*i .. 3*i+2`` form the block
            attached to index ``i`` (``i = 0..5``) in ``op``.
    """

    def __init__(self):
        self.ji = np.empty((2, 2))
        self.grads = np.empty((18, 2))
        # One line per block of three rows.
        table = (
            (-1., -1.), (1., 1.), (-1., -1.),
            (1., 0.), (1., 0.), (-1., 0.),
            (0., -1.), (0., 1.), (0., 1.),
            (0., -2.), (-2., -2.), (2., 0.),
            (0., 2.), (2., 2.), (2., 0.),
            (0., 2.), (-2., -2.), (-2., 0.),
        )
        self.gq = np.array(table)

    def op(self, x, y, m):
        """Fill ``m[0:21]`` with the packed lower triangle of the 6x6 matrix.

        Args:
            x: abscissae of the three vertices.
            y: ordinates of the three vertices.
            m: output array of at least 21 entries; entry ``(i, j)``,
               ``j <= i``, is stored at index ``i*(i+1)//2 + j``.

        Nothing is returned; ``m``, ``self.ji`` and ``self.grads`` are
        modified in place.
        """
        ji, grads, gq = self.ji, self.grads, self.gq

        ji[0, 0] = -y[0] + y[2]
        ji[0, 1] = y[0] - y[1]
        ji[1, 0] = x[0] - x[2]
        ji[1, 1] = -x[0] + x[1]

        # Each column of grads is one row of ji applied to the table gq.
        for col in (0, 1):
            grads[:, col] = ji[col, 0] * gq[:, 0] + ji[col, 1] * gq[:, 1]

        det = -(x[1] - x[2])*y[0] + (x[0] - x[2])*y[1] - (x[0] - x[1])*y[2]
        scale = 1.0 / (6.0 * det)

        first = grads[:, 0]
        second = grads[:, 1]
        blocks = [slice(3 * k, 3 * k + 3) for k in range(6)]
        pos = 0
        for i, rows_i in enumerate(blocks):
            for rows_j in blocks[:i + 1]:
                m[pos] = scale * (np.dot(first[rows_i], first[rows_j])
                                  + np.dot(second[rows_i], second[rows_j]))
                pos += 1
class rando:
    """Simple, reproducible linear congruential random number generator.

    The state follows ``seed <- (a * seed + c) mod m`` with
    ``a = 1103515245``, ``c = 12345`` and ``m = 2**32``.

    Args:
        seed: initial state. The default reproduces the historical
            sequence used by the benchmarks.
    """

    def __init__(self, seed=123456789):
        self.seed = seed
        self.a = 1103515245
        self.c = 12345
        self.m = 2**32

    def get(self):
        """Advance the generator by one step and return the new integer state."""
        self.seed = (self.a * self.seed + self.c) % self.m
        return self.seed

    def fv(self, vmax=1.):
        """Return the next draw as a float in ``[0, vmax)`` (for ``vmax > 0``)."""
        return vmax*float(self.get())/self.m


if __name__ == "__main__":
    R = rando()
    for i in range(0, 100):
        print(R.fv(10.))
# Pythran kernel: fills m[0:21] with the packed lower triangle (row by row)
# of the 6x6 elementary matrix of the triangle with vertices (x[k], y[k]),
# k = 0..2.
#   x, y  : 1-d float arrays; entries 0..2 are read.
#   m     : 1-d float output array; entries 0..20 are written.
#   ji    : 2-d float array; part of the exported signature but not used
#           in the body below.
#   grads : 2-d float work array, rows 0..17 and columns 0..1 overwritten.
#   gq    : 2-d float constant table, rows 0..17 and columns 0..1 read.
# Nothing is returned.
# The next line is the Pythran export directive, not an ordinary comment:
# it must be kept as is for the module to be compiled.
#pythran export StiffOut(float[:],float[:],float[:],float[:,:],float[:,:],float[:,:])
def StiffOut(x,y,m,ji,grads,gq):
    # Coefficients built from the vertex coordinates.
    a11=-y[0] + y[2]
    a12= y[0] - y[1]
    a21= x[0] - x[2]
    a22=-x[0] + x[1]

    # grads[i,:] = (a11 a12; a21 a22) applied to gq[i,:].
    for i in range(0,18):
        grads[i,0] = a11*gq[i,0] + a12*gq[i,1]
        grads[i,1] = a21*gq[i,0] + a22*gq[i,1]

    # this seems slower:
    # grads[:,0] = a11*gq[:,0] + a12*gq[:,1]
    # grads[:,1] = a21*gq[:,0] + a22*gq[:,1]

    # det is 1 on the reference triangle (0,0),(1,0),(0,1); it is zero for
    # a degenerate (flat) triangle, in which case dv is not finite.
    det= -(x[1] - x[2])*y[0] + (x[0] - x[2])*y[1] - (x[0] - x[1])*y[2]
    dv=1.0/(6.0*det)
    # ii walks through the packed storage of the lower triangle.
    ii=0
    # in the following lines, if we replace 3*i by i3 => cannot compile
    # same when replacing 3*j by j3
    for i in range(0,6):
        for j in range(0,i+1):
            # rows 3*i..3*i+2 of grads form the block attached to index i.
            m[ii]=dv*(grads[3*i:3*i+3,0].dot(grads[3*j:3*j+3,0])
                      +grads[3*i:3*i+3,1].dot(grads[3*j:3*j+3,1]) )
            ii+=1
class Stiffness:
    """Holder of the work arrays handed to the compiled ``StiffOut`` kernel.

    Attributes:
        ji: 2x2 float array passed to the kernel.
        grads: 18x2 float work array passed to the kernel.
        gq: 18x2 constant table passed to the kernel.
    """

    def __init__(self):
        self.ji = np.empty((2, 2))
        self.grads = np.empty((18, 2))
        # One line per block of three rows.
        table = (
            (-1., -1.), (1., 1.), (-1., -1.),
            (1., 0.), (1., 0.), (-1., 0.),
            (0., -1.), (0., 1.), (0., 1.),
            (0., -2.), (-2., -2.), (2., 0.),
            (0., 2.), (2., 2.), (2., 0.),
            (0., 2.), (-2., -2.), (-2., 0.),
        )
        self.gq = np.array(table)

    def op(self, x, y, m):
        """Delegate to ``Sout.StiffOut`` with this object's arrays.

        ``m`` is filled in place by the kernel; nothing is returned.
        """
        kernel = Sout.StiffOut
        kernel(x, y, m, self.ji, self.grads, self.gq)
class rando:
    """Simple, reproducible linear congruential random number generator.

    The state follows ``seed <- (a * seed + c) mod m``.
    """

    def __init__(self):
        self.seed, self.a, self.c, self.m = 123456789, 1103515245, 12345, 2**32

    def get(self):
        """Advance the generator by one step and return the new integer state."""
        state = (self.a * self.seed + self.c) % self.m
        self.seed = state
        return state

    def fv(self, vmax=1.):
        """Return the next draw rescaled as ``vmax * state / m``."""
        draw = float(self.get())
        return vmax * draw / self.m


if __name__ == "__main__":
    R = rando()
    for _ in range(100):
        print(R.fv(10.))
using 5 | 2-dimensional P2 Lagrange finite elements (every book on finite elements 6 | describes this). 7 | 8 | 9 | ### Motivation: 10 | 11 | This computation is simple, but representative of many numerical 12 | computations. It is self-contained; there is no linear algebra and thus 13 | no need for external libraries. 14 | 15 | ### Author: 16 | 17 | Thierry Dumont tdumont@math.univ-lyon1.fr 18 | 19 | ### The directories contain: 20 | 21 | - **C++**: computation in C++. 22 | 23 | - **Py**: Python + numpy computation. 24 | 25 | - **Pythran**: Python + numpy + Pythran computation. 26 | 27 | - **Numba**: Python + numpy + Numba computation. 28 | 29 | - **Ju**: Julia computation. 30 | 31 | - **sage**: _SageMath_ material (see below). 32 | ### Running the benchmarks 33 | 34 | cd successively into C++, Py, Pythran, Numba, Ju; then look at the documentation. 35 | 36 | Once you have run the benchmark in **all** directories (except sage, 37 | which is not for benchmarking!), go to Results/ 38 | and look at the documentation to know how to exploit the results. 39 | 40 | #### Sage material: 41 | 42 | In directory "sage": how to use a (free!) computer algebra system to 43 | help compute and implement finite elements. 44 | 45 | ### Results: 46 | Have a look at [this page in the Wiki](https://github.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/wiki/5-The-FeStiff-benchmark).
#!/usr/bin/python3
#
# Comparison between C++ and the other computations: prints, for every
# benchmark directory, its computing time divided by the C++ computing time.
#
import socket
import sys
from pathlib import Path

# directories to explore ---------
files=[
    "Py",
    "Ju",
    "Pythran",
    "Numba",
    ]
cpp="C++"
#-------------------------------------------


def parsit(D,name,l):
    """Store under D[name] the float written in the text line l."""
    D[name]=float(l.replace("\n",""))


def read_time(filename):
    """Return the timing written on the last non-blank line of filename.

    Raises ValueError when the file holds no value or a malformed one.
    """
    value=None
    with open(filename,"r") as file:
        for line in file:
            # A trailing empty line must not be parsed as a number.
            if line.strip():
                value=float(line)
    if value is None:
        raise ValueError("no timing found in "+filename)
    return value


def relative_times(C,reference=cpp):
    """Return [(name, C[name]/C[reference]), ...] sorted by increasing ratio."""
    base=C[reference]
    return sorted(((n,t/base) for n,t in C.items()),key=lambda x: x[1])


def format_ratio(ratio):
    """Format a ratio with 4 significant digits.

    Cutting str(ratio) after 5 characters, as was done before, silently
    dropped digits or exponents (123456.7 was shown as "12345").
    """
    return "{:.4g}".format(ratio)


def main():
    """Collect the timings of this host and print the report."""
    host=socket.gethostname()
    # build a dict n-> computing time, starting with C++ (the reference)
    C={}
    reference="../"+cpp+"/RunningOn"+host
    if not Path(reference).is_file():
        # Without the reference timing no ratio can be computed.
        sys.exit("File "+reference+" does not exist: run the test in "
                 +cpp+" first.")
    C[cpp]=read_time(reference)
    for n in files:
        filename= "../"+n+"/RunningOn"+host
        if Path(filename).is_file():
            C[n]=read_time(filename)
        else:
            print("\n\nFile "+filename+ " does not exists !")
            print("did you run test in "+n+" ?\n\n")

    print("\nComputing time / Computing time in C++:\n")
    for name,ratio in relative_times(C):
        if name != cpp:
            print("* ",name.ljust(7)," : ",format_ratio(ratio))
    print("\n")


if __name__ == "__main__":
    main()
#!/bin/bash
#
# This script is supposed to run *all* the tests, and then compute
# the final "report" in Results/
# Not sure it works everywhere. If it does not, improve it, or enter each
# directory and look at README.md to know what to do.
#

# The relative paths below are those of the directory holding this script.
cd "$(dirname "$0")" || exit 1

# Steps are chained with && so that a failing step (missing directory,
# failed build) stops its own test instead of running the following
# commands from the wrong directory or on a stale binary.
echo "Test: C++"
echo "--- "
(cd C++ && mkdir -p Build && cd Build && cmake .. && make && ./run)

for i in Ju Numba Pythran ; do
    echo "Test: $i"
    echo "--- "
    (cd "$i" && ./script)
done

echo "Test: Py"
echo "--- "
(cd Py && python3 main.py)


echo " "
echo "Make the report:"
(cd Results && ./Look.py)
echo " "
echo "To replay the report, cd Results/ and run ./Look.py "
echo " "
21 | -------------------------------------------------------------------------------- /Gaussian/C++/ARRAY/.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | _*_.tex 3 | mr2014.pdf 4 | *.aux 5 | toto* 6 | *out 7 | *.bbl 8 | *.blg 9 | Build* 10 | Cfile* 11 | -------------------------------------------------------------------------------- /Gaussian/C++/ARRAY/Doc/doxygen_sqlite3.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/C++/ARRAY/Doc/doxygen_sqlite3.db -------------------------------------------------------------------------------- /Gaussian/C++/ARRAY/README: -------------------------------------------------------------------------------- 1 | include/ :the library. 2 | try/ :a test suite. 3 | 4 | -> to generate the documentation: 5 | doxygen Doxyfile 6 | 7 | -> to compile and run the test suite: 8 | cd try 9 | cd Build 10 | cmake ../CMakeLists.txt 11 | ./run 12 | 13 | have a look at CMakeLists.txt before ! 14 | 15 | bugs and remarks: tdumont@math.univ-lyon1.fr 16 | 17 | -------------------------------------------------------------------------------- /Gaussian/C++/ARRAY/include/ApplyFonc.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ApplyFonc__h 2 | #define ApplyFonc__h 3 | //! apply a function F (of one variable) to the whole Array. 4 | template void applyFonc() 5 | { 6 | for(int i=0;i error at compile time. 
6 | template inline void operator+=(Array& X) 7 | { 8 | #ifdef DEBUG 9 | if(!checkSamePattern(X)) 10 | throw ArrayException("Array::operator+= : patterns differ"); 11 | #endif 12 | const int temp=xsize; 13 | if( temp!=X.size() ) 14 | throw ArrayException("Array += : note the same size"); 15 | Q* Xx=X.data(); 16 | #ifdef ICC 17 | #pragma ivdep 18 | #endif 19 | for(int i=0;i error at compile time. 24 | template inline void operator-=(Array& X) 25 | { 26 | #ifdef DEBUG 27 | if(!checkSamePattern(X)) 28 | throw ArrayException("Array::operator-= : patterns differ"); 29 | #endif 30 | if( xsize!=X.size() ) 31 | throw ArrayException("Array -= : note the same size"); 32 | Q* Xx=X.data(); 33 | for(int i=0;i 6 | namespace Arrays{ 7 | //////////////////////////////////////////////////////////////////// 8 | /// Overload operator <<. 9 | /// 10 | /// This is restricted to Arrays with n<=2 indices! 11 | /////////////////////////////////////////////////////////////////// 12 | //! \param out the stream. 13 | //! \param A the array. 14 | template std::ostream& 15 | operator<<(std::ostream& out,const Array &A) 16 | { 17 | if(n>2) 18 | out<2"< 9 | template class ArrayIterator : 10 | public std::iterator 11 | { 12 | T* p; 13 | public: 14 | //! constructor 15 | //! \param x C vector of an Array. 16 | ArrayIterator(T* x) :p(x) {} 17 | //! copy constructor. 18 | //! \param mit 19 | ArrayIterator(const ArrayIterator& mit) : p(mit.p) {} 20 | //! pre-instantiation (++I) 21 | ArrayIterator& operator++() {++p;return *this;} 22 | //! post-instantiation (I++) 23 | ArrayIterator operator++(int) {ArrayIterator tmp(*this); 24 | ++p; return tmp;} 25 | //! equality test 26 | //! \param rhs test *this with rhs. 27 | bool operator==(const ArrayIterator& rhs) {return p==rhs.p;} 28 | //! non equality test 29 | //! \param rhs test *this with rhs. 30 | bool operator!=(const ArrayIterator& rhs) {return p!=rhs.p;} 31 | //! dereferentiation. 32 | T& operator*() {return *p;} 33 | //! return the pointer. 
34 | T* Tpointer() {return p;} 35 | }; 36 | #endif 37 | -------------------------------------------------------------------------------- /Gaussian/C++/ARRAY/include/Array_resize.hpp: -------------------------------------------------------------------------------- 1 | #ifndef Array_resize__h 2 | #define Array_resize__h 3 | private: 4 | //! Really resize. 5 | //! \param the_size 6 | inline void really_resize(int the_size) 7 | { 8 | if(x!=NULL) delete[] x; 9 | x=new T[xsize]; 10 | deletable=true; 11 | } 12 | public: 13 | //! Resize a 1d Array using a Range. 14 | //! \param R0 15 | void resize(Range R0) 16 | { 17 | if(n!=1) 18 | throw ArrayException("Array: resize incompatible with dimension=",n); 19 | engine.init(R0); 20 | xsize=engine.size(); 21 | really_resize(xsize); 22 | 23 | } 24 | //! Resize a 2d Array using Ranges. 25 | //! \param R0 26 | //! \param R1 27 | void resize(Range R0,Range R1) 28 | { 29 | if(n!=2) 30 | throw ArrayException("Array: resize incompatible with dimension=",n); 31 | engine.init(R0,R1); 32 | xsize=engine.size(); 33 | really_resize(xsize); 34 | } 35 | //! Resize a 3d Array using Ranges. 36 | //! \param R0 37 | //! \param R1 38 | //! \param R2 39 | void resize(Range R0,Range R1,Range R2) 40 | { 41 | if(n!=3) 42 | throw ArrayException("Array: resize incompatible with dimension=",n); 43 | engine.init(R0,R1,R2); 44 | xsize=engine.size(); 45 | really_resize(xsize); 46 | } 47 | //! Resize a 3d Array using Ranges. 48 | //! \param R0 49 | //! \param R1 50 | //! \param R2 51 | //! 
// Define the "Restrict" macro (the "restrict" pointer qualifier) for
// different compilers. The compiler is selected by the build system, which
// passes one of -DGCC, -DICC or -DCLANG.
// If the macro is not defined for a given compiler, a compilation error
// will occur at compile time.
#ifdef GCC
// gcc:
#define Restrict __restrict__
#endif
#ifdef ICC
// Intel compiler (requires the -restrict command line option):
#define Restrict restrict
#endif
#ifdef CLANG
// Clang compiler: "restrict" is a C99 keyword and is not recognized in C++;
// the GNU spelling __restrict__ is.
#define Restrict __restrict__
#endif
\param R 35 | void operator=(const Range& R) 36 | { 37 | begin=R.begin; end=R.end; 38 | one=R.one; 39 | } 40 | //! destructor. 41 | ~Range(){} 42 | }; 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /Gaussian/C++/ARRAY/include/Slices.hpp: -------------------------------------------------------------------------------- 1 | #ifndef Slices__h 2 | #define Slices__h 3 | #include 4 | //! return a "natural" slice at line i (C like Array) or 5 | //! at column i (Fortran like Array). 6 | //! \note A "REFERENCE" is returned! thre is *no* copy of 7 | //! the array of data; so the returned Array is flagged "not deletable" 8 | //! and will actually not delete his datas when the destructor is called. 9 | Array slice_ref(int i) 10 | { 11 | #ifdef DEBUG 12 | pair lims=engine.sliceLimits(); 13 | if(ilims.second) 14 | throw ArrayException("Array::slice_ref, i=",i,"must be in [",lims.first, 15 | lims.second,"["); 16 | #endif 17 | Range R[n-1]; 18 | pair p=engine.sliceRangesIndexes(); 19 | for(int j=p.first;j A(R,false); 21 | A.x=x+engine.slice(i); 22 | return A; 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /Gaussian/C++/ARRAY/try/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(NewRadau5) 3 | enable_language(CXX Fortran) 4 | # Go to Build directory. Then: 5 | # To use intel compiler 6 | # CXX=icpc cmake .. 7 | # for clang++: 8 | # CXX=clang++ cmake .. 9 | # otherwise, to use g++: 10 | # cmake .. 11 | # 12 | # If you use g++, consider this line: set(CMAKE_CXX_COMPILER "g++-4.8") 13 | # (may be you can comment it). 
14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 16 | 17 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$") 18 | set (USING_GNU TRUE) 19 | set(CMAKE_CXX_COMPILER "clang++") 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DCLANG -Wall -std=c++11") 21 | else () 22 | set (USING_GNU TRUE) 23 | #set(CMAKE_CXX_COMPILER "g++-4.8") 24 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -DGCC -DALIGN_64 -ftree-vectorizer-verbose=2 -std=c++11 -march=native") 25 | 26 | endif () 27 | include_directories( 28 | ${CMAKE_SOURCE_DIR}/../include 29 | ${CMAKE_SOURCE_DIR}/../../common/include 30 | ) 31 | add_executable( 32 | run 33 | ../main.cpp 34 | ) 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /Gaussian/C++/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(NewRadau5) 3 | enable_language(CXX Fortran) 4 | # Go to Build directory. Then: 5 | # To use intel compiler 6 | # CXX=icpc cmake .. 7 | # for clang++: 8 | # CXX=clang++ cmake .. 9 | # otherwise, to use g++: 10 | # cmake .. 
11 | # 12 | 13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 15 | 16 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$") 17 | set (USING_GNU TRUE) 18 | set(CMAKE_CXX_COMPILER "clang++") 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DCLANG -Wall -std=c++14") 20 | else () 21 | set (USING_GNU TRUE) 22 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -DGCC -DALIGN_64 -ftree-vectorizer-verbose=2 -std=c++14 -march=native") 23 | 24 | endif () 25 | include_directories( 26 | ${CMAKE_SOURCE_DIR}/ARRAY/include 27 | ) 28 | add_executable( 29 | run 30 | ../main.cpp 31 | ) 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /Gaussian/C++/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Compilation: 3 | ``` 4 | mkdir Build 5 | cd Build 6 | cmake .. 
#!/bin/bash
# Run the Julia Gaussian-elimination benchmark with bounds checking disabled
# and optimisation level 3.
# The level must be attached to the flag ("-O3"): written as "-O 3", julia
# takes "3" as the program file and main.jl is never run.
julia --check-bounds=no -O3 main.jl
using LinearAlgebra

#=
State and coefficients of a simple but reproducible random number generator
(linear congruential: seed <- (a*seed + c) % m).
The fields are concretely typed so that fv() is type-stable. With
seed < m = 2^32 and a < 2^31, a*seed + c stays below 2^63, so Int64
arithmetic gives the same values as the untyped version.
=#
mutable struct RandoData
    seed::Int64
    a::Int64
    c::Int64
    m::Int64
    RandoData() = new(123456789, 1103515245, 12345, 4294967296)
end

#=
Advance the generator R and return a random float in [0,vmax).
=#
function fv(R::RandoData, vmax=1.)
    R.seed = (R.a * R.seed + R.c) % R.m
    vmax * convert(Float64, R.seed) / R.m
end

#=
Fill the matrix M in place with random data taken from R.
Entries are written row by row (i outer, j inner), which fixes the order in
which the generator values are consumed.
=#
function RandomFeedMatrix(M, R::RandoData)
    n, m = size(M)
    # A single @inbounds on the outer loop also covers the inner loop.
    @inbounds for i = 1:n
        for j = 1:m
            M[i, j] = fv(R)
        end
    end
end

#=
Build a random n x (n+1) matrix and return the time, in nanoseconds, spent
in factorize(M). Filling the matrix is not included in the measurement.
=#
function doall(n::Int64)
    Ro = RandoData()
    M = Array{Float64}(undef, n, n + 1)
    RandomFeedMatrix(M, Ro)
    t1 = time_ns()
    factorize(M)
    time_ns() - t1
end
def RandomFeedMatrix(M, R):
    """Fill the 2-D array M in place with values produced by R.fv().

    Entries are written row by row (row index outer, column index inner),
    with one call to R.fv() per entry. Nothing is returned.
    """
    row_count, col_count = M.shape[0], M.shape[1]
    for row in range(row_count):
        for col in range(col_count):
            M[row, col] = R.fv()
from rando import *
import numpy as np
from factorMatrix import *
from RandomFeedMatrix import *
import time
import socket

# Benchmark driver for the Numba version of the Gaussian elimination.
# Pass 1 times "fill + factor" for sizes 2048, 1024, ... down to 4.
# Pass 2 times the fill alone and subtracts it, so D[size] ends up holding
# the factorisation time only.

sizeG = 2048
D = {}

# Checking algo
M = np.empty((128, 128+1))
R = rando()
RandomFeedMatrix(M, R)
print("sum(M) = {}".format(np.sum(M)))
factorMatrix(M)
print("sum(Gauss(M)) = {}".format(np.sum(M)))
print("")

# Pass 1: fill + factor. The repetition count grows by 4 each time the size
# is halved.
size = sizeG
loop = 1
while size > 2:
    R = rando()
    M = np.empty((size, size+1))
    # perf_counter() is monotonic, unlike time.time(), which can jump when
    # the system clock is adjusted.
    t1 = time.perf_counter()
    for _ in range(loop):
        RandomFeedMatrix(M, R)
        factorMatrix(M)
    t = (time.perf_counter()-t1)/loop
    print("size:", size, " t: ", t)
    D[size] = t
    size //= 2
    loop *= 4

print("---")

# Pass 2: fill only, with the same sizes and repetition counts.
size = sizeG
loop = 1
while size > 2:
    R = rando()
    M = np.empty((size, size+1))
    t1 = time.perf_counter()
    for _ in range(loop):
        RandomFeedMatrix(M, R)
    t = (time.perf_counter()-t1)/loop
    D[size] -= t
    size //= 2
    loop *= 4

# One "<size> <seconds>" line per size, smallest size first. The context
# manager closes the file even if a write fails.
with open("RunningOn"+socket.gethostname(), "w") as f:
    for x in sorted(D):
        f.write(str(x)+" "+str(D[x])+"\n")
def factorMatrix(M):
    """Reduce M in place by Gaussian elimination with partial pivoting.

    M is indexed as a 2-D array through M.shape and M[i, j]; the original
    author's note says it must be an (n, n+1) numpy array and that the
    routine was "Not tested!". Nothing is returned.
    """
    nrows = M.shape[0]
    ncols = M.shape[1]
    for line in range(nrows - 1):
        # Pivot search: the first row at or below `line` with the largest
        # absolute value in column `line`.
        best_row = line
        best_abs = abs(M[line, line])
        for r in range(line + 1, nrows):
            if abs(M[r, line]) > best_abs:
                best_abs = abs(M[r, line])
                best_row = r

        # Row exchange, from column `line` onward only.
        if best_row != line:
            for c in range(line, ncols):
                M[line, c], M[best_row, c] = M[best_row, c], M[line, c]

        # Elimination: subtract a multiple of the pivot row from every row
        # below it, again from column `line` onward.
        pivot = M[line, line]
        for r in range(line + 1, nrows):
            factor = M[r, line] / pivot
            for c in range(line, ncols):
                M[r, c] -= factor * M[line, c]
class rando:
    """Simple, reproducible linear congruential random number generator.

    The state is updated as seed <- (a * seed + c) % m with a = 1103515245,
    c = 12345 and m = 2**32.
    """

    def __init__(self, seed=123456789):
        """Create a generator; `seed` defaults to the historical 123456789."""
        self.seed = seed
        self.a = 1103515245
        self.c = 12345
        self.m = 2**32

    def get(self):
        """Advance the generator and return the new integer state."""
        self.seed = (self.a * self.seed + self.c) % self.m
        return self.seed

    def fv(self, vmax=1.):
        """Advance the generator and return the new state scaled by vmax/m."""
        return vmax*float(self.get())/self.m


if __name__ == "__main__":
    R = rando()
    for i in range(0, 100):
        print(R.fv(10.))
from rando import *
import numpy as np
from RandomFeedMatrix import *
import time
import socket
from scipy.linalg import lu

# Benchmark driver for the SciPy version: scipy.linalg.lu is applied to
# square random matrices of size 2048, 1024, ... down to sizemin.
# Pass 1 times "fill + lu"; pass 2 times the fill alone and subtracts it, so
# D[size] ends up holding the lu() time only.

sizeG = 2048
sizemin = 4
D = {}

# Pass 1: fill + LU. The repetition count grows by 4 each time the size is
# halved.
size = sizeG
loop = 1
while size >= sizemin:
    R = rando()
    M = np.empty((size, size))
    # perf_counter() is monotonic, unlike time.time(), which can jump when
    # the system clock is adjusted.
    t1 = time.perf_counter()
    for _ in range(loop):
        RandomFeedMatrix(M, R)
        lu(M)  # the factors are not used, only the time is of interest
    t = (time.perf_counter()-t1)/loop
    print("size:", size, " t: ", t)
    D[size] = t
    size //= 2
    loop *= 4

print("---")

# Pass 2: fill only, with the same sizes and repetition counts.
size = sizeG
loop = 1
while size >= sizemin:
    R = rando()
    M = np.empty((size, size))
    t1 = time.perf_counter()
    for _ in range(loop):
        RandomFeedMatrix(M, R)
    t = (time.perf_counter()-t1)/loop
    D[size] -= t
    size //= 2
    loop *= 4

# One "<size> <seconds>" line per size, smallest size first. The context
# manager closes the file even if a write fails.
with open("RunningOn"+socket.gethostname(), "w") as f:
    for x in sorted(D):
        f.write(str(x)+" "+str(D[x])+"\n")
import numpy as np


def factorMatrix(M):
    """Reduce M in place by Gaussian elimination with partial pivoting.

    Vectorised numpy variant: the pivot search, the row exchange and the
    elimination step each act on whole slices of M. The original author's
    note says M must be an (n, n+1) numpy array and that the routine was
    "Not tested!". Nothing is returned.
    """
    nrows, ncols = M.shape[0], M.shape[1]
    for line in range(nrows - 1):
        # argmax gives the offset, within rows line..nrows-1, of the first
        # largest absolute value in column `line`.
        pivot_row = line + np.argmax(np.abs(M[line:nrows, line]))

        # Exchange the two complete rows when the pivot is not in place.
        if pivot_row != line:
            M[[line, pivot_row]] = M[[pivot_row, line]]

        # Elimination on the block below the pivot row, from column `line`.
        pivot = M[line, line]
        below = slice(line + 1, nrows)
        right = slice(line, ncols)
        factors = M[below, [line]] / pivot  # column vector, one per row
        M[below, right] -= factors * M[[line], right]
class rando:
    """Simple, reproducible linear congruential random number generator.

    The state is updated as seed <- (a * seed + c) % m with a = 1103515245,
    c = 12345 and m = 2**32.
    """

    def __init__(self, seed=123456789):
        """Create a generator; `seed` defaults to the historical 123456789."""
        self.seed = seed
        self.a = 1103515245
        self.c = 12345
        self.m = 2**32

    def get(self):
        """Advance the generator and return the new integer state."""
        self.seed = (self.a * self.seed + self.c) % self.m
        return self.seed

    def fv(self, vmax=1.):
        """Advance the generator and return the new state scaled by vmax/m."""
        return vmax*float(self.get())/self.m


if __name__ == "__main__":
    R = rando()
    for i in range(0, 100):
        print(R.fv(10.))
from rando import *
import numpy as np
import factorMatrix
from RandomFeedMatrix import *
import time
import socket

# Benchmark driver for the Pythran version of the Gaussian elimination.
# Pass 1 times "fill + factor" for sizes 2048, 1024, ... down to sizelim.
# Pass 2 times the fill alone and subtracts it, so D[size] ends up holding
# the factorisation time only.

sizeG = 2048
sizelim = 4
D = {}

# Checking algo
M = np.empty((128, 128+1))
R = rando()
RandomFeedMatrix(M, R)
print("sum(M) = {}".format(np.sum(M)))
factorMatrix.factorMatrix(M)
print("sum(Gauss(M)) = {}".format(np.sum(M)))
print("")

# Pass 1: fill + factor. The repetition count grows by 4 each time the size
# is halved.
size = sizeG
loop = 1
while size >= sizelim:
    R = rando()
    M = np.empty((size, size+1))
    # perf_counter() is monotonic, unlike time.time(), which can jump when
    # the system clock is adjusted.
    t1 = time.perf_counter()
    for _ in range(loop):
        RandomFeedMatrix(M, R)
        factorMatrix.factorMatrix(M)
    t = (time.perf_counter()-t1)/loop
    print("size:", size, " t: ", t)
    D[size] = t
    size //= 2
    loop *= 4

print("---")

# Pass 2: fill only, with the same sizes and repetition counts.
size = sizeG
loop = 1
while size >= sizelim:
    R = rando()
    M = np.empty((size, size+1))
    t1 = time.perf_counter()
    for _ in range(loop):
        RandomFeedMatrix(M, R)
    t = (time.perf_counter()-t1)/loop
    print("size:", size, " t: ", t)
    D[size] -= t
    size //= 2
    loop *= 4

# One "<size> <seconds>" line per size, smallest size first. The context
# manager closes the file even if a write fails.
with open("RunningOn"+socket.gethostname(), "w") as f:
    for x in sorted(D):
        f.write(str(x)+" "+str(D[x])+"\n")
#!/bin/bash
# Compile factorMatrix.py with Pythran, then run the benchmark.
# Stop at the first failing command: if the compilation fails, main.py could
# still import the plain factorMatrix.py and the benchmark would silently
# time interpreted Python instead of the Pythran build.
set -e
pythran -march=native -O3 factorMatrix.py
echo "run test:"
python3 main.py
import numpy as np
#pythran export factorMatrix(float[:,:])
def factorMatrix(M):
    """Reduce M in place by Gaussian elimination with partial pivoting.

    Vectorised variant written for Pythran. The original author's notes say
    M must be an (n, n+1) numpy array, that the routine was "Not tested!",
    and that the fancy-index forms M[[line,cmax]] and M[(line+1):n,[line]]
    did not work in Pythran at the time, hence the explicit row copy and the
    reshape() calls below. Nothing is returned.
    """
    nrows = M.shape[0]
    ncols = M.shape[1]

    for line in range(nrows - 1):
        # Offset of the first largest |entry| of column `line`, searched in
        # rows line..nrows-1, turned back into a row index.
        pivot_row = line + np.argmax(abs(M[line:nrows, line]))

        # Exchange the two complete rows through an explicit copy.
        if pivot_row != line:
            saved = np.array(M[pivot_row, :])
            M[pivot_row, :] = M[line, :]
            M[line, :] = saved

        # Elimination on the block below the pivot row, from column `line`.
        pivot = M[line, line]
        factors = M[(line+1):nrows, line].reshape((-1, 1)) / pivot
        pivot_part = M[line, line:ncols].reshape((1, -1))
        M[(line+1):nrows, line:ncols] -= factors * pivot_part
#!/bin/bash
# Compile factorMatrix.py with Pythran, then run the benchmark.
# Stop at the first failing command: if the compilation fails, main.py could
# still import the plain factorMatrix.py and the benchmark would silently
# time interpreted Python instead of the Pythran build.
set -e
pythran -march=native -O3 factorMatrix.py
echo "run test:"
python3 main.py
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/Results/Benchmarks/kepler-nolibs.png -------------------------------------------------------------------------------- /Gaussian/Results/Benchmarks/kepler-only-libs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/Results/Benchmarks/kepler-only-libs.png -------------------------------------------------------------------------------- /Gaussian/Results/Benchmarks/kepler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/Results/Benchmarks/kepler.png -------------------------------------------------------------------------------- /Gaussian/Results/README.md: -------------------------------------------------------------------------------- 1 | First, do: 2 | ``` 3 | mkdir ./Results (if ./Results does not exist). 4 | ./gr.py 5 | ``` 6 | It will parse your results (in the "RunningOn _your_ _hostname_" files) and populate 7 | Results/ with files that gnuplot can plot. 8 | 9 | Then: 10 | ``` 11 | gnuplot 12 | gnuplot> load "gpc" 13 | ``` 14 | will plot the results. The C++ computing time is taken as the unit. 15 | 16 | 17 | An example benchmark is Benchmarks/kepler.png, which was run on a 18 | 4-core i5-4670 CPU @ 3.40GHz. 
# Plot every benchmark result, relative to the C++ computing time,
# on log-log axes.
set logscale
set key top right
set xlabel "matrix size"
set ylabel "cpu time / cpu time C++"
set title "All results"
# One plot command instead of a chain of replot calls: same curves, drawn
# once. The trailing 'set title "Using libraries"' was removed because it
# contradicted the "All results" title set above.
plot "./Results/C++" title "C++" with linespoint pointtype 7, \
     "./Results/Py" title "Python" with linespoint, \
     "./Results/PyVec" title "Python vect." with linespoint, \
     "./Results/Ju" title "Julia" with linespoint, \
     "./Results/PythranVec" title "Pythran vec." with linespoint, \
     "./Results/Pythran" title "Pythran" with linespoint, \
     "./Results/Numba" title "Numba" with linespoint, \
     "./Results/JuLib" title "Julia + libs" with linespoint, \
     "./Results/PyScipy" title "Python + Scipy" with linespoint, \
     "./Results/C++Lib" title "C++ + libs" with linespoint
#set terminal png size 600,450
#set output "kepler.png"
def parsit(D, l):
    """Store the "<size> <time>" pair found on line `l` into dict `D`.

    The line is split on any run of whitespace. When it holds exactly two
    fields that parse as an int and a float, D[int] = float is recorded.
    Any other line (blank, header text, wrong field count) is ignored, so
    extra or trailing blanks no longer drop a valid line and a non-numeric
    line no longer raises ValueError.
    """
    fields = l.split()
    if len(fields) != 2:
        return
    try:
        size, value = int(fields[0]), float(fields[1])
    except ValueError:
        # Not a pair of numbers: nothing to extract from this line.
        return
    D[size] = value
#!/bin/bash
#
# This script is supposed to run *all* the tests, and then compute
# the final "report" in Results/
# Not sure it works everywhere. If it does not, improve it, or enter each
# directory and look at README.md to know what to do.
#
# Each test runs in a subshell whose steps are chained with &&, so a failed
# "cd" or build never lets the following commands run in the wrong directory.

# C++ versions: configure and build in Build/, then run the executable.
for i in C++ C++Lib ; do
    echo "Test: "$i
    echo "--- "
    (cd "$i" && mkdir -p Build && cd Build && cmake .. && make && ./run)
done

# Versions started through their own ./script.
# JuLib is included: Results/gr.py opens ../JuLib/RunningOn<hostname>
# unconditionally, so the report fails if that test was not run.
for i in Ju JuLib Numba PythranVec Pythran ; do
    echo "Test: "$i
    echo "--- "
    (cd "$i" && ./script)
done

# Pure Python versions.
for i in Py PyVec PyScipy; do
    echo "Test: "$i
    echo "--- "
    (cd "$i" && python3 ./main.py)
done

echo " "
echo "Make the report:"
mkdir -p Results/Results
(cd Results && ./gr.py)
echo " "
echo "Everything went well ? If yes,go to ./Results/. You can visualize the"
echo " results with gnuplot. Look at README.md in ./Results"
echo " "
echo " "
By default, we use g++. To change the compiler (for example, to use clang++), replace
// Return the current time point of std::chrono::high_resolution_clock.
// The return type is deduced (auto); subtract two results to get a duration.
auto get_time() {
  return std::chrono::high_resolution_clock::now();
}
just do: 2 | 3 | ``` 4 | ./script code.jl 5 | ``` 6 | 7 | with code.jl = main_cl.jl, main_lapl_1d.jl or main_lapl_2d.jl 8 | 9 | * If you want to look at memory usage do: 10 | ``` 11 | ./script-m code.jl 12 | ``` 13 | (but this slow down the code. **Do not use script-m for benchmarking!**). 14 | -------------------------------------------------------------------------------- /MicroBenchmarks/Ju/script: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | julia --check-bounds=no -O3 $1 3 | -------------------------------------------------------------------------------- /MicroBenchmarks/Ju/script-m: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | julia --check-bounds=no -O3 --track-allocation=user $1 3 | -------------------------------------------------------------------------------- /MicroBenchmarks/Numba/README.md: -------------------------------------------------------------------------------- 1 | * To run the different codes, just do: 2 | 3 | ``` 4 | ./script code.py 5 | ``` 6 | 7 | with code.py = main_cl.py, main_lapl_1d.py or main_lapl_2d.py 8 | 9 | 10 | -------------------------------------------------------------------------------- /MicroBenchmarks/Numba/main_cl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import socket 4 | from numba import jit 5 | 6 | def Init(X,L): 7 | size=X.size 8 | h=L/size 9 | for i in range(0,size): 10 | if i>size//8 and i0.0001 and abs(t-T)/t<0.025: 48 | break 49 | else: 50 | T=t 51 | niter*=2 52 | 53 | return T,niter 54 | 55 | DD={"cl_2":"Naive ", 56 | "cl_1":"Vectorized"} 57 | 58 | f=open("RunningOn"+socket.gethostname()+"_cl","w") 59 | 60 | size=32 61 | sizemax=100000 62 | niter=10 63 | parsef= lambda f: str(f).split(" ")[1] #parse function name 64 | while sizesize//8 and i0.0001 and abs(t-T)/t<0.025: 47 | break 48 | else: 49 | T=t 50 | niter*=2 51 | 
52 | return T,niter 53 | 54 | size=32 55 | sizemax=100000 56 | niter=10 57 | parsef= lambda f: str(f).split(" ")[1] #parse function name 58 | DD={"cl_2":"Naïve ", 59 | "cl_1":"Vectorized"} 60 | 61 | f=open("RunningOn"+socket.gethostname()+"_cl","w") 62 | 63 | while sizesize//8 and i0.0001 and abs(t-T)/t<0.025: 46 | break 47 | else: 48 | T=t 49 | niter*=2 50 | 51 | return T,niter 52 | 53 | DD={"lapl1d_1":"Vectorized", 54 | "lapl1d_2":"Naïve "} 55 | 56 | f=open("RunningOn"+socket.gethostname()+"_lapl_1","w") 57 | 58 | size=32 59 | sizemax=100000 60 | niter=10 61 | parsef= lambda f: str(f).split(" ")[1] #parse function name 62 | while sizesize//8 and i0.001 and abs(t-T)/t<0.025: 30 | break 31 | else: 32 | T=t 33 | niter*=2 34 | 35 | return T,niter 36 | 37 | size=32 38 | sizemax=1000000 39 | niter=10 40 | parsef= lambda f: str(f).split(" ")[2][:-1] #parse function name 41 | DD={"cl_2":"Naïve ", 42 | "cl_1":"Vectorized"} 43 | 44 | f=open("RunningOn"+socket.gethostname()+"_cl","w") 45 | 46 | while sizesize//8 and i0.0001 and abs(t-T)/t<0.025: 32 | break 33 | else: 34 | T=t 35 | niter*=2 36 | 37 | return T,niter 38 | 39 | DD={"lapl1d_1":"Vectorized", 40 | "lapl1d_2":"Naïve "} 41 | 42 | f=open("RunningOn"+socket.gethostname()+"_lapl_1","w") 43 | 44 | size=32 45 | sizemax=100000 46 | niter=10 47 | parsef= lambda f: str(f).split(" ")[2][:-1] #parse function name 48 | 49 | while sizesize//8 and i0.0001 and abs(t-T)/t<0.025: 33 | break 34 | else: 35 | T=t 36 | niter*=2 37 | 38 | return T,niter 39 | 40 | size=32 41 | sizemax=2049 42 | niter=10 43 | parsef= lambda f: str(f).split(" ")[2][:-1] #parse function name 44 | 45 | DD={"lapl2d_1":"Vectorized", 46 | "lapl2d_2":"Naïve "} 47 | 48 | f=open("RunningOn"+socket.gethostname()+"_lapl_2","w") 49 | 50 | while size load "vis_cl" 21 | gnuplot> 22 | ``` 23 | 24 | This will show the results for the 1d laplacian (as a function of the size). 25 | 26 | replace "vis_cl" by ""vis_lapl_1" or by ""vis_lapl_2" for the other results. 
#!/usr/bin/python3
#
#comparison between c++ and other computations
#
import socket
import os


def parsit(D, l):
    """Store the ``n time`` pair found on line *l* into the dict *D*.

    Only lines made of exactly two single-space-separated fields, an integer
    followed by a float, are used; any other line is ignored.
    """
    ll = l.split(" ")
    if len(ll) != 2:
        return
    try:
        key, value = int(ll[0]), float(ll[1])
    except ValueError:
        # Two fields that are not numbers (a header, for instance): skip.
        return
    D[key] = value


# directories to explore ---------
files = [
    "../Py",
    "../Ju",
    "../Numba",
    "../Pythran",
    ]
cpp = "../C++"

types = ["_cl", "_lapl_1", "_lapl_2"]


def _read_timings(path):
    """Return the dict n -> computing time parsed from the file *path*."""
    D = {}
    with open(path, "r") as file:
        for line in file:
            parsit(D, line)
    return D


def doWhatWeMustDo(Dirs, Type):
    """Write, for each directory of *Dirs*, its timings relative to C++.

    *Type* is the benchmark suffix ("_cl", "_lapl_1", ...). Timings are read
    from ``<dir>/RunningOn<hostname><Type>``; the C++ reference comes from
    ``cpp``. One gnuplot-readable file per directory is written to
    ``./results/<dir name><Type>`` (the directory must already exist).
    Sizes that have no C++ reference keep their raw time.
    """
    suffix = "/RunningOn" + socket.gethostname() + Type

    # build a dict n-> computing time for C++
    C = _read_timings(cpp + suffix)

    # build a dict n-> computing time for all directories in Dirs.
    # Everything is read before anything is written, so a missing input
    # file leaves ./results untouched.
    T = {n: _read_timings(n + suffix) for n in Dirs}

    # Compute ratio time/(time C++).
    for D in T.values():
        for k in D:
            if k in C:
                D[k] /= C[k]

    # create file for gnuplot.
    for n in Dirs:
        D = T[n]
        thefile = n.replace("..", "./results") + Type
        with open(thefile, 'w') as file:
            for k in sorted(D):
                file.write(str(k).ljust(10)+" "+str(D[k])+'\n')


def main():
    """Build the result files for every benchmark type and print a hint."""
    os.makedirs("./results", exist_ok=True)

    for t in types:
        doWhatWeMustDo(files, t)
    print("\nsee files in ./results.\n")
    print("To plot results with gnuplot just type:")
    print('load "vis_cl"\nor:\n load "vis_lapl_1"\nor:\nload "vis_lapl_2"')
    print("at gnuplot prompt.\n")


#------main:
if __name__ == "__main__":
    main()
#!/bin/bash
#
# This script is supposed to run *all* the test, and then compute
# the final "report" in Results/
# Not sure it works everywhere. If it does not, improve it, or enter each
# directory and look at README.md to know what to do.
#
# Each test runs in a subshell "( ... )", so the working directory of this
# script itself never changes.
#
echo "Test: C++"
echo "--- "
# Out-of-source build in C++/Build, then run the three benchmark binaries.
(cd C++; mkdir -p Build;cd Build; cmake ..; make; ./run_cl;./run_lapl_1;./run_lapl_2)

for i in Ju Numba ; do
    echo "Test: "$i
    echo "--- "
    # ./script is called once per main* file found in the directory.
    (cd $i; for j in main*; do ./script $j; done)
done
echo "Test: Py"
echo "--- "
(cd Py; python3 main_cl.py; python3 main_lapl_1d.py; python3 main_lapl_2d.py)

echo "Test: Pythran"
echo "--- "
(cd Pythran; ./script)
echo " "

echo "Make the report:"
# gr.py writes its output files into Results/results/.
mkdir -p Results/results
(cd Results; ./gr.py)
echo " "
echo "Everything went well ? If yes, go to ./Results/. You can visualize the"
echo " results with gnuplot. Look at README.md in ./Results"
echo " "
echo " "
--check-bounds=no main1d-gpu.jl 5 | -------------------------------------------------------------------------------- /Sparse/C++/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(BenchmarksPythonJuliaAndCo) 3 | enable_language(CXX) 4 | # Go to Build directory. Then: 5 | # 6 | #- To use intel compiler 7 | # CXX=icpc cmake .. 8 | # - for clang++: 9 | # CXX=clang++ cmake .. 10 | # - otherwise, to use g++: 11 | # cmake .. 12 | # 13 | 14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict -DICC -DALIGN_64 -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 16 | 17 | elseif (${CMAKE_CXX_COMPILER} MATCHES "clang.*$") 18 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O1 -std=c++14 -Wall -march=native") 19 | else () 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3 -std=c++14 -Wall -march=native") 21 | 22 | endif () 23 | 24 | add_executable( 25 | run 26 | ../main.cpp 27 | ) 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /Sparse/C++/PreSparse.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | class PreSparse 9 | { 10 | map,double> M; 11 | int imax,jmax; 12 | public: 13 | PreSparse() 14 | { 15 | imax=0; jmax=0; 16 | } 17 | int size() const {return M.size();} 18 | void purge() 19 | { 20 | M.clear(); 21 | } 22 | double& operator()(int i,int j) 23 | { 24 | imax=max(i,imax); 25 | jmax=max(j,jmax); 26 | return M[make_pair(i,j)];//indices start at 1. 
`shared_ptr` has a well-known problem with "old" compilers like g++-5 (one cannot write `a[]` when `a` is a `shared_ptr`: one must use `get()`), and as we are a bit lazy, we have switched to old-style programming, with `new` and `delete`. This will not change the performance. Feel free to change this.
# Build the Laplacian matrix for a grid of the given `size` in dimension
# `dim` (2 selects PreLapl2, anything else PreLapl3), then time one
# matrix x vector product.
#
# Returns (order, nc, t1, t2):
#   order, nc : as returned by PreLapl2 / PreLapl3 (printed by the caller
#               as "order of the matrix" and "number of coefficients"),
#   t1        : time spent building the matrix, in nanoseconds,
#   t2        : time spent in the product, in nanoseconds.
function dotest(dim::Int64, size::Int64)
    # 1) matrix creation:
    t1 = time_ns()
    if dim==2
        M, order, nc = PreLapl2(size)
        sizeV = size^2
    else
        M, order, nc = PreLapl3(size)
        sizeV = size^3
    end

    # stop the build clock before allocating the vectors, so that their
    # allocation is not counted in t1.
    t1 = time_ns() - t1
    V = ones(Float64, sizeV)
    W = zeros(Float64, sizeV)

    # 2) matrix x vector product.
    t2 = time_ns()
    mul!(W, M, V)   # in-place product: W receives M*V
    t2 = time_ns() - t2

    return order, nc, t1, t2
end
42 | @printf "%-5s %1s %-20s\n" "T.b" ":" "time to build the matrix (s)." 43 | @printf "%-5s %1s %-20s\n" "T.p" ":" "time for matrix x vector product (s)." 44 | 45 | # computation starts here: 46 | for dim=2:3 47 | println("\n" * string(dim) * "d:") 48 | fw = open("RunningOn" * gethostname() * "-" * string(dim), "w") 49 | if dim==2 50 | size = 32 51 | sizeM = 2048 52 | else 53 | size = 16 54 | sizeM = 128 55 | end 56 | 57 | @printf "%4s %8s %9s %10s %10s \n" "size" "order" "nc" "T.b" "T.p" 58 | while size <= sizeM 59 | order, nc, t1, t2 = dotest(dim, size) #warmup 60 | order, nc, t1, t2 = dotest(dim, size) 61 | 62 | 63 | s = @sprintf "%4d %8d %9d %10.2e %10.2e \n" size order nc float(t1)*10.0^(-9) float(t2)*10.0^(-9) 64 | print(s) 65 | write(fw, s) 66 | size *= 2 67 | end 68 | 69 | close(fw) 70 | end 71 | -------------------------------------------------------------------------------- /Sparse/Ju/script: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | julia --check-bounds=no -O3 main.jl 3 | -------------------------------------------------------------------------------- /Sparse/Ju/script-m: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | julia --check-bounds=no -O3 --track-allocation=user main.jl 3 | -------------------------------------------------------------------------------- /Sparse/Numba/README.md: -------------------------------------------------------------------------------- 1 | To run the code, just type: 2 | 3 | ``` 4 | ./script 5 | ``` 6 | 7 | If you do not have a machine with AVX instructions, comment out the 8 | line 9 | ``` 10 | export NUMBA_ENABLE_AVX=1 11 | ``` 12 | in ./script . 13 | 14 | ### Note: 15 | 16 | lambda functions in jitted routines could not be compiled. 
The bandwidth of the matrix × vector product is about 16 × n in dimension 2, the number of floating-point operations is about 9 × n, and so the Arithmetic Intensity is about 9/16. The Roofline Model predicts that the maximum attainable performance will be about 1.375 × 9/16 ≈ 0.77 Gflop/s.
#!/bin/bash
#
# This script is supposed to run *all* the test, and then compute
# the final "report" in Results/
# Not sure it works everywhere. If it does not, improve it, or enter each
# directory and look at README.md to know what to do.
#
# Every test runs inside a subshell "( ... )" so that the working directory
# of this script is never changed.
#

echo "--- "
echo "Test: C++"
echo "--- "
# Out-of-source build in C++/Build, then run the benchmark.
(cd C++; mkdir -p Build; cd Build; cmake ..; make; ./run)

# Versions driven by a ./script launcher.
for i in Ju Numba Pythran ; do
    echo "--- "
    echo "Test: "$i
    echo "--- "
    (cd $i; ./script)
done

echo "--- "
echo "Test: Py"
echo "--- "
(cd Py; python3 ./main.py)


echo " "
echo "Make the report:"
mkdir -p Results/Results
(cd Results; ./gr.py)
echo " "
echo "Everything went well ? If yes,go to ./Results/. You can visualize the"
echo " results with gnuplot. Look at README.md in ./Results"
echo " "
echo " "
//! Linear convection problem: flux f(x) = x.
//! Provides the flux and its extrema on an interval, as needed by the
//! numerical fluxes (Godunov, Lax-Friedrichs).
struct Convection
{
  //! the flux.
  static double flux(double x)
  {
    return x;
  }
  //! min of flux in [a,b]. Used by Godunov flux.
  //! we do NOT test a<=b !
  static double min(double a, double b)
  {
    // The flux is monotone, so its extrema are reached at the end points.
    // (The sign tests used for the Burgers flux x*x/2 do not apply here:
    // they would return flux(b) for a<=b<=0, which is the maximum.)
    return std::min(flux(a),flux(b));
  }
  //! max of flux in [a,b]. Used by Godunov flux.
  //! we do NOT test a<=b !
  static double max(double a, double b)
  {
    return std::max(flux(a),flux(b));
  }
};
F::min(a,b):F::max(b,a); 14 | } 15 | }; 16 | -------------------------------------------------------------------------------- /Weno/C++-Modulo/LaxFriedrichsFlux.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | // 5 | // The Lax-Friedrichs flux. 6 | // 7 | // alpha: the Lax-Friedrichs parameter. 8 | template class LaxFriedrichsFlux{ 9 | double alpha; 10 | public: 11 | LaxFriedrichsFlux(){} 12 | LaxFriedrichsFlux(double *params):alpha(params[0]){} 13 | LaxFriedrichsFlux(LaxFriedrichsFlux& L)=delete; 14 | LaxFriedrichsFlux& operator=(LaxFriedrichsFlux&& L) 15 | { 16 | alpha=L.alpha; 17 | return *this; 18 | } 19 | double operator() (double a, double b) 20 | { 21 | return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a)); 22 | } 23 | }; 24 | -------------------------------------------------------------------------------- /Weno/C++-Modulo/README.md: -------------------------------------------------------------------------------- 1 | ### Choose your problem and your numerical flux 2 | 3 | You must comment/uncomment the following lines in main.cpp: 4 | ``` 5 | typedef Burghers Problem; 6 | //typedef Convection Problem; 7 | ``` 8 | and 9 | ``` 10 | typedef GodunovFlux NumFlux; 11 | //typedef LaxFriedrichsFlux NumFlux; 12 | ``` 13 | 14 | ### Plotting: 15 | 16 | In main.cpp, uncomment the line 17 | ``` 18 | #define DO_GNUPLOT_FILES 19 | ``` 20 | then, at run time, the code will produce a file _resultXXX_ every 100 steps, and 21 | a file _gpfile_. To plot the solution over time, you can use gnuplot: 22 | ``` 23 | >gnuplot 24 | load "gpfile" 25 | ``` 26 | But **CAVEAT**: **comment out** this line again before benchmarking! 27 | 28 | 29 | ### Compilation: 30 | ``` 31 | mkdir Build 32 | cd Build 33 | cmake .. 34 | make 35 | ``` 36 | A file "run" is created. 37 | To use another compiler (e.g. clang++), do: 38 | ``` 39 | CXX=clang++ cmake .. 
40 | make 41 | ``` 42 | 43 | ### Run the code: 44 | 45 | from Build/ directory, type: 46 | ``` 47 | ./run 48 | ``` 49 | -------------------------------------------------------------------------------- /Weno/C++-Modulo/RK3TVD.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | // 5 | // Explicit TVD Runge-Kutta method of order 3. 6 | // 7 | // L: the RHS (du/dt =L(u)). 8 | // 9 | template class RK3TVD 10 | { 11 | const int size; 12 | std::unique_ptr u1,u2; 13 | const double c21=3./4.,c22=1./4.,c31=1./3.,c32=2./3.; 14 | 15 | L LL; 16 | public: 17 | //! constructor. 18 | //\param size size of arrays. 19 | //\param Length length of the domain. 20 | RK3TVD(int _size, double Length,double params[]):size(_size) 21 | { 22 | u1=std::make_unique(size); 23 | u2=std::make_unique(size); 24 | LL=L(size,Length,params); 25 | } 26 | 27 | //! destructor 28 | ~RK3TVD() 29 | { 30 | } 31 | //! make one step. 32 | //! \param InOut intial value, overwritten.. 33 | //! \param dt time step. 34 | void step(std::unique_ptr& InOut,double dt) 35 | { 36 | LL(InOut,u1); 37 | for(int i=0;i 3 | //#include 4 | //#include 5 | struct Burghers 6 | { 7 | //! the flux. 8 | inline double flux(double x) 9 | { 10 | return 0.5*x*x; 11 | } 12 | //! min of flux in [a,b]. Used by Godunov flux. 13 | //! we do NOT test a<=b ! 14 | double min(double a, double b) 15 | { 16 | 17 | if(b<=0) 18 | return 0.5*b*b; 19 | else if (a>=0) 20 | return 0.5*a*a; 21 | else 22 | return 0.0; 23 | } 24 | //! max of flux in [a,b]. Used by Godunov flux. 25 | //! we do NOT test a<=b ! 
26 | inline double max(double a, double b) 27 | { 28 | //return 0.5*fmax(a*a,b*b); 29 | return 0.5*std::max(a*a,b*b); 30 | } 31 | }; 32 | -------------------------------------------------------------------------------- /Weno/C++-Pointers/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(BenchmarksPythonJuliaAndCo) 3 | enable_language(CXX) 4 | # Go to Build directory. Then: 5 | # 6 | #- To use intel compiler 7 | # CXX=icpc cmake .. 8 | # - for clang++: 9 | # CXX=clang++ cmake .. 10 | # - otherwise, to use g++: 11 | # cmake .. 12 | # 13 | 14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 16 | 17 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$") 18 | set(CMAKE_CXX_COMPILER "clang++") 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wall -std=c++14 -march=native") 20 | else () 21 | #set(CMAKE_CXX_COMPILER "g++-8") 22 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -g -std=c++17 -Wall -march=native") 23 | 24 | endif () 25 | 26 | add_executable( 27 | run 28 | ../main.cpp 29 | ) 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /Weno/C++-Pointers/Convection.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | struct Convection 5 | { 6 | //! the flux. 7 | static double flux(double x) 8 | { 9 | return x; 10 | } 11 | //! min of flux in [a,b]. Used by Godunov flux. 12 | //! we do NOT test a<=b ! 13 | static double min(double a, double b) 14 | { 15 | 16 | if(b<=0) 17 | return flux(b); 18 | else if (a>=0) 19 | return flux(a); 20 | else 21 | return 0.0; 22 | } 23 | //! max of flux in [a,b]. Used by Godunov flux. 24 | //! we do NOT test a<=b ! 
25 | static double max(double a, double b) 26 | { 27 | return std::max(flux(a),flux(b)); 28 | } 29 | }; 30 | -------------------------------------------------------------------------------- /Weno/C++-Pointers/GodunovFlux.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | // 5 | // The Godunov flux. 6 | // 7 | template struct GodunovFlux{ 8 | // constructors, for compatibility with other fluxes. 9 | F f; 10 | GodunovFlux(double *params=0){} 11 | GodunovFlux(GodunovFlux& G)= delete; 12 | GodunovFlux& operator=(GodunovFlux&& G){return *this;} 13 | inline double operator() (double a, double b) 14 | { 15 | return a<=b ? f.min(a,b):f.max(b,a); 16 | } 17 | ~GodunovFlux() 18 | { 19 | } 20 | }; 21 | -------------------------------------------------------------------------------- /Weno/C++-Pointers/LaxFriedrichsFlux.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | // 5 | // The Lax-Friedrichs flux. 6 | // 7 | // alpha: the Lax-Friedrichs parameter. 
8 | template class LaxFriedrichsFlux{ 9 | double alpha; 10 | public: 11 | LaxFriedrichsFlux(){} 12 | LaxFriedrichsFlux(double *params):alpha(params[0]){} 13 | LaxFriedrichsFlux(LaxFriedrichsFlux& L)=delete; 14 | LaxFriedrichsFlux& operator=(LaxFriedrichsFlux&& L) 15 | { 16 | alpha=L.alpha; 17 | return *this; 18 | } 19 | double operator() (double a, double b) 20 | { 21 | return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a)); 22 | } 23 | }; 24 | -------------------------------------------------------------------------------- /Weno/C++-Pointers/README.md: -------------------------------------------------------------------------------- 1 | ### Choose your problem and your numerical flux 2 | 3 | You must comment/uncomment the following lines in main.cpp: 4 | ``` 5 | typedef Burghers Problem; 6 | //typedef Convection Problem; 7 | ``` 8 | and 9 | ``` 10 | typedef GodunovFlux NumFlux; 11 | //typedef LaxFriedrichsFlux NumFlux; 12 | ``` 13 | 14 | ### Plotting: 15 | 16 | In main.cpp, uncomment the line 17 | ``` 18 | #define DO_GNUPLOT_FILES 19 | ``` 20 | then, at run time, the code will produce a file _resultXXX_ every 100 steps, and 21 | a file _gpfile_. To plot the solution over time, you can use gnuplot: 22 | ``` 23 | >gnuplot 24 | load "gpfile" 25 | ``` 26 | But **CAVEAT**: **comment out** this line again before benchmarking! 27 | 28 | 29 | ### Compilation: 30 | ``` 31 | mkdir Build 32 | cd Build 33 | cmake .. 34 | make 35 | ``` 36 | A file "run" is created. 37 | To use another compiler (e.g. clang++), do: 38 | ``` 39 | CXX=clang++ cmake .. 40 | make 41 | ``` 42 | 43 | ### Run the code: 44 | 45 | From the Build/ directory, type: 46 | ``` 47 | ./run 48 | ``` 49 | -------------------------------------------------------------------------------- /Weno/C++-Pointers/RK3TVD.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | // 4 | // Explicit TVD Runge-Kutta method of order 3. 5 | // 6 | // L: the RHS (du/dt =L(u)). 
7 | // 8 | template class RK3TVD 9 | { 10 | const int size; 11 | double *u1,*u2; 12 | const double c21=3./4.,c22=1./4.,c31=1./3.,c32=2./3.; 13 | 14 | L LL; 15 | public: 16 | //! constructor. 17 | //\param size size of arrays. 18 | //\param Length length of the domain. 19 | RK3TVD(int _size, double Length,double params[]):size(_size) 20 | { 21 | u1=new double[size]; 22 | u2=new double[size]; 23 | LL=L(size,Length,params); 24 | } 25 | 26 | //! destructor 27 | ~RK3TVD() 28 | { 29 | delete[] u1; delete[] u2; 30 | } 31 | //! make one step. 32 | //! \param InOut intial value, overwritten.. 33 | //! \param dt time step. 34 | void step(double * __restrict__ InOut,double dt) 35 | { 36 | LL(InOut,u1); 37 | for(int i=0;i 3 | //#include 4 | //#include 5 | struct Burghers 6 | { 7 | //! the flux. 8 | static inline double flux(double x) 9 | { 10 | return 0.5*x*x; 11 | } 12 | //! min of flux in [a,b]. Used by Godunov flux. 13 | //! we do NOT test a<=b ! 14 | double min(double a, double b) 15 | { 16 | 17 | if(b<=0) 18 | return 0.5*b*b; 19 | else if (a>=0) 20 | return 0.5*a*a; 21 | else 22 | return 0.0; 23 | } 24 | //! max of flux in [a,b]. Used by Godunov flux. 25 | //! we do NOT test a<=b ! 26 | inline double max(double a, double b) 27 | { 28 | //return 0.5*fmax(a*a,b*b); 29 | return 0.5*std::max(a*a,b*b); 30 | } 31 | }; 32 | -------------------------------------------------------------------------------- /Weno/C++/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(BenchmarksPythonJuliaAndCo) 3 | enable_language(CXX) 4 | # Go to Build directory. Then: 5 | # 6 | #- To use intel compiler 7 | # CXX=icpc cmake .. 8 | # - for clang++: 9 | # CXX=clang++ cmake .. 10 | # - otherwise, to use g++: 11 | # cmake .. 
12 | # 13 | 14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 16 | else () 17 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -g -std=c++17 -Wall -march=native -mtune=native" ) 18 | endif () 19 | 20 | add_executable( 21 | run 22 | ../main.cpp 23 | ) 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /Weno/C++/Convection.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | struct Convection 5 | { 6 | //! the flux. 7 | static double flux(double x) 8 | { 9 | return x; 10 | } 11 | //! min of flux in [a,b]. Used by Godunov flux. 12 | //! we do NOT test a<=b ! 13 | static double min(double a, double b) 14 | { 15 | 16 | if(b<=0) 17 | return flux(b); 18 | else if (a>=0) 19 | return flux(a); 20 | else 21 | return 0.0; 22 | } 23 | //! max of flux in [a,b]. Used by Godunov flux. 24 | //! we do NOT test a<=b ! 25 | static double max(double a, double b) 26 | { 27 | return std::max(flux(a),flux(b)); 28 | } 29 | }; 30 | -------------------------------------------------------------------------------- /Weno/C++/GodunovFlux.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | // 5 | // The Godunov flux. 6 | // 7 | template struct GodunovFlux{ 8 | // constructors, for compatibility with other fluxes. 9 | F f; 10 | GodunovFlux(double *params=0){} 11 | GodunovFlux(GodunovFlux& G)= delete; 12 | GodunovFlux& operator=(GodunovFlux&& G){return *this;} 13 | inline double operator() (double a, double b) 14 | { 15 | return a<=b ? 
f.min(a,b):f.max(b,a); 16 | } 17 | ~GodunovFlux() 18 | { 19 | } 20 | }; 21 | -------------------------------------------------------------------------------- /Weno/C++/LaxFriedrichsFlux.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | // 5 | // The Lax-Friedrichs flux. 6 | // 7 | // alpha: the Lax-Friedrichs parameter. 8 | template class LaxFriedrichsFlux{ 9 | double alpha; 10 | public: 11 | LaxFriedrichsFlux(){} 12 | LaxFriedrichsFlux(double *params):alpha(params[0]){} 13 | LaxFriedrichsFlux(LaxFriedrichsFlux& L)=delete; 14 | LaxFriedrichsFlux& operator=(LaxFriedrichsFlux&& L) 15 | { 16 | alpha=L.alpha; 17 | return *this; 18 | } 19 | double operator() (double a, double b) 20 | { 21 | return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a)); 22 | } 23 | }; 24 | -------------------------------------------------------------------------------- /Weno/C++/README.md: -------------------------------------------------------------------------------- 1 | ### Choose your problem and your numerical flux 2 | 3 | You must comment/uncomment the following lines in main.cpp: 4 | ``` 5 | typedef Burghers Problem; 6 | //typedef Convection Problem; 7 | ``` 8 | and 9 | ``` 10 | typedef GodunovFlux NumFlux; 11 | //typedef LaxFriedrichsFlux NumFlux; 12 | ``` 13 | 14 | ### Plotting: 15 | 16 | In main.cpp, uncomment the line 17 | ``` 18 | #define DO_GNUPLOT_FILES 19 | ``` 20 | then, at run time, the code will produce a file _resultXXX_ every 100 steps, and 21 | a file _gpfile_. To plot the solution over time, you can use gnuplot: 22 | ``` 23 | >gnuplot 24 | load "gpfile" 25 | ``` 26 | But **CAVEAT**: **comment out** this line again before benchmarking! 27 | 28 | 29 | ### Compilation: 30 | ``` 31 | mkdir Build 32 | cd Build 33 | cmake .. 34 | make 35 | ``` 36 | A file "run" is created. 37 | To use another compiler (e.g. clang++), do: 38 | ``` 39 | CXX=clang++ cmake .. 
40 | make 41 | ``` 42 | 43 | ### Run the code: 44 | 45 | from Build/ directory, type: 46 | ``` 47 | ./run 48 | ``` 49 | -------------------------------------------------------------------------------- /Weno/C++/RK3TVD.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | // 5 | // Explicit TVD Runge-Kutta method of order 3. 6 | // 7 | // L: the RHS (du/dt =L(u)). 8 | // 9 | template class RK3TVD 10 | { 11 | const int size; 12 | std::unique_ptr u1,u2; 13 | const double c21=3./4.,c22=1./4.,c31=1./3.,c32=2./3.; 14 | 15 | L LL; 16 | public: 17 | //! constructor. 18 | //\param size size of arrays. 19 | //\param Length length of the domain. 20 | RK3TVD(int _size, double Length,double params[]):size(_size) 21 | { 22 | u1=std::make_unique(size); 23 | u2=std::make_unique(size); 24 | LL=L(size,Length,params); 25 | } 26 | 27 | //! destructor 28 | ~RK3TVD() 29 | { 30 | } 31 | //! make one step. 32 | //! \param InOut intial value, overwritten.. 33 | //! \param dt time step. 34 | void step(std::unique_ptr& InOut,double dt) 35 | { 36 | LL(InOut,u1); 37 | for(int i=0;i 4 | 5 | template 6 | struct Burghers 7 | { 8 | using Real = TReal; 9 | 10 | //! the flux. 11 | static inline 12 | Real flux(Real x) 13 | { 14 | return 0.5 * x * x; 15 | } 16 | 17 | //! min of flux in [a,b]. Used by Godunov flux. 18 | //! we do NOT test a<=b ! 19 | static inline 20 | Real min(Real a, Real b) 21 | { 22 | if (b <= 0) 23 | return flux(b); 24 | else if (a >= 0) 25 | return flux(a); 26 | else 27 | return Real(0); 28 | } 29 | 30 | //! max of flux in [a,b]. Used by Godunov flux. 31 | //! we do NOT test a<=b ! 
32 | static inline 33 | Real max(Real a, Real b) 34 | { 35 | return std::max(flux(a), flux(b)); 36 | } 37 | }; 38 | -------------------------------------------------------------------------------- /Weno/C++NoCopy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(BenchmarksPythonJuliaAndCo) 3 | enable_language(CXX) 4 | # Go to Build directory. Then: 5 | # 6 | #- To use intel compiler 7 | # CXX=icpc cmake .. 8 | # - for clang++: 9 | # CXX=clang++ cmake .. 10 | # - otherwise, to use g++: 11 | # cmake .. 12 | # 13 | 14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$") 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DICC -DALIGN_64 -restrict -O3 -g -xavx -ipo -fargument-noalias -ansi-alias -Wall -vec-report3 -std=c++0x") 16 | 17 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$") 18 | set(CMAKE_CXX_COMPILER "clang++") 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -Wall -std=c++14 -march=native") 20 | else () 21 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -Wall -std=c++14 -march=native") 22 | 23 | endif () 24 | 25 | add_executable( 26 | run 27 | ../main.cpp 28 | ) 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /Weno/C++NoCopy/Convection.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | template 6 | struct Convection 7 | { 8 | using Real = TReal; 9 | 10 | //! the flux. 11 | static inline 12 | Real flux(Real x) 13 | { 14 | return x; 15 | } 16 | 17 | //! min of flux in [a,b]. Used by Godunov flux. 18 | //! we do NOT test a<=b ! 19 | static inline 20 | Real min(Real a, Real b) 21 | { 22 | if (b <= 0) 23 | return flux(b); 24 | else if (a >= 0) 25 | return flux(a); 26 | else 27 | return Real(0); 28 | } 29 | 30 | //! max of flux in [a,b]. Used by Godunov flux. 31 | //! we do NOT test a<=b ! 
32 | static inline 33 | Real max(Real a, Real b) 34 | { 35 | return std::max(flux(a), flux(b)); 36 | } 37 | }; 38 | -------------------------------------------------------------------------------- /Weno/C++NoCopy/GodunovFlux.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | /** 7 | * The Godunov flux. 8 | * 9 | * @tparam TProblem Type of the problem 10 | */ 11 | template 12 | class GodunovFlux 13 | { 14 | public: 15 | using Problem = typename std::decay::type; 16 | using Real = typename Problem::Real; 17 | 18 | private: 19 | TProblem f; 20 | 21 | public: 22 | 23 | /** 24 | * @param f The problem. 25 | */ 26 | GodunovFlux(TProblem f) 27 | : f(std::forward(f)) 28 | {} 29 | 30 | inline Real operator() (Real a, Real b) const 31 | { 32 | return a <= b ? f.min(a, b) : f.max(b, a); 33 | } 34 | }; 35 | 36 | template 37 | GodunovFlux makeGodunovFlux(TProblem && problem) 38 | { 39 | return {std::forward(problem)}; 40 | } 41 | -------------------------------------------------------------------------------- /Weno/C++NoCopy/LaxFriedrichsFlux.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | /** 7 | * The Lax-Friedrichs flux. 8 | * 9 | * @tparam TProblem Type of the problem 10 | */ 11 | template 12 | class LaxFriedrichsFlux 13 | { 14 | public: 15 | using Problem = typename std::decay::type; 16 | using Real = typename Problem::Real; 17 | 18 | private: 19 | TProblem f; 20 | Real alpha; 21 | 22 | public: 23 | /** 24 | * @param f The problem 25 | * @param alpha The Lax-Friedrichs parameter. 26 | */ 27 | LaxFriedrichsFlux(TProblem f, Real alpha = 1.) 
28 | : f(std::forward(f)), alpha(alpha) 29 | {} 30 | 31 | double operator() (double a, double b) const 32 | { 33 | return 0.5 * (f.flux(a) + f.flux(b) - alpha * (b - a)); 34 | } 35 | }; 36 | 37 | template 38 | auto makeLaxFriedrichsFlux(TProblem && problem, TReal const& alpha) 39 | { 40 | return LaxFriedrichsFlux(std::forward(problem), alpha); 41 | } 42 | -------------------------------------------------------------------------------- /Weno/C++NoCopy/README.md: -------------------------------------------------------------------------------- 1 | ### Choose your problem and your numerical flux 2 | 3 | You must comment/uncomment the following lines in main.cpp: 4 | ``` 5 | auto const problem = Burghers{}; 6 | //auto const problem = Convection{}; 7 | ``` 8 | and 9 | ``` 10 | auto const num_flux = makeGodunovFlux(problem); 11 | //auto const num_flux = makeLaxFriedrichsFlux(problem, 1.); 12 | ``` 13 | 14 | ### Plotting: 15 | 16 | In main.cpp, uncomment the line 17 | ``` 18 | #define DO_GNUPLOT_FILES 19 | ``` 20 | then, at run time, the code will produce a file _resultXXX_ every 100 steps, and 21 | a file _gpfile_. To plot the solution over time, you can use gnuplot: 22 | ``` 23 | >gnuplot 24 | load "gpfile" 25 | ``` 26 | But **CAVEAT**: **comment out** this line again before benchmarking! 27 | 28 | 29 | ### Compilation: 30 | ``` 31 | mkdir Build 32 | cd Build 33 | cmake .. 34 | make 35 | ``` 36 | A file "run" is created. 37 | To use another compiler (e.g. clang++), do: 38 | ``` 39 | CXX=clang++ cmake .. 40 | make 41 | ``` 42 | 43 | ### Run the code: 44 | 45 | From the Build/ directory, type: 46 | ``` 47 | ./run 48 | ``` 49 | -------------------------------------------------------------------------------- /Weno/C++NoCopy/RK3TVD.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | /** 9 | * Explicit TVD Runge-Kutta method of order 3. 10 | */ 11 | // L: the RHS (du/dt =L(u)). 
12 | // 13 | template 14 | class RK3TVD 15 | { 16 | public: 17 | using RHS = typename std::decay::type; 18 | using Real = typename RHS::Real; 19 | 20 | private: 21 | TRHS rhs; 22 | mutable std::vector u1, u2; 23 | 24 | public: 25 | RK3TVD(TRHS rhs) 26 | : rhs(std::forward(rhs)) 27 | { 28 | //std::cout << rhs.length << std::endl; 29 | //std::cout << this->rhs.length << std::endl; 30 | } 31 | 32 | /** Make one step. 33 | * @param[in,out] data initial value, overwritten. 34 | * @param dt time step. 35 | */ 36 | template 37 | void step(TData & data, Real dt) const 38 | { 39 | std::size_t const size = data.size(); 40 | u1.resize(size); 41 | u2.resize(size); 42 | 43 | // First step 44 | rhs(data, u1); 45 | for (std::size_t i = 0; i < size; ++i) 46 | u1[i] = data[i] + dt * u1[i]; 47 | 48 | // Second step 49 | rhs(u1, u2); 50 | for (std::size_t i = 0; i < size; ++i) 51 | u2[i] = Real(3)/Real(4) * data[i] + Real(1)/Real(4) * (u1[i] + dt*u2[i]); 52 | 53 | // Third step 54 | rhs(u2, u1); // reuse u1. 
55 | for (std::size_t i = 0; i < size; ++i) 56 | data[i] = Real(1)/Real(3) * data[i] + Real(2)/Real(3) * (u2[i] + dt*u1[i]); 57 | } 58 | }; 59 | 60 | template 61 | auto makeRK3TVD(TRHS && rhs) 62 | { 63 | return RK3TVD(std::forward(rhs)); 64 | } 65 | -------------------------------------------------------------------------------- /Weno/Fortran/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(BenchmarksPythonJuliaAndCo) 3 | enable_language(Fortran) 4 | # 5 | #set(CMAKE_fortran_COMPILER "gfortran-8") 6 | 7 | set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Wall -g -O3 -Wall -march=native") 8 | 9 | 10 | add_executable( 11 | run 12 | ../main.F90 13 | ../m_burghers.F90 ../m_godunov.F90 ../m_RK3TVDData.F90 ../m_weno.F90 14 | ) 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Weno/Fortran/m_RK3TVDData.F90: -------------------------------------------------------------------------------- 1 | module m_RK3TVDData 2 | 3 | use m_burghers 4 | use m_godunov 5 | use m_weno 6 | 7 | implicit none 8 | 9 | private 10 | 11 | public :: initRK3TVDData 12 | public :: Rk3tvd 13 | 14 | type, public :: RK3TVDData 15 | 16 | integer :: size 17 | 18 | real(kind = 8) :: c21, c22, c31, c32 19 | real(kind = 8), dimension(:), allocatable :: u1, u2 20 | 21 | end type RK3TVDData 22 | 23 | 24 | contains 25 | 26 | subroutine initRK3TVDData(rdata, len) 27 | 28 | type(RK3TVDData), intent(inout) :: rdata 29 | integer :: len 30 | 31 | rdata%size = len 32 | rdata%c21 = 3/4. 33 | rdata%c22 = 1/4. 34 | rdata%c31 = 1/3. 35 | rdata%c32 = 2/3. 36 | 37 | allocate(rdata%u1(len)) 38 | allocate(rdata%u2(len)) 39 | rdata%u1 = 0. 40 | rdata%u2 = 0. 
41 | 42 | end subroutine initRK3TVDData 43 | 44 | 45 | subroutine Rk3tvd(R, W, L, dt, In, Out) 46 | 47 | type(RK3TVDData), intent(inout) :: R 48 | type(WenoData), intent(inout) :: W 49 | real(kind = 8), intent(in) :: dt, L 50 | real(kind = 8), dimension(:), intent(in) :: In 51 | real(kind = 8), dimension(:), intent(inout) :: Out 52 | 53 | integer :: size 54 | 55 | size = R%size 56 | call weno(W, L, In, R%u1) 57 | R%u1 = In + dt * R%u1 58 | 59 | call weno(W, L, R%u1, R%u2) 60 | R%u2 = R%c21 * In + R%c22 * (R%u1 + dt * R%u2) 61 | 62 | call weno(W, L, R%u2, R%u1) 63 | Out = R%c31 * In + R%c32 * (R%u2 + dt * R%u1) 64 | 65 | end subroutine Rk3tvd 66 | 67 | end module m_RK3TVDData 68 | -------------------------------------------------------------------------------- /Weno/Fortran/m_burghers.F90: -------------------------------------------------------------------------------- 1 | module m_burghers 2 | 3 | implicit none 4 | 5 | contains 6 | 7 | function minf(a, b) result(m) 8 | 9 | real(kind = 8) :: a, b, m 10 | 11 | if(b <= 0.) then 12 | m = 0.5 * b**2 13 | else if(a >= 0.) then 14 | m = 0.5 * a**2 15 | else 16 | m = 0. 
17 | end if 18 | 19 | end function minf 20 | 21 | 22 | function maxf(a, b) result(M) 23 | 24 | real(kind = 8) :: a, b, M 25 | M = 0.5 * max(a**2, b**2) 26 | 27 | end function maxf 28 | 29 | end module m_burghers 30 | -------------------------------------------------------------------------------- /Weno/Fortran/m_godunov.F90: -------------------------------------------------------------------------------- 1 | module m_godunov 2 | 3 | use m_burghers 4 | 5 | implicit none 6 | 7 | contains 8 | 9 | function numFlux(a, b) result(flux) 10 | 11 | real(kind = 8) :: a, b 12 | real(kind = 8) :: flux 13 | 14 | if(a <= b) then 15 | flux = minf(a, b) 16 | else 17 | flux = maxf(b, a) 18 | end if 19 | 20 | end function numFlux 21 | 22 | end module m_godunov 23 | -------------------------------------------------------------------------------- /Weno/Fortran/main.F90: -------------------------------------------------------------------------------- 1 | program main 2 | 3 | use m_weno 4 | use m_RK3TVDData 5 | 6 | implicit none 7 | 8 | integer, parameter :: size = 1000 9 | real(kind = 8), parameter :: L = 1. 10 | real(kind = 8), parameter :: T = 1. 11 | real(kind = 8), parameter :: dt = 0.8 / size 12 | 13 | real(kind = 8) :: tt, start, finish 14 | 15 | real(kind = 8), dimension(size) :: In, Out, Temp 16 | 17 | character(len=64) :: hostname 18 | 19 | type(WenoData) :: W 20 | type(RK3TVDData) :: R 21 | 22 | print *, "size=",size, "dt=",dt, "nsteps=",floor(T/dt) 23 | 24 | call init(In, L, size) 25 | call initWenoData(W, size) 26 | call initRK3TVDData(R, size) 27 | 28 | open(unit=12, file="gp0", action="write", status="replace") 29 | write(12, "(1f16.12)") In 30 | close(12) 31 | 32 | print *, "Start computation" 33 | 34 | tt = 0. 
35 | call cpu_time(start) 36 | do while(tt < T) 37 | call Rk3tvd(R, W, L, dt, In, Out) 38 | Temp = In 39 | In = Out 40 | Out = Temp 41 | 42 | tt = tt + dt 43 | end do 44 | call cpu_time(finish) 45 | 46 | print *, "Time : ", finish - start 47 | 48 | open(unit=12, file="gp", action="write", status="replace") 49 | write(12, "(1f16.12)") In 50 | close(12) 51 | 52 | call hostnm(hostname) 53 | open(unit=12, file="../RunningOn" // trim(hostname), action="write", status="replace") 54 | write(12, *) "Burghers Godunov" 55 | write(12, *) finish - start 56 | close(12) 57 | 58 | end program main 59 | 60 | 61 | subroutine init(X, L, len) 62 | 63 | implicit none 64 | 65 | integer, intent(in) :: len 66 | real(kind = 8), intent(in) :: L 67 | real(kind = 8), dimension(len), intent(inout) :: X 68 | 69 | real(kind = 8) :: h 70 | integer :: i 71 | 72 | h = L / len 73 | 74 | do i = 0, len-1 75 | if(i > floor(len / 8.) .and. i < floor(len / 2.) + floor(len / 8.)) then 76 | X(i+1) = 1. - 2. * (i - floor(len / 8.)) * h / L 77 | else 78 | X(i+1) = 0. 
79 | end if 80 | end do 81 | 82 | end subroutine init 83 | -------------------------------------------------------------------------------- /Weno/Ju/Burghers.jl: -------------------------------------------------------------------------------- 1 | module Burghers 2 | export flux,minf,maxf 3 | function flux(x::Float64) 4 | 0.5*x^2 5 | end 6 | @inline function minf(a::Float64,b::Float64) 7 | if b<=0.0 8 | return 0.5*b^2 9 | #return flux(b) 10 | elseif a>=0.0 11 | return 0.5*a^2 12 | #return flux(a) 13 | else 14 | return 0.0 15 | end 16 | end 17 | @inline function maxf(a::Float64,b::Float64) 18 | #max(flux(a),flux(b)) 19 | 0.5*max(a^2,b^2) 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /Weno/Ju/Convection.jl: -------------------------------------------------------------------------------- 1 | module Convection 2 | export flux,minf,maxf 3 | function flux(x::Float64) 4 | x 5 | end 6 | function minf(a::Float64,b::Float64) 7 | if b<=0.0 8 | return flux(b) 9 | elseif a>=0.0 10 | return flux(a) 11 | else 12 | return 0.0 13 | end 14 | end 15 | function maxf(a::Float64,b::Float64) 16 | max(flux(a),flux(b)) 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /Weno/Ju/Godunov.jl: -------------------------------------------------------------------------------- 1 | module Godunov 2 | export NumFlux 3 | # Godunov flux: 4 | function NumFlux(F,a::Float64,b::Float64) 5 | a<=b ? 
F.minf(a,b): F.maxf(b,a) 6 | #ifelse(a<=b,F.minf(a,b),F.maxf(b,a))# same computing time 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /Weno/Ju/LaxFriedrichs.jl: -------------------------------------------------------------------------------- 1 | module LaxFriedrichs 2 | export NumFlux 3 | # Lax Friedrichs flux: 4 | function NumFlux(F,a::Float64,b::Float64,alpha::Float64) 5 | 0.5*(F.flux(a)+F.flux(b) - alpha*(b-a)) 6 | end 7 | end 8 | -------------------------------------------------------------------------------- /Weno/Ju/README.md: -------------------------------------------------------------------------------- 1 | To run the code, just type: 2 | ``` 3 | ./script 4 | ``` 5 | If you type: 6 | ``` 7 | ./script-m 8 | ``` 9 | you will be able to see how Julia manages memory, but this will 10 | slow down the code! 11 | -------------------------------------------------------------------------------- /Weno/Ju/RK3TVD.jl: -------------------------------------------------------------------------------- 1 | module RK3TVD 2 | export Rk3tvd!,RK3TVDData 3 | # RK explicit method, TVD. 4 | struct RK3TVDData 5 | size::Int64 6 | c21::Float64 7 | c22::Float64 8 | c31::Float64 9 | c32::Float64 10 | u1::Array{Float64,1} 11 | u2::Array{Float64,1} 12 | RK3TVDData(Size)=new(Size,3.0/4.0,1.0/4,1.0/3.0,2.0/3.0,zeros(Size),zeros(Size)) 13 | end 14 | function Rk3tvd!(R::RK3TVDData,W,dt,In::Array{Float64},Out::Array{Float64}) 15 | size=R.size 16 | 17 | W(In,R.u1) 18 | #R.u1=In + dt*R.u1 19 | @simd for i=1:size 20 | R.u1[i]=In[i] + dt*R.u1[i] 21 | end 22 | 23 | W(R.u1,R.u2) 24 | #R.u2= R.c21*In+R.c22*(R.u1+dt*R.u2) 25 | @simd for i=1:size 26 | R.u2[i]= R.c21*In[i]+R.c22*(R.u1[i]+dt*R.u2[i]) 27 | end 28 | 29 | W(R.u2,R.u1) 30 | @simd for i=1:size 31 | Out[i]=R.c31*In[i]+R.c32*(R.u2[i]+dt*R.u1[i]) 32 | end 33 | #Out= @. 
R.c31*In+R.c32*(R.u2+dt*R.u1) 34 | nothing 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /Weno/Ju/script: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | julia --check-bounds=no -O3 main.jl 3 | -------------------------------------------------------------------------------- /Weno/Ju/script-m: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | julia --check-bounds=no -O3 --track-allocation=user main.jl 3 | -------------------------------------------------------------------------------- /Weno/Numba/Burghers.py: -------------------------------------------------------------------------------- 1 | from numba import jit 2 | @jit(nopython=True) 3 | def flux(x): 4 | return 0.5*x*x 5 | 6 | @jit(nopython=True) 7 | def minf(a,b): 8 | if b<=0: 9 | return 0.5*b*b 10 | elif a>=0: 11 | return 0.5*a*a 12 | else: 13 | return 0.0 14 | 15 | @jit(nopython=True) 16 | def maxf(a,b): 17 | return 0.5*max(a*a,b*b) 18 | -------------------------------------------------------------------------------- /Weno/Numba/Convection.py: -------------------------------------------------------------------------------- 1 | from numba import jit 2 | @jit(nopython=True) 3 | def flux(x): 4 | return x 5 | @jit(nopython=True) 6 | def minf(a,b): 7 | if b<=0: 8 | return b 9 | elif a>=0: 10 | return a 11 | else: 12 | return 0.0 13 | @jit(nopython=True) 14 | def maxf(a,b): 15 | return max(a,b) 16 | -------------------------------------------------------------------------------- /Weno/Numba/GodunovFlux.py: -------------------------------------------------------------------------------- 1 | from numba import jit 2 | @jit(nopython=True) 3 | def NumFlux(Fmin,Fmax,a,b): 4 | if a<=b: 5 | return Fmin(a,b) 6 | else: 7 | return Fmax(b,a); 8 | -------------------------------------------------------------------------------- /Weno/Numba/LaxFriedrichs.py: 
import numpy as np
from Weno import Weno
from numba import int32, float64, deferred_type

# jitclass lives in numba.experimental since Numba 0.49 and the old
# top-level name was removed afterwards. Try the current location first and
# fall back to the historical one so both old and new Numba releases work.
try:
    from numba.experimental import jitclass
except ImportError:
    from numba import jitclass

#------------------------------------------------------
# Numba needs the compiled type of the Weno jitclass to be able to store an
# instance of it as a typed attribute of RK3TVD.
Weno_type = deferred_type()
Weno_type.define(Weno.class_type.instance_type)
spec = [
    ('c21', float64), ('c22', float64),   # weights of the second stage
    ('c31', float64), ('c32', float64),   # weights of the third stage
    ('size', int32),                      # length of the work arrays
    ('u1', float64[:]),                   # work array (stages 1 and 3)
    ('u2', float64[:]),                   # work array (stage 2)
    ("W", Weno_type),                     # Weno instance used by op()
]
#-------------------------------------------------------
@jitclass(spec)
class RK3TVD:
    # Three-stage explicit time step (RK3 TVD, going by the class name)
    # built on a Weno object created from (size, L).

    def __init__(self, size, L):
        self.c21 = 3./4.
        self.c22 = 1./4.
        self.c31 = 1./3.
        self.c32 = 2./3.
        self.size = size
        self.u1 = np.empty(self.size)
        self.u2 = np.empty(self.size)
        self.W = Weno(size, L)

    def op(self, Meth, InOut, dt):
        # Advance InOut by one step of size dt and return the new state as a
        # fresh array; InOut itself is not written to by this method.
        # Meth is passed through unchanged to self.W.weno.
        # NOTE(review): self.W.weno is presumably filling its third argument
        # with the spatial operator applied to its second one - confirm in
        # Weno.py.

        # Stage 1
        self.W.weno(Meth, InOut, self.u1)
        self.u1 = InOut + dt*self.u1

        # Stage 2
        self.W.weno(Meth, self.u1, self.u2)
        self.u2 = self.c21*InOut + self.c22*(self.u1 + dt*self.u2)

        # Stage 3: self.u1 is reused as the output buffer of weno.
        self.W.weno(Meth, self.u2, self.u1)
        return self.c31*InOut + self.c32*(self.u2 + dt*self.u1)
import numpy as np


class RK3TVD:
    """Three-stage explicit time integrator (RK3 TVD, going by its name).

    The spatial operator is supplied to :meth:`op` as a callable
    ``Meth(src, dst)``, which is expected to write its result into ``dst``.
    """

    def __init__(self, size):
        self.size = size
        # Weights combining the initial state with stages 2 and 3.
        self.c21 = 3./4.
        self.c22 = 1./4.
        self.c31 = 1./3.
        self.c32 = 2./3.
        # Output buffers handed to Meth; op() rebinds them to fresh arrays.
        self.u1 = np.empty(self.size)
        self.u2 = np.empty(self.size)

    def op(self, Meth, InOut, dt):
        """Return the state obtained from ``InOut`` after one step ``dt``.

        ``InOut`` is only read here; the result is a newly allocated array.
        """
        # Stage 1
        Meth(InOut, self.u1)
        stage1 = InOut + dt*self.u1
        self.u1 = stage1

        # Stage 2
        Meth(stage1, self.u2)
        stage2 = self.c21*InOut + self.c22*(stage1 + dt*self.u2)
        self.u2 = stage2

        # Stage 3: self.u1 (the stage-1 array) is overwritten by Meth.
        Meth(stage2, self.u1)
        return self.c31*InOut + self.c32*(stage2 + dt*self.u1)
import numpy as np


def flux(x):
    """Return the Burgers flux 0.5 * x**2, element-wise."""
    return 0.5 * np.square(x)


def minf(a, b):
    """Vectorized form of the scalar rule used by the non-vectorized code.

    Element-wise equivalent of::

        if b <= 0:
            return flux(b)
        elif a >= 0:
            return flux(a)
        else:
            return 0.0

    Parameters
    ----------
    a, b : array_like or scalar
        States on either side; broadcast against each other.

    Returns
    -------
    numpy.ndarray
        Selected flux values (a 0-d array for scalar inputs).
    """
    # np.where picks exactly one branch per element. Multiplying by boolean
    # masks and summing would add both fluxes wherever b <= 0 and a >= 0 hold
    # together, and would yield NaN (0 * inf) for infinite inputs.
    return np.where(b <= 0, flux(b), np.where(a >= 0, flux(a), 0.0))


def maxf(a, b):
    """Return the element-wise maximum of flux(a) and flux(b)."""
    return np.maximum(flux(a), flux(b))
import numpy as np


def NumFlux(F, a, b):
    """Element-wise Godunov flux selection.

    Vectorized counterpart of::

        if a <= b: return F.minf(a, b)
        else:      return F.maxf(b, a)

    ``F`` must provide ``minf`` and ``maxf``. Both are evaluated on the full
    inputs; the comparison ``a <= b`` then decides which value is kept for
    each element.
    """
    ordered = a <= b
    when_ordered = F.minf(a, b)
    when_reversed = F.maxf(b, a)
    return np.where(ordered, when_ordered, when_reversed)
8 | self.size=size 9 | self.u1=np.empty(self.size) 10 | self.u2=np.empty(self.size) 11 | def op(self,Meth,InOut,dt): 12 | Meth(InOut,self.u1) 13 | self.u1=InOut + dt*self.u1 14 | 15 | Meth(self.u1,self.u2) 16 | self.u2= self.c21*InOut+self.c22*(self.u1+dt*self.u2) 17 | 18 | Meth(self.u2,self.u1) 19 | return self.c31*InOut+self.c32*(self.u2+dt*self.u1) 20 | 21 | -------------------------------------------------------------------------------- /Weno/PyVec/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Weno import * 3 | import cProfile 4 | import time 5 | import socket 6 | import GodunovFlux as Godunov 7 | import LaxFriedrichs 8 | import Burghers as Burg 9 | import Convection 10 | from RK3TVD import * 11 | import time 12 | 13 | size=1000 14 | L=1.0 15 | dt=0.8/size 16 | T=1. 17 | def init(X): 18 | h=L/size 19 | for i in range(0,size): 20 | if i>size//8 and i computing time for C++ 26 | C={} 27 | with open("../"+cpp+"/RunningOn"+socket.gethostname(), 'r') as file: 28 | spRef= file.readline().split() 29 | s= file.readline() 30 | C[cpp]=float(s.replace("\n","")) 31 | doNotUse=[] 32 | for n in files: 33 | filename= "../"+n+"/RunningOn"+socket.gethostname() 34 | p_file = Path(filename) 35 | if p_file.is_file(): 36 | with open(filename,"r") as file: 37 | sp= file.readline().split() 38 | if sp[0]!=spRef[0] or sp[1]!=spRef[1]: 39 | print() 40 | print(n,": You did not perform the same compuation as C++") 41 | print("C++: ",spRef[0],spRef[1]) 42 | print(n,": ",sp[0],sp[1]) 43 | print("We do not use",n,"in the final comparison.") 44 | doNotUse.append(n) 45 | else: 46 | s=file.readline() 47 | C[n]=float(s.replace("\n","")) 48 | 49 | else: 50 | print("\n\nFile "+filename+ " does not exists !") 51 | print("did you run test in "+n+" ?\n\n") 52 | 53 | Ts=sorted([(n,C[n]/C[cpp]) for n in C],key=lambda x: x[1]) 54 | 55 | print("\nComputing time / Computing time in C++:\n") 56 | for s in Ts: 57 | if s[0]!="C++" 
#!/bin/bash
#
# This script is supposed to run *all* the tests, and then compute
# the final "report" in Results/
# Not sure it works everywhere. If it does not, improve it, or enter each
# directory and look at README.md to know what to do.
#
# Every step inside a subshell is chained with && so that a failing step
# (missing directory, cmake or make error) stops that benchmark instead of
# building in the wrong directory or running a stale binary. The loops still
# go on with the next benchmark.
#

# Compiled benchmarks: out-of-source CMake build, then run.
for i in C++ C++NoCopy C++-Modulo C++-Pointers Fortran ; do
    echo "Test: $i"
    echo "--- "
    (cd "$i" && mkdir -p Build && cd Build && cmake .. && make && ./run)
done

# Benchmarks started through their own ./script launcher.
for i in Ju Numba ; do
    echo "Test: $i"
    echo "--- "
    (cd "$i" && ./script)
done

# Plain Python benchmarks.
for i in Py PyVec ; do
    echo "Test: $i"
    echo "--- "
    (cd "$i" && python3 ./main.py)
done

echo " "
echo "Make the report:"
(cd Results && ./Look.py)
echo " "
echo "To replay the report, cd Results/ and run ./Look.py "
echo " "