├── .gitignore
├── CallBack
    ├── C++-Fonctor
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   └── main.cpp
    ├── C++-Pointer
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   └── main.cpp
    ├── C++-lambda
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   └── main.cpp
    ├── Ju
    │   ├── README.md
    │   ├── main.jl
    │   ├── script
    │   └── script-m
    ├── Numba
    │   ├── README.md
    │   ├── main.py
    │   └── script
    ├── Py
    │   ├── README.md
    │   └── main.py
    ├── Pythran
    │   ├── README.md
    │   ├── f.py
    │   ├── g.py
    │   ├── implicit.py
    │   ├── main.py
    │   ├── script
    │   └── trapz.py
    ├── README.md
    ├── Results
    │   ├── Look.py
    │   └── README.md
    └── runAllTests.sh
├── FeStiff
    ├── C++
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── Stiffness.hpp
    │   ├── main.cpp
    │   └── rando.hpp
    ├── Ju
    │   ├── README.md
    │   ├── Rando.jl
    │   ├── Stiffness.jl
    │   ├── main.jl
    │   ├── script
    │   └── script-m
    ├── Numba
    │   ├── README.md
    │   ├── RandomTriangle.py
    │   ├── Stiffness.py
    │   ├── main.py
    │   ├── rando.py
    │   └── script
    ├── Py
    │   ├── README.md
    │   ├── RandomTriangle.py
    │   ├── Stiffness.py
    │   ├── main.py
    │   └── rando.py
    ├── Pythran
    │   ├── README.md
    │   ├── RandomTriangle.py
    │   ├── StiffOut.py
    │   ├── Stiffness.py
    │   ├── main.py
    │   ├── rando.py
    │   └── script
    ├── README.md
    ├── Results
    │   ├── Look.py
    │   └── README.md
    ├── runAllTests.sh
    └── sage
    │   ├── README.md
    │   └── Stiff.ipynb
├── Gaussian
    ├── C++
    │   ├── ARRAY
    │   │   ├── .gitignore
    │   │   ├── Doc
    │   │   │   └── doxygen_sqlite3.db
    │   │   ├── Doxyfile
    │   │   ├── README
    │   │   ├── include
    │   │   │   ├── ApplyFonc.hpp
    │   │   │   ├── Array.hpp
    │   │   │   ├── ArrayException.hpp
    │   │   │   ├── Array_Access_Operators.hpp
    │   │   │   ├── Array_Array_Operators.hpp
    │   │   │   ├── Array_Constructors_RangeBased.hpp
    │   │   │   ├── Array_Constructors_RangeBased_NotSafe.hpp
    │   │   │   ├── Array_Output.hpp
    │   │   │   ├── Array_Scalar_Operators.hpp
    │   │   │   ├── Array_iterators.hpp
    │   │   │   ├── Array_resize.hpp
    │   │   │   ├── CEngine.hpp
    │   │   │   ├── FEngine.hpp
    │   │   │   ├── MacroRestrict.hpp
    │   │   │   ├── Range.hpp
    │   │   │   ├── Slices.hpp
    │   │   │   └── mainpage.h
    │   │   └── try
    │   │   │   ├── CMakeLists.txt
    │   │   │   └── main.cpp
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── RandomFeedMatrix.hpp
    │   ├── factorMatrix.hpp
    │   ├── main.cpp
    │   └── rando.hpp
    ├── C++Lib
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── RandomFeedMatrix.hpp
    │   ├── factorMatrix.hpp
    │   ├── main.cpp
    │   ├── protos_lapack.hpp
    │   └── rando.hpp
    ├── Ju
    │   ├── README.md
    │   ├── Rando.jl
    │   ├── main.jl
    │   ├── script
    │   └── script-m
    ├── JuLib
    │   ├── README.md
    │   ├── main.jl
    │   └── script
    ├── Numba
    │   ├── README.md
    │   ├── RandomFeedMatrix.py
    │   ├── factorMatrix.py
    │   ├── main.py
    │   ├── rando.py
    │   └── script
    ├── Py
    │   ├── README.md
    │   ├── RandomFeedMatrix.py
    │   ├── factorMatrix.py
    │   ├── main.py
    │   └── rando.py
    ├── PyScipy
    │   ├── README.md
    │   ├── RandomFeedMatrix.py
    │   ├── main.py
    │   ├── plot.pdf
    │   └── rando.py
    ├── PyVec
    │   ├── README.md
    │   ├── RandomFeedMatrix.py
    │   ├── factorMatrix.py
    │   ├── main.py
    │   ├── plot.pdf
    │   └── rando.py
    ├── Pythran
    │   ├── README.md
    │   ├── RandomFeedMatrix.py
    │   ├── factorMatrix.py
    │   ├── main.py
    │   ├── rando.py
    │   └── script
    ├── PythranVec
    │   ├── README.md
    │   ├── RandomFeedMatrix.py
    │   ├── factorMatrix.py
    │   ├── main.py
    │   ├── plot.pdf
    │   ├── rando.py
    │   └── script
    ├── README.md
    ├── Results
    │   ├── Benchmarks
    │   │   ├── kepler-nolibs.png
    │   │   ├── kepler-only-libs.png
    │   │   └── kepler.png
    │   ├── README.md
    │   ├── gpc
    │   ├── gpc-nolibs
    │   └── gr.py
    └── runAllTests.sh
├── LICENSE
├── MicroBenchmarks
    ├── C++-xtensor
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── get_time.hpp
    │   ├── main_cl.cpp
    │   ├── main_lapl_1.cpp
    │   └── main_lapl_2.cpp
    ├── C++
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── get_time.hpp
    │   ├── main_cl.cpp
    │   ├── main_lapl_1.cpp
    │   └── main_lapl_2.cpp
    ├── Ju
    │   ├── README.md
    │   ├── main_cl.jl
    │   ├── main_lapl_1d.jl
    │   ├── main_lapl_2d.jl
    │   ├── script
    │   └── script-m
    ├── Numba
    │   ├── README.md
    │   ├── main_cl.py
    │   ├── main_lapl_1d.py
    │   ├── main_lapl_2d.py
    │   └── script
    ├── Py
    │   ├── README.md
    │   ├── main_cl.py
    │   ├── main_lapl_1d.py
    │   └── main_lapl_2d.py
    ├── Pythran
    │   ├── README.md
    │   ├── cl_1.py
    │   ├── cl_2.py
    │   ├── lapl1d_1.py
    │   ├── lapl1d_2.py
    │   ├── lapl2d_1.py
    │   ├── lapl2d_2.py
    │   ├── main_cl.py
    │   ├── main_lapl_1d.py
    │   ├── main_lapl_2d.py
    │   └── script
    ├── README.md
    ├── Results
    │   ├── Benchmarks
    │   │   ├── kepler-cl.png
    │   │   ├── kepler-lapl_1.png
    │   │   └── kepler-lapl_2.png
    │   ├── README.md
    │   ├── gr.py
    │   ├── vis_cl
    │   ├── vis_lapl_1
    │   └── vis_lapl_2
    └── runAllTests.sh
├── README.md
├── SaintVenant
    ├── C
    │   ├── compile-cpu.sh
    │   ├── compile-gpu.sh
    │   ├── main1d-gpu-kernels.cu
    │   ├── main1d-gpu-kernels.hpp
    │   ├── main1d-gpu.cu
    │   └── main1d.cpp
    └── Ju
    │   ├── main1d-gpu-kernels.jl
    │   ├── main1d-gpu.jl
    │   ├── main1d.jl
    │   ├── main2d.jl
    │   ├── run_cpu.sh
    │   └── run_gpu.sh
├── Sparse
    ├── C++
    │   ├── CMakeLists.txt
    │   ├── Csr.hpp
    │   ├── PreLapl.hpp
    │   ├── PreSparse.hpp
    │   ├── README.md
    │   └── main.cpp
    ├── Ju
    │   ├── README.md
    │   ├── Sparse23.jl
    │   ├── Sparse23push.jl
    │   ├── Sparse23raw.jl
    │   ├── main.jl
    │   ├── script
    │   └── script-m
    ├── Numba
    │   ├── README.md
    │   ├── build2.py
    │   ├── build3.py
    │   ├── main.py
    │   └── script
    ├── Py
    │   ├── README.md
    │   ├── build.py
    │   └── main.py
    ├── Pythran
    │   ├── README.md
    │   ├── build2.py
    │   ├── build3.py
    │   ├── main.py
    │   └── script
    ├── README.md
    ├── Results
    │   ├── Arithmetic-Intensity.md
    │   ├── Benchmarks
    │   │   ├── gpc-2-b.png
    │   │   ├── gpc-2-p.png
    │   │   ├── gpc-3-b.png
    │   │   └── gpc-3-p.png
    │   ├── README.md
    │   └── gr.py
    └── runAllTests.sh
├── TODO
└── Weno
    ├── C++-Modulo
        ├── Burghers.hpp
        ├── CMakeLists.txt
        ├── Convection.hpp
        ├── GodunovFlux.hpp
        ├── LaxFriedrichsFlux.hpp
        ├── README.md
        ├── RK3TVD.hpp
        ├── Weno.hpp
        └── main.cpp
    ├── C++-Pointers
        ├── Burghers.hpp
        ├── CMakeLists.txt
        ├── Convection.hpp
        ├── GodunovFlux.hpp
        ├── LaxFriedrichsFlux.hpp
        ├── README.md
        ├── RK3TVD.hpp
        ├── Weno.hpp
        └── main.cpp
    ├── C++
        ├── Burghers.hpp
        ├── CMakeLists.txt
        ├── Convection.hpp
        ├── GodunovFlux.hpp
        ├── LaxFriedrichsFlux.hpp
        ├── README.md
        ├── RK3TVD.hpp
        ├── Weno.hpp
        └── main.cpp
    ├── C++NoCopy
        ├── Burghers.hpp
        ├── CMakeLists.txt
        ├── Convection.hpp
        ├── GodunovFlux.hpp
        ├── LaxFriedrichsFlux.hpp
        ├── README.md
        ├── RK3TVD.hpp
        ├── Weno.hpp
        └── main.cpp
    ├── Fortran
        ├── CMakeLists.txt
        ├── m_RK3TVDData.F90
        ├── m_burghers.F90
        ├── m_godunov.F90
        ├── m_weno.F90
        └── main.F90
    ├── Ju
        ├── Burghers.jl
        ├── Convection.jl
        ├── Godunov.jl
        ├── LaxFriedrichs.jl
        ├── README.md
        ├── RK3TVD.jl
        ├── Weno.jl
        ├── main.jl
        ├── script
        └── script-m
    ├── Numba
        ├── Burghers.py
        ├── Convection.py
        ├── GodunovFlux.py
        ├── LaxFriedrichs.py
        ├── Numfluxes.py
        ├── README.md
        ├── RK3TVD.py
        ├── Weno.py
        ├── main.py
        └── script
    ├── Py
        ├── Burghers.py
        ├── Convection.py
        ├── GodunovFlux.py
        ├── LaxFriedrichs.py
        ├── README.md
        ├── RK3TVD.py
        ├── Weno.py
        ├── main.py
        └── profile
    ├── PyVec
        ├── Burghers.py
        ├── Convection.py
        ├── GodunovFlux.py
        ├── LaxFriedrichs.py
        ├── README.md
        ├── RK3TVD.py
        ├── Weno.py
        └── main.py
    ├── README.md
    ├── Results
        ├── Look.py
        └── README.md
    └── runAllTests.sh


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.log
 2 | _*_.tex
 3 | *.pyc
 4 | Build*
 5 | *~
 6 | __tmpe
 7 | *_temp
 8 | lib*a
 9 | __pycache__
10 | *.so
11 | Running*
12 | gp*
13 | *mem
14 | .ipynb_checkpoints
15 | *.sage.py
16 | TAGS
17 | *.pdf
18 | *.orig


--------------------------------------------------------------------------------
/CallBack/C++-Fonctor/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(Bench)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | 
13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
14 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
15 | 
16 | 
17 | else ()
18 |   set (USING_GNU TRUE)
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -DGCC  -std=c++14 -march=native")
20 | 
21 | endif ()
22 | include_directories(
23 | ${CMAKE_SOURCE_DIR}/ARRAY/include
24 | ) 
25 | add_executable(
26 |   run
27 |   ../main.cpp
28 |   )
29 | 
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/CallBack/C++-Fonctor/README.md:
--------------------------------------------------------------------------------
 1 | You need cmake
 2 | 
 3 | Compilation:
 4 | -----------
 5 | 
 6 | ```
 7 | mkdir Build
 8 | cd Build
 9 | cmake ..
10 | make
11 | ```
12 | a file "run" is created
13 | 
14 | Run the code:
15 | ------------
16 | From the Build/ directory, type:
17 | ```
18 | ./run
19 | ```
20 | 
21 | By default, we use g++. You can change the compiler to use, for
22 | example clang++.
23 | For this just replace:
24 | 
25 | ```
26 | cmake ..
27 | ```
28 | by:
29 | ```
30 | CXX=clang++ cmake ..
31 | ```
32 | 


--------------------------------------------------------------------------------
/CallBack/C++-Pointer/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(Bench)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | 
13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
14 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
15 | 
16 | 
17 | else ()
18 |   set (USING_GNU TRUE)
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -DGCC  -std=c++14 -march=native")
20 | 
21 | endif ()
22 | include_directories(
23 | ${CMAKE_SOURCE_DIR}/ARRAY/include
24 | ) 
25 | add_executable(
26 |   run
27 |   ../main.cpp
28 |   )
29 | 
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/CallBack/C++-Pointer/README.md:
--------------------------------------------------------------------------------
 1 | You need cmake
 2 | 
 3 | Compilation:
 4 | -----------
 5 | 
 6 | ```
 7 | mkdir Build
 8 | cd Build
 9 | cmake ..
10 | make
11 | ```
12 | a file "run" is created
13 | 
14 | Run the code:
15 | ------------
16 | From the Build/ directory, type:
17 | ```
18 | ./run
19 | ```
20 | 
21 | By default, we use g++. You can change the compiler to use, for
22 | example clang++.
23 | For this just replace:
24 | 
25 | ```
26 | cmake ..
27 | ```
28 | by:
29 | ```
30 | CXX=clang++ cmake ..
31 | ```
32 | 


--------------------------------------------------------------------------------
/CallBack/C++-lambda/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(Bench)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | 
13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
14 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
15 | 
16 | 
17 | else ()
18 |   set (USING_GNU TRUE)
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -DGCC  -std=c++14 -march=native")
20 | 
21 | endif ()
22 | include_directories(
23 | ${CMAKE_SOURCE_DIR}/ARRAY/include
24 | ) 
25 | add_executable(
26 |   run
27 |   ../main.cpp
28 |   )
29 | 
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/CallBack/C++-lambda/README.md:
--------------------------------------------------------------------------------
 1 | You need cmake
 2 | 
 3 | Compilation:
 4 | -----------
 5 | 
 6 | ```
 7 | mkdir Build
 8 | cd Build
 9 | cmake ..
10 | make
11 | ```
12 | a file "run" is created
13 | 
14 | Run the code:
15 | ------------
16 | From the Build/ directory, type:
17 | ```
18 | ./run
19 | ```
20 | 
21 | By default, we use g++. You can change the compiler to use, for
22 | example clang++.
23 | For this just replace:
24 | 
25 | ```
26 | cmake ..
27 | ```
28 | by:
29 | ```
30 | CXX=clang++ cmake ..
31 | ```
32 | 


--------------------------------------------------------------------------------
/CallBack/C++-lambda/main.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <algorithm>
 3 | #include <cmath>
 4 | #include <map>
 5 | #include <string>
 6 | #include <sys/time.h>
 7 | #include <unistd.h>
 8 | #include <limits.h>
 9 | #include <fstream>
10 | #include <chrono>
11 | using namespace std;
12 | using namespace std::chrono;
13 | 
14 | // Mtime: minimal stopwatch built on std::chrono::high_resolution_clock.
15 | class Mtime
16 | {
17 |   high_resolution_clock::time_point t1 ; // instant stored by the last start(); default-constructed before that
18 |   public:
19 |   // Record the current instant as the beginning of the measured interval.
20 |   void start()
21 |   {
22 |     t1= high_resolution_clock::now();
23 |   }
24 |   // Return the time elapsed since the last call to start(), in seconds.
25 |   double sec() const
26 |   {
27 |     high_resolution_clock::time_point t2= high_resolution_clock::now();
28 |     return 1.e-9* // the difference is counted in nanoseconds; scale to seconds
29 |       static_cast<double>(duration_cast<nanoseconds>(t2 - t1 ).count());
30 |   }
31 | };
32 | string host() // name of the machine the program is running on, as reported by gethostname()
33 | {
34 |   char hostnameC[HOST_NAME_MAX+1]={0}; // zero-filled with one spare byte: stays NUL-terminated even if the name is truncated
35 |   gethostname(hostnameC, HOST_NAME_MAX); // writes at most HOST_NAME_MAX bytes, so the last byte is never touched
36 |   return  string(hostnameC);
37 | }
38 | 
39 | template<typename Fonc> double trapz(Fonc &F,double a, double b, int n)
40 | { // composite trapezoidal rule for the callable F on [a,b], using n sub-intervals of width h
41 |   auto h=(b-a)/n;
42 |   auto sum=0.5*(F(a)+F(b)); // both end points carry weight 1/2
43 |   for(int i=1;i<n;i++) // interior nodes only: i==n is the end point b, already counted above
44 |     sum+=F(i*h); // node taken as i*h (no +a), like the other language versions: exact only when a==0
45 |   
46 |   return sum*h;
47 | }
48 | int main() // time trapz on two lambdas; print and log the mean time per call
49 | {
50 |   auto hostname = host();
51 |   cout<<"hostname: "<<hostname<<endl;
52 |   
53 |   Mtime T;
54 |   
55 |   const int loops=10000; // number of repetitions averaged for each timing
56 |   double sum; // value of the last integral; printed after each timing loop
57 |   
58 |   ofstream f; f.open("../RunningOn"+hostname); // timings file, written one directory above the working directory
59 |   
60 |   auto  F  = [] (double  x) {return  exp(-x)*x*x; }; // branch-free integrand x^2*exp(-x)
61 |   T.start();
62 |   for(int i=0;i<loops;i++)
63 |     sum= trapz(F,0.,1.,1000);
64 |   double t2=T.sec()/loops; // mean seconds per trapz call
65 |   cout<<"computing time: "<<t2<<endl;
66 |   cout<<sum<<endl;
67 |   f<<"f: "<<t2<<endl;
68 |   
69 |   auto G= [](double  x) {return x<0.5? -exp(-x)*x*x: exp(x)*x*x;}; // integrand with a branch at x = 0.5
70 |   T.start(); // restart the stopwatch for the second measurement
71 |   for(int i=0;i<loops;i++)
72 |     sum= trapz(G,0.,1.,1000);
73 |   t2=T.sec()/loops;
74 |   cout<<"computing time: "<<t2<<endl;
75 |   cout<<sum<<endl;
76 |   f<<"g: "<<t2<<endl;
77 | 
78 |   f.close();
79 | }
80 | 


--------------------------------------------------------------------------------
/CallBack/Ju/README.md:
--------------------------------------------------------------------------------
 1 | To run the code, just type:
 2 | ```
 3 | ./script
 4 | ```
 5 | if you want to profile memory usage (beware, it will slow down the
 6 | code!):
 7 | ```
 8 | ./script-m
 9 | ```
10 | 


--------------------------------------------------------------------------------
/CallBack/Ju/main.jl:
--------------------------------------------------------------------------------
 1 | using BenchmarkTools
 2 | using InteractiveUtils
 3 | 
 4 | function trapz(a::Float64,b::Float64,n::Int64,f::Function)  # composite trapezoidal rule for f on [a,b], n sub-intervals
 5 |     h::Float64=(b-a)/n  # step size
 6 |     sum=0.5*(f(a)+f(b))  # both end points carry weight 1/2
 7 |     for i=1:n-1  # interior nodes only
 8 |         sum+=f(i*h)  # node taken as i*h (no +a): the true node only when a == 0
 9 |     end
10 |     sum*=h  # last expression, hence the value returned
11 | end
12 |                
13 | @inline f(x::Float64)=+exp(-x)*x^2  # branch-free one-line integrand: x^2 * exp(-x)
14 | 
15 | function g(x::Float64)  # integrand with a conditional: -exp(-x)*x^2 below 0.5, exp(x)*x^2 otherwise
16 |     h=0.0
17 |     if x<0.5
18 |         h=-exp(-x)
19 |     else
20 |         h= exp(x)
21 |     end
22 |     return h*x^2
23 | end
24 | 
25 | # Note: replacing f by f1 and g by g1 does not change the computing time:
26 | # f1(x)=x < 1.5 ? +exp(-x)*x^2 :  +exp(-x)*x^2
27 | # g1(x::Float64)=x < 0.5 ? -exp(-x)*x^2 :  exp(x)*x^2
28 | 
29 | function implicit(t::Float64)
30 |     # implicit(t) = the root x of  4*sin(x)-exp(x)+t
31 |     # found by Newton iterations, starting from x = 0:
32 |     x= 0.0
33 |     F= 4*sin(x)-exp(x)+t  # residual at the current iterate
34 |     while abs(F)> 1.e-15  # no iteration cap: loops until the residual test passes
35 |         #
36 |         x-= F/(4*cos(x) - exp(x))  # Newton step; the denominator is the derivative of the residual in x
37 |         F=  4*sin(x)-exp(x)+t
38 |         #
39 |     end
40 |     x  # last expression, hence the value returned
41 | end
42 | 
43 | io = IOContext(stdout, :compact => false)
44 | 
45 | fw=open("RunningOn"*gethostname(),"w")
46 | # Note: it seems that a loop like:
47 | #
48 | # for F in [f,g,implicit]
49 | #   bench_res = @benchmark trapz(0.,1.,1000,F)
50 | # end
51 | #
52 | # is incompatible with @benchmark... who knows why?
53 | 
54 | f(0.5)
55 | println("f:")
56 | bench_res = @benchmark trapz(0.,1.,1000,f);
57 | show(io, bench_res)
58 | write(fw,"\nf: "*string(bench_res)*"\n")
59 | 
60 | g(0.5)
61 | println("\ng:")
62 | bench_res = @benchmark trapz(0.,1.,1000,g);
63 | show(io, bench_res)
64 | write(fw,"g: "*string(bench_res)*"\n")
65 | 
66 | implicit(0.5)
67 | println("\nimplicit:")
68 | bench_res = @benchmark trapz(0.,1.,1000,implicit)
69 | show(io, bench_res)
70 | write(fw,"implicit: "*string(bench_res)*"\n")
71 | 
72 | println("\nend.")
73 | close(fw)
74 | 


--------------------------------------------------------------------------------
/CallBack/Ju/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3  main.jl
3 | 


--------------------------------------------------------------------------------
/CallBack/Ju/script-m:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 --track-allocation=user main.jl
3 | 


--------------------------------------------------------------------------------
/CallBack/Numba/README.md:
--------------------------------------------------------------------------------
 1 | To run the code, just type:
 2 | 
 3 | ```
 4 | ./script
 5 | ```
 6 | 
 7 | If you do not have a machine with AVX instructions, comment out the
 8 | line
 9 | ```
10 | export NUMBA_ENABLE_AVX=1
11 | ```
12 | in ./script .
13 | 


--------------------------------------------------------------------------------
/CallBack/Numba/main.py:
--------------------------------------------------------------------------------
 1 | from math import exp,sin,cos
 2 | from numba import jit,float64,int32
 3 | import time
 4 | import socket
 5 | 
 6 | @jit(nopython=True)  # no signature given here, unlike f, g and implicit below
 7 | def trapz(F,a,b,n):  # composite trapezoidal rule for the callable F on [a,b], n sub-intervals
 8 |     h=(b-a)/n  # step size
 9 |     sum=0.5*(F(a)+F(b))  # both end points carry weight 1/2
10 |     for i in range(1,n):  # interior nodes 1..n-1
11 |         sum+=F(i*h)  # node taken as i*h (no +a): the true node only when a == 0
12 |     return sum*h
13 | 
14 | @jit(float64(float64),nopython=True)  # explicit float64 -> float64 signature
15 | def f(x):  # branch-free integrand: x^2 * exp(-x)
16 |     return exp(-x)*x*x
17 | 
18 | @jit(float64(float64),nopython=True)  # explicit float64 -> float64 signature
19 | def g(x):  # integrand with a conditional: -exp(-x)*x^2 below 0.5, exp(x)*x^2 otherwise
20 |     if x<0.5:
21 |         h=-exp(-x)
22 |     else:
23 |         h= exp(x)
24 |     return h*x*x
25 | 
26 | @jit(float64(float64),nopython=True)  # explicit float64 -> float64 signature
27 | def implicit(t):
28 |     # implicit(t) = the root x of  4*sin(x)-exp(x)+t
29 |     # found by Newton iterations, starting from x = 0:
30 |     x=0.0
31 |     F= 4*sin(x)-exp(x)+t  # residual at the current iterate
32 |     while abs(F)> 1.e-15:  # no iteration cap: loops until the residual test passes
33 |         x-= F/(4*cos(x) - exp(x))  # Newton step; the denominator is the derivative of the residual in x
34 |         F=  4*sin(x)-exp(x)+t
35 |     return x
36 | 
37 | #----------------------main program starts here ------------------
38 | loops=10000
39 | n=1000
40 | 
41 | fic=open("RunningOn"+socket.gethostname(),"w")
42 |     
43 | for F in [f,g,implicit]:
44 |     # running once seems to improve performances (just in time compilation !)
45 |     sum=trapz(F,0.0,1.0,n)
46 |     #
47 |     t1 = time.time()
48 |     for i in range(0,loops):
49 |         sum=trapz(F,0.0,1.0,n)
50 |     t=(time.time()-t1)/loops
51 |     print(F.__name__," ",t," ",sum)
52 |     fic.write(F.__name__+": "+str(t)+"\n")
53 | 
54 | fic.close()
55 | print("end.")
56 | 


--------------------------------------------------------------------------------
/CallBack/Numba/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export NUMBA_ENABLE_AVX=1
4 | python3 main.py
5 | 


--------------------------------------------------------------------------------
/CallBack/Py/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | 
3 | ```
4 | python3 main.py
5 | ```
6 | 


--------------------------------------------------------------------------------
/CallBack/Py/main.py:
--------------------------------------------------------------------------------
 1 | from math import exp,sin,cos
 2 | import time
 3 | import socket
 4 | 
 5 | def trapz(F,a,b,n):  # composite trapezoidal rule for the callable F on [a,b], n sub-intervals
 6 |     h=(b-a)/n  # step size
 7 |     sum=0.5*(F(a)+F(b))  # both end points carry weight 1/2
 8 |     for i in range(1,n):  # interior nodes 1..n-1
 9 |         sum+=F(i*h)  # node taken as i*h (no +a): the true node only when a == 0
10 |     return sum*h
11 | 
12 | def f(x):  # branch-free integrand: x^2 * exp(-x)
13 |     return exp(-x)*x*x
14 | 
15 | def g(x):  # integrand with a conditional: -exp(-x)*x^2 below 0.5, exp(x)*x^2 otherwise
16 |     if x<0.5:
17 |         h=-exp(-x)
18 |     else:
19 |         h= exp(x)
20 |     return h*x*x
21 | 
22 | def implicit(t):
23 |     # implicit(t) = the root x of  4*sin(x)-exp(x)+t
24 |     # found by Newton iterations, starting from x = 0:
25 |     x=0.0
26 |     F= 4*sin(x)-exp(x)+t  # residual at the current iterate
27 |     while abs(F)> 1.e-15:  # no iteration cap: loops until the residual test passes
28 |         x-= F/(4*cos(x) - exp(x))  # Newton step; the denominator is the derivative of the residual in x
29 |         F=  4*sin(x)-exp(x)+t
30 |     return x
31 | 
32 | #----------------------main program starts here ------------------
33 | loops=10000
34 | n=1000
35 | 
36 | fic=open("RunningOn"+socket.gethostname(),"w")
37 | 
38 | for F in [f,g,implicit]:
39 |     t1 = time.time()
40 |     for i in range(0,loops):
41 |         sum=trapz(F,0.0,1.0,n)
42 |     t=(time.time()-t1)/loops
43 |     print(F.__name__," ",t," ",sum)
44 |     fic.write(F.__name__+": "+str(t)+"\n")
45 | fic.close()
46 | print("end.")
47 | 


--------------------------------------------------------------------------------
/CallBack/Pythran/README.md:
--------------------------------------------------------------------------------
 1 | Compile only what needs to be _pythranized_ and run the code; just type:
 2 | ```
 3 | ./script
 4 | ```
 5 | 
 6 | Warning: if you are using OpenBLAS, you will probably need to create a .pythranrc file in your home directory, like this:
 7 | 
 8 | ```
 9 | [compiler]
10 | blas=openblas
11 | ```


--------------------------------------------------------------------------------
/CallBack/Pythran/f.py:
--------------------------------------------------------------------------------
1 | from math import exp
2 | #pythran export capsule f(float)
3 | def f(x):  # branch-free integrand: x^2 * exp(-x)
4 |     return exp(-x)*x*x
5 | 


--------------------------------------------------------------------------------
/CallBack/Pythran/g.py:
--------------------------------------------------------------------------------
1 | from math import exp
2 | #pythran export capsule g(float)
3 | def g(x):  # integrand with a conditional: -exp(-x)*x^2 below 0.5, exp(x)*x^2 otherwise
4 |     if x<0.5:
5 |         h=-exp(-x)
6 |     else:
7 |         h= exp(x)
8 |     return h*x*x
9 | 


--------------------------------------------------------------------------------
/CallBack/Pythran/implicit.py:
--------------------------------------------------------------------------------
 1 | from math import exp,sin,cos
 2 | #pythran export capsule implicit(float)
 3 | def implicit(t):
 4 |     # implicit(t) = the root x of  4*sin(x)-exp(x)+t
 5 |     # found by Newton iterations, starting from x = 0:
 6 |     x=0.0
 7 |     F= 4*sin(x)-exp(x)+t  # residual at the current iterate
 8 |     while abs(F)> 1.e-15:  # no iteration cap: loops until the residual test passes
 9 |         x-= F/(4*cos(x) - exp(x))  # Newton step; the denominator is the derivative of the residual in x
10 |         F=  4*sin(x)-exp(x)+t
11 |     return x
12 | 


--------------------------------------------------------------------------------
/CallBack/Pythran/main.py:
--------------------------------------------------------------------------------
 1 | from math import exp
 2 | import time
 3 | import socket
 4 | from trapz import trapz
 5 | from f import f
 6 | from g import g
 7 | from implicit import implicit
 8 | 
 9 | 
10 | 
11 | #----------------------main program starts here ------------------
12 | loops=10000
13 | n=1000
14 | 
15 | fic=open("RunningOn"+socket.gethostname(),"w")
16 | 
17 | # workaround: PyCapsule' object has no attribute '__name__' (see ../Py/main.py)
18 | name={f:"f",g:"g",implicit:"implicit"}
19 | 
20 | for F in [f,g,implicit]:
21 |     t1 = time.time()
22 |     for i in range(0,loops):
23 |         sum=trapz(F,0.0,1.0,n)
24 |     t=(time.time()-t1)/loops
25 |     #print(F.__name__," ",t," ",sum)
26 |     #fic.write(F.__name__+": "+str(t)+"\n")
27 |     print(name[F]+": ",t," ",sum)
28 |     fic.write(name[F]+": "+str(t)+"\n")
29 | fic.close()
30 | print("end.")
31 | 


--------------------------------------------------------------------------------
/CallBack/Pythran/script:
--------------------------------------------------------------------------------
1 | pythran -march=native -O3 trapz.py
2 | pythran -march=native -O3 f.py
3 | pythran -march=native -O3 g.py
4 | pythran -march=native -O3 implicit.py
5 | echo "run test:"
6 | python3 main.py


--------------------------------------------------------------------------------
/CallBack/Pythran/trapz.py:
--------------------------------------------------------------------------------
1 | #pythran export trapz(float(float),float,float,int)
2 | def trapz(F,a,b,n):  # composite trapezoidal rule for the callable F on [a,b], n sub-intervals
3 |     h=(b-a)/n  # step size
4 |     sum=0.5*(F(a)+F(b))  # both end points carry weight 1/2
5 |     for i in range(1,n):  # interior nodes 1..n-1
6 |         sum+=F(i*h)  # node taken as i*h (no +a): the true node only when a == 0
7 |     return sum*h
8 | 


--------------------------------------------------------------------------------
/CallBack/README.md:
--------------------------------------------------------------------------------
 1 | ### Description:
 2 | 
 3 | We test some examples of callbacks:
 4 | 
 5 | - a very simple function (one line of code).
 6 | 
 7 | - a function with a conditional.
 8 | 
 9 | - a more computationally expensive function (implicitly defined; we use Newton's method to evaluate it).
10 | 
11 | All these functions are integrated using the trapezoidal rule.
12 | 
13 | ### Motivation:
14 | 
15 | It is often said that, for "small", inexpensive functions, the performance of a callback depends on the way the callback is passed:
16 | 
17 | - in C++, classical C-like function passing (with pointers) is generally described as inefficient; using function objects is supposed to allow inlining and should largely improve performance. For computationally expensive functions, the difference should become negligible.
18 | 
19 |  - How do Julia, Python, Pythran and Numba perform?
20 | 
21 | ### Author:
22 | 
23 | Thierry Dumont   tdumont@math.univ-lyon1.fr
24 | 
25 | ### The directories contain:
26 | 
27 | - **C++-Pointer**: computation in C++, "C" like method (pointers).
28 | 
29 | -  **C++-lambda**: computation in C++, passing a lambda function, when it is possible.
30 | 
31 | -  **C++-Fonctor**:  computation in C++, using object functions.
32 | 
33 | - **Py**:  Pure Python  computation.
34 | 
35 | - **Pythran**:  Python + Pythran  computation.
36 | 
37 | - **Numba**: Python + Numba  computation.
38 | 
39 | - **Ju**: Julia computation.
40 | 
41 | 
42 | ### Running the benchmarks
43 | 
44 | cd successively into C++-xxx, Py, Pythran, Numba and Ju; in each directory, look at the documentation.
45 | 
46 | Once you have run the benchmark in **all** directories, go to Results/
47 | and look at the documentation to know how to exploit the results.
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/CallBack/Results/README.md:
--------------------------------------------------------------------------------
 1 | Just run:
 2 | 
 3 | ```
 4 | ./Look.py
 5 | ```
 6 | This will give you each computing time divided by the C++-Fonctor computing time.
 7 | 
 8 | Beforehand, you must have run the benchmark in all the directories
 9 | (C++-Fonctor, C++-lambda, Ju, Py and Numba).
10 | 
11 | If you do not run all the benchmarks (or if you add one), just modify the list
12 | "directories to explore" in Look.py


--------------------------------------------------------------------------------
/CallBack/runAllTests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | #  This script is supposed to run *all* the tests, and then compute
 4 | #  the final "report" in Results/
 5 | #  Not sure it works everywhere. If it does not, improve it, or enter each
 6 | #  directory and look at README.md to know what to do. 
 7 | #
 8 | for i in C++-Fonctor  C++-lambda  C++-Pointer; do
 9 |     echo "--- "
10 |     echo "Test: "$i
11 |     echo "--- "
12 |     (cd $i; mkdir -p Build; cd Build; cmake ..; make; ./run)
13 | done
14 | for i in Ju Numba Pythran ; do 
15 |      echo "--- " 
16 |     echo  "Test: "$i
17 |     echo "--- "
18 |     (cd $i; ./script)
19 | done
20 | 
21 | echo "--- "
22 | echo  "Test: Py"
23 | echo "--- "
24 | (cd Py; python3 ./main.py)
25 | 
26 | 
27 | echo " "
28 | echo "Make the report:"
29 | (cd Results; ./Look.py)
30 | echo " "
31 | echo "To replay the report, cd Results/ and run ./Look.py "
32 | echo " "
33 | 


--------------------------------------------------------------------------------
/FeStiff/C++/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(Benchmarks)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
13 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
14 | 
15 | else ()
16 |   set (USING_GNU TRUE)
17 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -O3 -Wall  -std=c++14 -march=native")
18 | 
19 | endif ()
20 | 
21 | add_executable(
22 |   run
23 |   ../main.cpp
24 |   )
25 | 
26 | 
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/FeStiff/C++/README.md:
--------------------------------------------------------------------------------
 1 | You need cmake
 2 | 
 3 | Compilation:
 4 | -----------
 5 | 
 6 | ```
 7 | mkdir Build
 8 | cd Build
 9 | cmake ..
10 | make
11 | ```
12 | An executable file named "run" is created.
13 | 
14 | Run the code:
15 | ------------
16 | from Build/ directory, type:
17 | ```
18 | ./run
19 | ```
20 | 
21 | By default, we use g++. You can change the compiler to use, for
22 | example clang++.
23 | For this just replace:
24 | 
25 | ```
26 | cmake ..
27 | ```
28 | by:
29 | ```
30 | CXX=clang++ cmake ..
31 | ```
32 | 


--------------------------------------------------------------------------------
/FeStiff/C++/Stiffness.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <iostream>
 3 | using namespace std;
 4 | ////////////////////////////////////////////////////////////////////////
 5 | /// Stiffness matrix for P2  finite element in dimension 2 (Laplacian).
 6 | ///
 7 | /// Typical use: build one object, then call operator()(x,y,m) once per
 8 | /// triangle. The members ji and grads are scratch storage overwritten
 9 | /// by every call.
10 | ////////////////////////////////////////////////////////////////////////
11 | class Stiffness
12 | {
13 |   // The 4 coefficients written by JinvDetTrans().
14 |   double ji[4];
15 |   // Scratch: transformed gradients, indexed like gq.
16 |   double grads[36];
17 |   // gradients of basis functions on the reference element, at the
18 |   // middle of the edges.
19 |   // Index 6*f+2*p+c: function f (0..5), point p (0..2), component c (0..1).
20 |   double gq[36]={-1, -1, 1, 1, -1, -1, 1, 0, 1, 0, -1, 0, 0, -1,
21 | 		 0, 1, 0, 1, 0, -2, -2, -2, 2, 0, 0, 2, 2, 2, 2,
22 | 		 0, 0, 2, -2, -2, -2, 0};
23 |   // Only one half of the matrix (diagonal included) is computed, thanks to
24 |   // symmetry; ind(i,j), with j<=i, is the position of entry (i,j) in the
25 |   // packed storage.
26 |   inline int ind(int i,int j){return i*(i+1)/2+j;}
27 |   // transformation current element -> reference element (multiplied by the
28 |   // determinant). Fills ji from the coordinates of the 3 vertices.
29 |   inline void JinvDetTrans(double x[],double y[])
30 |   {
31 |     ji[0]=-y[0] + y[2] ; ji[1]= y[0] - y[1];
32 |     ji[2] = x[0] - x[2]; ji[3]= -x[0] + x[1];
33 |     // 4 flops
34 |   }
35 | public:
36 |   Stiffness()//empty constructor
37 |   {
38 |   }
39 |   ~Stiffness()//empty destructor
40 |   {
41 |   }
42 |   // compute the stiffness matrix.
43 |   // x[3], y[3]: IN, the triangle (coordinates of its 3 vertices).
44 |   // m[21] : OUT, the computed matrix, packed as described for ind().
45 |   // NOTE(review): det is used as a divisor without any check, so a
46 |   // degenerate (flat) triangle is presumably not a supported input.
47 |   void operator()(double x[],double y[],double m[])
48 |   {
49 |     
50 |     JinvDetTrans(x,y);
51 |     // Apply the 2x2 matrix ji to each of the 18 reference gradients.
52 |     for(int f=0;f<6;f++)
53 |       for(int p=0;p<3;p++)
54 | 	{
55 | 	  int d=6*f+2*p;
56 | 	  grads[d]  = ji[0]*gq[d]+ji[1]*gq[d+1];
57 | 	  grads[d+1]= ji[2]*gq[d]+ji[3]*gq[d+1];
58 | 	} //18 * 6 = 108 flops.
59 |     double det= -(x[1] - x[2])*y[0] + (x[0] - x[2])*y[1] - (x[0] - x[1])*y[2];
60 |     // det: 8 flops.
61 |     double dv=1.0/(6.0*det); // 1 flop
62 |     // Entry (i,j): sum over the 3 points of the dot product of the
63 |     // transformed gradients of functions i and j (scaled by dv below).
64 |     for(int i=0;i<6;i++)
65 |       for(int j=0;j<=i;j++)
66 | 	{
67 | 	  double s=0;
68 | 	  for(int k=0;k<3;k++)
69 | 	    s+=grads[6*i+2*k]*grads[6*j+2*k]+grads[6*i+2*k+1]*grads[6*j+2*k+1];
70 | 	  m[ind(i,j)]=s;
71 | 	}// 21* 4 = 84 flops.
72 |     // NOTE(review): the k loop performs 4 flops per iteration and runs 3
73 |     // times for each of the 21 (i,j) pairs, i.e. 252 flops rather than 84;
74 |     // the total below and `flops` look underestimated -- to be confirmed.
75 |     
76 |     for(int i=0;i<21;i++) m[i]*=dv; //21 flops
77 | 
78 |     //total:  4+18*6+ 8 + 84 +1+21 = 226 flops.
79 |  
80 |   }
81 |   // Flop count of one call of operator(), as tallied in the comments above.
82 |   static const int  flops = 226; 
83 | };
84 | 


--------------------------------------------------------------------------------
/FeStiff/C++/rando.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | /// Simple, reproducible linear congruential generator:
 3 | ///   seed <- (a*seed + c) mod m,  with m = 2^32.
 4 | ///
 5 | /// Coefficients and state are stored as long long: `long` is only
 6 | /// guaranteed to hold 32 bits, which is too small for m = 2^32 and for
 7 | /// the product a*seed. With 64 bits, a*seed + c stays below 2^63.
 8 | class rando{
 9 |     const long long a,c,m;
10 |     long long seed;
11 |     public:
12 |     /// Start from the fixed seed 123456789, so every run yields the
13 |     /// same sequence.
14 |     rando():a(1103515245LL),c(12345LL),m(4294967296LL)
15 |     {
16 |         seed=123456789LL;
17 |     }
18 |     /// Advance the generator and return the new state, in [0, m).
19 |     long long get(){
20 |         seed= (a * seed + c) % m;
21 |         return seed;
22 |     }
23 |     /// Advance the generator and return a value in [0, vmax).
24 |     /// \param vmax upper bound of the returned value (default 1).
25 |     double fv(double vmax=1.)
26 |     {
27 |         return vmax*(double) get()/m;
28 |     }
29 | };
30 | 


--------------------------------------------------------------------------------
/FeStiff/Ju/README.md:
--------------------------------------------------------------------------------
 1 | To run the code, just type:
 2 | ```
 3 | ./script
 4 | ```
 5 | if you want to profile memory usage (beware, it will slow down the
 6 | code!):
 7 | ```
 8 | ./script-m
 9 | ```
10 | 


--------------------------------------------------------------------------------
/FeStiff/Ju/Rando.jl:
--------------------------------------------------------------------------------
 1 | module Rando
 2 | 
 3 | # Export the names defined below; the generator function is `fv!`.
 4 | export RandoData, fv!
 5 | 
 6 | #=
 7 | Coefficients for a simple, but reproducible random number generator
 8 | (linear congruential: seed <- (a*seed + c) mod m, with m = 2^32).
 9 | The inner constructor always starts from the same seed.
10 | =#
11 | mutable struct RandoData
12 |     seed::Int64
13 |     a::Int64
14 |     c::Int64
15 |     m::Int64
16 |     RandoData() = new(123456789, 1103515245, 12345, 4294967296)
17 | end
18 | 
19 | #=
20 | Advance the generator R (its seed is modified, hence the `!`) and
21 | return a random float in [0,vmax).
22 | =#
23 | function fv!(R::RandoData, vmax=1.)
24 |     R.seed = (R.a * R.seed + R.c) % R.m
25 |     vmax * Float64(R.seed) / R.m
26 | end
27 | 
28 | end
29 | 


--------------------------------------------------------------------------------
/FeStiff/Ju/Stiffness.jl:
--------------------------------------------------------------------------------
 1 | module Stiffness
 2 | 
 3 | export StiffnessData
 4 | 
 5 | # Work arrays of the stiffness computation; both have size (2, 3, 6) and
 6 | # are indexed [component, point, basis function].
 7 | struct StiffnessData
 8 |     grads::Array{Float64, 3}    #scratch, overwritten by every call of op!
 9 |     gq::Array{Float64, 3}       #grads of 6 basis functions at 3 quadrature points.
10 |     
11 |     StiffnessData() = new(zeros(2, 3, 6),
12 |                           reshape([-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0,0.0,
13 |                                    -1.0,0.0,1.0,0.0,1.0,0.0,-2.0,-2.0,-2.0,2.0,0.0,0.0,2.0,2.0,
14 |                                    2.0,2.0,0.0,0.0,2.0,-2.0,-2.0,-2.0,0], 2, 3, 6)
15 |                           )
16 | end
17 | 
18 | # Compute the stiffness matrix of the triangle with vertices (x[k], y[k]),
19 | # k = 1:3. The 21 entries of the lower triangle (diagonal included) are
20 | # written row by row into m, which is also returned. S.grads is overwritten.
21 | # NOTE(review): det is used as a divisor without any check, so a degenerate
22 | # triangle is presumably not a supported input.
23 | function op!(S::StiffnessData, x::Array{Float64, 1}, y::Array{Float64, 1}, m::Array{Float64, 1})
24 |     # coefficients of the 2x2 matrix applied to the reference gradients
25 |     a11 = -y[1] + y[3]
26 |     a12 =  y[1] - y[2]
27 |     a21 =  x[1] - x[3]
28 |     a22 = -x[1] + x[2]
29 |     
30 |     for f=1:6
31 |         @simd for p=1:3
32 |             S.grads[1, p, f] = a11 * S.gq[1, p, f] + a12 * S.gq[2, p, f]
33 |             S.grads[2, p, f] = a21 * S.gq[1, p, f] + a22 * S.gq[2, p, f]
34 |         end
35 |         
36 |         # this seems slower:
37 |         # S.grads[1, 1:3, f] = a11 * S.gq[1, 1:3, f] + a12 * S.gq[2, 1:3, f]
38 |         # S.grads[2, 1:3, f] = a21 * S.gq[1, 1:3, f] + a22 * S.gq[2, 1:3, f]
39 |     end
40 | 
41 |     # This seems slower than the for loop above
42 |     # (a21 multiplies S.gq[1, :, :], as in the loop)
43 |     #@. @views S.grads[1, :, :] = a11 * S.gq[1, :, :] + a12 * S.gq[2, :, :]
44 |     #@. @views S.grads[2, :, :] = a21 * S.gq[1, :, :] + a22 * S.gq[2, :, :]
45 |     
46 |     det = -(x[2] - x[3])*y[1] + (x[1] - x[3])*y[2] - (x[1] - x[2])*y[3]
47 |     dv = 1.0 / (6.0 * det)
48 |     # ii: running index in the packed storage of m
49 |     ii = 1
50 |     for i=1:6
51 |         for j=1:i
52 |             s = 0.0
53 |             @simd for k=1:3
54 |                 s += S.grads[1, k, i] * S.grads[1, k, j] + S.grads[2, k, i] * S.grads[2, k, j]
55 |             end
56 |             m[ii] = dv * s
57 |             
58 |             # this seems slower:
59 |             # m[ii] = dv * (dot(S.grads[1, :, i], S.grads[1, :, j]) +
60 |             #         dot(S.grads[2, :, i], S.grads[2, :, j]))
61 |             
62 |             ii += 1
63 |         end
64 |     end
65 |     
66 |     return m
67 | end
68 | 
69 | end
70 | 


--------------------------------------------------------------------------------
/FeStiff/Ju/main.jl:
--------------------------------------------------------------------------------
 1 | # Driver of the Julia FeStiff benchmark: checks the kernel on the reference
 2 | # triangle, benchmarks it on random triangles, and writes the median time of
 3 | # one call (in seconds) to the file RunningOn<hostname>.
 4 | push!(LOAD_PATH, "./")
 5 | 
 6 | using Rando
 7 | using Stiffness
 8 | using BenchmarkTools
 9 | 
10 | 
11 | # Print the packed lower triangle m, one matrix row per line.
12 | function prSubDiag(m)
13 |     
14 |     for i=0:5
15 |         for j=0:i
16 |             print(m[div(i*(i+1), 2) + j+1], " ")
17 |         end
18 |         
19 |         println()
20 |     end
21 | end
22 | 
23 | # Overwrite x[1:3], then y[1:3], with draws from R scaled by 10.
24 | function RandomTriangle!(R::RandoData, x::Array{Float64, 1}, y::Array{Float64, 1})
25 |     for i in 1:3
26 |         x[i] = Rando.fv!(R, 10.)
27 |     end
28 |     
29 |     for i in 1:3
30 |         y[i] = Rando.fv!(R, 10.)
31 |     end
32 | end
33 | 
34 | 
35 | const S = StiffnessData()
36 | # reference triangle
37 | const x = Float64[0., 1., 0.]
38 | const y = Float64[0., 0., 1.]
39 | const m = zeros(21)
40 | const R = RandoData()
41 | const ntri = 1_000_000
42 | 
43 | 
44 | print("\nVerify that, on the reference element, we are coherent with sage ")
45 | println("(see ../sage/):\n")
46 | Stiffness.op!(S, x, y, m)
47 | prSubDiag(m)
48 | 
49 | print("\nWe must get the same result if we dilate the triangle:\n")
50 | x[:] *= 2.0
51 | y[:] *= 2.0
52 | Stiffness.op!(S, x, y, m)
53 | prSubDiag(m)
54 | 
55 | 
56 | println("\nNow, start the benchmark:")
57 | println(ntri, " triangles.")
58 | 
59 | io = IOContext(stdout, :compact => false)
60 | # The setup expression draws a new random triangle; ntri is passed as the
61 | # `samples` parameter of the benchmark.
62 | bench = @benchmarkable Stiffness.op!($S, $x, $y, $m) setup = (RandomTriangle!($R, $x, $y)) samples = ntri
63 | bench_res = run(bench)
64 | 
65 | show(io, bench_res)
66 | 
67 | println("\n")
68 | # the median time is scaled by 1e-9 before being written
69 | timeByTr = BenchmarkTools.median(bench_res).time * 1e-9
70 | 
71 | open("RunningOn" * gethostname(), "w") do f
72 |     write(f, string(timeByTr), "\n")
73 | end
74 | 
75 | println("end.")
76 | 
77 | # to profile the code, uncomment:
78 | # ntri1 = 100000
79 | # @profile for t=1:ntri1
80 | #     RandomTriangle!(R, x, y)
81 | #     Stiffness.op!(S, x, y, m)
82 | # end
83 | # Profile.print(format=:flat)
84 | 


--------------------------------------------------------------------------------
/FeStiff/Ju/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the Julia benchmark with bounds checking disabled and optimization level 3.
3 | julia --check-bounds=no -O3 main.jl
4 | 


--------------------------------------------------------------------------------
/FeStiff/Ju/script-m:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Same run as ./script, with allocation tracking of user code switched on
3 | # (memory profiling; README.md warns that this slows the code down).
4 | julia --check-bounds=no -O3 --track-allocation=user main.jl
5 | 


--------------------------------------------------------------------------------
/FeStiff/Numba/README.md:
--------------------------------------------------------------------------------
 1 | To run the code, just type:
 2 | 
 3 | ```
 4 | ./script
 5 | ```
 6 | 
 7 | If you do not have a machine with AVX instructions, comment out the
 8 | line
 9 | ```
10 | export NUMBA_ENABLE_AVX=1
11 | ```
12 | in ./script .
13 | 


--------------------------------------------------------------------------------
/FeStiff/Numba/RandomTriangle.py:
--------------------------------------------------------------------------------
 1 | from numba import jit
 2 | # Overwrite x[0:3], then y[0:3], with values drawn from R.fv(10.).
 3 | # Compiled by Numba in nopython mode (see the decorator).
 4 | @jit(nopython=True)
 5 | def RandomTriangle(R,x,y):
 6 |     # domain is [0,10]x[0,10]
 7 |     for i in range(0,3):
 8 |         x[i]=R.fv(10.)
 9 |     for i in range(0,3):
10 |         y[i]=R.fv(10.)
11 | 


--------------------------------------------------------------------------------
/FeStiff/Numba/Stiffness.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from numba.experimental import jitclass
 3 | from numba import float64,int64
 4 | # Numba types of the attributes of the class below: three 2-d float64 arrays.
 5 | spec=[
 6 |     ('ji',float64[:,:]),
 7 |     ('grads',float64[:,:]),
 8 |     ('gq',float64[:,:]),
 9 |     ]
10 | # Element stiffness matrix computation, compiled as a Numba jitclass.
11 | # ji and grads are scratch arrays overwritten by every call of op().
12 | @jitclass(spec)
13 | class Stiffness:
14 |     def __init__(self):
15 |         self.ji=np.empty((2,2))
16 |         self.grads=np.empty((18,2))
17 |         # NOTE(review): this first assignment is replaced right below.
18 |         self.gq=np.empty((18,2))
19 |         # 18 rows of 2 components; rows 3*i..3*i+2 are used together for
20 |         # index i of the matrix in op().
21 |         self.gq=np.array([-1., -1., 1., 1., -1., -1., 1., 0., 1., 0.,
22 |                           -1., 0., 0., -1., 0., 1., 0., 1., 0., -2., -2.,
23 |                           -2., 2., 0., 0., 2., 2., 2., 2.,0., 0., 2., -2.,
24 |                           -2., -2., 0.]).reshape(18,2)
25 | 
26 |  
27 |     # Compute the matrix of the triangle with vertices (x[k],y[k]), k=0..2.
28 |     # The 21 entries of the lower triangle (diagonal included) are written
29 |     # row by row into m; nothing is returned.
30 |     # NOTE(review): det is used as a divisor without any check.
31 |     def op(self,x,y,m):
32 |         self.ji[0,0]=-y[0] + y[2]
33 |         self.ji[0,1]= y[0] - y[1]
34 |         self.ji[1,0]= x[0] - x[2]
35 |         self.ji[1,1]=-x[0] + x[1]
36 | 
37 |         # apply the 2x2 matrix ji to every row of gq
38 |         c1=self.ji[0,0]
39 |         c2= self.ji[0,1]
40 |         self.grads[:,0] = c1*self.gq[:,0] + c2*self.gq[:,1]
41 |         c1=self.ji[1,0]
42 |         c2= self.ji[1,1]
43 |         self.grads[:,1] = c1*self.gq[:,0] + c2*self.gq[:,1]
44 |         
45 |         det= -(x[1] - x[2])*y[0] + (x[0] - x[2])*y[1] - (x[0] - x[1])*y[2]
46 |         dv=1.0/(6.0*det)
47 |         # ii: running index in the packed storage of m
48 |         ii=0
49 |         for i in range(0,6):
50 |             for j in range(0,i+1):
51 |                 m[ii]=dv*(np.dot(self.grads[3*i:3*i+3,0],self.grads[3*j:3*j+3,0])
52 |                           +np.dot(self.grads[3*i:3*i+3,1],
53 |                                   self.grads[3*j:3*j+3,1]) )
54 |                 ii+=1
55 | 


--------------------------------------------------------------------------------
/FeStiff/Numba/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | from Stiffness import *
 4 | from RandomTriangle import *
 5 | import time
 6 | import socket
 7 | 
 8 | 
 9 | ntri=10**7
10 | x=np.empty(3)
11 | y=np.empty(3)
12 | mat=np.empty(21)
13 | S=Stiffness()
14 | 
15 | print("\nVerify that, on the reference element, we are coherent with sage")
16 | print("(see ../sage/):\n")
17 | x[0]=0.0; x[1]=1.0; x[2]=0.0;
18 | y[0]=0.0; y[1]=0.0; y[2]=1.0;
19 | S.op(x,y,mat)
20 | 
21 | 
22 | for i in range(0,6):
23 |     print([mat[i*(i+1)//2+j] for j in range(0,i+1)])
24 | print("\nWe must get the same result if we dilate the triangle:\n")
25 | for i in range(0,3):
26 |     x[i]*=2.
27 |     y[i]*=2
28 | S.op(x,y,mat)
29 | for i in range(0,6):
30 |     print([mat[i*(i+1)//2+j] for j in range(0,i+1)])
31 | 
32 | print("\nNow, start the benchmark:")
33 | ntri=1000000
34 | print(ntri," triangles.")
35 | R = rando()
36 | t1 = time.time()
37 | for tr in range(0,ntri):
38 |     RandomTriangle(R,x,y)
39 |     S.op(x,y,mat)
40 | t=(time.time()-t1)
41 | print("first phase: ",t," seconds.")
42 | t1 = time.time()
43 | for tr in range(0,ntri):
44 |     RandomTriangle(R,x,y)
45 | tr=(time.time()-t1)
46 | print("second phase: ",tr," seconds.")
47 | 
48 | t-=tr
49 | print("Total time: ",t," seconds.")
50 | print("Time by triangle:", "{:.5e}".format(t/ntri),"second.")
51 | f=open("RunningOn"+socket.gethostname(),"w")   
52 | f.write(str(t/ntri)+"\n")
53 | f.close()
54 | print("end.")
55 | 


--------------------------------------------------------------------------------
/FeStiff/Numba/rando.py:
--------------------------------------------------------------------------------
 1 | from numba import jitclass,float64,int64
 2 | specrando=[
 3 |     ("seed",int64), ("a",int64),("c",int64),("m",int64)]
 4 | @jitclass(specrando)
 5 | class rando:
 6 |     def __init__(self):
 7 |         self.seed=123456789
 8 |         self.a=1103515245
 9 |         self.c=12345
10 |         self.m=2**32
11 |     def get(self):
12 |         self.seed= (self.a * self.seed + self.c) % self.m
13 |         return self.seed
14 |     def fv(self,vmax=1.):
15 |         return vmax*float(self.get())/self.m
16 | if __name__ == "__main__":
17 |     R=rando()
18 |     for i in range(0,100):
19 |         print(R.fv(10.))
20 | 


--------------------------------------------------------------------------------
/FeStiff/Numba/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Comment out the next line on a machine without AVX instructions
4 | # (see README.md).
5 | export NUMBA_ENABLE_AVX=1
6 | python3 main.py
7 | 


--------------------------------------------------------------------------------
/FeStiff/Py/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | 
3 | ```
4 | python3 main.py
5 | ```
6 | 


--------------------------------------------------------------------------------
/FeStiff/Py/RandomTriangle.py:
--------------------------------------------------------------------------------
1 | def RandomTriangle(R,x,y):
2 |     # domain is [0,10]x[0,10]
3 |     for i in range(0,3):
4 |         x[i]=R.fv(10.)
5 |     for i in range(0,3):
6 |         y[i]=R.fv(10.)
7 | 


--------------------------------------------------------------------------------
/FeStiff/Py/Stiffness.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | class Stiffness:
 3 |     """Element stiffness matrix computation with numpy.
 4 | 
 5 |     ji and grads are scratch arrays overwritten by every call of op().
 6 |     """
 7 |     def __init__(self):
 8 |         self.ji=np.empty((2,2))
 9 |         self.grads=np.empty((18,2))
10 |         # 18 rows of 2 components; rows 3*i..3*i+2 are used together for
11 |         # index i of the matrix in op(). Presumably the gradients of the 6
12 |         # basis functions at 3 points, as in the C++ version -- to confirm.
13 |         self.gq=np.array([[-1., -1.],
14 |                           [ 1.,  1.],
15 |                           [-1., -1.],
16 |                           [ 1.,  0.],
17 |                           [ 1.,  0.],
18 |                           [-1.,  0.],
19 |                           [ 0., -1.],
20 |                           [ 0.,  1.],
21 |                           [ 0.,  1.],
22 |                           [ 0., -2.],
23 |                           [-2., -2.],
24 |                           [ 2.,  0.],
25 |                           [ 0.,  2.],
26 |                           [ 2.,  2.],
27 |                           [ 2.,  0.],
28 |                           [ 0.,  2.],
29 |                           [-2., -2.],
30 |                           [-2.,  0.]])
31 | 
32 | 
33 | 
34 |     def op(self,x,y,m):
35 |         """Compute the matrix of the triangle with vertices (x[k],y[k]), k=0..2.
36 | 
37 |         The 21 entries of the lower triangle (diagonal included) are written
38 |         row by row into m; nothing is returned. det is used as a divisor
39 |         without any check.
40 |         """
41 |         self.ji[0,0]=-y[0] + y[2]
42 |         self.ji[0,1]= y[0] - y[1]
43 |         self.ji[1,0]= x[0] - x[2]
44 |         self.ji[1,1]=-x[0] + x[1]
45 | 
46 |         # apply the 2x2 matrix ji to every row of gq
47 |         c1=self.ji[0,0]
48 |         c2= self.ji[0,1]
49 |         self.grads[:,0] = c1*self.gq[:,0] + c2*self.gq[:,1]
50 |         c1=self.ji[1,0]
51 |         c2= self.ji[1,1]
52 |         self.grads[:,1] = c1*self.gq[:,0] + c2*self.gq[:,1]
53 |         
54 |         det= -(x[1] - x[2])*y[0] + (x[0] - x[2])*y[1] - (x[0] - x[1])*y[2]
55 |         dv=1.0/(6.0*det)
56 |         # ii: running index in the packed storage of m
57 |         ii=0
58 |         for i in range(0,6):
59 |             i3=3*i
60 |             for j in range(0,i+1):
61 |                 j3=3*j
62 |                 m[ii]=dv*(self.grads[i3:i3+3,0].dot(self.grads[j3:j3+3,0])
63 |                           +self.grads[i3:i3+3,1].dot(self.grads[j3:j3+3,1])
64 |                           )
65 |                 ii+=1
66 | 


--------------------------------------------------------------------------------
/FeStiff/Py/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | from Stiffness import *
 4 | from RandomTriangle import *
 5 | import time
 6 | import socket
 7 | import cProfile
 8 | 
 9 | ntri=10**7
10 | x=np.empty(3)
11 | y=np.empty(3)
12 | mat=np.empty(21)
13 | S=Stiffness()
14 | 
15 | print("\nVerify that, on the reference element, we are coherent with sage")
16 | print("(see ../sage/):\n")
17 | x[0]=0.0; x[1]=1.0; x[2]=0.0;
18 | y[0]=0.0; y[1]=0.0; y[2]=1.0;
19 | S.op(x,y,mat)
20 | 
21 | 
22 | for i in range(0,6):
23 |     print([mat[i*(i+1)//2+j] for j in range(0,i+1)])
24 | print("\nWe must get the same result if we dilate the triangle:\n")
25 | for i in range(0,3):
26 |     x[i]*=2.
27 |     y[i]*=2
28 | S.op(x,y,mat)
29 | for i in range(0,6):
30 |     print([mat[i*(i+1)//2+j] for j in range(0,i+1)])
31 | 
32 | print("\nNow, start the benchmark:")
33 | ntri=1000000
34 | print(ntri," triangles.")
35 | R = rando()
36 | t1 = time.time()
37 | for tr in range(0,ntri):
38 |     RandomTriangle(R,x,y)
39 |     S.op(x,y,mat)
40 | t=(time.time()-t1)
41 | print("first phase: ",t," seconds.")
42 | t1 = time.time()
43 | for tr in range(0,ntri):
44 |     RandomTriangle(R,x,y)
45 | tr=(time.time()-t1)
46 | print("second phase: ",tr," seconds.")
47 | 
48 | t-=tr
49 | print("Total time: ",t," seconds.")
50 | print("Time by triangle:", "{:.5e}".format(t/ntri),"second.")
51 | f=open("RunningOn"+socket.gethostname(),"w")   
52 | f.write(str(t/ntri)+"\n")
53 | f.close()
54 | print("fin")
55 | 


--------------------------------------------------------------------------------
/FeStiff/Py/rando.py:
--------------------------------------------------------------------------------
 1 | class rando:
 2 |     def __init__(self):
 3 |         self.seed=123456789
 4 |         self.a=1103515245
 5 |         self.c=12345
 6 |         self.m=2**32
 7 |     def get(self):
 8 |         self.seed= (self.a * self.seed + self.c) % self.m
 9 |         return self.seed
10 |     def fv(self,vmax=1.):
11 |         return vmax*float(self.get())/self.m
12 | if __name__ == "__main__":
13 |     R=rando()
14 |     for i in range(0,100):
15 |         print(R.fv(10.))
16 | 


--------------------------------------------------------------------------------
/FeStiff/Pythran/README.md:
--------------------------------------------------------------------------------
 1 | Compile what should be  just _pythranized_ and run; just type:
 2 | ```
 3 | ./script
 4 | ```
 5 | 
 6 | **Note:** if you are using OpenBLAS, you probably need to create a `.pythranrc` file in your home directory, like this:
 7 | 
 8 | ```
 9 | [compiler]
10 | blas=openblas
11 | ```


--------------------------------------------------------------------------------
/FeStiff/Pythran/RandomTriangle.py:
--------------------------------------------------------------------------------
1 | def RandomTriangle(R,x,y):
2 |     # domain is [0,10]x[0,10]
3 |     for i in range(0,3):
4 |         x[i]=R.fv(10.)
5 |     for i in range(0,3):
6 |         y[i]=R.fv(10.)
7 | 


--------------------------------------------------------------------------------
/FeStiff/Pythran/StiffOut.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | # Kernel compiled by Pythran (see ./script); the export line below declares
 4 | # the argument types and must stay in sync with the signature.
 5 | #
 6 | # x, y  : coordinates of the 3 vertices of the triangle.
 7 | # m     : output, the 21 entries of the lower triangle (diagonal included),
 8 | #         written row by row.
 9 | # ji    : not used in the body.
10 | # grads : scratch array with 18 rows of 2 values, overwritten.
11 | # gq    : input array with 18 rows of 2 values.
12 | # Nothing is returned. det is used as a divisor without any check.
13 | #pythran export StiffOut(float[:],float[:],float[:],float[:,:],float[:,:],float[:,:])
14 | def StiffOut(x,y,m,ji,grads,gq):
15 |         a11=-y[0] + y[2]
16 |         a12= y[0] - y[1]
17 |         a21= x[0] - x[2]
18 |         a22=-x[0] + x[1]
19 |         
20 |         # apply the 2x2 matrix (a11 a12; a21 a22) to every row of gq
21 |         for i in range(0,18):
22 |                 grads[i,0] = a11*gq[i,0] + a12*gq[i,1]
23 |                 grads[i,1] = a21*gq[i,0] + a22*gq[i,1]
24 | 
25 |         # this seems slower:
26 |         # grads[:,0] = a11*gq[:,0] + a12*gq[:,1]
27 |         # grads[:,1] = a21*gq[:,0] + a22*gq[:,1]
28 |         
29 |         det= -(x[1] - x[2])*y[0] + (x[0] - x[2])*y[1] - (x[0] - x[1])*y[2]
30 |         dv=1.0/(6.0*det)
31 |         # ii: running index in the packed storage of m
32 |         ii=0
33 |         # in the following lines, if we replace 3*i by i3 => cannot compile
34 |         # same when replacing 3*j by j3
35 |         for i in range(0,6):
36 |             for j in range(0,i+1):
37 |                  m[ii]=dv*(grads[3*i:3*i+3,0].dot(grads[3*j:3*j+3,0])
38 |                       +grads[3*i:3*i+3,1].dot(grads[3*j:3*j+3,1]) )
39 |                  ii+=1
40 | 


--------------------------------------------------------------------------------
/FeStiff/Pythran/Stiffness.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | #from StiffOut import *
 3 | import StiffOut as Sout
 4 | class Stiffness:
 5 |     """Holder of the work arrays handed to the compiled kernel Sout.StiffOut."""
 6 |     def __init__(self):
 7 |         # ji is passed to StiffOut, which does not use it.
 8 |         self.ji=np.empty((2,2))
 9 |         # scratch array, overwritten by StiffOut
10 |         self.grads=np.empty((18,2))
11 |         # 18 rows of 2 components, read by StiffOut
12 |         self.gq=np.array([[-1., -1.],
13 |                           [ 1.,  1.],
14 |                           [-1., -1.],
15 |                           [ 1.,  0.],
16 |                           [ 1.,  0.],
17 |                           [-1.,  0.],
18 |                           [ 0., -1.],
19 |                           [ 0.,  1.],
20 |                           [ 0.,  1.],
21 |                           [ 0., -2.],
22 |                           [-2., -2.],
23 |                           [ 2.,  0.],
24 |                           [ 0.,  2.],
25 |                           [ 2.,  2.],
26 |                           [ 2.,  0.],
27 |                           [ 0.,  2.],
28 |                           [-2., -2.],
29 |                           [-2.,  0.]])
30 | 
31 | 
32 |     def op(self,x,y,m):
33 |         """Write into m the matrix of the triangle (x,y); nothing is returned."""
34 |         Sout.StiffOut(x,y,m,self.ji,self.grads,self.gq)
35 | 


--------------------------------------------------------------------------------
/FeStiff/Pythran/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | from Stiffness import *
 4 | from RandomTriangle import *
 5 | import time
 6 | import socket
 7 | 
 8 | 
 9 | ntri=10**7
10 | x=np.empty(3)
11 | y=np.empty(3)
12 | mat=np.empty(21)
13 | S=Stiffness()
14 | 
15 | print("\nVerify that, on the reference element, we are coherent with sage")
16 | print("(see ../sage/):\n")
17 | x[0]=0.0; x[1]=1.0; x[2]=0.0;
18 | y[0]=0.0; y[1]=0.0; y[2]=1.0;
19 | S.op(x,y,mat)
20 | 
21 | 
22 | for i in range(0,6):
23 |     print([mat[i*(i+1)//2+j] for j in range(0,i+1)])
24 | print("\nWe must get the same result if we dilate the triangle:\n")
25 | for i in range(0,3):
26 |     x[i]*=2.
27 |     y[i]*=2
28 | S.op(x,y,mat)
29 | for i in range(0,6):
30 |     print([mat[i*(i+1)//2+j] for j in range(0,i+1)])
31 | 
32 | print("\nNow, start the benchmark:")
33 | ntri=1000000
34 | print(ntri," triangles.")
35 | R = rando()
36 | t1 = time.time()
37 | for tr in range(0,ntri):
38 |     RandomTriangle(R,x,y)
39 |     S.op(x,y,mat)
40 | t=(time.time()-t1)
41 | print("first phase: ",t," seconds.")
42 | t1 = time.time()
43 | for tr in range(0,ntri):
44 |     RandomTriangle(R,x,y)
45 | tr=(time.time()-t1)
46 | print("second phase: ",tr," seconds.")
47 | 
48 | t-=tr
49 | print("Total time: ",t," seconds.")
50 | print("Time by triangle:", "{:.5e}".format(t/ntri),"second.")
51 | f=open("RunningOn"+socket.gethostname(),"w")   
52 | f.write(str(t/ntri)+"\n")
53 | f.close()
54 | print("fin")
55 | 


--------------------------------------------------------------------------------
/FeStiff/Pythran/rando.py:
--------------------------------------------------------------------------------
 1 | class rando:
 2 |     def __init__(self):
 3 |         self.seed=123456789
 4 |         self.a=1103515245
 5 |         self.c=12345
 6 |         self.m=2**32
 7 |     def get(self):
 8 |         self.seed= (self.a * self.seed + self.c) % self.m
 9 |         return self.seed
10 |     def fv(self,vmax=1.):
11 |         return vmax*float(self.get())/self.m
12 | if __name__ == "__main__":
13 |     R=rando()
14 |     for i in range(0,100):
15 |         print(R.fv(10.))
16 | 


--------------------------------------------------------------------------------
/FeStiff/Pythran/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Compile the kernel StiffOut.py with Pythran, then run the benchmark.
3 | pythran -march=native -O3 -DNDEBUG  StiffOut.py
4 | echo "run test:"
5 | python3 main.py
6 | 


--------------------------------------------------------------------------------
/FeStiff/README.md:
--------------------------------------------------------------------------------
 1 | ### Description:
 2 | 
 3 | These programs compute the elementary stiffness matrix for the Poisson
 4 | equation on a collection of  randomly chosen triangles, using 
 5 | 2-dimensional  P2 Lagrange finite elements (every book on finite elements
 6 | describes this).
 7 | 
 8 | 
 9 | ### Motivation:
10 | 
11 | This computation is simple, but representative of many numerical
12 | computations. It is self-contained: there is no linear algebra and thus
13 | no need for external libraries.
14 | 
15 | ### Author:
16 | 
17 | Thierry Dumont   tdumont@math.univ-lyon1.fr
18 | 
19 | ### The directories contain:
20 | 
21 | - **C++**: computation in C++.
22 | 
23 | - **Py**:  Python + numpy computation.
24 | 
25 | - **Pythran**:  Python + numpy + Pythran  computation.
26 | 
27 | - **Numba**: Python + numpy + Numba  computation.
28 | 
29 | - **Ju**: Julia computation.
30 | 
31 | - **sage**: _SageMath_ material (see below).
32 | ### Running the benchmarks
33 | 
34 | cd successively into C++, Py, Pythran, Numba and Ju; in each directory, look at the documentation.
35 | 
36 | Once you have run the benchmark in **all** directories (except sage,
37 | which is not for benchmarking!), go to Results/
38 | and look at the documentation to know how to exploit the results.
39 | 
40 | #### Sage material:
41 | 
42 | In directory "sage": how to use a (free!) computer algebra system to
43 |  help compute and implement finite elements. 
44 | 
45 | ### Results:
46 | Have a look at [this page in the Wiki](https://github.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/wiki/5-The-FeStiff-benchmark).
47 | 


--------------------------------------------------------------------------------
/FeStiff/Results/Look.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | #
 3 | #comparison between c++ and other computations
 4 | #
 5 | import socket
 6 | from pathlib import Path
 7 | 
 8 | def parsit(D,name,l):
 9 |     #
10 |     D[name]=float(l.replace("\n",""))
11 | # directories to explore ---------   
12 | files=[
13 |     "Py",
14 |     "Ju",
15 |     "Pythran",
16 |     "Numba",
17 | ]
18 | cpp="C++"
19 | #-------------------------------------------
20 | # build a dict  n-> computing time for  C++
21 | C={}
22 | with open("../"+cpp+"/RunningOn"+socket.gethostname(), 'r') as file:
23 |     for line in file:
24 |         C[cpp]=float(line.replace("\n",""))
25 | for n in files:
26 |     filename= "../"+n+"/RunningOn"+socket.gethostname()
27 |     p_file = Path(filename)
28 |     if p_file.is_file():
29 |         with open(filename,"r") as file:
30 |             for line in file:
31 |                 C[n]=float(line.replace("\n",""))
32 |     else:
33 |         print("\n\nFile "+filename+ " does not exists !")
34 |         print("did you run test in "+n+" ?\n\n")
35 |         
36 | Ts=sorted([(n,C[n]/C[cpp]) for n in C],key=lambda x: x[1])
37 | 
38 | print("\nComputing time / Computing time in C++:\n")
39 | for s in Ts:
40 |     if s[0] != "C++":
41 |         print("* ",s[0].ljust(7)," : ",str(s[1])[0:5])
42 | print("\n")
43 | 


--------------------------------------------------------------------------------
/FeStiff/Results/README.md:
--------------------------------------------------------------------------------
 1 | Just run:
 2 | 
 3 | ```
 4 | ./Look.py
 5 | ```
 6 | This will give you the computing time divided by C++ computing time.
 7 | 
 8 | Before that, you must have run the benchmark in all directories
 9 | (C++, Ju, Py, Pythran and Numba).
10 | 


--------------------------------------------------------------------------------
/FeStiff/runAllTests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | #  This script is supposed to run *all* the tests, and then compute
 4 | #  the final "report" in Results/
 5 | #  Not sure it works everywhere. If it does not, improve it, or enter each
 6 | #  directory and look at README.md to know what to do. 
 7 | #
 8 | #  Inside each subshell the steps are chained with &&: when one of them
 9 | #  fails (for instance a missing directory or a failed build), the next
10 | #  ones are not run from the wrong place or on a stale binary. The script
11 | #  itself then goes on with the next test.
12 | #
13 | echo "Test: C++"
14 | echo "--- "
15 | (cd C++ && mkdir -p Build && cd Build && cmake .. && make && ./run)
16 | 
17 | for i in Ju Numba  Pythran ; do
18 |     echo  "Test: $i"
19 |     echo "--- " 
20 |     (cd "$i" && ./script)
21 | done
22 | 
23 | echo "Test: Py"
24 | echo "--- "
25 | (cd Py && python3 main.py)
26 | 
27 | 
28 | echo " "
29 | echo "Make the report:"
30 | (cd Results && ./Look.py)
31 | echo " "
32 | echo "To replay the report, cd Results/ and run ./Look.py "
33 | echo " "
34 | 


--------------------------------------------------------------------------------
/FeStiff/sage/README.md:
--------------------------------------------------------------------------------
 1 | #### This small SageMath script is supposed to show how computer algebra can help implementing finite elements (this is a very simple example!). 
 2 | 
 3 | You need [SageMath](http://www.sagemath.org/) to run this snippet.
 4 | 
 5 | 
 6 | * **Stiff.ipynb:**  is a SageMath _notebook_. To launch it:
 7 | ```
 8 | >sage -n jupyter
 9 | ```
10 | and let Jupyter and SageMath guide you. 
11 | 
12 | Note that if you click on Stiff.ipynb here,
13 | Github will display the results (as stored in the notebook).
14 | 
15 | 
16 | ##### Note: ####
17 | If you want to learn _SageMath_, you can read the book _Mathematical Computation
18 | with Sage_ (which now is available in French, English and German), and
19 | for which freely available pdf files are [downloadable 
20 | here](https://members.loria.fr/PZimmermann/sagebook/english.html) and [there](http://sagebook.gforge.inria.fr/).
21 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/.gitignore:
--------------------------------------------------------------------------------
 1 | *.log
 2 | _*_.tex
 3 | mr2014.pdf
 4 | *.aux
 5 | toto*
 6 | *out
 7 | *.bbl
 8 | *.blg
 9 | Build*
10 | Cfile*
11 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/Doc/doxygen_sqlite3.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/C++/ARRAY/Doc/doxygen_sqlite3.db


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/README:
--------------------------------------------------------------------------------
 1 | include/ :the library.
 2 | try/     :a test suite.
 3 | 
 4 | -> to generate the documentation: 
 5 |   doxygen Doxyfile
 6 | 
 7 | -> to compile and run the test suite:
 8 |    cd try
 9 |    cd Build
10 |    cmake ../CMakeLists.txt && make
11 |    ./run
12 | 
13 |    have a look at CMakeLists.txt before !
14 | 
15 | bugs and remarks: tdumont@math.univ-lyon1.fr
16 |    
17 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/ApplyFonc.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef ApplyFonc__h
 2 | #define ApplyFonc__h
 3 | //! apply a function F (of one variable) to the whole Array.
 4 | template<T F(T)> void applyFonc()
 5 | {
 6 |   for(int i=0;i<xsize;i++)
 7 |     x[i]=F(x[i]);
 8 | }
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/Array_Array_Operators.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef Array_Array_Operators__h
 2 | #define Array_Array_Operators__h
 3 | //! Add an array with the same pattern
 4 | //! \param X
 5 | //! \note if there is no conversion from Q to T => error at compile time.
 6 | template<class Q> inline void operator+=(Array<Q,n,Engine>& X)
 7 | {
 8 | #ifdef DEBUG
 9 |   if(!checkSamePattern(X))
10 |     throw ArrayException("Array::operator+= : patterns differ");
11 | #endif
12 |   const int temp=xsize;
13 |   if( temp!=X.size() )
14 |     throw ArrayException("Array += : note the same size");
15 |   Q* Xx=X.data();
16 | #ifdef ICC
17 | #pragma ivdep
18 | #endif
19 |   for(int i=0;i<temp;i++) x[i]+=Xx[i];
20 | }
21 | //! Substract an array with the same pattern
22 | //! \param X
23 | //! \note if there is no conversion from Q to T => error at compile time.
24 | template<class Q> inline void operator-=(Array<Q,n,Engine>& X)
25 | {
26 | #ifdef DEBUG
27 |   if(!checkSamePattern(X))
28 |     throw ArrayException("Array::operator-= : patterns differ");
29 | #endif
30 |   if( xsize!=X.size() )
31 |     throw ArrayException("Array -= : note the same size");
32 |   Q* Xx=X.data();
33 |   for(int i=0;i<xsize;i++) x[i]-=Xx[i];
34 | }
35 | #endif
36 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/Array_Constructors_RangeBased.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef Array_Constructors_RangeBased__h
 2 | #define Array_Constructors_RangeBased__h
 3 | private:
 4 | //! constructor based on an array of Range
 5 | //! \note x array is created or not, depending on _deletable.
 6 | //! \note this is for internal use, to build references to sub-arrays.
 7 | //! \param R
 8 | //! \param _deletable
 9 | Array(Range R[],bool _deletable=true)
10 | {
11 |   engine.init(R,n);
12 |   xsize=engine.size();
13 |   deletable=_deletable;
14 |   if(_deletable)
15 |     x=new T[xsize];
16 | }
17 | public:
18 | //! constructor for 1d Array, based on a range.
19 | //! \param R0 range.
20 | Array(Range R0)
21 | {
22 |   if(n!=1) 
23 |     throw ArrayException("Array constructor incompatible with dimension=",n);
24 |   engine.init(R0);
25 |   xsize=engine.size();
26 |   x=new T[xsize];
27 | }
28 | //! constructor for 2d Array, based on ranges.
29 | //! \param R0 range.
30 | //! \param R1 range
31 | Array(Range R0,Range R1)
32 | {
33 |   if(n!=2) 
34 |     throw ArrayException("Array constructor incompatible with dimension=",n);
35 |   engine.init(R0,R1);
36 |   xsize=engine.size();
37 |   x=new T[xsize];
38 | }
39 | //! constructor for 3d Array, based on ranges.
40 | //! \param R0 range.
41 | //! \param R1 range
42 | //! \param R2 range
43 | Array(Range R0,Range R1,Range R2)
44 | {
45 |   if(n!=3) 
46 |     throw ArrayException("Array constructor incompatible with dimension=",n);
47 |   xsize=(R0.end-R0.begin)*(R1.end-R1.begin)*(R2.end-R2.begin);
48 |   engine.init(R0,R1,R2);
49 |   xsize=engine.size();
50 |   x=new T[xsize];
51 | }
52 | //! constructor for 4d Array, based on ranges.
53 | //! \param R0 range.
54 | //! \param R1 range
55 | //! \param R2 range
56 | //! \param R3 range
57 | Array(Range R0,Range R1,Range R2,Range R3)
58 | {
59 |   if(n!=4) 
60 |     throw ArrayException("Array constructor incompatible with dimension=",n);
61 |   xsize=(R0.end-R0.begin)*(R1.end-R1.begin)*(R2.end-R2.begin)*
62 |     (R3.end-R3.begin);
63 |   engine.init(R0,R1,R2,R3);
64 |   xsize=engine.size();
65 |   x=new T[xsize];
66 | }
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/Array_Output.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef Array_Output__h
 2 | #define Array_Output__h
 3 | #include "Array.hpp"
 4 | #include "ArrayException.hpp"
 5 | #include <iostream>
 6 | namespace  Arrays{ 
 7 |   ////////////////////////////////////////////////////////////////////
 8 |   /// Overload operator <<.
 9 |   ///
10 |   /// This is restricted to Arrays with n<=2 indices!
11 |   ///////////////////////////////////////////////////////////////////
12 |   //! \param out the stream.
13 |   //! \param A the array.
14 |   template<class T,int n,class Engine>  std::ostream& 
15 |              operator<<(std::ostream& out,const Array<T,n,Engine> &A)
16 |   {
17 |     if(n>2)
18 |       out<<endl<<"Arrays::Array, with n= "<<n<<
19 | 	" no operator<< for n>2"<<endl;
20 |     else
21 |       if(n==1)
22 | 	for(int i=A.mins(0);i<A.maxs(0);i++)
23 | 	  out<<A(i)<<" ";
24 |       else //n==2.
25 | 	for(int i=A.mins(0);i<A.maxs(0);i++)
26 | 	  {
27 | 	    for(int j=A.mins(1);j<A.maxs(1);j++)
28 | 	      out<<A(i,j)<<" ";
29 | 	    out<<endl<<endl;;
30 | 	  }
31 | 
32 |     return out;
33 |   }
34 | };
35 | #endif
36 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/Array_Scalar_Operators.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef Array_Scalar_Operators__h
 2 | #define Array_Scalar_Operators__h
 3 | //! Make all the components equal the same value
 4 | //! \param value
 5 | inline void operator=(T value)
 6 | {
 7 |   for(int i=0;i<xsize;i++) 
 8 |     x[i]=value;
 9 | }
10 | //! Add the same value to all components
11 | //! \param value
12 | inline void operator+=(T value)
13 | {
14 |   for(int i=0;i<xsize;i++) x[i]+=value;
15 | }
16 | //! Substract the same value to all components
17 | //! \param value
18 | inline void operator-=(T value)
19 | {
20 |   for(int i=0;i<xsize;i++) x[i]-=value;
21 | }
22 | //! Multiply all components by the same value
23 | //! \param value
24 | inline void operator*=(T value)
25 | {
26 |   for(int i=0;i<xsize;i++) x[i]*=value;
27 | }
28 | //! Divide all components by the same value
29 | //! \param value
30 | inline void operator/=(T value)
31 | {
32 |   for(int i=0;i<xsize;i++) x[i]/=value;
33 | }
34 | #endif
35 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/Array_iterators.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef Array_iterators__h
 2 | #define Array_iterators__h
 3 | ////////////////////////////////////////////////////////////////////////
 4 | /// Iterator class for the Array class
 5 | ///   should be compatible with stl iterators.
 6 | /// \brief iterator
 7 | ///////////////////////////////////////////////////////////////////////
 8 | #include <iterator>
 9 | template<class T> class ArrayIterator : 
10 |   public std::iterator<std::input_iterator_tag, T>
11 | {
12 |   T* p;
13 | public:
14 |   //! constructor
15 |   //! \param x C vector of an Array.
16 |   ArrayIterator(T* x) :p(x) {}
17 |   //! copy constructor.
18 |   //! \param mit
19 |   ArrayIterator(const ArrayIterator& mit) :  p(mit.p) {}
20 |   //! pre-instantiation (++I)
21 |   ArrayIterator& operator++() {++p;return *this;}
22 |   //! post-instantiation (I++)
23 |   ArrayIterator operator++(int) {ArrayIterator tmp(*this); 
24 |     ++p; return tmp;}
25 |   //! equality test
26 |   //! \param rhs test *this with rhs.
27 |   bool operator==(const ArrayIterator& rhs) {return p==rhs.p;}
28 |   //! non equality test
29 |   //! \param rhs test *this with rhs.
30 |   bool operator!=(const ArrayIterator& rhs) {return p!=rhs.p;}
31 |   //! dereferentiation.
32 |   T& operator*() {return *p;}
33 |   //! return the pointer.
34 |   T* Tpointer() {return p;}
35 | };
36 | #endif
37 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/Array_resize.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef Array_resize__h
 2 | #define Array_resize__h
 3 | private:
 4 | //! Really resize.
 5 | //! \param  the_size
 6 | inline void really_resize(int the_size)
 7 | {
 8 |   if(x!=NULL) delete[] x;
 9 |   x=new T[xsize];
10 |   deletable=true;
11 | }
12 | public:
13 | //! Resize a 1d Array using a Range.
14 | //! \param  R0
15 | void resize(Range R0)
16 | {
17 |   if(n!=1) 
18 |     throw ArrayException("Array: resize incompatible with dimension=",n);
19 |   engine.init(R0);
20 |   xsize=engine.size();
21 |   really_resize(xsize);
22 | 
23 | }
24 | //! Resize a 2d Array using Ranges.
25 | //! \param  R0
26 | //! \param R1
27 | void resize(Range R0,Range R1)
28 | {
29 |   if(n!=2) 
30 |     throw ArrayException("Array: resize incompatible with dimension=",n);
31 |   engine.init(R0,R1);
32 |   xsize=engine.size();
33 |   really_resize(xsize);
34 | }
35 | //! Resize a 3d Array using Ranges.
36 | //! \param  R0
37 | //! \param  R1
38 | //! \param  R2
39 | void resize(Range R0,Range R1,Range R2)
40 | {
41 |   if(n!=3) 
42 |     throw ArrayException("Array: resize incompatible with dimension=",n);
43 |   engine.init(R0,R1,R2);
44 |   xsize=engine.size();
45 |   really_resize(xsize);
46 | }
47 | //! Resize a 3d Array using Ranges.
48 | //! \param  R0
49 | //! \param  R1
50 | //! \param  R2
51 | //! \param  R3
52 | void resize(Range R0,Range R1,Range R2,Range R3)
53 | {
54 |   if(n!=4) 
55 |     throw ArrayException("Array: resize incompatible with dimension=",n);
56 |   engine.init(R0,R1,R2,R3);
57 |   xsize=engine.size();
58 |   really_resize(xsize);
59 | }
60 | #endif
61 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/MacroRestrict.hpp:
--------------------------------------------------------------------------------
 1 | // define the "restrict" keyword for different compilers.
 2 | // If the macro is not defined for a given compiler, a compilation error
 3 | // will occur at compile time.
 4 | #ifdef GCC
 5 | // gcc:
 6 | #define Restrict __restrict__
 7 | #endif
 8 | #ifdef ICC
 9 | // Intel compiler:
10 | #define Restrict restrict
11 | #endif
12 | #ifdef CLANG
13 | // Clang compiler.
14 | #define Restrict restrict
15 | #endif
16 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/Range.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef Range__h
 2 | #define Range__h
 3 | namespace Arrays {
 4 |   ////////////////////////////////////////////////////////////////
 5 |   /// This classe is a model of integer intervals.
 6 |   ///
 7 |   ///\brief class of integer intervals.
 8 |   ///////////////////////////////////////////////////////////////
 9 | struct Range
10 | {
11 |   int begin,end;
12 |   bool one;// this means: created with one argument.
13 | public:
14 |   //! constructor
15 |   //! \note should not be used by end user!
16 |   Range()
17 |   {
18 |   }
19 |   //! constructor
20 |   //! \param i
21 |   //! \param j
22 |   Range(int i,int j): begin(i),end(j),one(false){}
23 |   //! constructor; creates [0,i[
24 |   //! \param i
25 |   Range(int i): begin(0),end(i),one(true){}
26 |   //! copy constructor
27 |   //! \param R
28 |   Range(const Range& R)
29 |   {
30 |     begin=R.begin; end=R.end; 
31 |     one=R.one;
32 |   }
33 |   //! operator =
34 |   //! \param R
35 |   void operator=(const Range& R)
36 |   {
37 |     begin=R.begin; end=R.end; 
38 |     one=R.one;    
39 |   }
40 |   //! destructor.
41 |   ~Range(){}
42 | };
43 | }
44 | 
45 | #endif
46 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/include/Slices.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef Slices__h
 2 | #define Slices__h
 3 | #include <utility>
 4 | //! return a "natural" slice at line i (C like Array) or
 5 | //! at column i (Fortran like Array).
 6 | //! \note A "REFERENCE" is returned! thre is *no* copy of
 7 | //! the array of data; so the returned Array is flagged "not deletable"
 8 | //! and will actually not delete his datas when the destructor is called.
 9 | Array<T,n-1,Engine> slice_ref(int i)
10 | {
11 | #ifdef DEBUG
12 |   pair<int,int> lims=engine.sliceLimits();
13 |   if(i<lims.first||i>lims.second)
14 |     throw ArrayException("Array::slice_ref, i=",i,"must be in [",lims.first,
15 | 			 lims.second,"[");
16 | #endif
17 |   Range R[n-1];
18 |   pair<int,int> p=engine.sliceRangesIndexes();
19 |   for(int j=p.first;j<p.second;j++) R[j-p.first]=engine.my_range(j);
20 |   Array<T,n-1,Engine> A(R,false);
21 |   A.x=x+engine.slice(i);
22 |   return A;
23 | }
24 | #endif
25 | 


--------------------------------------------------------------------------------
/Gaussian/C++/ARRAY/try/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(NewRadau5)
 3 | enable_language(CXX Fortran)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | # If you use g++, consider this line: set(CMAKE_CXX_COMPILER "g++-4.8")
13 | # (may be you can comment it).
14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
15 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
16 | 
17 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$")
18 |   set (USING_GNU TRUE)
19 |   set(CMAKE_CXX_COMPILER "clang++")
20 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3  -DCLANG  -Wall -std=c++11")
21 | else ()
22 |   set (USING_GNU TRUE)
23 |   #set(CMAKE_CXX_COMPILER "g++-4.8")
24 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -DGCC -DALIGN_64 -ftree-vectorizer-verbose=2 -std=c++11 -march=native")
25 | 
26 | endif ()
27 | include_directories(
28 | ${CMAKE_SOURCE_DIR}/../include
29 | ${CMAKE_SOURCE_DIR}/../../common/include
30 | )
31 | add_executable(
32 |   run
33 |   ../main.cpp
34 |   )
35 | 
36 | 
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/Gaussian/C++/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(NewRadau5)
 3 | enable_language(CXX Fortran)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | 
13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
14 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
15 | 
16 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$")
17 |   set (USING_GNU TRUE)
18 |   set(CMAKE_CXX_COMPILER "clang++")
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3  -DCLANG  -Wall -std=c++14")
20 | else ()
21 |   set (USING_GNU TRUE)
22 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -DGCC -DALIGN_64 -ftree-vectorizer-verbose=2 -std=c++14 -march=native")
23 | 
24 | endif ()
25 | include_directories(
26 | ${CMAKE_SOURCE_DIR}/ARRAY/include
27 | ) 
28 | add_executable(
29 |   run
30 |   ../main.cpp
31 |   )
32 | 
33 | 
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/Gaussian/C++/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Compilation:
 3 | ```
 4 | mkdir Build
 5 | cd Build
 6 | cmake ..
 7 | make
 8 | ```
 9 | a file "run" is created
10 | 
11 | ### Run the code:
12 | 
13 | from Build/ directory, type:
14 | ```
15 | ./run
16 | ```
17 | 
18 | 


--------------------------------------------------------------------------------
/Gaussian/C++/RandomFeedMatrix.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "rando.hpp"
 3 | #include "Array.hpp"
 4 | using namespace Arrays;
 5 | void RandomFeedMatrix(Array<double,2>& M,rando& R)
 6 | {
 7 |     int n=M.maxs(0), m=M.maxs(1);
 8 |     for(int i=0;i<n;i++)
 9 |         for(int j=0;j<m;j++)
10 |             M(i,j)= R.fv();
11 | }
12 | 


--------------------------------------------------------------------------------
/Gaussian/C++/factorMatrix.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "Array.hpp"
 3 | #include <cmath>
 4 | #include <iostream>
 5 | using namespace Arrays;
 6 | void factorMatrix(Array<double,2>& M)
 7 | {
 8 |     const int n=M.maxs(0),m=M.maxs(1);
 9 |     for(int line=0;line<n-1;line++)
10 |     {
11 |         //find larger coefficient (absolute value) in remaining line-th column:
12 |         int cmax=line;
13 |         double vmax= std::abs(M(line,line));
14 |         for(int i=line+1;i<n;i++)
15 |         {
16 |             if(abs(M(i,line)) > vmax)
17 |             {
18 |                 vmax= abs(M(i,line));
19 |                 cmax= i;
20 |             }
21 |         }
22 |         if(cmax != line)
23 |         {
24 |             //permutate:
25 |             for(int j=line;j<m;j++)
26 |             {
27 |                 double t= M(line,j);
28 |                 M(line,j)= M(cmax,j);
29 |                 M(cmax,j)= t;
30 |             }
31 |         }
32 |         //eliminate
33 |         double pivot=M(line,line);
34 |         for(int j=line+1;j<n;j++)
35 |         {
36 |             double v= M(j,line)/pivot;
37 |             for(int k=line;k<m;k++)
38 |                 M(j,k)-= v*M(line,k);
39 |         }
40 | 
41 |     }
42 | }
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/Gaussian/C++/rando.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | class rando{
 3 |     const long int a,c,m;
 4 |     long int  seed;
 5 |     public:
 6 |     rando():a(1103515245),c(12345),m(4294967296)
 7 |     {
 8 |         seed=123456789;
 9 |     }
10 |     long int get(){
11 |         seed= (a * seed + c) % m;
12 |         return seed;
13 |     }
14 |     double fv(double vmax=1.)
15 |     {
16 |         return vmax*(double) get()/m;
17 |     }
18 | };
19 | 
20 | 


--------------------------------------------------------------------------------
/Gaussian/C++Lib/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(NewRadau5)
 3 | enable_language(CXX Fortran)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | 
13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
14 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
15 | 
16 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$")
17 |   set (USING_GNU TRUE)
18 |   set(CMAKE_CXX_COMPILER "clang++")
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3  -DCLANG  -Wall -std=c++11")
20 | else ()
21 |   set (USING_GNU TRUE)
22 |   #set(CMAKE_CXX_COMPILER "g++-4.8")
23 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -DGCC -DALIGN_64 -ftree-vectorizer-verbose=2 -std=c++11 -march=native")
24 | 
25 | endif ()
26 | find_package(LAPACK)
27 | include_directories(
28 | ${CMAKE_SOURCE_DIR}/../C++/ARRAY/include
29 | ) 
30 | add_executable(
31 |   run
32 |   ../main.cpp
33 |   )
34 | target_link_libraries(
35 | 	run
36 | 	${LAPACK_LIBRARIES}
37 | 
38 | )
39 | 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/Gaussian/C++Lib/README.md:
--------------------------------------------------------------------------------
 1 | You need cmake.
 2 | 
 3 | ### Compilation:
 4 | ```
 5 | mkdir Build
 6 | cd Build
 7 | cmake ..
 8 | make
 9 | ```
10 | a file "run" is created
11 | 
12 | ### Run the code:
13 | from Build/ directory, type:
14 | ```
15 | export OPENBLAS_NUM_THREADS=1
16 | ./run
17 | ```
18 | (as Julia and Python run openblas with one thread). 
19 | 
20 | By default, we use g++. You can change the compiler to use, for example, clang++.
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/Gaussian/C++Lib/RandomFeedMatrix.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "rando.hpp"
 3 | #include "Array.hpp"
 4 | using namespace Arrays;
 5 | void RandomFeedMatrix(Array<double,2>& M,rando& R)
 6 | {
 7 |   int n=M.maxs(0), m=M.maxs(1);
 8 |   for(int i=0;i<n;i++)
 9 |     for(int j=0;j<m;j++)
10 |       M(i,j)= R.fv();
11 | }
12 | 


--------------------------------------------------------------------------------
/Gaussian/C++Lib/factorMatrix.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "Array.hpp"
 3 | #include <cmath>
 4 | #include <iostream>
 5 | using namespace Arrays;
 6 | void factorMatrix(Array<double,2> M)
 7 | {
 8 |   const int n=M.maxs(0),m=M.maxs(1);
 9 |   for(int line=0;line<n;line++)
10 |     {
11 |       //find larger coefficient (absolute value) in remaining line-th column:
12 |       int cmax=line;
13 |       double vmax= M(line,line);
14 |       for(int i=line+1;i<n;i++)
15 | 	{
16 | 	  if(abs(M(i,line)) > vmax)
17 | 	    {
18 | 	      vmax= abs(M(i,line));
19 | 	      cmax= i;
20 | 	    }
21 | 	}
22 |       if(cmax != line)
23 | 	{
24 | 	  //permutate:
25 | 	  for(int j=line;j<m;j++)
26 | 	    {
27 | 	      double t= M(line,j);
28 | 	      M(line,j)= M(cmax,j);
29 | 	      M(cmax,j)= t;
30 | 	    }
31 | 	}
32 | 	  //eliminate
33 |       double pivot=M(line,line);
34 |       for(int j=line+1;j<n;j++)
35 | 	{
36 | 	  double v= M(j,line)/pivot;
37 | 	  for(int k=line;k<m;k++)
38 | 	    M(j,k)-= v*M(line,k);
39 | 	}
40 |       
41 |     }
42 | }
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/Gaussian/C++Lib/protos_lapack.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef protos_lapack__h
 2 | #define  protos_lapack__h
 3 | namespace odes
 4 | {
 5 |   //! prototypes for lapack routines.
 6 | extern "C"{
 7 |   void dgetrf_(int *n,int *m,double* a,int *lda,int *ipiv,int *info);
 8 | }
 9 | extern "C"{
10 |   void zgetrf_(int *n,int *m,double* a,int *lda,int *ipiv,int *info);
11 | }
12 | extern "C"{
13 |   void dgetrs_(const char *s,int *N,int *NRHS,double *A,int *LDA,int *IPIV,
14 | 	       double *B,int *LDB,int *INFO );
15 | 
16 | }
17 | extern "C"{
18 |   void zgetrs_(const char *s,int *N,int *NRHS,double *A,int *LDA,int *IPIV,
19 | 	       double *B,int *LDB,int *INFO );
20 | }
21 | extern "C"{
22 |   void dgbtrf_(int *n,int *m,int *k1,int *k2,
23 | 	       double* a,int *lda,int *ipiv,int *info);
24 | }
25 | extern "C"{
26 |   void zgbtrf_(int *n,int *m,int *k1,int *k2,
27 | 	       double* a,int *lda,int *ipiv,int *info);
28 | }
29 | extern "C"{
30 |   void dgbtrs_(const char *s,int *N,int *k1,int *k2,
31 | 	       int *NRHS,double *A,int *LDA,int *IPIV,
32 | 	       double *B,int *LDB,int *INFO );
33 | }
34 | extern "C"{
35 |   void zgbtrs_(const char *s,int *N,int *k1,int *k2,
36 | 	       int *NRHS,double *A,int *LDA,int *IPIV,
37 | 	       double *B,int *LDB,int *INFO );
38 | }
39 | extern "C"{
40 |   void dlarnv_(int *idist,int iseed[],int *n,double *x);
41 | }
42 | extern "C"{
43 |   void dgehrd_(int *n,int *ilo,int *ihi,double *a,int *lda,double tau[],
44 | 	       double work[],int *lwork,int *info);
45 | }
46 | extern "C"{
47 |   void dorghr_(int *n,int *ilo,int *ihi,double *a,int *lda,double tau[],
48 | 	       double work[],int *lwork,int *info);
49 | }
50 |   extern "C"{
51 |     void dgeev_(const char *jobvl,const char *jobvr,int * n,double *a,
52 | 		int *lda,double *WR, double *WI,double *VL,int *LDVL, 
53 | 		double *VR,int  *LDVR,
54 | 		double *WORK,int  *LWORK,int *INFO );
55 | 
56 |   }
57 | }
58 | #endif
59 | 


--------------------------------------------------------------------------------
/Gaussian/C++Lib/rando.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | class rando{
 3 |     const long int a,c,m;
 4 |     long int  seed;
 5 |     public:
 6 |     rando():a(1103515245),c(12345),m(4294967296)
 7 |     {
 8 |         seed=123456789;
 9 |     }
10 |     long int get(){
11 |         seed= (a * seed + c) % m;
12 |         return seed;
13 |     }
14 |     double fv(double vmax=1.)
15 |     {
16 |         return vmax*(double) get()/m;
17 |     }
18 | };
19 | 
20 | 


--------------------------------------------------------------------------------
/Gaussian/Ju/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | ./script
4 | ```
5 | 


--------------------------------------------------------------------------------
/Gaussian/Ju/Rando.jl:
--------------------------------------------------------------------------------
 1 | module Rando
 2 | export RandoData,fv
 3 | #=
 4 | Coefficients for a simple, but reproductible random number generator.
 5 | =#
 6 | mutable struct RandoData
 7 |     seed::Int64
 8 |     a::Int64
 9 |     c::Int64
10 |     m::Int64
11 |     RandoData()=new(123456789,1103515245,12345,4294967296)
12 | end
13 | #=
14 | return a random float in [0,vmax]
15 | =#
16 | function fv!(R::RandoData,vmax=1.)
17 |     R.seed= (R.a * R.seed + R.c) % R.m
18 |     vmax*Float64(R.seed)/R.m
19 | end
20 | end
21 | 


--------------------------------------------------------------------------------
/Gaussian/Ju/main.jl:
--------------------------------------------------------------------------------
 1 | push!(LOAD_PATH, "./")
 2 | using Rando
 3 | #= 
 4 | feed a Matrix with random data
 5 | =#
 6 | function RandomFeedMatrix(M,R::RandoData)
 7 |     n,m=size(M)
 8 |     @inbounds for i = 1:n
 9 |         @inbounds for j = 1:m
10 |             M[i,j]=Rando.fv!(R)
11 |         end
12 |     end
13 | end
14 | 
15 | 
#= Factor, in place, a matrix of size (n,n+1): Gaussian elimination with
   partial (row) pivoting. The coefficients under the diagonal are set to 0.
   The size is not checked, and no test is made for a zero pivot.
=#
function factorMatrix!(M::Array{Float64, 2})
    nrows, ncols = size(M)
    for cur in 1:nrows
        # pivot: row of the largest |coefficient| in column cur, rows cur:nrows
        @inbounds offset = findmax(abs.(M[cur:nrows, cur]))[2]
        prow = cur - 1 + offset
        # bring the pivot row in place (columns cur:ncols only)
        if prow != cur
            @simd for col in cur:ncols
                @inbounds begin
                    saved = M[cur, col]
                    M[cur, col] = M[prow, col]
                    M[prow, col] = saved
                end
            end
        end

        # elimination, column after column
        diagonal = M[cur, cur]
        for col in (cur + 1):ncols
            @inbounds factor = M[cur, col] / diagonal
            @simd for row in (cur + 1):nrows
                @inbounds M[row, col] -= M[row, cur] * factor
            end
        end

        # the eliminated part of column cur
        M[(cur + 1):nrows, cur] .= 0.
    end

end
43 | 
44 | 
45 | 
#=
Build an (n,n+1) matrix of pseudo-random values, factor it, and return the
time spent in the factorization only, in nanoseconds.
=#
function doall(n::Int64)
    generator = RandoData()
    A = Array{Float64}(undef, n, n+1)
    RandomFeedMatrix(A, generator)

    started = time_ns()
    factorMatrix!(A)
    return time_ns() - started
end
58 | 
# main starts here.
# Time the factorization for sizes 2048, 1024, ... (while > liminf) and write
# "size seconds" lines, largest size first, in the file RunningOn<hostname>.

MatrixSize = 2048
liminf = 2
D = Dict{Integer, Real}()

# be sure to run once before actually running the benchmark!
T = doall(MatrixSize)
#

println("start")

#Profile.clear()

while MatrixSize > liminf
    # A name which is not a global: assigning to `T` here would be an
    # ambiguous soft-scope assignment (warning, and a new local anyway).
    elapsed_ns = doall(MatrixSize)
    D[MatrixSize] = 1.e-9 * elapsed_ns   # nanoseconds -> seconds
    global MatrixSize = div(MatrixSize,2)
end



# Results:
S = sort(collect(zip(keys(D), values(D))), rev=true)

open("RunningOn"*gethostname(),"w") do f
    for k in S
        write(f,string(k[1])," ",string(k[2]),"\n")
    end
end


#Profile.print()
92 | 


--------------------------------------------------------------------------------
/Gaussian/Ju/script:
--------------------------------------------------------------------------------
#!/bin/bash
# Run the benchmark with bounds checking disabled and optimisation level 3.
# The level is attached to -O, as in the other scripts of this benchmark,
# so that it cannot be taken for a separate argument.
julia --check-bounds=no -O3 main.jl


--------------------------------------------------------------------------------
/Gaussian/Ju/script-m:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 --track-allocation=user main.jl
3 | 


--------------------------------------------------------------------------------
/Gaussian/JuLib/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | ./script
4 | ```
5 | 


--------------------------------------------------------------------------------
/Gaussian/JuLib/main.jl:
--------------------------------------------------------------------------------
 1 | using LinearAlgebra
 2 | #=
 3 | Coefficients for a simple, but reproductible random number generator.
 4 | =#
 5 | mutable struct RandoData
 6 |     seed
 7 |     a
 8 |     c
 9 |     m
10 |     RandoData()=new(123456789,1103515245,12345,4294967296)
11 | end
#=
Advance the generator R (its seed is replaced by (a*seed + c) % m) and
return the new seed scaled by vmax/m, as a float.
=#
function fv(R::RandoData,vmax=1.)
    next = (R.a * R.seed + R.c) % R.m
    R.seed = next
    return vmax*convert(Float64,next)/R.m
end
#=
Fill the matrix M, row after row, with values drawn from the generator R
(one call to fv per coefficient).
=#
function RandomFeedMatrix(M,R::RandoData)
    nrows,ncols=size(M)
    for row in 1:nrows, col in 1:ncols
        @inbounds M[row,col]=fv(R)
    end
end
#=
Build an (n,n+1) matrix of pseudo-random values, compute its LU
factorization, and return the time spent in the factorization only,
in nanoseconds.
=#
function doall(n::Int64)
    Ro= RandoData()
    M=Array{Float64}(undef,n,n+1)
    RandomFeedMatrix(M,Ro)
    t1 = time_ns()
    # lu, not factorize: M is not square, and for a non-square matrix
    # factorize computes a QR factorization, which is not what this
    # benchmark (Gaussian elimination) is about.
    lu(M)
    time_ns()-t1
end
38 | 
# main starts here.
# Time the factorization for sizes 2048, 1024, ... (while > liminf) and write
# "size seconds" lines, largest size first, in the file RunningOn<hostname>.
MatrixSize=2048
liminf=2
D= Dict{Integer,Real}()

# be sure to run once before actually running the benchmark: otherwise the
# first measure includes the compilation of the code.
doall(MatrixSize)

println("start")


while MatrixSize>liminf
    T=doall(MatrixSize)
    println(MatrixSize)
    D[MatrixSize]=T*1.e-9   # nanoseconds -> seconds
    # integer division: no trip through Float64.
    global MatrixSize=div(MatrixSize,2)
end
#


# Results:
S=sort(collect(zip(keys(D),values(D))),rev=true)

open("RunningOn"*gethostname(),"w") do f
    for k in S
        write(f,string(k[1])," ",string(k[2]),"\n")
    end
end
63 | 
64 | 


--------------------------------------------------------------------------------
/Gaussian/JuLib/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 main.jl
3 | 


--------------------------------------------------------------------------------
/Gaussian/Numba/README.md:
--------------------------------------------------------------------------------
1 | Have a look at file script, to adapt it to your machine (can you use AVX
2 | instructions?).
3 | 
4 | To run the programme, just type:
5 | ```
6 | ./script
7 | ```
8 | 


--------------------------------------------------------------------------------
/Gaussian/Numba/RandomFeedMatrix.py:
--------------------------------------------------------------------------------
1 | def RandomFeedMatrix(M,R):
2 |     # feed the numpy array M with random values.
3 |     n=M.shape[0]
4 |     m=M.shape[1]
5 |     for i in range(0,n):
6 |         for j in range(0,m):
7 |             M[i,j]= R.fv()
8 | 


--------------------------------------------------------------------------------
/Gaussian/Numba/factorMatrix.py:
--------------------------------------------------------------------------------
 1 | import numba
 2 | 
 3 | @numba.guvectorize(["void(float64[:, :])"], "(n, m)", nopython=True, target='parallel')
 4 | def factorMatrix(M):
 5 | # Gaussian elimination, partial pivoting.
 6 | # M must be an (n,n+1) numpy array. Not tested!
 7 |     n = M.shape[0]
 8 |     m = M.shape[1]
 9 |     for line in range(0, n-1):
10 |         # find pivot
11 |         cmax = line
12 |         vmax = abs(M[line, line])
13 |         for i in range(line+1, n):
14 |             if abs(M[i, line]) > vmax:
15 |                 vmax = abs(M[i, line])
16 |                 cmax = i
17 | 
18 |         # exchange rows if necessary
19 |         if cmax != line:
20 |             for j in range(line, m):
21 |                 t = M[line, j]
22 |                 M[line, j] = M[cmax, j]
23 |                 M[cmax, j] = t
24 | 
25 |         # eliminate
26 |         pivot = M[line, line]
27 |         for j in range(line+1, n):
28 |             v = M[j, line] / pivot
29 |             for k in range(line, m):
30 |                 M[j, k]-= v * M[line, k]
31 | 


--------------------------------------------------------------------------------
/Gaussian/Numba/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | from factorMatrix import *
 4 | from RandomFeedMatrix import *
 5 | import time
 6 | import socket
 7 | 
 8 | sizeG=2048
 9 | D={}
10 | 
11 | size=sizeG
12 | loop=1
13 | 
14 | # Checking algo
15 | M = np.empty((128, 128+1))
16 | R = rando()
17 | RandomFeedMatrix(M, R)
18 | print("sum(M) = {}".format(np.sum(M)))
19 | factorMatrix(M)
20 | print("sum(Gauss(M)) = {}".format(np.sum(M)))
21 | print("")
22 | 
23 | while size>2:
24 |     R=rando()
25 |     M=np.empty((size,size+1))
26 |     t1 = time.time()
27 |     for iter in range(0,loop):
28 |         RandomFeedMatrix(M,R)
29 |         factorMatrix(M)
30 |     t=(time.time()-t1)/loop
31 |     print("size:",size," t: ",t)
32 |     D[size]=t
33 |     size//=2
34 |     
35 |     loop*=4
36 | 
37 | print("---")
38 | size= sizeG
39 | loop=1
40 | while size>2:
41 |     R=rando()
42 |     M=np.empty((size,size+1))
43 |     t1 = time.time()
44 |     for iter in range(0,loop):
45 |         RandomFeedMatrix(M,R)
46 |     t=(time.time()-t1)/loop
47 |     D[size] -= t 
48 |     size//=2
49 |     loop*=4
50 | 
51 |     f=open("RunningOn"+socket.gethostname(),"w")   
52 |     for x in sorted(D.keys()):
53 |         f.write(str(x)+" "+str(D[x])+"\n")
54 |     f.close()
55 | 


--------------------------------------------------------------------------------
/Gaussian/Numba/rando.py:
--------------------------------------------------------------------------------
 1 | class rando:
 2 |     def __init__(self):
 3 |         self.seed=123456789
 4 |         self.a=1103515245
 5 |         self.c=12345
 6 |         self.m=2**32
 7 |     def get(self):
 8 |         self.seed= (self.a * self.seed + self.c) % self.m
 9 |         return self.seed
10 |     def fv(self,vmax=1.):
11 |         return vmax*float(self.get())/self.m
12 | if __name__ == "__main__":
13 |     R=rando()
14 |     for i in range(0,100):
15 |         print(R.fv(10.))
16 | 


--------------------------------------------------------------------------------
/Gaussian/Numba/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export NUMBA_ENABLE_AVX=1
4 | python3 main.py
5 | 


--------------------------------------------------------------------------------
/Gaussian/Py/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | python3 main.py
4 | ```
5 | 


--------------------------------------------------------------------------------
/Gaussian/Py/RandomFeedMatrix.py:
--------------------------------------------------------------------------------
1 | def RandomFeedMatrix(M,R):
2 |     # feed the numpy array M with random values.
3 |     n=M.shape[0]
4 |     m=M.shape[1]
5 |     for i in range(0,n):
6 |         for j in range(0,m):
7 |             M[i,j]= R.fv()
8 | 


--------------------------------------------------------------------------------
/Gaussian/Py/factorMatrix.py:
--------------------------------------------------------------------------------
 1 | def factorMatrix(M):
 2 | # Gaussian elimination, partial pivoting.
 3 | # M must be an (n,n+1) numpy array. Not tested!
 4 |     n = M.shape[0]
 5 |     m=  M.shape[1]
 6 |     for line in range(0, n-1):
 7 |         # find pivot
 8 |         cmax=line
 9 |         vmax = abs(M[line,line])
10 |         for i in range(line+1,n):
11 |             if abs(M[i,line])> vmax:
12 |                 vmax= abs(M[i,line])
13 |                 cmax= i
14 | 
15 |         # exchange rows if necessary
16 |         if cmax != line:
17 |             for j in range(line,m):
18 |                 t= M[line,j]
19 |                 M[line,j]= M[cmax,j]
20 |                 M[cmax,j]= t
21 | 
22 |         # eliminate
23 |         pivot = M[line,line]
24 |         for j in range(line+1,n):
25 |             v= M[j,line]/pivot
26 |             for k in range(line,m):
27 |                 M[j,k]-= v*M[line,k]
28 | 


--------------------------------------------------------------------------------
/Gaussian/Py/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | from factorMatrix import *
 4 | from RandomFeedMatrix import *
 5 | import time
 6 | import socket
 7 | 
 8 | sizeG=128
 9 | D={}
10 | 
11 | size=sizeG
12 | loop=1
13 | 
14 | # Checking algo
15 | M = np.empty((128, 128+1))
16 | R = rando()
17 | RandomFeedMatrix(M, R)
18 | print("sum(M) = {}".format(np.sum(M)))
19 | factorMatrix(M)
20 | print("sum(Gauss(M)) = {}".format(np.sum(M)))
21 | print("")
22 | 
23 | while size>2:
24 |     R=rando()
25 |     M=np.empty((size,size+1))
26 |     t1 = time.time()
27 |     for iter in range(0,loop):
28 |         RandomFeedMatrix(M,R)
29 |         factorMatrix(M)
30 |     t=(time.time()-t1)/loop
31 |     D[size]=t
32 |     size//=2
33 |     loop*=8
34 | 
35 | print("---")
36 | size= sizeG
37 | loop=1
38 | while size>2:
39 |     R=rando()
40 |     M=np.empty((size,size+1))
41 |     t1 = time.time()
42 |     for iter in range(0,loop):
43 |         RandomFeedMatrix(M,R)
44 |     t=(time.time()-t1)/loop
45 |     D[size] -= t 
46 |     size//=2
47 |     loop*=8
48 | 
49 |     f=open("RunningOn"+socket.gethostname(),"w")   
50 |     for x in sorted(D.keys()):
51 |         f.write(str(x)+" "+str(D[x])+"\n")
52 |     f.close()
53 | 


--------------------------------------------------------------------------------
/Gaussian/Py/rando.py:
--------------------------------------------------------------------------------
 1 | class rando:
 2 |     def __init__(self):
 3 |         self.seed=123456789
 4 |         self.a=1103515245
 5 |         self.c=12345
 6 |         self.m=2**32
 7 |     def get(self):
 8 |         self.seed= (self.a * self.seed + self.c) % self.m
 9 |         return self.seed
10 |     def fv(self,vmax=1.):
11 |         return vmax*float(self.get())/self.m
12 | if __name__ == "__main__":
13 |     R=rando()
14 |     for i in range(0,100):
15 |         print(R.fv(10.))
16 | 


--------------------------------------------------------------------------------
/Gaussian/PyScipy/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | python3 main.py
4 | ```
5 | 


--------------------------------------------------------------------------------
/Gaussian/PyScipy/RandomFeedMatrix.py:
--------------------------------------------------------------------------------
1 | def RandomFeedMatrix(M,R):
2 |     # feed the numpy array M with random values.
3 |     n=M.shape[0]
4 |     m=M.shape[1]
5 |     for i in range(0,n):
6 |         for j in range(0,m):
7 |             M[i,j]= R.fv()
8 | 


--------------------------------------------------------------------------------
/Gaussian/PyScipy/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | from RandomFeedMatrix import *
 4 | import time
 5 | import socket
 6 | from scipy.linalg import lu
 7 | 
 8 | sizeG=2048
 9 | D={}
10 | 
11 | size=sizeG
12 | loop=1
13 | sizemin=4
14 | while size>=sizemin:
15 |     R=rando()
16 |     M=np.empty((size,size))
17 |     t1 = time.time()
18 |     for iter in range(0,loop):
19 |         RandomFeedMatrix(M,R)
20 |         P,L,U=lu(M)
21 |     t=(time.time()-t1)/loop
22 |     print("size:",size," t: ",t)
23 |     D[size]=t
24 |     size//=2
25 |     loop*=4
26 | print("---")
27 | size= sizeG
28 | loop=1
29 | while size>=sizemin:
30 |     R=rando()
31 |     M=np.empty((size,size))
32 |     t1 = time.time()
33 |     for iter in range(0,loop):
34 |         RandomFeedMatrix(M,R)
35 |     t=(time.time()-t1)/loop
36 |     D[size] -= t 
37 |     size//=2
38 |     loop*=4
39 |  
40 |     f=open("RunningOn"+socket.gethostname(),"w")   
41 |     for x in sorted(D.keys()):
42 |         f.write(str(x)+" "+str(D[x])+"\n")
43 |     f.close()
44 | 


--------------------------------------------------------------------------------
/Gaussian/PyScipy/plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/PyScipy/plot.pdf


--------------------------------------------------------------------------------
/Gaussian/PyScipy/rando.py:
--------------------------------------------------------------------------------
 1 | class rando:
 2 |     def __init__(self):
 3 |         self.seed=123456789
 4 |         self.a=1103515245
 5 |         self.c=12345
 6 |         self.m=2**32
 7 |     def get(self):
 8 |         self.seed= (self.a * self.seed + self.c) % self.m
 9 |         return self.seed
10 |     def fv(self,vmax=1.):
11 |         return vmax*float(self.get())/self.m
12 | if __name__ == "__main__":
13 |     R=rando()
14 |     for i in range(0,100):
15 |         print(R.fv(10.))
16 | 


--------------------------------------------------------------------------------
/Gaussian/PyVec/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | python3 main.py
4 | ```
5 | 


--------------------------------------------------------------------------------
/Gaussian/PyVec/RandomFeedMatrix.py:
--------------------------------------------------------------------------------
1 | def RandomFeedMatrix(M,R):
2 |     # feed the numpy array M with random values.
3 |     n=M.shape[0]
4 |     m=M.shape[1]
5 |     for i in range(0,n):
6 |         for j in range(0,m):
7 |             M[i,j]= R.fv()
8 | 


--------------------------------------------------------------------------------
/Gaussian/PyVec/factorMatrix.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | def factorMatrix(M):
 3 | # Gaussian elimination, partial pivoting.
 4 | # M must be an (n,n+1) numpy array. Not tested!
 5 |     n = M.shape[0]
 6 |     m=  M.shape[1]
 7 |     for line in range(0, n-1):
 8 |         # find pivot
 9 |         cmax = np.argmax(abs(M[line:n,line])) + line
10 | 
11 |         # exchange rows if necessary
12 |         if cmax != line:
13 |             M[[line,cmax]]=M[[cmax,line]]
14 | 
15 |         # eliminate
16 |         pivot = M[line,line]
17 |         v = M[(line+1):n,[line]]/pivot
18 |         M[(line+1):n,line:m] -= v*M[[line],line:m]
19 | 


--------------------------------------------------------------------------------
/Gaussian/PyVec/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | from factorMatrix import *
 4 | from RandomFeedMatrix import *
 5 | import time
 6 | import socket
 7 | 
 8 | sizeG=2048
 9 | D={}
10 | 
11 | size=sizeG
12 | loop=1
13 | 
14 | # Checking algo
15 | M = np.empty((128, 128+1))
16 | R = rando()
17 | RandomFeedMatrix(M, R)
18 | print("sum(M) = {}".format(np.sum(M)))
19 | factorMatrix(M)
20 | print("sum(Gauss(M)) = {}".format(np.sum(M)))
21 | print("")
22 | 
23 | while size>2:
24 |     R=rando()
25 |     M=np.empty((size,size+1))
26 |     t1 = time.time()
27 |     for iter in range(0,loop):
28 |         RandomFeedMatrix(M,R)
29 |         factorMatrix(M)
30 |     t=(time.time()-t1)/loop
31 |     print("size:",size," t: ",t)
32 |     D[size]=t
33 |     size//=2
34 |     loop*=4
35 | 
36 | 
37 | print("---")
38 | size= sizeG
39 | loop=1
40 | while size>2:
41 |     R=rando()
42 |     M=np.empty((size,size+1))
43 |     t1 = time.time()
44 |     for iter in range(0,loop):
45 |         RandomFeedMatrix(M,R)
46 |     t=(time.time()-t1)/loop
47 |     #print("size:",size," t: ",t)
48 |     D[size] -= t 
49 |     size//=2
50 |     loop*=4
51 |     
52 |     f=open("RunningOn"+socket.gethostname(),"w")   
53 |     for x in sorted(D.keys()):
54 |         f.write(str(x)+" "+str(D[x])+"\n")
55 |     f.close()
56 | 


--------------------------------------------------------------------------------
/Gaussian/PyVec/plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/PyVec/plot.pdf


--------------------------------------------------------------------------------
/Gaussian/PyVec/rando.py:
--------------------------------------------------------------------------------
 1 | class rando:
 2 |     def __init__(self):
 3 |         self.seed=123456789
 4 |         self.a=1103515245
 5 |         self.c=12345
 6 |         self.m=2**32
 7 |     def get(self):
 8 |         self.seed= (self.a * self.seed + self.c) % self.m
 9 |         return self.seed
10 |     def fv(self,vmax=1.):
11 |         return vmax*float(self.get())/self.m
12 | if __name__ == "__main__":
13 |     R=rando()
14 |     for i in range(0,100):
15 |         print(R.fv(10.))
16 | 


--------------------------------------------------------------------------------
/Gaussian/Pythran/README.md:
--------------------------------------------------------------------------------
 1 | Compile factorMatrix.py and run the code; just type:
 2 | ```
 3 | ./script
 4 | ```
 5 | 
 6 | Warning: if you are using OpenBLAS, you will probably need to create a `.pythranrc` file in your home directory containing:
 7 | 
 8 | ```
 9 | [compiler]
10 | blas=openblas
11 | ```
12 | 


--------------------------------------------------------------------------------
/Gaussian/Pythran/RandomFeedMatrix.py:
--------------------------------------------------------------------------------
1 | def RandomFeedMatrix(M,R):
2 |     # feed the numpy array M with random values.
3 |     n=M.shape[0]
4 |     m=M.shape[1]
5 |     for i in range(0,n):
6 |         for j in range(0,m):
7 |             M[i,j]= R.fv()
8 | 


--------------------------------------------------------------------------------
/Gaussian/Pythran/factorMatrix.py:
--------------------------------------------------------------------------------
 1 | #pythran export factorMatrix(float[:,:])
 2 | def factorMatrix(M):
 3 | # Gaussian elimination, partial pivoting.
 4 | # M must be an (n,n+1) numpy array. Not tested!
 5 |     n = M.shape[0]
 6 |     m=  M.shape[1]
 7 |     for line in range(0, n-1):
 8 |         # find pivot
 9 |         cmax=line
10 |         vmax= abs(M[line,line])
11 |         for i in range(line+1,n):
12 |             if abs(M[i,line])> vmax:
13 |                 vmax= abs(M[i,line])
14 |                 cmax= i
15 |         # exchange rows if necessary
16 |         if cmax != line:
17 |             for j in range(line,m):
18 |                 t= M[line,j]
19 |                 M[line,j]= M[cmax,j]
20 |                 M[cmax,j]= t
21 |         # eliminate
22 |         pivot = M[line,line]
23 |         for j in range(line+1,n):
24 |             v= M[j,line]/pivot
25 |             for k in range(line,m):
26 |                 M[j,k]-= v*M[line,k]
27 | 


--------------------------------------------------------------------------------
/Gaussian/Pythran/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | import factorMatrix 
 4 | from RandomFeedMatrix import *
 5 | import time
 6 | import socket
 7 | 
 8 | sizeG=2048
 9 | sizelim=4
10 | D={}
11 | 
12 | size=sizeG
13 | loop=1
14 | 
15 | # Checking algo
16 | M = np.empty((128, 128+1))
17 | R = rando()
18 | RandomFeedMatrix(M, R)
19 | print("sum(M) = {}".format(np.sum(M)))
20 | factorMatrix.factorMatrix(M)
21 | print("sum(Gauss(M)) = {}".format(np.sum(M)))
22 | print("")
23 | 
24 | while size>=sizelim:
25 |     R=rando()
26 |     M=np.empty((size,size+1))
27 |     t1 = time.time()
28 |     for iter in range(0,loop):
29 |         RandomFeedMatrix(M,R)
30 |         factorMatrix.factorMatrix(M)
31 |     t=(time.time()-t1)/loop
32 |     print("size:",size," t: ",t)
33 |     D[size]=t
34 |     size//=2
35 | 
36 |     loop*=4
37 | 
38 | print("---")
39 | size= sizeG
40 | loop=1
41 | while size>=sizelim:
42 |     R=rando()
43 |     M=np.empty((size,size+1))
44 |     t1 = time.time()
45 |     for iter in range(0,loop):
46 |         RandomFeedMatrix(M,R)
47 |     t=(time.time()-t1)/loop
48 |     print("size:",size," t: ",t)
49 |     D[size] -= t
50 |     size//=2
51 | 
52 |     loop*=4
53 |     
54 |     f=open("RunningOn"+socket.gethostname(),"w")   
55 |     for x in sorted(D.keys()):
56 |         f.write(str(x)+" "+str(D[x])+"\n")
57 |     f.close()
58 | 


--------------------------------------------------------------------------------
/Gaussian/Pythran/rando.py:
--------------------------------------------------------------------------------
 1 | class rando:
 2 |     def __init__(self):
 3 |         self.seed=123456789
 4 |         self.a=1103515245
 5 |         self.c=12345
 6 |         self.m=2**32
 7 |     def get(self):
 8 |         self.seed= (self.a * self.seed + self.c) % self.m
 9 |         return self.seed
10 |     def fv(self,vmax=1.):
11 |         return vmax*float(self.get())/self.m
12 | if __name__ == "__main__":
13 |     R=rando()
14 |     for i in range(0,100):
15 |         print(R.fv(10.))
16 | 


--------------------------------------------------------------------------------
/Gaussian/Pythran/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | pythran -march=native -O3 factorMatrix.py
3 | echo "run test:"
4 | python3 main.py
5 | 


--------------------------------------------------------------------------------
/Gaussian/PythranVec/README.md:
--------------------------------------------------------------------------------
 1 | Compile factorMatrix.py and run the code;  just type:
 2 | ```
 3 | ./script
 4 | ```
 5 | 
 6 | Warning: if you are using OpenBLAS, you will probably need to create a `.pythranrc` file in your home directory containing:
 7 | 
 8 | ```
 9 | [compiler]
10 | blas=openblas
11 | ```
12 | 


--------------------------------------------------------------------------------
/Gaussian/PythranVec/RandomFeedMatrix.py:
--------------------------------------------------------------------------------
1 | def RandomFeedMatrix(M,R):
2 |     # feed the numpy array M with random values.
3 |     n=M.shape[0]
4 |     m=M.shape[1]
5 |     for i in range(0,n):
6 |         for j in range(0,m):
7 |             M[i,j]= R.fv()
8 | 


--------------------------------------------------------------------------------
/Gaussian/PythranVec/factorMatrix.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | #pythran export factorMatrix(float[:,:])
 3 | def factorMatrix(M):
 4 | # Gaussian elimination, partial pivoting.
 5 | # M must be an (n,n+1) numpy array. Not tested!
 6 |     n = M.shape[0]
 7 |     m=  M.shape[1]
 8 |     
 9 |     for line in range(0, n-1):
10 |         # find pivot
11 |         cmax = np.argmax(abs(M[line:n,line])) + line
12 | 
13 |         # exchange rows if necessary
14 |         if cmax != line:
15 |             tmp = np.array(M[cmax,:])
16 |             M[cmax,:] = M[line,:]
17 |             M[line,:] = tmp
18 |             # M[[line,cmax]]=M[[cmax,line]] # Currently doesn't work in Pythran
19 | 
20 |         # eliminate
21 |         pivot = M[line,line]
22 |         v = M[(line+1):n,line].reshape((-1,1))/pivot # M[(line+1):n,[line]] syntax currently doesn't work in Pythran
23 |         M[(line+1):n,line:m] -= v*M[line,line:m].reshape((1,-1))
24 | 


--------------------------------------------------------------------------------
/Gaussian/PythranVec/main.py:
--------------------------------------------------------------------------------
 1 | from rando import *
 2 | import numpy as np
 3 | from factorMatrix import *
 4 | from RandomFeedMatrix import *
 5 | import time
 6 | import socket
 7 | 
 8 | sizeG=2048
 9 | D={}
10 | 
11 | size=sizeG
12 | loop=1
13 | 
14 | # Checking algo
15 | M = np.empty((128, 128+1))
16 | R = rando()
17 | RandomFeedMatrix(M, R)
18 | print("sum(M) = {}".format(np.sum(M)))
19 | factorMatrix(M)
20 | print("sum(Gauss(M)) = {}".format(np.sum(M)))
21 | print("")
22 | 
23 | while size>2:
24 |     R=rando()
25 |     M=np.empty((size,size+1))
26 |     t1 = time.time()
27 |     for iter in range(0,loop):
28 |         RandomFeedMatrix(M,R)
29 |         factorMatrix(M)
30 |     t=(time.time()-t1)/loop
31 |     print("size:",size," t: ",t)
32 |     D[size]=t
33 |     size//=2
34 |     
35 |     loop*=4
36 | 
37 | print("---")
38 | size= sizeG
39 | loop=1
40 | while size>2:
41 |     R=rando()
42 |     M=np.empty((size,size+1))
43 |     t1 = time.time()
44 |     for iter in range(0,loop):
45 |         RandomFeedMatrix(M,R)
46 |     t=(time.time()-t1)/loop
47 |     #print("size:",size," t: ",t)
48 |     D[size] -= t 
49 |     size//=2
50 |     
51 |     loop*=4
52 |     
53 |     f=open("RunningOn"+socket.gethostname(),"w")   
54 |     for x in sorted(D.keys()):
55 |         f.write(str(x)+" "+str(D[x])+"\n")
56 |     f.close()
57 | 


--------------------------------------------------------------------------------
/Gaussian/PythranVec/plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/PythranVec/plot.pdf


--------------------------------------------------------------------------------
/Gaussian/PythranVec/rando.py:
--------------------------------------------------------------------------------
 1 | class rando:
 2 |     def __init__(self):
 3 |         self.seed=123456789
 4 |         self.a=1103515245
 5 |         self.c=12345
 6 |         self.m=2**32
 7 |     def get(self):
 8 |         self.seed= (self.a * self.seed + self.c) % self.m
 9 |         return self.seed
10 |     def fv(self,vmax=1.):
11 |         return vmax*float(self.get())/self.m
12 | if __name__ == "__main__":
13 |     R=rando()
14 |     for i in range(0,100):
15 |         print(R.fv(10.))
16 | 


--------------------------------------------------------------------------------
/Gaussian/PythranVec/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | pythran -march=native -O3 factorMatrix.py
3 | echo "run test:"
4 | python3 main.py
5 | 


--------------------------------------------------------------------------------
/Gaussian/Results/Benchmarks/kepler-nolibs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/Results/Benchmarks/kepler-nolibs.png


--------------------------------------------------------------------------------
/Gaussian/Results/Benchmarks/kepler-only-libs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/Results/Benchmarks/kepler-only-libs.png


--------------------------------------------------------------------------------
/Gaussian/Results/Benchmarks/kepler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Gaussian/Results/Benchmarks/kepler.png


--------------------------------------------------------------------------------
/Gaussian/Results/README.md:
--------------------------------------------------------------------------------
 1 | First, do:
 2 | ```
 3 | mkdir -p ./Results   # does nothing if ./Results already exists
 4 | ./gr.py
 5 | ```
 6 | It will parse your results (in the "RunningOn _your_ _hostname_" files) and populate
 7 | Results/ with files that gnuplot can plot.
 8 | 
 9 | Then:
10 | ```
11 | gnuplot
12 | gnuplot> load "gpc"
13 | ```
14 | will plot the results. The C++ computing time is taken as the unit.
15 | 
16 | 
17 | An example benchmark is Benchmarks/kepler.png, which was run on a
18 | 4-core i5-4670 CPU @ 3.40GHz.
19 | 


--------------------------------------------------------------------------------
/Gaussian/Results/gpc:
--------------------------------------------------------------------------------
 1 | set logscale 
 2 | set key top right
 3 | set xlabel "matrix size"
 4 | set ylabel "cpu time / cpu time C++"
 5 | set title "All results"
 6 | plot "./Results/C++" title "C++" with linespoint pointtype 7 
 7 | replot "./Results/Py"      title "Python" with linespoint
 8 | replot "./Results/PyVec"      title "Python vect." with linespoint
 9 | replot "./Results/Ju"      title "Julia" with linespoint
10 | replot "./Results/PythranVec" title "Pythran vec." with linespoint
11 | replot "./Results/Pythran" title "Pythran" with linespoint
12 | replot "./Results/Numba" title "Numba" with linespoint
13 | replot "./Results/JuLib"   title "Julia + libs" with linespoint
14 | replot "./Results/PyScipy" title "Python + Scipy" with linespoint
15 | replot "./Results/C++Lib" title "C++ +  libs" with linespoint
16 | set title "Using libraries"
17 | #set terminal png size 600,450
18 | #set output "kepler.png"


--------------------------------------------------------------------------------
/Gaussian/Results/gpc-nolibs:
--------------------------------------------------------------------------------
 1 | set logscale 
 2 | set key top right
 3 | set xlabel "matrix size"
 4 | set ylabel "cpu time / cpu time C++"
 5 | set title "Not using external libraries"
 6 | plot "./Results/C++" title "C++" with linespoint pointtype 7 
 7 | replot "./Results/Py"      title "Python" with linespoint
 8 | replot "./Results/PyVec"      title "Python vect." with linespoint
 9 | replot "./Results/Ju"      title "Julia" with linespoint
10 | replot "./Results/PythranVec" title "Pythran vec." with linespoint
11 | replot "./Results/Pythran" title "Pythran" with linespoint
12 | replot "./Results/Numba" title "Numba" with linespoint
13 | set title "Not using libraries"
14 | #set terminal png size 600,450
15 | #set output "kepler.png"
16 | 


--------------------------------------------------------------------------------
/Gaussian/Results/gr.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | #
 3 | #comparison between c++ and other computations
 4 | #
 5 | import socket
 6 | 
 7 | def parsit(D,l):
 8 |     # extract two numbers from a line, if this is possible.
 9 |     ll=l.split(" ")
10 |     if len(ll) == 2:
11 |         D[int(ll[0])]=float(ll[1].replace("\n",""))
12 | 
13 |         
14 | # directories to explore ---------   
15 | files=[
16 |     "../Py",
17 |     "../Ju",
18 |     "../Pythran",
19 |     "../JuLib",
20 |     "../PyScipy",
21 |     "../PyVec",
22 |     "../PythranVec",
23 |     "../Numba",
24 |     "../C++Lib",
25 |     "../C++"]
26 | cpp="../C++"
27 | 
28 | 
29 | #-------------------------------------------
30 | # build a dict  n-> computing time for  C++
31 | C={}
32 | with open(cpp+"/RunningOn"+socket.gethostname(), 'r') as file:
33 |     for line in file:
34 |         parsit(C,line)
35 | 
36 | #  build a dict  n-> computing time for all directories in files[]
37 | T={}
38 | for n in files:
39 |     T[n]={}
40 |     filename= n+"/RunningOn"+socket.gethostname()
41 |     with open(filename,"r") as file:
42 |         for line in file:
43 |             parsit(T[n],line)
44 | print("all files parsed.")
45 | # Compute ratio time/(time C++).
46 | for n in files:
47 |     D=T[n]
48 |     for k in D.keys():
49 |         if k in C.keys():
50 |             D[k]/=C[k]
51 | print("ratios computed.")       
52 | # create file for gnuplot.
53 | for n in files:
54 |     D=T[n]
55 |     thefile=n.replace("..","./Results")
56 |     print("-file created: ",thefile)
57 |     with open(thefile, 'w') as file:
58 |         kk=sorted([k for k in D.keys()])
59 |         for k in kk:
60 |             file.write(str(k)+" "+str(D[k])+'\n')
61 |             
62 | print("\nsee gpc* files to plot with gnuplot.\n")
63 | print('In gnuplot do:\nload "gpc"\nor load "gpc-nolibs"')
64 | print('or load "gpc-only-libs"')
65 | 


--------------------------------------------------------------------------------
/Gaussian/runAllTests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | #  This script is supposed to run *all* the tests, and then compute
 4 | #  the final "report" in Results/
 5 | #  Not sure it works everywhere. If it does not, improve it, or enter each
 6 | #  directory and look at README.md to know what to do.
 7 | #
 8 | # Compiled benchmarks: configure and build with CMake, then run the binary.
 9 | # The steps are chained with && so that a failing step (e.g. a missing
10 | # directory) skips the rest of that test instead of running in the wrong place.
11 | for i in C++  C++Lib ; do
12 |     echo "Test: $i"
13 |     echo "--- "
14 |     (cd "$i" && mkdir -p Build && cd Build && cmake .. && make && ./run)
15 | done
16 | # Benchmarks driven by a ./script file. JuLib is included because
17 | # Results/gr.py opens a result file in every directory it lists, JuLib included.
18 | for i in Ju JuLib Numba  PythranVec Pythran ; do
19 |     echo  "Test: $i"
20 |     echo "--- "
21 |     (cd "$i" && ./script)
22 | done
23 | # Pure Python benchmarks, run directly with the interpreter.
24 | for i in Py PyVec PyScipy; do
25 |     echo  "Test: $i"
26 |     echo "--- "
27 |     (cd "$i" && python3 ./main.py)
28 | done
29 | 
30 | echo " "
31 | echo "Make the report:"
32 | # gr.py writes its output files into Results/Results, so create it first.
33 | mkdir  -p Results/Results
34 | (cd Results && ./gr.py)
35 | echo " "
36 | echo "Everything went well ? If yes,go to ./Results/. You can visualize the"
37 | echo " results with gnuplot. Look at README.md in ./Results"
38 | echo " "
39 | echo " "
40 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Thierry Dumont
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/C++-xtensor/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(NewRadau5)
 3 | enable_language(CXX Fortran)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | 
13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
14 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
15 | 
16 | 
17 | else ()
18 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -DGCC -DXTENSOR_USE_XSIMD=1  -DALIGN_64  -std=c++14 -march=native")
19 | 
20 | endif ()
21 | include_directories(
22 |   ${CMAKE_SOURCE_DIR}/ARRAY/include
23 |   $ENV{HOME}/anaconda3/include/
24 | ) 
25 | add_executable(
26 |   run_cl
27 |   ../main_cl.cpp
28 |   )
29 | add_executable(
30 |   run_lapl_1
31 |   ../main_lapl_1.cpp
32 |   )
33 | add_executable(
34 |   run_lapl_2
35 |   ../main_lapl_2.cpp
36 |   )
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/C++-xtensor/README.md:
--------------------------------------------------------------------------------
 1 | You need cmake !
 2 | 
 3 | ### Compilation:
 4 | ```
 5 | mkdir Build
 6 | cd Build
 7 | cmake ..
 8 | make
 9 | ```
10 | This creates the files: run_cl, run_lapl_1 and run_lapl_2.
11 | 
12 | ### Run the code:
13 | 
14 | from Build/ directory, type:
15 | ```
16 | ./run_cl
17 | ```
18 | or  run_lapl_1 or  run_lapl_2.
19 | 
20 | By default, we use g++. To change the compiler and use, for example,
21 | clang++,
22 | replace 
23 | ```
24 | cmake ..
25 | ```
26 | by
27 | 
28 | ```
29 | CXX=clang++ cmake ..
30 | ```
31 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/C++-xtensor/get_time.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <sys/time.h>
3 | #include <chrono>
4 | // Returns now() of std::chrono::high_resolution_clock.
5 | // `inline` because this header-defined function may be included by several TUs.
6 | inline auto get_time() { return std::chrono::high_resolution_clock::now(); }
7 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/C++-xtensor/main_cl.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <algorithm>
 3 | #include <string>
 4 | #include <sys/time.h>
 5 | #include <unistd.h>
 6 | #include <limits.h>
 7 | #include <fstream>
 8 | #include <cmath>
 9 | #include <ctime>
10 | #include <array>
11 | #include "xtensor/xtensor.hpp"
12 | using namespace std;
13 | typedef xt::xtensor<double,1> Array;
14 | double get_time() {
15 |     struct timeval tv;
16 |     gettimeofday(&tv,0);
17 |     return (double) tv.tv_sec+tv.tv_usec*1e-6;
18 | }
19 | clock_t ck() {return clock();}
20 | string host() // machine name from gethostname(); empty string if the call fails.
21 | {
22 |   // POSIX leaves the result unterminated when truncated: keep a spare zeroed byte.
23 |   char hostnameC[HOST_NAME_MAX+1]={0};
24 |   return gethostname(hostnameC, HOST_NAME_MAX)==0 ? string(hostnameC) : string();
25 | }
26 | 
27 | void Init(Array&  X,double L,int size)
28 | {
29 |   double h=L/size;
30 |   for(int i=0;i<size;i++)
31 |     if(i>size/8 && i<size/2+size/8)
32 |       X[i]=1.-2*(i-size/8)*h/L;
33 |     else
34 |       X[i]=0.0;
35 | }
36 | void cl(std::size_t size,Array& A,Array& B,
37 | 	Array& C,Array& D)
38 | {
39 | 
40 |   A=1.7*B-0.8*C-0.9*D;
41 |   
42 | }
43 | double  dotest(std::size_t size)
44 | {
45 |   std::array<size_t, 1> shape = { size  };
46 |   Array A(shape), B(shape), C(shape),D(shape);
47 |  
48 |   Init(A,1.,size); Init(B,1.,size); Init(C,1.,size); Init(D,1.,size); 
49 |   double T=0;
50 |   double Tnew=std::pow(10.,20);
51 |   int iter=10000;
52 |   bool ok=false;
53 |   do
54 |     {
55 |       double t1=get_time();
56 |       for(int i=0;i<iter;i++)
57 | 	cl(size,A,B,C,D);
58 |       Tnew=(get_time()-t1);
59 |       //A.swap(D);
60 |       C=A; A=D; D=A;
61 |       ok= std::abs(Tnew-2*T)/Tnew<0.1 ||iter>1000000;
62 |       T=Tnew;
63 |       if(!ok) iter*=2;
64 |     }
65 |   while(!ok);
66 |   return T/iter;
67 | }
68 |   
69 | int main()
70 | {
71 |   auto hostname = host();
72 |   cout<<"hostname: "<<hostname<<endl;
73 |   ofstream fb; fb.open("../RunningOn"+hostname+"_cl");
74 |   
75 |   std::size_t sizemax=std::pow(10,6);
76 |   std::size_t size=32;
77 |   while(size<sizemax)
78 |     {
79 |       auto T=dotest(size);
80 |       double flops=size*5/T;
81 |       cout<<size<<" "<<T<<", Gflops/s: "<<flops*std::pow(10,-9)<<endl;
82 |       fb<<size<<" "<<T<<endl;
83 |       size*=2;
84 |     }
85 |   fb.close();
86 |   cout<<"end"<<endl;
87 | }
88 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/C++/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(NewRadau5)
 3 | enable_language(CXX Fortran)
 4 | # Go to Build directory. Then:
 5 | # To use intel compiler
 6 | # CXX=icpc cmake ..
 7 | # for clang++:
 8 | # CXX=clang++ cmake ..
 9 | # otherwise, to use g++:
10 | #  cmake ..
11 | #
12 | 
13 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
14 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
15 | 
16 | 
17 | else ()
18 |   set (USING_GNU TRUE)
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -DGCC -DALIGN_64  -std=c++14 -march=native")
20 | 
21 | endif ()
22 | 
23 | add_executable(
24 |   run_cl
25 |   ../main_cl.cpp
26 |   )
27 | add_executable(
28 |   run_lapl_1
29 |   ../main_lapl_1.cpp
30 |   )
31 | add_executable(
32 |   run_lapl_2
33 |   ../main_lapl_2.cpp
34 |   )
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/C++/README.md:
--------------------------------------------------------------------------------
 1 | You need cmake !
 2 | 
 3 | ### Compilation:
 4 | ```
 5 | mkdir Build
 6 | cd Build
 7 | cmake ..
 8 | make
 9 | ```
10 | This creates the files: run_cl, run_lapl_1 and run_lapl_2.
11 | 
12 | ### Run the code:
13 | 
14 | from Build/ directory, type:
15 | ```
16 | ./run_cl
17 | ```
18 | or  run_lapl_1 or  run_lapl_2.
19 | 
20 | By default, we use g++. To change the compiler and use, for example,
21 | clang++,
22 | replace 
23 | ```
24 | cmake ..
25 | ```
26 | by
27 | 
28 | ```
29 | CXX=clang++ cmake ..
30 | ```
31 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/C++/get_time.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <sys/time.h>
3 | #include <chrono>
4 | // Returns now() of std::chrono::high_resolution_clock.
5 | // `inline` because this header-defined function may be included by several TUs.
6 | inline auto get_time() { return std::chrono::high_resolution_clock::now(); }
7 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Ju/README.md:
--------------------------------------------------------------------------------
 1 | * To run the different codes, just do:
 2 | 
 3 | ```
 4 | ./script  code.jl
 5 | ```
 6 | 
 7 | with code.jl = main_cl.jl,  main_lapl_1d.jl or main_lapl_2d.jl
 8 | 
 9 | * If you want to look at memory usage do:
10 | ```
11 | ./script-m  code.jl
12 | ```
13 | (but this slows down the code. **Do not use script-m for benchmarking!**).
14 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Ju/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 "$@"  # "$@" forwards the program name (and any extra arguments) unsplit
3 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Ju/script-m:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 --track-allocation=user "$@"  # "$@" forwards the program name (and any extra arguments) unsplit
3 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Numba/README.md:
--------------------------------------------------------------------------------
 1 | * To run the different codes, just do:
 2 | 
 3 | ```
 4 | ./script  code.py
 5 | ```
 6 | 
 7 | with code.py = main_cl.py,  main_lapl_1d.py or main_lapl_2d.py
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Numba/main_cl.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | from numba import jit
 5 | 
 6 | def Init(X,L):
 7 |     size=X.size
 8 |     h=L/size
 9 |     for i in range(0,size):
10 |         if i>size//8 and i<size//2+size//8:
11 |             X[i]=1.-2*(i-size//8)*h/L;
12 |         else:
13 |             X[i]=0.0
14 | 
15 | @jit
16 | def cl_1(A,B,C,D,niter):
17 |     size=A.size
18 |     for it in range(0,niter):
19 |         A=1.7*B-0.8*C-0.9*D
20 |         A,D=D,A
21 | @jit
22 | def cl_2(A,B,C,D,niter):
23 |     size=A.size
24 |     for it in range(0,niter):
25 |         for i in range(1,size-1):
26 |             A[i]=1.7*B[i]-0.8*C[i]-0.9*D[i]
27 |         A,D=D,A    
28 |               
29 | def test(p,A,B,C,D,nit):
30 |    
31 |     niter=nit
32 |     Init(A,1.)
33 |     Init(B,1.)
34 |     Init(C,1.)
35 |     Init(D,1.)
36 |     p(A,B,C,D,niter)
37 |     T=0.
38 |     while True:
39 |         Init(A,1.)
40 |         Init(B,1.)
41 |         Init(C,1.)
42 |         Init(D,1.)
43 |         t1 = time.time()
44 |         p(A,B,C,D,niter)
45 |         treal=time.time() -t1
46 |         t = treal/niter
47 |         if treal>0.0001 and abs(t-T)/t<0.025:
48 |             break
49 |         else:
50 |             T=t
51 |             niter*=2
52 | 
53 |     return T,niter
54 | 
55 | DD={"cl_2":"Naive     ",
56 |     "cl_1":"Vectorized"}
57 | 
58 | f=open("RunningOn"+socket.gethostname()+"_cl","w")
59 | 
60 | size=32
61 | sizemax=100000
62 | niter=10
63 | parsef= lambda  f: str(f).split(" ")[1] #parse function name
64 | while size<sizemax:
65 |     print("size: ",size)
66 |     A= np.empty(size)
67 |     B= np.empty(size)
68 |     C= np.empty(size)
69 |     D= np.empty(size)
70 |     tbest=10.**20
71 |     best=0
72 |     t=0.0
73 |     for p in  [cl_1,cl_2]:
74 |         t,it=test(p,A,B,C,D,niter)
75 |         if t<tbest:
76 |             tbest=t
77 |             best=p
78 |         print(DD[parsef(p)]," : t= ",t," seconds ")
79 |     nflops= 4*(size-2)
80 |     flops=nflops/tbest
81 |     print("\nbest: ",DD[parsef(best)])
82 |     f.write(str(size)+" "+str(tbest)+"\n")
83 |     print("nb. flops: ",nflops, ", Gflops/s (best): ",flops/(10**9))
84 |     print("-------")
85 |     size*=2
86 |     print(" ")
87 | f.close()
88 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Numba/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the Python program given as argument(s) with NUMBA_ENABLE_AVX=1 exported.
3 | export NUMBA_ENABLE_AVX=1
4 | python3 "$@"
5 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Py/README.md:
--------------------------------------------------------------------------------
 1 | * To run the different codes, just do:
 2 | 
 3 | ```
 4 | python3 code.py
 5 | ```
 6 | 
 7 | with code.py = main_cl.py,  main_lapl_1d.py or main_lapl_2d.py
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Py/main_cl.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | 
 5 | def Init(X,L):
 6 |     size=X.size
 7 |     h=L/size
 8 |     for i in range(0,size):
 9 |         if i>size//8 and i<size//2+size//8:
10 |             X[i]=1.-2*(i-size//8)*h/L;
11 |         else:
12 |             X[i]=0.0
13 | 
14 | 
15 | def cl_1(A,B,C,D,niter):
16 |     size=A.size
17 |     for it in range(0,niter):
18 |         A=1.7*B-0.8*C-0.9*D
19 |         A,D=D,A
20 | 
21 | def cl_2(A,B,C,D,niter):
22 |     size=A.size
23 |     for it in range(0,niter):
24 |         for i in range(1,size-1):
25 |             A[i]=1.7*B[i]-0.8*C[i]-0.9*D[i]
26 |         A,D=D,A    
27 |               
28 | def test(p,A,B,C,D,nit):
29 |    
30 |     niter=nit
31 |     Init(A,1.)
32 |     Init(B,1.)
33 |     Init(C,1.)
34 |     Init(D,1.)
35 |     p(A,B,C,D,niter)
36 |     T=0.
37 |     while True:
38 |         Init(A,1.)
39 |         Init(B,1.)
40 |         Init(C,1.)
41 |         Init(D,1.)
42 |         t1 = time.time()
43 |         p(A,B,C,D,niter)
44 |         treal=time.time() -t1
45 |         t = treal/niter
46 |         if treal>0.0001 and abs(t-T)/t<0.025:
47 |             break
48 |         else:
49 |             T=t
50 |             niter*=2
51 | 
52 |     return T,niter
53 | 
54 | size=32
55 | sizemax=100000
56 | niter=10
57 | parsef= lambda  f: str(f).split(" ")[1] #parse function name
58 | DD={"cl_2":"Naïve     ",
59 |     "cl_1":"Vectorized"}
60 | 
61 | f=open("RunningOn"+socket.gethostname()+"_cl","w")
62 | 
63 | while size<sizemax:
64 |     print("size: ",size)
65 |     A= np.empty(size)
66 |     B= np.empty(size)
67 |     C= np.empty(size)
68 |     D= np.empty(size)
69 |     tbest=10.**20
70 |     best=0
71 |     t=0.0
72 |     for p in  [cl_1,cl_2]:
73 |         t,it=test(p,A,B,C,D,niter)
74 |         if t<tbest:
75 |             tbest=t
76 |             best=p
77 |         print(DD[parsef(p)]," : t= ",t," seconds ")
78 |     nflops= 4*(size-2)
79 |     flops=nflops/tbest
80 |     print("\nbest: ",DD[parsef(best)])
81 |     f.write(str(size)+" "+str(tbest)+"\n")
82 |     print("nb. flops: ",nflops, ", Gflops/s (best): ",flops/(10**9))
83 |     print("-------")
84 |     size*=2
85 |     print(" ")
86 | f.close()
87 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Py/main_lapl_1d.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | 
 5 | def Init(X,L):
 6 |     size=X.size
 7 |     h=L/size
 8 |     for i in range(0,size):
 9 |         if i>size//8 and i<size//2+size//8:
10 |             X[i]=1.-2*(i-size//8)*h/L;
11 |         else:
12 |             X[i]=0.0
13 | 
14 | 
15 | def lapl1d_1(In,Out,niter):
16 |     size=In.size
17 |     h2= (1./size)**2
18 |     for it in range(0,niter):
19 |         Out[1:size-1]= h2*(In[0:size-2]- 2.0*In[1:size-1]+ In[2:size])
20 |         In,Out=Out,In
21 | 
22 | def lapl1d_2(In,Out,niter):
23 |     size=In.size
24 |     h2= (1./size)**2
25 |     for it in range(0,niter):
26 |         for i in range(1,size-1):
27 |             Out[i]=h2*(In[i-1]- 2.0*In[i]+ In[i+1])
28 |         In,Out=Out,In  
29 |       
30 | def test(p,In,Out,nit):
31 |    
32 |     niter=nit
33 |     Init(In,1.)
34 |     Init(Out,1.)
35 |  
36 |     p(In,Out,niter)
37 |     T=0.
38 |     while True:
39 |         Init(In,1.)
40 |         Init(Out,1.)
41 |         t1 = time.time()
42 |         p(In,Out,niter)
43 |         treal=time.time() -t1
44 |         t = treal/niter
45 |         if treal>0.0001 and abs(t-T)/t<0.025:
46 |             break
47 |         else:
48 |             T=t
49 |             niter*=2
50 | 
51 |     return T,niter
52 | 
53 | DD={"lapl1d_1":"Vectorized",
54 |     "lapl1d_2":"Naïve     "}
55 | 
56 | f=open("RunningOn"+socket.gethostname()+"_lapl_1","w")
57 | 
58 | size=32
59 | sizemax=100000
60 | niter=10
61 | parsef= lambda  f: str(f).split(" ")[1] #parse function name
62 | while size<sizemax:
63 |     print("size: ",size)
64 |     In= np.empty(size)
65 |     Out= np.empty(size)
66 |     tbest=10.**20
67 |     best=0
68 |     t=0.0
69 |     for p in  [lapl1d_1,lapl1d_2]:
70 |         t,it=test(p,In,Out,niter)
71 |         if t<tbest:
72 |             tbest=t
73 |             best=p
74 |         print(DD[parsef(p)]," : t= ",t," seconds ")
75 |     nflops= 4*(size-2)
76 |     flops=nflops/tbest
77 |     print("\nbest: ",DD[parsef(best)])
78 |     f.write(str(size)+" "+str(tbest)+"\n")
79 |     print("nb. flops: ",nflops, ", Gflops/s (best): ",flops/(10**9))
80 |     print("-------")
81 |     size*=2
82 |     print(" ")
83 | f.close()
84 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/README.md:
--------------------------------------------------------------------------------
 1 | * Compile the _pythranized_ procedures, and run all the tests:
 2 | ```
 3 | ./script
 4 | ```
 5 | 
 6 | 
 7 | !Z! if you are using OpenBlas, you probably must create  a .pythranrc in your home directory like this:
 8 | 
 9 | ```
10 | [compiler]
11 | blas


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/cl_1.py:
--------------------------------------------------------------------------------
1 | #pythran export cl_1(float[:],float[:],float[:],float[:],int)
2 | def cl_1(A,B,C,D,niter):
3 |     size=A.size
4 |     for it in range(0,niter):
5 |         A=1.7*B-0.8*C-0.9*D
6 |         A,D=D,A
7 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/cl_2.py:
--------------------------------------------------------------------------------
1 | #pythran export cl_2(float[:],float[:],float[:],float[:],int)
2 | def cl_2(A,B,C,D,niter):
3 |     size=A.size
4 |     for it in range(0,niter):
5 |         for i in range(1,size-1):
6 |             A[i]=1.7*B[i]-0.8*C[i]-0.9*D[i]
7 |         A,D=D,A  
8 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/lapl1d_1.py:
--------------------------------------------------------------------------------
1 | #pythran export lapl1d_1(float[:],float[:],int)
2 | def lapl1d_1(In,Out,niter):
3 |     size=In.size
4 |     h2= (1./size)**2
5 |     for it in range(0,niter):
6 |         Out[1:size-1]= h2*(In[0:size-2]- 2.0*In[1:size-1]+ In[2:size])
7 |         In,Out=Out,In
8 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/lapl1d_2.py:
--------------------------------------------------------------------------------
1 | #pythran export lapl1d_2(float[:],float[:],int)
2 | def lapl1d_2(In,Out,niter):
3 |     size=In.size
4 |     h2= (1./size)**2
5 |     for it in range(0,niter):
6 |         for i in range(1,size-1):
7 |             Out[i]=h2*(In[i-1]- 2.0*In[i]+ In[i+1])
8 |         In,Out=Out,In  
9 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/lapl2d_1.py:
--------------------------------------------------------------------------------
 1 | #pythran export lapl2d_1(float[,],float[,],int)
 2 | def lapl2d_1(In,Out,niter):
 3 |     siz=In.shape[0]
 4 | 
 5 |     h2= (1./siz)**2
 6 | 
 7 |     for it in range(0,niter):
 8 |         Out[1:siz-1,1:siz-1]= h2*(
 9 |             In[0:siz-2,1:siz-1 ] + In[1:siz-1,0:siz-2]-
10 |             4.0*In[1:siz-1,1:siz-1]+
11 |             In[2:siz,1:siz-1]+In[1:siz-1,2:siz])
12 |         #In,Out=Out,In
13 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/lapl2d_2.py:
--------------------------------------------------------------------------------
 1 | #pythran export lapl2d_2(float[:,:],float[:,:],int)
 2 | def lapl2d_2(In,Out,niter):
 3 |     size=In.shape[0]
 4 |     h2= (1./size)**2
 5 |     for it in range(0,niter):
 6 |         for i in range(1,size-1):
 7 |             for j in range(1,size-1):
 8 |                 Out[i,j]= h2*(
 9 |                     In[i-1,j] + In[i,j-1]-
10 |                     4.0*In[i,j]+
11 |                     In[i+1,j]+In[i,j+1])
12 |         #In,Out=Out,In  
13 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/main_cl.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | from cl_1 import *
 5 | from cl_2 import *
 6 | 
 7 | def Init(X,L):
 8 |     size=X.size
 9 |     h=L/size
10 |     for i in range(0,size):
11 |         if i>size//8 and i<size//2+size//8:
12 |             X[i]=1.-2*(i-size//8)*h/L;
13 |         else:
14 |             X[i]=0.0
15 |               
16 | def test(p,A,B,C,D,nit):
17 |    
18 |     niter=nit
19 |     T=0.
20 |     while True:
21 |         Init(A,1.)
22 |         Init(B,1.)
23 |         Init(C,1.)
24 |         Init(D,1.)
25 |         t1 = time.time()
26 |         p(A,B,C,D,niter)
27 |         treal=time.time() -t1
28 |         t = treal/niter
29 |         if treal>0.001 and abs(t-T)/t<0.025:
30 |             break 
31 |         else:
32 |             T=t
33 |             niter*=2
34 | 
35 |     return T,niter
36 | 
37 | size=32
38 | sizemax=1000000
39 | niter=10
40 | parsef= lambda  f: str(f).split(" ")[2][:-1] #parse function name
41 | DD={"cl_2":"Naïve     ",
42 |     "cl_1":"Vectorized"}
43 | 
44 | f=open("RunningOn"+socket.gethostname()+"_cl","w")
45 | 
46 | while size<sizemax:
47 |     print("size: ",size)
48 |     A= np.empty(size)
49 |     B= np.empty(size)
50 |     C= np.empty(size)
51 |     D= np.empty(size)
52 |     tbest=10.**20
53 |     best=0
54 |     t=0.0
55 |     for p in  [cl_1,cl_2]:
56 |         t,it=test(p,A,B,C,D,niter)
57 |         if t<tbest:
58 |             tbest=t
59 |             best=p
60 |         print(DD[parsef(p)]," : t= ",t," seconds ")
61 |     nflops= size*2
62 |     flops=nflops/tbest
63 |     print("\nbest: ",DD[parsef(best)])
64 |     f.write(str(size)+" "+str(tbest)+"\n")
65 |     print("nb. flop: ",nflops, ", Gflops/ss (best): ",flops/(10**9))
66 |     print("-------")
67 |     size*=2
68 |     print(" ")
69 | f.close()
70 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/main_lapl_1d.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | from lapl1d_1 import *
 5 | from lapl1d_2 import *
 6 | def Init(X,L):
 7 |     size=X.size
 8 |     h=L/size
 9 |     for i in range(0,size):
10 |         if i>size//8 and i<size//2+size//8:
11 |             X[i]=1.-2*(i-size//8)*h/L;
12 |         else:
13 |             X[i]=0.0
14 | 
15 |       
16 | def test(p,In,Out,nit):
17 |    
18 |     niter=nit
19 |     Init(In,1.)
20 |     Init(Out,1.)
21 |  
22 |     p(In,Out,niter)
23 |     T=0.
24 |     while True:
25 |         Init(In,1.)
26 |         Init(Out,1.)
27 |         t1 = time.time()
28 |         p(In,Out,niter)
29 |         treal=time.time() -t1
30 |         t = treal/niter
31 |         if treal>0.0001 and abs(t-T)/t<0.025:
32 |             break
33 |         else:
34 |             T=t
35 |             niter*=2
36 | 
37 |     return T,niter
38 | 
39 | DD={"lapl1d_1":"Vectorized",
40 |     "lapl1d_2":"Naïve     "}
41 | 
42 | f=open("RunningOn"+socket.gethostname()+"_lapl_1","w")
43 | 
44 | size=32
45 | sizemax=100000
46 | niter=10
47 | parsef= lambda  f: str(f).split(" ")[2][:-1] #parse function name
48 | 
49 | while size<sizemax:
50 |     print("size: ",size)
51 |     In= np.empty(size)
52 |     Out= np.empty(size)
53 |     tbest=10.**20
54 |     best=0
55 |     t=0.0
56 |     for p in  [lapl1d_1,lapl1d_2]:
57 |         t,it=test(p,In,Out,niter)
58 |         if t<tbest:
59 |             tbest=t
60 |             best=p
61 |         print(DD[parsef(p)]," : t= ",t," seconds ")
62 |     nflops=size*4
63 |     flops=nflops/tbest
64 |     print("\nbest: ",DD[parsef(best)])
65 |     f.write(str(size)+" "+str(tbest)+"\n")
66 |     print("nb. flops: ",nflops, ", Gflops/s (best): ",flops/(10**9))
67 |     print("-------")
68 |     size*=2
69 |     print(" ")
70 | f.close()
71 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/main_lapl_2d.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | from lapl2d_1 import *
 5 | from lapl2d_2 import *
 6 | 
 7 | def Init(X,L):
 8 |     size=X.shape[0]
 9 |     h=L/size
10 |     for i in range(0,size):
11 |         for j in range(0,size):
12 |             if i>size//8 and i<size//2+size//8:
13 |                 X[i]=1.-2*(i-size//8)*h/L;
14 |             else:
15 |                 X[i]=0.0
16 | 
17 | def test(p,In,Out,nit):
18 |    
19 |     niter=nit
20 |     Init(In,1.)
21 |     Init(Out,1.)
22 |  
23 |     p(In,Out,niter)
24 |     T=0.
25 |     while True:
26 |         Init(In,1.)
27 |         Init(Out,1.)
28 |         t1 = time.time()
29 |         p(In,Out,niter)
30 |         treal=time.time() -t1
31 |         t = treal/niter
32 |         if treal>0.0001 and abs(t-T)/t<0.025:
33 |             break
34 |         else:
35 |             T=t
36 |             niter*=2
37 | 
38 |     return T,niter
39 | 
40 | size=32
41 | sizemax=2049
42 | niter=10
43 | parsef= lambda  f: str(f).split(" ")[2][:-1] #parse function name
44 | 
45 | DD={"lapl2d_1":"Vectorized",
46 |     "lapl2d_2":"Naïve     "}
47 | 
48 | f=open("RunningOn"+socket.gethostname()+"_lapl_2","w")
49 | 
50 | while size<sizemax:
51 |     print("size: ",size)
52 |     In= np.empty((size,size))
53 |     Out= np.empty((size,size))
54 |     tbest=10.**20
55 |     best=0
56 |     t=0.0
57 |     for p in  [lapl2d_1,lapl2d_2]:
58 |         t,it=test(p,In,Out,niter)
59 |         if t<tbest:
60 |             tbest=t
61 |             best=p
62 |         print(DD[parsef(p)]," : t= ",t," seconds ")
63 |     nflops=6*(size-2)**2
64 |     flops=nflops/tbest
65 |     print("\nbest: ",DD[parsef(best)])
66 |     f.write(str(size)+" "+str(tbest)+"\n")
67 |     print("nb. flops: ",nflops, ", Gflops/s (best): ",flops/(10**9))
68 |     print("-------")
69 |     size*=2
70 |     print(" ")
71 | f.close()
72 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Pythran/script:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | pythran -march=native -O3 lapl1d_1.py
 3 | pythran -march=native -O3 lapl1d_2.py
 4 | pythran -march=native -O3 lapl2d_1.py
 5 | pythran -march=native -O3 lapl2d_2.py
 6 | pythran -march=native -O3 cl_1.py
 7 | pythran -march=native -O3 cl_2.py
 8 | #
 9 | for i in main*.py; do
10 |     echo " " 
11 |     echo $i
12 |     echo "---"
13 |     echo " "
14 |     python3 $i
15 | done
16 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/README.md:
--------------------------------------------------------------------------------
 1 | We present some very small tests, which are common pieces of _real_
 2 | software:
 3 | 
 4 | * linear combinations of vectors.
 5 | 
 6 | 
 7 | * apply finite differences (order 2) laplacian in dimension 1.
 8 | 
 9 | * apply finite differences (order 2) laplacian in dimension 2.
10 | 
11 | 
12 | ### Author:
13 | 
14 | Thierry Dumont   tdumont@math.univ-lyon1.fr
15 | 
16 | ### Results:
17 | Have a look at [this page in the Wiki](https://github.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/wiki/6-The-micro-benchmarks).
18 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Results/Benchmarks/kepler-cl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/MicroBenchmarks/Results/Benchmarks/kepler-cl.png


--------------------------------------------------------------------------------
/MicroBenchmarks/Results/Benchmarks/kepler-lapl_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/MicroBenchmarks/Results/Benchmarks/kepler-lapl_1.png


--------------------------------------------------------------------------------
/MicroBenchmarks/Results/Benchmarks/kepler-lapl_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/MicroBenchmarks/Results/Benchmarks/kepler-lapl_2.png


--------------------------------------------------------------------------------
/MicroBenchmarks/Results/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Just do:
 3 | 
 4 | ```
 5 | ./gr.py
 6 | ```
 7 | This will create a directory "results/" if it does not exist, and
 8 | populate it with  files named:
 9 | 
10 | 
11 | AAA_BBB with AAA= Ju, Py, Numba or Pythran and BBB= cl, lapl_1 or lapl_2
12 | 
13 | These files contain the CPU time divided by the CPU time
14 | for the corresponding C++ program (that is to say, we take the CPU time
15 | in C++ as unit).
16 | 
17 | For visualisation you can do for example:
18 | ```
19 | gnuplot
20 | gnuplot> load "vis_cl"
21 | gnuplot> 
22 | ```
23 | 
24 | This will show the results for the linear combinations (as a function of the size).
25 | 
26 | Replace "vis_cl" by "vis_lapl_1" or by "vis_lapl_2" for the other results.
27 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Results/gr.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | #
 3 | #comparison between c++ and other computations
 4 | #
 5 | import socket
 6 | import os
 7 | 
 8 | def parsit(D,l):
 9 |     # extract two numbers from a line, if this is possible.
10 |     ll=l.split(" ")
11 |     if len(ll) == 2:
12 |         D[int(ll[0])]=float(ll[1].replace("\n",""))
13 | 
14 |         
15 | # directories to explore ---------   
16 | files=[
17 |     "../Py",
18 |     "../Ju",
19 |     "../Numba",
20 |     "../Pythran"
21 | ]
22 | cpp="../C++"
23 | 
24 | types=["_cl","_lapl_1","_lapl_2"]
25 | 
26 | def doWhatWeMustDo(Dirs,Type):
27 |     #-------------------------------------------
28 |     # build a dict  n-> computing time for  C++
29 |     C={}
30 |     with open(cpp+"/RunningOn"+socket.gethostname()+Type, 'r') as file:
31 |         for line in file:
32 |             parsit(C,line)
33 | 
34 |     #  build a dict  n-> computing time for all directories in files[]
35 |     T={}
36 |     for n in Dirs:
37 |         T[n]={}
38 |         filename= n+"/RunningOn"+socket.gethostname()+Type
39 |         with open(filename,"r") as file:
40 |             for line in file:
41 |                 parsit(T[n],line)
42 |     #print("all files parsed.")
43 |     # Compute ratio time/(time C++).
44 |     for n in Dirs:
45 |         D=T[n]
46 |         for k in D.keys():
47 |             if k in C.keys():
48 |                 D[k]/=C[k]
49 |     #print("ratios computed.")       
50 |     # create file for gnuplot.
51 |     for n in Dirs:
52 |         D=T[n]
53 |         thefile=n.replace("..","./results")+Type
54 |         #print("-file created: ",thefile)
55 |         with open(thefile, 'w') as file:
56 |             kk=sorted([k for k in D.keys()])
57 |             for k in kk:
58 |                 file.write(str(k).ljust(10)+" "+str(D[k])+'\n')
59 | #------main:
60 | if not os.path.exists("./results"):
61 |     os.makedirs("./results")
62 |     
63 | for t in types:
64 |     doWhatWeMustDo(files,t)
65 | print("\nsee  files in ./results.\n")
66 | print("To plot results with gnuplot just type:")
67 | print('load "vis_cl"\nor:\n load "vis_lapl_1"\nor:\nload "vis_lapl_2"')
68 | print("at gnuplot prompt.\n")
69 | 
70 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Results/vis_cl:
--------------------------------------------------------------------------------
 1 | set logscale 
 2 | set key top right
 3 | set xlabel "size"
 4 | set ylabel "cpu time / cpu time C++"
 5 | set title "Linear combinations"
 6 | 
 7 | plot "./results/Py_cl"      title "Python" with linespoint
 8 | replot "./results/Ju_cl"      title "Julia" with linespoint
 9 | replot "./results/Pythran_cl" title "Pythran" with linespoint
10 | replot "./results/Numba_cl" title "Numba" with linespoint
11 | replot 1  title "C++" lt -1
12 | #set terminal png size 600,450
13 | #set output "kepler-cl.png"


--------------------------------------------------------------------------------
/MicroBenchmarks/Results/vis_lapl_1:
--------------------------------------------------------------------------------
 1 | set logscale 
 2 | set key top right
 3 | set xlabel "size"
 4 | set ylabel "cpu time / cpu time C++"
 5 | set title "Laplacian 1d"
 6 | 
 7 | plot "./results/Py_lapl_1"      title "Python" with linespoint
 8 | replot "./results/Ju_lapl_1"      title "Julia" with linespoint
 9 | replot "./results/Pythran_lapl_1" title "Pythran" with linespoint
10 | replot "./results/Numba_lapl_1" title "Numba" with linespoint
11 | replot 1  title "C++" lt -1
12 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/Results/vis_lapl_2:
--------------------------------------------------------------------------------
 1 | set logscale 
 2 | set key top right
 3 | set xlabel "size"
 4 | set ylabel "cpu time / cpu time C++"
 5 | set title "Laplacian 2d"
 6 | 
 7 | plot "./results/Py_lapl_2"      title "Python" with linespoint
 8 | replot "./results/Ju_lapl_2"      title "Julia" with linespoint
 9 | replot "./results/Pythran_lapl_2" title "Pythran" with linespoint
10 | replot "./results/Numba_lapl_2" title "Numba" with linespoint
11 | replot 1  title "C++" lt -1
12 | 


--------------------------------------------------------------------------------
/MicroBenchmarks/runAllTests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | #  This script is supposed to run *all* the tests, and then compute
 4 | #  the final "report" in Results/
 5 | #  Not sure it works everywhere. If it does not, improve it, or enter each
 6 | #  directory and look at README.md to know what to do. 
 7 | #
 8 | #  Each test runs in a subshell which stops at once if a "cd" fails, so
 9 | #  that nothing is built or run from the wrong directory.
10 | #
11 | echo "Test: C++"
12 | echo "--- "
13 | (cd C++ || exit 1; mkdir -p Build; cd Build || exit 1; cmake ..; make; ./run_cl;./run_lapl_1;./run_lapl_2)
14 | 
15 | for i in Ju Numba ; do
16 |     echo  "Test: $i"
17 |     echo "--- " 
18 |     # one run of ./script per main* file of the directory.
19 |     (cd "$i" || exit 1; for j in main*; do ./script "$j"; done)
20 | done
21 | echo  "Test: Py"
22 | echo "--- "
23 | (cd Py || exit 1; python3 main_cl.py; python3 main_lapl_1d.py; python3 main_lapl_2d.py)
24 |      
25 | echo  "Test: Pythran"
26 | echo "--- "
27 | (cd Pythran || exit 1; ./script)
28 | echo " "
29 | 
30 | echo "Make the report:"
31 | # gr.py writes its files in Results/results.
32 | mkdir  -p Results/results
33 | (cd Results || exit 1; ./gr.py)
34 | echo " "
35 | echo "Everything went well ? If yes, go to ./Results/. You can visualize the"
36 | echo " results with gnuplot. Look at README.md in ./Results"
37 | echo " "
38 | echo " "
39 | 


--------------------------------------------------------------------------------
/SaintVenant/C/compile-cpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #module load GNU/7.1
4 | #module load Eigen
5 | g++ -std=c++14 -march=native -O3 -DNDEBUG main1d.cpp -o main1d
6 | 


--------------------------------------------------------------------------------
/SaintVenant/C/compile-gpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #module load GNU/7.1
4 | #module load Eigen
5 | nvcc -std=c++14 -arch=sm_35 -O3 -DNDEBUG -g -lineinfo main1d-gpu.cu main1d-gpu-kernels.cu -o main1d-gpu --expt-relaxed-constexpr
6 | 


--------------------------------------------------------------------------------
/SaintVenant/C/main1d-gpu-kernels.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdio.h>
 4 | #include <cooperative_groups.h>
 5 | 
 6 | namespace cg = cooperative_groups;
 7 | 
 8 | 
 9 | __global__ void scheme_LaxFriedrich(double* V1, double* V2, double* Vold1, double* Vold2, double* lambdas, double dt, double dx, double tol, unsigned int nx);
10 | 
11 | __global__ void update_eigenvalues(double* lambdas, double* V1, double* V2, double tol, unsigned int nx);
12 | 
13 | 
14 | __global__ void reduce_max(double *in, double *out, std::size_t N);
15 | 


--------------------------------------------------------------------------------
/SaintVenant/Ju/run_cpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #module load Julia
4 | julia -O3 --check-bounds=no main1d.jl
5 | 


--------------------------------------------------------------------------------
/SaintVenant/Ju/run_gpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #module load Julia
4 | julia -O3 --check-bounds=no main1d-gpu.jl
5 | 


--------------------------------------------------------------------------------
/Sparse/C++/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(BenchmarksPythonJuliaAndCo)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | #
 6 | #- To use intel compiler
 7 | # CXX=icpc cmake ..
 8 | # - for clang++:
 9 | # CXX=clang++ cmake ..
10 | # - otherwise, to use g++:
11 | #  cmake ..
12 | #
13 | 
14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
15 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -restrict -DICC -DALIGN_64  -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
16 | 
17 | elseif (${CMAKE_CXX_COMPILER} MATCHES "clang.*$")
18 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -Wall -O1  -std=c++14 -Wall -march=native")
19 | else ()
20 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -Wall -O3  -std=c++14 -Wall -march=native")
21 | 
22 | endif ()
23 | 
24 | add_executable(
25 |   run
26 |   ../main.cpp
27 |   )
28 | 
29 | 
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/Sparse/C++/PreSparse.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <map>
 3 | #include <utility>
 4 | #include <algorithm>
 5 | #include <iostream>
 6 | #include <tuple>
 7 | using namespace std;
 8 | // Sparse matrix under construction: an ordered map (i,j) -> value, which
 9 | // also records the largest row and column indices used so far.
10 | class PreSparse
11 | {
12 |   map<pair<int,int>,double> M;
13 |   int imax,jmax; // largest i and j passed to operator() since creation/purge.
14 | public:
15 |   PreSparse()
16 |   {
17 |     imax=0; jmax=0;
18 |   }
19 |   // Number of coefficients stored.
20 |   int size() const {return static_cast<int>(M.size());}
21 |   // Forget all the coefficients *and* the recorded index bounds: the
22 |   // object is back in its freshly constructed state, so sizes() does not
23 |   // report the bounds of a previous matrix.
24 |   void purge()
25 |   {
26 |     M.clear();
27 |     imax=0; jmax=0;
28 |   }
29 |   // Reference to coefficient (i,j), created with value 0 if it is absent.
30 |   // NOTE(review): the comment previously here said "indices start at 1",
31 |   // but sizes() returns imax+1 and jmax+1, which matches 0-based indices
32 |   // -- confirm against the callers.
33 |   double& operator()(int i,int j)
34 |   {
35 |     imax=max(i,imax);
36 |     jmax=max(j,jmax);
37 |     return M[make_pair(i,j)];
38 |   }
39 |   // (largest i + 1, largest j + 1, number of coefficients).
40 |   tuple<int,int,int> sizes() const {return make_tuple(imax+1,jmax+1,size());}
41 |   // Read-only iteration over the coefficients, sorted by (i,j).
42 |   map<pair<int,int>,double>::const_iterator cbegin()const {return M.cbegin();}
43 |   map<pair<int,int>,double>::const_iterator cend() const {return M.cend();}
44 |   // Print every coefficient as "i j : value", one per line.
45 |   void print() const
46 |   {
47 |     for(auto I=M.cbegin();I!=M.cend();++I)
48 |       cout<<I->first.first<<" "<<I->first.second<<" : "<<I->second<<endl;
49 |   }
50 | };
51 | 


--------------------------------------------------------------------------------
/Sparse/C++/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Building the CSR matrix:
 3 | 
 4 | I know two methods to build the matrix:
 5 | 
 6 | 1- We first build a std::map (i,j) -> value (in PreSparse.hpp, PreLapl.hpp).
 7 | Then, building the CSR matrix can be done in one simple pass on the
 8 | map.
 9 | 
10 | 2- Precompute the size of the arrays, allocate them, fill them and
11 | then build the CSR matrix (exactly what we do in Ju, Py, Numba and Pythran).
12 | 
13 | Although the first method is very convenient, it is much slower (one
14 | reason could be that we make a lot of memory allocations). So we use the
15 | second one, but you can use the first one (look at main: replace calls to
16 | *do_test_arrays* with calls to *dotest_map*).
17 | 
18 | We could imagine other methods, like using std::vector's, which grow on
19 | demand, but this will probably also be slower than the second method.
20 | 
21 | ### Note that:
22 | 
23 | shared_ptr has a well-known problem with "old" compilers like g++-5 (one cannot write a[i]
24 | when a is a shared_ptr: one must go through a.get()), and as we are a bit lazy, we
25 | have switched to old-style programming, with new and delete.
26 | This will not change the performance. Feel free to change this.
27 | 
28 | ### Compilation:
29 | ```
30 | mkdir Build
31 | cd Build
32 | cmake ..
33 | make
34 | ```
35 | a file "run" is created
36 | 
37 | ### Run the code:
38 | 
39 | from Build/ directory, type:
40 | ```
41 | ./run
42 | ```
43 | 
44 | 


--------------------------------------------------------------------------------
/Sparse/Ju/README.md:
--------------------------------------------------------------------------------
 1 | To run the code, just type:
 2 | ```
 3 | ./script
 4 | ```
 5 | If you type:
 6 | ```
 7 | ./script-m
 8 | ```
 9 | You will be able to see how Julia manages memory, but you will
10 | slowdown the code!
11 | 
12 | ### Note:
13 | 
14 | If you want to test the _pushed_ method to create matrices, have a
15 | look at the beginning of main.jl.
16 | 
17 | With the _pushed_ method, one does not need to precompute array
18 | sizes, but the method is much slower.
19 | 


--------------------------------------------------------------------------------
/Sparse/Ju/main.jl:
--------------------------------------------------------------------------------
 1 | push!(LOAD_PATH, "./")
 2 | 
 3 | # if you want to experiment with the "pushed" matrix creation method,
 4 | # switch comments on the two following lines:
 5 | using Sparse23
 6 | #using Sparse23push
 7 | #using Sparse23raw
 8 | using Printf
 9 | using SparseArrays, LinearAlgebra
10 | 
11 | 
12 | 
13 | function dotest(dim::Int64, size::Int64)
14 |     # 1) matrix creation:
15 |     t1 = time_ns()
16 |     if dim==2
17 |         M, order, nc = PreLapl2(size)
18 |         sizeV = size^2
19 |     else
20 |         M, order, nc = PreLapl3(size)
21 |         sizeV = size^3
22 |     end
23 |     
24 |     t1 = time_ns() - t1
25 |     V = ones(Float64, sizeV)
26 |     W = zeros(Float64, sizeV)
27 | 
28 |     # 2) matrix x vector product.
29 |     t2 = time_ns()
30 |     mul!(W, M, V)
31 |     t2 = time_ns() - t2
32 |     
33 |     return order, nc, t1, t2
34 | end
35 | 
36 | 
37 | 
38 | println()  # legend of the columns printed (and written to file) below.
39 | @printf "%-5s %1s %-20s\n" "size" ":" "size of the grid."
40 | @printf "%-5s %1s %-20s\n" "order" ":" "order of the matrix."
41 | @printf "%-5s %1s %-20s\n" "nc" ":" "number of coefficients."
42 | @printf "%-5s %1s %-20s\n" "T.b" ":" "time to build the matrix (s)."
43 | @printf "%-5s %1s %-20s\n" "T.p" ":" "time for matrix x vector product (s)."
44 | 
45 | # computation starts here:
46 | for dim=2:3
47 |     println("\n" * string(dim) * "d:")
48 |     fw = open("RunningOn" * gethostname() * "-" * string(dim), "w")
49 |     if dim==2
50 |         size = 32
51 |         sizeM = 2048
52 |     else
53 |         size = 16
54 |         sizeM = 128
55 |     end
56 |     
57 |     @printf "%4s %8s %9s %10s %10s \n" "size" "order" "nc" "T.b" "T.p"
58 |     while size <= sizeM
59 |         order, nc, t1, t2 = dotest(dim, size)   #warmup
60 |         order, nc, t1, t2 = dotest(dim, size)
61 |  
62 |  
63 |         s = @sprintf "%4d %8d %9d %10.2e %10.2e \n" size order nc float(t1)*10.0^(-9) float(t2)*10.0^(-9)
64 |         print(s)
65 |         write(fw, s)
66 |         size *= 2
67 |     end
68 |     
69 |     close(fw)
70 | end
71 | 


--------------------------------------------------------------------------------
/Sparse/Ju/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 main.jl
3 | 


--------------------------------------------------------------------------------
/Sparse/Ju/script-m:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 --track-allocation=user main.jl
3 | 


--------------------------------------------------------------------------------
/Sparse/Numba/README.md:
--------------------------------------------------------------------------------
 1 | To run the code, just type:
 2 | 
 3 | ```
 4 | ./script
 5 | ```
 6 | 
 7 | If you do not have a machine with AVX instructions, comment out the
 8 | line
 9 | ```
10 | export NUMBA_ENABLE_AVX=1
11 | ```
12 | in ./script .
13 | 
14 | ### Note:
15 | 
16 | lambda functions in jitted routines could not be compiled.
17 | 


--------------------------------------------------------------------------------
/Sparse/Numba/build2.py:
--------------------------------------------------------------------------------
 1 | from numba import jit
 2 | import numpy as np
 3 | @jit
 4 | def build2(size):
 5 |     #how many coefficients ?
 6 |     nc= 5*(size-2)**2+ 16*(size-2)+ 12
 7 |     row= np.empty((nc),dtype=int)
 8 |     col=np.empty((nc),dtype=int)
 9 |     v=np.empty((nc),dtype=float)
10 |     h=1./(size-1)
11 |     h2=h*h
12 |     cd=-4/h2
13 |     hd=1./h2;
14 |     #I=lambda i,j: i*size+j
15 |     count=0
16 | 
17 |     for i in range(0,size):
18 |         for j in range(0,size):
19 |              l=i*size+j
20 |              row[count]=l
21 |              col[count]=l
22 |              v[count]=cd
23 |              count+=1
24 |              for i1 in [-1,1]:
25 |                  if i+i1>=0 and i+i1<size:
26 |                      row[count]=l
27 |                      col[count]=(i+i1)*size+j
28 |                      v[count]=hd
29 |                      count+=1
30 |                  if j+i1>=0 and j+i1<size:
31 |                     row[count]=l
32 |                     col[count]=i*size+j+i1
33 |                     v[count]=hd
34 |                     count+=1
35 |     
36 |     return nc,row,col,v
37 | 
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/Sparse/Numba/build3.py:
--------------------------------------------------------------------------------
 1 | from numba import jit
 2 | import numpy as np
 3 | @jit(nopython=True)
 4 | def build3(size):
 5 |     nc= 7*(size-2)**3 + 36*(size-2)**2 + 60*(size-2)+ 8*4
 6 |     row= np.empty((nc),dtype=int)
 7 |     col=np.empty((nc),dtype=int)
 8 |     v=np.empty((nc),dtype=float)
 9 |     h=1./(size-1)
10 |     h2=h*h
11 |     cd=-6/h2
12 |     hd=1./h2
13 |     size2=size*size
14 |     #I=lambda i,j,k: i*size2+j*size+k
15 |     #
16 |     count=0
17 |     for i in range(0,size):
18 |         for j in range(0,size):
19 |             for k in range(0,size):
20 |                 #l=I(i,j,k)
21 |                 l=i*size2+j*size+k
22 |                 row[count]=l
23 |                 col[count]=l
24 |                 v[count]=cd
25 |                 count+=1
26 |                 for i1 in [-1,1]:
27 |                     if i+i1>=0 and i+i1<size:
28 |                         row[count]=l
29 |                         #col[count]=I(i+i1,j,k)
30 |                         col[count]=l+i1*size2
31 |                         v[count]=hd
32 |                         count+=1                    
33 |                     if j+i1>=0 and j+i1<size:
34 |                         row[count]=l
35 |                         #col[count]=I(i,j+i1,k)
36 |                         col[count]=l+i1*size
37 |                         v[count]=hd
38 |                         count+=1
39 |                     if k+i1>=0 and k+i1<size:
40 |                         row[count]=l
41 |                         #col[count]=I(i,j,k+i1)
42 |                         col[count]=l+i1
43 |                         v[count]=hd
44 |                         count+=1
45 |     return nc,row,col,v
46 | 


--------------------------------------------------------------------------------
/Sparse/Numba/main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | from scipy.sparse import csr_matrix
 5 | import build2
 6 | import build3
 7 | #
 8 | def dotest(dim,size):
 9 |     tb = time.time()
10 |     if dim==2:
11 |         nc,row,col,v= build2.build2(size)
12 |         lenv=size*size
13 |     else:
14 |         nc,row,col,v= build3.build3(size)
15 |         lenv=size**3
16 |     A=csr_matrix((v, (row, col)))
17 |     tb=time.time()-tb
18 |     X=np.ones((lenv),dtype=float)
19 |     Y=np.empty((lenv),dtype=float)
20 | 
21 |     
22 | 
23 |     tprod=time.time()
24 |     Y=A.dot(X)
25 |     tprod=time.time()-tprod
26 | 
27 |     return lenv,nc,tb,tprod
28 | #
29 | def banner(dim):
30 |     print("\n",str(dim)+"d")
31 |     print("size".ljust(5),"order".ljust(9),"nc".ljust(9),
32 |           "T.b".ljust(21),"T.p".ljust(20))
33 |         
34 | for dim in [2,3]:
35 |     if dim==2:
36 |         size=32
37 |         sizeM=2048
38 |     else:
39 |         size=16
40 |         sizeM=256
41 |     banner(dim)
42 |     
43 |     dotest(dim,size) # make a "hot start" !
44 |     
45 |     f=open("RunningOn"+socket.gethostname()+"-"+str(dim),"w")
46 |     while size<=sizeM:
47 |         order,nc,tb,tm=dotest(dim,size)
48 |         tw=str(size).ljust(6)+str(order).ljust(10)+str(nc).ljust(10)+\
49 |             str(tb).ljust(22)+str(tm).ljust(20)
50 |         print(tw)
51 |         f.write(tw+"\n")
52 |         size*=2
53 |     f.close()    
54 | 


--------------------------------------------------------------------------------
/Sparse/Numba/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export NUMBA_ENABLE_AVX=1
4 | python3 main.py
5 | 


--------------------------------------------------------------------------------
/Sparse/Py/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | python3 main.py
4 | ```
5 | 


--------------------------------------------------------------------------------
/Sparse/Py/main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | from scipy.sparse import csr_matrix
 5 | from build import build2,build3
 6 | #
 7 | def dotest(dim,size):
 8 |     tb = time.time()
 9 |     if dim==2:
10 |         A,nc= build2(size)
11 |         lenv=size*size
12 |     else:
13 |         A,nc=build3(size)
14 |         lenv=size**3
15 |     tb=time.time()-tb
16 |     X=np.ones((lenv),dtype=float)
17 |     Y=np.empty((lenv),dtype=float)
18 | 
19 |     
20 | 
21 |     tprod=time.time()
22 |     Y=A.dot(X)
23 |     tprod=time.time()-tprod
24 | 
25 |     return lenv,nc,tb,tprod
26 | #
27 | def banner(dim):
28 |     print("\n",str(dim)+"d")
29 |     print("size".ljust(5),"order".ljust(9),"nc".ljust(9),
30 |           "T.b".ljust(21),"T.p".ljust(20))
31 |     
32 | 
33 | # lets's go:     
34 | for dim in [2,3]:
35 |     if dim==2:
36 |         size=32
37 |         sizeM=2048
38 |     else:
39 |         size=16
40 |         sizeM=128
41 |     banner(dim)
42 |     f=open("RunningOn"+socket.gethostname()+"-"+str(dim),"w")
43 |     while size<=sizeM:
44 |         order,nc,tb,tm=dotest(dim,size)
45 |         tw=repr(size).ljust(6)+repr(order).ljust(10)+repr(nc).ljust(10)+\
46 |             repr(tb).ljust(22)+repr(tm).ljust(20)
47 |         print(tw)
48 |         f.write(tw+"\n")
49 |         size*=2
50 |     f.close()  
51 | 


--------------------------------------------------------------------------------
/Sparse/Pythran/README.md:
--------------------------------------------------------------------------------
 1 | First compile what should be  _pythranized_  and run the code; type:
 2 | ```
 3 | ./script
 4 | ```
 5 | 
 6 | !Z! if you are using OpenBlas, you probably must create  a .pythranrc in your home directory like this:
 7 | 
 8 | ```
 9 | [compiler]
10 | blas=openblas
11 | ```


--------------------------------------------------------------------------------
/Sparse/Pythran/build2.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | #pythran export build2(int)
 3 | def build2(size):
 4 |     #how many coefficients ?
 5 |     nc= 5*(size-2)**2+ 16*(size-2)+ 12
 6 |     row= np.empty((nc),dtype=int)
 7 |     col=np.empty((nc),dtype=int)
 8 |     v=np.empty((nc),dtype=float)
 9 |     h=1./(size-1)
10 |     h2=h*h
11 |     cd=-4/h2
12 |     hd=1./h2;
13 |     I=lambda i,j: i*size+j
14 |     count=0
15 | 
16 |     for i in range(0,size):
17 |         for j in range(0,size):
18 |              l=I(i,j)
19 |              row[count]=l
20 |              col[count]=l
21 |              v[count]=cd
22 |              count+=1
23 |              for i1 in [-1,1]:
24 |                  if i+i1>=0 and i+i1<size:
25 |                      row[count]=l
26 |                      col[count]=I(i+i1,j)
27 |                      v[count]=hd
28 |                      count+=1
29 |                  if j+i1>=0 and j+i1<size:
30 |                     row[count]=l
31 |                     col[count]=I(i,j+i1)
32 |                     v[count]=hd
33 |                     count+=1
34 |     
35 |     return nc,row,col,v
36 | 
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/Sparse/Pythran/build3.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | #pythran export build3(int)                
 3 | def build3(size):
 4 |     nc= 7*(size-2)**3 + 36*(size-2)**2 + 60*(size-2)+ 8*4
 5 |     row= np.empty((nc),dtype=int)
 6 |     col=np.empty((nc),dtype=int)
 7 |     v=np.empty((nc),dtype=float)
 8 |     h=1./(size-1)
 9 |     h2=h*h
10 |     cd=-6/h2
11 |     hd=1./h2
12 |     size2=size*size
13 |     I=lambda i,j,k: i*size2+j*size+k
14 |     #
15 |     count=0
16 |     for i in range(0,size):
17 |         for j in range(0,size):
18 |             for k in range(0,size):
19 |                 l=I(i,j,k)
20 |                 row[count]=l
21 |                 col[count]=l
22 |                 v[count]=cd
23 |                 count+=1
24 |                 for i1 in [-1,1]:
25 |                     if i+i1>=0 and i+i1<size:
26 |                         row[count]=l
27 |                         col[count]=I(i+i1,j,k)
28 |                         v[count]=hd
29 |                         count+=1                    
30 |                     if j+i1>=0 and j+i1<size:
31 |                         row[count]=l
32 |                         col[count]=I(i,j+i1,k)
33 |                         v[count]=hd
34 |                         count+=1
35 |                     if k+i1>=0 and k+i1<size:
36 |                         row[count]=l
37 |                         col[count]=I(i,j,k+i1)
38 |                         v[count]=hd
39 |                         count+=1
40 |     return nc,row,col,v
41 | 


--------------------------------------------------------------------------------
/Sparse/Pythran/main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | import socket
 4 | from scipy.sparse import csr_matrix
 5 | import build2
 6 | import build3
 7 | #
 8 | def dotest(dim,size):
 9 |     tb = time.time()
10 |     if dim==2:
11 |         nc,row,col,v= build2.build2(size)
12 |         lenv=size*size
13 |     else:
14 |         nc,row,col,v= build3.build3(size)
15 |         lenv=size**3
16 |     A=csr_matrix((v, (row, col)))
17 |     tb=time.time()-tb
18 |     X=np.ones((lenv),dtype=float)
19 |     Y=np.empty((lenv),dtype=float)
20 | 
21 |     
22 | 
23 |     tprod=time.time()
24 |     Y=A.dot(X)
25 |     tprod=time.time()-tprod
26 | 
27 |     return lenv,nc,tb,tprod
28 | #
29 | def banner(dim):
30 |     print("\n",str(dim)+"d")
31 |     print("size".ljust(5),"order".ljust(9),"nc".ljust(9),
32 |           "T.b".ljust(21),"T.p".ljust(20))
33 |         
34 | for dim in [2,3]:
35 |     if dim==2:
36 |         size=32
37 |         sizeM=2048
38 |     else:
39 |         size=16
40 |         sizeM=256
41 |     banner(dim)
42 | 
43 |     f=open("RunningOn"+socket.gethostname()+"-"+str(dim),"w")
44 |     while size<=sizeM:
45 |         order,nc,tb,tm=dotest(dim,size)
46 |         tw=str(size).ljust(6)+str(order).ljust(10)+str(nc).ljust(10)+\
47 |             str(tb).ljust(22)+str(tm).ljust(20)
48 |         print(tw)
49 |         f.write(tw+"\n")
50 |         size*=2
51 |     f.close()    
52 | 


--------------------------------------------------------------------------------
/Sparse/Pythran/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | rm -f *so
3 | pythran -march=native -O3 build2.py
4 | pythran -march=native -O3 build3.py
5 | echo "run test:"
6 | python3 main.py
7 | 


--------------------------------------------------------------------------------
/Sparse/README.md:
--------------------------------------------------------------------------------
 1 | ### Description:
 2 | 
 3 | We build sparse matrices and compute matrix x vector products.
 4 | 
 5 | Note that we are interested in:
 6 | 
 7 | * time to build the matrices.
 8 | * time to compute the matrix  x vector products.
 9 | 
10 | (as a function of the **size** of the problem).
11 | 
12 | Except in C++, the matrix  x vector products rely on libraries and we
13 | can expect to get about the same computing time in every case. 
14 | 
15 | The time to build the matrices is certainly dependent on
16 | optimizations, language and so on.
17 | 
18 | ### Motivation:
19 | 
20 | The matrices are based on the classical uniform  finite difference
21 | approximation of the Laplacian in 2d (5 points stencil) and 3d (7
22 | points stencil). We compute in a square (2d) or a cube (3d) and the
23 | **size** of the problem is the number of points in every dimension for
24 | the square or for the cube, thus
25 | giving an order of size^2 or size^3 for the matrices.
26 | 
27 | Such matrices are representative of what should be used with finite
28 | element or finite volume approximations.
29 | 
30 | 
31 | 
32 | ### Author:
33 | 
34 | Thierry Dumont   tdumont@math.univ-lyon1.fr
35 | 
36 | ### The directories contain:
37 | 
38 | - **C++**: computation in C++.
39 | 
40 | - **Py**:  Python + numpy computation.
41 | 
42 | - **Pythran**:  Python + numpy + Pythran  computation.
43 | 
44 | - **Numba**: Python + numpy + Numba  computation.
45 | 
46 | - **Ju**: Julia computation.
47 | 
48 | 
49 | ### Running the benchmarks:
50 | 
51 | cd successively in C++, Py, Pythran, Numba, Ju; then look at the documentation.
52 | 
53 | Once you have run the benchmark in **all** directories, go to Results/
54 | and look at the documentation to know how to exploit the results.
55 | 
56 | 
57 | ### Note:
58 | 
59 | In C++, using clang++ (clang version 7.0.0-3), it seems impossible to optimize
60 | with -Ox with x>1 (this is a problem with the Clock). If you can help, you are
61 | welcome! 
62 | 


--------------------------------------------------------------------------------
/Sparse/Results/Arithmetic-Intensity.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Running _stream_ with 1 thread, that is to say:
 3 | ```
 4 | export OMP_NUM_THREADS=1
 5 | ./stream
 6 | ```
 7 | 
 8 | gives, on the _reference_ _machine_:
 9 | 
10 | Function|    Best Rate MB/s | Avg time |    Min time  |   Max time|
11 | --------|-------------------|----------|--------------|-----------|
12 | Copy:|            7808.7 |     0.020542 |    0.020490 |    0.020588|
13 | Scale:|            7578.6 |     0.021212|      0.021112 |     0.021387| 
14 | Add:  |          11048.2|     0.021784|     0.021723 |    0.021899| 
15 | Triad:  |        10360.9  |   0.023216  |   0.023164 |    0.023299|
16 | 
17 | that is to say about a maximum of 11 GB/s or 1.375 G doubles per second.
18 | 
19 | The memory traffic of the matrix x vector product is about 16 x n doubles
20 | in dimension 2, and the number of floating point operations is about 9 x
21 | n, so the Arithmetic Intensity is about 9/16 flop per double. The Roofline
22 | Model predicts that the maximum attainable performance will be about
23 | 1.375*9/16= 0.77 Gflop/s.
24 | 


--------------------------------------------------------------------------------
/Sparse/Results/Benchmarks/gpc-2-b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Sparse/Results/Benchmarks/gpc-2-b.png


--------------------------------------------------------------------------------
/Sparse/Results/Benchmarks/gpc-2-p.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Sparse/Results/Benchmarks/gpc-2-p.png


--------------------------------------------------------------------------------
/Sparse/Results/Benchmarks/gpc-3-b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Sparse/Results/Benchmarks/gpc-3-b.png


--------------------------------------------------------------------------------
/Sparse/Results/Benchmarks/gpc-3-p.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thierry-Dumont/BenchmarksPythonJuliaAndCo/bbe18830b312881dc4abe786fc076decfa0396ad/Sparse/Results/Benchmarks/gpc-3-p.png


--------------------------------------------------------------------------------
/Sparse/Results/README.md:
--------------------------------------------------------------------------------
 1 | First, do:
 2 | ```
 3 | mkdir ./Results (if ./Results does not exist).
 4 | ./gr.py
 5 | ```
 6 | It will parse your results (in the "RunningOn _your_ _hostname_*" files) and populate
 7 | Results/ with files that gnuplot can plot (6 files).
 8 | 
 9 | It also creates gnuplot _scripts_  gpc-[2-3]-[b-p]:
10 | 
11 | * [2-3] stands for the spatial dimension (2 or 3)
12 | * [b-p] stands for the type of computation (b for building the matrix,
13 |   p for the matrix x vector product).
14 | 
15 | So, gpc-3-b is the time to build the matrix in dimension 3. More
16 |   exactly, it is the ratio of the computing time  for the different
17 |   languages/implementations to the computing time in C++.
18 |   
19 | Then:
20 | ```
21 | gnuplot
22 | gnuplot> load "gpc-3-b"
23 | ```
24 | will do the job in this case (and so on). Recall that we take C++
25 | computing time as unit. 
26 | 
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/Sparse/runAllTests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | #  This script is supposed to run *all* the tests, and then compute
 4 | #  the final "report" in Results/
 5 | #  Not sure it works everywhere. If it does not, improve it, or enter each
 6 | #  directory and look at README.md to know what to do. 
 7 | #
 8 | 
 9 | # C++: configure and build in C++/Build, then run the benchmark.
10 | # Steps are chained with && so that a failing step (e.g. a missing
11 | # directory) does not let the following commands run in the wrong place.
12 | echo "--- "
13 | echo "Test: C++"
14 | echo "--- "
15 | (cd C++ && mkdir -p Build && cd Build && cmake .. && make && ./run)
16 | 
17 | # Ju, Numba, Pythran: each of these directories provides its own ./script.
18 | for i in Ju Numba Pythran ; do
19 |     echo "--- "
20 |     echo  "Test: $i"
21 |     echo "--- " 
22 |     (cd "$i" && ./script)
23 | done
24 | 
25 | # Pure Python version.
26 | echo "--- "
27 | echo  "Test: Py"
28 | echo "--- "
29 | (cd Py && python3 ./main.py)
30 | 
31 | 
32 | # Report: gr.py is run from Results/ and is expected to populate
33 | # Results/Results (see README.md in ./Results).
34 | echo " "
35 | echo "Make the report:"
36 | mkdir  -p Results/Results
37 | (cd Results && ./gr.py)
38 | echo " "
39 | echo "Everything went well ? If yes,go to ./Results/. You can visualize the"
40 | echo " results with gnuplot. Look at README.md in ./Results"
41 | echo " "
42 | echo " "
43 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
 1 | There are some problems with the current version of Numba (0.51.1, 2020/08):
 2 | 
 3 | -FeStiff:  a deprecation warning, and a warning concerning dot.
 4 | 
 5 | -MicroBenchMarks: cannot parallelize lapl1d_2 in main_lapl_1d.py
 6 | 
 7 | -Sparse: build3.py has a problem with np.empty
 8 | 
 9 | -Weno: some warnings.
10 | 
11 | All of this needs to be fixed.


--------------------------------------------------------------------------------
/Weno/C++-Modulo/Burghers.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <algorithm>
 3 | #include <string>
 4 | struct  Burghers
 5 | {
 6 |   //! the flux.
 7 |   static double flux(double x) 
 8 |   {
 9 |     return 0.5*x*x;
10 |   }
11 |   //! min of flux  in [a,b]. Used by Godunov flux.
12 |   //! we do NOT test a<=b !
13 |   static double min(double a, double b)
14 |   {
15 | 
16 |     if(b<=0)
17 |       return  0.5*b*b;
18 |     else if (a>=0)
19 |       return  0.5*a*a;
20 |     else
21 |       return 0.0;
22 |   }
23 |   //! max of flux in [a,b]. Used by Godunov flux.
24 |   //! we do NOT test a<=b !
25 |   static double max(double a, double b)
26 |   {
27 |      return 0.5*std::max(a*a,b*b);
28 |   }
29 | };
30 | 


--------------------------------------------------------------------------------
/Weno/C++-Modulo/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(BenchmarksPythonJuliaAndCo)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | #
 6 | #- To use intel compiler
 7 | # CXX=icpc cmake ..
 8 | # - for clang++:
 9 | # CXX=clang++ cmake ..
10 | # - otherwise, to use g++:
11 | #  cmake ..
12 | #
13 | 
14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
15 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
16 | 
17 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$")
18 |   set(CMAKE_CXX_COMPILER "clang++")
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3  -Wall   -Wall -std=c++14 -march=native")
20 | else ()
21 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -std=c++14 -Wall -march=native")
22 | 
23 | endif ()
24 | 
25 | add_executable(
26 |   run
27 |   ../main.cpp
28 |   )
29 | 
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/Weno/C++-Modulo/Convection.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <algorithm>
 3 | #include <string>
 4 | struct  Convection
 5 | {
 6 |   //! the flux.
 7 |   static double flux(double x) 
 8 |   {
 9 |     return x;
10 |   }
11 |   //! min of flux  in [a,b]. Used by Godunov flux.
12 |   //! we do NOT test a<=b !
13 |   static double min(double a, double b)
14 |   {
15 | 
16 |     if(b<=0)
17 |       return flux(b);
18 |     else if (a>=0)
19 |       return flux(a);
20 |     else
21 |       return 0.0;
22 |   }
23 |   //! max of flux in [a,b]. Used by Godunov flux.
24 |   //! we do NOT test a<=b !
25 |   static double max(double a, double b)
26 |   {
27 |      return std::max(flux(a),flux(b));
28 |   }
29 | };
30 | 


--------------------------------------------------------------------------------
/Weno/C++-Modulo/GodunovFlux.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <utility>
 3 | //
 4 | // The Godunov flux.
 5 | //
 6 | template<class F> struct GodunovFlux{
 7 |   // constructors, for compatibility with other fluxes.
 8 |   GodunovFlux(double *params=0){}
 9 |   GodunovFlux(GodunovFlux<F>& G)= delete;
10 |   GodunovFlux& operator=(GodunovFlux<F>&& G){return *this;}
11 |   double operator() (double a, double b)
12 |   {
13 |     return a<=b ?  F::min(a,b):F::max(b,a);
14 |   }
15 | };
16 | 


--------------------------------------------------------------------------------
/Weno/C++-Modulo/LaxFriedrichsFlux.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <iostream>
 3 | #include <utility>
 4 | //
 5 | // The Lax-Friedrichs numerical flux.
 6 | //
 7 | // F: the problem; it must provide a static member  double flux(double).
 8 | template<class F> class LaxFriedrichsFlux{
 9 |   //! the Lax-Friedrichs parameter. It is 0 until it is set from params
10 |   //! or by assignment, so that a default constructed flux is never read
11 |   //! with an indeterminate value.
12 |   double alpha=0.0;
13 | public:
14 |   //! default constructor: alpha is 0.
15 |   LaxFriedrichsFlux(){}
16 |   //! \param params params[0] is the Lax-Friedrichs parameter alpha.
17 |   LaxFriedrichsFlux(double *params):alpha(params[0]){}
18 |   //! construction from a non const lvalue is forbidden.
19 |   LaxFriedrichsFlux(LaxFriedrichsFlux<F>& L)=delete;
20 |   //! assignment from a temporary: takes the parameter alpha of L.
21 |   LaxFriedrichsFlux& operator=(LaxFriedrichsFlux<F>&& L)
22 |   {
23 |     alpha=L.alpha;
24 |     return *this;
25 |   }
26 |   //! numerical flux between the states a and b.
27 |   //! \return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a)).
28 |   double operator()  (double a, double b) const
29 |   {
30 |      return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a));
31 |   }
32 | };
33 | 


--------------------------------------------------------------------------------
/Weno/C++-Modulo/README.md:
--------------------------------------------------------------------------------
 1 | ### Choose your problem and your numerical flux
 2 | 
 3 | You must comment/uncomment the following lines in main.cpp
 4 | ```
 5 |   typedef Burghers Problem;
 6 |   //typedef Convection Problem;
 7 | ```
 8 | and
 9 | ```
10 |   typedef GodunovFlux<Problem> NumFlux;
11 |   //typedef LaxFriedrichsFlux<Problem> NumFlux;
12 | ```
13 | 
14 | ### Plotting:
15 | 
16 | In main.cpp, uncomment the line
17 | ```
18 | #define DO_GNUPLOT_FILES
19 | ```
20 | then, at run time, the code will produce a file _resultXXX_ every 100 steps, and
21 | a file _gpfile_. To plot the solution over time, you can use gnuplot:
22 | ```
23 | >gnuplot
24 | load "gpfile"
25 | ```
26 | But **CAVEAT**: **comment out** this line again for benchmarking!
27 | 
28 | 
29 | ### Compilation:
30 | ```
31 | mkdir Build
32 | cd Build
33 | cmake ..
34 | make
35 | ```
36 | a file "run" is created.
37 | To use another compiler (e.g. clang++), do:
38 | ```
39 | CXX=clang++ cmake ..
40 | make
41 | ```
42 | 
43 | ### Run the code:
44 | 
45 | from Build/ directory, type:
46 | ```
47 | ./run
48 | ```
49 | 


--------------------------------------------------------------------------------
/Weno/C++-Modulo/RK3TVD.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <memory>
 3 | #include <iostream>
 4 | //
 5 | // Explicit TVD Runge-Kutta method of order 3.
 6 | //
 7 | // L: the RHS (du/dt =L(u)).
 8 | //
 9 | template<class L> class RK3TVD
10 | {
11 |   const int size;
12 |   std::unique_ptr<double[]> u1,u2; 
13 |   const double c21=3./4.,c22=1./4.,c31=1./3.,c32=2./3.;
14 | 
15 |   L LL;
16 | public:
17 |   //! constructor.
18 |   //\param size size of arrays.
19 |   //\param Length length of the domain.
20 |   RK3TVD(int _size, double Length,double params[]):size(_size)
21 |   {
22 |     u1=std::make_unique<double[]>(size);
23 |     u2=std::make_unique<double[]>(size);
24 |     LL=L(size,Length,params);
25 |   }
26 | 
27 |   //! destructor
28 |   ~RK3TVD()
29 |   {
30 |   }
31 |   //! make one step.
32 |   //! \param InOut intial value, overwritten..
33 |   //! \param dt time step.
34 |   void step(std::unique_ptr<double []>& InOut,double dt)
35 |   {
36 |     LL(InOut,u1);
37 |     for(int i=0;i<size;i++)
38 |       u1[i]=InOut[i] + dt*u1[i];
39 | 
40 |     LL(u1,u2);
41 |     for(int i=0;i<size;i++)
42 |       u2[i]= c21*InOut[i]+c22*(u1[i]+dt*u2[i]);
43 | 
44 |     LL(u2,u1); // reuse u1.
45 |     for(int i=0;i<size;i++)
46 |       InOut[i]=c31*InOut[i]+c32*(u2[i]+dt*u1[i]);
47 |   }
48 | };
49 | 


--------------------------------------------------------------------------------
/Weno/C++-Pointers/Burghers.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <algorithm>
 3 | //#include <string>
 4 | //#include <cmath>
 5 | //! Burgers' equation: the flux is x*x/2.
 6 | //! All members are static, as in Convection, so that they can be called
 7 | //! both through an object (f.min(a,b), as GodunovFlux does) and through
 8 | //! the type (F::flux(a), as LaxFriedrichsFlux does).
 9 | struct  Burghers
10 | {
11 |   //! the flux.
12 |   static inline double flux(double x) 
13 |   {
14 |     return 0.5*x*x;
15 |   }
16 |   //! min of flux  in [a,b]. Used by Godunov flux.
17 |   //! we do NOT test a<=b !
18 |   //! the flux decreases on x<=0 and increases on x>=0; its minimum is 0
19 |   //! when 0 lies inside [a,b].
20 |   static inline double min(double a, double b)
21 |   {
22 | 
23 |     if(b<=0)
24 |       return  0.5*b*b;
25 |     else if (a>=0)
26 |       return  0.5*a*a;
27 |     else
28 |       return 0.0;
29 |   }
30 |   //! max of flux in [a,b]. Used by Godunov flux.
31 |   //! we do NOT test a<=b !
32 |   //! the maximum is reached at the end point of largest absolute value.
33 |   static inline  double max(double a, double b)
34 |   {
35 |     //return 0.5*fmax(a*a,b*b);
36 |     return 0.5*std::max(a*a,b*b);
37 |   }
38 | };
39 | 


--------------------------------------------------------------------------------
/Weno/C++-Pointers/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(BenchmarksPythonJuliaAndCo)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | #
 6 | #- To use intel compiler
 7 | # CXX=icpc cmake ..
 8 | # - for clang++:
 9 | # CXX=clang++ cmake ..
10 | # - otherwise, to use g++:
11 | #  cmake ..
12 | #
13 | 
14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
15 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
16 | 
17 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$")
18 |   set(CMAKE_CXX_COMPILER "clang++")
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3  -Wall   -Wall -std=c++14 -march=native")
20 | else ()
21 |    #set(CMAKE_CXX_COMPILER "g++-8")
22 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -g  -std=c++17 -Wall -march=native")
23 | 
24 | endif ()
25 | 
26 | add_executable(
27 |   run
28 |   ../main.cpp
29 |   )
30 | 
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/Weno/C++-Pointers/Convection.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <algorithm>
 3 | #include <string>
 4 | struct  Convection
 5 | {
 6 |   //! the flux.
 7 |   static double flux(double x) 
 8 |   {
 9 |     return x;
10 |   }
11 |   //! min of flux  in [a,b]. Used by Godunov flux.
12 |   //! we do NOT test a<=b !
13 |   static double min(double a, double b)
14 |   {
15 | 
16 |     if(b<=0)
17 |       return flux(b);
18 |     else if (a>=0)
19 |       return flux(a);
20 |     else
21 |       return 0.0;
22 |   }
23 |   //! max of flux in [a,b]. Used by Godunov flux.
24 |   //! we do NOT test a<=b !
25 |   static double max(double a, double b)
26 |   {
27 |      return std::max(flux(a),flux(b));
28 |   }
29 | };
30 | 


--------------------------------------------------------------------------------
/Weno/C++-Pointers/GodunovFlux.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <utility>
 3 | #include <iostream>
 4 | //
 5 | // The Godunov flux.
 6 | //
 7 | template<class F> struct GodunovFlux{
 8 |   // constructors, for compatibility with other fluxes.
 9 |   F f;
10 |   GodunovFlux(double *params=0){}
11 |   GodunovFlux(GodunovFlux<F>& G)= delete;
12 |   GodunovFlux& operator=(GodunovFlux<F>&& G){return *this;}
13 |   inline double operator() (double a, double b)
14 |   {
15 |     return a<=b ?  f.min(a,b):f.max(b,a);
16 |   }
17 |   ~GodunovFlux()
18 |   {
19 |   }
20 | };
21 | 


--------------------------------------------------------------------------------
/Weno/C++-Pointers/LaxFriedrichsFlux.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <iostream>
 3 | #include <utility>
 4 | //
 5 | // The Lax-Friedrichs numerical flux.
 6 | //
 7 | // F: the problem; it must provide a static member  double flux(double).
 8 | template<class F> class LaxFriedrichsFlux{
 9 |   //! the Lax-Friedrichs parameter. It is 0 until it is set from params
10 |   //! or by assignment, so that a default constructed flux is never read
11 |   //! with an indeterminate value.
12 |   double alpha=0.0;
13 | public:
14 |   //! default constructor: alpha is 0.
15 |   LaxFriedrichsFlux(){}
16 |   //! \param params params[0] is the Lax-Friedrichs parameter alpha.
17 |   LaxFriedrichsFlux(double *params):alpha(params[0]){}
18 |   //! construction from a non const lvalue is forbidden.
19 |   LaxFriedrichsFlux(LaxFriedrichsFlux<F>& L)=delete;
20 |   //! assignment from a temporary: takes the parameter alpha of L.
21 |   LaxFriedrichsFlux& operator=(LaxFriedrichsFlux<F>&& L)
22 |   {
23 |     alpha=L.alpha;
24 |     return *this;
25 |   }
26 |   //! numerical flux between the states a and b.
27 |   //! \return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a)).
28 |   double operator()  (double a, double b) const
29 |   {
30 |      return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a));
31 |   }
32 | };
33 | 


--------------------------------------------------------------------------------
/Weno/C++-Pointers/README.md:
--------------------------------------------------------------------------------
 1 | ### Choose your problem and your numerical flux
 2 | 
 3 | You must comment/uncomment the following lines in main.cpp
 4 | ```
 5 |   typedef Burghers Problem;
 6 |   //typedef Convection Problem;
 7 | ```
 8 | and
 9 | ```
10 |   typedef GodunovFlux<Problem> NumFlux;
11 |   //typedef LaxFriedrichsFlux<Problem> NumFlux;
12 | ```
13 | 
14 | ### Plotting:
15 | 
16 | In main.cpp, uncomment the line
17 | ```
18 | #define DO_GNUPLOT_FILES
19 | ```
20 | then, at run time, the code will produce a file _resultXXX_ every 100 steps, and
21 | a file _gpfile_. To plot the solution over time, you can use gnuplot:
22 | ```
23 | >gnuplot
24 | load "gpfile"
25 | ```
26 | But **CAVEAT**: **comment out** this line again for benchmarking!
27 | 
28 | 
29 | ### Compilation:
30 | ```
31 | mkdir Build
32 | cd Build
33 | cmake ..
34 | make
35 | ```
36 | a file "run" is created.
37 | To use another compiler (e.g. clang++), do:
38 | ```
39 | CXX=clang++ cmake ..
40 | make
41 | ```
42 | 
43 | ### Run the code:
44 | 
45 | from Build/ directory, type:
46 | ```
47 | ./run
48 | ```
49 | 


--------------------------------------------------------------------------------
/Weno/C++-Pointers/RK3TVD.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <iostream>
 3 | //
 4 | // Explicit TVD Runge-Kutta method of order 3.
 5 | //
 6 | // L: the RHS (du/dt =L(u)). L must be default constructible,
 7 | //    constructible from (int size,double Length,double params[]),
 8 | //    assignable from a temporary, and callable as L(in,out) on arrays
 9 | //    of doubles.
10 | //
11 | template<class L> class RK3TVD
12 | {
13 |   const int size;
14 |   double *u1,*u2; // work arrays of length size, owned by this object.
15 |   const double c21=3./4.,c22=1./4.,c31=1./3.,c32=2./3.;
16 | 
17 |   L LL;
18 | public:
19 |   //! constructor.
20 |   //! \param _size size of arrays.
21 |   //! \param Length length of the domain.
22 |   //! \param params parameters, passed to the constructor of L.
23 |   RK3TVD(int _size, double Length,double params[]):
24 |     size(_size),u1(nullptr),u2(nullptr)
25 |   {
26 |     u1=new double[size];
27 |     u2=new double[size];
28 |     LL=L(size,Length,params);
29 |   }
30 |   //! no copy: u1 and u2 are owned through raw pointers, and a copy would
31 |   //! delete them twice. (The versions of this class based on
32 |   //! std::unique_ptr are not copyable either.)
33 |   RK3TVD(const RK3TVD&)=delete;
34 |   RK3TVD& operator=(const RK3TVD&)=delete;
35 | 
36 |   //! destructor: releases the work arrays.
37 |   ~RK3TVD()
38 |   {
39 |     delete[] u1; delete[] u2;
40 |   }
41 |   //! make one step.
42 |   //! \param InOut initial value, overwritten by the value after the step.
43 |   //! \param dt time step.
44 |   void step(double * __restrict__ InOut,double dt)
45 |   {
46 |     // stage 1: u1 = u + dt*L(u).
47 |     LL(InOut,u1);
48 |     for(int i=0;i<size;i++)
49 |       u1[i]=InOut[i] + dt*u1[i];
50 | 
51 |     // stage 2: u2 = 3/4 u + 1/4 (u1 + dt*L(u1)).
52 |     LL(u1,u2);
53 |     for(int i=0;i<size;i++)
54 |       u2[i]= c21*InOut[i]+c22*(u1[i]+dt*u2[i]);
55 | 
56 |     // stage 3: u = 1/3 u + 2/3 (u2 + dt*L(u2)).
57 |     LL(u2,u1); // reuse u1.
58 |     for(int i=0;i<size;i++)
59 |       InOut[i]=c31*InOut[i]+c32*(u2[i]+dt*u1[i]);
60 |   }
61 | };
62 | 


--------------------------------------------------------------------------------
/Weno/C++/Burghers.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <algorithm>
 3 | //#include <string>
 4 | //#include <cmath>
 5 | struct  Burghers
 6 | {
 7 |   //! the flux.
 8 |   static inline double flux(double x) 
 9 |   {
10 |     return 0.5*x*x;
11 |   }
12 |   //! min of flux  in [a,b]. Used by Godunov flux.
13 |   //! we do NOT test a<=b !
14 |   double min(double a, double b)
15 |   {
16 | 
17 |     if(b<=0)
18 |       return  0.5*b*b;
19 |     else if (a>=0)
20 |       return  0.5*a*a;
21 |     else
22 |       return 0.0;
23 |   }
24 |   //! max of flux in [a,b]. Used by Godunov flux.
25 |   //! we do NOT test a<=b !
26 |   inline  double max(double a, double b)
27 |   {
28 |     //return 0.5*fmax(a*a,b*b);
29 |     return 0.5*std::max(a*a,b*b);
30 |   }
31 | };
32 | 


--------------------------------------------------------------------------------
/Weno/C++/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(BenchmarksPythonJuliaAndCo)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | #
 6 | #- To use intel compiler
 7 | # CXX=icpc cmake ..
 8 | # - for clang++:
 9 | # CXX=clang++ cmake ..
10 | # - otherwise, to use g++:
11 | #  cmake ..
12 | #
13 | 
14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
15 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
16 | else ()
17 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall  -g   -std=c++17 -Wall -march=native -mtune=native" )
18 | endif ()
19 | 
20 | add_executable(
21 |   run
22 |   ../main.cpp
23 |   )
24 | 
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/Weno/C++/Convection.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <algorithm>
 3 | #include <string>
 4 | struct  Convection
 5 | {
 6 |   //! the flux.
 7 |   static double flux(double x) 
 8 |   {
 9 |     return x;
10 |   }
11 |   //! min of flux  in [a,b]. Used by Godunov flux.
12 |   //! we do NOT test a<=b !
13 |   static double min(double a, double b)
14 |   {
15 | 
16 |     if(b<=0)
17 |       return flux(b);
18 |     else if (a>=0)
19 |       return flux(a);
20 |     else
21 |       return 0.0;
22 |   }
23 |   //! max of flux in [a,b]. Used by Godunov flux.
24 |   //! we do NOT test a<=b !
25 |   static double max(double a, double b)
26 |   {
27 |      return std::max(flux(a),flux(b));
28 |   }
29 | };
30 | 


--------------------------------------------------------------------------------
/Weno/C++/GodunovFlux.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <utility>
 3 | #include <iostream>
 4 | //
 5 | // The Godunov flux.
 6 | //
 7 | template<class F> struct GodunovFlux{
 8 |   // constructors, for compatibility with other fluxes.
 9 |   F f;
10 |   GodunovFlux(double *params=0){}
11 |   GodunovFlux(GodunovFlux<F>& G)= delete;
12 |   GodunovFlux& operator=(GodunovFlux<F>&& G){return *this;}
13 |   inline double operator() (double a, double b)
14 |   {
15 |     return a<=b ?  f.min(a,b):f.max(b,a);
16 |   }
17 |   ~GodunovFlux()
18 |   {
19 |   }
20 | };
21 | 


--------------------------------------------------------------------------------
/Weno/C++/LaxFriedrichsFlux.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <iostream>
 3 | #include <utility>
 4 | //
 5 | // The Lax-Friedrichs numerical flux.
 6 | //
 7 | // F: the problem; it must provide a static member  double flux(double).
 8 | template<class F> class LaxFriedrichsFlux{
 9 |   //! the Lax-Friedrichs parameter. It is 0 until it is set from params
10 |   //! or by assignment, so that a default constructed flux is never read
11 |   //! with an indeterminate value.
12 |   double alpha=0.0;
13 | public:
14 |   //! default constructor: alpha is 0.
15 |   LaxFriedrichsFlux(){}
16 |   //! \param params params[0] is the Lax-Friedrichs parameter alpha.
17 |   LaxFriedrichsFlux(double *params):alpha(params[0]){}
18 |   //! construction from a non const lvalue is forbidden.
19 |   LaxFriedrichsFlux(LaxFriedrichsFlux<F>& L)=delete;
20 |   //! assignment from a temporary: takes the parameter alpha of L.
21 |   LaxFriedrichsFlux& operator=(LaxFriedrichsFlux<F>&& L)
22 |   {
23 |     alpha=L.alpha;
24 |     return *this;
25 |   }
26 |   //! numerical flux between the states a and b.
27 |   //! \return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a)).
28 |   double operator()  (double a, double b) const
29 |   {
30 |      return 0.5*(F::flux(a)+F::flux(b) - alpha*(b-a));
31 |   }
32 | };
33 | 


--------------------------------------------------------------------------------
/Weno/C++/README.md:
--------------------------------------------------------------------------------
 1 | ### Choose your problem and your numerical flux
 2 | 
 3 | You must comment/uncomment the following lines in main.cpp
 4 | ```
 5 |   typedef Burghers Problem;
 6 |   //typedef Convection Problem;
 7 | ```
 8 | and
 9 | ```
10 |   typedef GodunovFlux<Problem> NumFlux;
11 |   //typedef LaxFriedrichsFlux<Problem> NumFlux;
12 | ```
13 | 
14 | ### Plotting:
15 | 
16 | In main.cpp, uncomment the line
17 | ```
18 | #define DO_GNUPLOT_FILES
19 | ```
20 | then, at run time, the code will produce a file _resultXXX_ every 100 steps, and
21 | a file _gpfile_. To plot the solution over time, you can use gnuplot:
22 | ```
23 | >gnuplot
24 | load "gpfile"
25 | ```
26 | But **CAVEAT**: **comment out** this line again for benchmarking!
27 | 
28 | 
29 | ### Compilation:
30 | ```
31 | mkdir Build
32 | cd Build
33 | cmake ..
34 | make
35 | ```
36 | a file "run" is created.
37 | To use another compiler (e.g. clang++), do:
38 | ```
39 | CXX=clang++ cmake ..
40 | make
41 | ```
42 | 
43 | ### Run the code:
44 | 
45 | from Build/ directory, type:
46 | ```
47 | ./run
48 | ```
49 | 


--------------------------------------------------------------------------------
/Weno/C++/RK3TVD.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <memory>
 3 | #include <iostream>
 4 | //
 5 | // Explicit TVD Runge-Kutta method of order 3.
 6 | //
 7 | // L: the RHS (du/dt =L(u)).
 8 | //
 9 | template<class L> class RK3TVD
10 | {
11 |   const int size;
12 |   std::unique_ptr<double[]> u1,u2; 
13 |   const double c21=3./4.,c22=1./4.,c31=1./3.,c32=2./3.;
14 | 
15 |   L LL;
16 | public:
17 |   //! constructor.
18 |   //\param size size of arrays.
19 |   //\param Length length of the domain.
20 |   RK3TVD(int _size, double Length,double params[]):size(_size)
21 |   {
22 |     u1=std::make_unique<double[]>(size);
23 |     u2=std::make_unique<double[]>(size);
24 |     LL=L(size,Length,params);
25 |   }
26 | 
27 |   //! destructor
28 |   ~RK3TVD()
29 |   {
30 |   }
31 |   //! make one step.
32 |   //! \param InOut intial value, overwritten..
33 |   //! \param dt time step.
34 |   void step(std::unique_ptr<double []>& InOut,double dt)
35 |   {
36 |     LL(InOut,u1);
37 |     for(int i=0;i<size;i++)
38 |       u1[i]=InOut[i] + dt*u1[i];
39 | 
40 |     LL(u1,u2);
41 |     for(int i=0;i<size;i++)
42 |       u2[i]= c21*InOut[i]+c22*(u1[i]+dt*u2[i]);
43 | 
44 |     LL(u2,u1); // reuse u1.
45 |     for(int i=0;i<size;i++)
46 |       InOut[i]=c31*InOut[i]+c32*(u2[i]+dt*u1[i]);
47 |   }
48 | };
49 | 


--------------------------------------------------------------------------------
/Weno/C++NoCopy/Burghers.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <algorithm>
 4 | 
 5 | template <typename TReal = double>
 6 | struct  Burghers
 7 | {
 8 |     using Real = TReal;
 9 | 
10 |     //! the flux.
11 |     static inline
12 |     Real flux(Real x) 
13 |     {
14 |         return 0.5 * x * x;
15 |     }
16 | 
17 |     //! min of flux  in [a,b]. Used by Godunov flux.
18 |     //! we do NOT test a<=b !
19 |     static inline
20 |     Real min(Real a, Real b)
21 |     {
22 |         if (b <= 0)
23 |             return flux(b);
24 |         else if (a >= 0)
25 |             return flux(a);
26 |         else
27 |             return Real(0);
28 |     }
29 | 
30 |     //! max of flux in [a,b]. Used by Godunov flux.
31 |     //! we do NOT test a<=b !
32 |     static inline
33 |     Real max(Real a, Real b)
34 |     {
35 |         return std::max(flux(a), flux(b));
36 |     }
37 | };
38 | 


--------------------------------------------------------------------------------
/Weno/C++NoCopy/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(BenchmarksPythonJuliaAndCo)
 3 | enable_language(CXX)
 4 | # Go to Build directory. Then:
 5 | #
 6 | #- To use intel compiler
 7 | # CXX=icpc cmake ..
 8 | # - for clang++:
 9 | # CXX=clang++ cmake ..
10 | # - otherwise, to use g++:
11 | #  cmake ..
12 | #
13 | 
14 | if (${CMAKE_CXX_COMPILER} MATCHES "icpc.*$")
15 |   set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  -DICC -DALIGN_64 -restrict -O3  -g -xavx -ipo -fargument-noalias  -ansi-alias -Wall -vec-report3 -std=c++0x")
16 | 
17 | elseif(${CMAKE_CXX_COMPILER} MATCHES "clang.*$")
18 |   set(CMAKE_CXX_COMPILER "clang++")
19 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -Wall -std=c++14 -march=native")
20 | else ()
21 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -Wall -std=c++14 -march=native")
22 | 
23 | endif ()
24 | 
25 | add_executable(
26 |   run
27 |   ../main.cpp
28 |   )
29 | 
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/Weno/C++NoCopy/Convection.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <algorithm>
 4 | 
 5 | template <typename TReal = double>
 6 | struct Convection
 7 | {
 8 |     using Real = TReal;
 9 | 
10 |     //! the flux.
11 |     static inline
12 |     Real flux(Real x) 
13 |     {
14 |         return x;
15 |     }
16 | 
17 |     //! min of flux  in [a,b]. Used by Godunov flux.
18 |     //! we do NOT test a<=b !
19 |     static inline
20 |     Real min(Real a, Real b)
21 |     {
22 |         if (b <= 0)
23 |             return flux(b);
24 |         else if (a >= 0)
25 |             return flux(a);
26 |         else
27 |             return Real(0);
28 |     }
29 | 
30 |     //! max of flux in [a,b]. Used by Godunov flux.
31 |     //! we do NOT test a<=b !
32 |     static inline
33 |     Real max(Real a, Real b)
34 |     {
35 |         return std::max(flux(a), flux(b));
36 |     }
37 | };
38 | 


--------------------------------------------------------------------------------
/Weno/C++NoCopy/GodunovFlux.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <utility>
 4 | #include <type_traits>
 5 | 
 6 | /**
 7 |  * The Godunov flux.
 8 |  *
 9 |  * @tparam  TProblem    Type of the problem
10 |  */
11 | template <typename TProblem>
12 | class GodunovFlux
13 | {
14 | public:
15 |     using Problem = typename std::decay<TProblem>::type;
16 |     using Real = typename Problem::Real;
17 | 
18 | private:
19 |     TProblem f;
20 | 
21 | public:
22 |     
23 |   /**
24 |    * @param f   The problem.
25 |    */
26 |   GodunovFlux(TProblem f)
27 |       : f(std::forward<TProblem>(f))
28 |   {}
29 | 
30 |   inline Real operator() (Real a, Real b) const
31 |   {
32 |     return a <= b ?  f.min(a, b) : f.max(b, a);
33 |   }
34 | };
35 | 
36 | template <typename TProblem>
37 | GodunovFlux<TProblem> makeGodunovFlux(TProblem && problem)
38 | {
39 |     return {std::forward<TProblem>(problem)};
40 | }
41 | 


--------------------------------------------------------------------------------
/Weno/C++NoCopy/LaxFriedrichsFlux.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <utility>
 4 | #include <type_traits>
 5 | 
 6 | /**
 7 |  * The Lax-Friedrichs flux.
 8 |  *
 9 |  * @tparam  TProblem    Type of the problem; provides Real and flux(Real).
10 |  */
11 | template <typename TProblem>
12 | class LaxFriedrichsFlux
13 | {
14 | public:
15 |     using Problem = typename std::decay<TProblem>::type;
16 |     using Real = typename Problem::Real;
17 | 
18 | private:
19 |     TProblem f;
20 |     Real alpha;
21 | 
22 | public:
23 |     /**
24 |      * @param f     The problem
25 |      * @param alpha The Lax-Friedrichs parameter.
26 |      */
27 |     LaxFriedrichsFlux(TProblem f, Real alpha = 1.)
28 |         : f(std::forward<TProblem>(f)), alpha(alpha)
29 |     {}
30 |     /** Flux between states a and b; uses Problem::Real like GodunovFlux. */
31 |     Real operator() (Real a, Real b) const
32 |     {
33 |         return 0.5 * (f.flux(a) + f.flux(b) - alpha * (b - a));
34 |     }
35 | };
36 | 
37 | template <typename TProblem, typename TReal>
38 | auto makeLaxFriedrichsFlux(TProblem && problem, TReal const& alpha)
39 | {
40 |     return LaxFriedrichsFlux<TProblem>(std::forward<TProblem>(problem), alpha);
41 | }
42 | 


--------------------------------------------------------------------------------
/Weno/C++NoCopy/README.md:
--------------------------------------------------------------------------------
 1 | ### Choose your problem and your numerical flux
 2 | 
 3 | You must comment/uncomment the following lines in main.cpp
 4 | ```
 5 | auto const problem = Burghers<Real>{};
 6 | //auto const problem = Convection<Real>{};
 7 | ```
 8 | and
 9 | ```
10 | auto const num_flux = makeGodunovFlux(problem);
11 | //auto const num_flux = makeLaxFriedrichsFlux(problem, 1.);
12 | ```
13 | 
14 | ### Plotting:
15 | 
16 | In main.cpp, uncomment the line
17 | ```
18 | #define DO_GNUPLOT_FILES
19 | ```
20 | then, at run time, the code will produce a file _resultXXX_ every 100 steps, and
21 | a file _gpfile_. To plot the solution over time, you can use gnuplot:
22 | ```
23 | >gnuplot
24 | load "gpfile"
25 | ```
26 | But **CAVEAT**: **comment out** this line again for benchmarking!
27 | 
28 | 
29 | ### Compilation:
30 | ```
31 | mkdir Build
32 | cd Build
33 | cmake ..
34 | make
35 | ```
36 | a file "run" is created.
37 | To use another compiler (e.g. clang++), do:
38 | ```
39 | CXX=clang++ cmake ..
40 | make
41 | ```
42 | 
43 | ### Run the code:
44 | 
45 | from Build/ directory, type:
46 | ```
47 | ./run
48 | ```
49 | 


--------------------------------------------------------------------------------
/Weno/C++NoCopy/RK3TVD.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <algorithm>
 4 | #include <cstddef>
 5 | #include <iterator>
 6 | #include <type_traits>
 7 | #include <utility>
 8 | #include <vector>
 9 | 
10 | /**
11 |  * Explicit TVD Runge-Kutta method of order 3 for du/dt = L(u).
12 |  *
13 |  * @tparam TRHS  type of the right-hand side L, stored exactly as given (so a
14 |  *               reference type stores a reference). Its decayed type must
15 |  *               provide a nested type `Real` and be callable as
16 |  *               `rhs(in, out)`, writing L(in) into `out`.
17 |  */
18 | template <typename TRHS>
19 | class RK3TVD
20 | {
21 | public:
22 |     using RHS = typename std::decay<TRHS>::type;
23 |     using Real = typename RHS::Real;
24 | 
25 | private:
26 |     TRHS rhs;
27 |     // Stage buffers, kept between calls so that they are not reallocated at
28 |     // every step; mutable because step() is const.
29 |     mutable std::vector<Real> u1, u2;
30 | 
31 | public:
32 |     /** Store the right-hand side (forwarded into the member). */
33 |     RK3TVD(TRHS rhs)
34 |         : rhs(std::forward<TRHS>(rhs))
35 |     {
36 |     }
37 | 
38 |     /** Make one step.
39 |      * @param[in,out]   data    initial value, overwritten with the new value.
40 |      * @param           dt      time step.
41 |      */
42 |     template <typename TData>
43 |     void step(TData & data, Real dt) const
44 |     {
45 |         std::size_t const size = data.size();
46 |         u1.resize(size);
47 |         u2.resize(size);
48 | 
49 |         // Stage weights.
50 |         Real const c21 = Real(3)/Real(4);
51 |         Real const c22 = Real(1)/Real(4);
52 |         Real const c31 = Real(1)/Real(3);
53 |         Real const c32 = Real(2)/Real(3);
54 | 
55 |         // First stage: u1 = u + dt L(u)
56 |         rhs(data, u1);
57 |         for (std::size_t i = 0; i < size; ++i)
58 |             u1[i] = data[i] + dt * u1[i];
59 | 
60 |         // Second stage: u2 = 3/4 u + 1/4 (u1 + dt L(u1))
61 |         rhs(u1, u2);
62 |         for (std::size_t i = 0; i < size; ++i)
63 |             u2[i] = c21 * data[i] + c22 * (u1[i] + dt*u2[i]);
64 | 
65 |         // Third stage: u = 1/3 u + 2/3 (u2 + dt L(u2)); u1 is reused for L(u2).
66 |         rhs(u2, u1);
67 |         for (std::size_t i = 0; i < size; ++i)
68 |             data[i] = c31 * data[i] + c32 * (u2[i] + dt*u1[i]);
69 |     }
70 | };
71 | 
72 | /** Build an RK3TVD whose template argument is deduced from `rhs`. */
73 | template <typename TRHS>
74 | auto makeRK3TVD(TRHS && rhs)
75 | {
76 |     return RK3TVD<TRHS>(std::forward<TRHS>(rhs));
77 | }
78 | 


--------------------------------------------------------------------------------
/Weno/Fortran/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | project(BenchmarksPythonJuliaAndCo)
 3 | enable_language(Fortran)
 4 | 
 5 | # To force a specific compiler, set the FC environment variable or uncomment:
 6 | #set(CMAKE_Fortran_COMPILER "gfortran-8")
 7 | 
 8 | set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Wall -g -O3 -march=native")
 9 | 
10 | # Sources live next to this file; naming them relative to the source directory
11 | # keeps the build independent of where the build directory is created.
12 | add_executable(
13 |   run
14 |   main.F90
15 |   m_burghers.F90  m_godunov.F90  m_RK3TVDData.F90  m_weno.F90
16 |   )
17 | 


--------------------------------------------------------------------------------
/Weno/Fortran/m_RK3TVDData.F90:
--------------------------------------------------------------------------------
 1 | module m_RK3TVDData
 2 | 
 3 |   ! Three-stage TVD Runge-Kutta time stepping built on the weno routine
 4 |   ! imported from m_weno.
 5 | 
 6 |   use m_burghers
 7 |   use m_godunov
 8 |   use m_weno
 9 |   
10 |   implicit none
11 | 
12 |   private
13 |   
14 |   public :: initRK3TVDData
15 |   public :: Rk3tvd
16 |   
17 |   ! Stage weights and work arrays used by Rk3tvd.
18 |   type, public :: RK3TVDData
19 |      
20 |      integer :: size                                      ! length of u1 and u2
21 |      
22 |      real(kind = 8) :: c21, c22, c31, c32                 ! stage weights
23 |      real(kind = 8), dimension(:), allocatable :: u1, u2  ! stage buffers
24 |      
25 |   end type RK3TVDData
26 |   
27 |   
28 |   contains
29 | 
30 |     ! Set the stage weights and allocate zero-filled work arrays of length len.
31 |     subroutine initRK3TVDData(rdata, len)
32 | 
33 |       type(RK3TVDData), intent(inout) :: rdata
34 |       integer, intent(in) :: len
35 | 
36 |       rdata%size = len
37 |       ! kind-8 literals: "1/3." is evaluated in default (single) precision
38 |       ! and would only carry about 7 correct digits once stored.
39 |       rdata%c21 = 3.0_8 / 4.0_8
40 |       rdata%c22 = 1.0_8 / 4.0_8
41 |       rdata%c31 = 1.0_8 / 3.0_8
42 |       rdata%c32 = 2.0_8 / 3.0_8
43 | 
44 |       ! Allow the same variable to be initialised more than once.
45 |       if (allocated(rdata%u1)) deallocate(rdata%u1)
46 |       if (allocated(rdata%u2)) deallocate(rdata%u2)
47 |       allocate(rdata%u1(len))
48 |       allocate(rdata%u2(len))
49 |       rdata%u1 = 0.0_8
50 |       rdata%u2 = 0.0_8
51 |       
52 |     end subroutine initRK3TVDData
53 | 
54 | 
55 |     ! Advance In by one time step dt and store the result in Out.
56 |     ! W and L are passed unchanged to weno; R supplies weights and buffers.
57 |     subroutine Rk3tvd(R, W, L, dt, In, Out)
58 | 
59 |       type(RK3TVDData), intent(inout) :: R
60 |       type(WenoData), intent(inout) :: W
61 |       real(kind = 8), intent(in) :: dt, L
62 |       real(kind = 8), dimension(:), intent(in) :: In
63 |       real(kind = 8), dimension(:), intent(inout) :: Out
64 | 
65 |       ! Stage 1
66 |       call weno(W, L, In, R%u1)
67 |       R%u1 = In + dt * R%u1
68 | 
69 |       ! Stage 2
70 |       call weno(W, L, R%u1, R%u2)
71 |       R%u2 = R%c21 * In + R%c22 * (R%u1 + dt * R%u2)
72 | 
73 |       ! Stage 3 (u1 is reused as the output buffer of weno)
74 |       call weno(W, L, R%u2, R%u1)
75 |       Out = R%c31 * In + R%c32 * (R%u2 + dt * R%u1)
76 | 
77 |     end subroutine Rk3tvd
78 |     
79 | end module m_RK3TVDData
80 | 


--------------------------------------------------------------------------------
/Weno/Fortran/m_burghers.F90:
--------------------------------------------------------------------------------
 1 | module m_burghers
 2 | 
 3 |   ! Helper functions returning 0.5*x**2 evaluated at selected arguments.
 4 | 
 5 |   implicit none
 6 | 
 7 | contains
 8 |   
 9 |   ! 0.5*b**2 when b <= 0, otherwise 0.5*a**2 when a >= 0, otherwise 0.
10 |   function minf(a, b) result(m)
11 | 
12 |     real(kind = 8) :: a, b, m
13 | 
14 |     m = 0.
15 |     if(b <= 0.) then
16 |        m = 0.5 * (b * b)
17 |     else if(a >= 0.) then
18 |        m = 0.5 * (a * a)
19 |     end if
20 |     
21 |   end function minf
22 | 
23 | 
24 |   ! Half of the larger of a**2 and b**2.
25 |   function maxf(a, b) result(M)
26 |     
27 |     real(kind = 8) :: a, b, M
28 |     real(kind = 8) :: asq, bsq
29 | 
30 |     asq = a * a
31 |     bsq = b * b
32 |     M = 0.5 * max(asq, bsq)
33 |     
34 |   end function maxf
35 |   
36 | end module m_burghers
37 | 


--------------------------------------------------------------------------------
/Weno/Fortran/m_godunov.F90:
--------------------------------------------------------------------------------
 1 | module m_godunov
 2 | 
 3 |   ! Numerical flux built from minf and maxf of m_burghers.
 4 | 
 5 |   use m_burghers
 6 |   
 7 |   implicit none
 8 | 
 9 | contains
10 |   
11 |   ! minf(a, b) when a <= b, maxf(b, a) otherwise.
12 |   function numFlux(a, b) result(flux)
13 |     
14 |     real(kind = 8) :: a, b
15 |     real(kind = 8) :: flux
16 |     
17 |     if(a <= b) then
18 |        flux = minf(a, b)
19 |        return
20 |     end if
21 | 
22 |     flux = maxf(b, a)
23 |     
24 |   end function numFlux
25 |   
26 | end module m_godunov
27 | 


--------------------------------------------------------------------------------
/Weno/Fortran/main.F90:
--------------------------------------------------------------------------------
 1 | program main
 2 | 
 3 |   ! Benchmark driver: builds the initial profile with init, advances it with
 4 |   ! Rk3tvd until tt >= T, then writes the final profile and the CPU time.
 5 | 
 6 |   use m_weno
 7 |   use m_RK3TVDData
 8 |   
 9 |   implicit none
10 | 
11 |   integer, parameter :: size = 1000
12 |   real(kind = 8), parameter :: L = 1.0_8
13 |   real(kind = 8), parameter :: T = 1.0_8
14 |   ! kind-8 literal: "0.8 / size" would be evaluated in default (single)
15 |   ! precision before being stored in dt.
16 |   real(kind = 8), parameter :: dt = 0.8_8 / size
17 |   
18 |   real(kind = 8) :: tt, start, finish
19 | 
20 |   real(kind = 8), dimension(size) :: In, Out
21 | 
22 |   character(len=64) :: hostname
23 | 
24 |   type(WenoData) :: W
25 |   type(RK3TVDData) :: R
26 |   
27 |   print *, "size=",size, "dt=",dt, "nsteps=",floor(T/dt)
28 | 
29 |   call init(In, L, size)
30 |   call initWenoData(W, size)
31 |   call initRK3TVDData(R, size)
32 | 
33 |   ! Initial profile.
34 |   open(unit=12, file="gp0", action="write", status="replace")
35 |   write(12, "(1f16.12)") In
36 |   close(12)
37 |   
38 |   print *, "Start computation"
39 | 
40 |   tt = 0.0_8
41 |   call cpu_time(start)
42 |   do while(tt < T)
43 |      call Rk3tvd(R, W, L, dt, In, Out)
44 |      ! Rk3tvd assigns every element of Out, so copying the new state back
45 |      ! into In is all that is needed before the next step.
46 |      In = Out
47 | 
48 |      tt = tt + dt
49 |   end do
50 |   call cpu_time(finish)
51 | 
52 |   print *, "Time : ", finish - start
53 | 
54 |   ! Final profile.
55 |   open(unit=12, file="gp", action="write", status="replace")
56 |   write(12, "(1f16.12)") In
57 |   close(12)
58 | 
59 |   ! Two-line result file: label, then elapsed CPU time.
60 |   call hostnm(hostname)
61 |   open(unit=12, file="../RunningOn" // trim(hostname), action="write", status="replace")
62 |   write(12, *) "Burghers Godunov"
63 |   write(12, *) finish - start
64 |   close(12)
65 |   
66 | end program main
67 | 
68 | 
69 | ! Fill X(1:len): a linearly decreasing ramp for the 0-based indices i with
70 | ! floor(len/8) < i < floor(len/2) + floor(len/8), zero elsewhere.
71 | subroutine init(X, L, len)
72 | 
73 |   implicit none
74 | 
75 |   integer, intent(in) :: len
76 |   real(kind = 8), intent(in) :: L
77 |   real(kind = 8), dimension(len), intent(inout) :: X
78 | 
79 |   real(kind = 8) :: h
80 |   integer :: i
81 | 
82 |   h = L / len
83 | 
84 |   do i = 0, len-1
85 |      if(i > floor(len / 8.) .and. i < floor(len / 2.) + floor(len / 8.)) then
86 |         X(i+1) = 1. - 2. * (i - floor(len / 8.)) * h / L
87 |      else
88 |         X(i+1) = 0.
89 |      end if
90 |   end do
91 |      
92 | end subroutine init
93 | 


--------------------------------------------------------------------------------
/Weno/Ju/Burghers.jl:
--------------------------------------------------------------------------------
 1 | module Burghers
 2 | export flux,minf,maxf
 3 | # Flux x -> x^2/2.
 4 | flux(x::Float64) = 0.5*x^2
 5 | # 0.5*b^2 when b<=0, otherwise 0.5*a^2 when a>=0, otherwise 0.
 6 | @inline function minf(a::Float64,b::Float64)
 7 |     b<=0.0 && return 0.5*b^2
 8 |     a>=0.0 && return 0.5*a^2
 9 |     return 0.0
10 | end
11 | # Half of the larger of a^2 and b^2.
12 | @inline maxf(a::Float64,b::Float64) = 0.5*max(a^2,b^2)
13 | end
14 | 


--------------------------------------------------------------------------------
/Weno/Ju/Convection.jl:
--------------------------------------------------------------------------------
 1 | module Convection
 2 | export flux,minf,maxf
 3 | # Linear flux x -> x.
 4 | function flux(x::Float64)
 5 |     x
 6 | end
 7 | # Minimum of flux on [a,b], as requested by Godunov.NumFlux when a<=b.
 8 | # flux is increasing, so the minimum is reached at the left end a; the former
 9 | # sign tests only apply to a flux whose minimum sits at 0.
10 | function minf(a::Float64,b::Float64)
11 |     flux(a)
12 | end
13 | # Larger of flux(a) and flux(b).
14 | function maxf(a::Float64,b::Float64)
15 |     max(flux(a),flux(b))
16 | end
17 | end
18 | 


--------------------------------------------------------------------------------
/Weno/Ju/Godunov.jl:
--------------------------------------------------------------------------------
1 | module Godunov
2 | export NumFlux
3 | # Godunov flux: F.minf(a,b) when a<=b, F.maxf(b,a) otherwise.
4 | function NumFlux(F,a::Float64,b::Float64)
5 |     if a<=b
6 |         return F.minf(a,b)
7 |     end
8 |     return F.maxf(b,a)
9 | end
10 | end
11 | 


--------------------------------------------------------------------------------
/Weno/Ju/LaxFriedrichs.jl:
--------------------------------------------------------------------------------
1 | module LaxFriedrichs
2 | export NumFlux
3 | # Lax-Friedrichs flux: half of flux(a)+flux(b) minus alpha times the jump b-a.
4 | function NumFlux(F,a::Float64,b::Float64,alpha::Float64)
5 |     fluxsum = F.flux(a)+F.flux(b)
6 |     return 0.5*(fluxsum - alpha*(b-a))
7 | end
8 | end
9 | 


--------------------------------------------------------------------------------
/Weno/Ju/README.md:
--------------------------------------------------------------------------------
 1 | To run the code, just type:
 2 | ```
 3 | ./script
 4 | ```
 5 | If you type:
 6 | ```
 7 | ./script-m
 8 | ```
 9 | you will be able to see how Julia manages memory, but this will
10 | slow down the code!
11 | 


--------------------------------------------------------------------------------
/Weno/Ju/RK3TVD.jl:
--------------------------------------------------------------------------------
 1 | module RK3TVD
 2 | export Rk3tvd!,RK3TVDData
 3 | # Explicit three-stage TVD Runge-Kutta scheme.
 4 | #
 5 | # RK3TVDData holds the stage weights and two work vectors of length `size`.
 6 | struct RK3TVDData
 7 |     size::Int64
 8 |     c21::Float64
 9 |     c22::Float64
10 |     c31::Float64
11 |     c32::Float64
12 |     u1::Array{Float64,1}
13 |     u2::Array{Float64,1}
14 |     function RK3TVDData(Size)
15 |         new(Size,3.0/4.0,1.0/4,1.0/3.0,2.0/3.0,zeros(Size),zeros(Size))
16 |     end
17 | end
18 | # Advance `In` by one step `dt` and write the result to `Out`.
19 | # `W(x,y)` is called once per stage with `x` the stage input and `y` the
20 | # buffer that is read back afterwards.
21 | function Rk3tvd!(R::RK3TVDData,W,dt,In::Array{Float64},Out::Array{Float64})
22 |     n=R.size
23 |     u1=R.u1
24 |     u2=R.u2
25 | 
26 |     # Stage 1
27 |     W(In,u1)
28 |     @simd for i in 1:n
29 |         u1[i]=In[i] + dt*u1[i]
30 |     end
31 | 
32 |     # Stage 2
33 |     W(u1,u2)
34 |     @simd for i in 1:n
35 |         u2[i]=R.c21*In[i]+R.c22*(u1[i]+dt*u2[i])
36 |     end
37 | 
38 |     # Stage 3 (u1 is reused as the output buffer of W)
39 |     W(u2,u1)
40 |     @simd for i in 1:n
41 |         Out[i]=R.c31*In[i]+R.c32*(u2[i]+dt*u1[i])
42 |     end
43 |     return nothing
44 | end
45 | end
46 | 


--------------------------------------------------------------------------------
/Weno/Ju/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 main.jl  # run main.jl with bounds checking disabled and optimization level 3
3 | 


--------------------------------------------------------------------------------
/Weno/Ju/script-m:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | julia --check-bounds=no -O3 --track-allocation=user main.jl  # same run as ./script, with allocation tracking for user code enabled
3 | 


--------------------------------------------------------------------------------
/Weno/Numba/Burghers.py:
--------------------------------------------------------------------------------
 1 | from numba import jit
 2 | @jit(nopython=True)
 3 | def flux(x):
 4 |     return 0.5*x*x
 5 | 
 6 | @jit(nopython=True)
 7 | def minf(a,b):
 8 |     if b<=0:
 9 |       return  0.5*b*b
10 |     elif a>=0:
11 |       return 0.5*a*a
12 |     else:
13 |       return 0.0
14 |   
15 | @jit(nopython=True)
16 | def maxf(a,b):
17 |     return 0.5*max(a*a,b*b)
18 | 


--------------------------------------------------------------------------------
/Weno/Numba/Convection.py:
--------------------------------------------------------------------------------
 1 | from numba import jit
 2 | @jit(nopython=True)
 3 | def flux(x):
 4 |     return x
 5 | @jit(nopython=True)
 6 | def minf(a,b):
 7 |     if b<=0:
 8 |       return b
 9 |     elif a>=0:
10 |       return a
11 |     else:
12 |       return 0.0
13 | @jit(nopython=True)  
14 | def maxf(a,b):
15 |     return max(a,b)
16 | 


--------------------------------------------------------------------------------
/Weno/Numba/GodunovFlux.py:
--------------------------------------------------------------------------------
1 | from numba import jit
2 | @jit(nopython=True)
3 | def NumFlux(Fmin,Fmax,a,b):
4 |     if a<=b:
5 |       return Fmin(a,b)
6 |     else:
7 |       return Fmax(b,a);
8 | 


--------------------------------------------------------------------------------
/Weno/Numba/LaxFriedrichs.py:
--------------------------------------------------------------------------------
1 | from numba import jit
2 | @jit(nopython=True)
3 | def NumFlux(F,a,b,alpha):
4 |     return 0.5*(F(a)+F(b) - alpha*(b-a))
5 | 


--------------------------------------------------------------------------------
/Weno/Numba/Numfluxes.py:
--------------------------------------------------------------------------------
 1 | from numba import jit
 2 | import GodunovFlux as Godunov
 3 | import LaxFriedrichs as LF
 4 | import Burghers as Burg
 5 | import Convection
 6 | 
 7 | @jit(nopython=True)
 8 | def GodunovBurghers(X,Y):
 9 |     return Godunov.NumFlux(Burg.minf,Burg.maxf,X,Y)
10 | @jit(nopython=True)
11 | def GodunovConvection(X,Y):
12 |     return Godunov.NumFlux(Convection.minf,Convection.maxf,X,Y)
13 | @jit(nopython=True)
14 | def LaxFriedrichsBurghers(X,Y):
15 |     return LF.NumFlux(Burg.flux,X,Y,1.0)
16 | @jit(nopython=True)
17 | def LaxFriedrichsConvection(X,Y):
18 |     return LF.NumFlux(Convection.flux,X,Y,1.0)
19 | 


--------------------------------------------------------------------------------
/Weno/Numba/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | python3 main.py
4 | ```
5 | 


--------------------------------------------------------------------------------
/Weno/Numba/RK3TVD.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Weno import Weno
 3 | from numba import jitclass
 4 | from numba import int32, float64,deferred_type
 5 | 
 6 | #------------------------------------------------------
 7 | Weno_type = deferred_type()
 8 | Weno_type.define(Weno.class_type.instance_type)
 9 | spec = [
10 |     ('c21', float64), ('c22', float64),
11 |     ('c31', float64), ('c32', float64),
12 |     ('size',int32),
13 |     ('u1', float64[:]),
14 |     ('u2', float64[:]),
15 |     ("W", Weno_type)
16 | ]
17 | #-------------------------------------------------------
18 | @jitclass(spec)
19 | class RK3TVD:
20 |     def __init__(self,size,L):
21 |         self.c21=3./4.
22 |         self.c22=1./4.
23 |         self.c31=1./3.
24 |         self.c32=2./3.
25 |         self.size=size
26 |         self.u1=np.empty(self.size)
27 |         self.u2=np.empty(self.size)
28 |         self.W=Weno(size,L)
29 | 
30 |     def op(self,Meth,InOut,dt):
31 | 
32 |         self.W.weno(Meth,InOut,self.u1)
33 |         self.u1=InOut + dt*self.u1
34 |         
35 |         self.W.weno(Meth,self.u1,self.u2)
36 |         self.u2= self.c21*InOut+self.c22*(self.u1+dt*self.u2)
37 | 
38 |         self.W.weno(Meth,self.u2,self.u1)
39 |         return self.c31*InOut+self.c32*(self.u2+dt*self.u1)
40 | 


--------------------------------------------------------------------------------
/Weno/Numba/main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Weno import *
 3 | import Numfluxes as NF
 4 | from  RK3TVD import *
 5 | import numba as nb
 6 | import time
 7 | import socket
 8 | 
 9 | size=1000
10 | L=1.0
11 | dt=0.8/size
12 | T=1.
13 | def init(X):
14 |      h=L/size
15 |      for i in range(0,size):
16 |          if i>size//8 and i<size//2+size//8:
17 |              X[i]=1.-2*(i-size//8)*h/L;
18 |          else:
19 |              X[i]=0.0
20 | 
21 | #
22 | In = np.empty(size)
23 | Out = np.empty(size)
24 | init(In)
25 | 
26 | np.savetxt("gp0",In)
27 | 
28 | print("size= ",size," dt= ",dt," nteps=", T/dt)
29 | 
30 | R=RK3TVD(size,L)
31 | 
32 | #NumF=NF.GodunovConvection
33 | NumF=NF.GodunovBurghers
34 | #NumF=NF.LaxFriedrichsConvection
35 | #NumF=NF.LaxFriedrichsBurghers
36 | 
37 | if NumF==NF.GodunovBurghers:
38 |      pE="Burghers"
39 |      pF="Godunov"
40 | elif NumF==NF.GodunovConvection:
41 |      pE="Convection"
42 |      pF="Godunov"
43 | elif NumF==NF.LaxFriedrichsConvection:
44 |      pE="Convection"
45 |      pF="Lax-Friedrichs"
46 | else:
47 |      pE="Convection"
48 |      pF="Burghers"
49 | t=0
50 | 
51 | print(pE," with ",pF)
52 | t1 = time.time()
53 | while t<T:
54 |      Out=R.op(NumF,In,dt)
55 |      In,Out=Out,In
56 |      t+=dt
57 | 
58 | t=(time.time()-t1)
59 | print("computing time: ",t)
60 | fi=open("gp","w")
61 | np.savetxt("gp",In)
62 | fi.close()
63 | print("A file 'gp' with the final solution was created.")
64 | 
65 | f=open("RunningOn"+socket.gethostname(),"w")
66 | f.write(pE+" "+pF+"\n")
67 | f.write(str(t)+"\n")
68 | f.close()
69 | 


--------------------------------------------------------------------------------
/Weno/Numba/script:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export NUMBA_ENABLE_AVX=1  # environment setting exported for Numba before the run
4 | python3 main.py  # run the benchmark
5 | 


--------------------------------------------------------------------------------
/Weno/Py/Burghers.py:
--------------------------------------------------------------------------------
 1 | def flux(x):
 2 |     return 0.5*x*x
 3 | def minf(a,b):
 4 |     if b<=0:
 5 |       return flux(b)
 6 |     elif a>=0:
 7 |       return flux(a)
 8 |     else:
 9 |       return 0.0
10 | def maxf(a,b):
11 |     return max(flux(a),flux(b))
12 | 


--------------------------------------------------------------------------------
/Weno/Py/Convection.py:
--------------------------------------------------------------------------------
 1 | def flux(x):
 2 |     return x
 3 | def minf(a,b):
 4 |     if b<=0:
 5 |       return flux(b)
 6 |     elif a>=0:
 7 |       return flux(a)
 8 |     else:
 9 |       return 0.0
10 | def maxf(a,b):
11 |     return max(flux(a),flux(b))
12 | 


--------------------------------------------------------------------------------
/Weno/Py/GodunovFlux.py:
--------------------------------------------------------------------------------
1 | def NumFlux(F,a,b):
2 |     if a<=b:
3 |       return F.minf(a,b)
4 |     else:
5 |       return F.maxf(b,a);
6 | 


--------------------------------------------------------------------------------
/Weno/Py/LaxFriedrichs.py:
--------------------------------------------------------------------------------
1 | def NumFlux(F,a,b,alpha=1.):
2 |     return 0.5*(F.flux(a)+F.flux(b) - alpha*(b-a))
3 | 


--------------------------------------------------------------------------------
/Weno/Py/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | python3 main.py
4 | ```
5 | 


--------------------------------------------------------------------------------
/Weno/Py/RK3TVD.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | class RK3TVD:
 3 |     def __init__(self,size):
 4 |         self.c21=3./4.
 5 |         self.c22=1./4.
 6 |         self.c31=1./3.
 7 |         self.c32=2./3.
 8 |         self.size=size
 9 |         self.u1=np.empty(self.size)
10 |         self.u2=np.empty(self.size)
11 |     def op(self,Meth,InOut,dt):
12 |         Meth(InOut,self.u1)
13 |         self.u1=InOut + dt*self.u1
14 | 
15 |         Meth(self.u1,self.u2)
16 |         self.u2= self.c21*InOut+self.c22*(self.u1+dt*self.u2)
17 | 
18 |         Meth(self.u2,self.u1)
19 |         return self.c31*InOut+self.c32*(self.u2+dt*self.u1)
20 | 
21 | 


--------------------------------------------------------------------------------
/Weno/Py/main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Weno import *
 3 | import cProfile
 4 | import time
 5 | import socket
 6 | import GodunovFlux as Godunov
 7 | import LaxFriedrichs 
 8 | import Burghers as Burg
 9 | import Convection
10 | from  RK3TVD import *
11 | import time
12 | 
13 | size=1000
14 | L=1.0
15 | dt=0.8/size
16 | T=1.
17 | def init(X):
18 |      h=L/size
19 |      for i in range(0,size):
20 |          if i>size//8 and i<size//2+size//8:
21 |              X[i]=1.-2*(i-size//8)*h/L;
22 |          else:
23 |              X[i]=0.0
24 | 
25 | #
26 | In = np.empty(size)
27 | Out = np.empty(size)
28 | init(In)
29 | 
30 | np.savetxt("gp0",In)
31 | 
32 | print("size= ",size," dt= ",dt," nteps=", T/dt)
33 | 
34 | 
35 | W=Weno(size)
36 | 
37 | # Choose on equation:
38 | Flux=Burg
39 | #Flux=Convection
40 | 
41 | # Choose one numerical flux:
42 | NumFlux=Godunov
43 | #NumFlux=LaxFriedrichs 
44 | 
45 | if NumFlux==Godunov:
46 |      pF="Godunov"
47 | else:
48 |      pF="Lax-Friedrichs"
49 | if Flux==Burg:
50 |      pE="Burghers"
51 | else:
52 |      pE="Convection"
53 |      
54 | Meth=lambda x,y: W.weno(NumFlux,Flux,L,x,y)
55 | 
56 | 
57 | R=RK3TVD(size)
58 | 
59 | print(pE," with ",pF)
60 | 
61 | t=0
62 | 
63 | t1 = time.time()
64 | while t<T:
65 |      Out=R.op(Meth,In,dt)
66 |      In,Out=Out,In
67 |      t+=dt
68 | 
69 | t=(time.time()-t1)
70 | print("computing time: ",t)
71 | fi=open("gp","w")
72 | np.savetxt("gp",In)
73 | fi.close()
74 | print("A file 'gp' with the final solution was created.")
75 | 
76 | f=open("RunningOn"+socket.gethostname(),"w")
77 | f.write(pE+" "+pF+"\n")
78 | f.write(str(t)+"\n")
79 | f.close()
80 | 


--------------------------------------------------------------------------------
/Weno/Py/profile:
--------------------------------------------------------------------------------
 1 |   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
 2 |      1999    0.003    0.000   11.202    0.006 <ipython-input-1-58b4317959d9>:34(<lambda>)
 3 |         1    0.001    0.001   11.203   11.203 <string>:1(<module>)
 4 |    463768    0.072    0.000    0.072    0.000 Burghers.py:1(flux)
 5 |     63968    0.028    0.000    0.059    0.000 Burghers.py:10(maxf)
 6 |    335832    0.132    0.000    0.185    0.000 Burghers.py:3(minf)
 7 |    399800    0.103    0.000    0.347    0.000 GodunovFlux.py:1(NumFlux)
 8 |      1999    7.921    0.004   11.199    0.006 Weno.py:25(weno)
 9 |    399800    0.087    0.000    0.435    0.000 Weno.py:26(<lambda>)
10 |    799600    0.125    0.000    1.095    0.000 _methods.py:31(_sum)
11 |         1    0.000    0.000   11.203   11.203 {built-in method builtins.exec}
12 |     63968    0.012    0.000    0.012    0.000 {built-in method builtins.max}
13 |   4797600    1.129    0.000    1.129    0.000 {built-in method builtins.pow}
14 |         1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
15 |    799600    0.411    0.000    0.411    0.000 {method 'dot' of 'numpy.ndarray' objects}
16 |    799600    0.970    0.000    0.970    0.000 {method 'reduce' of 'numpy.ufunc' objects}
17 |    799600    0.208    0.000    1.303    0.000 {method 'sum' of 'numpy.ndarray' objects}
18 | 


--------------------------------------------------------------------------------
/Weno/PyVec/Burghers.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def flux(x):
 4 |     return 0.5 * np.square(x)
 5 | 
 6 | def minf(a,b):
 7 | #    if b<=0:
 8 | #      return flux(b)
 9 | #    elif a>=0:
10 | #      return flux(a)
11 | #    else:
12 | #      return 0.0
13 |     return (b <= 0) * flux(b) + (a >= 0) * flux(a)
14 | 
15 | def maxf(a,b):
16 |     return np.maximum(flux(a),flux(b))
17 | 


--------------------------------------------------------------------------------
/Weno/PyVec/Convection.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def flux(x):
 4 |     return x
 5 | 
 6 | def minf(a,b):
 7 | #    if b<=0:
 8 | #      return flux(b)
 9 | #    elif a>=0:
10 | #      return flux(a)
11 | #    else:
12 | #      return 0.0
13 |     return (b <= 0) * flux(b) + (a >= 0) * flux(a)
14 | 
15 | def maxf(a,b):
16 |     return np.maximum(flux(a), flux(b))
17 | 


--------------------------------------------------------------------------------
/Weno/PyVec/GodunovFlux.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def NumFlux(F,a,b):
 4 |     #if a<=b:
 5 |     #  return F.minf(a,b)
 6 |     #else:
 7 |     #  return F.maxf(b,a);
 8 |     return np.where(a <= b, F.minf(a, b), F.maxf(b, a))
 9 | 
10 | 


--------------------------------------------------------------------------------
/Weno/PyVec/LaxFriedrichs.py:
--------------------------------------------------------------------------------
1 | def NumFlux(F, a, b, alpha=1.):
2 |     return 0.5 * (F.flux(a) + F.flux(b) - alpha*(b-a))
3 | 


--------------------------------------------------------------------------------
/Weno/PyVec/README.md:
--------------------------------------------------------------------------------
1 | To run the code, just type:
2 | ```
3 | python3 main.py
4 | ```
5 | 


--------------------------------------------------------------------------------
/Weno/PyVec/RK3TVD.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | class RK3TVD:
 3 |     def __init__(self,size):
 4 |         self.c21=3./4.
 5 |         self.c22=1./4.
 6 |         self.c31=1./3.
 7 |         self.c32=2./3.
 8 |         self.size=size
 9 |         self.u1=np.empty(self.size)
10 |         self.u2=np.empty(self.size)
11 |     def op(self,Meth,InOut,dt):
12 |         Meth(InOut,self.u1)
13 |         self.u1=InOut + dt*self.u1
14 | 
15 |         Meth(self.u1,self.u2)
16 |         self.u2= self.c21*InOut+self.c22*(self.u1+dt*self.u2)
17 | 
18 |         Meth(self.u2,self.u1)
19 |         return self.c31*InOut+self.c32*(self.u2+dt*self.u1)
20 | 
21 | 


--------------------------------------------------------------------------------
/Weno/PyVec/main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from Weno import *
 3 | import cProfile
 4 | import time
 5 | import socket
 6 | import GodunovFlux as Godunov
 7 | import LaxFriedrichs 
 8 | import Burghers as Burg
 9 | import Convection
10 | from  RK3TVD import *
11 | import time
12 | 
13 | size=1000
14 | L=1.0
15 | dt=0.8/size
16 | T=1.
17 | def init(X):
18 |      h=L/size
19 |      for i in range(0,size):
20 |          if i>size//8 and i<size//2+size//8:
21 |              X[i]=1.-2*(i-size//8)*h/L;
22 |          else:
23 |              X[i]=0.0
24 | 
25 | #
26 | In = np.empty(size)
27 | Out = np.empty(size)
28 | init(In)
29 | 
30 | np.savetxt("gp0",In)
31 | 
32 | print("size= ",size," dt= ",dt," nteps=", T/dt)
33 | 
34 | 
35 | W=Weno()
36 | 
37 | # Choose on equation:
38 | Flux=Burg
39 | #Flux=Convection
40 | 
41 | # Choose one numerical flux:
42 | NumFlux=Godunov
43 | #NumFlux=LaxFriedrichs 
44 | 
45 | if NumFlux==Godunov:
46 |      pF="Godunov"
47 | else:
48 |      pF="Lax-Friedrichs"
49 | if Flux==Burg:
50 |      pE="Burghers"
51 | else:
52 |      pE="Convection"
53 | 
54 | Meth=lambda x,y: W.weno(NumFlux,Flux,L,x,y)
55 | 
56 | 
57 | R=RK3TVD(size)
58 | 
59 | print(pE," with ",pF)
60 | 
61 | t=0
62 | 
63 | t1 = time.time()
64 | while t<T:
65 |      Out=R.op(Meth,In,dt)
66 |      In,Out=Out,In
67 |      t+=dt
68 | 
69 | t=(time.time()-t1)
70 | print("computing time: ",t)
71 | fi=open("gp","w")
72 | np.savetxt("gp",In)
73 | fi.close()
74 | print("A file 'gp' with the final solution was created.")
75 | 
76 | f=open("RunningOn"+socket.gethostname(),"w")
77 | f.write(pE+" "+pF+"\n")
78 | f.write(str(t)+"\n")
79 | f.close()
80 | 


--------------------------------------------------------------------------------
/Weno/Results/Look.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | #
 3 | #comparison between c++ and other computations
 4 | #
 5 | import socket
 6 | from pathlib import Path
 7 | 
 8 | def parsit(D,name,l):
 9 |     #
10 |     D[name]=float(l.replace("\n",""))
11 | # directories to explore ---------   
12 | files=[
13 |     "C++NoCopy",
14 |     "Py",
15 |     "PyVec",
16 |     "Ju",
17 |     "Numba",
18 |     "Fortran",
19 |     "C++-Modulo",
20 |     "C++-Pointers",
21 |     "Fortran",
22 | ]
23 | cpp="C++"
24 | #-------------------------------------------
25 | # build a dict  n-> computing time for  C++
26 | C={}
27 | with open("../"+cpp+"/RunningOn"+socket.gethostname(), 'r') as file:
28 |     spRef= file.readline().split()
29 |     s= file.readline()
30 |     C[cpp]=float(s.replace("\n",""))
31 | doNotUse=[]
32 | for n in files:
33 |     filename= "../"+n+"/RunningOn"+socket.gethostname()
34 |     p_file = Path(filename)
35 |     if p_file.is_file():
36 |         with open(filename,"r") as file:
37 |             sp= file.readline().split()
38 |             if sp[0]!=spRef[0] or sp[1]!=spRef[1]:
39 |                 print()
40 |                 print(n,": You did not perform the same compuation as C++")
41 |                 print("C++: ",spRef[0],spRef[1])
42 |                 print(n,": ",sp[0],sp[1])
43 |                 print("We do not use",n,"in the final comparison.")
44 |                 doNotUse.append(n)
45 |             else:
46 |                 s=file.readline()
47 |                 C[n]=float(s.replace("\n",""))
48 | 
49 |     else:
50 |         print("\n\nFile "+filename+ " does not exists !")
51 |         print("did you run test in "+n+" ?\n\n")
52 |         
53 | Ts=sorted([(n,C[n]/C[cpp]) for n in C],key=lambda x: x[1])
54 | 
55 | print("\nComputing time / Computing time in C++:\n")
56 | for s in Ts:
57 |     if s[0]!="C++" and not s[0] in doNotUse:
58 |         print("* ",s[0].ljust(12)," : ",str(s[1])[0:5])
59 | print("\n")
60 | 


--------------------------------------------------------------------------------
/Weno/Results/README.md:
--------------------------------------------------------------------------------
 1 | Just run:
 2 | 
 3 | ```
 4 | ./Look.py
 5 | ```
 6 | This will give you the computing time divided by C++ computing time.
 7 | 
 8 | Beforehand, you must have run the benchmarks in all directories
 9 | (C++, Ju, Py, Numba, Fortran, ...).
10 | 
11 | If you do not run all the benchmarks (or if you add one), just modify the list
12 | "directories to explore" in Look.py.


--------------------------------------------------------------------------------
/Weno/runAllTests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | #  This script is supposed to run *all* the tests, and then compute
 4 | #  the final "report" in Results/
 5 | #  Not sure it works everywhere. If it does not, improve it, or enter each
 6 | #  directory and look at README.md to know what to do. 
 7 | #
 8 | #  Every test runs in a subshell; its steps are chained with && so that a
 9 | #  failed cd or build never lets the following commands run from the wrong
10 | #  directory or on a stale binary.
11 | #
12 | # Compiled benchmarks (CMake build in Build/, then ./run).
13 | for i in C++  C++NoCopy C++-Modulo C++-Pointers Fortran ; do
14 |     echo "Test: $i"
15 |     echo "--- "
16 |     (cd "$i" && mkdir -p Build && cd Build && cmake .. && make && ./run)
17 | done
18 | # Benchmarks started through their own ./script.
19 | for i in Ju Numba   ; do
20 |     echo "Test: $i"
21 |     echo "--- "
22 |     (cd "$i" && ./script)
23 | done
24 | # Plain Python benchmarks.
25 | for i in Py PyVec ; do
26 |     echo "Test: $i"
27 |     echo "--- "
28 |     (cd "$i" && python3 ./main.py)
29 | done
30 | 
31 | echo " "
32 | echo "Make the report:"
33 | (cd Results && ./Look.py)
34 | echo " "
35 | echo "To replay the report, cd Results/ and run ./Look.py "
36 | echo " "
37 | 
38 | 


--------------------------------------------------------------------------------