├── .github └── workflows │ ├── dubtest.yml │ └── pytest.yml ├── .gitignore ├── D ├── GSRBBenchmark.d ├── app.d ├── benchmark.sh ├── dub.json ├── results │ ├── outfile_casclakesp2_1501_field_gsrb │ ├── outfile_casclakesp2_1501_naive_gsrb │ ├── outfile_casclakesp2_1501_ndslice_gsrb │ ├── outfile_casclakesp2_1501_slice_gsrb │ ├── outfile_casclakesp2_1601_field_gsrb │ ├── outfile_casclakesp2_1601_field_gsrb-avx512 │ ├── outfile_casclakesp2_1601_naive_gsrb │ ├── outfile_casclakesp2_1601_naive_gsrb-avx512 │ ├── outfile_casclakesp2_1601_ndslice_gsrb │ ├── outfile_casclakesp2_1601_ndslice_gsrb-avx512 │ ├── outfile_casclakesp2_1601_slice_gsrb │ ├── outfile_casclakesp2_1601_slice_gsrb-avx512 │ ├── outfile_cip1e1_1212_field_gsrb │ ├── outfile_cip1e1_1212_naive_gsrb │ ├── outfile_cip1e1_1212_ndslice_gsrb │ ├── outfile_cip1e1_1212_slice_gsrb │ ├── outfile_cip1e3_1212_field_multigrid │ ├── outfile_cip1e3_1212_naive_multigrid │ ├── outfile_cip1e3_1212_ndslice_multigrid │ ├── outfile_cip1e3_1212_slice_multigrid │ ├── outfile_cip1e5_2611_field_multigrid │ ├── outfile_cip1e5_2611_naive_multigrid │ ├── outfile_cip1e5_2611_ndslice_multigrid │ ├── outfile_cip1e5_2611_slice_multigrid │ ├── outfile_cip1e5_3110_field_multigrid │ ├── outfile_cip1e5_3110_naive_multigrid │ ├── outfile_cip1e5_3110_slice_multigrid │ ├── outfile_cip1e6_2611_field_gsrb │ ├── outfile_cip1e6_2611_naive_gsrb │ ├── outfile_cip1e6_2611_ndslice_gsrb │ ├── outfile_cip1e6_2611_slice_gsrb │ ├── outfile_cip1e6_3010_field_gsrb │ ├── outfile_cip1e6_3010_field_multigrid │ ├── outfile_cip1e6_3010_naive_gsrb │ ├── outfile_cip1e6_3010_naive_multigrid │ ├── outfile_cip1e6_3010_slice_gsrb │ ├── outfile_cip1e6_3010_slice_multigrid │ ├── outfile_cip1e6_3110_field_gsrb │ ├── outfile_cip1e6_3110_naive_gsrb │ ├── outfile_cip1e6_3110_slice_gsrb │ ├── outfile_cip1g4_2011_field(rework)_multigrid │ ├── outfile_cip1g4_2011_field_multigrid │ ├── outfile_cip1g4_2011_naive(rework)_multigrid │ ├── outfile_cip1g4_2011_naive_multigrid │ ├── 
outfile_cip1g4_2011_slice(rework)_multigrid │ └── outfile_cip1g4_2011_slice_multigrid └── source │ ├── loadproblem.d │ ├── multid │ ├── gaussseidel │ │ ├── redblack.d │ │ └── sweep.d │ ├── multigrid │ │ ├── cycle.d │ │ ├── multigrid.d │ │ ├── prolongation.d │ │ └── restriction.d │ └── tools │ │ ├── apply_poisson.d │ │ ├── norm.d │ │ └── util.d │ ├── scripts.d │ └── startup.d ├── Python ├── benchmark_gsrb.py ├── benchmark_multigrid.py ├── create_gif.py ├── draw.py ├── multipy │ ├── GaussSeidel │ │ ├── GaussSeidel.py │ │ ├── GaussSeidel_RB.py │ │ └── __init__.py │ ├── __init__.py │ ├── multigrid │ │ ├── __init__.py │ │ ├── cycle.py │ │ ├── prolongation.py │ │ └── restriction.py │ ├── tests │ │ ├── __init__.py │ │ ├── problem_1D_20.npy │ │ ├── problem_2D_20.npy │ │ ├── problem_3D_20.npy │ │ ├── test_gauss_seidel.py │ │ ├── test_multigrid.py │ │ └── test_tools.py │ └── tools │ │ ├── __init__.py │ │ ├── apply_poisson.py │ │ ├── heatmap.py │ │ ├── operators.py │ │ └── util.py ├── problemgenerator │ ├── femwave.py │ ├── generate.py │ ├── heatmap.py │ └── load_problem.py ├── profiling.py ├── requirements.txt ├── results │ ├── outfile_cip1e1_1212_intel_1_nonumba_gsrb │ ├── outfile_cip1e1_1212_intel_1_numba_gsrb │ ├── outfile_cip1e1_1212_intel_8_nonumba_gsrb │ ├── outfile_cip1e1_1212_intel_8_numba_gsrb │ ├── outfile_cip1e1_1212_openblas_1_nonumba_gsrb │ ├── outfile_cip1e1_1212_openblas_1_numba_gsrb │ ├── outfile_cip1e1_1212_openblas_8_nonumba_gsrb │ ├── outfile_cip1e1_1212_openblas_8_numba_gsrb │ ├── outfile_cip1e3_1212_intel_1_nonumba_multigrid │ ├── outfile_cip1e3_1212_intel_1_numba_multigrid │ ├── outfile_cip1e3_1212_intel_8_nonumba_multigrid │ ├── outfile_cip1e3_1212_intel_8_numba_multigrid │ ├── outfile_cip1e3_1212_openblas_1_nonumba_multigrid │ ├── outfile_cip1e3_1212_openblas_1_numba_multigrid │ ├── outfile_cip1e3_1212_openblas_8_nonumba_multigrid │ ├── outfile_cip1e3_1212_openblas_8_numba_multigrid │ ├── outfile_cip1e5_2611_intel_1_nonumba_multigrid │ ├── 
outfile_cip1e5_2611_intel_1_numba_multigrid │ ├── outfile_cip1e5_2611_intel_8_nonumba_multigrid │ ├── outfile_cip1e5_2611_intel_8_numba_multigrid │ ├── outfile_cip1e5_2611_openblas_1_nonumba_multigrid │ ├── outfile_cip1e5_2611_openblas_1_numba_multigrid │ ├── outfile_cip1e5_2611_openblas_8_nonumba_multigrid │ ├── outfile_cip1e5_2611_openblas_8_numba_multigrid │ ├── outfile_cip1e5_3110_intel_1_nonumba_multigrid │ ├── outfile_cip1e5_3110_intel_1_numba_multigrid │ ├── outfile_cip1e5_3110_intel_8_nonumba_multigrid │ ├── outfile_cip1e5_3110_intel_8_numba_multigrid │ ├── outfile_cip1e5_3110_openblas_1_nonumba_multigrid │ ├── outfile_cip1e5_3110_openblas_1_numba_multigrid │ ├── outfile_cip1e5_3110_openblas_8_nonumba_multigrid │ ├── outfile_cip1e5_3110_openblas_8_numba_multigrid │ ├── outfile_cip1e6_2611_intel_1_nonumba_gsrb │ ├── outfile_cip1e6_2611_intel_1_numba_gsrb │ ├── outfile_cip1e6_2611_intel_8_nonumba_gsrb │ ├── outfile_cip1e6_2611_intel_8_numba_gsrb │ ├── outfile_cip1e6_2611_openblas_1_nonumba_gsrb │ ├── outfile_cip1e6_2611_openblas_1_numba_gsrb │ ├── outfile_cip1e6_2611_openblas_8_nonumba_gsrb │ ├── outfile_cip1e6_2611_openblas_8_numba_gsrb │ ├── outfile_cip1e6_3010_intel_1_nonumba_gsrb │ ├── outfile_cip1e6_3010_intel_1_nonumba_multigrid │ ├── outfile_cip1e6_3010_intel_1_numba_gsrb │ ├── outfile_cip1e6_3010_intel_1_numba_multigrid │ ├── outfile_cip1e6_3010_intel_8_nonumba_gsrb │ ├── outfile_cip1e6_3010_intel_8_nonumba_multigrid │ ├── outfile_cip1e6_3010_intel_8_numba_gsrb │ ├── outfile_cip1e6_3010_intel_8_numba_multigrid │ ├── outfile_cip1e6_3010_openblas_1_nonumba_gsrb │ ├── outfile_cip1e6_3010_openblas_1_nonumba_multigrid │ ├── outfile_cip1e6_3010_openblas_1_numba_gsrb │ ├── outfile_cip1e6_3010_openblas_1_numba_multigrid │ ├── outfile_cip1e6_3010_openblas_8_nonumba_gsrb │ ├── outfile_cip1e6_3010_openblas_8_nonumba_multigrid │ ├── outfile_cip1e6_3010_openblas_8_numba_gsrb │ ├── outfile_cip1e6_3010_openblas_8_numba_multigrid │ ├── 
outfile_cip1e6_3110_intel_1_nonumba_gsrb │ ├── outfile_cip1e6_3110_intel_1_numba_gsrb │ ├── outfile_cip1e6_3110_intel_8_nonumba_gsrb │ ├── outfile_cip1e6_3110_intel_8_numba_gsrb │ ├── outfile_cip1e6_3110_openblas_1_nonumba_gsrb │ ├── outfile_cip1e6_3110_openblas_1_numba_gsrb │ ├── outfile_cip1e6_3110_openblas_8_nonumba_gsrb │ └── outfile_cip1e6_3110_openblas_8_numba_gsrb ├── run.sh ├── scripts.py └── startup.py ├── README.md ├── graphs ├── d_rework_flops.png ├── d_rework_time.png ├── gsrb-avx512_flops.png ├── gsrb-avx512_time.png ├── gsrbD_flops.png ├── gsrbD_time.png ├── gsrb_FLOPS_subplots.png ├── gsrb_flops.png ├── gsrb_time.png ├── gsrb_time_subplots.png ├── gsrbnonumba_flops.png ├── gsrbnonumba_time.png ├── gsrbnumba_flops.png ├── gsrbnumba_time.png ├── heatmap.gif ├── multigridD_flops.png ├── multigridD_time.png ├── multigrid_FLOPS_subplots.png ├── multigrid_flops.png ├── multigrid_time.png ├── multigrid_time_subplots.png ├── multigridnonumba_flops.png ├── multigridnonumba_time.png ├── multigridnumba_flops.png ├── multigridnumba_time.png └── wave.gif ├── problems ├── problem_1D_100.npy └── problem_2D_100.npy └── scripts ├── check_perf.sh ├── data.py ├── generate_problems.sh ├── getinfos.sh ├── gsrb_avx.sh ├── masterrun.sh └── slurm_job_gsrb.sh /.github/workflows/dubtest.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 
5 | name: D 6 | 7 | on: 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | jobs: 14 | build: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | - uses: dlang-community/setup-dlang@v1 21 | 22 | - name: 'Build & Test' 23 | working-directory: ./D 24 | run: | 25 | # Build the project, with its main file included, without unittests 26 | dub build --compiler=$DC 27 | # Build and run tests, as defined by `unittest` configuration 28 | # In this mode, `mainSourceFile` is excluded and `version (unittest)` are included 29 | # See https://dub.pm/package-format-json.html#configurations 30 | dub test --compiler=$DC 31 | -------------------------------------------------------------------------------- /.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | name: Run Python Tests 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Install Python 3 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: 3.7 19 | - name: Install dependencies 20 | working-directory: ./Python 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install -r requirements.txt 24 | - name: Run tests with pytest 25 | working-directory: ./Python 26 | run: pytest 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.dub 2 | *.vscode 3 | *.json 4 | !dub.json 5 | __dummy.html 6 | *.o 7 | *.obj 8 | *.exe 9 | *.vim 10 | *.swp 11 | *.swo 12 | *.pyc 13 | __pycache__ 14 | *.pytest_cache 15 | venv 16 | intelpython3 17 | Python/problems 18 | */multigrid 19 | */gsrb 20 | */gsrb-avx512 21 | multid-test-multigrid 22 | multid-test-library 23 | multid-static 24 | trace.* 25 | *.log 26 | 
*problem_3D_100.npy 27 | D/multigrid_old 28 | -------------------------------------------------------------------------------- /D/GSRBBenchmark.d: -------------------------------------------------------------------------------- 1 | import mir.ndslice : slice; 2 | import std.exception : enforce; 3 | 4 | import startup : init; 5 | import loadproblem : npyload, getDim; 6 | import multid.gaussseidel.redblack : GS_RB, SweepType; 7 | 8 | /++ 9 | This loads and runs a problem that is provided on the command line and delays the execution of 10 | the Gauss-Seidel red-black solver until the delay is over. 11 | 12 | +/ 13 | void main(string[] argv) 14 | { 15 | alias i = init!(); 16 | i.start(); 17 | i.getopt(argv); 18 | const iterations = 5_000; 19 | 20 | void warmup() 21 | { 22 | auto UF1 = npyload!(double, 2)(i.default_path); 23 | GS_RB!(SweepType.ndslice)(UF1[1].slice, UF1[0].slice, 1, iterations, iterations + 10, 1e-8); 24 | } 25 | 26 | const uint dim = getDim(i.path); 27 | enforce(dim == 2, "This benchmark only supports 2D problems"); 28 | 29 | auto UF = npyload!(double, 2)(i.path); 30 | warmup(); 31 | i.wait_till(); 32 | switch (i.sweep) 33 | { 34 | case "slice": 35 | GS_RB!(SweepType.slice)(UF[1].slice, UF[0].slice, 1, iterations, iterations + 10, 1e-8); 36 | break; 37 | case "naive": 38 | GS_RB!(SweepType.naive)(UF[1].slice, UF[0].slice, 1, iterations, iterations + 10, 1e-8); 39 | break; 40 | case "field": 41 | GS_RB!(SweepType.field)(UF[1].slice, UF[0].slice, 1, iterations, iterations + 10, 1e-8); 42 | break; 43 | default: 44 | GS_RB!(SweepType.ndslice)(UF[1].slice, UF[0].slice, 1, iterations, iterations + 10, 1e-8); 45 | 46 | } 47 | i.print_time(); 48 | } 49 | -------------------------------------------------------------------------------- /D/app.d: -------------------------------------------------------------------------------- 1 | import mir.ndslice : slice; 2 | 3 | import startup : init; 4 | import loadproblem : npyload, getDim; 5 | import multid.multigrid.multigrid : 
poisson_multigrid; 6 | import multid.gaussseidel.redblack : GS_RB; 7 | 8 | /++ 9 | This loads and runs a problem that is provided on the command line and delays the execution of 10 | the multigrid solver until the delay is over. 11 | 12 | +/ 13 | void main(string[] argv) 14 | { 15 | alias i = init!(); 16 | i.start(); 17 | i.getopt(argv); 18 | 19 | void warmup() 20 | { 21 | auto UF1 = npyload!(double, 2)(i.default_path); 22 | poisson_multigrid(UF1[1].slice, UF1[0].slice, 0, 1, 2, 2, 1); 23 | } 24 | 25 | const uint dim = getDim(i.path); 26 | 27 | switch (dim) 28 | { 29 | case 1: 30 | auto UF = npyload!(double, 1)(i.path); 31 | warmup(); 32 | i.wait_till(); 33 | poisson_multigrid(UF[1].slice, UF[0].slice, 0, 1, 2, 2, 100, i.sweep); 34 | break; 35 | case 2: 36 | auto UF = npyload!(double, 2)(i.path); 37 | warmup(); 38 | i.wait_till(); 39 | poisson_multigrid(UF[1].slice, UF[0].slice, 0, 1, 2, 2, 100, i.sweep); 40 | break; 41 | case 3: 42 | auto UF = npyload!(double, 3)(i.path); 43 | warmup(); 44 | i.wait_till(); 45 | poisson_multigrid(UF[1].slice, UF[0].slice, 0, 1, 2, 2, 100, i.sweep); 46 | break; 47 | default: 48 | throw new Exception("wrong dimension!"); 49 | } 50 | i.print_time(); 51 | } 52 | -------------------------------------------------------------------------------- /D/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | problempath=${1:-'../problems/'} 4 | [ -d "$problempath" ] || exit 1 5 | binary=${2:-'./multigrid -s ndslice'} 6 | sweeptype=$(echo "$binary" | sed -r 's/.+ -s (field|naive|slice|ndslice).*/\1/') 7 | buildtype=$(echo "$binary" | sed -r 's/.+(multigrid|gsrb|gsrb-avx512) .+/\1/') 8 | # sanity check, at least against empty strings 9 | 10 | [ -z "$buildtype" ] && exit 1 11 | [ -z "$sweeptype" ] && exit 1 12 | 13 | OUTFILE="results/outfile_$(hostname -s)_$(date +%d%m)_${sweeptype}_${buildtype}" 14 | # checks if perf is usable to count flops with the GFLOPS metric group 15 | 16 | echo "$OUTFILE" 17 | 18 | 
benchmark() { 19 | perf=$1 20 | problem=$2 21 | delay=1000 22 | delayPerf=1000 23 | 24 | cmd="$binary -p $problem -d $delay" 25 | if [ "$buildtype" = 'gsrb' ] || [ "$buildtype" = 'gsrb-avx512' ]; then 26 | cmd="$cmd -v" 27 | fi 28 | 29 | if [ "$perf" = true ]; then 30 | cmd="perf stat -M GFLOPS -D $delayPerf $cmd" 31 | fi 32 | 33 | x=$($cmd 2>&1) || exit 1 34 | out=$(echo "$x" | head -n 2 | tr '\n' ':' | tr ' ' ':' | awk -F':' '{print $23 ":" $11 ":" $14 ":"}') 35 | if [ "$perf" = true ]; then 36 | flops=$(echo "$x" | tail -n +3 | grep -i 'fp' | awk '{ print $1}' | tr '\n' ':') 37 | out="$out$flops" 38 | fi 39 | 40 | printf "%s\n" "$out" 41 | 42 | } 43 | 44 | perf=$(../scripts/check_perf.sh) 45 | 46 | get_infos() { 47 | ../scripts/getinfos.sh "mir" "$perf" 48 | } 49 | 50 | [ -e "$OUTFILE" ] || get_infos >>"$OUTFILE" || exit 1 51 | 52 | reps=5 53 | 54 | for _ in $(seq $reps); do 55 | for problem in "$problempath/"*.npy; do 56 | dim=$(echo "$problem" | awk -F'_' '{print $2}') 57 | N=$(echo "$problem" | awk -F'_' '{print $3}') 58 | N=${N%%\.npy} 59 | 60 | x=$(benchmark "$perf" "$problem") && printf "%b:%b:%b\n" "$N" "$dim" "$x" >>"${OUTFILE}" 61 | done 62 | done 63 | -------------------------------------------------------------------------------- /D/dub.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "multid", 3 | 4 | "dependencies": { 5 | "mir-algorithm": "~>3.10.11", 6 | "mir-random": "~>2.2.14", 7 | "numir": "~>2.0.5" 8 | }, 9 | "configurations": [ 10 | { 11 | "name": "multigrid", 12 | "targetName": "multigrid", 13 | "mainSourceFile": "app.d", 14 | "compiler": "ldc", 15 | "dflags-ldc": ["-mcpu=native"], 16 | "targetType": "executable" 17 | }, 18 | { 19 | "name": "multid-static", 20 | "mainSourceFile": "app.d", 21 | "targetType": "executable", 22 | "targetName": "multid-static", 23 | "compiler": "ldc", 24 | "dflags-ldc": ["-mcpu=native", "--static"] 25 | }, 26 | { 27 | "name": "gsrb", 28 | "mainSourceFile": 
"GSRBBenchmark.d", 29 | "targetName": "gsrb", 30 | "dflags-ldc": ["-mcpu=native", "-mattr=-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq"], 31 | "targetType": "executable", 32 | "compiler": "ldc2" 33 | }, 34 | { 35 | "name": "gsrb-avx512", 36 | "mainSourceFile": "GSRBBenchmark.d", 37 | "targetName": "gsrb-avx512", 38 | "dflags-ldc": ["-mcpu=native", "-mattr=+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512er,+avx512f,+avx512ifma,+avx512pf,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vp2intersect,+avx512vpopcntdq"], 39 | "targetType": "executable", 40 | "compiler": "ldc2" 41 | } 42 | ] 43 | } 44 | 45 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_2611_field_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Thu 26 Nov 2020 08:47:39 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:8.000383e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 7 | 1024:2D:7.752853e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 8 | 1024:2D:7.446066e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 9 | 1088:2D:9.411376e+00:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 10 | 1088:2D:9.401843e+00:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 11 | 1088:2D:9.329276e+00:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 12 | 112:2D:5.523500e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:8.245700e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:5.509600e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:1.077344e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 16 | 
1152:2D:1.073133e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 17 | 1152:2D:1.070235e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 18 | 1216:2D:1.223708e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 19 | 1216:2D:1.216874e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 20 | 1216:2D:1.213055e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 21 | 128:2D:7.605100e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:7.162400e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:7.668000e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:1.376782e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 25 | 1280:2D:1.372937e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 26 | 1280:2D:1.372863e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 27 | 144:2D:9.562000e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:9.319100e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:9.668100e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:4.292000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:2.071000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:4.290000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:1.224000e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:1.193060e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:1.221780e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 176:2D:1.470710e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:1.591360e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:1.465730e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:1.900490e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:1.765570e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:1.782750e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:2.300110e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:2.061050e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:2.137020e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 
224:2D:2.484770e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:2.720090e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:2.342510e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 240:2D:2.680010e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:2.660470e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:2.708750e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:2.979390e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:3.126120e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:3.061670e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:3.395920e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:3.400590e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:3.463790e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:3.828990e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:3.845340e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:3.894150e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:4.319440e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:4.235200e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:4.263890e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:3.978000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:6.759000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:4.060000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:4.723970e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 67 | 320:2D:4.676170e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 68 | 320:2D:4.720660e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 69 | 384:2D:6.778680e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 70 | 384:2D:6.675070e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 71 | 384:2D:6.777440e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 72 | 448:2D:9.237100e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 73 | 448:2D:9.291700e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 74 | 
448:2D:9.328580e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 75 | 48:2D:9.336000e-03:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:1.309800e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 48:2D:1.323100e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:1.241880e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 79 | 512:2D:1.225156e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 80 | 512:2D:1.224832e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 81 | 576:2D:1.561220e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 82 | 576:2D:1.566434e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 83 | 576:2D:1.555911e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 84 | 64:2D:1.945400e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:1.850800e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:2.014500e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:1.961468e+00:5000:0.000000e+00:12:12,211,320,014:0:0:0:0: 88 | 640:2D:1.936614e+00:5000:0.000000e+00:12:12,211,320,014:0:0:0:0: 89 | 640:2D:1.972345e+00:5000:0.000000e+00:12:12,211,320,014:0:0:0:0: 90 | 704:2D:2.510337e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:2.512725e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:2.479376e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:3.177582e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:3.286164e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:3.191389e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:2.831200e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:2.650700e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:2.671200e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:4.161993e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 100 | 832:2D:4.087105e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 101 | 832:2D:4.121650e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 102 | 896:2D:5.208627e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 103 | 
896:2D:5.178747e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 104 | 896:2D:5.249514e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 105 | 96:2D:7.653800e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 96:2D:4.132900e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:5.626400e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:6.493801e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 109 | 960:2D:6.402130e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 110 | 960:2D:6.494050e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_2611_naive_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Thu 26 Nov 2020 08:53:45 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:8.254934e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 7 | 1024:2D:8.363179e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 8 | 1024:2D:8.267449e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 9 | 1088:2D:1.047409e+01:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 10 | 1088:2D:1.054409e+01:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 11 | 1088:2D:1.051555e+01:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 12 | 112:2D:7.448300e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:7.931400e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:7.716600e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:1.206235e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 16 | 1152:2D:1.199897e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 17 | 1152:2D:1.195902e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 18 | 1216:2D:1.331659e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 19 | 
1216:2D:1.345480e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 20 | 1216:2D:1.339775e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 21 | 128:2D:1.035130e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:9.528600e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:9.967700e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:1.492151e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 25 | 1280:2D:1.490413e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 26 | 1280:2D:1.486857e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 27 | 144:2D:1.256650e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:1.258090e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:1.249200e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:1.425000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:1.275000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:1.724000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:1.543800e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:1.923540e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:1.526060e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 176:2D:1.867350e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:1.939080e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:1.910250e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:2.199340e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:2.204160e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:2.170390e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:2.670510e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:2.665940e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:2.573700e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:3.052920e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:2.977040e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:3.316140e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 
240:2D:3.426940e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:3.439580e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:3.405090e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:4.175150e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:3.854690e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:3.830280e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:4.619980e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:4.371510e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:4.399010e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:4.851420e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:4.913170e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:4.925590e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:5.467870e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:5.511940e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:5.457080e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:8.169000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:9.334000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:2.227200e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:5.973010e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 67 | 320:2D:6.010750e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 68 | 320:2D:5.987430e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 69 | 384:2D:8.622030e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 70 | 384:2D:8.685420e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 71 | 384:2D:8.654810e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 72 | 448:2D:1.176424e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 73 | 448:2D:1.189773e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 74 | 448:2D:1.175506e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 75 | 48:2D:1.228000e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:1.319000e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 
48:2D:1.575900e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:1.540415e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 79 | 512:2D:1.546700e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 80 | 512:2D:1.537299e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 81 | 576:2D:1.975691e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 82 | 576:2D:1.955410e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 83 | 576:2D:1.978564e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 84 | 64:2D:5.983100e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:2.420700e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:2.641900e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:2.511615e+00:5000:0.000000e+00:12:12,211,320,015:0:0:0:0: 88 | 640:2D:2.537655e+00:5000:0.000000e+00:12:12,211,320,015:0:0:0:0: 89 | 640:2D:2.486312e+00:5000:0.000000e+00:12:12,211,320,015:0:0:0:0: 90 | 704:2D:3.180670e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:3.162762e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:3.101611e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:3.864085e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:3.907207e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:3.952781e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:6.582300e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:4.370000e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:4.390700e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:4.900009e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 100 | 832:2D:5.010069e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 101 | 832:2D:4.893291e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 102 | 896:2D:6.200509e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 103 | 896:2D:6.216420e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 104 | 896:2D:6.217914e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 105 | 96:2D:6.758200e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 
96:2D:5.941200e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:5.944000e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:7.518621e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 109 | 960:2D:7.553885e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 110 | 960:2D:7.432408e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_2611_ndslice_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Thu 26 Nov 2020 09:07:36 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:7.620814e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 7 | 1024:2D:7.286306e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 8 | 1024:2D:7.242712e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 9 | 1088:2D:9.112372e+00:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 10 | 1088:2D:9.312927e+00:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 11 | 1088:2D:9.101741e+00:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 12 | 112:2D:5.002700e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:5.131200e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:5.220300e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:1.050969e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 16 | 1152:2D:1.069037e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 17 | 1152:2D:1.070052e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 18 | 1216:2D:1.200284e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 19 | 1216:2D:1.191272e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 20 | 1216:2D:1.211052e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 21 | 128:2D:6.824900e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 
128:2D:7.298800e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:6.660700e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:1.353397e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 25 | 1280:2D:1.361889e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 26 | 1280:2D:1.361724e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 27 | 144:2D:8.701600e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:9.023300e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:8.894800e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:1.337000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:4.285000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:4.488000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:1.103560e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:1.090680e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:1.487000e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 176:2D:1.368140e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:1.302530e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:1.315220e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:1.612440e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:1.636490e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:1.944750e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:1.873910e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:1.901910e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:1.848160e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:2.239060e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:2.214160e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:2.155220e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 240:2D:2.443100e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:2.483140e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:2.496930e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 
256:2D:2.915120e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:2.845620e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:2.844050e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:3.247670e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:3.257640e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:3.276500e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:3.756320e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:3.609020e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:3.632230e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:4.076680e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:4.053440e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:4.037690e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:3.943000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:8.118000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:3.859000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:4.703490e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 67 | 320:2D:4.465350e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 68 | 320:2D:4.470920e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 69 | 384:2D:6.467120e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 70 | 384:2D:6.422450e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 71 | 384:2D:6.496040e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 72 | 448:2D:8.801260e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 73 | 448:2D:8.854640e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 74 | 448:2D:8.763100e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 75 | 48:2D:2.167100e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:8.859000e-03:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 48:2D:1.224400e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:1.220294e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 79 | 512:2D:1.176679e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 80 | 
512:2D:1.205860e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 81 | 576:2D:1.518752e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 82 | 576:2D:1.472899e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 83 | 576:2D:1.483097e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 84 | 64:2D:1.926300e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:1.945200e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:1.915400e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:1.912830e+00:5000:0.000000e+00:12:12,211,320,014:0:0:0:0: 88 | 640:2D:1.868021e+00:5000:0.000000e+00:12:12,211,320,014:0:0:0:0: 89 | 640:2D:1.911430e+00:5000:0.000000e+00:12:12,211,320,014:0:0:0:0: 90 | 704:2D:2.497040e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:2.375358e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:2.498221e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:3.141181e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:3.001378e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:3.378429e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:2.533500e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:2.564900e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:2.468700e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:3.919957e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 100 | 832:2D:3.917033e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 101 | 832:2D:3.979723e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 102 | 896:2D:5.002529e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 103 | 896:2D:4.944650e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 104 | 896:2D:5.108772e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 105 | 96:2D:4.483000e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 96:2D:3.893500e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:7.354200e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:6.469626e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 109 | 
960:2D:6.312343e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 110 | 960:2D:6.405771e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_2611_slice_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Thu 26 Nov 2020 09:00:27 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:9.002128e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 7 | 1024:2D:8.990643e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 8 | 1024:2D:9.170142e+00:5000:0.000000e+00:12:31,334,520,015:0:0:0:0: 9 | 1088:2D:1.175178e+01:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 10 | 1088:2D:1.170778e+01:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 11 | 1088:2D:1.178078e+01:5000:0.000000e+00:12:35,381,880,015:0:0:0:0: 12 | 112:2D:7.535900e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:7.803300e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:7.934500e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:1.341061e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 16 | 1152:2D:1.329945e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 17 | 1152:2D:1.351882e+01:5000:0.000000e+00:12:39,675,000,015:0:0:0:0: 18 | 1216:2D:1.571168e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 19 | 1216:2D:1.550177e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 20 | 1216:2D:1.536541e+01:5000:0.000000e+00:12:44,213,880,015:0:0:0:0: 21 | 128:2D:9.917700e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:1.018120e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:9.895900e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:1.701222e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 25 | 
1280:2D:1.678961e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 26 | 1280:2D:1.674414e+01:5000:0.000000e+00:12:48,998,520,015:0:0:0:0: 27 | 144:2D:1.347520e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:1.291430e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:1.302550e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:5.730000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:1.693000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:5.051000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:1.687950e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:1.690420e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:1.665490e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 176:2D:2.051560e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:1.985820e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:2.049410e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:2.340550e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:2.317000e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:2.349640e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:3.290400e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:3.180270e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:2.751350e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:3.178150e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:3.207750e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:3.237050e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 240:2D:3.762180e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:3.608340e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:3.645160e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:4.036370e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:4.071120e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:4.159830e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 
272:2D:4.668170e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:4.734380e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:4.737320e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:5.648090e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:5.337270e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:5.359720e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:5.787220e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:6.189580e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:5.743060e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:8.355000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:1.724600e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:6.192000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:6.264450e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 67 | 320:2D:6.305960e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 68 | 320:2D:6.501800e-01:5000:0.000000e+00:8:3,033,720,010:0:0:0:0: 69 | 384:2D:8.871530e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 70 | 384:2D:9.092500e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 71 | 384:2D:9.095910e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 72 | 448:2D:1.242868e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 73 | 448:2D:1.224662e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 74 | 448:2D:1.202766e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 75 | 48:2D:1.520300e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:1.443400e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 48:2D:1.625200e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:1.636416e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 79 | 512:2D:1.671972e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 80 | 512:2D:1.629378e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 81 | 576:2D:2.054543e+00:5000:0.000000e+00:12:9,884,280,015:0:0:0:0: 82 | 576:2D:1.996377e+00:5000:0.000000e+00:12:9,884,280,014:0:0:0:0: 83 | 
576:2D:2.005237e+00:5000:0.000000e+00:12:9,884,280,015:0:0:0:0: 84 | 64:2D:2.730000e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:2.455800e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:2.569400e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:2.519519e+00:5000:0.000000e+00:12:12,211,320,015:0:0:0:0: 88 | 640:2D:2.504406e+00:5000:0.000000e+00:12:12,211,320,015:0:0:0:0: 89 | 640:2D:2.572243e+00:5000:0.000000e+00:12:12,211,320,015:0:0:0:0: 90 | 704:2D:3.291420e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:3.181701e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:3.290754e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:3.933555e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:4.004359e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:4.029722e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:4.116000e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:3.848000e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:4.144900e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:5.154172e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 100 | 832:2D:5.101280e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 101 | 832:2D:5.087519e+00:5000:0.000000e+00:12:20,667,000,015:0:0:0:0: 102 | 896:2D:6.404761e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 103 | 896:2D:6.454031e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 104 | 896:2D:6.494543e+00:5000:0.000000e+00:12:23,977,080,015:0:0:0:0: 105 | 96:2D:5.469900e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 96:2D:5.798700e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:5.719400e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:8.171241e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 109 | 960:2D:8.177021e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 110 | 960:2D:8.097435e+00:5000:0.000000e+00:12:27,532,920,015:0:0:0:0: 111 | 
-------------------------------------------------------------------------------- /D/results/outfile_cip1e6_3010_field_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Fri 30 Oct 2020 04:56:29 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:7.510472e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 7 | 1024:2D:7.542225e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 8 | 1024:2D:7.532740e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 9 | 1088:2D:9.322126e+00:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 10 | 1088:2D:9.857043e+00:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 11 | 1088:2D:9.302216e+00:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 12 | 112:2D:5.536500e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:5.539300e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:5.593700e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:1.116284e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 16 | 1152:2D:1.091383e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 17 | 1152:2D:1.068208e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 18 | 1216:2D:1.212235e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 19 | 1216:2D:1.220570e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 20 | 1216:2D:1.219966e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 21 | 128:2D:7.573200e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:7.610800e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:7.586700e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:1.360988e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 25 | 1280:2D:1.361398e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 26 | 1280:2D:1.367449e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 27 | 144:2D:9.686200e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 
144:2D:9.467300e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:9.854200e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:4.090000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:4.498000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:4.749000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:1.219940e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:1.235240e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:1.217180e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 176:2D:1.483490e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:1.470410e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:1.752790e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:1.835310e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:1.775980e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:1.784070e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:2.068430e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:2.076380e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:2.043460e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:2.395780e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:2.414770e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:2.450960e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 240:2D:2.723060e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:2.793340e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:2.802690e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:3.092150e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:3.141520e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:3.165410e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:3.489480e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:3.523500e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:3.501580e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 
288:2D:4.051460e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:3.924390e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:3.879350e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:4.396480e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:4.359690e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:4.374400e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:4.090000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:4.782000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:4.322000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:4.886540e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 67 | 320:2D:4.895510e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 68 | 320:2D:4.808360e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 69 | 384:2D:7.374720e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 70 | 384:2D:6.866470e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 71 | 384:2D:6.877010e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 72 | 448:2D:9.622490e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 73 | 448:2D:9.415720e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 74 | 448:2D:9.379990e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 75 | 48:2D:9.328000e-03:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:1.067900e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 48:2D:1.062000e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:1.252978e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 79 | 512:2D:1.289026e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 80 | 512:2D:1.248933e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 81 | 576:2D:1.580461e+00:5000:0.000000e+00:8:9,884,280,010:0:0:0:0: 82 | 576:2D:1.591083e+00:5000:0.000000e+00:8:9,884,280,010:0:0:0:0: 83 | 576:2D:1.566851e+00:5000:0.000000e+00:8:9,884,280,010:0:0:0:0: 84 | 64:2D:2.285300e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:2.021600e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 
64:2D:2.124300e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:2.006965e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 88 | 640:2D:1.975934e+00:5000:0.000000e+00:8:12,211,320,010:0:0:0:0: 89 | 640:2D:1.967776e+00:5000:0.000000e+00:8:12,211,320,010:0:0:0:0: 90 | 704:2D:2.532751e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:2.526125e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:2.530629e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:3.229104e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:3.204204e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:3.162734e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:2.774400e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:5.234600e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:2.971500e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:4.100372e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 100 | 832:2D:4.056157e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 101 | 832:2D:4.030517e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 102 | 896:2D:5.193849e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 103 | 896:2D:5.182850e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 104 | 896:2D:5.246345e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 105 | 96:2D:4.461100e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 96:2D:4.285200e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:3.915800e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:6.561217e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 109 | 960:2D:6.495915e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 110 | 960:2D:6.380518e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_3010_naive_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Fri 30 Oct 2020 05:02:35 PM CET 3 | Intel(R) Core(TM) 
i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:9.880387e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 7 | 1024:2D:9.893210e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 8 | 1024:2D:9.899505e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 9 | 1088:2D:1.212571e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 10 | 1088:2D:1.205080e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 11 | 1088:2D:1.213612e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 12 | 112:2D:1.028020e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:1.019320e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:1.045000e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:1.368404e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 16 | 1152:2D:1.374208e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 17 | 1152:2D:1.380266e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 18 | 1216:2D:1.532058e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 19 | 1216:2D:1.533094e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 20 | 1216:2D:1.529742e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 21 | 128:2D:1.378930e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:1.325200e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:1.292700e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:1.685082e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 25 | 1280:2D:1.694186e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 26 | 1280:2D:1.689568e+01:5000:0.000000e+00:8:48,998,520,011:0:0:0:0: 27 | 144:2D:1.678540e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:1.629890e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:2.020480e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:2.153000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:4.809000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 
16:2D:5.933000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:2.288820e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:2.006570e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:2.037020e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 176:2D:2.456930e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:2.460050e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:2.448620e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:2.870160e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:2.918480e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:3.226500e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:3.374220e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:3.421800e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:3.379720e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:3.900650e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:3.909000e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:4.154860e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 240:2D:4.497840e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:4.493520e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:4.690390e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:5.140030e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:5.120840e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:5.083130e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:5.733660e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:5.773370e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:5.776090e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:6.454890e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:6.776090e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:6.451780e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:7.173630e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 
304:2D:7.317790e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:7.436480e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:1.111200e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:1.032100e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:2.041800e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:7.981330e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 67 | 320:2D:7.989880e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 68 | 320:2D:7.971400e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 69 | 384:2D:1.176857e+00:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 70 | 384:2D:1.144237e+00:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 71 | 384:2D:1.143339e+00:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 72 | 448:2D:1.566551e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 73 | 448:2D:1.559338e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 74 | 448:2D:1.558070e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 75 | 48:2D:1.856900e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:2.016600e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 48:2D:2.003800e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:2.049981e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 79 | 512:2D:2.056974e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 80 | 512:2D:2.084688e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 81 | 576:2D:2.601022e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 82 | 576:2D:2.594845e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 83 | 576:2D:2.599639e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 84 | 64:2D:3.011900e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:3.096700e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:3.225700e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:3.296804e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 88 | 640:2D:3.281161e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 89 | 640:2D:3.256612e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 90 | 
704:2D:4.016556e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:4.035180e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:4.041845e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:4.991035e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:4.960707e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:4.939843e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:5.221300e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:6.037700e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:5.407500e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:6.131736e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 100 | 832:2D:6.122036e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 101 | 832:2D:6.086761e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 102 | 896:2D:7.431496e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 103 | 896:2D:7.416629e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 104 | 896:2D:7.444029e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 105 | 96:2D:7.680500e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 96:2D:7.394300e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:7.796700e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:8.911726e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 109 | 960:2D:8.870637e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 110 | 960:2D:8.910696e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_3010_slice_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Fri 30 Oct 2020 05:10:13 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:2.087929e+01:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 7 | 
1024:2D:2.059064e+01:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 8 | 1024:2D:2.070470e+01:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 9 | 1088:2D:2.575819e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 10 | 1088:2D:2.527173e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 11 | 1088:2D:2.588342e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 12 | 112:2D:1.617690e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:1.614950e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:1.617390e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:3.046357e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 16 | 1152:2D:3.022500e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 17 | 1152:2D:3.031849e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 18 | 1216:2D:3.671951e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 19 | 1216:2D:3.671200e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 20 | 1216:2D:3.659546e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 21 | 128:2D:2.091500e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:2.293230e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:2.219780e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:4.210534e+01:5000:0.000000e+00:8:48,998,520,011:0:0:0:0: 25 | 1280:2D:4.253758e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 26 | 1280:2D:4.204814e+01:5000:0.000000e+00:8:48,998,520,011:0:0:0:0: 27 | 144:2D:2.963800e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:2.807540e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:2.799040e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:6.575000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:3.743000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:7.178000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:3.558350e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:3.760500e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:3.641130e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 
176:2D:4.445410e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:4.573780e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:4.428550e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:5.575820e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:5.575620e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:5.583600e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:7.108020e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:6.929840e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:6.937450e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:7.981880e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:8.234870e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:8.093270e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 240:2D:9.374980e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:1.000953e+00:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:9.290980e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:1.089021e+00:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:1.097755e+00:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:1.068546e+00:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:1.201304e+00:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:1.239344e+00:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:1.247088e+00:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:1.358959e+00:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:1.395891e+00:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:1.342620e+00:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:1.511687e+00:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:1.530231e+00:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:1.538092e+00:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:1.674900e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:1.459900e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 
32:2D:1.288600e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:1.726242e+00:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 67 | 320:2D:1.745762e+00:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 68 | 320:2D:1.683361e+00:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 69 | 384:2D:2.448279e+00:5000:0.000000e+00:8:4,377,720,011:0:0:0:0: 70 | 384:2D:2.411772e+00:5000:0.000000e+00:8:4,377,720,011:0:0:0:0: 71 | 384:2D:2.435746e+00:5000:0.000000e+00:8:4,377,720,011:0:0:0:0: 72 | 448:2D:3.264330e+00:5000:0.000000e+00:8:5,967,480,011:0:0:0:0: 73 | 448:2D:3.264295e+00:5000:0.000000e+00:8:5,967,480,011:0:0:0:0: 74 | 448:2D:3.254355e+00:5000:0.000000e+00:8:5,967,480,011:0:0:0:0: 75 | 48:2D:2.589200e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:4.866100e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 48:2D:2.892900e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:4.303585e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 79 | 512:2D:4.386692e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 80 | 512:2D:4.327338e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 81 | 576:2D:5.571503e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 82 | 576:2D:5.420727e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 83 | 576:2D:5.476466e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 84 | 64:2D:7.859700e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:5.161200e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:4.954100e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:6.849490e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 88 | 640:2D:6.651403e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 89 | 640:2D:6.632482e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 90 | 704:2D:8.152631e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:8.286485e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:8.194858e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:9.927357e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 
768:2D:9.896525e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:9.936867e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:8.437200e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:1.106440e-01:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:8.106100e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:1.221299e+01:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 100 | 832:2D:1.205044e+01:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 101 | 832:2D:1.193302e+01:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 102 | 896:2D:1.451232e+01:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 103 | 896:2D:1.461022e+01:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 104 | 896:2D:1.468601e+01:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 105 | 96:2D:1.186090e-01:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 96:2D:1.172220e-01:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:1.187300e-01:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:1.770860e+01:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 109 | 960:2D:1.758362e+01:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 110 | 960:2D:1.767122e+01:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_3110_field_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Sat 31 Oct 2020 05:14:52 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:7.541628e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 7 | 1024:2D:7.538269e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 8 | 1024:2D:7.436423e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 9 | 1088:2D:9.435511e+00:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 10 | 1088:2D:9.345257e+00:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 11 | 
1088:2D:9.425080e+00:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 12 | 112:2D:5.257600e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:5.472800e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:6.214700e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:1.083972e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 16 | 1152:2D:1.077377e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 17 | 1152:2D:1.085338e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 18 | 1216:2D:1.226068e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 19 | 1216:2D:1.212659e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 20 | 1216:2D:1.222306e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 21 | 128:2D:1.114830e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:7.748700e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:8.243000e-02:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:1.395794e+01:5000:0.000000e+00:8:48,998,520,011:0:0:0:0: 25 | 1280:2D:1.374614e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 26 | 1280:2D:1.372807e+01:5000:0.000000e+00:8:48,998,520,011:0:0:0:0: 27 | 144:2D:9.746300e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:9.722700e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:9.951300e-02:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:1.019000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:1.012000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:1.414300e-02:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:1.245730e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:1.341540e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:1.234310e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 176:2D:1.488140e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:1.486360e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:1.480940e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:1.780350e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 
192:2D:1.806460e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:1.820950e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:2.085160e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:2.041590e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:2.105530e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:2.417320e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:2.376520e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:2.449060e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 240:2D:2.800800e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:2.787540e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:2.717480e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:3.076110e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:3.104240e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:3.094600e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:3.650290e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:3.513160e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:3.478510e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:3.907460e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:3.917920e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:3.951710e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:4.351770e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:4.346720e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:4.742100e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:4.158000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:6.735000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:6.941000e-03:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:4.854490e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 67 | 320:2D:4.841330e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 68 | 320:2D:4.888040e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 69 | 
384:2D:6.996930e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 70 | 384:2D:6.993300e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 71 | 384:2D:6.885640e-01:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 72 | 448:2D:9.499090e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 73 | 448:2D:9.602540e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 74 | 448:2D:9.369850e-01:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 75 | 48:2D:1.224900e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:1.002900e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 48:2D:1.141400e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:1.249210e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 79 | 512:2D:1.281822e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 80 | 512:2D:1.259950e+00:5000:0.000000e+00:8:7,803,000,010:0:0:0:0: 81 | 576:2D:1.571255e+00:5000:0.000000e+00:8:9,884,280,010:0:0:0:0: 82 | 576:2D:1.564743e+00:5000:0.000000e+00:8:9,884,280,010:0:0:0:0: 83 | 576:2D:1.558330e+00:5000:0.000000e+00:8:9,884,280,010:0:0:0:0: 84 | 64:2D:2.031400e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:4.254400e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:1.999200e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:2.006535e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 88 | 640:2D:2.033835e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 89 | 640:2D:2.068567e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 90 | 704:2D:2.558973e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:2.605182e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:2.545946e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:3.247343e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:3.206035e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:3.263236e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:2.998700e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:2.730200e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 
80:2D:3.004900e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:4.153480e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 100 | 832:2D:4.113271e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 101 | 832:2D:4.099877e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 102 | 896:2D:5.226397e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 103 | 896:2D:5.217234e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 104 | 896:2D:5.268663e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 105 | 96:2D:3.998600e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 96:2D:4.183400e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:4.229000e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:6.544584e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 109 | 960:2D:6.463863e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 110 | 960:2D:6.438701e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_3110_naive_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Sat 31 Oct 2020 05:20:58 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:9.999921e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 7 | 1024:2D:9.902563e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 8 | 1024:2D:9.935658e+00:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 9 | 1088:2D:1.212048e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 10 | 1088:2D:1.210751e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 11 | 1088:2D:1.234772e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 12 | 112:2D:1.051100e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:9.922100e-02:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:1.206940e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 
1152:2D:1.377751e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 16 | 1152:2D:1.373112e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 17 | 1152:2D:1.373826e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 18 | 1216:2D:1.533257e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 19 | 1216:2D:1.531026e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 20 | 1216:2D:1.528564e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 21 | 128:2D:1.340530e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:1.320220e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:1.345540e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:1.694035e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 25 | 1280:2D:1.692629e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 26 | 1280:2D:1.686565e+01:5000:0.000000e+00:8:48,998,520,011:0:0:0:0: 27 | 144:2D:1.660710e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:1.640540e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:1.640220e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:1.846000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:3.804000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:1.488000e-02:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:2.011870e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:2.047220e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:2.048180e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 36 | 176:2D:2.802640e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:2.422630e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:2.451940e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:2.921010e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:2.879600e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:2.932670e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:3.405450e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:3.388070e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 
208:2D:3.389570e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:3.926200e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:3.927530e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:3.956680e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 240:2D:4.478170e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:4.513180e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:4.521530e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:5.119530e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:5.124330e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:5.133760e-01:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:5.785260e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:6.131290e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:5.811240e-01:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:6.523820e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:6.770610e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:6.438930e-01:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:7.207920e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:7.215970e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:7.273740e-01:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:1.100500e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:1.124100e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:2.436200e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:7.933650e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 67 | 320:2D:7.964220e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 68 | 320:2D:7.987780e-01:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 69 | 384:2D:1.145133e+00:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 70 | 384:2D:1.141672e+00:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 71 | 384:2D:1.186160e+00:5000:0.000000e+00:8:4,377,720,010:0:0:0:0: 72 | 448:2D:1.553678e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 73 | 
448:2D:1.558474e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 74 | 448:2D:1.558349e+00:5000:0.000000e+00:8:5,967,480,010:0:0:0:0: 75 | 48:2D:1.656500e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:1.793800e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 48:2D:1.722000e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:2.051792e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 79 | 512:2D:2.055526e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 80 | 512:2D:2.054759e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 81 | 576:2D:2.582124e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 82 | 576:2D:2.592898e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 83 | 576:2D:2.609135e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 84 | 64:2D:3.334400e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:3.284700e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:6.114200e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:3.271206e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 88 | 640:2D:3.300691e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 89 | 640:2D:3.267299e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 90 | 704:2D:4.036827e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:4.012718e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:4.059490e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:4.960348e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:4.956495e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:5.282838e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:5.271900e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:5.553100e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:5.573900e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:6.074820e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 100 | 832:2D:6.065315e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 101 | 832:2D:6.082920e+00:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 102 | 
896:2D:7.501490e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 103 | 896:2D:7.459641e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 104 | 896:2D:7.432933e+00:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 105 | 96:2D:7.478700e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 96:2D:7.492900e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:7.713400e-02:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:8.901607e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 109 | 960:2D:8.962134e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 110 | 960:2D:8.947826e+00:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/results/outfile_cip1e6_3110_slice_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Sat 31 Oct 2020 05:28:37 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | ############ END INFOS 5 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 6 | 1024:2D:2.081872e+01:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 7 | 1024:2D:2.087465e+01:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 8 | 1024:2D:2.066772e+01:5000:0.000000e+00:8:31,334,520,011:0:0:0:0: 9 | 1088:2D:2.538284e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 10 | 1088:2D:2.557194e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 11 | 1088:2D:2.590555e+01:5000:0.000000e+00:8:35,381,880,011:0:0:0:0: 12 | 112:2D:1.590480e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 13 | 112:2D:1.558000e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 14 | 112:2D:1.593720e-01:5000:0.000000e+00:4:363,000,006:0:0:0:0: 15 | 1152:2D:3.030205e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 16 | 1152:2D:3.094189e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 17 | 1152:2D:3.113937e+01:5000:0.000000e+00:8:39,675,000,011:0:0:0:0: 18 | 1216:2D:3.601927e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 19 | 
1216:2D:3.611443e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 20 | 1216:2D:3.627361e+01:5000:0.000000e+00:8:44,213,880,011:0:0:0:0: 21 | 128:2D:2.180830e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 22 | 128:2D:2.154070e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 23 | 128:2D:2.054360e-01:5000:0.000000e+00:4:476,280,006:0:0:0:0: 24 | 1280:2D:4.179752e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 25 | 1280:2D:4.187148e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 26 | 1280:2D:4.135747e+01:5000:0.000000e+00:8:48,998,520,012:1:1:0:0: 27 | 144:2D:2.765050e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 28 | 144:2D:2.927560e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 29 | 144:2D:2.912770e-01:5000:0.000000e+00:4:604,920,006:0:0:0:0: 30 | 16:2D:4.856000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 31 | 16:2D:4.986000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 32 | 16:2D:5.680000e-03:5000:0.000000e+00:4:5,880,006:0:0:0:0: 33 | 160:2D:3.804280e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 34 | 160:2D:3.607270e-01:5000:0.000000e+00:4:748,920,006:0:0:0:0: 35 | 160:2D:3.462370e-01:5000:0.000000e+00:4:731,619,953:0:0:0:0: 36 | 176:2D:4.512440e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 37 | 176:2D:4.838720e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 38 | 176:2D:4.524110e-01:5000:0.000000e+00:4:908,280,006:0:0:0:0: 39 | 192:2D:5.659330e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 40 | 192:2D:5.588510e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 41 | 192:2D:5.679050e-01:5000:0.000000e+00:4:1,083,000,006:0:0:0:0: 42 | 208:2D:6.808940e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 43 | 208:2D:7.069490e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 44 | 208:2D:6.659140e-01:5000:0.000000e+00:4:1,273,080,006:0:0:0:0: 45 | 224:2D:8.197140e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 46 | 224:2D:8.071570e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 47 | 224:2D:8.148950e-01:5000:0.000000e+00:4:1,478,520,006:0:0:0:0: 48 | 
240:2D:9.471920e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 49 | 240:2D:9.360030e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 50 | 240:2D:9.592040e-01:5000:0.000000e+00:4:1,699,320,006:0:0:0:0: 51 | 256:2D:1.083180e+00:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 52 | 256:2D:1.141867e+00:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 53 | 256:2D:1.079031e+00:5000:0.000000e+00:4:1,935,480,006:0:0:0:0: 54 | 272:2D:1.217310e+00:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 55 | 272:2D:1.212238e+00:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 56 | 272:2D:1.204349e+00:5000:0.000000e+00:4:2,187,000,006:0:0:0:0: 57 | 288:2D:1.358968e+00:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 58 | 288:2D:1.378045e+00:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 59 | 288:2D:1.389541e+00:5000:0.000000e+00:4:2,453,880,006:0:0:0:0: 60 | 304:2D:1.546668e+00:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 61 | 304:2D:1.520274e+00:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 62 | 304:2D:1.501499e+00:5000:0.000000e+00:4:2,736,120,006:0:0:0:0: 63 | 32:2D:4.354000e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 64 | 32:2D:1.625100e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 65 | 32:2D:1.701000e-02:5000:0.000000e+00:4:27,000,006:0:0:0:0: 66 | 320:2D:1.699280e+00:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 67 | 320:2D:1.692372e+00:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 68 | 320:2D:1.688223e+00:5000:0.000000e+00:4:3,033,720,006:0:0:0:0: 69 | 384:2D:2.420574e+00:5000:0.000000e+00:8:4,377,720,011:0:0:0:0: 70 | 384:2D:2.453449e+00:5000:0.000000e+00:8:4,377,720,011:0:0:0:0: 71 | 384:2D:2.484507e+00:5000:0.000000e+00:8:4,377,720,011:0:0:0:0: 72 | 448:2D:3.365517e+00:5000:0.000000e+00:8:5,967,480,011:0:0:0:0: 73 | 448:2D:3.306219e+00:5000:0.000000e+00:8:5,967,480,011:0:0:0:0: 74 | 448:2D:3.358847e+00:5000:0.000000e+00:8:5,967,480,011:0:0:0:0: 75 | 48:2D:2.953900e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 76 | 48:2D:2.959500e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 77 | 
48:2D:2.549900e-02:5000:0.000000e+00:4:63,480,006:0:0:0:0: 78 | 512:2D:4.334323e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 79 | 512:2D:4.399613e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 80 | 512:2D:4.325031e+00:5000:0.000000e+00:8:7,803,000,011:0:0:0:0: 81 | 576:2D:5.471934e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 82 | 576:2D:5.561167e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 83 | 576:2D:5.447287e+00:5000:0.000000e+00:8:9,884,280,011:0:0:0:0: 84 | 64:2D:4.934000e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 85 | 64:2D:4.644500e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 86 | 64:2D:4.859100e-02:5000:0.000000e+00:4:115,320,006:0:0:0:0: 87 | 640:2D:6.747338e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 88 | 640:2D:6.689685e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 89 | 640:2D:6.695018e+00:5000:0.000000e+00:8:12,211,320,011:0:0:0:0: 90 | 704:2D:8.204256e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 91 | 704:2D:8.190865e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 92 | 704:2D:8.149945e+00:5000:0.000000e+00:8:14,784,120,011:0:0:0:0: 93 | 768:2D:9.997710e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 94 | 768:2D:9.875102e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 95 | 768:2D:9.975184e+00:5000:0.000000e+00:8:17,602,680,011:0:0:0:0: 96 | 80:2D:7.700400e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 97 | 80:2D:7.637900e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 98 | 80:2D:7.918300e-02:5000:0.000000e+00:4:182,520,006:0:0:0:0: 99 | 832:2D:1.236691e+01:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 100 | 832:2D:1.244996e+01:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 101 | 832:2D:1.224520e+01:5000:0.000000e+00:8:20,667,000,011:0:0:0:0: 102 | 896:2D:1.466424e+01:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 103 | 896:2D:1.465096e+01:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 104 | 896:2D:1.459491e+01:5000:0.000000e+00:8:23,977,080,011:0:0:0:0: 105 | 96:2D:1.135240e-01:5000:0.000000e+00:4:265,080,006:0:0:0:0: 106 | 
96:2D:1.159670e-01:5000:0.000000e+00:4:265,080,006:0:0:0:0: 107 | 96:2D:1.179660e-01:5000:0.000000e+00:4:265,080,006:0:0:0:0: 108 | 960:2D:1.815498e+01:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 109 | 960:2D:1.760985e+01:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 110 | 960:2D:1.778041e+01:5000:0.000000e+00:8:27,532,920,011:0:0:0:0: 111 | -------------------------------------------------------------------------------- /D/source/loadproblem.d: -------------------------------------------------------------------------------- 1 | module loadproblem; 2 | 3 | import mir.conv : to; 4 | import mir.ndslice; 5 | import numir.io; 6 | import std.regex; 7 | import std.stdio; 8 | 9 | // /++ 10 | // Implementation of npy loader 11 | // +/ 12 | // auto npyloader(string path) 13 | // { 14 | // uint dim = getDim(path); 15 | // return npyload!(dim)(path); 16 | // } 17 | 18 | /++ 19 | Get Dimension 20 | +/ 21 | uint getDim(string path) 22 | { 23 | uint dim = 0; 24 | auto r = regex(r"_[0-9]D_"); 25 | foreach (d; matchAll(path, r)) 26 | { 27 | dim = (d.hit[1]).to!uint - 48; 28 | } 29 | return dim; 30 | } 31 | 32 | /++ 33 | Implementation of an npy loader 1D 34 | +/ 35 | auto npyload(T, uint Dim : 1)(string path) 36 | { 37 | return loadNpy!(T, 2)(path); 38 | } 39 | /++ 40 | Implementation of an npy loader 2D 41 | +/ 42 | auto npyload(T, uint Dim : 2)(string path) 43 | { 44 | return loadNpy!(T, 3)(path); 45 | } 46 | /++ 47 | Implementation of an npy loader 3D 48 | +/ 49 | auto npyload(T, uint Dim : 3)(string path) 50 | { 51 | return loadNpy!(T, 4)(path); 52 | } 53 | 54 | -------------------------------------------------------------------------------- /D/source/multid/gaussseidel/redblack.d: -------------------------------------------------------------------------------- 1 | module multid.gaussseidel.redblack; 2 | 3 | import mir.math: fastmath, approxEqual; 4 | import mir.algorithm.iteration: Chequer, all; 5 | import mir.ndslice : slice, uninitSlice, sliced, Slice, strided; 6 | import 
multid.gaussseidel.sweep; 7 | import multid.tools.apply_poisson : compute_residual; 8 | import multid.tools.norm : nrmL2; 9 | import std.experimental.logger; 10 | import std.traits : isFloatingPoint; 11 | 12 | /++ enum to differentiate between sweep types +/ 13 | enum SweepType 14 | { 15 | ndslice = "ndslice", 16 | field = "field", 17 | slice = "slice", 18 | naive = "naive" 19 | } 20 | 21 | /++ 22 | This is a Gauss Seidel Red Black implementation 23 | it solves AU = F, with A being a poisson matrix like this 24 | 1 1 1 1 .. 1 25 | 1 4 -1 0 .. 1 26 | 1 -1 4 -1 .. 1 27 | . . 28 | . 0..-1 4 1 . 29 | 1 .. 1 1 1 1 30 | so the borders of U remain unchanged 31 | Params: 32 | F = slice of dimension Dim 33 | U = slice of dimension Dim 34 | R = slice of dimension Dim 35 | h = the distance between the grid points 36 | Returns: U 37 | +/ 38 | 39 | Slice!(T*, Dim) GS_RB(SweepType sweeptype = SweepType.ndslice, T, size_t Dim)( 40 | Slice!(const(T)*, Dim) F, 41 | Slice!(T*, Dim) U, 42 | const T h, 43 | size_t max_iter = 10_000_000, 44 | size_t norm_iter = 1_000, 45 | double eps = 1e-8, 46 | ) 47 | if ((1 <= Dim && Dim <= 8) && isFloatingPoint!T) 48 | { 49 | auto R = U.shape.slice!T; 50 | T norm; 51 | auto it = GS_RB!sweeptype(F, U, R, h, norm, max_iter, norm_iter, eps); 52 | logf("GS_RB converged after %d iterations with %e error", it, norm); 53 | return U; 54 | } 55 | 56 | @nogc @fastmath 57 | size_t GS_RB(SweepType sweeptype = SweepType.ndslice, T, size_t Dim)( 58 | Slice!(const(T)*, Dim) F, 59 | Slice!(T*, Dim) U, 60 | Slice!(T*, Dim) R, //residual 61 | const T h, 62 | out T norm, 63 | size_t max_iter = 10_000_000, 64 | size_t norm_iter = 1_000, 65 | double eps = 1e-8, 66 | ) 67 | if ((1 <= Dim && Dim <= 8) && isFloatingPoint!T) 68 | { 69 | mixin("alias sweep = sweep_" ~ sweeptype ~ ";"); 70 | 71 | const T h2 = h * h; 72 | size_t it; 73 | norm = 0; 74 | while (it < max_iter) 75 | { 76 | it++; 77 | if (it % norm_iter == 0) 78 | { 79 | compute_residual(R, F, U, h); 80 | norm = 
R.nrmL2; 81 | if (norm <= eps) 82 | { 83 | break; 84 | } 85 | 86 | } 87 | // red half-iteration 88 | sweep(Chequer.red, F, U, h2); 89 | // black half-iteration 90 | sweep(Chequer.black, F, U, h2); 91 | } 92 | return it; 93 | } 94 | 95 | unittest 96 | { 97 | const size_t N = 3; 98 | auto U1 = slice!double([N], 1.0); 99 | auto F1 = slice!double([N], 0.0); 100 | F1[1] = 1; 101 | GS_RB(F1, U1, 1.0, 1); 102 | assert(U1 == [1.0, 1.0 / 2.0, 1.0].sliced); 103 | 104 | auto U2 = slice!double([N, N], 1.0); 105 | auto F2 = slice!double([N, N], 0.0); 106 | F2[1, 1] = 1; 107 | 108 | auto expected = slice!double([N, N], 1.0); 109 | expected[1, 1] = 3.0 / 4.0; 110 | GS_RB(F2, U2, 1.0, 1); 111 | assert(expected == U2); 112 | 113 | auto U3 = slice!double([N, N, N], 1.0); 114 | auto F3 = slice!double([N, N, N], 0.0); 115 | F3[1, 1, 1] = 1; 116 | GS_RB(F3, U3, 1.0, 1); 117 | 118 | auto expected3 = slice!double([N, N, N], 1.0); 119 | expected3[1, 1, 1] = 5.0; 120 | expected3[1, 1, 1] *= 1 / 6.0; 121 | assert(expected3 == U3); 122 | 123 | } 124 | 125 | unittest 126 | { 127 | import multid.gaussseidel.sweep; 128 | import multid.tools.util : randomMatrix; 129 | 130 | const size_t N = 10; 131 | const double h2 = 1.0; 132 | 133 | auto U = randomMatrix!(double, 1)(N); 134 | auto U1 = U.dup; 135 | auto U2 = U.dup; 136 | auto U3 = U.dup; 137 | const F = slice!double([N], 1.0); 138 | 139 | sweep_naive(Chequer.red, F, U, h2); 140 | sweep_field(Chequer.red, F, U1, h2); 141 | sweep_slice(Chequer.red, F, U2, h2); 142 | sweep_ndslice(Chequer.red, F, U3, h2); 143 | assert(U == U1); 144 | assert(U1 == U2); 145 | assert(all!approxEqual(U, U3)); 146 | 147 | sweep_naive(Chequer.black, F, U, h2); 148 | sweep_field(Chequer.black, F, U1, h2); 149 | sweep_slice(Chequer.black, F, U2, h2); 150 | sweep_ndslice(Chequer.black, F, U3, h2); 151 | assert(U == U1); 152 | assert(U1 == U2); 153 | assert(all!approxEqual(U, U3)); 154 | 155 | } 156 | 157 | unittest 158 | { 159 | import multid.gaussseidel.sweep; 160 | 
import multid.tools.util : randomMatrix; 161 | 162 | const size_t N = 10; 163 | const double h2 = 1.0; 164 | 165 | auto U = randomMatrix!(double, 2)(N); 166 | auto U1 = U.dup; 167 | auto U2 = U.dup; 168 | auto U3 = U.dup; 169 | const F = slice!double([N, N], 1.0); 170 | 171 | sweep_naive(Chequer.red, F, U, h2); 172 | sweep_field(Chequer.red, F, U1, h2); 173 | sweep_slice(Chequer.red, F, U2, h2); 174 | sweep_ndslice(Chequer.red, F, U3, h2); 175 | assert(U == U1); 176 | assert(U1 == U2); 177 | assert(all!approxEqual(U, U3)); 178 | 179 | sweep_naive(Chequer.black, F, U, h2); 180 | sweep_field(Chequer.black, F, U1, h2); 181 | sweep_slice(Chequer.black, F, U2, h2); 182 | sweep_ndslice(Chequer.black, F, U3, h2); 183 | assert(U == U1); 184 | assert(U1 == U2); 185 | assert(all!approxEqual(U, U3)); 186 | } 187 | 188 | unittest 189 | { 190 | import multid.gaussseidel.sweep; 191 | import multid.tools.util : randomMatrix; 192 | 193 | const size_t N = 10; 194 | auto U = randomMatrix!(double, 3)(N); 195 | auto U1 = U.dup; 196 | auto U2 = U.dup; 197 | auto U3 = U.dup; 198 | const F = slice!double([N, N, N], 1.0); 199 | const double h2 = 1.0; 200 | 201 | sweep_naive(Chequer.red, F, U, h2); 202 | sweep_field(Chequer.red, F, U1, h2); 203 | sweep_slice(Chequer.red, F, U2, h2); 204 | sweep_ndslice(Chequer.red, F, U3, h2); 205 | // import std.stdio; 206 | // writeln(U - U1); 207 | assert(U == U1); 208 | assert(U1 == U2); 209 | assert(all!approxEqual(U, U3)); 210 | 211 | sweep_naive(Chequer.black, F, U, h2); 212 | sweep_field(Chequer.black, F, U1, h2); 213 | sweep_slice(Chequer.black, F, U2, h2); 214 | sweep_ndslice(Chequer.black, F, U3, h2); 215 | assert(U == U1); 216 | assert(U1 == U2); 217 | assert(all!approxEqual(U, U3)); 218 | } 219 | -------------------------------------------------------------------------------- /D/source/multid/multigrid/multigrid.d: -------------------------------------------------------------------------------- 1 | module multid.multigrid.multigrid; 2 | 3 | 
/++
Solve AU = F with multigrid cycles, where A is the Poisson matrix.

Builds a PoissonCycle for the requested sweep implementation and hands it to
`multigrid`, which runs at most `iter_cycles` cycles and stops early once the
residual norm reported by the cycle is at most `eps` scaled by the number of
elements of `U`.

Params:
    F = right-hand side, a Dim-dimensional slice
    U = initial guess, a Dim-dimensional slice
    level = depth of the multigrid cycle; per the original notes, 0 selects the
            maximum depth
    mu = cycle shape, forwarded to PoissonCycle (per the original notes:
         1 = V cycle, 2 = W cycle)
    v1 = forwarded to PoissonCycle (presumably the number of pre-smoothing
         sweeps -- confirm in cycle.d)
    v2 = forwarded to PoissonCycle (presumably the number of post-smoothing
         sweeps -- confirm in cycle.d)
    iter_cycles = maximum number of cycles
    sweep = sweep implementation: "slice", "naive" or "field"; any other value
            selects the ndslice implementation
    eps = stopping tolerance, before scaling by the element count
    h = grid spacing forwarded to PoissonCycle; how the default 0 is
        interpreted is decided there

Returns: the slice returned by `multigrid` for `U`
+/
Slice!(T*, Dim) poisson_multigrid(T, size_t Dim)(
    Slice!(T*, Dim) F,
    Slice!(T*, Dim) U,
    uint level,
    uint mu,
    uint v1,
    uint v2,
    size_t iter_cycles,
    string sweep = "ndslice",
    T eps = 1e-6,
    T h = 0)
{
    Cycle!(T, Dim) solver;

    // The sweep type is a compile-time parameter of PoissonCycle, so every
    // name needs its own instantiation; unknown names fall back to ndslice.
    if (sweep == "slice")
    {
        solver = new PoissonCycle!(T, Dim, SweepType.slice)(F, mu, level, h, v1, v2);
    }
    else if (sweep == "naive")
    {
        solver = new PoissonCycle!(T, Dim, SweepType.naive)(F, mu, level, h, v1, v2);
    }
    else if (sweep == "field")
    {
        solver = new PoissonCycle!(T, Dim, SweepType.field)(F, mu, level, h, v1, v2);
    }
    else
    {
        solver = new PoissonCycle!(T, Dim, SweepType.ndslice)(F, mu, level, h, v1, v2);
    }

    return multigrid!(T, Dim)(solver, U, iter_cycles, eps);
}
// Tests 1D: prolongating every second value of 0 .. n must give back 0 .. n.
unittest
{
    import mir.ndslice : iota, sliced;

    // 5 coarse points -> 9 fine points.
    {
        auto coarse = [0, 2, 4, 6, 8].sliced!double;
        auto wanted = 9.iota.slice;
        auto fine = prolongation!(double, 1)(coarse, wanted.shape);
        assert(fine == wanted);
    }

    // 6 coarse points -> 10 fine points; the last coarse value is 9.
    {
        auto coarse = [0, 2, 4, 6, 8, 9].sliced!long;
        auto wanted = 10.iota.slice;
        auto fine = prolongation!(long, 1)(coarse, wanted.shape);
        assert(fine == wanted);
    }

    // 5 coarse points -> 8 fine points; the last coarse value is 7.
    {
        auto coarse = [0, 2, 4, 6, 7].sliced!long;
        auto wanted = 8.iota.slice;
        auto fine = prolongation!(long, 1)(coarse, wanted.shape);
        assert(fine == wanted);
    }

}
76 | ].sliced(5, 5); 77 | 78 | auto correct = iota([9, 9]).slice; 79 | auto ret = prolongation!(double, 2)(arr, correct.shape); 80 | assert(ret == correct); 81 | auto arr2 = [0., 2., 4., 6., 7., 16., 18., 20., 22., 23., 32., 34., 36., 82 | 38., 39., 48., 50., 52., 54., 55., 56., 58., 60., 62., 63.].sliced(5, 5); 83 | auto correct2 = iota([8, 8]).slice; 84 | auto ret2 = prolongation!(double, 2)(arr2, correct2.shape); 85 | assert(ret2 == correct2); 86 | } 87 | 88 | unittest 89 | { 90 | import mir.ndslice : fuse; 91 | 92 | auto arr = [ 93 | [0.70986027, 0.05107005, 0.36803441, 0.91042483], 94 | [0.18354898, 0.5568611, 0.94596048, 0.99127882], 95 | [0.63025087, 0.33234683, 0.65401546, 0.98237209], 96 | [0.66271802, 0.48028311, 0.79653074, 0.18756112] 97 | ].fuse; 98 | auto correct6 = [ 99 | [0.70986027, 0.38046516, 0.05107005, 0.20955223, 0.36803441, 0.91042483], 100 | [0.44670463, 0.3753351, 0.30396557, 0.48048151, 0.65699745, 0.95085182], 101 | [0.18354898, 0.37020504, 0.5568611, 0.75141079, 0.94596048, 0.99127882], 102 | [0.40689993, 0.42575195, 0.44460397, 0.62229597, 0.79998797, 0.98682545], 103 | [0.63025087, 0.48129885, 0.33234683, 0.49318115, 0.65401546, 0.98237209], 104 | [0.66271802, 0.57150057, 0.48028311, 0.63840692, 0.79653074, 0.18756112] 105 | ].fuse; 106 | auto correct7 = [ 107 | [0.70986027, 0.38046516, 0.05107005, 0.20955223, 0.36803441, 0.63922962, 0.91042483], 108 | [0.44670463, 0.3753351, 0.30396557, 0.48048151, 0.65699745, 0.80392463, 0.95085182], 109 | [0.18354898, 0.37020504, 0.5568611, 0.75141079, 0.94596048, 0.96861965, 0.99127882], 110 | [0.40689993, 0.42575195, 0.44460397, 0.62229597, 0.79998797, 0.89340671, 0.98682545], 111 | [0.63025087, 0.48129885, 0.33234683, 0.49318115, 0.65401546, 0.81819377, 0.98237209], 112 | [0.64648445, 0.52639971, 0.40631497, 0.56579404, 0.7252731, 0.65511985, 0.5849666], 113 | [0.66271802, 0.57150057, 0.48028311, 0.63840692, 0.79653074, 0.49204593, 0.18756112] 114 | ].fuse; 115 | auto ret6 = prolongation!(double, 
// Prolongation must carry the coarse grid values over unchanged: checks the
// first row/column and the trailing rows/columns of a 4x4 random grid
// prolongated to 6x6 and to 7x7.
unittest
{
    import multid.tools.util : randomMatrix;

    import mir.ndslice : strided;

    immutable size_t N = 4;

    auto coarse = randomMatrix!(double, 2)(N);

    auto fine6 = prolongation!(double, 2)(coarse, [6, 6]);
    auto fine7 = prolongation!(double, 2)(coarse, [7, 7]);

    // 6x6 target: the last two rows/columns are compared without striding in
    // that direction.
    assert(fine6[0, 0 .. $].stride == coarse[0, 0 .. $ - 1]);
    assert(fine6[0 .. $, 0].stride == coarse[0 .. $ - 1, 0]);
    assert(fine6[$ - 2 .. $, 0 .. $].strided!1(2) == coarse[$ - 2 .. $, 0 .. $ - 1]);
    assert(fine6[0 .. $, $ - 2 .. $].strided!0(2) == coarse[0 .. $ - 1, $ - 2 .. $]);
    assert(fine6[$ - 2 .. $, $ - 2 .. $] == coarse[$ - 2 .. $, $ - 2 .. $]);

    // 7x7 target: every second fine point is a coarse point.
    assert(fine7[0, 0 .. $].stride == coarse[0, 0 .. $]);
    assert(fine7[0 .. $, 0].stride == coarse[0 .. $, 0]);
    assert(fine7[$ - 1 .. $, 0 .. $].strided!1(2) == coarse[$ - 1 .. $, 0 .. $]);
    assert(fine7[0 .. $, $ - 1 .. $].strided!0(2) == coarse[0 .. $, $ - 1 .. $]);
}
/++
Calculates A * U, where A is the Poisson matrix.

Allocating convenience overload: creates a result slice with the shape of `U`
and fills it through the in-place overload below.

Params:
    U = Dim-dimensional slice the operator is applied to
    h = distance between grid points
Returns: a newly allocated slice x = A * U
+/
Slice!(T*, Dim) apply_poisson(T, size_t Dim)(Slice!(const(T)*, Dim) U, const T h)
{
    auto x = U.shape.slice!T;
    apply_poisson(x, U, h);
    return x;
}

/++
In-place variant: writes A * U into `x`.

Border elements of `x` are copied from `U`; every interior element becomes
(sum of its neighbours - 2 * Dim * u) / h^2, which is what the unit tests in
this module check against explicit loops.

Params:
    x = output slice; expected to have the same shape as `U`
    U = Dim-dimensional slice the operator is applied to
    h = distance between grid points
+/
@nogc @fastmath
void apply_poisson(T, size_t Dim)(Slice!(T*, Dim) x, Slice!(const(T)*, Dim) U, const T h)
{
    assumeSameShape(x, U);
    // The operator is only evaluated in the interior; the border is taken
    // over from U unchanged.
    eachOnBorder!"a = b"(x, U);
    // Interior stencil, assigned to the view of x without its borders.
    x.dropBorders[] = (1 / (h * h)) * U.withNeighboursSum.map!((u, sum) => sum - 2 * Dim * u);
}
/++
Allocating overload: computes the residual F - AU (A being the Poisson matrix)
into a freshly allocated slice.

Params:
    F = right-hand side
    U = current approximation; the result has its shape
    current_h = distance between grid points on the current grid
Returns: a new slice filled by the in-place `compute_residual` overload
+/
Slice!(T*, Dim) compute_residual(T, size_t Dim)(Slice!(const(T)*, Dim) F, Slice!(const(T)*, Dim) U, const T current_h)
{
    auto residual = slice!T(U.shape);
    // The result takes its shape from U, so this checks that F matches too.
    assert(residual.shape == F.shape);
    compute_residual(residual, F, U, current_h);
    return residual;
}
/++
Computes the L2 norm of a slice: the square root of the sum of the squares of
all its elements.

The elements are read through the slice's `field`, i.e. in storage order.

Params:
    v = slice whose norm is computed
Returns: the square root of the sum of squares
+/
@fastmath
T nrmL2(T, size_t Dim)(Slice!(T*, Dim) v)
{
    auto values = v.field;
    T sumOfSquares = 0.0;
    foreach (i; 0 .. values.length)
    {
        sumOfSquares += values[i] * values[i];
    }
    return sqrt(sumOfSquares);
}
/++
Generates a Dim-dimensional slice with N elements along every dimension,
filled with values drawn from `uniformVar!T(0, 1)`.

Params:
    N = number of elements per dimension
Returns: the randomly filled slice
+/
Slice!(T*, Dim) randomMatrix(T, size_t Dim)(size_t N)
{
    import mir.random.algorithm: randomSlice;
    import mir.random.variable: uniformVar;

    // Same extent N along every dimension.
    size_t[Dim] extents;
    extents[] = N;

    auto distribution = uniformVar!T(0, 1);
    return randomSlice(distribution, extents);
}
/++
Shared start-up helpers for the benchmark executables: command line options,
a start delay and wall-clock time reporting.

Typical order as suggested by the helpers themselves: start(), getopt(args),
wait_till(), ... measured work ..., print_time().
+/
template init()
{
    import core.thread : Thread;
    import mir.conv : to;
    import std.datetime.stopwatch : StopWatch, msecs;
    import std.experimental.logger : infof, globalLogLevel, LogLevel;
    import std.getopt : getopt;

    // Used first to time the start-up phase, then the measured run.
    StopWatch sw;

    // Time in milliseconds that wait_till() lets pass since start();
    // option "d|D".
    uint delay = 500;
    // When set, wait_till() raises the global log level to `all`; option "v".
    bool verbose = false;
    // Problem file to load; option "p|P".
    string path = default_path;
    // Name of the sweep implementation; option "s".
    string sweep = "ndslice";
    immutable string default_path = "../problems/problem_2D_100.npy";

    /// Restarts the stopwatch and sets the global log level to `info`.
    void start()
    {
        sw.reset;
        sw.start;
        globalLogLevel(LogLevel.info);

    }

    /// Sleeps until `delay` milliseconds have passed since the stopwatch was
    /// started, then restarts the stopwatch so that it times only what follows.
    void wait_till()
    {
        if (verbose)
        {
            globalLogLevel(LogLevel.all);
        }
        // Remaining milliseconds; not positive when the delay has already
        // elapsed, in which case no sleep happens.
        auto rest = delay - sw.peek.total!"msecs";
        if (0 < rest)
        {
            Thread.sleep(msecs(rest));
        }
        sw.stop;
        sw.reset;
        sw.start;
    }

    /// Parses the command line into `path`, `delay`, `verbose` and `sweep`.
    // NOTE(review): this function has the same name as the imported
    // std.getopt.getopt that it calls -- confirm the inner call resolves to
    // the library function and not to this one.
    void getopt(string[] argv)
    {
        getopt(argv, "p|P", &path, "d|D", &delay, "v", &verbose, "s", &sweep);

    }

    /// Stops the stopwatch and logs the elapsed time in seconds at info level.
    void print_time()
    {
        sw.stop;
        // Microseconds converted to seconds.
        infof("%e", sw.peek
                .total!"usecs"
                .to!double / 1_000_000.0);

    }

}
def main():
    """Benchmark one multigrid solve of the problem selected on the command line.

    Loads the problem, performs a short warm-up solve on the default problem,
    configures the log level of the multigrid package, waits via ``wait`` and
    then logs the wall-clock time of the measured solve.
    """
    options = getopts()

    U, F = load_problem(options.path)
    # Warm up on the smaller default problem so the warm-up does not take too
    # long when the benchmarked problem is big.
    warmup_U, warmup_F = load_problem(DEFAULT_PROBLEM)
    from multipy.multigrid import poisson_multigrid
    poisson_multigrid(warmup_F, warmup_U, 0, 1, 1, 1, 1)

    level = logging.DEBUG if options.verbose else logging.INFO
    logging.getLogger('multipy.multigrid').setLevel(level=level)

    wait(options)

    started = time.perf_counter()
    poisson_multigrid(F, U, 0, 2, 2, 1, 100)
    elapsed = time.perf_counter() - started

    logging.info(elapsed)
def _plot_matching_results(str_startswith, draw):
    """Draw every file in ``results`` whose name starts with ``str_startswith``.

    Shared implementation of the three ``plot_multiple_*`` functions.

    @param str_startswith prefix a result file name must have to be plotted
    @param draw function called as ``draw(input_data, str_legend)`` with the
                two values returned by ``read_file``
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the curves and of the legend reproducible between runs.
    for file in sorted(os.listdir("results")):
        if file.startswith(str_startswith):
            draw(*read_file(f"results/{file}"))
    plt.legend()
    plt.minorticks_on()
    plt.grid(color='b', linestyle='-', linewidth=0.2, alpha=0.5)


def plot_multiple_flops_sec(str_startswith):
    """Plot flops per second over problem size for all matching result files.

    @param str_startswith prefix of the result file names to include
    """
    _plot_matching_results(str_startswith, draw_flops_sec)


def plot_multiple_flops(str_startswith):
    """Plot flops over problem size for all matching result files.

    @param str_startswith prefix of the result file names to include
    """
    _plot_matching_results(str_startswith, draw_flops)


def plot_multiple_times(str_startswith):
    """Plot run time over problem size for all matching result files.

    @param str_startswith prefix of the result file names to include
    """
    _plot_matching_results(str_startswith, draw_time)
def gauss_seidel(A, F, U=None, eps=1e-10, max_iter=1_000_000):
    """Solve AU = F with Gauss-Seidel iterations.

    Iterates until the L2 norm of the residual F - A @ U drops below ``eps``
    or ``max_iter`` iterations have been performed.

    @param A n x m matrix (only the first dimension is used as the number of
             unknowns)
    @param F n vector, the right-hand side
    @param U optional start vector; defaults to zeros. It is not modified.
    @param eps tolerance on the residual norm
    @param max_iter maximum number of iterations
    @return n vector, the last iterate
    """
    n, *_ = A.shape
    if U is None:
        U = np.zeros_like(F)
    else:
        U = np.asarray(U)
    # An integer (or boolean) F or U would make every work array integer too,
    # so each update would be truncated and the iteration could never
    # converge. Promote to float in that case; float and complex inputs keep
    # their dtype.
    if not np.issubdtype(U.dtype, np.inexact):
        U = U.astype(np.float64)

    for _ in range(max_iter):
        U_next = np.zeros_like(U)
        for i in range(n):
            # Entries before i already use the values of this iteration,
            # entries after i still use the previous iterate.
            left = np.dot(A[i, :i], U_next[:i])
            right = np.dot(A[i, i + 1:], U[i + 1:])
            U_next[i] = (F[i] - left - right) / (A[i, i])

        U = U_next
        if np.linalg.norm(F - A @ U) < eps:
            break

    return U
# --- 1D case ---
@jit(nopython=True, fastmath=True)
def sweep_1D(color, F, U, h2):
    """
    Do one half sweep (one color) on a 1D grid, updating U in place.

    Every updated point becomes
    (left neighbour + right neighbour - F * h2) / 2.
    With color = 1 the points with odd index 1, 3, ... are updated, with
    color = 0 the points with even index 2, 4, ...; the first and the last
    point are never written.

    @param color 1 = red 0 for black
    @param F right-hand side, same length as U
    @param U current approximation, modified in place
    @param h2 is distance between grid points squared
    """
    n = F.shape[0]
    # Target points, their left neighbours, and their right neighbours are
    # three strided views shifted by one index against each other.
    U[2 - color:n - 1:2] = (U[1 - color:n - 2:2] + U[3 - color::2] -
                            F[2 - color:n - 1:2] * h2) / (2.0)
# --- 3D case ---
@jit(nopython=True, fastmath=True)
def sweep_3D(color, F, U, h2):
    """
    Do one half sweep (one color) on a 3D grid, updating U in place.

    Every updated interior point becomes
    (sum of its six neighbours - F * h2) / 6.
    The points of one color are covered by four strided assignments, one per
    parity combination of the first two indices. With color = 1 the points
    whose index sum is odd are updated, with color = 0 those whose index sum
    is even.

    @param color 1 = red 0 for black
    @param F right-hand side, same shape as U
    @param U current approximation, modified in place
    @param h2 is distance between grid points squared
    """
    m, n, o = F.shape

    # first index even, second index odd
    U[2:m - 1:2, 1:n - 1:2, 1 + color:o - 1:2] = (
        U[1:m - 2:2, 1:n - 1:2, 1 + color:o - 1:2] +
        U[3:m:2, 1:n - 1:2, 1 + color:o - 1:2] +
        U[2:m - 1:2, 0:n - 2:2, 1 + color:o - 1:2] +
        U[2:m - 1:2, 2:n:2, 1 + color:o - 1:2] +
        U[2:m - 1:2, 1:n - 1:2, color:o - 2:2] +
        U[2:m - 1:2, 1:n - 1:2, 2 + color:o:2] -
        F[2:m - 1:2, 1:n - 1:2, 1 + color:o - 1:2] * h2) / (6.0)

    # first index odd, second index odd
    U[1:m - 1:2, 1:n - 1:2, 2 - color:o - 1:2] = (
        U[0:m - 2:2, 1:n - 1:2, 2 - color:o - 1:2] +
        U[2:m:2, 1:n - 1:2, 2 - color:o - 1:2] +
        U[1:m - 1:2, 0:n - 2:2, 2 - color:o - 1:2] +
        U[1:m - 1:2, 2:n:2, 2 - color:o - 1:2] +
        U[1:m - 1:2, 1:n - 1:2, 1 - color:o - 2:2] +
        U[1:m - 1:2, 1:n - 1:2, 3 - color:o:2] -
        F[1:m - 1:2, 1:n - 1:2, 2 - color:o - 1:2] * h2) / (6.0)

    # first index odd, second index even
    U[1:m - 1:2, 2:n - 1:2, 1 + color:o - 1:2] = (
        U[0:m - 2:2, 2:n - 1:2, 1 + color:o - 1:2] +
        U[2:m:2, 2:n - 1:2, 1 + color:o - 1:2] +
        U[1:m - 1:2, 1:n - 2:2, 1 + color:o - 1:2] +
        U[1:m - 1:2, 3:n:2, 1 + color:o - 1:2] +
        U[1:m - 1:2, 2:n - 1:2, color:o - 2:2] +
        U[1:m - 1:2, 2:n - 1:2, 2 + color:o:2] -
        F[1:m - 1:2, 2:n - 1:2, 1 + color:o - 1:2] * h2) / (6.0)

    # first index even, second index even
    U[2:m - 1:2, 2:n - 1:2, 2 - color:o - 1:2] = (
        U[1:m - 2:2, 2:n - 1:2, 2 - color:o - 1:2] +
        U[3:m:2, 2:n - 1:2, 2 - color:o - 1:2] +
        U[2:m - 1:2, 1:n - 2:2, 2 - color:o - 1:2] +
        U[2:m - 1:2, 3:n:2, 2 - color:o - 1:2] +
        U[2:m - 1:2, 2:n - 1:2, 1 - color:o - 2:2] +
        U[2:m - 1:2, 2:n - 1:2, 3 - color:o:2] -
        F[2:m - 1:2, 2:n - 1:2, 2 - color:o - 1:2] * h2) / (6.0)
def multigrid(cycle, U, eps, iter_cycle):
    """Run multigrid cycles until the residual norm is small enough.

    @param cycle callable performing one cycle, ``U = cycle(U)``, that also
                 provides ``cycle.norm(U)`` for the residual norm
    @param U start approximation (numpy array of any dimension)
    @param eps tolerance per grid point; it is multiplied by the number of
               grid points of U before being compared with the norm
    @param iter_cycle maximum number of cycles
    @return the approximation after the last performed cycle
    """
    # Scale the epsilon with the number of grid points. U.size is the total
    # element count, so this is also right for 1D, 3D and non-square grids
    # (shape[0] squared is the point count only for square 2D grids).
    eps *= U.size
    for i in range(1, iter_cycle + 1):
        U = cycle(U)
        norm = cycle.norm(U)
        logger.debug(f"Residual has a L2-Norm of {norm:.4} after {i} MGcycle")
        if norm <= eps:
            logger.info(
                f"converged after {i} cycles with {norm:.4} error")
            break
    return U
@abstractmethod 50 | def norm(self, U): 51 | pass 52 | 53 | @abstractmethod 54 | def restriction(self, r): 55 | pass 56 | 57 | def _residual(self, U): 58 | return self._compute_residual(self.F, U, self.h) 59 | 60 | def _compute_correction(self, r, l, h): 61 | e = np.zeros_like(r) 62 | for _ in range(self.mu): 63 | e = self.do_cycle(r, e, l, h) 64 | return e 65 | 66 | def do_cycle(self, F, U, l, h): 67 | 68 | if l <= 1 or U.shape[0] <= 1: 69 | return self._solve(F, U, h) 70 | 71 | U = self._presmooth(F=F, U=U, h=h) 72 | 73 | r = self._compute_residual(F=F, U=U, h=h) 74 | 75 | r = self.restriction(r) 76 | 77 | e = self._compute_correction(r, l - 1, 2 * h) 78 | 79 | e = prolongation(e, U.shape) 80 | 81 | # correction 82 | U += e 83 | 84 | return self._postsmooth(F=F, U=U, h=h) 85 | 86 | 87 | class PoissonCycle(AbstractCycle): 88 | def __init__(self, F, v1, v2, mu, l, eps=1e-8, h=None): 89 | super().__init__(F, v1, v2, mu, l, eps, h) 90 | 91 | def _presmooth(self, F, U, h=None): 92 | return GS_RB( 93 | F, 94 | U=U, 95 | h=h, 96 | max_iter=self.v1, 97 | eps=self.eps) 98 | 99 | def _postsmooth(self, F, U, h=None): 100 | return GS_RB( 101 | F, 102 | U=U, 103 | h=h, 104 | max_iter=self.v2, 105 | eps=self.eps) 106 | 107 | def _compute_residual(self, F, U, h): 108 | return F - apply_poisson(U, h) 109 | 110 | def _solve(self, F, U, h): 111 | return GS_RB( 112 | F=F, 113 | U=U, 114 | h=h, 115 | max_iter=100_000, 116 | eps=self.eps, 117 | norm_iter=5) 118 | 119 | def norm(self, U): 120 | residual = self._residual(U) 121 | return np.linalg.norm(residual) 122 | 123 | def restriction(self, r): 124 | return weighted_restriction(r) 125 | -------------------------------------------------------------------------------- /Python/multipy/multigrid/prolongation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import jit 3 | 4 | 5 | def prolongation(e, fine_shape): 6 | """ 7 | This interpolates/ prolongates to a grid 
of fine_shape 8 | @param e 9 | @param fine_shape targeted shape 10 | @return grid with fine_shape 11 | """ 12 | 13 | # indicator for Dimension 14 | alpha = len(e.shape) 15 | # initialize result with respect to the wanted shape 16 | w = np.zeros(fine_shape) 17 | # Index of the second to the last element to mention in e (depends on the 18 | # shape of w) 19 | end = e.shape[0] - (w.shape[0] + 1) % 2 20 | # Index of the second to the last element to mention in w (depends on the 21 | # shape of w) 22 | wend = w.shape[0] - (w.shape[0] + 1) % 2 23 | 24 | # Case: Dimension 1 25 | if alpha == 1: 26 | prolongation_1D(w, e, end) 27 | # Case: Dimension 2 28 | elif alpha == 2: 29 | prolongation_2D(w, e, end, wend) 30 | # Case: Dimension 3 31 | elif alpha == 3: 32 | prolongation_3D(w, e, end, wend) 33 | 34 | # Case: Error 35 | else: 36 | raise ValueError("prolongation: invalid dimension") 37 | return w 38 | 39 | 40 | @jit(nopython=True, fastmath=True) 41 | def prolongation_1D(w, e, end): 42 | # copy e to every second index in w 43 | w[:-1:2] = e[:-1] 44 | # Interpolate the missing elements in w with their two neighbors 45 | w[1:-1:2] = (e[: end - 1] + e[1:end]) / 2 46 | # set last index since this one was skipped before 47 | w[-1] = e[-1] 48 | 49 | 50 | @jit(nopython=True, fastmath=True) 51 | def prolongation_2D(w, e, end, wend): 52 | # copy elements from e to w 53 | w[:-1:2, :-1:2] = e[:-1, :-1] 54 | w[:-1:2, -1] = e[:-1, -1] 55 | w[-1, :-1:2] = e[-1, :-1] 56 | w[-1, -1] = e[-1, -1] 57 | 58 | # interpolate elements horizontally 59 | w[:-1:2, 1:-1:2] = (e[:-1, : end - 1] + e[:-1, 1:end]) / 2 60 | w[-1, 1:-1:2] = (e[-1, : end - 1] + e[-1, 1:end]) / 2 61 | 62 | # interpolate elements vertically 63 | w[1:-1:2, :-1:2] = (e[: end - 1, :-1] + e[1:end, :-1]) / 2 64 | w[1:-1:2, -1] = (e[: end - 1, -1] + e[1:end, -1]) / 2 65 | 66 | # interpolate missing elements: average of 4 neighbors 67 | w[1:-1:2, 1:-1:2] = ( 68 | w[2:wend:2, 1:wend:2] + 69 | w[: wend - 1: 2, 1:wend:2] + 70 | 
w[1:wend:2, : wend - 1: 2] + 71 | w[1:wend:2, 2:wend:2] 72 | ) / 4 73 | 74 | 75 | @jit(nopython=True, fastmath=True) 76 | def prolongation_3D(w, e, end, wend): 77 | # copy elements from e to w 78 | w[:-1:2, :-1:2, :-1:2] = e[:-1, :-1, :-1] 79 | w[:-1:2, -1, -1] = e[:-1, -1, -1] 80 | w[-1, :-1:2, -1] = e[-1, :-1, -1] 81 | w[-1, -1, :-1:2] = e[-1, -1, :-1] 82 | w[:-1:2, :-1:2, -1] = e[:-1, :-1, -1] 83 | w[:-1:2, -1, :-1:2] = e[:-1, -1, :-1] 84 | w[-1, :-1:2, :-1:2] = e[-1, :-1, :-1] 85 | w[-1, -1, -1] = e[-1, -1, -1] 86 | 87 | # interpolate elements horizontally 88 | w[:-1:2, 1:-1:2, :-1:2] = ( 89 | e[:-1, : end - 1, :-1] + e[:-1, 1:end, :-1] 90 | ) / 2 91 | w[:-1:2, -1, 1:-1:2] = (e[:-1, -1, : end - 1] + e[:-1, -1, 1:end]) / 2 92 | w[:-1:2, :-1:2, 1:-1:2] = ( 93 | e[:-1, :-1, : end - 1] + e[:-1, :-1, 1:end] 94 | ) / 2 95 | w[:-1:2, 1:-1:2, -1] = (e[:-1, : end - 1, -1] + e[:-1, 1:end, -1]) / 2 96 | w[:-1:2, 1:-1:2, 1:-1:2] = ( 97 | e[:-1, : end - 1, : end - 1] + 98 | e[:-1, 1:end, 1:end] + 99 | e[:-1, : end - 1, 1:end] + 100 | e[:-1, 1:end, : end - 1]) / 4 101 | 102 | # special case 103 | w[-1, 1:-1:2, :-1:2] = (e[-1, : end - 1, :-1] + e[-1, 1:end, :-1]) / 2 104 | w[-1, -1, 1:-1:2] = (e[-1, -1, : end - 1] + e[-1, -1, 1:end]) / 2 105 | w[-1, :-1:2, 1:-1:2] = (e[-1, :-1, : end - 1] + e[-1, :-1, 1:end]) / 2 106 | w[-1, 1:-1:2, -1] = (e[-1, : end - 1, -1] + e[-1, 1:end, -1]) / 2 107 | w[-1, 1:-1:2, 1:-1:2] = ( 108 | e[-1, : end - 1, : end - 1] + 109 | e[-1, 1:end, 1:end] + 110 | e[-1, : end - 1, 1:end] + 111 | e[-1, 1:end, : end - 1] 112 | ) / 4 113 | 114 | # interpolate elements vertically 115 | w[1:-1:2, :-1:2, :-1:2] = ( 116 | e[: end - 1, :-1, :-1] + e[1:end, :-1, :-1] 117 | ) / 2 118 | w[1:-1:2, -1, :-1:2] = (e[: end - 1, -1, :-1] + e[1:end, -1, :-1]) / 2 119 | w[1:-1:2, :-1:2, -1] = (e[: end - 1, :-1, -1] + e[1:end, :-1, -1]) / 2 120 | w[1:-1:2, -1, -1] = (e[: end - 1, -1, -1] + e[1:end, -1, -1]) / 2 121 | w[1:-1:2, -1, 1:-1:2] = ( 122 | w[1:-1:2, -1, : wend - 1: 
2] + w[1:-1:2, -1, 2:wend:2] 123 | ) / 2 124 | w[1:-1:2, :-1:2, 1:-1:2] = ( 125 | w[1:-1:2, :-1:2, : wend - 1: 2] + w[1:-1:2, :-1:2, 2:wend:2] 126 | ) / 2 127 | w[1:-1:2, 1:-1:2, -1] = ( 128 | w[1:-1:2, : wend - 1: 2, -1] + w[1:-1:2, 2:wend:2, -1] 129 | ) / 2 130 | w[1:-1:2, 1:-1:2, :-1:2] = ( 131 | w[1:-1:2, : wend - 1: 2, :-1:2] + w[1:-1:2, 2:wend:2, :-1:2] 132 | ) / 2 133 | w[1:-1:2, 1:-1:2, 1:-1:2] = ( 134 | w[1:-1:2, 1:-1:2, : wend - 1: 2] + w[1:-1:2, 1:-1:2, 2:wend:2] 135 | ) / 2 136 | -------------------------------------------------------------------------------- /Python/multipy/multigrid/restriction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import jit 3 | 4 | 5 | def restriction(A): 6 | """ 7 | applies simple restriction to A 8 | @param A n x n matrix 9 | @return (n//2 +1, n//2 + 1) matrix 10 | """ 11 | # indicator for Dimension 12 | alpha = len(A.shape) 13 | # initialize result with respect to the wanted shape 14 | ret = np.empty(np.array(A.shape) // 2 + 1) 15 | # Index of the second to the last element to mention in ret (depends on 16 | # the shape of A) 17 | end = ret.shape[0] - (A.shape[0] + 1) % 2 18 | 19 | # Case: Dimension 1 20 | if alpha == 1: 21 | restriction_1D(A, ret, end) 22 | # Case: Dimension 2 23 | elif alpha == 2: 24 | restriction_2D(A, ret, end) 25 | # Case: Dimension 3 26 | elif alpha == 3: 27 | restriction_3D(A, ret, end) 28 | # Case: Error 29 | else: 30 | raise ValueError('restriction: invalid dimension') 31 | 32 | return ret 33 | 34 | 35 | @jit(nopython=True, fastmath=True) 36 | def restriction_1D(A, ret, end): 37 | # get every second element in A 38 | ret[:end:] = A[::2] 39 | # set the last index correctly 40 | ret[-1] = A[-1] 41 | 42 | 43 | @jit(nopython=True, fastmath=True) 44 | def restriction_2D(A, ret, end): 45 | # get every second element in A 46 | ret[:end:, :end:] = A[::2, ::2] 47 | # special case: borders 48 | ret[:end, -1] = A[::2, -1] 49 | 
ret[-1, :end] = A[-1, ::2] 50 | # special case: outer corner 51 | ret[-1, -1] = A[-1, -1] 52 | 53 | 54 | @jit(nopython=True, fastmath=True) 55 | def restriction_3D(A, ret, end): 56 | # get every second element in A 57 | ret[:end:, :end:, :end:] = A[::2, ::2, ::2] 58 | # special case: inner borders 59 | ret[:end, :end, -1] = A[::2, ::2, -1] 60 | ret[-1, :end, :end] = A[-1, ::2, ::2] 61 | ret[:end, -1, :end] = A[::2, -1, ::2] 62 | # special case: outer borders 63 | ret[:end, -1, -1] = A[::2, -1, -1] 64 | ret[-1, :end, -1] = A[-1, ::2, -1] 65 | ret[-1, -1, :end] = A[-1, -1, ::2] 66 | # special case: outer corner 67 | ret[-1, -1, -1] = A[-1, -1, -1] 68 | 69 | 70 | def weighted_restriction(A): 71 | # indicator for Dimension 72 | alpha = len(A.shape) 73 | # initialize result with respect to the wanted shape 74 | ret = restriction(A) 75 | 76 | # min length is 3 77 | assert(A.shape[0] >= 3) 78 | 79 | if alpha == 1: 80 | weighted_restriction_1D(A, ret) 81 | elif alpha == 2: 82 | weighted_restriction_2D(A, ret) 83 | elif alpha == 3: 84 | weighted_restriction_3D(A, ret) 85 | else: 86 | raise ValueError('weighted restriction: invalid dimension') 87 | 88 | return ret 89 | 90 | 91 | @jit(nopython=True, fastmath=True) 92 | def weighted_restriction_1D(A, ret): 93 | # core 94 | ret[1:-1] /= 2 95 | # corner 96 | ret[1:-1] += (A[1:-2:2] + A[3::2]) / 4 97 | 98 | 99 | @jit(nopython=True, fastmath=True) 100 | def weighted_restriction_2D(A, ret): 101 | # core 102 | ret[1:-1, 1:-1] /= 4 103 | # edges 104 | ret[1:-1, 1:-1] += (A[2:-1:2, 1:-2:2] + A[1:-2:2, 2:-1:2] + 105 | A[2:-1:2, 3::2] + A[3::2, 2:-1:2]) / 8 106 | # corners 107 | ret[1:-1, 1:-1] += (A[1:-2:2, 1:-2:2] + A[1:-2:2, 3::2] + 108 | A[3::2, 1:-2:2] + A[3::2, 3::2]) / 16 109 | 110 | 111 | @jit(nopython=True, fastmath=True) 112 | def weighted_restriction_3D(A, ret): 113 | # core 114 | ret[1:-1, 1:-1, 1:-1] *= 8 115 | # edges 116 | ret[1:-1, 1:-1, 1:-1] += ( 117 | A[2:-1:2, 2:-1:2, 1:-2:2] + A[2:-1:2, 2:-1:2, 3::2] + 118 | 
A[2:-1:2, 1:-2:2, 2:-1:2] + A[2:-1:2, 3::2, 2:-1:2] + 119 | A[1:-2:2, 2:-1:2, 2:-1:2] + A[3::2, 2:-1:2, 2:-1:2]) * 4 120 | # more edges 121 | ret[1:-1, 1:-1, 1:-1] += ( 122 | A[2:-1:2, 1:-2:2, 3::2] + A[2:-1:2, 3::2, 1:-2:2] + 123 | A[2:-1:2, 1:-2:2, 1:-2:2] + A[2:-1:2, 3::2, 3::2] + 124 | A[1:-2:2, 2:-1:2, 3::2] + A[3::2, 2:-1:2, 1:-2:2] + 125 | A[1:-2:2, 2:-1:2, 1:-2:2] + A[3::2, 2:-1:2, 3::2] + 126 | A[1:-2:2, 3::2, 2:-1:2] + A[3::2, 1:-2:2, 2:-1:2] + 127 | A[1:-2:2, 1:-2:2, 2:-1:2] + A[3::2, 3::2, 2:-1:2]) * 2 128 | # corners 129 | ret[1:-1, 1:-1, 1:-1] += ( 130 | A[3::2, 1:-2:2, 1:-2:2] + A[3::2, 3::2, 1:-2:2] + 131 | A[3::2, 3::2, 3::2] + A[3::2, 1:-2:2, 3::2] + 132 | A[1:-2:2, 1:-2:2, 1:-2:2] + A[1:-2:2, 1:-2:2, 3::2] + 133 | A[1:-2:2, 3::2, 3::2] + A[1:-2:2, 3::2, 1:-2:2]) 134 | 135 | ret[1:-1, 1:-1, 1:-1] /= 64 136 | -------------------------------------------------------------------------------- /Python/multipy/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["test_gauss_seidel", "test_multigrid"] 2 | -------------------------------------------------------------------------------- /Python/multipy/tests/problem_1D_20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/Python/multipy/tests/problem_1D_20.npy -------------------------------------------------------------------------------- /Python/multipy/tests/problem_2D_20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/Python/multipy/tests/problem_2D_20.npy -------------------------------------------------------------------------------- /Python/multipy/tests/problem_3D_20.npy: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/Python/multipy/tests/problem_3D_20.npy
# --------------------------------------------------------------------------------
# /Python/multipy/tests/test_tools.py:
# --------------------------------------------------------------------------------
import numpy as np
from ..tools import operators as op
from ..tools.apply_poisson import apply_poisson
from ..tools import util


def test_apply_poisson():
    """Compare the stencil against the assembled 2D operator matrix.

    The matrix from ``poisson_operator_2D`` carries the opposite sign of the
    stencil used by ``apply_poisson``, hence the call with ``-U``.
    """
    eps = 1e-12
    # Variables
    U, _ = util.load_test_2D_problem()

    A = op.poisson_operator_2D(U.shape[0] - 2)
    B = (A @ U[1:-1, 1:-1].flatten() - op.boundary_condition(U)
         ).reshape(np.array(U.shape) - 2)
    C = apply_poisson(-U, 1)

    assert np.allclose(C[1:-1, 1:-1], B, atol=eps)


def test_apply_poisson_1D():
    """Check the 1D stencil against an explicit point-by-point loop."""
    U, _ = util.load_test_1D_problem()
    h = 1
    expected = np.zeros_like(U)
    # apply_poisson copies both boundary points unchanged
    expected[0] = U[0]
    expected[-1] = U[-1]
    for i in range(1, U.shape[0] - 1):
        expected[i] = (-2.0 * U[i] + U[i - 1] + U[i + 1]) / (h * h)

    assert np.array_equal(expected, apply_poisson(U, h))


def test_apply_poisson_2D():
    """Check the 2D stencil against an explicit point-by-point loop."""
    U, _ = util.load_test_2D_problem()
    h = 1
    expected = np.zeros_like(U)
    # apply_poisson copies all four boundary edges unchanged; list every one
    # so the test does not depend on the far edges of the problem being zero
    expected[:, 0] = U[:, 0]
    expected[0, :] = U[0, :]
    expected[:, -1] = U[:, -1]
    expected[-1, :] = U[-1, :]
    for i in range(1, U.shape[0] - 1):
        for j in range(1, U.shape[1] - 1):
            expected[i, j] = (-4.0 *
                              U[i, j] +
                              U[i - 1, j] +
                              U[i + 1, j] +
                              U[i, j - 1] +
                              U[i, j + 1]) / (h * h)

    assert np.array_equal(expected, apply_poisson(U, h))


def test_apply_poisson_3D():
    """Check the 3D stencil against an explicit point-by-point loop."""
    U, _ = util.load_test_3D_problem()
    h = 1
    expected = np.zeros_like(U)
    # apply_poisson copies all six boundary faces unchanged; list every one
    # so the test does not depend on the far faces of the problem being zero
    expected[:, :, 0] = U[:, :, 0]
    expected[:, 0, :] = U[:, 0, :]
    expected[0, :, :] = U[0, :, :]
    expected[:, :, -1] = U[:, :, -1]
    expected[:, -1, :] = U[:, -1, :]
    expected[-1, :, :] = U[-1, :, :]
    for i in range(1, U.shape[0] - 1):
        for j in range(1, U.shape[1] - 1):
            for k in range(1, U.shape[2] - 1):
                expected[i, j, k] = (-6.0 * U[i, j, k] +
                                     U[i - 1, j, k] +
                                     U[i + 1, j, k] +
                                     U[i, j - 1, k] +
                                     U[i, j + 1, k] +
                                     U[i, j, k - 1] +
                                     U[i, j, k + 1]) / (h * h)
    assert np.array_equal(expected, apply_poisson(U, h))

# --------------------------------------------------------------------------------
# /Python/multipy/tools/__init__.py:
# --------------------------------------------------------------------------------
__all__ = ["util", "operators", "heatmap"]

# --------------------------------------------------------------------------------
# /Python/multipy/tools/apply_poisson.py:
# --------------------------------------------------------------------------------
import numpy as np


def apply_poisson(U, h=None):
    """
    Apply the finite-difference Poisson (Laplace) stencil to a 1D, 2D or 3D
    grid.

    Interior points receive (sum of the 2 * dim axis neighbours
    - 2 * dim * centre) / h**2; every boundary point is copied from U
    unchanged.

    @param U grid values (array or array-like) with 1, 2 or 3 dimensions
    @param h distance between grid points, defaults to 1 / U.shape[0]
    @return array of the same shape as U; it keeps U's dtype when that is a
            floating or complex type and is float64 otherwise
    @raise ValueError if U does not have 1, 2 or 3 dimensions
    """
    U = np.asarray(U)
    alpha = U.ndim
    if alpha not in (1, 2, 3):
        raise ValueError('residual: invalid dimension')

    if h is None:
        h = 1 / U.shape[0]

    # An integer grid must not get an integer result buffer: storing the
    # divided stencil values into it would silently truncate them.
    if np.issubdtype(U.dtype, np.inexact):
        out_dtype = U.dtype
    else:
        out_dtype = np.float64
    x = np.empty_like(U, dtype=out_dtype)

    # boundary values are taken over unchanged; the interior is overwritten
    x[...] = U

    inner = (slice(1, -1),) * alpha
    # Summation order (centre term, then lower and upper neighbour of each
    # axis in turn) is the same as in the hand-written per-dimension stencils.
    acc = (-2.0 * alpha) * U[inner]
    for axis in range(alpha):
        lower = inner[:axis] + (slice(None, -2),) + inner[axis + 1:]
        upper = inner[:axis] + (slice(2, None),) + inner[axis + 1:]
        acc = acc + U[lower] + U[upper]
    x[inner] = acc / (h * h)

    return x

# --------------------------------------------------------------------------------
# /Python/multipy/tools/heatmap.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/Python/multipy/tools/heatmap.py -------------------------------------------------------------------------------- /Python/multipy/tools/operators.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.linalg import block_diag 3 | 4 | from .util import timer 5 | 6 | 7 | def restriction_operator(N): 8 | """ 9 | should return the restriction operator matrix from R^(N-1) -> R^(N/2-1) 10 | """ 11 | diag = np.array([1 / 4, 1 / 2, 1 / 4]) 12 | zeros = np.zeros(N - 2) 13 | conc = np.concatenate((diag, zeros)) 14 | ret = np.tile(conc, N // 2 - 2) 15 | ret = np.concatenate((ret, diag)) 16 | return ret.reshape((N // 2 - 1, N - 1)) 17 | 18 | 19 | def poisson_operator(N, h): 20 | """ 21 | returns a Matrix with nxn -1 4 -1 on diagonal 22 | @param h is distance between grid points 23 | """ 24 | A = 4. * np.eye(N, N) 25 | upper = -1. 
* np.eye(N, N - 1) 26 | upper = np.concatenate((np.zeros((N, 1)), upper), axis=1) 27 | ret = A + upper + upper.T 28 | return ret 29 | 30 | 31 | def poisson_operator_2D(N, h=None): 32 | """ 33 | return n^2 x n^2 matrix 34 | @param h is distance between grid points 35 | """ 36 | if h is None: 37 | h = 1 / N 38 | 39 | B = poisson_operator(N, h) 40 | middle = block_diag(*[B] * N) 41 | upper = - np.eye(N * N, N * (N - 1)) 42 | upper = np.concatenate((np.zeros((N * N, N)), upper), axis=1) 43 | return middle + upper + upper.T 44 | 45 | 46 | def poisson_operator_like(x): 47 | assert len(x.shape) == 1 48 | N = x.shape[0] 49 | ret = 4 * np.eye(N, N) 50 | ret[0, 1] = ret[-1, -2] = - 1 51 | if 3 < N: 52 | ret[0, 3] = ret[-1, -4] = -1 53 | for i in range(1, N - 1): 54 | ret[i, i + 1] = - 1 55 | ret[i, i - 1] = - 1 56 | if 3 <= i: 57 | ret[i, i - 3] = - 1 58 | if i < N - 3: 59 | ret[i, i + 3] = - 1 60 | 61 | return ret 62 | 63 | 64 | def boundary_condition(U): 65 | N = U.shape[0] - 2 66 | ret = np.zeros(N ** 2) 67 | # left boundary 68 | ret[:N] = U[1:-1, 0] 69 | # right boundary 70 | ret[-N:] = U[-1, 1:-1] 71 | 72 | # Top boundary 73 | ret[::N] += U[0, 1:-1:] 74 | 75 | # bottom boundary 76 | ret[N - 1::N] += U[-1, 1:-1:] 77 | return ret 78 | 79 | 80 | def reshape_grid(grid, rhs, h=None): 81 | """ 82 | Takes a grid and a rhs and reformulates it to 83 | AU = F with A as poisson operator 84 | @param h is the distance between the grid points 85 | """ 86 | assert grid.shape == rhs.shape 87 | N = grid.shape[0] 88 | if h is None: 89 | h = 1 / N 90 | A = poisson_operator_2D(N - 2) 91 | U = grid[1:-1, 1:-1].flatten() 92 | F = h * h * rhs[1:-1, 1:-1].flatten() + boundary_condition(grid) 93 | return A, U, F 94 | -------------------------------------------------------------------------------- /Python/multipy/tools/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Util functions 3 | """ 4 | import logging 5 | import time as time 6 | from 
functools import wraps 7 | import numpy as np 8 | 9 | TIME_STATS = {} 10 | FLOPS = {} 11 | 12 | logger = logging.getLogger('time') 13 | logger.setLevel(logging.INFO) 14 | 15 | # TODO: ggf. auch mal mit PERF was machen 16 | 17 | 18 | def profiling(profunc): 19 | 20 | import cProfile 21 | from pstats import SortKey, Stats 22 | 23 | @wraps(profunc) 24 | def prof_wrapper(*args, **kwargs): 25 | with cProfile.Profile() as pr: 26 | value = profunc(*args, **kwargs) 27 | p = Stats(pr) 28 | p.sort_stats(SortKey.TIME).dump_stats( 29 | f"profiles/{profunc.__name__}_{args[0]}.prof") 30 | return value 31 | return prof_wrapper 32 | 33 | 34 | def timer(func): 35 | @wraps(func) 36 | def wrapper(*args, **kwargs): 37 | before = time.perf_counter() 38 | value = func(*args, **kwargs) 39 | after = time.perf_counter() - before 40 | if func.__name__ not in TIME_STATS: 41 | TIME_STATS[func.__name__] = [0, 0] 42 | TIME_STATS[func.__name__][0] += 1 43 | TIME_STATS[func.__name__][1] += after 44 | 45 | logger.info(f"{func.__name__}({args}) took {after:.6}") 46 | 47 | return value 48 | return wrapper 49 | 50 | 51 | def counter(func): 52 | """ 53 | A Decorator function to count the flops 54 | the values are just ridicusly guessed 55 | """ 56 | @wraps(func) 57 | def counter_wrapper(*args, **kwargs): 58 | value = func(*args, **kwargs) 59 | if func.__name__ not in FLOPS: 60 | FLOPS[func.__name__] = 0 61 | 62 | # TODO: sweep1D and sweep_3D 63 | if (func.__name__ == "sweep_2D"): 64 | N = args[1].shape[0] 65 | FLOPS[func.__name__] += 6 * (((N - 2) // 2)**2) 66 | elif (func.__name__ == "weighted_restriction"): 67 | alpha = len(args[0].shape) 68 | N = args[0].shape[0] // 2 + 1 69 | if (alpha == 1): 70 | pass 71 | elif (alpha == 2): 72 | FLOPS[func.__name__] += 11 * ((N - 2) // 2)**2 73 | elif (alpha == 3): 74 | pass 75 | elif (func.__name__ == "prolongation"): 76 | alpha = len(args[1]) 77 | N = args[1][0] 78 | if (alpha == 1): 79 | pass 80 | elif (alpha == 2): 81 | FLOPS[func.__name__] += 6 * ((N - 2) 
// 2)**2 82 | elif (alpha == 3): 83 | pass 84 | 85 | return value 86 | return counter_wrapper 87 | 88 | 89 | def MatrixGenerator(dim, max_value=500): 90 | return np.random.rand(*dim) * np.random.randint(max_value) 91 | 92 | 93 | def load_problem(path): 94 | U, F = np.load(path) 95 | return U, F 96 | 97 | 98 | def load_test_1D_problem(): 99 | return load_problem("./multipy/tests/problem_1D_20.npy") 100 | 101 | 102 | def load_test_2D_problem(): 103 | return load_problem("./multipy/tests/problem_2D_20.npy") 104 | 105 | 106 | def load_test_3D_problem(): 107 | return load_problem("./multipy/tests/problem_3D_20.npy") 108 | 109 | 110 | def str2bool(v): 111 | return v.lower() in ("yes", "true", "t", "1") 112 | -------------------------------------------------------------------------------- /Python/problemgenerator/femwave.py: -------------------------------------------------------------------------------- 1 | """ 2 | A example problem 3 | solves the finite element method (wave) in NxN grid 4 | """ 5 | import numpy as np 6 | 7 | def f(x,y): 8 | return np.sin(2*np.pi * x) * np.cos(2*np.pi * y) 9 | 10 | def u(x,y): 11 | return f(x,y) / (8 * np.pi**2) 12 | 13 | def create_2D(N): 14 | # Generate meshes 15 | F = f(*np.meshgrid(np.linspace(0, 1, N), np.linspace(0,1,N))) 16 | # Set borders correct 17 | F[:, 0] /= -8 * np.pi**2 18 | F[0, 1:-1] /= -8 * np.pi**2 19 | F[:, -1] /= -8 * np.pi**2 20 | F[-1, 1:-1] /= -8 * np.pi**2 21 | U = F.copy() 22 | U[1:-1, 1:-1] = 0 23 | return U, F 24 | 25 | def solution_2D(N): 26 | # Generate analytical solution 27 | return -1 * u(*np.meshgrid(np.linspace(0, 1, N), np.linspace(0,1,N))) -------------------------------------------------------------------------------- /Python/problemgenerator/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import os 5 | import optparse 6 | import numpy as np 7 | 8 | from heatmap import create_problem_1D, create_problem_2D, 
create_problem_3D 9 | from femwave import create_2D 10 | 11 | 12 | def save_to_npy(file, tensor): 13 | np.save(file, tensor) 14 | 15 | 16 | def generate_1D_problem(N): 17 | U, F = create_problem_1D(N) 18 | return np.array([U, F]) 19 | 20 | 21 | def generate_2D_problem(N): 22 | U, F = create_problem_2D(N) 23 | return np.array([U, F]) 24 | 25 | 26 | def generate_3D_problem(N): 27 | U, F = create_problem_3D(N) 28 | return np.array([U, F]) 29 | 30 | 31 | def generate_problem(dim): 32 | if dim == 1: 33 | return generate_1D_problem 34 | if dim == 2: 35 | return generate_2D_problem 36 | if dim == 3: 37 | return generate_3D_problem 38 | 39 | raise ValueError(f"{dim} is invalid dimension") 40 | 41 | 42 | def save_problem(base, dim, tensor): 43 | filename = f"{base}/problem_{dim}D_{N:04}" 44 | if os.path.exists(filename): 45 | os.remove(filename) 46 | save_to_npy(filename, tensor) 47 | 48 | 49 | if __name__ == "__main__": 50 | parser = optparse.OptionParser() 51 | parser.add_option('-t', action='store', default='heat', dest='type', 52 | help='select problem type heat|wave') 53 | 54 | options, args = parser.parse_args() 55 | if not len(args) == 3: 56 | print(f"{sys.argv[0]} base dimension N") 57 | exit(1) 58 | 59 | base, dim, N = args[0], int(args[1]), int(args[2]) 60 | if options.type == 'heat': 61 | save_problem(base, dim, generate_problem(dim)(N)) 62 | elif options.type == 'wave' and dim == 2: 63 | save_problem(base, dim, np.array(create_2D(N))) 64 | else: 65 | raise Exception('invalid type or dimension') 66 | -------------------------------------------------------------------------------- /Python/problemgenerator/heatmap.py: -------------------------------------------------------------------------------- 1 | """ 2 | A example problem 3 | solves the heat distribution in NxN grid 4 | """ 5 | import numpy as np 6 | 7 | 8 | def initMap_1D(N): 9 | U = np.random.uniform(0, 1, (N)) 10 | U[0] = 1 11 | U[-1] = 0 12 | return U 13 | 14 | 15 | def initMap_2D(N): 16 | U = 
np.random.uniform(0, 1, (N, N)) 17 | U[:, -1] = 0 18 | U[-1, :] = 0 19 | U[:, 0] = 1 20 | U[0, :] = 1 21 | return U 22 | 23 | 24 | def initMap_3D(N): 25 | U = np.random.uniform(0, 1, (N, N, N)) 26 | U[:, -1, :] = 0 27 | U[-1, :, :] = 0 28 | U[:, :, -1] = 0 29 | U[:, 0, :] = 1 30 | U[0, :, :] = 1 31 | U[:, :, 0] = 1 32 | return U 33 | 34 | 35 | def heat_sources_1D(N): 36 | F = np.zeros((N)) 37 | F[0] = 1 38 | F[-1] = 0 39 | return F 40 | 41 | 42 | def heat_sources_2D(N): 43 | F = np.zeros((N, N)) 44 | F[:, -1] = 0 45 | F[-1, :] = 0 46 | F[:, 0] = 1 47 | F[0, :] = 1 48 | return F 49 | 50 | 51 | def heat_sources_3D(N): 52 | F = np.zeros((N, N, N)) 53 | F[:, -1, :] = 0 54 | F[-1, :, :] = 0 55 | F[:, :, -1] = 0 56 | F[:, 0, :] = 1 57 | F[0, :, :] = 1 58 | F[:, :, 0] = 1 59 | return F 60 | 61 | 62 | def create_problem_1D(N): 63 | return initMap_1D(N), heat_sources_1D(N) 64 | 65 | 66 | def create_problem_2D(N): 67 | return initMap_2D(N), heat_sources_2D(N) 68 | 69 | 70 | def create_problem_3D(N): 71 | return initMap_3D(N), heat_sources_3D(N) 72 | -------------------------------------------------------------------------------- /Python/problemgenerator/load_problem.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def load_problem(path): 5 | U, F = np.load(path) 6 | return U, F 7 | -------------------------------------------------------------------------------- /Python/profiling.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | import numpy as np 5 | 6 | import multipy.tools.heatmap as hm 7 | import multipy.tools.util as util 8 | 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | np.set_printoptions(precision=4, linewidth=180) 13 | 14 | 15 | @util.profiling 16 | def profile_2D_multigrid(N, numba=True): 17 | iter_cycle = 1000 18 | U = hm.initMap_2D(N) 19 | F = hm.heat_sources_2D(N) 20 | hm.poisson_multigrid(F, U, 5, 2, 2, 
2, iter_cycle, numba=numba) 21 | 22 | 23 | @util.timer 24 | def time_multigrid(N, numba=True): 25 | U, F = hm.create_problem_2D(N) 26 | iter_cycle = 100 27 | hm.poisson_multigrid(F, U, 5, 2, 2, 2, iter_cycle, numba=numba) 28 | 29 | 30 | if __name__ == "__main__": 31 | numba = True 32 | if len(sys.argv) == 2: 33 | numba = util.str2bool(str(sys.argv[1])) 34 | for i in range(8, 14): 35 | profile_2D_multigrid(2**i, numba) 36 | -------------------------------------------------------------------------------- /Python/requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==20.2.0 2 | iniconfig==1.0.1 3 | llvmlite==0.34.0 4 | numba==0.51.2 5 | numpy==1.19.2 6 | packaging==20.4 7 | pluggy==0.13.1 8 | py==1.9.0 9 | pyparsing==2.4.7 10 | pytest==6.1.1 11 | scipy==1.5.2 12 | six==1.15.0 13 | toml==0.10.1 14 | -------------------------------------------------------------------------------- /Python/results/outfile_cip1e6_2611_openblas_8_numba_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Thu 26 Nov 2020 10:56:41 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | numpy version: 1.19.3 5 | blas_mkl_info: 6 | NOT AVAILABLE 7 | blis_info: 8 | NOT AVAILABLE 9 | openblas_info: 10 | libraries = ['openblas', 'openblas'] 11 | library_dirs = ['/usr/local/lib'] 12 | language = c 13 | define_macros = [('HAVE_CBLAS', None)] 14 | blas_opt_info: 15 | libraries = ['openblas', 'openblas'] 16 | library_dirs = ['/usr/local/lib'] 17 | language = c 18 | define_macros = [('HAVE_CBLAS', None)] 19 | lapack_mkl_info: 20 | NOT AVAILABLE 21 | openblas_lapack_info: 22 | libraries = ['openblas', 'openblas'] 23 | library_dirs = ['/usr/local/lib'] 24 | language = c 25 | define_macros = [('HAVE_CBLAS', None)] 26 | lapack_opt_info: 27 | libraries = ['openblas', 'openblas'] 28 | library_dirs = ['/usr/local/lib'] 29 | language = c 30 | define_macros = [('HAVE_CBLAS', None)] 31 | 
############ END INFOS 32 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 33 | 1024:2D:26.309246671386063:5000:0.0:0:31,334,520,030:0:0:0:0: 34 | 1024:2D:26.241105939261615:5000:0.0:0:31,334,520,030:0:0:0:0: 35 | 1024:2D:26.31578349880874:5000:0.0:0:31,334,520,030:0:0:0:0: 36 | 1088:2D:31.649163152091205:5000:0.0:0:35,381,880,030:0:0:0:0: 37 | 1088:2D:31.251295044086874:5000:0.0:0:35,381,880,030:0:0:0:0: 38 | 1088:2D:31.143179490230978:5000:0.0:0:35,381,880,030:0:0:0:0: 39 | 112:2D:0.28180971276015043:5000:0.0:0:363,000,030:0:0:0:0: 40 | 112:2D:0.2781702158972621:5000:0.0:0:363,000,030:0:0:0:0: 41 | 112:2D:0.27501570247113705:5000:0.0:0:363,000,030:0:0:0:0: 42 | 1152:2D:35.68576753232628:5000:0.0:0:39,675,000,030:0:0:0:0: 43 | 1152:2D:35.38722257968038:5000:0.0:0:39,675,000,030:0:0:0:0: 44 | 1152:2D:35.25037477724254:5000:0.0:0:39,675,000,030:0:0:0:0: 45 | 1216:2D:40.40483799856156:5000:0.0:0:44,213,880,030:0:0:0:0: 46 | 1216:2D:40.649159932509065:5000:0.0:0:44,213,880,030:0:0:0:0: 47 | 1216:2D:40.34736832790077:5000:0.0:0:44,213,880,030:0:0:0:0: 48 | 128:2D:0.3624443160369992:5000:0.0:0:476,280,030:0:0:0:0: 49 | 128:2D:0.36002199817448854:5000:0.0:0:476,280,030:0:0:0:0: 50 | 128:2D:0.36405875626951456:5000:0.0:0:476,280,030:0:0:0:0: 51 | 1280:2D:44.60959123726934:5000:0.0:0:48,998,520,030:0:0:0:0: 52 | 1280:2D:44.7725593643263:5000:0.0:0:48,998,520,030:0:0:0:0: 53 | 1280:2D:44.53643043804914:5000:0.0:0:48,998,520,030:0:0:0:0: 54 | 144:2D:0.46905550453811884:5000:0.0:0:604,920,030:0:0:0:0: 55 | 144:2D:0.4557317094877362:5000:0.0:0:604,920,030:0:0:0:0: 56 | 144:2D:0.46871287003159523:5000:0.0:0:604,920,030:0:0:0:0: 57 | 16:2D:0.014972666278481483:5000:0.0:0:5,880,030:0:0:0:0: 58 | 16:2D:0.014362451620399952:5000:0.0:0:5,880,030:0:0:0:0: 59 | 16:2D:0.03349983599036932:5000:0.0:0:5,880,030:0:0:0:0: 60 | 160:2D:0.6144370026886463:5000:0.0:0:748,920,030:0:0:0:0: 61 | 
160:2D:0.5764889372512698:5000:0.0:0:748,920,030:0:0:0:0: 62 | 160:2D:0.5816875798627734:5000:0.0:0:748,920,030:0:0:0:0: 63 | 176:2D:0.6997493896633387:5000:0.0:0:908,280,030:0:0:0:0: 64 | 176:2D:0.7012885352596641:5000:0.0:0:908,280,030:0:0:0:0: 65 | 176:2D:0.7355166357010603:5000:0.0:0:908,280,030:0:0:0:0: 66 | 192:2D:0.8340429337695241:5000:0.0:0:1,083,000,030:0:0:0:0: 67 | 192:2D:0.8344286885112524:5000:0.0:0:1,083,000,030:0:0:0:0: 68 | 192:2D:0.8354444913566113:5000:0.0:0:1,083,000,030:0:0:0:0: 69 | 208:2D:0.9777672486379743:5000:0.0:0:1,273,080,030:0:0:0:0: 70 | 208:2D:0.9824433829635382:5000:0.0:0:1,273,080,030:0:0:0:0: 71 | 208:2D:0.9863227559253573:5000:0.0:0:1,273,080,030:0:0:0:0: 72 | 224:2D:1.129329132847488:5000:0.0:0:1,478,520,030:0:0:0:0: 73 | 224:2D:1.1309372456744313:5000:0.0:0:1,478,520,030:0:0:0:0: 74 | 224:2D:1.137239775620401:5000:0.0:0:1,478,520,030:0:0:0:0: 75 | 240:2D:1.2976867137476802:5000:0.0:0:1,699,320,030:0:0:0:0: 76 | 240:2D:1.3008966315537691:5000:0.0:0:1,699,320,030:0:0:0:0: 77 | 240:2D:1.307204739190638:5000:0.0:0:1,699,320,030:0:0:0:0: 78 | 256:2D:1.4514079615473747:5000:0.0:0:1,935,480,030:0:0:0:0: 79 | 256:2D:1.4595936900004745:5000:0.0:0:1,935,480,030:0:0:0:0: 80 | 256:2D:1.4597322223708034:5000:0.0:0:1,935,480,030:0:0:0:0: 81 | 272:2D:1.675942705012858:5000:0.0:0:2,187,000,030:0:0:0:0: 82 | 272:2D:1.6660032561048865:5000:0.0:0:2,187,000,030:0:0:0:0: 83 | 272:2D:1.6684989528730512:5000:0.0:0:2,187,000,030:0:0:0:0: 84 | 288:2D:1.8556229108944535:5000:0.0:0:2,453,880,030:0:0:0:0: 85 | 288:2D:1.854264016263187:5000:0.0:0:2,453,880,030:0:0:0:0: 86 | 288:2D:1.8653659708797932:5000:0.0:0:2,453,880,030:0:0:0:0: 87 | 304:2D:2.1115432027727365:5000:0.0:0:2,736,120,030:0:0:0:0: 88 | 304:2D:2.0769806429743767:5000:0.0:0:2,736,120,030:0:0:0:0: 89 | 304:2D:2.0620971471071243:5000:0.0:0:2,736,120,030:0:0:0:0: 90 | 32:2D:0.026243766769766808:5000:0.0:0:27,000,030:0:0:0:0: 91 | 32:2D:0.03024252876639366:5000:0.0:0:27,000,030:0:0:0:0: 92 | 
32:2D:0.02583037130534649:5000:0.0:0:27,000,030:0:0:0:0: 93 | 320:2D:2.2955073248595:5000:0.0:0:3,033,720,030:0:0:0:0: 94 | 320:2D:2.282577526755631:5000:0.0:0:3,033,720,030:0:0:0:0: 95 | 320:2D:2.291262909770012:5000:0.0:0:3,033,720,030:0:0:0:0: 96 | 384:2D:3.2708385149016976:5000:0.0:0:4,377,720,030:0:0:0:0: 97 | 384:2D:3.278370536863804:5000:0.0:0:4,377,720,030:0:0:0:0: 98 | 384:2D:3.2973442748188972:5000:0.0:0:4,377,720,030:0:0:0:0: 99 | 448:2D:4.473402062430978:5000:0.0:0:5,967,480,030:0:0:0:0: 100 | 448:2D:4.469397189095616:5000:0.0:0:5,967,480,030:0:0:0:0: 101 | 448:2D:4.467172138392925:5000:0.0:0:5,967,480,030:0:0:0:0: 102 | 48:2D:0.05600588396191597:5000:0.0:0:63,480,030:0:0:0:0: 103 | 48:2D:0.05325297359377146:5000:0.0:0:63,480,030:0:0:0:0: 104 | 48:2D:0.05404267460107803:5000:0.0:0:63,480,030:0:0:0:0: 105 | 512:2D:5.821523290127516:5000:0.0:0:7,803,000,030:0:0:0:0: 106 | 512:2D:5.800331969745457:5000:0.0:0:7,803,000,030:0:0:0:0: 107 | 512:2D:5.780480737797916:5000:0.0:0:7,803,000,030:0:0:0:0: 108 | 576:2D:7.41345913335681:5000:0.0:0:9,884,280,030:0:0:0:0: 109 | 576:2D:7.480423765257001:5000:0.0:0:9,884,280,030:0:0:0:0: 110 | 576:2D:7.433467405848205:5000:0.0:0:9,884,280,030:0:0:0:0: 111 | 64:2D:0.09258827660232782:5000:0.0:0:115,320,030:0:0:0:0: 112 | 64:2D:0.08939884509891272:5000:0.0:0:115,320,030:0:0:0:0: 113 | 64:2D:0.09235293883830309:5000:0.0:0:115,320,030:0:0:0:0: 114 | 640:2D:9.22315105702728:5000:0.0:0:12,211,320,030:0:0:0:0: 115 | 640:2D:9.296485809609294:5000:0.0:0:12,211,320,030:0:0:0:0: 116 | 640:2D:9.206088953651488:5000:0.0:0:12,211,320,030:0:0:0:0: 117 | 704:2D:11.387983289547265:5000:0.0:0:14,784,120,030:0:0:0:0: 118 | 704:2D:11.454845005646348:5000:0.0:0:14,784,120,030:0:0:0:0: 119 | 704:2D:11.371313828974962:5000:0.0:0:14,784,120,030:0:0:0:0: 120 | 768:2D:13.652984978631139:5000:0.0:0:17,602,680,030:0:0:0:0: 121 | 768:2D:13.683782684616745:5000:0.0:0:17,602,680,030:0:0:0:0: 122 | 
768:2D:13.596705535426736:5000:0.0:0:17,602,680,030:0:0:0:0: 123 | 80:2D:0.1432384392246604:5000:0.0:0:182,520,030:0:0:0:0: 124 | 80:2D:0.14197462424635887:5000:0.0:0:182,520,030:0:0:0:0: 125 | 80:2D:0.13982795178890228:5000:0.0:0:182,520,030:0:0:0:0: 126 | 832:2D:16.61629591975361:5000:0.0:0:20,667,000,030:0:0:0:0: 127 | 832:2D:16.6535640982911:5000:0.0:0:20,667,000,030:0:0:0:0: 128 | 832:2D:16.539008485153317:5000:0.0:0:20,667,000,030:0:0:0:0: 129 | 896:2D:19.585343497805297:5000:0.0:0:23,977,080,030:0:0:0:0: 130 | 896:2D:19.694931518286467:5000:0.0:0:23,977,080,030:0:0:0:0: 131 | 896:2D:19.675003775395453:5000:0.0:0:23,977,080,030:0:0:0:0: 132 | 96:2D:0.20839896984398365:5000:0.0:0:265,080,030:0:0:0:0: 133 | 96:2D:0.20249551627784967:5000:0.0:0:265,080,030:0:0:0:0: 134 | 96:2D:0.2047235481441021:5000:0.0:0:265,080,030:0:0:0:0: 135 | 960:2D:23.255385753698647:5000:0.0:0:27,532,920,030:0:0:0:0: 136 | 960:2D:23.45814239513129:5000:0.0:0:27,532,920,030:0:0:0:0: 137 | 960:2D:23.180467498488724:5000:0.0:0:27,532,920,030:0:0:0:0: 138 | -------------------------------------------------------------------------------- /Python/results/outfile_cip1e6_3010_openblas_1_nonumba_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Fri 30 Oct 2020 06:54:48 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | numpy version: 1.19.3 5 | blas_mkl_info: 6 | NOT AVAILABLE 7 | blis_info: 8 | NOT AVAILABLE 9 | openblas_info: 10 | libraries = ['openblas', 'openblas'] 11 | library_dirs = ['/usr/local/lib'] 12 | language = c 13 | define_macros = [('HAVE_CBLAS', None)] 14 | blas_opt_info: 15 | libraries = ['openblas', 'openblas'] 16 | library_dirs = ['/usr/local/lib'] 17 | language = c 18 | define_macros = [('HAVE_CBLAS', None)] 19 | lapack_mkl_info: 20 | NOT AVAILABLE 21 | openblas_lapack_info: 22 | libraries = ['openblas', 'openblas'] 23 | library_dirs = ['/usr/local/lib'] 24 | language = c 25 | define_macros = [('HAVE_CBLAS', 
None)] 26 | lapack_opt_info: 27 | libraries = ['openblas', 'openblas'] 28 | library_dirs = ['/usr/local/lib'] 29 | language = c 30 | define_macros = [('HAVE_CBLAS', None)] 31 | ############ END INFOS 32 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 33 | 1024:2D:25.73112323973328:5000:0.0:0:31,334,520,030:0:0:0:0: 34 | 1024:2D:26.202388829551637:5000:0.0:0:31,334,520,030:0:0:0:0: 35 | 1024:2D:25.87002468202263:5000:0.0:0:31,334,520,030:0:0:0:0: 36 | 1088:2D:30.860171189531684:5000:0.0:0:35,381,880,030:0:0:0:0: 37 | 1088:2D:31.014199911616743:5000:0.0:0:35,381,880,030:0:0:0:0: 38 | 1088:2D:30.90449277497828:5000:0.0:0:35,381,880,030:0:0:0:0: 39 | 112:2D:0.2829741947352886:5000:0.0:0:363,000,030:0:0:0:0: 40 | 112:2D:0.2797746267169714:5000:0.0:0:363,000,030:0:0:0:0: 41 | 112:2D:0.2795694572851062:5000:0.0:0:363,000,030:0:0:0:0: 42 | 1152:2D:35.16333812195808:5000:0.0:0:39,675,000,030:0:0:0:0: 43 | 1152:2D:35.071463556960225:5000:0.0:0:39,675,000,030:0:0:0:0: 44 | 1152:2D:35.43860719539225:5000:0.0:0:39,675,000,030:0:0:0:0: 45 | 1216:2D:40.00388178881258:5000:0.0:0:44,213,880,030:0:0:0:0: 46 | 1216:2D:40.18837969377637:5000:0.0:0:44,213,880,030:0:0:0:0: 47 | 1216:2D:40.161869794130325:5000:0.0:0:44,213,880,030:0:0:0:0: 48 | 128:2D:0.36741648614406586:5000:0.0:0:476,280,030:0:0:0:0: 49 | 128:2D:0.36547753494232893:5000:0.0:0:476,280,030:0:0:0:0: 50 | 128:2D:0.3634420810267329:5000:0.0:0:476,280,030:0:0:0:0: 51 | 1280:2D:44.38648191653192:5000:0.0:0:48,998,520,030:0:0:0:0: 52 | 1280:2D:44.46438513044268:5000:0.0:0:48,998,520,030:0:0:0:0: 53 | 1280:2D:44.35702454298735:5000:0.0:0:48,998,520,030:0:0:0:0: 54 | 144:2D:0.4628731291741133:5000:0.0:0:604,920,030:0:0:0:0: 55 | 144:2D:0.46857427060604095:5000:0.0:0:604,920,030:0:0:0:0: 56 | 144:2D:0.46588720567524433:5000:0.0:0:604,920,030:0:0:0:0: 57 | 16:2D:0.014230810105800629:5000:0.0:0:5,880,030:0:0:0:0: 58 | 
16:2D:0.010219015181064606:5000:0.0:0:5,880,030:0:0:0:0: 59 | 16:2D:0.013652684167027473:5000:0.0:0:5,880,030:0:0:0:0: 60 | 160:2D:0.5795128233730793:5000:0.0:0:748,920,030:0:0:0:0: 61 | 160:2D:0.5750898504629731:5000:0.0:0:748,920,030:0:0:0:0: 62 | 160:2D:0.5814064769074321:5000:0.0:0:748,920,030:0:0:0:0: 63 | 176:2D:0.6997191980481148:5000:0.0:0:908,280,030:0:0:0:0: 64 | 176:2D:0.714571001008153:5000:0.0:0:908,280,030:0:0:0:0: 65 | 176:2D:0.6982313012704253:5000:0.0:0:908,280,030:0:0:0:0: 66 | 192:2D:0.8750882046297193:5000:0.0:0:1,083,000,030:0:0:0:0: 67 | 192:2D:0.8338224366307259:5000:0.0:0:1,083,000,030:0:0:0:0: 68 | 192:2D:0.8284653127193451:5000:0.0:0:1,083,000,030:0:0:0:0: 69 | 208:2D:0.9857681812718511:5000:0.0:0:1,273,080,030:0:0:0:0: 70 | 208:2D:0.9721061503514647:5000:0.0:0:1,273,080,030:0:0:0:0: 71 | 208:2D:0.9812933318316936:5000:0.0:0:1,273,080,030:0:0:0:0: 72 | 224:2D:1.1308265570551157:5000:0.0:0:1,478,520,030:0:0:0:0: 73 | 224:2D:1.1225775871425867:5000:0.0:0:1,478,520,030:0:0:0:0: 74 | 224:2D:1.1273310603573918:5000:0.0:0:1,478,520,030:0:0:0:0: 75 | 240:2D:1.300495813600719:5000:0.0:0:1,699,320,030:0:0:0:0: 76 | 240:2D:1.2996864365413785:5000:0.0:0:1,699,320,030:0:0:0:0: 77 | 240:2D:1.304205920547247:5000:0.0:0:1,699,320,030:0:0:0:0: 78 | 256:2D:1.460962824523449:5000:0.0:0:1,935,480,030:0:0:0:0: 79 | 256:2D:1.4971478525549173:5000:0.0:0:1,935,480,030:0:0:0:0: 80 | 256:2D:1.45571484323591:5000:0.0:0:1,935,480,030:0:0:0:0: 81 | 272:2D:1.6678394237533212:5000:0.0:0:2,187,000,030:0:0:0:0: 82 | 272:2D:1.6691401591524482:5000:0.0:0:2,187,000,030:0:0:0:0: 83 | 272:2D:1.6627158289775252:5000:0.0:0:2,187,000,030:0:0:0:0: 84 | 288:2D:1.849510408937931:5000:0.0:0:2,453,880,030:0:0:0:0: 85 | 288:2D:1.855419403873384:5000:0.0:0:2,453,880,030:0:0:0:0: 86 | 288:2D:1.8577553806826472:5000:0.0:0:2,453,880,030:0:0:0:0: 87 | 304:2D:2.098596219904721:5000:0.0:0:2,736,120,030:0:0:0:0: 88 | 304:2D:2.298802520148456:5000:0.0:0:2,736,120,030:0:0:0:0: 89 | 
304:2D:2.080257346853614:5000:0.0:0:2,736,120,030:0:0:0:0: 90 | 32:2D:0.026956734247505665:5000:0.0:0:27,000,030:0:0:0:0: 91 | 32:2D:0.029762056656181812:5000:0.0:0:27,000,030:0:0:0:0: 92 | 32:2D:0.027845053933560848:5000:0.0:0:27,000,030:0:0:0:0: 93 | 320:2D:2.2946692751720548:5000:0.0:0:3,033,720,030:0:0:0:0: 94 | 320:2D:2.2734431102871895:5000:0.0:0:3,033,720,030:0:0:0:0: 95 | 320:2D:2.2695815647020936:5000:0.0:0:3,033,720,030:0:0:0:0: 96 | 384:2D:3.2857530415058136:5000:0.0:0:4,377,720,030:0:0:0:0: 97 | 384:2D:3.274027000181377:5000:0.0:0:4,377,720,030:0:0:0:0: 98 | 384:2D:3.2714408170431852:5000:0.0:0:4,377,720,030:0:0:0:0: 99 | 448:2D:4.542428694665432:5000:0.0:0:5,967,480,030:0:0:0:0: 100 | 448:2D:4.469412698410451:5000:0.0:0:5,967,480,030:0:0:0:0: 101 | 448:2D:4.472785328514874:5000:0.0:0:5,967,480,030:0:0:0:0: 102 | 48:2D:0.05643815919756889:5000:0.0:0:63,480,030:0:0:0:0: 103 | 48:2D:0.05501906108111143:5000:0.0:0:63,480,030:0:0:0:0: 104 | 48:2D:0.056576515547931194:5000:0.0:0:63,480,030:0:0:0:0: 105 | 512:2D:5.779942158609629:5000:0.0:0:7,803,000,030:0:0:0:0: 106 | 512:2D:5.78660231269896:5000:0.0:0:7,803,000,030:0:0:0:0: 107 | 512:2D:5.777117047458887:5000:0.0:0:7,803,000,030:0:0:0:0: 108 | 576:2D:7.404247182421386:5000:0.0:0:9,884,280,030:0:0:0:0: 109 | 576:2D:7.455344883725047:5000:0.0:0:9,884,280,030:0:0:0:0: 110 | 576:2D:7.395018252544105:5000:0.0:0:9,884,280,030:0:0:0:0: 111 | 64:2D:0.09194501768797636:5000:0.0:0:115,320,030:0:0:0:0: 112 | 64:2D:0.08974436577409506:5000:0.0:0:115,320,030:0:0:0:0: 113 | 64:2D:0.09438630193471909:5000:0.0:0:115,320,030:0:0:0:0: 114 | 640:2D:9.215693291276693:5000:0.0:0:12,211,320,030:0:0:0:0: 115 | 640:2D:9.172618202865124:5000:0.0:0:12,211,320,030:0:0:0:0: 116 | 640:2D:9.192135262303054:5000:0.0:0:12,211,320,030:0:0:0:0: 117 | 704:2D:11.32591331936419:5000:0.0:0:14,784,120,030:0:0:0:0: 118 | 704:2D:11.313046796247363:5000:0.0:0:14,784,120,030:0:0:0:0: 119 | 704:2D:11.36694265063852:5000:0.0:0:14,784,120,030:0:0:0:0: 
120 | 768:2D:13.56861549988389:5000:0.0:0:17,602,680,030:0:0:0:0: 121 | 768:2D:13.60307392757386:5000:0.0:0:17,602,680,030:0:0:0:0: 122 | 768:2D:13.552910187281668:5000:0.0:0:17,602,680,030:0:0:0:0: 123 | 80:2D:0.14685088954865932:5000:0.0:0:182,520,030:0:0:0:0: 124 | 80:2D:0.15474816039204597:5000:0.0:0:182,520,030:0:0:0:0: 125 | 80:2D:0.1433962918817997:5000:0.0:0:182,520,030:0:0:0:0: 126 | 832:2D:16.428213391453028:5000:0.0:0:20,667,000,030:0:0:0:0: 127 | 832:2D:16.385704359039664:5000:0.0:0:20,667,000,030:0:0:0:0: 128 | 832:2D:16.438180243596435:5000:0.0:0:20,667,000,030:0:0:0:0: 129 | 896:2D:19.450880957767367:5000:0.0:0:23,977,080,030:0:0:0:0: 130 | 896:2D:19.388597080484033:5000:0.0:0:23,977,080,030:0:0:0:0: 131 | 896:2D:19.422455803491175:5000:0.0:0:23,977,080,030:0:0:0:0: 132 | 96:2D:0.19809488952159882:5000:0.0:0:265,080,030:0:0:0:0: 133 | 96:2D:0.20063416101038456:5000:0.0:0:265,080,030:0:0:0:0: 134 | 96:2D:0.20022231433540583:5000:0.0:0:265,080,030:0:0:0:0: 135 | 960:2D:23.05303905531764:5000:0.0:0:27,532,920,030:0:0:0:0: 136 | 960:2D:23.035011262632906:5000:0.0:0:27,532,920,030:0:0:0:0: 137 | 960:2D:23.043035962618887:5000:0.0:0:27,532,920,030:0:0:0:0: 138 | -------------------------------------------------------------------------------- /Python/results/outfile_cip1e6_3010_openblas_1_numba_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Fri 30 Oct 2020 06:54:47 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | numpy version: 1.19.3 5 | blas_mkl_info: 6 | NOT AVAILABLE 7 | blis_info: 8 | NOT AVAILABLE 9 | openblas_info: 10 | libraries = ['openblas', 'openblas'] 11 | library_dirs = ['/usr/local/lib'] 12 | language = c 13 | define_macros = [('HAVE_CBLAS', None)] 14 | blas_opt_info: 15 | libraries = ['openblas', 'openblas'] 16 | library_dirs = ['/usr/local/lib'] 17 | language = c 18 | define_macros = [('HAVE_CBLAS', None)] 19 | lapack_mkl_info: 20 | NOT AVAILABLE 21 | 
openblas_lapack_info: 22 | libraries = ['openblas', 'openblas'] 23 | library_dirs = ['/usr/local/lib'] 24 | language = c 25 | define_macros = [('HAVE_CBLAS', None)] 26 | lapack_opt_info: 27 | libraries = ['openblas', 'openblas'] 28 | library_dirs = ['/usr/local/lib'] 29 | language = c 30 | define_macros = [('HAVE_CBLAS', None)] 31 | ############ END INFOS 32 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 33 | 1024:2D:25.949164061807096:5000:0.0:0:31,334,520,030:0:0:0:0: 34 | 1024:2D:25.73419304471463:5000:0.0:0:31,334,520,030:0:0:0:0: 35 | 1024:2D:25.849803181365132:5000:0.0:0:31,334,520,030:0:0:0:0: 36 | 1088:2D:30.946544421836734:5000:0.0:0:35,381,880,030:0:0:0:0: 37 | 1088:2D:30.94564885739237:5000:0.0:0:35,381,880,030:0:0:0:0: 38 | 1088:2D:31.057795187458396:5000:0.0:0:35,381,880,030:0:0:0:0: 39 | 112:2D:0.28017475362867117:5000:0.0:0:363,000,030:0:0:0:0: 40 | 112:2D:0.2789255799725652:5000:0.0:0:363,000,030:0:0:0:0: 41 | 112:2D:0.2804556516930461:5000:0.0:0:363,000,030:0:0:0:0: 42 | 1152:2D:35.12991729192436:5000:0.0:0:39,675,000,030:0:0:0:0: 43 | 1152:2D:35.271447028033435:5000:0.0:0:39,675,000,030:0:0:0:0: 44 | 1152:2D:35.18519844301045:5000:0.0:0:39,675,000,030:0:0:0:0: 45 | 1216:2D:40.32368126884103:5000:0.0:0:44,213,880,030:0:0:0:0: 46 | 1216:2D:40.02505576983094:5000:0.0:0:44,213,880,030:0:0:0:0: 47 | 1216:2D:40.08251027204096:5000:0.0:0:44,213,880,030:0:0:0:0: 48 | 128:2D:0.3689504563808441:5000:0.0:0:476,280,030:0:0:0:0: 49 | 128:2D:0.3641216456890106:5000:0.0:0:476,280,030:0:0:0:0: 50 | 128:2D:0.36573146656155586:5000:0.0:0:476,280,030:0:0:0:0: 51 | 1280:2D:44.38233935274184:5000:0.0:0:48,998,520,030:0:0:0:0: 52 | 1280:2D:44.64331135712564:5000:0.0:0:48,998,520,030:0:0:0:0: 53 | 1280:2D:44.49628258403391:5000:0.0:0:48,998,520,030:0:0:0:0: 54 | 144:2D:0.4553103093057871:5000:0.0:0:604,920,030:0:0:0:0: 55 | 
144:2D:0.46158995386213064:5000:0.0:0:604,920,030:0:0:0:0: 56 | 144:2D:0.4631213881075382:5000:0.0:0:604,920,030:0:0:0:0: 57 | 16:2D:0.012285195291042328:5000:0.0:0:5,880,030:0:0:0:0: 58 | 16:2D:0.014668242074549198:5000:0.0:0:5,880,030:0:0:0:0: 59 | 16:2D:0.014756091870367527:5000:0.0:0:5,880,030:0:0:0:0: 60 | 160:2D:0.5794518971815705:5000:0.0:0:748,920,030:0:0:0:0: 61 | 160:2D:0.5878423638641834:5000:0.0:0:748,920,030:0:0:0:0: 62 | 160:2D:0.5775797236710787:5000:0.0:0:748,920,030:0:0:0:0: 63 | 176:2D:0.7061210200190544:5000:0.0:0:908,280,030:0:0:0:0: 64 | 176:2D:0.698485110886395:5000:0.0:0:908,280,030:0:0:0:0: 65 | 176:2D:0.705122753046453:5000:0.0:0:908,280,030:0:0:0:0: 66 | 192:2D:0.8300388036295772:5000:0.0:0:1,083,000,030:0:0:0:0: 67 | 192:2D:0.8362217461690307:5000:0.0:0:1,083,000,030:0:0:0:0: 68 | 192:2D:0.8768561836332083:5000:0.0:0:1,083,000,030:0:0:0:0: 69 | 208:2D:0.9796262178570032:5000:0.0:0:1,273,080,030:0:0:0:0: 70 | 208:2D:0.9841585075482726:5000:0.0:0:1,273,080,030:0:0:0:0: 71 | 208:2D:0.9822040442377329:5000:0.0:0:1,273,080,030:0:0:0:0: 72 | 224:2D:1.1336676133796573:5000:0.0:0:1,478,520,030:0:0:0:0: 73 | 224:2D:1.1190910255536437:5000:0.0:0:1,478,520,030:0:0:0:0: 74 | 224:2D:1.1267205579206347:5000:0.0:0:1,478,520,030:0:0:0:0: 75 | 240:2D:1.302328492514789:5000:0.0:0:1,699,320,030:0:0:0:0: 76 | 240:2D:1.2992156548425555:5000:0.0:0:1,699,320,030:0:0:0:0: 77 | 240:2D:1.3084705611690879:5000:0.0:0:1,699,320,030:0:0:0:0: 78 | 256:2D:1.4450151827186346:5000:0.0:0:1,935,480,030:0:0:0:0: 79 | 256:2D:1.4494243171066046:5000:0.0:0:1,935,480,030:0:0:0:0: 80 | 256:2D:1.4568563401699066:5000:0.0:0:1,935,480,030:0:0:0:0: 81 | 272:2D:1.6744065806269646:5000:0.0:0:2,187,000,030:0:0:0:0: 82 | 272:2D:1.6994495019316673:5000:0.0:0:2,187,000,030:0:0:0:0: 83 | 272:2D:1.6783166276291013:5000:0.0:0:2,187,000,030:0:0:0:0: 84 | 288:2D:1.8673982163891196:5000:0.0:0:2,453,880,030:0:0:0:0: 85 | 288:2D:1.8469950994476676:5000:0.0:0:2,453,880,030:0:0:0:0: 86 | 
288:2D:1.8498567258939147:5000:0.0:0:2,453,880,030:0:0:0:0: 87 | 304:2D:2.138789201155305:5000:0.0:0:2,736,120,030:0:0:0:0: 88 | 304:2D:2.1072356775403023:5000:0.0:0:2,736,120,030:0:0:0:0: 89 | 304:2D:2.1174045158550143:5000:0.0:0:2,736,120,030:0:0:0:0: 90 | 32:2D:0.02759405504912138:5000:0.0:0:27,000,030:0:0:0:0: 91 | 32:2D:0.02994136232882738:5000:0.0:0:27,000,030:0:0:0:0: 92 | 32:2D:0.029498289339244366:5000:0.0:0:27,000,030:0:0:0:0: 93 | 320:2D:2.2806929163634777:5000:0.0:0:3,033,720,030:0:0:0:0: 94 | 320:2D:2.2924937335774302:5000:0.0:0:3,033,720,030:0:0:0:0: 95 | 320:2D:2.280422465875745:5000:0.0:0:3,033,720,030:0:0:0:0: 96 | 384:2D:3.275134852156043:5000:0.0:0:4,377,720,030:0:0:0:0: 97 | 384:2D:3.2794157788157463:5000:0.0:0:4,377,720,030:0:0:0:0: 98 | 384:2D:3.260727838613093:5000:0.0:0:4,377,720,030:0:0:0:0: 99 | 448:2D:4.45730035007:5000:0.0:0:5,967,480,030:0:0:0:0: 100 | 448:2D:4.467097621411085:5000:0.0:0:5,967,480,030:0:0:0:0: 101 | 448:2D:4.4602263467386365:5000:0.0:0:5,967,480,030:0:0:0:0: 102 | 48:2D:0.05659742746502161:5000:0.0:0:63,480,030:0:0:0:0: 103 | 48:2D:0.05496519897133112:5000:0.0:0:63,480,030:0:0:0:0: 104 | 48:2D:0.054156954400241375:5000:0.0:0:63,480,030:0:0:0:0: 105 | 512:2D:5.775571311824024:5000:0.0:0:7,803,000,030:0:0:0:0: 106 | 512:2D:5.800301362760365:5000:0.0:0:7,803,000,030:0:0:0:0: 107 | 512:2D:5.78529570158571:5000:0.0:0:7,803,000,030:0:0:0:0: 108 | 576:2D:7.395738513208926:5000:0.0:0:9,884,280,030:0:0:0:0: 109 | 576:2D:7.3865242740139365:5000:0.0:0:9,884,280,030:0:0:0:0: 110 | 576:2D:7.391360701993108:5000:0.0:0:9,884,280,030:0:0:0:0: 111 | 64:2D:0.09312237240374088:5000:0.0:0:115,320,030:0:0:0:0: 112 | 64:2D:0.093773796223104:5000:0.0:0:115,320,030:0:0:0:0: 113 | 64:2D:0.09363177046179771:5000:0.0:0:115,320,030:0:0:0:0: 114 | 640:2D:9.178438590839505:5000:0.0:0:12,211,320,030:0:0:0:0: 115 | 640:2D:9.175279817543924:5000:0.0:0:12,211,320,030:0:0:0:0: 116 | 640:2D:9.250253266654909:5000:0.0:0:12,211,320,030:0:0:0:0: 117 | 
704:2D:11.246690288186073:5000:0.0:0:14,784,120,030:0:0:0:0: 118 | 704:2D:11.25232170149684:5000:0.0:0:14,784,120,030:0:0:0:0: 119 | 704:2D:11.322884797118604:5000:0.0:0:14,784,120,030:0:0:0:0: 120 | 768:2D:13.530557720921934:5000:0.0:0:17,602,680,030:0:0:0:0: 121 | 768:2D:13.608560875058174:5000:0.0:0:17,602,680,030:0:0:0:0: 122 | 768:2D:13.51255946047604:5000:0.0:0:17,602,680,030:0:0:0:0: 123 | 80:2D:0.14676533732563257:5000:0.0:0:182,520,030:0:0:0:0: 124 | 80:2D:0.14185425639152527:5000:0.0:0:182,520,030:0:0:0:0: 125 | 80:2D:0.15576701797544956:5000:0.0:0:182,520,030:0:0:0:0: 126 | 832:2D:16.37858504988253:5000:0.0:0:20,667,000,030:0:0:0:0: 127 | 832:2D:16.33272674307227:5000:0.0:0:20,667,000,030:0:0:0:0: 128 | 832:2D:16.378873604349792:5000:0.0:0:20,667,000,030:0:0:0:0: 129 | 896:2D:19.356269624084234:5000:0.0:0:23,977,080,030:0:0:0:0: 130 | 896:2D:19.369749411009252:5000:0.0:0:23,977,080,030:0:0:0:0: 131 | 896:2D:19.44980021007359:5000:0.0:0:23,977,080,030:0:0:0:0: 132 | 96:2D:0.20346930250525475:5000:0.0:0:265,080,030:0:0:0:0: 133 | 96:2D:0.2002642396837473:5000:0.0:0:265,080,030:0:0:0:0: 134 | 96:2D:0.20190101489424706:5000:0.0:0:265,080,030:0:0:0:0: 135 | 960:2D:23.101354079321027:5000:0.0:0:27,532,920,030:0:0:0:0: 136 | 960:2D:22.869007047265768:5000:0.0:0:27,532,920,030:0:0:0:0: 137 | 960:2D:23.014365564100444:5000:0.0:0:27,532,920,030:0:0:0:0: 138 | -------------------------------------------------------------------------------- /Python/results/outfile_cip1e6_3010_openblas_8_nonumba_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Fri 30 Oct 2020 06:54:48 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | numpy version: 1.19.3 5 | blas_mkl_info: 6 | NOT AVAILABLE 7 | blis_info: 8 | NOT AVAILABLE 9 | openblas_info: 10 | libraries = ['openblas', 'openblas'] 11 | library_dirs = ['/usr/local/lib'] 12 | language = c 13 | define_macros = [('HAVE_CBLAS', None)] 14 | blas_opt_info: 15 | 
libraries = ['openblas', 'openblas'] 16 | library_dirs = ['/usr/local/lib'] 17 | language = c 18 | define_macros = [('HAVE_CBLAS', None)] 19 | lapack_mkl_info: 20 | NOT AVAILABLE 21 | openblas_lapack_info: 22 | libraries = ['openblas', 'openblas'] 23 | library_dirs = ['/usr/local/lib'] 24 | language = c 25 | define_macros = [('HAVE_CBLAS', None)] 26 | lapack_opt_info: 27 | libraries = ['openblas', 'openblas'] 28 | library_dirs = ['/usr/local/lib'] 29 | language = c 30 | define_macros = [('HAVE_CBLAS', None)] 31 | ############ END INFOS 32 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 33 | 1024:2D:25.990376983769238:5000:0.0:0:31,334,520,030:0:0:0:0: 34 | 1024:2D:25.88049151096493:5000:0.0:0:31,334,520,030:0:0:0:0: 35 | 1024:2D:25.884231513366103:5000:0.0:0:31,334,520,030:0:0:0:0: 36 | 1088:2D:30.980417019687593:5000:0.0:0:35,381,880,030:0:0:0:0: 37 | 1088:2D:30.894154717214406:5000:0.0:0:35,381,880,030:0:0:0:0: 38 | 1088:2D:31.40632290765643:5000:0.0:0:35,381,880,030:0:0:0:0: 39 | 112:2D:0.28040587063878775:5000:0.0:0:363,000,030:0:0:0:0: 40 | 112:2D:0.2801833273842931:5000:0.0:0:363,000,030:0:0:0:0: 41 | 112:2D:0.2816861253231764:5000:0.0:0:363,000,030:0:0:0:0: 42 | 1152:2D:35.165945584885776:5000:0.0:0:39,675,000,030:0:0:0:0: 43 | 1152:2D:35.287998910062015:5000:0.0:0:39,675,000,030:0:0:0:0: 44 | 1152:2D:35.18856031540781:5000:0.0:0:39,675,000,030:0:0:0:0: 45 | 1216:2D:40.084430295974016:5000:0.0:0:44,213,880,030:0:0:0:0: 46 | 1216:2D:40.01735743135214:5000:0.0:0:44,213,880,030:0:0:0:0: 47 | 1216:2D:40.06659886520356:5000:0.0:0:44,213,880,030:0:0:0:0: 48 | 128:2D:0.36161704268306494:5000:0.0:0:476,280,030:0:0:0:0: 49 | 128:2D:0.3641798524186015:5000:0.0:0:476,280,030:0:0:0:0: 50 | 128:2D:0.36848006024956703:5000:0.0:0:476,280,030:0:0:0:0: 51 | 1280:2D:44.41577055770904:5000:0.0:0:48,998,520,030:0:0:0:0: 52 | 1280:2D:44.4389075441286:5000:0.0:0:48,998,520,030:0:0:0:0: 53 | 
1280:2D:44.35948596615344:5000:0.0:0:48,998,520,030:0:0:0:0: 54 | 144:2D:0.46255667321383953:5000:0.0:0:604,920,030:0:0:0:0: 55 | 144:2D:0.46140685956925154:5000:0.0:0:604,920,030:0:0:0:0: 56 | 144:2D:0.46279801707714796:5000:0.0:0:604,920,030:0:0:0:0: 57 | 16:2D:0.010800760239362717:5000:0.0:0:5,880,030:0:0:0:0: 58 | 16:2D:0.012984641827642918:5000:0.0:0:5,880,030:0:0:0:0: 59 | 16:2D:0.011672683991491795:5000:0.0:0:5,880,030:0:0:0:0: 60 | 160:2D:0.5811404651030898:5000:0.0:0:748,920,030:0:0:0:0: 61 | 160:2D:0.5768856918439269:5000:0.0:0:748,920,030:0:0:0:0: 62 | 160:2D:0.5771815199404955:5000:0.0:0:748,920,030:0:0:0:0: 63 | 176:2D:0.7039821613579988:5000:0.0:0:908,280,030:0:0:0:0: 64 | 176:2D:0.693852080963552:5000:0.0:0:908,280,030:0:0:0:0: 65 | 176:2D:0.7009740537032485:5000:0.0:0:908,280,030:0:0:0:0: 66 | 192:2D:0.8422265406697989:5000:0.0:0:1,083,000,030:0:0:0:0: 67 | 192:2D:0.8362379316240549:5000:0.0:0:1,083,000,030:0:0:0:0: 68 | 192:2D:0.8335206676274538:5000:0.0:0:1,083,000,030:0:0:0:0: 69 | 208:2D:0.9774075001478195:5000:0.0:0:1,273,080,030:0:0:0:0: 70 | 208:2D:0.9764702832326293:5000:0.0:0:1,273,080,030:0:0:0:0: 71 | 208:2D:0.9922306900843978:5000:0.0:0:1,273,080,030:0:0:0:0: 72 | 224:2D:1.1393972495570779:5000:0.0:0:1,478,520,030:0:0:0:0: 73 | 224:2D:1.1257903268560767:5000:0.0:0:1,478,520,030:0:0:0:0: 74 | 224:2D:1.129525057040155:5000:0.0:0:1,478,520,030:0:0:0:0: 75 | 240:2D:1.2941890396177769:5000:0.0:0:1,699,320,030:0:0:0:0: 76 | 240:2D:1.2972707208245993:5000:0.0:0:1,699,320,030:0:0:0:0: 77 | 240:2D:1.3006160901859403:5000:0.0:0:1,699,320,030:0:0:0:0: 78 | 256:2D:1.4467438627034426:5000:0.0:0:1,935,480,030:0:0:0:0: 79 | 256:2D:1.4527342459186912:5000:0.0:0:1,935,480,030:0:0:0:0: 80 | 256:2D:1.4647307004779577:5000:0.0:0:1,935,480,030:0:0:0:0: 81 | 272:2D:1.6616202248260379:5000:0.0:0:2,187,000,030:0:0:0:0: 82 | 272:2D:1.66456853505224:5000:0.0:0:2,187,000,030:0:0:0:0: 83 | 272:2D:1.6569809075444937:5000:0.0:0:2,187,000,030:0:0:0:0: 84 | 
288:2D:1.844514176249504:5000:0.0:0:2,453,880,030:0:0:0:0: 85 | 288:2D:1.8455063859000802:5000:0.0:0:2,453,880,030:0:0:0:0: 86 | 288:2D:1.8661194052547216:5000:0.0:0:2,453,880,030:0:0:0:0: 87 | 304:2D:2.057537923566997:5000:0.0:0:2,736,120,030:0:0:0:0: 88 | 304:2D:2.0791408475488424:5000:0.0:0:2,736,120,030:0:0:0:0: 89 | 304:2D:2.063083954155445:5000:0.0:0:2,736,120,030:0:0:0:0: 90 | 32:2D:0.03010358102619648:5000:0.0:0:27,000,030:0:0:0:0: 91 | 32:2D:0.02739813458174467:5000:0.0:0:27,000,030:0:0:0:0: 92 | 32:2D:0.02769650984555483:5000:0.0:0:27,000,030:0:0:0:0: 93 | 320:2D:2.273933525197208:5000:0.0:0:3,033,720,030:0:0:0:0: 94 | 320:2D:2.268358364701271:5000:0.0:0:3,033,720,030:0:0:0:0: 95 | 320:2D:2.2925059562548995:5000:0.0:0:3,033,720,030:0:0:0:0: 96 | 384:2D:3.274610214866698:5000:0.0:0:4,377,720,030:0:0:0:0: 97 | 384:2D:3.2616136306896806:5000:0.0:0:4,377,720,030:0:0:0:0: 98 | 384:2D:3.289866767823696:5000:0.0:0:4,377,720,030:0:0:0:0: 99 | 448:2D:4.46269295271486:5000:0.0:0:5,967,480,030:0:0:0:0: 100 | 448:2D:4.470501432195306:5000:0.0:0:5,967,480,030:0:0:0:0: 101 | 448:2D:4.454831789247692:5000:0.0:0:5,967,480,030:0:0:0:0: 102 | 48:2D:0.051958877593278885:5000:0.0:0:63,480,030:0:0:0:0: 103 | 48:2D:0.052964831702411175:5000:0.0:0:63,480,030:0:0:0:0: 104 | 48:2D:0.0520805437117815:5000:0.0:0:63,480,030:0:0:0:0: 105 | 512:2D:5.778484258800745:5000:0.0:0:7,803,000,030:0:0:0:0: 106 | 512:2D:5.77358634583652:5000:0.0:0:7,803,000,030:0:0:0:0: 107 | 512:2D:5.7967279851436615:5000:0.0:0:7,803,000,030:0:0:0:0: 108 | 576:2D:7.392704793252051:5000:0.0:0:9,884,280,030:0:0:0:0: 109 | 576:2D:7.4041057750582695:5000:0.0:0:9,884,280,030:0:0:0:0: 110 | 576:2D:7.398675392381847:5000:0.0:0:9,884,280,030:0:0:0:0: 111 | 64:2D:0.09544887952506542:5000:0.0:0:115,320,030:0:0:0:0: 112 | 64:2D:0.09682985115796328:5000:0.0:0:115,320,030:0:0:0:0: 113 | 64:2D:0.0936248330399394:5000:0.0:0:115,320,030:0:0:0:0: 114 | 640:2D:9.201745353639126:5000:0.0:0:12,211,320,030:0:0:0:0: 115 | 
640:2D:9.221219033002853:5000:0.0:0:12,211,320,030:0:0:0:0: 116 | 640:2D:9.261528371833265:5000:0.0:0:12,211,320,030:0:0:0:0: 117 | 704:2D:11.298940861597657:5000:0.0:0:14,784,120,030:0:0:0:0: 118 | 704:2D:11.304005291312933:5000:0.0:0:14,784,120,030:0:0:0:0: 119 | 704:2D:11.32462505530566:5000:0.0:0:14,784,120,030:0:0:0:0: 120 | 768:2D:13.52604515478015:5000:0.0:0:17,602,680,030:0:0:0:0: 121 | 768:2D:13.603359125554562:5000:0.0:0:17,602,680,030:0:0:0:0: 122 | 768:2D:13.602015710435808:5000:0.0:0:17,602,680,030:0:0:0:0: 123 | 80:2D:0.144421492703259:5000:0.0:0:182,520,030:0:0:0:0: 124 | 80:2D:0.14133281912654638:5000:0.0:0:182,520,030:0:0:0:0: 125 | 80:2D:0.18167520128190517:5000:0.0:0:182,520,030:0:0:0:0: 126 | 832:2D:16.27998843882233:5000:0.0:0:20,667,000,030:0:0:0:0: 127 | 832:2D:16.26023083087057:5000:0.0:0:20,667,000,030:0:0:0:0: 128 | 832:2D:16.440659024752676:5000:0.0:0:20,667,000,030:0:0:0:0: 129 | 896:2D:19.518818614073098:5000:0.0:0:23,977,080,030:0:0:0:0: 130 | 896:2D:19.403984899632633:5000:0.0:0:23,977,080,030:0:0:0:0: 131 | 896:2D:19.301187720149755:5000:0.0:0:23,977,080,030:0:0:0:0: 132 | 96:2D:0.20232235547155142:5000:0.0:0:265,080,030:0:0:0:0: 133 | 96:2D:0.2001691460609436:5000:0.0:0:265,080,030:0:0:0:0: 134 | 96:2D:0.2223272491246462:5000:0.0:0:265,080,030:0:0:0:0: 135 | 960:2D:23.104529906064272:5000:0.0:0:27,532,920,030:0:0:0:0: 136 | 960:2D:22.9529911801219:5000:0.0:0:27,532,920,030:0:0:0:0: 137 | 960:2D:23.057464035227895:5000:0.0:0:27,532,920,030:0:0:0:0: 138 | -------------------------------------------------------------------------------- /Python/results/outfile_cip1e6_3110_openblas_1_numba_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Sat 31 Oct 2020 07:25:56 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | numpy version: 1.19.3 5 | blas_mkl_info: 6 | NOT AVAILABLE 7 | blis_info: 8 | NOT AVAILABLE 9 | openblas_info: 10 | libraries = ['openblas', 'openblas'] 11 | 
library_dirs = ['/usr/local/lib'] 12 | language = c 13 | define_macros = [('HAVE_CBLAS', None)] 14 | blas_opt_info: 15 | libraries = ['openblas', 'openblas'] 16 | library_dirs = ['/usr/local/lib'] 17 | language = c 18 | define_macros = [('HAVE_CBLAS', None)] 19 | lapack_mkl_info: 20 | NOT AVAILABLE 21 | openblas_lapack_info: 22 | libraries = ['openblas', 'openblas'] 23 | library_dirs = ['/usr/local/lib'] 24 | language = c 25 | define_macros = [('HAVE_CBLAS', None)] 26 | lapack_opt_info: 27 | libraries = ['openblas', 'openblas'] 28 | library_dirs = ['/usr/local/lib'] 29 | language = c 30 | define_macros = [('HAVE_CBLAS', None)] 31 | ############ END INFOS 32 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 33 | 1024:2D:25.916580071672797:5000:0.0:0:31,334,520,030:0:0:0:0: 34 | 1024:2D:25.800488401204348:5000:0.0:0:31,334,520,030:0:0:0:0: 35 | 1024:2D:26.339626295492053:5000:0.0:0:31,334,520,030:0:0:0:0: 36 | 1088:2D:31.011876970529556:5000:0.0:0:35,381,880,030:0:0:0:0: 37 | 1088:2D:31.068309034220874:5000:0.0:0:35,381,880,030:0:0:0:0: 38 | 1088:2D:30.97180982492864:5000:0.0:0:35,381,880,030:0:0:0:0: 39 | 112:2D:0.2763606980443001:5000:0.0:0:363,000,030:0:0:0:0: 40 | 112:2D:0.2779390290379524:5000:0.0:0:363,000,030:0:0:0:0: 41 | 112:2D:0.27598789893090725:5000:0.0:0:363,000,030:0:0:0:0: 42 | 1152:2D:35.26515268813819:5000:0.0:0:39,675,000,030:0:0:0:0: 43 | 1152:2D:35.354687524959445:5000:0.0:0:39,675,000,030:0:0:0:0: 44 | 1152:2D:35.26118703186512:5000:0.0:0:39,675,000,030:0:0:0:0: 45 | 1216:2D:40.00194463785738:5000:0.0:0:44,213,880,030:0:0:0:0: 46 | 1216:2D:40.11727172508836:5000:0.0:0:44,213,880,030:0:0:0:0: 47 | 1216:2D:40.30685833469033:5000:0.0:0:44,213,880,030:0:0:0:0: 48 | 128:2D:0.361171243712306:5000:0.0:0:476,280,030:0:0:0:0: 49 | 128:2D:0.36739528458565474:5000:0.0:0:476,280,030:0:0:0:0: 50 | 128:2D:0.3583089131861925:5000:0.0:0:476,280,030:0:0:0:0: 51 | 
1280:2D:44.363957225345075:5000:0.0:0:48,998,520,030:0:0:0:0: 52 | 1280:2D:44.58323056064546:5000:0.0:0:48,998,520,030:0:0:0:0: 53 | 1280:2D:44.29927832540125:5000:0.0:0:48,998,520,030:0:0:0:0: 54 | 144:2D:0.4669204233214259:5000:0.0:0:604,920,030:0:0:0:0: 55 | 144:2D:0.4622843023389578:5000:0.0:0:604,920,030:0:0:0:0: 56 | 144:2D:0.45954594668000937:5000:0.0:0:604,920,030:0:0:0:0: 57 | 16:2D:0.013604938983917236:5000:0.0:0:5,880,030:0:0:0:0: 58 | 16:2D:0.013343727216124535:5000:0.0:0:5,880,030:0:0:0:0: 59 | 16:2D:0.013739155605435371:5000:0.0:0:5,880,030:0:0:0:0: 60 | 160:2D:0.5791207198053598:5000:0.0:0:748,920,030:0:0:0:0: 61 | 160:2D:0.587938416749239:5000:0.0:0:748,920,030:0:0:0:0: 62 | 160:2D:0.5798793658614159:5000:0.0:0:748,920,030:0:0:0:0: 63 | 176:2D:0.6979006668552756:5000:0.0:0:908,280,030:0:0:0:0: 64 | 176:2D:0.6973444717004895:5000:0.0:0:908,280,030:0:0:0:0: 65 | 176:2D:0.7006888510659337:5000:0.0:0:908,280,030:0:0:0:0: 66 | 192:2D:0.8349113315343857:5000:0.0:0:1,083,000,030:0:0:0:0: 67 | 192:2D:0.8265512259677052:5000:0.0:0:1,083,000,030:0:0:0:0: 68 | 192:2D:0.8301759734749794:5000:0.0:0:1,083,000,030:0:0:0:0: 69 | 208:2D:0.9746050862595439:5000:0.0:0:1,273,080,030:0:0:0:0: 70 | 208:2D:1.0018917517736554:5000:0.0:0:1,273,080,030:0:0:0:0: 71 | 208:2D:1.014046342112124:5000:0.0:0:1,273,080,030:0:0:0:0: 72 | 224:2D:1.1362475557252765:5000:0.0:0:1,478,520,030:0:0:0:0: 73 | 224:2D:1.1249706326052547:5000:0.0:0:1,478,520,030:0:0:0:0: 74 | 224:2D:1.1317811710759997:5000:0.0:0:1,478,520,030:0:0:0:0: 75 | 240:2D:1.3035553293302655:5000:0.0:0:1,699,320,030:0:0:0:0: 76 | 240:2D:1.3064141413196921:5000:0.0:0:1,699,320,030:0:0:0:0: 77 | 240:2D:1.291368279606104:5000:0.0:0:1,699,320,030:0:0:0:0: 78 | 256:2D:1.4656198639422655:5000:0.0:0:1,935,480,030:0:0:0:0: 79 | 256:2D:1.4578202357515693:5000:0.0:0:1,935,480,030:0:0:0:0: 80 | 256:2D:1.4620645502582192:5000:0.0:0:1,935,480,030:0:0:0:0: 81 | 272:2D:1.6590912947431207:5000:0.0:0:2,187,000,030:0:0:0:0: 82 | 
272:2D:1.6622085403651:5000:0.0:0:2,187,000,030:0:0:0:0: 83 | 272:2D:1.673003830946982:5000:0.0:0:2,187,000,030:0:0:0:0: 84 | 288:2D:1.844526855275035:5000:0.0:0:2,453,880,030:0:0:0:0: 85 | 288:2D:1.8627283489331603:5000:0.0:0:2,453,880,030:0:0:0:0: 86 | 288:2D:1.860102922655642:5000:0.0:0:2,453,880,030:0:0:0:0: 87 | 304:2D:2.0707833636552095:5000:0.0:0:2,736,120,030:0:0:0:0: 88 | 304:2D:2.074995392933488:5000:0.0:0:2,736,120,030:0:0:0:0: 89 | 304:2D:2.0720826825127006:5000:0.0:0:2,736,120,030:0:0:0:0: 90 | 32:2D:0.02633024286478758:5000:0.0:0:27,000,030:0:0:0:0: 91 | 32:2D:0.029933192767202854:5000:0.0:0:27,000,030:0:0:0:0: 92 | 32:2D:0.029051032848656178:5000:0.0:0:27,000,030:0:0:0:0: 93 | 320:2D:2.2703040651977062:5000:0.0:0:3,033,720,030:0:0:0:0: 94 | 320:2D:2.279391803778708:5000:0.0:0:3,033,720,030:0:0:0:0: 95 | 320:2D:2.277445623651147:5000:0.0:0:3,033,720,030:0:0:0:0: 96 | 384:2D:3.2808080473914742:5000:0.0:0:4,377,720,030:0:0:0:0: 97 | 384:2D:3.277492160908878:5000:0.0:0:4,377,720,030:0:0:0:0: 98 | 384:2D:3.2573955934494734:5000:0.0:0:4,377,720,030:0:0:0:0: 99 | 448:2D:4.451490600593388:5000:0.0:0:5,967,480,030:0:0:0:0: 100 | 448:2D:4.486062265001237:5000:0.0:0:5,967,480,030:0:0:0:0: 101 | 448:2D:4.4656498013064265:5000:0.0:0:5,967,480,030:0:0:0:0: 102 | 48:2D:0.059486846439540386:5000:0.0:0:63,480,030:0:0:0:0: 103 | 48:2D:0.05190186947584152:5000:0.0:0:63,480,030:0:0:0:0: 104 | 48:2D:0.05222857277840376:5000:0.0:0:63,480,030:0:0:0:0: 105 | 512:2D:5.7951826909556985:5000:0.0:0:7,803,000,030:0:0:0:0: 106 | 512:2D:5.788912602700293:5000:0.0:0:7,803,000,030:0:0:0:0: 107 | 512:2D:5.7728263065218925:5000:0.0:0:7,803,000,030:0:0:0:0: 108 | 576:2D:7.4144272711128:5000:0.0:0:9,884,280,030:0:0:0:0: 109 | 576:2D:7.405420565977693:5000:0.0:0:9,884,280,030:0:0:0:0: 110 | 576:2D:7.415914220735431:5000:0.0:0:9,884,280,030:0:0:0:0: 111 | 64:2D:0.09315485879778862:5000:0.0:0:115,320,030:0:0:0:0: 112 | 64:2D:0.0935626607388258:5000:0.0:0:115,320,030:0:0:0:0: 113 | 
64:2D:0.09017869736999273:5000:0.0:0:115,320,030:0:0:0:0: 114 | 640:2D:9.162303588353097:5000:0.0:0:12,211,320,030:0:0:0:0: 115 | 640:2D:9.242444231174886:5000:0.0:0:12,211,320,030:0:0:0:0: 116 | 640:2D:9.388803776353598:5000:0.0:0:12,211,320,030:0:0:0:0: 117 | 704:2D:11.316949223168194:5000:0.0:0:14,784,120,030:0:0:0:0: 118 | 704:2D:11.238609235733747:5000:0.0:0:14,784,120,030:0:0:0:0: 119 | 704:2D:11.256517251953483:5000:0.0:0:14,784,120,030:0:0:0:0: 120 | 768:2D:13.571693879552186:5000:0.0:0:17,602,680,030:0:0:0:0: 121 | 768:2D:13.567195945419371:5000:0.0:0:17,602,680,030:0:0:0:0: 122 | 768:2D:13.56992055196315:5000:0.0:0:17,602,680,030:0:0:0:0: 123 | 80:2D:0.14569399785250425:5000:0.0:0:182,520,030:0:0:0:0: 124 | 80:2D:0.1464177407324314:5000:0.0:0:182,520,030:0:0:0:0: 125 | 80:2D:0.143758081831038:5000:0.0:0:182,520,030:0:0:0:0: 126 | 832:2D:17.67401034757495:5000:0.0:0:20,667,000,030:0:0:0:0: 127 | 832:2D:16.470516408793628:5000:0.0:0:20,667,000,030:0:0:0:0: 128 | 832:2D:16.289420110173523:5000:0.0:0:20,667,000,030:0:0:0:0: 129 | 896:2D:19.441933350637555:5000:0.0:0:23,977,080,030:0:0:0:0: 130 | 896:2D:19.873410986736417:5000:0.0:0:23,977,080,030:0:0:0:0: 131 | 896:2D:19.51648219488561:5000:0.0:0:23,977,080,030:0:0:0:0: 132 | 96:2D:0.20547055453062057:5000:0.0:0:265,080,030:0:0:0:0: 133 | 96:2D:0.20164159778505564:5000:0.0:0:265,080,030:0:0:0:0: 134 | 96:2D:0.20500116236507893:5000:0.0:0:265,080,030:0:0:0:0: 135 | 960:2D:22.940660229884088:5000:0.0:0:27,532,920,030:0:0:0:0: 136 | 960:2D:23.090015655383468:5000:0.0:0:27,532,920,030:0:0:0:0: 137 | 960:2D:23.030980593524873:5000:0.0:0:27,532,920,030:0:0:0:0: 138 | -------------------------------------------------------------------------------- /Python/results/outfile_cip1e6_3110_openblas_8_numba_gsrb: -------------------------------------------------------------------------------- 1 | ############ INFOS 2 | Sat 31 Oct 2020 07:25:56 PM CET 3 | Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz 4 | numpy version: 1.19.3 5 | 
blas_mkl_info: 6 | NOT AVAILABLE 7 | blis_info: 8 | NOT AVAILABLE 9 | openblas_info: 10 | libraries = ['openblas', 'openblas'] 11 | library_dirs = ['/usr/local/lib'] 12 | language = c 13 | define_macros = [('HAVE_CBLAS', None)] 14 | blas_opt_info: 15 | libraries = ['openblas', 'openblas'] 16 | library_dirs = ['/usr/local/lib'] 17 | language = c 18 | define_macros = [('HAVE_CBLAS', None)] 19 | lapack_mkl_info: 20 | NOT AVAILABLE 21 | openblas_lapack_info: 22 | libraries = ['openblas', 'openblas'] 23 | library_dirs = ['/usr/local/lib'] 24 | language = c 25 | define_macros = [('HAVE_CBLAS', None)] 26 | lapack_opt_info: 27 | libraries = ['openblas', 'openblas'] 28 | library_dirs = ['/usr/local/lib'] 29 | language = c 30 | define_macros = [('HAVE_CBLAS', None)] 31 | ############ END INFOS 32 | size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty 33 | 1024:2D:25.779769513756037:5000:0.0:0:31,334,520,030:0:0:0:0: 34 | 1024:2D:25.788296152837574:5000:0.0:0:31,334,520,030:0:0:0:0: 35 | 1024:2D:25.876603824086487:5000:0.0:0:31,334,520,030:0:0:0:0: 36 | 1088:2D:31.03622013423592:5000:0.0:0:35,381,880,030:0:0:0:0: 37 | 1088:2D:31.15648502483964:5000:0.0:0:35,381,880,030:0:0:0:0: 38 | 1088:2D:30.894304528832436:5000:0.0:0:35,381,880,030:0:0:0:0: 39 | 112:2D:0.27864178735762835:5000:0.0:0:363,000,030:0:0:0:0: 40 | 112:2D:0.2779154246672988:5000:0.0:0:363,000,030:0:0:0:0: 41 | 112:2D:0.28086254373192787:5000:0.0:0:363,000,030:0:0:0:0: 42 | 1152:2D:35.19956117682159:5000:0.0:0:39,675,000,030:0:0:0:0: 43 | 1152:2D:35.233670715242624:5000:0.0:0:39,675,000,030:0:0:0:0: 44 | 1152:2D:36.62239230144769:5000:0.0:0:39,675,000,030:0:0:0:0: 45 | 1216:2D:40.08507126849145:5000:0.0:0:44,213,880,030:0:0:0:0: 46 | 1216:2D:40.05363038927317:5000:0.0:0:44,213,880,030:0:0:0:0: 47 | 1216:2D:40.32748669479042:5000:0.0:0:44,213,880,030:0:0:0:0: 48 | 128:2D:0.4016906740143895:5000:0.0:0:476,280,030:0:0:0:0: 49 | 
128:2D:0.3632366331294179:5000:0.0:0:476,280,030:0:0:0:0: 50 | 128:2D:0.3649074761196971:5000:0.0:0:476,280,030:0:0:0:0: 51 | 1280:2D:44.71398639585823:5000:0.0:0:48,998,520,030:0:0:0:0: 52 | 1280:2D:44.323860792443156:5000:0.0:0:48,998,520,030:0:0:0:0: 53 | 1280:2D:44.39863499626517:5000:0.0:0:48,998,520,030:0:0:0:0: 54 | 144:2D:0.46660248655825853:5000:0.0:0:604,920,030:0:0:0:0: 55 | 144:2D:0.4631348978728056:5000:0.0:0:604,920,030:0:0:0:0: 56 | 144:2D:0.463198509067297:5000:0.0:0:604,920,030:0:0:0:0: 57 | 16:2D:0.014478026889264584:5000:0.0:0:5,880,030:0:0:0:0: 58 | 16:2D:0.014469419606029987:5000:0.0:0:5,880,030:0:0:0:0: 59 | 16:2D:0.014733599498867989:5000:0.0:0:5,880,030:0:0:0:0: 60 | 160:2D:0.5762892076745629:5000:0.0:0:748,920,030:0:0:0:0: 61 | 160:2D:0.5745977750048041:5000:0.0:0:748,920,030:0:0:0:0: 62 | 160:2D:0.573895763605833:5000:0.0:0:748,920,030:0:0:0:0: 63 | 176:2D:0.6957722883671522:5000:0.0:0:908,280,030:0:0:0:0: 64 | 176:2D:0.7005610447376966:5000:0.0:0:908,280,030:0:0:0:0: 65 | 176:2D:0.6939067151397467:5000:0.0:0:908,280,030:0:0:0:0: 66 | 192:2D:0.8270253138616681:5000:0.0:0:1,083,000,030:0:0:0:0: 67 | 192:2D:0.8421173915266991:5000:0.0:0:1,083,000,030:0:0:0:0: 68 | 192:2D:0.8234472339972854:5000:0.0:0:1,083,000,030:0:0:0:0: 69 | 208:2D:0.9814729001373053:5000:0.0:0:1,273,080,030:0:0:0:0: 70 | 208:2D:0.97907595615834:5000:0.0:0:1,273,080,030:0:0:0:0: 71 | 208:2D:0.9845217391848564:5000:0.0:0:1,273,080,030:0:0:0:0: 72 | 224:2D:1.1244754260405898:5000:0.0:0:1,478,520,030:0:0:0:0: 73 | 224:2D:1.1331719206646085:5000:0.0:0:1,478,520,030:0:0:0:0: 74 | 224:2D:1.129870911128819:5000:0.0:0:1,478,520,030:0:0:0:0: 75 | 240:2D:1.3037590980529785:5000:0.0:0:1,699,320,030:0:0:0:0: 76 | 240:2D:1.2949766870588064:5000:0.0:0:1,699,320,030:0:0:0:0: 77 | 240:2D:1.3031410342082381:5000:0.0:0:1,699,320,030:0:0:0:0: 78 | 256:2D:1.4531079968437552:5000:0.0:0:1,935,480,030:0:0:0:0: 79 | 256:2D:1.4533574506640434:5000:0.0:0:1,935,480,030:0:0:0:0: 80 | 
256:2D:1.4546176241710782:5000:0.0:0:1,935,480,030:0:0:0:0: 81 | 272:2D:1.692449102178216:5000:0.0:0:2,187,000,030:0:0:0:0: 82 | 272:2D:1.6729359347373247:5000:0.0:0:2,187,000,030:0:0:0:0: 83 | 272:2D:1.664042454212904:5000:0.0:0:2,187,000,030:0:0:0:0: 84 | 288:2D:1.8472860446199775:5000:0.0:0:2,453,880,030:0:0:0:0: 85 | 288:2D:1.8471090570092201:5000:0.0:0:2,453,880,030:0:0:0:0: 86 | 288:2D:1.8459211271256208:5000:0.0:0:2,453,880,030:0:0:0:0: 87 | 304:2D:2.073905267752707:5000:0.0:0:2,736,120,030:0:0:0:0: 88 | 304:2D:2.10368113219738:5000:0.0:0:2,736,120,030:0:0:0:0: 89 | 304:2D:2.0764477299526334:5000:0.0:0:2,736,120,030:0:0:0:0: 90 | 32:2D:0.03006592206656933:5000:0.0:0:27,000,030:0:0:0:0: 91 | 32:2D:0.031212538480758667:5000:0.0:0:27,000,030:0:0:0:0: 92 | 32:2D:0.03043258748948574:5000:0.0:0:27,000,030:0:0:0:0: 93 | 320:2D:2.317241075448692:5000:0.0:0:3,033,720,030:0:0:0:0: 94 | 320:2D:2.2795329205691814:5000:0.0:0:3,033,720,030:0:0:0:0: 95 | 320:2D:2.272922900505364:5000:0.0:0:3,033,720,030:0:0:0:0: 96 | 384:2D:3.2814943762496114:5000:0.0:0:4,377,720,030:0:0:0:0: 97 | 384:2D:3.2841247329488397:5000:0.0:0:4,377,720,030:0:0:0:0: 98 | 384:2D:3.290617191232741:5000:0.0:0:4,377,720,030:0:0:0:0: 99 | 448:2D:4.466005094349384:5000:0.0:0:5,967,480,030:0:0:0:0: 100 | 448:2D:4.454224092885852:5000:0.0:0:5,967,480,030:0:0:0:0: 101 | 448:2D:4.454984452575445:5000:0.0:0:5,967,480,030:0:0:0:0: 102 | 48:2D:0.055499603040516376:5000:0.0:0:63,480,030:0:0:0:0: 103 | 48:2D:0.05282107088714838:5000:0.0:0:63,480,030:0:0:0:0: 104 | 48:2D:0.05247209779918194:5000:0.0:0:63,480,030:0:0:0:0: 105 | 512:2D:5.781878042966127:5000:0.0:0:7,803,000,030:0:0:0:0: 106 | 512:2D:5.782811854965985:5000:0.0:0:7,803,000,030:0:0:0:0: 107 | 512:2D:5.78078774176538:5000:0.0:0:7,803,000,030:0:0:0:0: 108 | 576:2D:7.394512556493282:5000:0.0:0:9,884,280,030:0:0:0:0: 109 | 576:2D:7.428199429996312:5000:0.0:0:9,884,280,030:0:0:0:0: 110 | 576:2D:7.372738121077418:5000:0.0:0:9,884,280,030:0:0:0:0: 111 | 
64:2D:0.10753439925611019:5000:0.0:0:115,320,030:0:0:0:0: 112 | 64:2D:0.1325625739991665:5000:0.0:0:115,320,030:0:0:0:0: 113 | 64:2D:0.09478065278381109:5000:0.0:0:115,320,030:0:0:0:0: 114 | 640:2D:9.179769102483988:5000:0.0:0:12,211,320,030:0:0:0:0: 115 | 640:2D:9.245853329077363:5000:0.0:0:12,211,320,030:0:0:0:0: 116 | 640:2D:9.19542586337775:5000:0.0:0:12,211,320,030:0:0:0:0: 117 | 704:2D:11.221827185712755:5000:0.0:0:14,784,120,030:0:0:0:0: 118 | 704:2D:11.301228093914688:5000:0.0:0:14,784,120,030:0:0:0:0: 119 | 704:2D:11.307472635991871:5000:0.0:0:14,784,120,030:0:0:0:0: 120 | 768:2D:13.473660895600915:5000:0.0:0:17,602,680,030:0:0:0:0: 121 | 768:2D:13.595316030085087:5000:0.0:0:17,602,680,030:0:0:0:0: 122 | 768:2D:13.534112071618438:5000:0.0:0:17,602,680,030:0:0:0:0: 123 | 80:2D:0.16529652010649443:5000:0.0:0:182,520,030:0:0:0:0: 124 | 80:2D:0.14506397861987352:5000:0.0:0:182,520,030:0:0:0:0: 125 | 80:2D:0.14546671602874994:5000:0.0:0:182,520,030:0:0:0:0: 126 | 832:2D:16.47503675799817:5000:0.0:0:20,667,000,030:0:0:0:0: 127 | 832:2D:16.396178744733334:5000:0.0:0:20,667,000,030:0:0:0:0: 128 | 832:2D:16.408197452314198:5000:0.0:0:20,667,000,030:0:0:0:0: 129 | 896:2D:19.420089550316334:5000:0.0:0:23,977,080,030:0:0:0:0: 130 | 896:2D:19.54837659932673:5000:0.0:0:23,977,080,030:0:0:0:0: 131 | 896:2D:19.451757646165788:5000:0.0:0:23,977,080,030:0:0:0:0: 132 | 96:2D:0.20785998459905386:5000:0.0:0:265,080,030:0:0:0:0: 133 | 96:2D:0.22524954192340374:5000:0.0:0:265,080,030:0:0:0:0: 134 | 96:2D:0.2012389088049531:5000:0.0:0:265,080,030:0:0:0:0: 135 | 960:2D:23.01093055959791:5000:0.0:0:27,532,920,030:0:0:0:0: 136 | 960:2D:23.11443913076073:5000:0.0:0:27,532,920,030:0:0:0:0: 137 | 960:2D:23.35736613534391:5000:0.0:0:27,532,920,030:0:0:0:0: 138 | -------------------------------------------------------------------------------- /Python/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Fist argument is the type of 
accelerator usually intel mkl or openblas 3 | # second is the base dir for the problems 4 | OUTFILE="results/outfile_$(hostname -s)_$(date +%d%m)_${1}" 5 | 6 | problempath=${2:-'../problems/'} 7 | TYPE=${3:-'multigrid'} 8 | 9 | [ -d "$problempath" ] || exit 1 10 | [ -e "./benchmark_${TYPE}.py" ] || exit 1 11 | 12 | benchmark() { 13 | perf=$1 14 | threads=$2 15 | problem=$3 16 | numba=$4 17 | # Loading the interpreter takes roughly 500ms, 18 | # so the start of the run is actually delayed by an additional 500ms. 19 | # We want to make sure that perf starts measuring when 20 | # Python benchmark is in wait state. 21 | delay=4000 22 | delayPerf=3900 23 | 24 | export OPENBLAS_NUM_THREADS=$threads 25 | export MKL_NUM_THREADS=$threads 26 | export NUMEXPR_NUM_THREADS=$threads 27 | export VECLIB_MAXIMUM_THREADS=$threads 28 | export OMP_NUM_THREADS=$threads 29 | export NUMBA_NUM_THREADS=$threads 30 | cmd="./benchmark_${TYPE}.py $([ "$TYPE" = "gsrb" ] && echo "-v") -p $problem -d $delay $numba" 31 | if [ "$perf" = true ]; then 32 | cmd="perf stat -M GFLOPS -D $delayPerf $cmd" 33 | fi 34 | 35 | # if it does not work the first time, try it a second time, then give up 36 | x=$($cmd -t "$(date +%s%N)" 2>&1) || x=$($cmd -t "$(date +%s%N)" 2>&1) || { echo "$x" >>"./error.log" && exit 1; } 37 | 38 | out=$(echo "$x" | head -n 2 | tr '\n' ':' | tr ' ' ':' | awk -F':' '{print $12 ":" $5 ":" $8 ":"}') 39 | 40 | if [ "$perf" = true ]; then 41 | flops=$(echo "$x" | tail -n +3 | grep -i 'fp' | awk '{ print $1}' | tr '\n' ':') 42 | out="$out$flops" 43 | fi 44 | 45 | printf "%s\n" "$out" 46 | 47 | } 48 | 49 | perf=$(../scripts/check_perf.sh) 50 | reps=5 51 | 52 | get_infos() { 53 | ../scripts/getinfos.sh "np" "$perf" 54 | } 55 | 56 | [ -e "${OUTFILE}_1_numba_${TYPE}" ] || get_infos >>"${OUTFILE}_1_numba_${TYPE}" || exit 1 57 | [ -e "${OUTFILE}_8_numba_${TYPE}" ] || get_infos >>"${OUTFILE}_8_numba_${TYPE}" || exit 1 58 | [ -e "${OUTFILE}_1_nonumba_${TYPE}" ] || get_infos
>>"${OUTFILE}_1_nonumba_${TYPE}" || exit 1 59 | [ -e "${OUTFILE}_8_nonumba_${TYPE}" ] || get_infos >>"${OUTFILE}_8_nonumba_${TYPE}" || exit 1 60 | 61 | for _ in $(seq $reps); do 62 | for threads in 1 8; do 63 | for numba in "-n" " "; do 64 | for problem in "$problempath/"*.npy; do 65 | dim=$(echo "$problem" | awk -F'_' '{print $2}') 66 | N=$(echo "$problem" | awk -F'_' '{print $3}') 67 | N=${N%%\.npy} 68 | 69 | echo "$problem $numba $threads" 70 | EXTENSION=$([ "$numba" = " " ] && echo "nonumba" || echo "numba") 71 | x=$(benchmark $perf $threads "$problem" "$numba") || continue 72 | printf "%b:%b:%b\n" "$N" "$dim" "$x" >>"${OUTFILE}_${threads}_${EXTENSION}_${TYPE}" 73 | done 74 | done 75 | done 76 | done 77 | 78 | exit 0 79 | -------------------------------------------------------------------------------- /Python/scripts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | 4 | import numpy as np 5 | 6 | import problemgenerator.heatmap as hm 7 | import problemgenerator.femwave as fw 8 | import multipy.tools.operators as op 9 | import multipy.tools.util as util 10 | from multipy.multigrid import poisson_multigrid 11 | from multipy.GaussSeidel import GaussSeidel as gs 12 | from multipy.GaussSeidel import GaussSeidel_RB as gsrb 13 | 14 | 15 | logging.basicConfig(level=logging.INFO) 16 | logging.getLogger('multipy.multigrid').setLevel(level=logging.DEBUG) 17 | np.set_printoptions(precision=4, linewidth=180) 18 | 19 | 20 | @util.timer 21 | def run(N, iter=500): 22 | grid = hm.initMap_2D(N) 23 | A, U, F = op.reshape_grid(grid, hm.heat_sources_2D(N)) 24 | U = gs.gauss_seidel(A, F, U, max_iter=iter) 25 | grid[1:-1, 1:-1] = U.reshape((N - 2, N - 2)) 26 | return grid 27 | 28 | 29 | @util.timer 30 | def solve(N): 31 | grid = hm.initMap_2D(N) 32 | A, U, F = op.reshape_grid(grid, hm.heat_sources_2D(N)) 33 | U = np.linalg.solve(A, F) 34 | grid[1:-1, 1:-1] = U.reshape((N - 2, N - 2)) 35 | return grid 
36 | 37 | 38 | @util.timer 39 | def simulate_1D(N, max_iter=500): 40 | U = hm.initMap_1D(N) 41 | F = hm.heat_sources_1D(N) 42 | return gsrb.GS_RB(F, U, h=None, max_iter=max_iter) 43 | 44 | 45 | @util.timer 46 | def simulate_2D(N, max_iter=20000): 47 | U = hm.initMap_2D(N) 48 | F = hm.heat_sources_2D(N) 49 | return gsrb.GS_RB(F, U, h=None, max_iter=max_iter) 50 | 51 | 52 | @util.timer 53 | def simulate_3D(N, max_iter=500): 54 | U = hm.initMap_3D(N) 55 | F = np.zeros((N, N, N)) 56 | return gsrb.GS_RB(F, U, max_iter=max_iter) 57 | 58 | 59 | @util.timer 60 | def simulate_2D_multigrid(N, iter_cycle=5): 61 | U = hm.initMap_2D(N) 62 | F = hm.heat_sources_2D(N) 63 | return poisson_multigrid(F, U, 0, 2, 2, 2, iter_cycle) 64 | 65 | 66 | @util.timer 67 | def simulate_2D_FEM_multigrid(N, iter_cycle=5): 68 | U, F = fw.create_2D(N) 69 | h = 1 / N 70 | return poisson_multigrid(F, U, 0, 2, 2, 1, iter_cycle, h=h) 71 | 72 | 73 | @util.timer 74 | def simulate_3D_multigrid(N, iter_cycle=5): 75 | U = hm.initMap_3D(N) 76 | F = hm.heat_sources_3D(N) 77 | return poisson_multigrid(F, U, 0, 2, 2, 2, iter_cycle) 78 | 79 | 80 | def draw2D(U): 81 | import matplotlib.pyplot as plt 82 | if len(U.shape) == 1: 83 | n = int(np.sqrt(U.shape[0])) 84 | assert n * n == U.shape[0] 85 | plt.imshow(U.reshape((n, n)), cmap='RdBu_r', interpolation='nearest') 86 | else: 87 | plt.imshow(U, cmap='RdBu_r', interpolation='nearest') 88 | plt.show() 89 | 90 | 91 | def draw3D(map): 92 | import matplotlib.pyplot as plt 93 | fig = plt.figure() 94 | ax = fig.add_subplot(111, projection='3d') 95 | 96 | # Plot the surface. 
97 | for index, x in np.ndenumerate(map): 98 | if x > 0.5: 99 | ax.scatter(*index, c='black', alpha=max(x - 0.5, 0)) 100 | 101 | fig.show() 102 | 103 | 104 | if __name__ == "__main__": 105 | simulate_2D_FEM_multigrid(128, 50) 106 | -------------------------------------------------------------------------------- /Python/startup.py: -------------------------------------------------------------------------------- 1 | import optparse 2 | import time 3 | 4 | DEFAULT_PROBLEM = '../problems/problem_2D_100.npy' 5 | 6 | 7 | def getopts(): 8 | parser = optparse.OptionParser() 9 | parser.add_option( 10 | '-n', 11 | action='store_true', 12 | dest='numba', 13 | default=False, 14 | help='activates numba') 15 | parser.add_option( 16 | '-v', 17 | action='store_true', 18 | dest='verbose', 19 | default=False, 20 | help='makes it more verbose') 21 | parser.add_option( 22 | '-d', 23 | action='store', 24 | dest='delay', 25 | type=int, 26 | default=500, 27 | help='delays the start of the run by DELAY ms (default:500)') 28 | 29 | parser.add_option('-p', action='store', dest='path', 30 | default=DEFAULT_PROBLEM, 31 | help='path to a problem (npy file) that is loaded') 32 | 33 | parser.add_option( 34 | '-t', action='store', dest='start_time', 35 | type='int', 36 | help='unix time stamp in nanoseconds of the programm call') 37 | 38 | options, _ = parser.parse_args() 39 | if not options.numba: 40 | deactivate_numba_jit() 41 | return options 42 | 43 | 44 | def deactivate_numba_jit(): 45 | import os 46 | os.environ['NUMBA_DISABLE_JIT'] = '1' 47 | 48 | 49 | def wait(options): 50 | rest = options.delay / 1000 - (time.time() - options.start_time / 1e9) 51 | 52 | if 0.1 < rest: 53 | time.sleep(rest) 54 | else: 55 | # if there is no time left over we can not be sure that the measurement 56 | # is not spoiled with startup so exit 57 | raise Exception("Warumup took to long") 58 | -------------------------------------------------------------------------------- /graphs/d_rework_flops.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/d_rework_flops.png -------------------------------------------------------------------------------- /graphs/d_rework_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/d_rework_time.png -------------------------------------------------------------------------------- /graphs/gsrb-avx512_flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrb-avx512_flops.png -------------------------------------------------------------------------------- /graphs/gsrb-avx512_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrb-avx512_time.png -------------------------------------------------------------------------------- /graphs/gsrbD_flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrbD_flops.png -------------------------------------------------------------------------------- /graphs/gsrbD_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrbD_time.png -------------------------------------------------------------------------------- /graphs/gsrb_FLOPS_subplots.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrb_FLOPS_subplots.png -------------------------------------------------------------------------------- /graphs/gsrb_flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrb_flops.png -------------------------------------------------------------------------------- /graphs/gsrb_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrb_time.png -------------------------------------------------------------------------------- /graphs/gsrb_time_subplots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrb_time_subplots.png -------------------------------------------------------------------------------- /graphs/gsrbnonumba_flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrbnonumba_flops.png -------------------------------------------------------------------------------- /graphs/gsrbnonumba_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrbnonumba_time.png -------------------------------------------------------------------------------- /graphs/gsrbnumba_flops.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrbnumba_flops.png -------------------------------------------------------------------------------- /graphs/gsrbnumba_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/gsrbnumba_time.png -------------------------------------------------------------------------------- /graphs/heatmap.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/heatmap.gif -------------------------------------------------------------------------------- /graphs/multigridD_flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigridD_flops.png -------------------------------------------------------------------------------- /graphs/multigridD_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigridD_time.png -------------------------------------------------------------------------------- /graphs/multigrid_FLOPS_subplots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigrid_FLOPS_subplots.png -------------------------------------------------------------------------------- /graphs/multigrid_flops.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigrid_flops.png -------------------------------------------------------------------------------- /graphs/multigrid_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigrid_time.png -------------------------------------------------------------------------------- /graphs/multigrid_time_subplots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigrid_time_subplots.png -------------------------------------------------------------------------------- /graphs/multigridnonumba_flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigridnonumba_flops.png -------------------------------------------------------------------------------- /graphs/multigridnonumba_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigridnonumba_time.png -------------------------------------------------------------------------------- /graphs/multigridnumba_flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigridnumba_flops.png -------------------------------------------------------------------------------- /graphs/multigridnumba_time.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/multigridnumba_time.png -------------------------------------------------------------------------------- /graphs/wave.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/graphs/wave.gif -------------------------------------------------------------------------------- /problems/problem_1D_100.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/problems/problem_1D_100.npy -------------------------------------------------------------------------------- /problems/problem_2D_100.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/typohnebild/numpy-vs-mir/6780d20aa2c3e16814fbaf4aca9c6d22f0629a36/problems/problem_2D_100.npy -------------------------------------------------------------------------------- /scripts/check_perf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | paranoid=$(cat /proc/sys/kernel/perf_event_paranoid) 4 | perf=false 5 | if [ -x "$(command -v perf)" ] && [ "$paranoid" -lt 3 ] && perf list eventgroups | grep -q FLOPS; then 6 | perf=true 7 | fi 8 | echo "$perf" 9 | -------------------------------------------------------------------------------- /scripts/generate_problems.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | usage() { 4 | echo "Usage: $0 [ -p problempath ] [ -b (multigrid|gsrb|gsrb-avx512)] [-t problemtype(wave|heat)]" 5 | exit 2 6 | } 7 | 8 | problempath='../problems' 9 | buildconf='multigrid' 10 | typ='wave' 11 | while getopts 'p:b:t:' opts; do 12 | case $opts in 13 | p) problempath=$OPTARG ;; 
14 | b) buildconf=$OPTARG ;; 15 | t) typ=$OPTARG ;; 16 | *) usage ;; 17 | esac 18 | done 19 | 20 | # sanity check: problempath, buildconf and typ must all be non-empty 21 | [ -z "$problempath" ] && usage 22 | [ -z "$buildconf" ] && usage 23 | [ -z "$typ" ] && usage 24 | 25 | generate_problem() { 26 | ../Python/problemgenerator/generate.py "$problempath" 2 "$1" -t "$typ" 27 | } 28 | 29 | generate() { 30 | N=${1} # start 31 | STEP=${2} 32 | COUNT=${3} 33 | 34 | for _ in $(seq "$COUNT"); do 35 | generate_problem "$N" 36 | N=$((N + STEP)) 37 | done 38 | } 39 | 40 | [ -e "$problempath" ] || mkdir -p "$problempath" 41 | 42 | # delete existing problems 43 | rm -f "$problempath/"*.npy 44 | 45 | case $buildconf in 46 | "multigrid") 47 | generate 16 16 3 48 | generate 64 64 20 49 | generate 1280 128 10 50 | generate 2560 256 6 51 | ;; 52 | "gsrb") 53 | generate 16 16 20 54 | generate 384 64 15 55 | ;; 56 | "gsrb-avx512") 57 | generate 16 16 20 58 | generate 384 64 15 59 | generate 1536 256 5;; 60 | 61 | *) echo "$buildconf is not a supported buildconf" ;; 62 | esac 63 | -------------------------------------------------------------------------------- /scripts/getinfos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # script to get the cpu infos and the numpy config 4 | # if $1 is np then the numpy configuration is also printed 5 | 6 | get_cpu_infos(){ 7 | lscpu | grep -i 'model name' | awk -F: '{ print $2}' | sed -e 's/^\s*//g' 8 | } 9 | 10 | get_numpy_config(){ 11 | python -c 'import numpy; print("numpy version:", numpy.__version__); numpy.show_config()' 12 | } 13 | 14 | get_git_head(){ 15 | CurrCom=$(git rev-parse HEAD) 16 | echo "Current GIT-Commit: $CurrCom" 17 | } 18 | 19 | echo "############ INFOS" 20 | date 21 | get_git_head 22 | get_cpu_infos 23 | [ "$1" = "np" ] && get_numpy_config 24 | echo "############ END INFOS" 25 | 26 | if [ "$2" = true ] 27 | then 28 | echo
"size:dim:time:cycles:error:scalar_single:scalar_double:128b_packed_double:128b_packed_single:256b_packed_double:256b_packed_single:empty" 29 | else 30 | echo "size:dim:time:cycles:error:empty" 31 | fi 32 | -------------------------------------------------------------------------------- /scripts/gsrb_avx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | 3 | ./masterrun.sh -i -o -p ~/problems -b "gsrb-avx512" 4 | ./masterrun.sh -i -o -p ~/problems -b gsrb 5 | -------------------------------------------------------------------------------- /scripts/masterrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | usage() { 4 | echo "Usage: $0 [-dio] [ -p problempath ] [ -b (multigrid|gsrb|gsrb-avx512)]" 5 | exit 2 6 | } 7 | 8 | problempath='/tmp/problems/' 9 | buildconf='multigrid' 10 | # Flags that indicate which benchmarks should be executed; by default all are enabled 11 | RUN_INTEL=1 12 | RUN_OPENBLAS=1 13 | RUN_D=1 14 | while getopts 'diop:b:' opts; do 15 | case $opts in 16 | d) RUN_D=0 ;; 17 | i) RUN_INTEL=0 ;; 18 | o) RUN_OPENBLAS=0 ;; 19 | p) problempath=$OPTARG ;; 20 | b) buildconf=$OPTARG ;; 21 | *) usage ;; 22 | esac 23 | done 24 | 25 | [ "$buildconf" = "multigrid" ] || [ "$buildconf" = "gsrb" ] || [ "$buildconf" = "gsrb-avx512" ] || exit 1 26 | 27 | # source of virtual Python environment 28 | run_openblas() { 29 | cd ../Python/ || exit 1 30 | . ./venv/bin/activate || exit 1 31 | ./run.sh "openblas" "$problempath" "${buildconf}" 32 | deactivate 33 | } 34 | 35 | # source of intel Python environment 36 | run_intel() { 37 | cd ../Python/ || exit 1 38 | .
./intelpython3/bin/activate || exit 1 39 | ./run.sh "intel" "$problempath" "${buildconf}" 40 | # conda deactivate 41 | } 42 | 43 | run_d() { 44 | ./benchmark.sh "$problempath" "$1" 45 | } 46 | 47 | ./generate_problems.sh -p "$problempath" -b "$buildconf" -t "wave" || exit 1 48 | 49 | oldpwd=$(pwd) 50 | 51 | if [ $RUN_D -eq 1 ]; then 52 | cd ../D || exit 1 53 | dub build --force --build=release-nobounds --config="$buildconf" || exit 1 54 | for x in "field" "naive" "slice" "ndslice"; do 55 | run_d "./$buildconf -s $x" 56 | done 57 | fi 58 | 59 | cd "$oldpwd" || exit 1 60 | 61 | if [ $RUN_INTEL -eq 1 ]; then 62 | run_intel 63 | cd "$oldpwd" || exit 1 64 | fi 65 | 66 | if [ $RUN_OPENBLAS -eq 1 ]; then 67 | run_openblas 68 | cd "$oldpwd" || exit 1 69 | fi 70 | -------------------------------------------------------------------------------- /scripts/slurm_job_gsrb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH --nodes=1 4 | #SBATCH --tasks-per-node=1 5 | ## allocate nodes for 6 hours 6 | #SBATCH --time=06:00:00 7 | # job name 8 | #SBATCH --job-name=gsrb_dlang 9 | #SBATCH --constraint=hwperf 10 | # # first non-empty non-comment line ends SBATCH options 11 | 12 | #load required modules (compiler, MPI, ...) 13 | module load python 14 | # run 15 | srun ./gsrb_avx.sh 16 | --------------------------------------------------------------------------------