├── .github
    └── workflows
    │   └── benchmarks.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── benchmarks
    └── .gitignore
├── bin
    ├── .gitignore
    ├── benchmarks.csv
    ├── benchmarks.ipynb
    ├── collect.jl
    ├── plot.jl
    ├── table.jl
    └── versions.sh
├── java
    ├── .gitignore
    ├── pom.xml
    ├── setup.sh
    └── src
    │   └── main
    │       └── java
    │           ├── Complex.java
    │           ├── PerfBLAS.java
    │           └── PerfPure.java
├── lua
    └── lua-install.sh
├── perf.R
├── perf.c
├── perf.f90
├── perf.go
├── perf.jl
├── perf.js
├── perf.lua
├── perf.m
├── perf.nb
├── perf.py
├── perfutil.jl
├── randmtzig.c
├── rust
    ├── .gitignore
    ├── Cargo.lock
    ├── Cargo.toml
    ├── rust-toolchain
    └── src
    │   ├── direct_blas.rs
    │   ├── main.rs
    │   └── util.rs
└── scala
    ├── .gitignore
    ├── build.sbt
    └── src
        └── main
            └── scala
                └── perf.scala


/.github/workflows/benchmarks.yml:
--------------------------------------------------------------------------------
  1 | name: Benchmarks
  2 | 
  3 | on:
  4 |   pull_request:
  5 |   push:
  6 |     branches:
  7 |       - master
  8 |     tags: '*'
  9 |   workflow_dispatch:
 10 | 
 11 | concurrency:
 12 |   # Skip intermediate builds: all builds except for builds on the `master` or `release-*` branches
 13 |   # Cancel intermediate builds: only pull request builds
 14 |   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref != 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release-') || github.run_number }}
 15 |   cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
 16 | 
 17 | permissions:
 18 |   contents: read
 19 | 
 20 | jobs:
 21 |   test:
 22 |     runs-on: ${{ matrix.os }}
 23 |     defaults:
 24 |       run:
 25 |         shell: bash
 26 |     strategy:
 27 |       matrix:
 28 |         os: [ubuntu-latest]
 29 |         java-version: ['17']
 30 |         julia-version: ['1.9.3']
 31 |         python-version: ['3.10']
 32 |         numpy-version: ['1.23.2']
 33 |         gfortran-version: ['9']  # Note: unused since is built-in.
 34 |         rust-version: ['1.42.0']  # Note: unused since controlled by `rust/rust-toolchain`
 35 |         js-version: ['18']
 36 |         r-version: ['4.2.1']
 37 |         lua-version: ['latest']  # Note: unused since lua distribution manually downloaded
 38 |         go-version: ['1.19']
 39 | 
 40 |     steps:
 41 |       - uses: actions/checkout@v3
 42 |         with:
 43 |           persist-credentials: false
 44 |       - name: "Cache Julia"
 45 |         id: cache-julia
 46 |         uses: actions/cache@v2
 47 |         with:
 48 |           path: ~/julia
 49 |           key: ${{ runner.os }}-v${{ matrix.julia-version }}
 50 |       - name: "Build Julia"
 51 |         if: steps.cache-julia.outputs.cache-hit != 'true'
 52 |         uses: julia-actions/build-julia@v1
 53 |         with:
 54 |           ref: v${{ matrix.julia-version }}
 55 |       - name: "Set up dSFMT"
 56 |         run: |
 57 |           cd ~/
 58 |           mkdir -p dSFMT
 59 |           cd dSFMT
 60 |           wget https://github.com/MersenneTwister-Lab/dSFMT/archive/refs/tags/v2.2.4.tar.gz
 61 |           echo "39682961ecfba621a98dbb6610b6ae2b7d6add450d4f08d8d4edd0e10abd8174 v2.2.4.tar.gz" | sha256sum --check --status
 62 |           tar -xzf v2.2.4.tar.gz
 63 |           mv dSFMT-*/* ./
 64 |       - name: "Set up OpenBLAS"
 65 |         run: |
 66 |           sudo apt-get update && sudo apt-get install -y libopenblas-dev  # refresh the package index first; a stale index can make the install fail
 67 |       - name: "Set up Python"
 68 |         uses: actions/setup-python@v1
 69 |         with:
 70 |           python-version: ${{ matrix.python-version }}
 71 |       - name: "Set up NumPy"
 72 |         run: pip install numpy==${{ matrix.numpy-version }}
 73 |       - name: "Set up Rust"
 74 |         uses: actions-rs/toolchain@v1
 75 |         with:
 76 |           toolchain: ${{ matrix.rust-version }}
 77 |       - name: "Set up Java"
 78 |         uses: actions/setup-java@v2
 79 |         with:
 80 |           distribution: 'temurin'
 81 |           java-version: ${{ matrix.java-version }}
 82 |           cache: 'maven'
 83 |       - name: "Set up JavaScript"
 84 |         uses: actions/setup-node@v2
 85 |         with:
 86 |           node-version: ${{ matrix.js-version }}
 87 |       - name: "Set up R"
 88 |         uses: r-lib/actions/setup-r@v2
 89 |         with:
 90 |           r-version: ${{ matrix.r-version }}
 91 |       - name: "Set up LuaJit"
 92 |         run: |
 93 |           cd "$GITHUB_WORKSPACE/lua"  # checkout root provided by the runner; independent of the repository name
 94 |           ./lua-install.sh
 95 |       - name: "Set up Go"
 96 |         uses: actions/setup-go@v3
 97 |         with:
 98 |           go-version: ${{ matrix.go-version }}
 99 |       - name: "Run benchmark"
100 |         run: |
101 |           JULIAHOME=~/julia DSFMTDIR=~/dSFMT/ make gh_action_benchmarks.html
102 |       - name: "Print benchmark data"
103 |         run: cat gh_action_benchmarks.csv
104 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /perf.h
2 | /versions.csv
3 | /benchmarks.csv
4 | /benchmarks.txt
5 | /benchmarks.html
6 | /gopath
7 | /mods/*
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2009-2018 Jeff Bezanson, Stefan Karpinski, Viral B. Shah,
 4 | and other contributors.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | ifndef JULIAHOME
  2 | $(error JULIAHOME not defined. Set value to the root of the Julia source tree.)
  3 | endif
  4 | ifndef DSFMTDIR
  5 | $(error DSFMTDIR not defined. Set value to the root of the dSFMT source tree.)
  6 | endif
  7 | 
  8 | 
  9 | # Will make multi-line targets work
 10 | # (so we can use @for on the second line)
 11 | .ONESHELL:
 12 | 
 13 | include $(JULIAHOME)/Make.inc
 14 | include $(JULIAHOME)/deps/Versions.make
 15 | 
 16 | NODEJSBIN = node
 17 | 
 18 | ITERATIONS=$(shell seq 1 5)
 19 | 
 20 | #Use python2 for Python 2.x
 21 | PYTHON = python3
 22 | 
 23 | OCTAVE = octave-cli
 24 | 
 25 | ifeq ($(OS), WINNT)
 26 | MATHEMATICABIN = MathKernel
 27 | else ifeq ($(OS), Darwin)
 28 | MATHEMATICABIN = MathKernel
 29 | else
 30 | MATHEMATICABIN = math
 31 | endif
 32 | 
 33 | FFLAGS=-fexternal-blas
 34 | #gfortran cannot multiply matrices using 64-bit external BLAS.
 35 | ifeq ($(findstring gfortran, $(FC)), gfortran)
 36 | ifeq ($(USE_BLAS64), 1)
 37 | FFLAGS=
 38 | endif
 39 | FFLAGS+= -static-libgfortran
 40 | endif
 41 | 
 42 | #Which libm library am I using?
 43 | LIBMDIR = $(JULIAHOME)/usr/lib/
 44 | ifeq ($(USE_SYSTEM_LIBM), 0)
 45 | ifeq ($(USE_SYSTEM_OPENLIBM), 0)
 46 | LIBM = $(LIBMDIR)libopenlibm.a
 47 | endif
 48 | endif
 49 | 
 50 | default: benchmarks.html
 51 | 
 52 | export OMP_NUM_THREADS=1
 53 | export GOTO_NUM_THREADS=1
 54 | export OPENBLAS_NUM_THREADS=1
 55 | 
 56 | perf.h: $(JULIAHOME)/deps/Versions.make
 57 | 	echo '#include "cblas.h"' > $@
 58 | 	echo '#include "$(DSFMTDIR)/dSFMT.c"' >> $@
 59 | 
 60 | bin/perf%: perf.c perf.h
 61 | 	$(CC) -std=c99 -O$* $< -o $@  -I$(DSFMTDIR) -lopenblas -L$(LIBMDIR) $(LIBM) $(CFLAGS) -lpthread
 62 | 
 63 | bin/fperf%: perf.f90
 64 | 	mkdir -p mods/$@ #Modules for each binary go in separate directories
 65 | 	$(FC) $(FFLAGS) -Jmods/$@ -O$* $< -o $@ -lopenblas -L$(LIBMDIR) $(LIBM) -lpthread
 66 | 
 67 | benchmarks/c.csv: \
 68 | 	benchmarks/c0.csv \
 69 | 	benchmarks/c1.csv \
 70 | 	benchmarks/c2.csv \
 71 | 	benchmarks/c3.csv
 72 | 	cat $^ > $@
 73 | 
 74 | benchmarks/fortran.csv: \
 75 | 	benchmarks/fortran0.csv \
 76 | 	benchmarks/fortran1.csv \
 77 | 	benchmarks/fortran2.csv \
 78 | 	benchmarks/fortran3.csv
 79 | 	cat $^ > $@
 80 | 
 81 | 
 82 | benchmarks/c%.csv: bin/perf%
 83 | 	@for t in $(ITERATIONS); do $<; done >$@
 84 | 
 85 | benchmarks/fortran%.csv: bin/fperf%
 86 | 	@for t in $(ITERATIONS); do $<; done >$@
 87 | 
 88 | benchmarks/go.csv: export GOPATH=$(abspath gopath)
 89 | benchmarks/go.csv: perf.go
 90 | 	go env -w GO111MODULE=off
 91 | 	export CGO_LDFLAGS="-L$(LIBMDIR) -lopenblas"  # -L takes a directory; LIBM is a library file and may be empty
 92 | 	go get gonum.org/v1/netlib/blas/netlib
 93 | 	go get gonum.org/v1/gonum/mat
 94 | 	go get gonum.org/v1/gonum/stat
 95 | 	@for t in $(ITERATIONS); do go run $<; done >$@
 96 | 
 97 | benchmarks/julia.csv: perf.jl
 98 | 	@for t in $(ITERATIONS); do $(JULIAHOME)/usr/bin/julia $<; done >$@
 99 | 
100 | benchmarks/python.csv: perf.py
101 | 	@for t in $(ITERATIONS); do $(PYTHON) $<; done >$@
102 | 
103 | benchmarks/matlab.csv: perf.m
104 | 	@for t in $(ITERATIONS); do matlab -nojvm -singleCompThread -r 'perf; perf; exit' | grep ^matlab | tail -8; done >$@
105 | 
106 | benchmarks/octave.csv: perf.m
107 | 	@for t in $(ITERATIONS); do $(OCTAVE) -q --eval perf 2>/dev/null; done >$@
108 | 
109 | benchmarks/r.csv: perf.R
110 | 	@for t in $(ITERATIONS); do cat $< | R --vanilla --slave 2>/dev/null; done >$@
111 | 
112 | benchmarks/javascript.csv: perf.js
113 | 	@for t in $(ITERATIONS); do $(NODEJSBIN) $<; done >$@
114 | 
115 | benchmarks/mathematica.csv: perf.nb
116 | 	@for t in $(ITERATIONS); do $(MATHEMATICABIN) -noprompt -run "<<$<; Exit[]"; done >$@
117 | 
118 | benchmarks/lua.csv: perf.lua
119 | 	export BIT=64
120 | 	@for t in $(ITERATIONS); do ./lua/ulua/bin/scilua $<; done >$@
121 | 
122 | benchmarks/java.csv: java/src/main/java/PerfBLAS.java
123 | 	cd java
124 | 	sh setup.sh
125 | 	@for t in $(ITERATIONS); do mvn -q exec:java; done >../$@
126 | 
127 | benchmarks/scala.csv: scala/src/main/scala/perf.scala scala/build.sbt
128 | 	cd scala
129 | 	@for t in $(ITERATIONS); do sbt run; done >../$@
130 | 
131 | benchmarks/rust.csv: rust/src/main.rs rust/src/util.rs rust/Cargo.lock
132 | 	cd rust
133 | 	@for t in $(ITERATIONS); do cargo run --release -q; done >../$@
134 | 
135 | LANGUAGES = c fortran go java javascript julia lua mathematica matlab octave python r rust
136 | GH_ACTION_LANGUAGES = c fortran go java javascript julia lua python r rust
137 | 
138 | # These were formerly listed in LANGUAGES, but I can't get them to run
139 | # 2017-09-27 johnfgibson
140 | #	scala
141 | 
142 | BENCHMARKS = $(foreach lang,$(LANGUAGES),benchmarks/$(lang).csv)
143 | GH_ACTION_BENCHMARKS = $(foreach lang,$(GH_ACTION_LANGUAGES),benchmarks/$(lang).csv)
144 | 
145 | COLON_SEPARATED_GHA_LANGUAGES = $(shell echo $(GH_ACTION_LANGUAGES) | sed 's/ /:/g')
146 | 
147 | versions.csv: bin/versions.sh
148 | 	$^ >$@
149 | 
150 | gh_action_versions.csv: bin/versions.sh
151 | 	$^ $(COLON_SEPARATED_GHA_LANGUAGES) >$@
152 | 
153 | benchmarks.csv: bin/collect.jl $(BENCHMARKS)
154 | 	@$(call PRINT_JULIA, $^ >$@)
155 | 
156 | gh_action_benchmarks.csv: bin/collect.jl $(GH_ACTION_BENCHMARKS)
157 | 	@$(call PRINT_JULIA, $^ >$@)
158 | 
159 | benchmarks.html: bin/table.jl versions.csv benchmarks.csv
160 | 	@$(call PRINT_JULIA, $^ >$@)
161 | 
162 | gh_action_benchmarks.html: bin/table.jl gh_action_versions.csv gh_action_benchmarks.csv
163 | 	@$(call PRINT_JULIA, $^ >$@)
164 | 
165 | clean:  # remove every generated artifact, including the version tables and HTML reports
166 | 	@rm -rf perf.h bin/perf* bin/fperf* benchmarks/*.csv benchmarks.csv versions.csv benchmarks.html gh_action_versions.csv gh_action_benchmarks.csv gh_action_benchmarks.html mods *~ octave-core perf.log gopath/*
167 | 
168 | .PHONY: all perf clean default
169 | 
170 | .PRECIOUS: bin/perf0 bin/perf1 bin/perf2 bin/perf3
171 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Microbenchmarks
 2 | 
 3 | This is a collection of micro-benchmarks used to compare Julia's performance against
 4 | that of other languages.
 5 | It was formerly part of the Julia source tree.
 6 | The results of these benchmarks are used to generate the performance graph on the
 7 | [Julia benchmarks page](https://julialang.org/benchmarks).
 8 | 
 9 | ## Running benchmarks
10 | 
11 | This repository assumes that Julia has been built from source and that the environment
12 | variables `JULIAHOME` (root of the Julia source tree) and `DSFMTDIR` (root of the dSFMT source tree) are set.
13 | Both can also be set when invoking `make`, e.g. `make JULIAHOME=path/to/julia DSFMTDIR=path/to/dSFMT`.
14 | 
15 | To build binaries and run the benchmarks, simply run `make`.
16 | Note that this refers to GNU Make, so BSD users will need to run `gmake`.
17 | 
18 | ## Included languages:
19 | 
20 | * C
21 | * Fortran
22 | * Go
23 | * Java
24 | * JavaScript
25 | * Julia
26 | * LuaJIT
27 | * Mathematica
28 | * Matlab
29 | * Octave
30 | * Python
31 | * R
32 | * Rust
33 | * Scala (currently excluded from the default `make` run)
34 | 


--------------------------------------------------------------------------------
/benchmarks/.gitignore:
--------------------------------------------------------------------------------
1 | /*.csv
2 | 


--------------------------------------------------------------------------------
/bin/.gitignore:
--------------------------------------------------------------------------------
1 | /perf*
2 | /fperf*
3 | 


--------------------------------------------------------------------------------
/bin/benchmarks.csv:
--------------------------------------------------------------------------------
  1 | c,iteration_pi_sum,27.368069
  2 | c,matrix_multiply,72.014809
  3 | c,matrix_statistics,4.528999
  4 | c,parse_integers,0.099249
  5 | c,print_to_file,9.929895
  6 | c,recursion_fibonacci,0.022732
  7 | c,recursion_quicksort,0.258923
  8 | c,userfunc_mandelbrot,0.076702
  9 | fortran,iteration_pi_sum,27.368789
 10 | fortran,matrix_multiply,83.416557
 11 | fortran,matrix_statistics,6.984467
 12 | fortran,parse_integers,0.682692
 13 | fortran,print_to_file,59.287684
 14 | fortran,recursion_fibonacci,0.022466
 15 | fortran,recursion_quicksort,0.308118
 16 | fortran,userfunc_mandelbrot,0.053836
 17 | go,iteration_pi_sum,27.3706417
 18 | go,matrix_multiply,102.9984314
 19 | go,matrix_statistics,27.57979652
 20 | go,parse_integers,0.0953154785
 21 | go,print_to_file,17.37055717
 22 | go,recursion_fibonacci,0.04101122396666667
 23 | go,recursion_quicksort,0.32159370860000003
 24 | go,userfunc_mandelbrot,0.059685397066666666
 25 | java,iteration_pi_sum,29.723044
 26 | java,iteration_sinc_sum,0.118555
 27 | java,matrix_multiply,581.467297
 28 | java,matrix_statistics,22.776195
 29 | java,parse_integers,0.314691
 30 | java,print_to_file,95.850461
 31 | java,recursion_fibonacci,0.082739
 32 | java,recursion_quicksort,0.772211
 33 | java,userfunc_mandelbrot,0.109468
 34 | javascript,iteration_pi_sum,27.6
 35 | javascript,matrix_multiply,2288.0
 36 | javascript,matrix_statistics,63.3
 37 | javascript,parse_integers,0.5
 38 | javascript,print_to_file,72.0
 39 | javascript,recursion_fibonacci,0.08
 40 | javascript,recursion_quicksort,1.11
 41 | javascript,userfunc_mandelbrot,0.087
 42 | julia,iteration_pi_sum,27.670768
 43 | julia,matrix_multiply,70.249355
 44 | julia,matrix_statistics,7.396705
 45 | julia,parse_integers,0.217658
 46 | julia,print_to_file,10.868424
 47 | julia,recursion_fibonacci,0.030162
 48 | julia,recursion_quicksort,0.258018
 49 | julia,userfunc_mandelbrot,0.052706
 50 | lua,iteration_pi_sum,27.368
 51 | lua,matrix_multiply,77.87
 52 | lua,matrix_statistics,7.731
 53 | lua,parse_integers,0.097
 54 | lua,print_to_file,5.996
 55 | lua,recursion_fibonacci,0.027
 56 | lua,recursion_quicksort,0.404
 57 | lua,userfunc_mandelbrot,0.077
 58 | mathematica,iteration_pi_sum,39.862
 59 | mathematica,matrix_multiply,85.409
 60 | mathematica,matrix_statistics,33.94
 61 | mathematica,parse_integers,2.249
 62 | mathematica,print_to_file,664.313
 63 | mathematica,recursion_fibonacci,3.002
 64 | mathematica,recursion_quicksort,11.518
 65 | mathematica,userfunc_mandelbrot,1.403
 66 | matlab,iteration_pi_sum,27.56
 67 | matlab,matrix_multiply,83.906
 68 | matlab,matrix_statistics,36.674
 69 | matlab,parse_integers,17.688
 70 | matlab,print_to_file,1009.8
 71 | matlab,recursion_fibonacci,0.4
 72 | matlab,recursion_quicksort,0.612
 73 | matlab,userfunc_mandelbrot,0.755
 74 | octave,iteration_pi_sum,8691.11084938
 75 | octave,matrix_multiply,87.54110336
 76 | octave,matrix_statistics,209.43498611
 77 | octave,parse_integers,57.0089817
 78 | octave,print_to_file,1293.26105118
 79 | octave,recursion_fibonacci,228.35707664
 80 | octave,recursion_quicksort,575.16098022
 81 | octave,userfunc_mandelbrot,445.80197334
 82 | python,iteration_pi_sum,404.39462661743164
 83 | python,matrix_multiply,84.99646186828613
 84 | python,matrix_statistics,80.32107353210449
 85 | python,parse_integers,1.9617080688476562
 86 | python,print_to_file,47.04570770263672
 87 | python,recursion_fibonacci,2.1429061889648438
 88 | python,recursion_quicksort,9.729623794555664
 89 | python,userfunc_mandelbrot,5.036592483520508
 90 | r,iteration_pi_sum,320.0
 91 | r,matrix_multiply,595.0
 92 | r,matrix_statistics,95.0
 93 | r,parse_integers,5.0
 94 | r,print_to_file,1009.0
 95 | r,recursion_fibonacci,6.0
 96 | r,recursion_quicksort,15.0
 97 | r,userfunc_mandelbrot,15.0
 98 | rust,iteration_pi_sum,27.373818
 99 | rust,matrix_multiply,77.615658
100 | rust,matrix_statistics,6.497923
101 | rust,parse_integers,0.121999
102 | rust,print_to_file,8.81542
103 | rust,recursion_fibonacci,0.039227
104 | rust,recursion_quicksort,0.253416
105 | rust,userfunc_mandelbrot,0.057636
106 | 


--------------------------------------------------------------------------------
/bin/collect.jl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env julia
 2 | 
 3 | # Merge the raw benchmark CSV files named on the command line and print, for each
 4 | # (language, benchmark) pair, the smallest strictly positive time that was recorded.
 5 | 
 6 | const data = Dict{Tuple{String,String},Float64}()
 7 | 
 8 | for path in ARGS
 9 |     for row in eachline(path)
10 |         lang, bench, time_str = split(row, ',')
11 |         candidate = parse(Float64, time_str)
12 |         best_so_far = get(data, (lang, bench), Inf)
13 |         # Only a positive timing that beats the current best is stored.
14 |         if 0 < candidate < best_so_far
15 |             data[lang, bench] = candidate
16 |         end
17 |     end
18 | end
19 | 
20 | # Emit the minima in the sort order of the collected (key => time) pairs.
21 | for entry in sort!(collect(data))
22 |     (lang, bench), min_time = entry
23 |     println("$lang,$bench,$min_time")
24 | end
25 | 


--------------------------------------------------------------------------------
/bin/plot.jl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env julia
 2 | 
 3 | # Producing the Julia Microbenchmarks plot
 4 | 
 5 | # Load the required Julia packages
 6 | using Base.MathConstants
 7 | using CSV
 8 | using DataFrames
 9 | using Gadfly
10 | using StatsBase
11 | 
12 | # Load benchmark data from file
13 | benchmarks =
14 |     CSV.read("benchmarks.csv", DataFrame; header = ["language", "benchmark", "time"])
15 | 
16 | # Capitalize and decorate language names from datafile
17 | dict = Dict(
18 |     "c" => "C",
19 |     "julia" => "Julia",
20 |     "lua" => "LuaJIT",
21 |     "fortran" => "Fortran",
22 |     "java" => "Java",
23 |     "javascript" => "JavaScript",
24 |     "matlab" => "Matlab",
25 |     "mathematica" => "Mathematica",
26 |     "python" => "Python",
27 |     "octave" => "Octave",
28 |     "r" => "R",
29 |     "rust" => "Rust",
30 |     "go" => "Go",
31 | );
32 | benchmarks[!, :language] = [dict[lang] for lang in benchmarks[!, :language]]
33 | 
34 | # Normalize benchmark times by C times
35 | ctime = benchmarks[benchmarks[!, :language] .== "C", :]
36 | benchmarks = innerjoin(benchmarks, ctime, on = :benchmark, makeunique = true)
37 | select!(benchmarks, Not(:language_1))
38 | rename!(benchmarks, :time_1 => :ctime)
39 | benchmarks[!, :normtime] = benchmarks[!, :time] ./ benchmarks[!, :ctime];
40 | 
41 | # Compute the geometric mean for each language (one entry per distinct language)
42 | langs = [];
43 | means = [];
44 | priorities = [];
45 | for lang in unique(benchmarks[!, :language])  # iterate languages, not rows, so langmean has no duplicate rows
46 |     data = benchmarks[benchmarks[!, :language] .== lang, :]
47 |     gmean = geomean(data[!, :normtime])
48 |     push!(langs, lang)
49 |     push!(means, gmean)
50 |     if (lang == "C")
51 |         push!(priorities, 1)  # C sorts first
52 |     elseif (lang == "Julia")
53 |         push!(priorities, 2)  # Julia sorts directly after C
54 |     else
55 |         push!(priorities, 3)  # all other languages share the lowest priority
56 |     end
57 | end
58 | 
59 | # Add the geometric means back into the benchmarks dataframe
60 | langmean = DataFrame(language = langs, geomean = means, priority = priorities)
61 | benchmarks = innerjoin(benchmarks, langmean, on = :language)
62 | 
63 | # Put C first, Julia second, and sort the rest by geometric mean
64 | sort!(benchmarks, [:priority, :geomean]);
65 | sort!(langmean, [:priority, :geomean]);
66 | 
67 | p = plot(
68 |     benchmarks,
69 |     x = :language,
70 |     y = :normtime,
71 |     color = :benchmark,
72 |     Scale.y_log10,
73 |     Guide.ylabel(nothing),
74 |     Guide.xlabel(nothing),
75 |     Theme(
76 |         guide_title_position = :left,
77 |         colorkey_swatch_shape = :circle,
78 |         minor_label_font = "Georgia",
79 |         major_label_font = "Georgia",
80 |     ),
81 | )
82 | 
83 | draw(SVG("benchmarks.svg", 9inch, 9inch / golden), p)
84 | 


--------------------------------------------------------------------------------
/bin/table.jl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env julia
  2 | 
  3 | # This script generates an HTML table with the benchmark values and language versions.
  4 | 
  5 | import Statistics
  6 | import Printf
  7 | 
  8 | const benchmark_order = [
  9 |     "iteration_pi_sum",
 10 |     "recursion_fibonacci",
 11 |     "recursion_quicksort",
 12 |     "parse_integers",
 13 |     "print_to_file",
 14 |     "matrix_statistics",
 15 |     "matrix_multiply",
 16 |     "userfunc_mandelbrot",
 17 | ]
 18 | 
 19 | const versions = Dict{String, String}()
 20 | const benchmarks = Dict{String, Dict{String, Float64}}()
 21 | 
 22 | # Read versions.csv
 23 | for line in eachline(ARGS[1])
 24 |     lang, version = split(line, ',')
 25 |     versions[lang] = version
 26 | end
 27 | 
 28 | # Read benchmarks.csv
 29 | for line in eachline(ARGS[2])
 30 |     lang, bench, time_str = split(line, ',')
 31 |     times = get!(benchmarks, lang, Dict{String, Float64}())
 32 |     times[bench] = parse(Float64, time_str)
 33 | end
 34 | 
 35 | const labels = Dict{String, String}(
 36 |     "c"           => "C"                ,
 37 |     "julia"       => "Julia"            ,
 38 |     "lua"         => "LuaJIT"           ,
 39 |     "fortran"     => "Fortran"          ,
 40 |     "java"        => "Java"             ,
 41 |     "javascript"  => "JavaScript"       ,
 42 |     "matlab"      => "Matlab"           ,
 43 |     "python"      => "Python"           ,
 44 |     "mathematica" => "Mathe&shy;matica" ,
 45 |     "r"           => "R"                ,
 46 |     "octave"      => "Octave"           ,
 47 |     "go"          => "Go"               ,
 48 |     "rust"        => "Rust"             ,
 49 | )
 50 | 
 51 | # Sort key for a language: C first, Julia second, everything else by geometric mean.
 52 | function lang_by(lang::String)
 53 |     lang == "c" && return -Inf              # C is placed at the start of the list
 54 |     lang == "julia" && return -floatmax()   # Julia is sorted immediately after C
 55 |     # Geometric mean of the benchmark values, computed as exp(mean(log(x))); see
 56 |     # https://en.wikipedia.org/wiki/Geometric_mean#Relationship_with_logarithms
 57 |     times = collect(values(benchmarks[lang]))
 58 |     log_mean = Statistics.mean(log.(times))
 59 |     return exp(log_mean)
 60 | end
 61 | 
 62 | const language_order = sort!(collect(keys(benchmarks)), by=lang_by)
 63 | 
 64 | print("""
 65 | <!-- generated by JuliaLang/Microbenchmarks/bin/table.jl -->
 66 | 
 67 | <table class="benchmarks">
 68 |     <colgroup>
 69 |         <col class="name">
 70 |         <col class="relative" span="$(length(language_order))">
 71 |     </colgroup>
 72 |     <thead>
 73 |         <tr><th></th>
 74 | """)
 75 | 
 76 | for lang in language_order
 77 |     label = get(labels, lang, lang)
 78 |     println("            <th class=\"system\">$label</th>")
 79 | end
 80 | 
 81 | print("""
 82 |         </tr>
 83 |         <tr><td></td>
 84 | """)
 85 | 
 86 | for lang in language_order
 87 |     version = get(versions, lang, "???")
 88 |     println("            <th class=\"version\">$version</th>")
 89 | end
 90 | 
 91 | print("""
 92 |         </tr>
 93 |     </thead>
 94 |     <tbody>
 95 | """)
 96 | 
 97 | for benchmark in benchmark_order
 98 |     println("        <tr><th>$benchmark</th>")
 99 |     c_time = benchmarks["c"][benchmark]
100 |     for lang in language_order
101 |         rel_time = "n/a"
102 |         if haskey(benchmarks[lang], benchmark)
103 |             rel_time = Printf.@sprintf "%.2f" benchmarks[lang][benchmark]/c_time
104 |         end
105 |         println("            <td class=\"data\">$rel_time</td>")
106 |     end
107 |     println("        </tr>")
108 | end
109 | 
110 | print("""
111 |     </tbody>
112 | </table>
113 | 
114 | <!-- end of generated table -->
115 | """)
116 | 


--------------------------------------------------------------------------------
/bin/versions.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # User argument declaring what languages to query (colon-separated, e.g. "c:julia:rust"):
 4 | DEFAULT_LANGUAGES="c:fortran:go:java:javascript:julia:lua:mathematica:matlab:octave:python:r:rust"
 5 | LANGUAGES=${1:-$DEFAULT_LANGUAGES}  # fall back to the full list when no argument is given
 6 | 
 7 | LANGUAGES=":${LANGUAGES}:"  # pad with colons so each name can be matched as ":name:"
 8 | 
 9 | # Check if ":c:" in languages:
10 | if [[ $LANGUAGES == *":c:"* ]]; then
11 |     echo -n "c,gcc "
12 |     gcc -v 2>&1 | grep "gcc version" | cut -f3 -d" "
13 | fi
14 | 
15 | if [[ $LANGUAGES == *":fortran:"* ]]; then
16 |     echo -n "fortran,gcc "
17 |     gfortran -v 2>&1 | grep "gcc version" | cut -f3 -d" "
18 | fi
19 | 
20 | if [[ $LANGUAGES == *":go:"* ]]; then
21 |     echo -n go,
22 |     go version | cut -f3 -d" "
23 | fi
24 | 
25 | if [[ $LANGUAGES == *":java:"* ]]; then
26 |     echo -n java,
27 |     java -version 2>&1 | grep "version" | cut -f2 -d "\""
28 | fi
29 | 
30 | if [[ $LANGUAGES == *":javascript:"* ]]; then
31 |     echo -n "javascript,V8 "
32 |     node -e "console.log(process.versions.v8)"
33 | fi
34 | 
35 | if [[ $LANGUAGES == *":julia:"* ]]; then
36 |     echo -n "julia,"
37 |     $JULIAHOME/usr/bin/julia -v | cut -f3 -d" "
38 | fi
39 | 
40 | if [[ $LANGUAGES == *":lua:"* ]]; then
41 |     echo -n "lua,"
42 |     (cd lua; ./ulua/luajit/*/Linux/x64/luajit -v | cut -f2 -d" ")
43 | fi
44 | 
45 | if [[ $LANGUAGES == *":mathematica:"* ]]; then
46 |     echo -n "mathematica,"
47 |     echo quit | math -version | head -n 1 | cut -f2 -d" "
48 | fi
49 | 
50 | if [[ $LANGUAGES == *":matlab:"* ]]; then
51 |     echo -n "matlab,R"
52 |     matlab -nodisplay -nojvm -nosplash -r "version -release, quit" | tail -n3 | head -n1 | cut -f5 -d" " | sed "s/'//g"
53 | fi
54 | 
55 | if [[ $LANGUAGES == *":octave:"* ]]; then
56 |     echo -n "octave,"
57 |     octave-cli -v | grep version | cut -f4 -d" "
58 | fi
59 | 
60 | if [[ $LANGUAGES == *":python:"* ]]; then
61 |     echo -n "python,"
62 |     python3 -V 2>&1 | cut -f2 -d" "
63 | fi
64 | 
65 | if [[ $LANGUAGES == *":r:"* ]]; then
66 |     echo -n "r,"
67 |     R --version | grep "R version" | cut -f3 -d" "
68 | fi
69 | 
70 | if [[ $LANGUAGES == *":rust:"* ]]; then
71 |     echo -n "rust,"
72 |     (cd rust; rustc --version | cut -c 7- | sed 's/ ([0-9a-f]* /<br>(/g')
73 | fi
74 | 


--------------------------------------------------------------------------------
/java/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | 


--------------------------------------------------------------------------------
/java/pom.xml:
--------------------------------------------------------------------------------
 1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 2 |   <modelVersion>4.0.0</modelVersion>
 3 |   <groupId>julialang.org</groupId>
 4 |   <artifactId>javaBenchmarks</artifactId>
 5 |   <version>0.0.1-SNAPSHOT</version>
 6 |   <name>javaBenchmarks</name>
 7 |   <description>micro benchmarks for Julia done in Java</description>
 8 |   <dependencies>
 9 |       <dependency>
10 | 		  <groupId>org.jblas</groupId>
11 | 		  <artifactId>jblas</artifactId>
12 | 		  <version>1.2.3</version>
13 | 	  </dependency>
14 | 	  <dependency>
15 | 		<groupId>com.googlecode.efficient-java-matrix-library</groupId>
16 | 		<artifactId>ejml</artifactId>
17 | 		<version>0.23</version>
18 | 	  </dependency>
19 |   </dependencies>
20 |   <build>
21 |   <plugins>
22 | 	   <plugin>
23 | 	        <artifactId>maven-compiler-plugin</artifactId>
24 | 	        <version>3.0</version>
25 | 	        <configuration>
26 | 	            <source>1.7</source>
27 | 	            <target>1.7</target>
28 | 	        </configuration>
29 | 	    </plugin>
30 | 	    <plugin>
31 | 		  <groupId>org.codehaus.mojo</groupId>
32 | 		  <artifactId>exec-maven-plugin</artifactId>
33 | 		  <version>1.2.1</version>
34 | 		  <executions>
35 | 		    <execution>
36 | 		      <goals>
37 | 		        <goal>java</goal>
38 | 		      </goals>
39 | 		    </execution>
40 | 		  </executions>
41 | 		  <configuration>
42 | 		    <mainClass>PerfBLAS</mainClass>
43 | 		  </configuration>
44 | 		</plugin>
45 | 	  </plugins>
46 |   </build>
47 | </project>
48 | 


--------------------------------------------------------------------------------
/java/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # This file was formerly a part of Julia. License is MIT: https://julialang.org/license
3 | 
4 | mvn compile exec:java
5 | # requires maven and java 7
6 | 


--------------------------------------------------------------------------------
/java/src/main/java/Complex.java:
--------------------------------------------------------------------------------
 1 | public class Complex { // Immutable complex number with double-precision real and imaginary parts.
 2 |     private final double re; // real part
 3 |     private final double im; // imaginary part
 4 | 
 5 |     public Complex(double real, double imag) { // Builds the value real + imag*i.
 6 |         re = real;
 7 |         im = imag;
 8 |     }
 9 | 
10 |     public static double abs(Complex z) { // Modulus of z: sqrt(re^2 + im^2).
11 |         return Math.sqrt(z.re*z.re + z.im*z.im);
12 |     }
13 | 
14 |     public static Complex add(Complex a, Complex b) { // Component-wise sum, returned as a new instance.
15 |         return new Complex(a.re + b.re, a.im + b.im);
16 |     }
17 | 
18 |     public static Complex mul(Complex a, Complex b) { // Complex product of a and b, returned as a new instance.
19 |         return new Complex(a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re);
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/java/src/main/java/PerfBLAS.java:
--------------------------------------------------------------------------------
 1 | import java.io.FileNotFoundException;
 2 | import java.io.FileOutputStream;
 3 | import java.io.PrintStream;
 4 | import java.util.List;
 5 | import java.util.Random;
 6 | 
 7 | import org.jblas.DoubleMatrix;
 8 | 
 9 | /**
10 |  * Benchmark tests that call BLAS. 
11 |  */
12 | public class PerfBLAS extends PerfPure {
13 | 
14 |     public static void main(String[] args) {
15 |         PerfBLAS p = new PerfBLAS();
16 |         p.runBenchmarks();
17 |     }
18 | 
19 |     private double randmatmul(int i) {
20 |         DoubleMatrix a = DoubleMatrix.randn(i,i);
21 |         DoubleMatrix b = DoubleMatrix.randn(i,i);
22 |         return a.mmul(b).get(0);
23 |     }
24 | 
25 |     private double[] randmatstat(int t) {
26 |         int n=5;
27 |         DoubleMatrix p;
28 |         DoubleMatrix q;
29 |         DoubleMatrix v = new DoubleMatrix(new double[t][1]); //zeros(t,1);
30 |         DoubleMatrix w = new DoubleMatrix(new double[t][1]); //zeros(t,1);
31 |         for (int i=0; i < t; i++) {
32 |             DoubleMatrix a = DoubleMatrix.randn(n,n);
33 |             DoubleMatrix b = DoubleMatrix.randn(n,n);
34 |             DoubleMatrix c = DoubleMatrix.randn(n,n);
35 |             DoubleMatrix d = DoubleMatrix.randn(n,n);
36 | 
37 |             p = DoubleMatrix.concatHorizontally(DoubleMatrix.concatHorizontally(a, b),DoubleMatrix.concatHorizontally(c, d));
38 |             q = DoubleMatrix.concatVertically(DoubleMatrix.concatHorizontally(a, b),DoubleMatrix.concatHorizontally(c, d));
39 | 
40 |             DoubleMatrix x = p.transpose().mmul(p);
41 |             x = x.mmul(x);
42 |             x = x.mmul(x);
43 |             v.data[i]=x.diag().sum();
44 | 
45 |             x = q.transpose().mmul(q);
46 |             x = x.mmul(x);
47 |             x = x.mmul(x);
48 |             w.data[i]=x.diag().sum();
49 | 
50 |         }
51 | 
52 |         List<Double> vElements = v.elementsAsList();
53 |         List<Double> wElements = w.elementsAsList();
54 | 
55 |         return new double[]{stdev(vElements)/mean(vElements),stdev(wElements)/mean(wElements)};
56 |     }
57 | 
58 |     private static int mandel(double re, double im) {
59 |         int n = 0;
60 |         Complex z = new Complex(re, im);
61 |         Complex c = new Complex(re, im);
62 |         for (n=0; n<=79; ++n) {
63 |             if (Complex.abs(z) > 2.0) {
64 |                 n -= 1;
65 |                 break;
66 |             }
67 | 
68 |             // z = z*z + c
69 |             z = Complex.add(Complex.mul(z, z), c);
70 |         }
71 |         return n+1;
72 |     }
73 | 
74 | }
75 | 
76 | 


--------------------------------------------------------------------------------
/java/src/main/java/PerfPure.java:
--------------------------------------------------------------------------------
  1 | import java.io.FileNotFoundException;
  2 | import java.io.FileOutputStream;
  3 | import java.io.PrintStream;
  4 | import java.util.List;
  5 | import java.util.Random;
  6 | 
  7 | import org.ejml.simple.SimpleMatrix;
  8 | 
  9 | /**
 10 |  * (Below excerpt is printed on the website and repeated here)
 11 |  * 
 12 |  * These benchmarks, while not comprehensive, do test compiler performance on a range of common code patterns, 
 13 |  * such as function calls, string parsing, sorting, numerical loops, random number generation, and array operations. 
 14 |  * It is important to note that these benchmark implementations are not written for absolute maximal performance 
 15 |  * (the fastest code to compute fib(20) is the constant literal 6765). Rather, all of the benchmarks are written 
 16 |  * to test the performance of specific algorithms, expressed in a reasonable idiom in each language. 
 17 |  * In particular, all languages use the same algorithm: the Fibonacci benchmarks are all recursive while 
 18 |  * the pi summation benchmarks are all iterative; the “algorithm” for random matrix multiplication is to 
 19 |  * call LAPACK, except where that’s not possible, such as in JavaScript. The point of these benchmarks is to 
 20 |  * compare the performance of specific algorithms across language implementations, not to compare the fastest 
 21 |  * means of computing a result, which in most high-level languages relies on calling C code. 
 22 |  *
 23 |  */
 24 | public class PerfPure {
 25 |     
 26 |     protected final int NITER = 5;
 27 |     protected Random rand = new Random(0);
 28 | 
 29 |     public static void main(String[] args) {
 30 |         PerfPure p = new PerfPure();
 31 |         p.runBenchmarks();
 32 |     }
 33 |     
 34 |     void runBenchmarks() {
 35 | 
 36 |         long t, tmin;
 37 | 
 38 |         assert(fib(20) == 6765);
 39 |         int f = 0;
 40 |         tmin = Long.MAX_VALUE;
 41 |         for (int i=0; i<NITER; ++i) {
 42 |             t = System.nanoTime();
 43 |             f += fib(20);
 44 |             t = (System.nanoTime())-t;
 45 |             if (t < tmin) tmin = t;
 46 |         }
 47 |         print_perf("recursion_fibonacci", tmin);
 48 | 
 49 |         // parse_bin
 50 |         tmin = Long.MAX_VALUE;
 51 |         for (int i=0; i<NITER; ++i) {
 52 |             t = System.nanoTime();
 53 |             for (int k=0; k<1000; ++k) {
 54 |                 int n = rand.nextInt(Integer.MAX_VALUE);
 55 |                 String str = Integer.toHexString(n);
 56 |                 int m = Integer.valueOf(str, 16);
 57 |                 assert(m == n);
 58 |             }
 59 |             t = System.nanoTime()-t;
 60 |             if (t < tmin) tmin = t;
 61 |         }
 62 |         print_perf("parse_integers", tmin);
 63 | 
 64 |         // mandel
 65 |         int mandel_sum = 0;
 66 |         tmin = Long.MAX_VALUE;
 67 |         for (int i=0; i<NITER; ++i) {
 68 |             t = System.nanoTime();
 69 |             mandel_sum = mandelperf();
 70 |             t = System.nanoTime()-t;
 71 |             if (t < tmin) tmin = t;
 72 |         }
 73 |         assert(mandel_sum == 14791) : "value was "+mandel_sum;
 74 |         print_perf("userfunc_mandelbrot", tmin);
 75 | 
 76 |         // sort
 77 |         tmin = Long.MAX_VALUE;
 78 |         for (int i=0; i<NITER; ++i) {
 79 |             t = System.nanoTime();
 80 |             int j = 5000;
 81 |             double[] d = new double[j];
 82 |             while (--j>=0) {
 83 |                 d[j] = rand.nextDouble();
 84 |             }
 85 |             quicksort(d, 0, 5000-1);
 86 |             t = System.nanoTime()-t;
 87 |             if (t < tmin) tmin = t;
 88 |         }
 89 |         print_perf("recursion_quicksort", tmin);
 90 | 
 91 |         // pi sum
 92 |         double pi = 0;
 93 |         tmin = Long.MAX_VALUE;
 94 |         for (int i=0; i<NITER; ++i) {
 95 |             t = System.nanoTime();
 96 |             pi = pisum();
 97 |             t = System.nanoTime()-t;
 98 |             if (t < tmin) tmin = t;
 99 |         }
100 |         assert(Math.abs(pi-1.644834071848065) < 1e-12);
101 |         print_perf("iteration_pi_sum", tmin);
102 | 
103 |         // rand mat stat
104 |         double[] r;
105 |         tmin = Long.MAX_VALUE;
106 |         for (int i=0; i<NITER; ++i) {
107 |             t = System.nanoTime();
108 |             r = randmatstat(1000);
109 |             t = System.nanoTime()-t;
110 |             if (t < tmin) tmin = t;
111 |         }
112 |         print_perf("matrix_statistics", tmin);
113 | 
114 |         // rand mat mul
115 |         tmin = Long.MAX_VALUE;
116 |         for (int i=0; i<NITER; ++i) {
117 |             t = System.nanoTime();
118 |             double d = randmatmul(1000);
119 |             assert(0 <= d);
120 |             t = System.nanoTime()-t;
121 |             if (t < tmin) tmin = t;
122 |         }
123 |         print_perf("matrix_multiply", tmin);
124 | 
125 | 
126 |         tmin = Long.MAX_VALUE;
127 |         for (int i=0; i<NITER; ++i) {
128 |             t = System.nanoTime();
129 |             sinc_sum(1000);
130 |             t = System.nanoTime()-t;
131 |             if (t < tmin) tmin = t;
132 |         }
133 |         print_perf("iteration_sinc_sum", tmin);
134 | 
135 |         // printfd
136 |         tmin = Long.MAX_VALUE;
137 |         for (int i=0; i<NITER; ++i) {
138 |             t = System.nanoTime();
139 |             printfd(100000);
140 |             t = System.nanoTime()-t;
141 |             if (t < tmin) tmin = t;
142 |         }
143 |         print_perf("print_to_file", tmin);
144 |     }
145 | 
146 |     void printfd(int n) {
147 |         try {
148 |             FileOutputStream f = new FileOutputStream("/dev/null");
149 |             PrintStream ps = new PrintStream(f);
150 |             long i = 0;
151 |             for (i = 0; i < n; i++) {
152 |                 ps.println(i + " " + (i+1));
153 |             }
154 |             ps.close();
155 |         } catch (FileNotFoundException e) {
156 |             e.printStackTrace();
157 |         }
158 |     }
159 | 
160 | 
161 |     protected double sinc_sum(int n) {
162 | 
163 |         double total = 0;
164 |         for (int i=0; i < n; i++) {
165 |             double f= Math.PI*i;
166 |             double sinc = Math.sin(f)/f;
167 |             total+=sinc;
168 |         }
169 |         return total;
170 |     }
171 | 
172 |     private double randmatmul(int i) {
173 |         SimpleMatrix a = SimpleMatrix.random(i, i,  -1d, +1d, rand);
174 |         SimpleMatrix b = SimpleMatrix.random(i, i,  -1d, +1d, rand);
175 |         return a.mult(b).get(0);
176 |     }
177 | 
178 |     private double[] randmatstat(int t) {
179 |         int n=5;
180 |         SimpleMatrix p = new SimpleMatrix(new double[n][4*n]);
181 |         SimpleMatrix q = new SimpleMatrix(new double[2*n][2*n]);
182 |         SimpleMatrix v = new SimpleMatrix(new double[t][1]); //zeros(t,1);
183 |         SimpleMatrix w = new SimpleMatrix(new double[t][1]); //zeros(t,1);
184 |         for (int i=0; i < t; i++) {
185 |             SimpleMatrix a = SimpleMatrix.random(n, n,  -1d, +1d, rand);
186 |             SimpleMatrix b = SimpleMatrix.random(n, n,  -1d, +1d, rand);
187 |             SimpleMatrix c = SimpleMatrix.random(n, n,  -1d, +1d, rand);
188 |             SimpleMatrix d = SimpleMatrix.random(n, n,  -1d, +1d, rand);
189 | 
190 |             p.combine(0, 0*n, a);
191 |             p.combine(0, 1*n, b);
192 |             p.combine(0, 2*n, c);
193 |             p.combine(0, 3*n, d);
194 | 
195 |             q.combine(0, 0*n, a);
196 |             q.combine(0, 1*n, b);
197 |             q.combine(n, 0*n, c);
198 |             q.combine(n, 1*n, d);
199 | 
200 |             SimpleMatrix x = p.transpose().mult(p);
201 |             x = x.mult(x);
202 |             x = x.mult(x);
203 |             v.set(i, x.trace());
204 | 
205 |             x = q.transpose().mult(q);
206 |             x = x.mult(x);
207 |             x = x.mult(x);
208 |             w.set(i, x.trace());
209 | 
210 |         }
211 |         return new double[]{stdev(v)/mean(v),stdev(w)/mean(w)};
212 |     }
213 | 
214 |     public double stdev(List<Double> elements) {
215 |         double m = mean(elements);
216 |         double total = 0;
217 |         for(Double d:elements) {
218 |             double dif = (d-m);
219 |             total += dif*dif;
220 |         }
221 |         return Math.sqrt(total/(elements.size()-1));
222 |     }
223 | 
224 |     public double mean(List<Double> elements) {
225 |         double total = 0;
226 |         for(Double d:elements) {
227 |             total += d;
228 |         }
229 |         return total/elements.size();
230 |     }
231 | 
232 |     public double stdev(SimpleMatrix sm) {
233 |             double m = mean(sm);
234 |             double total = 0;
235 | 
236 |             int i = sm.getNumElements();
237 |             while (--i>=0) {
238 |                 double dif = (sm.get(i)-m);
239 |                 total += dif*dif;
240 |             }
241 |             return Math.sqrt(total/(sm.getNumElements()-1));
242 |         }
243 | 
244 |     public double mean(SimpleMatrix sm) {
245 |         double total = 0;
246 |         int i = sm.getNumElements();
247 |         while (--i>=0) {
248 |             total += sm.get(i);
249 |         }
250 |         return total/sm.getNumElements();
251 |     }
252 | 
253 |     protected void quicksort(double[] a, int lo, int hi) {
254 |         int i = lo;
255 |         int j = hi;
256 |         while (i < hi) {
257 |             double pivot = a[(lo+hi)/2];
258 |             // Partition
259 |             while (i <= j) {
260 |                 while (a[i] < pivot) {
261 |                     i = i + 1;
262 |                 }
263 |                 while (a[j] > pivot) {
264 |                     j = j - 1;
265 |                 }
266 |                 if (i <= j) {
267 |                     double t = a[i];
268 |                     a[i] = a[j];
269 |                     a[j] = t;
270 |                     i = i + 1;
271 |                     j = j - 1;
272 |                 }
273 |             }
274 | 
275 |             // Recursion for quicksort
276 |             if (lo < j) {
277 |                 quicksort(a, lo, j);
278 |             }
279 |             lo = i;
280 |             j = hi;
281 |         }
282 |     }
283 | 
284 |     protected double pisum() {
285 |         double sum = 0.0;
286 |         for (int j=0; j<500; ++j) {
287 |             sum = 0.0;
288 |             for (int k=1; k<=10000; ++k) {
289 |                 sum += 1.0/(k*k);
290 |             }
291 |         }
292 |         return sum;
293 |     }
294 | 
295 |     private int mandel(double zReal, double zImag) {
296 |         int n = 0;
297 |         double cReal = zReal;
298 |         double cImag = zImag;
299 |         for (n=0; n<=79; ++n) {
300 |             if (complexAbs2(zReal,zImag) > 4.0) {
301 |                 n -= 1;
302 |                 break;
303 |             }
304 | 
305 |             // z^2
306 |             double zSquaredReal = zReal*zReal-zImag*zImag;
307 |             double zSquaredImag = zReal*zImag+zImag*zReal;
308 | 
309 |             // +c
310 |             zReal = zSquaredReal+cReal;
311 |             zImag = zSquaredImag+cImag;
312 | 
313 |         }
314 |         return n+1;
315 |     }
316 | 
317 |     private double complexAbs(double zReal, double zImag) {
318 |         return Math.sqrt(zReal*zReal + zImag*zImag);
319 |     }
320 | 
321 |     private double complexAbs2(double zReal, double zImag) {
322 |         return zReal*zReal + zImag*zImag;
323 |     }
324 | 
325 |     protected int mandelperf() {
326 |         int mandel_sum = 0;
327 |         for (double re=-2.0; re<=0.5; re+=0.1) {
328 |             for (double im=-1.0; im<=1.0; im+=0.1) {
329 |                 int m = mandel(re,im);
330 |                 mandel_sum += m;
331 |             }
332 |         }
333 |         return mandel_sum;
334 |     }
335 | 
336 |     protected void print_perf(String name, long t) {
337 |         System.out.printf("java,%s,%.6f\n", name, t/(double)1E6);
338 |     }
339 | 
340 |     protected int fib(int n) {
341 |         return n < 2 ? n : fib(n-1) + fib(n-2);
342 |     }
343 | 
344 | }
345 | 
346 | 


--------------------------------------------------------------------------------
/lua/lua-install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Install lua-sci-lang as recommended via ulua
 3 | 
 4 | wget https://ulua.io/download/ulua~latest.zip
 5 | unzip ulua~latest.zip
 6 | sed -i 's/noconfirm  = false,/noconfirm  = true,/g' ulua/host/config.lua
 7 | ulua/bin/upkg add time
 8 | ulua/bin/upkg add sci
 9 | ulua/bin/upkg add sci-lang
10 | 


--------------------------------------------------------------------------------
/perf.R:
--------------------------------------------------------------------------------
  1 | require(compiler)
  2 | 
  3 | assert = function(bool) {
  4 |     if (!bool) stop('Assertion failed')
  5 | }
  6 | 
  7 | timeit = function(name, f, ..., times=5) {
  8 |     tmin = Inf
  9 |     f = cmpfun(f)
 10 |     for (t in 1:times) {
 11 |         t = system.time(f(...))["elapsed"]
 12 |         if (t < tmin) tmin = t
 13 |     }
 14 |     cat(sprintf("r,%s,%.8f\n", name, tmin*1000))
 15 | }
 16 | 
 17 | ## fib ##
 18 | 
 19 | fib = function(n) {
 20 |     if (n < 2) {
 21 |         return(n)
 22 |     } else {
 23 |         return(fib(n-1) + fib(n-2))
 24 |     }
 25 | }
 26 | 
# Check the known value first, then time fib(20) (timeit reports the fastest run).
assert(fib(20) == 6765)
timeit("recursion_fibonacci", fib, 20)
 29 | 
 30 | ## parse_int ##
 31 | 
 32 | parseintperf = function(t) {
 33 |     for (i in 1:t) {
 34 |         # R doesn't support uint32 values
 35 |         n = floor(runif(1, min=0, max=2^31-1))
 36 |         s = sprintf("0x%x", n)
 37 |         m = as.numeric(s)
 38 |         assert(m == n)
 39 |     }
 40 | }
 41 | 
# Time 1000 hex format/parse round trips per run.
timeit("parse_integers", parseintperf, 1000)
 43 | 
 44 | printfdperf = function(t) {
 45 |     fd<-file("/dev/null")
 46 |     on.exit(close(fd))
 47 |     for (i in 1:t) {
 48 |         s = sprintf("%d %d", i, i+1)
 49 | 	writeLines(s, fd)
 50 |     }
 51 | }
 52 | 
# Time writing 100000 formatted lines per run.
timeit("print_to_file", printfdperf, 100000)
 54 | 
 55 | ## quicksort ##
 56 | 
 57 | qsort = function(a) {
 58 |     qsort_kernel = function(lo, hi) {
 59 |         i = lo
 60 |         j = hi
 61 |         while (i < hi) {
 62 |             pivot = a[floor((lo+hi)/2)]
 63 |             while (i <= j) {
 64 |                 while (a[i] < pivot) i = i + 1
 65 |                 while (a[j] > pivot) j = j - 1
 66 |                 if (i <= j) {
 67 |                     t = a[i]
 68 |                     a[i] <<- a[j]
 69 |                     a[j] <<- t
 70 |                     i = i + 1;
 71 |                     j = j - 1;
 72 |                 }
 73 |             }
 74 |             if (lo < j) qsort_kernel(lo, j)
 75 |             lo = i
 76 |             j = hi
 77 |         }
 78 |     }
 79 |     qsort_kernel(1, length(a))
 80 |     return(a)
 81 | }
 82 | 
 83 | sortperf = function(n) {
 84 |     v = runif(n)
 85 |     return(qsort(v))
 86 | }
 87 | 
# Check that the output really is sorted, then time sorting 5000 random numbers.
assert(!is.unsorted(sortperf(5000)))
timeit('recursion_quicksort', sortperf, 5000)
 90 | 
 91 | ## mandel ##
 92 | Mod2 = function(z) {
 93 |      return(Re(z)*Re(z) + Im(z)*Im(z))
 94 | }
 95 | 
 96 | mandel = function(z) {
 97 |     c = z
 98 |     maxiter = 80
 99 |     for (n in 1:maxiter) {
100 |         if (Mod2(z) > 4) return(n-1)
101 |         z = z^2+c
102 |     }
103 |     return(maxiter)
104 | }
105 | 
106 | mandelperf = function() {
107 |     re = seq(-2,0.5,.1)
108 |     im = seq(-1,1,.1)
109 |     M = matrix(0.0,nrow=length(re),ncol=length(im))
110 |     count = 1
111 |     for (r in re) {
112 |         for (i in im) {
113 |             M[count] = mandel(complex(real=r,imag=i))
114 |             count = count + 1
115 |         }
116 |     }
117 |     return(M)
118 | }
119 | 
# Check the reference checksum over the whole grid, then time one grid evaluation per run.
assert(sum(mandelperf()) == 14791)
timeit("userfunc_mandelbrot", mandelperf)
122 | 
123 | ## pi_sum ##
124 | 
# Compute sum_{k=1..10000} 1/k^2 with an explicit loop, repeating the whole
# computation 500 times; the value of the last pass is returned.
pisum = function() {
    total = 0.0
    for (pass in 1:500) {
        total = 0.0
        for (k in 1:10000) {
            term = 1.0/(k*k)
            total = total + term
        }
    }
    return(total)
}
135 | 
# Check the partial sum against its known value; this benchmark is timed with a single run.
assert(abs(pisum()-1.644834071848065) < 1e-12);
timeit("iteration_pi_sum", pisum, times=1)
138 | 
139 | ## pi_sum_vec ##
140 | 
# Vectorised variant of pisum: evaluate sum(1/r^2) for r = 1..10000 five
# hundred times and return the first result.
pisumvec = function() {
    r = 1:10000
    sums = replicate(500, sum(1/((r)^2)))
    return(sums[1])
}
145 | 
146 | #assert(abs(pisumvec()-1.644834071848065) < 1e-12);
147 | #timeit("pi_sum_vec", pisumvec, times=10)
148 | 
149 | ## rand_mat_stat ##
150 | 
# Trace of (X'X)^4, where ^4 is the true matrix power (two squarings).
# Note that `M^4` in R raises each element to the 4th power, which is a
# different quantity from the matrix power computed by the C and Java versions.
.gram_pow4_trace = function(X) {
    G = t(X) %*% X
    G2 = G %*% G
    return(sum(diag(G2 %*% G2)))
}

# For each of `t` rounds draw four 5x5 standard-normal matrices a, b, c, d,
# build P = [a b c d] and Q = [a b; c d], and record trace((P'P)^4) and
# trace((Q'Q)^4). Returns c(sd(v)/mean(v), sd(w)/mean(w)) over the rounds.
randmatstat = function(t) {
    n = 5
    v = matrix(0, nrow=t)
    w = matrix(0, nrow=t)
    for (i in 1:t) {
        a = matrix(rnorm(n*n), ncol=n, nrow=n)
        b = matrix(rnorm(n*n), ncol=n, nrow=n)
        c = matrix(rnorm(n*n), ncol=n, nrow=n)
        d = matrix(rnorm(n*n), ncol=n, nrow=n)
        P = cbind(a,b,c,d)
        Q = rbind(cbind(a,b),cbind(c,d))
        v[i] = .gram_pow4_trace(P)
        w[i] = .gram_pow4_trace(Q)
    }
    s1 = apply(v,2,sd)/mean(v)
    s2 = apply(w,2,sd)/mean(w)
    return(c(s1,s2))
}
169 | 
# Time 1000 rounds of the random-matrix statistics per run.
timeit("matrix_statistics", randmatstat, 1000)
171 | 
172 | ## rand_mat_mul ##
173 | 
# Multiply two n x n matrices of uniform random numbers and return the product.
randmatmul = function(n) {
    A = matrix(runif(n*n), nrow=n, ncol=n)
    B = matrix(runif(n*n), nrow=n, ncol=n)
    product = A %*% B
    return(product)
}
179 | 
# Both factors hold values from runif(), so the first product entry cannot be negative.
assert(randmatmul(1000)[1] >= 0)
timeit("matrix_multiply", randmatmul, 1000)
182 | 


--------------------------------------------------------------------------------
/perf.c:
--------------------------------------------------------------------------------
  1 | // This file was formerly a part of Julia. License is MIT: https://julialang.org/license
  2 | 
  3 | #include <complex.h>
  4 | 
  5 | // include header file generated by make:
  6 | #define DSFMT_MEXP 19937
  7 | #include "perf.h"
  8 | #include "randmtzig.c"
  9 | 
 10 | double *myrand(int n) {
 11 |     double *d = (double *)malloc(n*sizeof(double));
 12 |     dsfmt_gv_fill_array_close_open(d, n);
 13 |     return d;
 14 | }
 15 | 
 16 | #define NITER 5
 17 | 
 18 | double clock_now()
 19 | {
 20 |     struct timeval now;
 21 | 
 22 |     gettimeofday(&now, NULL);
 23 |     return (double)now.tv_sec + (double)now.tv_usec/1.0e6;
 24 | }
 25 | 
 26 | int fib(int n) {
 27 |     return n < 2 ? n : fib(n-1) + fib(n-2);
 28 | }
 29 | 
 30 | long parse_int(const char *s, long base) {
 31 |     long n = 0;
 32 |     for (; *s; ++s) {
 33 |         char c = *s;
 34 |         long d = 0;
 35 |         if (c >= '0' && c <= '9') d = c-'0';
 36 |         else if (c >= 'A' && c <= 'Z') d = c-'A' + (int) 10;
 37 |         else if (c >= 'a' && c <= 'z') d = c-'a' + (int) 10;
 38 |         else exit(-1);
 39 | 
 40 |         if (base <= d) exit(-1);
 41 |         n = n*base + d;
 42 |     }
 43 |     return n;
 44 | }
 45 | 
 46 | double *ones(int m, int n) {
 47 |     double *a = (double *) malloc(m*n*sizeof(double));
 48 |     for (int k=0; k<m*n; ++k) {
 49 |         a[k] = 1.0;
 50 |     }
 51 |     return a;
 52 | }
 53 | 
 54 | double *matmul_aat(int n, double *b) {
 55 |     double *c = (double *) malloc(n*n*sizeof(double));
 56 |     cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, n, n, n, 1.0, b, n, b, n, 0.0, c, n);
 57 |     return c;
 58 | }
 59 | 
 60 | double cabs2(double complex z) {
 61 |   return creal(z)*creal(z) + cimag(z)*cimag(z);
 62 | }
 63 | 
 64 | int mandel(double complex z) {
 65 |     int maxiter = 80;
 66 |     double complex c = z;
 67 |     for (int n = 0; n < maxiter; ++n) {
 68 |         if (cabs2(z) > 4.0) {
 69 |             return n;
 70 |         }
 71 |         z = z*z+c;
 72 |     }
 73 |     return maxiter;
 74 | }
 75 | 
 76 | int *mandelperf() {
 77 |     int *M = (int*) malloc(21*26*sizeof(int));
 78 |     for (int i = 0; i < 21; i++) {
 79 |         for (int j = 0; j < 26; j++) {
 80 |             M[26*i + j] = mandel((j-20)/10.0 + ((i-10)/10.0)*I);
 81 |         }
 82 |     }
 83 |     return M;
 84 | }
 85 | 
 86 | void quicksort(double *a, int lo, int hi) {
 87 |     int i = lo;
 88 |     int j = hi;
 89 |     while (i < hi) {
 90 |         double pivot = a[(lo+hi)/2];
 91 |         // Partition
 92 |         while (i <= j) {
 93 |             while (a[i] < pivot) {
 94 |                 i = i + 1;
 95 |             }
 96 |             while (a[j] > pivot) {
 97 |                 j = j - 1;
 98 |             }
 99 |             if (i <= j) {
100 |                 double t = a[i];
101 |                 a[i] = a[j];
102 |                 a[j] = t;
103 |                 i = i + 1;
104 |                 j = j - 1;
105 |             }
106 |         }
107 | 
108 |         // Recursion for quicksort
109 |         if (lo < j) {
110 |             quicksort(a, lo, j);
111 |         }
112 |         lo = i;
113 |         j = hi;
114 |     }
115 | }
116 | 
/* Compute sum_{k=1..10000} 1/k^2, repeating the whole computation 500
 * times; the value of the last pass is returned. */
double pisum() {
    double total = 0.0;
    for (int pass = 0; pass < 500; ++pass) {
        total = 0.0;
        for (int k = 1; k <= 10000; ++k) {
            const double term = 1.0/(k*k);
            total += term;
        }
    }
    return total;
}
127 | 
128 | struct double_pair { double s1, s2; };
129 | 
130 | static void randmtzig_fill_randn(dsfmt_t *dsfmt, double *a, int size) {
131 |     for (int i=0; i<size; ++i)
132 |         a[i] = randmtzig_randn(dsfmt);
133 | }
134 | 
135 | struct double_pair randmatstat(int t) {
136 |     dsfmt_t dsfmt;
137 |     dsfmt_init_gen_rand(&dsfmt, 1234);
138 | 
139 |     int n = 5;
140 |     struct double_pair r;
141 |     double *v = (double*)calloc(t,sizeof(double));
142 |     double *w = (double*)calloc(t,sizeof(double));
143 |     double *a = (double*)malloc((n)*(n)*sizeof(double));
144 |     double *b = (double*)malloc((n)*(n)*sizeof(double));
145 |     double *c = (double*)malloc((n)*(n)*sizeof(double));
146 |     double *d = (double*)malloc((n)*(n)*sizeof(double));
147 |     double *P = (double*)malloc((n)*(4*n)*sizeof(double));
148 |     double *Q = (double*)malloc((2*n)*(2*n)*sizeof(double));
149 |     double *PtP1 = (double*)malloc((4*n)*(4*n)*sizeof(double));
150 |     double *PtP2 = (double*)malloc((4*n)*(4*n)*sizeof(double));
151 |     double *QtQ1 = (double*)malloc((2*n)*(2*n)*sizeof(double));
152 |     double *QtQ2 = (double*)malloc((2*n)*(2*n)*sizeof(double));
153 |     for (int i=0; i < t; i++) {
154 |         randmtzig_fill_randn(&dsfmt, a, n*n);
155 |         randmtzig_fill_randn(&dsfmt, b, n*n);
156 |         randmtzig_fill_randn(&dsfmt, c, n*n);
157 |         randmtzig_fill_randn(&dsfmt, d, n*n);
158 |         memcpy(P+0*n*n, a, n*n*sizeof(double));
159 |         memcpy(P+1*n*n, b, n*n*sizeof(double));
160 |         memcpy(P+2*n*n, c, n*n*sizeof(double));
161 |         memcpy(P+3*n*n, d, n*n*sizeof(double));
162 |         for (int j=0; j < n; j++) {
163 |             for (int k=0; k < n; k++) {
164 |                 Q[2*n*j+k]       = a[k];
165 |                 Q[2*n*j+n+k]     = b[k];
166 |                 Q[2*n*(n+j)+k]   = c[k];
167 |                 Q[2*n*(n+j)+n+k] = d[k];
168 |             }
169 |         }
170 |         cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans,
171 |                     n, n, 4*n, 1.0, P, 4*n, P, 4*n, 0.0, PtP1, 4*n);
172 |         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
173 |                     4*n, 4*n, 4*n, 1.0, PtP1, 4*n, PtP1, 4*n, 0.0, PtP2, 4*n);
174 |         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
175 |                     4*n, 4*n, 4*n, 1.0, PtP2, 4*n, PtP2, 4*n, 0.0, PtP1, 4*n);
176 |         for (int j=0; j < 4*n; j++) {
177 |             v[i] += PtP1[(4*n+1)*j];
178 |         }
179 |         cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans,
180 |                     2*n, 2*n, 2*n, 1.0, Q, 2*n, Q, 2*n, 0.0, QtQ1, 2*n);
181 |         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
182 |                     2*n, 2*n, 2*n, 1.0, QtQ1, 2*n, QtQ1, 2*n, 0.0, QtQ2, 2*n);
183 |         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
184 |                     2*n, 2*n, 2*n, 1.0, QtQ2, 2*n, QtQ2, 2*n, 0.0, QtQ1, 2*n);
185 |         for (int j=0; j < 2*n; j++) {
186 |             w[i] += QtQ1[(2*n+1)*j];
187 |         }
188 |     }
189 |     free(PtP1);
190 |     free(PtP2);
191 |     free(QtQ1);
192 |     free(QtQ2);
193 |     free(P);
194 |     free(Q);
195 |     free(a);
196 |     free(b);
197 |     free(c);
198 |     free(d);
199 |     double v1=0.0, v2=0.0, w1=0.0, w2=0.0;
200 |     for (int i=0; i < t; i++) {
201 |         v1 += v[i]; v2 += v[i]*v[i];
202 |         w1 += w[i]; w2 += w[i]*w[i];
203 |     }
204 |     free(v);
205 |     free(w);
206 |     r.s1 = sqrt((t*(t*v2-v1*v1))/((t-1)*v1*v1));
207 |     r.s2 = sqrt((t*(t*w2-w1*w1))/((t-1)*w1*w1));
208 |     return r;
209 | }
210 | 
211 | double *randmatmul(int n) {
212 |     double *A = myrand(n*n);
213 |     double *B = myrand(n*n);
214 |     double *C = (double*)malloc(n*n*sizeof(double));
215 |     cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
216 |                 n, n, n, 1.0, A, n, B, n, 0.0, C, n);
217 |     free(A);
218 |     free(B);
219 |     return C;
220 | }
221 | 
/* Write the lines "i i+1" for i in [0, n) to /dev/null.
 * If the file cannot be opened the failure is reported on stderr and
 * nothing is written. */
void printfd(int n) {
    FILE *f = fopen("/dev/null", "w");
    if (f == NULL) {
        /* fprintf/fclose on a NULL stream is undefined behaviour. */
        perror("fopen /dev/null");
        return;
    }
    long i = 0;
    for (i = 0; i < n; i++)
        fprintf(f, "%ld %ld\n", i, i+1);
    fclose(f);
}
229 | 
/* Print one result line "c,<name>,<value>", where the value is t multiplied
 * by 1000 and shown with six decimals. */
void print_perf(const char *name, double t) {
    const double scaled = t*1000;
    printf("c,%s,%.6f\n", name, scaled);
}
233 | 
/* Benchmark driver. Every benchmark is timed NITER times with clock_now()
 * (seconds) and the smallest time is passed to print_perf(), which scales it
 * by 1000 before printing. Where a benchmark body is repeated inside the
 * timed region, tmin is divided accordingly before it is reported. */
int main() {
    // Initialize RNG
    dsfmt_gv_init_gen_rand(0);

    double t, tmin;

    // fib(20)
    assert(fib(20) == 6765);
    int f = 0;
    tmin = INFINITY;
    volatile int fibarg = 20; // prevent constant propagation
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        // 1000 calls per timed run; tmin is divided by 1000 below.
        for (int j = 0; j < 1000; j++)
                f += fib(fibarg);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("recursion_fibonacci", tmin / 1000);

    // parse_bin
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        // "%x" of a uint32_t needs at most 8 hex digits plus the terminator.
        char s[11];
        // 100 batches of 1000 round trips; tmin is divided by 100 below.
        for (int k=0; k<1000 * 100; ++k) {
            uint32_t n = dsfmt_gv_genrand_uint32();
            sprintf(s, "%x", n);
            uint32_t m = (uint32_t)parse_int(s, 16);
            assert(m == n);
        }
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("parse_integers", tmin / 100);

    // // array constructor
    // tmin = INFINITY;
    // for (int i=0; i<NITER; ++i) {
    //     t = clock_now();
    //     double *a = ones(200,200);
    //     free(a);
    //     t = clock_now()-t;
    //     if (t < tmin) tmin = t;
    // }
    // print_perf("ones", tmin);
    //
    // // A*A'
    // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
    // double *b = ones(200, 200);
    // tmin = INFINITY;
    // for (int i=0; i<NITER; ++i) {
    //     t = clock_now();
    //     double *c = matmul_aat(200, b);
    //     free(c);
    //     t = clock_now()-t;
    //     if (t < tmin) tmin = t;
    // }
    // free(b);
    // print_perf("AtA", tmin);

    // mandel
    /* The initialization on the next line is deliberately volatile to
     * prevent gcc from optimizing away the entire loop.
     * (First observed in gcc 4.9.2)
     */
    static volatile int mandel_sum_init = 0;
    int mandel_sum2 = mandel_sum_init;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        int *M;
        t = clock_now();
        // 100 grid evaluations per timed run; tmin is divided by 100 below.
        for (int j = 0; j < 100; j++) {
            M = mandelperf();
            // Only the first grid of each run is checksummed.
            if (j == 0) {
                int mandel_sum = 0;
                // for (int ii = 0; ii < 21; ii++) {
                //     for (int jj = 0; jj < 26; jj++) {
                //         printf("%4d", M[26*ii + jj]);
                //     }
                //     printf("\n");
                // }
                for (int k = 0; k < 21*26; k++) {
                    mandel_sum += M[k];
                }
                assert(mandel_sum == 14791);
                mandel_sum2 += mandel_sum;
            }
            free(M);
        }
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    assert(mandel_sum2 == 14791 * NITER);
    print_perf("userfunc_mandelbrot", tmin / 100);

    // sort
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        // Generating and freeing the input is part of the timed region.
        double *d = myrand(5000);
        quicksort(d, 0, 5000-1);
        free(d);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("recursion_quicksort", tmin);

    // pi sum
    double pi;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        pi = pisum();
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    assert(fabs(pi-1.644834071848065) < 1e-12);
    print_perf("iteration_pi_sum", tmin);

    // rand mat stat
    struct double_pair r;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        r = randmatstat(1000);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 && r.s2 < 1.0);
    print_perf("matrix_statistics", tmin);

    // rand mat mul
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        double *C = randmatmul(1000);
        assert(0 <= C[0]);
        free(C);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("matrix_multiply", tmin);

    // printfd
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        printfd(100000);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("print_to_file", tmin);

    return 0;
}
390 | 


--------------------------------------------------------------------------------
/perf.f90:
--------------------------------------------------------------------------------
  1 | module types
  2 | implicit none
  3 | private
  4 | public dp, i64
  5 | integer, parameter :: dp=kind(0.d0)          ! double precision
  6 | integer, parameter :: i64 = selected_int_kind(18) ! At least 64-bit integer
  7 | end module
  8 | 
  9 | 
 10 | module utils
 11 | ! Various utilities
 12 | use types, only: dp, i64
 13 | implicit none
 14 | private
 15 | public trace, mean, std, init_random_seed, randn, assert, stop_error, &
 16 |      sysclock2ms, hex_string
 17 | 
 18 | contains
 19 | 
 20 | subroutine stop_error(msg)
 21 | ! Prints msg and aborts the program with a nonzero exit code.
 22 | !
 23 | ! The statement "stop msg" will return 0 exit code when compiled using
 24 | ! gfortran. stop_error() therefore prints the message with a print statement
 25 | ! and then executes "stop 1", which returns exit code 1.
 26 | !
 27 | ! Example
 28 | ! -------
 29 | !
 30 | ! call stop_error("Invalid argument")
 31 | 
 32 | character(len=*), intent(in) :: msg ! Message to print on stdout
 33 | print *, msg
 34 | stop 1
 35 | end subroutine
 36 | 
 37 | subroutine assert(condition)
 38 | ! If condition == .false., it aborts the program.
 39 | !
 40 | ! Arguments
 41 | ! ---------
 42 | !
 43 | logical, intent(in) :: condition
 44 | !
 45 | ! Example
 46 | ! -------
 47 | !
 48 | ! call assert(a == 5)
 49 | 
 50 | if (.not. condition) call stop_error("Assert failed.")
 51 | end subroutine
 52 | 
 53 | real(dp) function trace(A) result(t)
 54 | real(dp), intent(in) :: A(:, :)   ! diagonal entries A(k, k), k = 1 .. size(A, 1), are summed
 55 | integer :: k
 56 | t = 0.0_dp
 57 | do k = 1, size(A, 1)
 58 |     t = A(k, k) + t
 59 | end do
 60 | end function
 61 | 
 62 | real(dp) function mean(x) result(t)
 63 | real(dp), intent(in) :: x(:)
 64 | t = sum(x) / size(x)
 65 | end function
 66 | 
 67 | real(dp) function std(x) result(t)
 68 | real(dp), intent(in) :: x(:)
 69 | t = sqrt(mean(abs(x - mean(x))**2))
 70 | end function
 71 | 
 72 | subroutine init_random_seed()
 73 | integer :: i, n, clock
 74 | integer, allocatable :: seed(:)
 75 | call random_seed(size=n)
 76 | allocate(seed(n))
 77 | call system_clock(count=clock)
 78 | seed = clock + 37 * [ (i - 1, i = 1, n) ]
 79 | call random_seed(put=seed)
 80 | end subroutine
 81 | 
 82 | FUNCTION rnorm() RESULT( fn_val )
 83 | 
 84 | !   This function was taken from: http://jblevins.org/mirror/amiller/rnorm.f90
 85 | 
 86 | !   Generate a random normal deviate using the polar method.
 87 | !   Reference: Marsaglia,G. & Bray,T.A. 'A convenient method for generating
 88 | !              normal variables', Siam Rev., vol.6, 260-264, 1964.
 89 | 
 90 | IMPLICIT NONE
 91 | REAL(dp)  :: fn_val
 92 | 
 93 | ! Local variables
 94 | 
 95 | REAL(dp)            :: u, sum
 96 | REAL(dp), SAVE      :: v, sln
 97 | LOGICAL, SAVE   :: second = .FALSE.
 98 | REAL(dp), PARAMETER :: one = 1, vsmall = TINY( one )
 99 | 
100 | IF (second) THEN
101 | ! If second, use the second random number generated on last call
102 | 
103 |   second = .false.
104 |   fn_val = v*sln
105 | 
106 | ELSE
107 | ! First call; generate a pair of random normals
108 | 
109 |   second = .true.
110 |   DO
111 |     CALL RANDOM_NUMBER( u )
112 |     CALL RANDOM_NUMBER( v )
113 |     u = SCALE( u, 1 ) - one
114 |     v = SCALE( v, 1 ) - one
115 |     sum = u*u + v*v + vsmall         ! vsmall added to prevent LOG(zero) / zero
116 |     IF(sum < one) EXIT
117 |   END DO
118 |   sln = SQRT(- SCALE( LOG(sum), 1 ) / sum)
119 |   fn_val = u*sln
120 | END IF
121 | 
122 | RETURN
123 | END FUNCTION rnorm
124 | 
125 | subroutine randn(A)
126 | real(dp), intent(out) :: A(:, :)
127 | integer :: i, j
128 | do j = 1, size(A, 2)
129 |     do i = 1, size(A, 1)
130 |         A(i, j) = rnorm()
131 |     end do
132 | end do
133 | end subroutine
134 | 
135 | ! Converts a tick count t, as produced by system_clock called with
136 | ! integer(i64) arguments, to milliseconds using the clock's count_rate.
137 | function sysclock2ms(t) result(ms)
138 |   integer(i64), intent(in) :: t
139 |   real(dp) :: ms, ms_per_tick
140 |   integer(i64) :: ticks_per_second
141 |   call system_clock(count_rate=ticks_per_second)
142 |   ms_per_tick = 1000._dp / ticks_per_second
143 |   ms = t * ms_per_tick
144 | end function sysclock2ms
145 | 
146 | ! Write dec into hexchar as 8 upper-case hexadecimal digits, zero-padded on
147 | ! the left. Assumes len(hexchar) >= 8; any extra characters become blanks.
148 | subroutine hex_string(dec,hexchar)
149 |   integer, intent(in) :: dec
150 |   character(*), intent(out) :: hexchar
151 | 
152 |   integer :: i
153 |   integer :: quotient
154 |   ! Digit lookup table indexed by nibble value: '0'..'9', then 'A'..'F'
155 |   character(len=1), parameter :: table(0:15) = &
156 |   [(char(i),i=ichar('0'),ichar('9')),(char(i),i=ichar('A'),ichar('F'))]
157 | 
158 |   quotient = dec
159 | 
160 |   hexchar = '00000000'
161 |   ! Fill digits right to left (position 8 down to 1), 4 bits per iteration
162 |   i = 8
163 |   do while (quotient /= 0 .and. i > 0)
164 | 
165 |       hexchar(i:i) = table(iand(quotient,15))
166 |       i = i-1
167 | 
168 |       quotient = ishft(quotient,-4)
169 |   end do
170 | 
171 | end subroutine hex_string
172 | 
173 | end module
174 | 
175 | 
176 | 
177 | module bench
178 | use utils, only: trace, randn, std, mean, stop_error
179 | use types, only: dp
180 | implicit none
181 | private
182 | public fib, parse_int, printfd, quicksort, mandelperf, pisum, randmatstat, randmatmul
183 | 
184 | contains
185 | 
186 | integer recursive function fib(n) result(r)
187 | ! Doubly recursive Fibonacci number; returns n itself when n < 2.
188 | integer, intent(in) :: n
189 | r = n
190 | if (n >= 2) then
191 |     r = fib(n-1) + fib(n-2)
192 | end if
193 | end function
194 | 
195 | integer function parse_int(s, base) result(n)
196 | character(len=*), intent(in) :: s
197 | integer, intent(in) :: base
198 | integer :: i, d
199 | character :: c
200 | n = 0
201 | do i = 1, len(s)
202 |     c = s(i:i)
203 |     d = 0
204 |     if (ichar(c) >= ichar('0') .and. ichar(c) <= ichar('9')) then
205 |         d = ichar(c) - ichar('0')
206 |     else if (ichar(c) >= ichar('A') .and. ichar(c) <= ichar('Z')) then
207 |         d = ichar(c) - ichar('A') + 10
208 |     else if (ichar(c) >= ichar('a') .and. ichar(c) <= ichar('z')) then
209 |         d = ichar(c) - ichar('a') + 10
210 |     else
211 |         call stop_error("parse_int 1")
212 |     end if
213 | 
214 |     if (base <= d) call stop_error("parse_int 2")
215 |     n = n*base + d
216 | end do
217 | 
218 | end function
219 | 
220 | subroutine printfd(n)
221 | integer, intent(in) :: n   ! number of list-directed lines "i i+1" (i = 1..n) written to /dev/null
222 | integer :: i
223 | open(unit=1, file="/dev/null")
224 | do i = 1, n
225 |     write(unit=1, fmt=*) i, i+1
226 | end do
227 | close(unit=1)
228 | end subroutine
229 | 
230 | real(dp) function abs2(z) result(r)
231 | complex(dp), intent(in) :: z
232 |     r = real(z)*real(z) + aimag(z)*aimag(z)  ! squared magnitude, no sqrt; aimag is the standard intrinsic
233 | end function
234 | 
235 | integer function mandel(z0) result(r)
236 | ! Iterations of z -> z**2 + z0 (from z = z0) before abs2(z) > 4, capped at 80.
237 | complex(dp), intent(in) :: z0
238 | integer, parameter :: maxiter = 80
239 | complex(dp) :: z
240 | integer :: step
241 | z = z0
242 | r = maxiter
243 | do step = 0, maxiter - 1
244 |     if (abs2(z) > 4) then
245 |         r = step
246 |         exit
247 |     end if
248 |     z = z**2 + z0
249 | end do
250 | end function
251 | 
252 | integer function mandelperf() result(mandel_sum)
253 | integer :: re, im
254 | volatile :: mandel_sum
255 | mandel_sum = 0
256 | re = -20
257 | do while (re <= 5)
258 |     im = -10
259 |     do while (im <= 10)
260 |         mandel_sum = mandel_sum + mandel(cmplx(re/10._dp, im/10._dp, dp))
261 |         im = im + 1
262 |     end do
263 |     re = re + 1
264 | end do
265 | end function
266 | 
267 | recursive subroutine quicksort(a, lo0, hi)
268 | real(dp), intent(inout) :: a(:)
269 | integer, intent(in) :: lo0, hi
270 | integer :: i, j, lo
271 | real(dp) :: pivot, t
272 | lo = lo0
273 | i = lo
274 | j = hi
275 | do while (i < hi)
276 |     pivot = a((lo+hi)/2)
277 |     do while (i <= j)
278 |         do while (a(i) < pivot)
279 |             i = i + 1
280 |         end do
281 |         do while (a(j) > pivot)
282 |             j = j - 1
283 |         end do
284 |         if (i <= j) then
285 |             t = a(i)
286 |             a(i) = a(j)
287 |             a(j) = t
288 |             i = i + 1
289 |             j = j - 1
290 |         end if
291 |     end do
292 |     if (lo < j) call quicksort(a, lo, j)
293 |     lo = i
294 |     j = hi
295 | end do
296 | end subroutine
297 | 
298 | real(dp) function pisum() result(s)
299 | integer :: pass, term
300 | do pass = 1, 500                    ! the same partial sum is recomputed 500 times
301 |     s = 0.0_dp
302 |     do term = 1, 10000
303 |         s = 1._dp / term**2 + s
304 |     end do
305 | end do
306 | end function
307 | 
308 | subroutine randmatstat(t, s1, s2)
309 | integer, intent(in) :: t
310 | real(dp), intent(out) :: s1, s2
311 | real(dp), allocatable, dimension(:, :) :: a, b, c, d, P, Q, X
312 | real(dp), allocatable :: v(:), w(:)
313 | integer :: n, i
314 | n = 5
315 | allocate(a(n, n), b(n, n), c(n, n), d(n, n))
316 | allocate(P(4*n, n), Q(2*n, 2*n), X(2*n, 2*n))
317 | allocate(v(t), w(t))
318 | do i = 1, t
319 |     call randn(a)
320 |     call randn(b)
321 |     call randn(c)
322 |     call randn(d)
323 |     P(:n, :)=a; P(n+1:2*n, :)=b; P(2*n+1:3*n, :)=c; P(3*n+1:, :)=d
324 |     Q(:n,    :n) = a; Q(n+1:,    :n) = b
325 |     Q(:n, n+1: ) = c; Q(n+1:, n+1: ) = d
326 |     X = matmul(transpose(P), P)
327 |     X = matmul(X, X)
328 |     X = matmul(X, X)
329 |     v(i) = trace(X)
330 |     X = matmul(transpose(Q), Q)
331 |     X = matmul(X, X)
332 |     X = matmul(X, X)
333 |     w(i) = trace(X)
334 | end do
335 | s1 = std(v) / mean(v)
336 | s2 = std(w) / mean(w)
337 | end subroutine
338 | 
339 | subroutine randmatmul(n, C)
340 | integer, intent(in) :: n
341 | real(dp), intent(out), allocatable :: C(:, :)
342 | real(dp), allocatable :: A(:, :), B(:, :)
343 | allocate(A(n, n), B(n, n), C(n, n))
344 | call random_number(A)
345 | call random_number(B)
346 | !C = matmul(A, B)
347 | call dgemm('N','N',n,n,n,1.0d0,A,n,B,n,0.0d0,C,n)
348 | end subroutine
349 | 
350 | end module
351 | 
352 | program perf
353 | use types, only: dp, i64
354 | use utils, only: assert, init_random_seed, sysclock2ms, hex_string
355 | use bench, only: fib, parse_int, printfd, quicksort, mandelperf, pisum, randmatstat, &
356 |     randmatmul
357 | implicit none
358 | 
359 | integer, parameter :: NRUNS = 1000
360 | integer :: i, f, n, m, k, k2
361 | integer(i64) :: t1, t2, tmin
362 | real(dp) :: pi, s1, s2
363 | real(dp), allocatable :: C(:, :), d(:)
364 | character(len=11) :: s
365 | 
366 | call init_random_seed()
367 | 
368 | tmin = huge(0_i64)
369 | do i = 1, 5
370 |     call system_clock(t1)
371 |     do k = 1, NRUNS
372 |         f = fib(20)
373 |     end do
374 |     call system_clock(t2)
375 |     if (t2-t1 < tmin) tmin = t2-t1
376 | end do
377 | call assert(f == 6765)
378 | print "('fortran,recursion_fibonacci,',f0.6)", sysclock2ms(tmin) / NRUNS
379 | 
380 | tmin = huge(0_i64)
381 | do i = 1, 5
382 |     call system_clock(t1)
383 |     do k2 = 1, NRUNS
384 |         do k = 1, 1000
385 |             call random_number(s1)
386 |             n = int(s1*huge(n))
387 |             call hex_string(n,s)
388 |             m = parse_int(s(:len_trim(s)), 16)
389 |             call assert(m == n)
390 |         end do
391 |     end do
392 |     call system_clock(t2)
393 |     if (t2-t1 < tmin) tmin = t2-t1
394 | end do
395 | print "('fortran,parse_integers,',f0.6)", sysclock2ms(tmin) / NRUNS
396 | 
397 | tmin = huge(0_i64)
398 | do i = 1, 5
399 |     call system_clock(t1)
400 |     call printfd(100000)
401 |     call system_clock(t2)
402 |     if (t2-t1 < tmin) tmin = t2-t1
403 | end do
404 | print "('fortran,print_to_file,',f0.6)", sysclock2ms(tmin)
405 | 
406 | 
407 | tmin = huge(0_i64)
408 | do i = 1, 5
409 |     call system_clock(t1)
410 |     do k = 1, NRUNS
411 |         f = mandelperf()
412 |     end do
413 |     call system_clock(t2)
414 |     if (t2-t1 < tmin) tmin = t2-t1
415 | end do
416 | call assert(f == 14791)
417 | print "('fortran,userfunc_mandelbrot,',f0.6)", sysclock2ms(tmin) / NRUNS
418 | 
419 | tmin = huge(0_i64)
420 | do i = 1, 5
421 |     call system_clock(t1)
422 |     do k = 1, NRUNS
423 |         allocate(d(5000))
424 |         call random_number(d)
425 |         call quicksort(d, 1, size(d))
426 |         deallocate(d)
427 |     end do
428 |     call system_clock(t2)
429 |     if (t2-t1 < tmin) tmin = t2-t1
430 | end do
431 | print "('fortran,recursion_quicksort,',f0.6)", sysclock2ms(tmin) / NRUNS
432 | 
433 | tmin = huge(0_i64)
434 | do i = 1, 5
435 |     call system_clock(t1)
436 |     pi = pisum()
437 |     call system_clock(t2)
438 |     if (t2-t1 < tmin) tmin = t2-t1
439 | end do
440 | call assert(abs(pi - 1.644834071848065_dp) < 1e-6_dp)
441 | print "('fortran,iteration_pi_sum,',f0.6)", sysclock2ms(tmin)
442 | 
443 | tmin = huge(0_i64)
444 | do i = 1, 5
445 |     call system_clock(t1)
446 |     call randmatstat(1000, s1, s2)
447 |     call system_clock(t2)
448 |     if (t2-t1 < tmin) tmin = t2-t1
449 | end do
450 | ! call assert(s1 > 0.5_dp .and. s1 < 1)
451 | ! call assert(s2 > 0.5_dp .and. s2 < 1)
452 | print "('fortran,matrix_statistics,',f0.6)", sysclock2ms(tmin)
453 | 
454 | tmin = huge(0_i64)
455 | do i = 1, 5
456 |     call system_clock(t1)
457 |     call randmatmul(1000, C)
458 |     call assert(C(1, 1) >= 0)
459 |     call system_clock(t2)
460 |     if (t2-t1 < tmin) tmin = t2-t1
461 | end do
462 | print "('fortran,matrix_multiply,',f0.6)", sysclock2ms(tmin)
463 | 
464 | end program
465 | 


--------------------------------------------------------------------------------
/perf.go:
--------------------------------------------------------------------------------
  1 | // Implementation of the Julia benchmark suite in Go.
  2 | //
  3 | // Three gonum packages must be installed, and then an additional environment
  4 | // variable must be set to use the BLAS installation.
  5 | // To install the gonum packages, run:
  6 | // 		go get gonum.org/v1/netlib/blas/netlib
  7 | //		go get gonum.org/v1/gonum/mat
  8 | //		go get gonum.org/v1/gonum/stat
  9 | // The cgo ldflags must then be set to use the BLAS implementation. As an example,
 10 | // download OpenBLAS to ~/software
 11 | //		git clone https://github.com/xianyi/OpenBLAS
 12 | // 		cd OpenBLAS
 13 | //		make
 14 | // Then edit the environment variable to have
 15 | // 		export CGO_LDFLAGS="-L/$HOME/software/OpenBLAS -lopenblas"
 16 | package main
 17 | 
 18 | import (
 19 | 	"bufio"
 20 | 	"errors"
 21 | 	"fmt"
 22 | 	"log"
 23 | 	"math"
 24 | 	"math/rand"
 25 | 	"os"
 26 | 	"strconv"
 27 | 	"testing"
 28 | 
 29 | 	"gonum.org/v1/gonum/mat"
 30 | 	"gonum.org/v1/gonum/stat"
 31 | 	"gonum.org/v1/netlib/blas/netlib"
 32 | )
 33 | 
 34 | func init() {
 35 | 	// Use the BLAS implementation specified in CGO_LDFLAGS. This line can be
 36 | 	// commented out to use the native Go BLAS implementation found in
 37 | 	// gonum.org/v1/gonum/blas/gonum.
 38 | 	//blas64.Use(gonum.Implementation{})
 39 | 
 40 | 	// These are here so that toggling the BLAS implementation does not make imports unused
 41 | 	_ = netlib.Implementation{}
 42 | }
 43 | 
 44 | // fib returns the n-th Fibonacci number by naive double recursion;
 45 | // any n < 2 (including negatives) is returned unchanged.
 46 | func fib(n int) int {
 47 | 	if n >= 2 {
 48 | 		return fib(n-2) + fib(n-1)
 49 | 	}
 50 | 	return n
 51 | }
 52 | 
 53 | // printfd writes n lines of the form "i i+1" (i = 0..n-1) to /dev/null through
 54 | // a buffered writer, panicking if the file cannot be opened or the output fails.
 55 | func printfd(n int) {
 56 | 	f, err := os.Create("/dev/null")
 57 | 	if err != nil {
 58 | 		panic(err)
 59 | 	}
 60 | 	defer f.Close() // the only close: a second, explicit Close would just return an error
 61 | 	w := bufio.NewWriter(f)
 62 | 	for i := 0; i < n; i++ {
 63 | 		fmt.Fprintf(w, "%d %d\n", i, i+1) // bufio.Writer errors are sticky; Flush reports them
 64 | 	}
 65 | 	if err := w.Flush(); err != nil {
 66 | 		panic(err)
 67 | 	}
 68 | }
 69 | 
 70 | // quicksort
 71 | 
 72 | func qsort_kernel(a []float64, lo, hi int) []float64 {
 73 | 	i := lo
 74 | 	j := hi
 75 | 	for i < hi {
 76 | 		pivot := a[(lo+hi)/2]
 77 | 		for i <= j {
 78 | 			for a[i] < pivot {
 79 | 				i += 1
 80 | 			}
 81 | 			for a[j] > pivot {
 82 | 				j -= 1
 83 | 			}
 84 | 			if i <= j {
 85 | 				a[i], a[j] = a[j], a[i]
 86 | 				i += 1
 87 | 				j -= 1
 88 | 			}
 89 | 		}
 90 | 		if lo < j {
 91 | 			qsort_kernel(a, lo, j)
 92 | 		}
 93 | 		lo = i
 94 | 		j = hi
 95 | 	}
 96 | 	return a
 97 | }
 98 | 
 99 | var rnd = rand.New(rand.NewSource(1))
100 | 
101 | // randmatstat draws t sets of four n-by-n normal random matrices (n = 5), builds
102 | // P = [a b c d] and Q = [a b; c d], and returns std/mean of tr((P'P)^4) and of tr((Q'Q)^4).
103 | func randmatstat(t int) (float64, float64) {
104 | 	n := 5
105 | 	v := make([]float64, t)
106 | 	w := make([]float64, t)
107 | 	ad := make([]float64, n*n)
108 | 	bd := make([]float64, n*n)
109 | 	cd := make([]float64, n*n)
110 | 	dd := make([]float64, n*n)
111 | 	P := mat.NewDense(n, 4*n, nil)
112 | 	Q := mat.NewDense(2*n, 2*n, nil)
113 | 	pTmp := mat.NewDense(4*n, 4*n, nil)
114 | 	qTmp := mat.NewDense(2*n, 2*n, nil)
115 | 	for i := 0; i < t; i++ {
116 | 		for k := range ad { // k rather than i, so the trial index is not shadowed
117 | 			ad[k] = rnd.NormFloat64()
118 | 			bd[k] = rnd.NormFloat64()
119 | 			cd[k] = rnd.NormFloat64()
120 | 			dd[k] = rnd.NormFloat64()
121 | 		}
122 | 		a := mat.NewDense(n, n, ad)
123 | 		b := mat.NewDense(n, n, bd)
124 | 		c := mat.NewDense(n, n, cd)
125 | 		d := mat.NewDense(n, n, dd)
126 | 		P.Copy(a)
127 | 		P.Slice(0, n, n, n+n).(*mat.Dense).Copy(b)
128 | 		P.Slice(0, n, 2*n, 3*n).(*mat.Dense).Copy(c)
129 | 		P.Slice(0, n, 3*n, 4*n).(*mat.Dense).Copy(d)
130 | 
131 | 		Q.Copy(a)
132 | 		Q.Slice(0, n, n, 2*n).(*mat.Dense).Copy(b)
133 | 		Q.Slice(n, 2*n, 0, n).(*mat.Dense).Copy(c)
134 | 		Q.Slice(n, 2*n, n, 2*n).(*mat.Dense).Copy(d)
135 | 
136 | 		pTmp.Mul(P.T(), P)
137 | 		pTmp.Pow(pTmp, 4)
138 | 
139 | 		qTmp.Mul(Q.T(), Q)
140 | 		qTmp.Pow(qTmp, 4)
141 | 
142 | 		v[i] = mat.Trace(pTmp)
143 | 		w[i] = mat.Trace(qTmp)
144 | 	}
145 | 	mv, stdv := stat.MeanStdDev(v, nil)
146 | 	mw, stdw := stat.MeanStdDev(w, nil) // statistics of the Q traces (w), not v a second time
147 | 	return stdv / mv, stdw / mw
148 | }
149 | 
150 | // randmatmul
151 | 
152 | func randmatmul(n int) *mat.Dense {
153 | 	aData := make([]float64, n*n)
154 | 	for i := range aData {
155 | 		aData[i] = rnd.Float64()
156 | 	}
157 | 	a := mat.NewDense(n, n, aData)
158 | 
159 | 	bData := make([]float64, n*n)
160 | 	for i := range bData {
161 | 		bData[i] = rnd.Float64()
162 | 	}
163 | 	b := mat.NewDense(n, n, bData)
164 | 	var c mat.Dense
165 | 	c.Mul(a, b)
166 | 	return &c
167 | }
168 | 
169 | // mandelbrot
170 | func abs2(z complex128) float64 {
171 | 	return real(z)*real(z) + imag(z)*imag(z)
172 | }
173 | func mandel(z complex128) int {
174 | 	maxiter := 80
175 | 	c := z
176 | 	for n := 0; n < maxiter; n++ {
177 | 		if abs2(z) > 4 {
178 | 			return n
179 | 		}
180 | 		z = z*z + c
181 | 	}
182 | 	return maxiter
183 | }
184 | 
185 | // mandelperf
186 | 
187 | func mandelperf() int {
188 | 	mandel_sum := 0
189 | 	// These loops are constructed as such because mandel is very sensitive to
190 | 	// its input and this avoids very small floating point issues.
191 | 	for re := -20.0; re <= 5; re += 1 {
192 | 		for im := -10.0; im <= 10; im += 1 {
193 | 			m := mandel(complex(re/10, im/10))
194 | 			mandel_sum += m
195 | 		}
196 | 	}
197 | 	return mandel_sum
198 | }
199 | 
200 | // pisum
201 | 
202 | func pisum() float64 {
203 | 	var sum float64
204 | 	for i := 0; i < 500; i++ {
205 | 		sum = 0.0
206 | 		for k := 1.0; k <= 10000; k += 1 {
207 | 			sum += 1.0 / (k * k)
208 | 		}
209 | 	}
210 | 	return sum
211 | }
212 | 
213 | func print_perf(name string, time float64) {
214 | 	fmt.Printf("go,%v,%v\n", name, time*1000)
215 | }
216 | 
217 | // run tests
218 | 
219 | func assert(b *testing.B, t bool) { // fails benchmark b via b.Fatal unless t holds
220 | 	if !t {
221 | 		b.Fatal("assert failed")
222 | 	}
223 | }
224 | 
225 | func main() {
226 | 	for _, bm := range benchmarks {
227 | 		seconds, err := runBenchmarkFor(bm.fn)
228 | 		if err != nil {
229 | 			log.Fatalf("%s %s", bm.name, err)
230 | 		}
231 | 		print_perf(bm.name, seconds)
232 | 	}
233 | }
234 | 
235 | func runBenchmarkFor(fn func(*testing.B)) (seconds float64, err error) {
236 | 	res := testing.Benchmark(fn)
237 | 	if res.N != 0 {
238 | 		return res.T.Seconds() / float64(res.N), nil // mean wall-clock seconds per iteration
239 | 	}
240 | 	return 0, errors.New("failed") // no iterations were recorded
241 | }
242 | 
243 | var benchmarks = []struct {
244 | 	name string
245 | 	fn   func(*testing.B)
246 | }{
247 | 	{
248 | 		name: "recursion_fibonacci",
249 | 		fn: func(b *testing.B) {
250 | 			for i := 0; i < b.N; i++ {
251 | 				if fib(20) != 6765 {
252 | 					b.Fatal("unexpected value for fib(20)")
253 | 				}
254 | 			}
255 | 		},
256 | 	},
257 | 
258 | 	{
259 | 		name: "parse_integers",
260 | 		fn: func(b *testing.B) {
261 | 			for i := 0; i < b.N; i++ {
262 | 				for k := 0; k < 1000; k++ {
263 | 					n := rnd.Uint32()
264 | 					m, _ := strconv.ParseUint(strconv.FormatUint(uint64(n), 16), 16, 32)
265 | 					if uint32(m) != n {
266 | 						b.Fatal("incorrect value for m")
267 | 					}
268 | 				}
269 | 			}
270 | 		},
271 | 	},
272 | 
273 | 	{
274 | 		name: "userfunc_mandelbrot",
275 | 		fn: func(b *testing.B) {
276 | 			for i := 0; i < b.N; i++ {
277 | 				if mandelperf() != 14791 {
278 | 					b.Fatal("unexpected value for mandelperf")
279 | 				}
280 | 			}
281 | 		},
282 | 	},
283 | 
284 | 	{
285 | 		name: "print_to_file",
286 | 		fn: func(b *testing.B) {
287 | 			for i := 0; i < b.N; i++ {
288 | 				printfd(100000)
289 | 			}
290 | 		},
291 | 	},
292 | 
293 | 	{
294 | 		name: "recursion_quicksort",
295 | 		fn: func(b *testing.B) {
296 | 			lst := make([]float64, 5000)
297 | 			b.ResetTimer()
298 | 			for i := 0; i < b.N; i++ {
299 | 				for k := range lst {
300 | 					lst[k] = rnd.Float64()
301 | 				}
302 | 				qsort_kernel(lst, 0, len(lst)-1)
303 | 			}
304 | 		},
305 | 	},
306 | 
307 | 	{
308 | 		name: "iteration_pi_sum",
309 | 		fn: func(b *testing.B) {
310 | 			for i := 0; i < b.N; i++ {
311 | 				if math.Abs(pisum()-1.644834071848065) >= 1e-6 {
312 | 					b.Fatal("pi_sum out of range")
313 | 				}
314 | 			}
315 | 		},
316 | 	},
317 | 
318 | 	{
319 | 		name: "matrix_statistics",
320 | 		fn: func(b *testing.B) {
321 | 			for i := 0; i < b.N; i++ {
322 | 				c1, c2 := randmatstat(1000)
323 | 				assert(b, 0.5 < c1)
324 | 				assert(b, c1 < 1.0)
325 | 				assert(b, 0.5 < c2)
326 | 				assert(b, c2 < 1.0)
327 | 			}
328 | 		},
329 | 	},
330 | 
331 | 	{
332 | 		name: "matrix_multiply",
333 | 		fn: func(b *testing.B) {
334 | 			for i := 0; i < b.N; i++ {
335 | 				c := randmatmul(1000)
336 | 				assert(b, c.At(0, 0) >= 0)
337 | 			}
338 | 		},
339 | 	},
340 | }
341 | 


--------------------------------------------------------------------------------
/perf.jl:
--------------------------------------------------------------------------------
  1 | # This file was formerly a part of Julia. License is MIT: https://julialang.org/license
  2 | 
  3 | import LinearAlgebra
  4 | import Test
  5 | import Printf
  6 | import Statistics
  7 | import Base.Sys
  8 | 
  9 | include("./perfutil.jl")
 10 | 
 11 | ## recursive fib ##
 12 | 
 13 | fib(n) = n < 2 ? n : fib(n-1) + fib(n-2)
 14 | 
 15 | Test.@test fib(20) == 6765
 16 | @timeit fib(20) "recursion_fibonacci" "Recursive fibonacci"
 17 | 
 18 | ## parse integer ##
 19 | 
 20 | function parseintperf(t)
 21 |     local n, m
 22 |     for i=1:t
 23 |         n = rand(UInt32)
 24 |         @static if VERSION >= v"0.7.0-DEV.4446"
 25 |             s = string(n, base = 16)
 26 |             m = UInt32(parse(Int64, s, base = 16))
 27 |         else
 28 |             s = hex(n)
 29 |             m = UInt32(parse(Int64, s, 16))
 30 |         end
 31 |         @assert m == n
 32 |     end
 33 |     return n
 34 | end
 35 | 
 36 | @timeit parseintperf(1000) "parse_integers" "Integer parsing"
 37 | 
 38 | ## array constructors ##
 39 | 
 40 | Test.@test all(fill(1.,200,200) .== 1)
 41 | 
 42 | ## matmul and transpose ##
 43 | 
 44 | A = fill(1.,200,200)
 45 | Test.@test all(A*A' .== 200)
 46 | # @timeit A*A' "AtA" "description"
 47 | 
 48 | ## mandelbrot set: complex arithmetic and comprehensions ##
 49 | 
 50 | # Squared magnitude of z, computed without taking a square root.
 51 | myabs2(z) = real(z)*real(z) + imag(z)*imag(z)
 52 | 
 53 | # Number of z -> z^2 + c steps (c = initial z) before myabs2(z) > 4, capped at 80.
 54 | function mandel(z)
 55 |     c = z
 56 |     maxiter = 80
 57 |     n = 0
 58 |     while n < maxiter
 59 |         myabs2(z) > 4 && return n
 60 |         z = z^2 + c
 61 |         n += 1
 62 |     end
 63 |     return maxiter
 64 | end
 65 | 
 66 | mandelperf() = [ mandel(complex(r,i)) for i=-1.:.1:1., r=-2.0:.1:0.5 ]
 67 | Test.@test sum(mandelperf()) == 14791
 68 | @timeit mandelperf() "userfunc_mandelbrot" "Calculation of mandelbrot set"
 69 | 
 70 | ## numeric vector sort ##
 71 | 
 72 | function qsort!(a,lo,hi)
 73 |     i, j = lo, hi
 74 |     while i < hi
 75 |         pivot = a[(lo+hi)>>>1]
 76 |         while i <= j
 77 |             while a[i] < pivot; i += 1; end
 78 |             while a[j] > pivot; j -= 1; end
 79 |             if i <= j
 80 |                 a[i], a[j] = a[j], a[i]
 81 |                 i, j = i+1, j-1
 82 |             end
 83 |         end
 84 |         if lo < j; qsort!(a,lo,j); end
 85 |         lo, j = i, hi
 86 |     end
 87 |     return a
 88 | end
 89 | 
 90 | sortperf(n) = qsort!(rand(n), 1, n)
 91 | Test.@test issorted(sortperf(5000))
 92 | @timeit sortperf(5000) "recursion_quicksort" "Sorting of random numbers using quicksort"
 93 | 
 94 | ## slow pi series ##
 95 | 
 96 | function pisum()
 97 |     acc = 0.0                      # named acc so Base.sum is not shadowed locally
 98 |     for rep in 1:500               # the partial sum is recomputed from scratch 500 times
 99 |         acc = 0.0
100 |         for k in 1:10000
101 |             acc = acc + 1.0 / (k * k)
102 |         end
103 |     end
104 |     return acc
105 | end
106 | 
107 | Test.@test abs(pisum()-1.644834071848065) < 1e-12
108 | @timeit pisum() "iteration_pi_sum" "Summation of a power series"
109 | 
110 | ## slow pi series, vectorized ##
111 | 
112 | function pisumvec()
113 |     s = 0.0
114 |     a = collect(1:10000)  # a real Vector{Int}; `[1:10000]` now yields a 1-element vector holding the range
115 |     for j = 1:500         # the vectorized sum of 1/k^2 is recomputed 500 times
116 |         s = sum(1 ./ (a.^2))
117 |     end
118 |     s
119 | end
120 | 
121 | #@test abs(pisumvec()-1.644834071848065) < 1e-12
122 | #@timeit pisumvec() "pi_sum_vec"
123 | 
124 | ## random matrix statistics ##
125 | 
126 | function randmatstat(t)
127 |     n = 5
128 |     v = zeros(t)
129 |     w = zeros(t)
130 |     for i=1:t
131 |         a = randn(n,n)
132 |         b = randn(n,n)
133 |         c = randn(n,n)
134 |         d = randn(n,n)
135 |         P = [a b c d]
136 |         Q = [a b; c d]
137 |         @static if VERSION >= v"0.7.0" 
138 |             v[i] = LinearAlgebra.tr((P'*P)^4)
139 |             w[i] = LinearAlgebra.tr((Q'*Q)^4)
140 |         else
141 |             v[i] = trace((P'*P)^4)
142 |             w[i] = trace((Q'*Q)^4)
143 |         end
144 |     end
145 |     return (Statistics.std(v)/Statistics.mean(v), Statistics.std(w)/Statistics.mean(w))
146 | end
147 | 
148 | (s1, s2) = randmatstat(1000)
149 | Test.@test 0.5 < s1 < 1.0 && 0.5 < s2 < 1.0
150 | @timeit randmatstat(1000) "matrix_statistics" "Statistics on a random matrix"
151 | 
152 | ## largish random number gen & matmul ##
153 | 
154 | @timeit rand(1000,1000)*rand(1000,1000) "matrix_multiply" "Multiplication of random matrices"
155 | 
156 | ## printfd ##
157 | 
158 | if Sys.isunix()
159 |     function printfd(n)
160 |         open("/dev/null", "w") do io
161 |             for i = 1:n
162 |                 Printf.@printf(io, "%d %d\n", i, i + 1)
163 |             end
164 |         end
165 |     end
166 | 
167 |     printfd(1)
168 |     @timeit printfd(100000) "print_to_file" "Printing to a file descriptor"
169 | end
170 | 
171 | #maxrss("micro")
172 | 


--------------------------------------------------------------------------------
/perf.js:
--------------------------------------------------------------------------------
  1 | const fs = require('fs'); // for print to file benchmark
  2 | 
  3 | (function () {
  4 |     'use strict';
  5 | 
  6 |     var tmin, i, j, t, n, m, s, a, sum, a0, v, r, C, filename, fd;
  7 | 
  8 |     function assert(t) { if (!t) { throw new Error("assertion failed"); } }
  9 | 
 10 |     // recursive fib //
 11 | 
 12 |     function fib(n) {
 13 |         if (n < 2) { return n; }
 14 |         return fib(n-1) + fib(n-2);
 15 |     }
 16 | 
 17 |     tmin = Number.POSITIVE_INFINITY;
 18 |     for (i=0; i < 5; i++) {
 19 |         t = (new Date()).getTime();
 20 |         for (j=0; j < 1000; j++) {
 21 |             assert(fib(20) === 6765);
 22 |         }
 23 |         t = (new Date()).getTime()-t;
 24 |         if (t < tmin) { tmin = t; }
 25 |     }
 26 |     console.log("javascript,recursion_fibonacci," + tmin/1000);
 27 | 
 28 |     // parse int //
 29 | 
 30 |     tmin = Number.POSITIVE_INFINITY;
 31 |     for (i=0; i < 5; i++) {
 32 |         t = (new Date()).getTime();
 33 |         for (j=0; j < 1000*100; j++) {
 34 |             n = Math.floor(4294967295*Math.random());
 35 |             s = n.toString(16);
 36 |             m = parseInt(s,16);
 37 |             assert(m === n);
 38 |         }
 39 |         t = (new Date()).getTime()-t;
 40 |         if (t < tmin) { tmin = t; }
 41 |     }
 42 |     console.log("javascript,parse_integers," + tmin/100);
 43 | 
 44 | 
 45 |     // print to file
 46 | 
 47 |     function printfd(n) {
 48 | 	let f = fs.openSync("/dev/null", "w");
 49 | 	for (let i = 1; i <= n; i++) {
 50 | 	    fs.writeSync(f, `${i} ${i + 1}\n`);
 51 | 	}
 52 | 	fs.closeSync(f);
 53 |     }
 54 | 
 55 |     tmin = Number.POSITIVE_INFINITY;
 56 |     for (i=0; i < 5; i++) {
 57 |         t = (new Date()).getTime();
 58 | 	printfd(100000)
 59 | 	t = (new Date()).getTime()-t;
 60 | 	if (t < tmin) { tmin = t; }
 61 |     }
 62 |     console.log("javascript,print_to_file," + tmin);
 63 | 
 64 |     // mandelbrot set //
 65 | 
 66 |     function Complex(real, imag) {
 67 |         this.re = real;
 68 |         this.im = imag;
 69 |     }
 70 |     function complex_abs(z) {
 71 |         return Math.sqrt(z.re*z.re + z.im*z.im);
 72 |     }
 73 |     function complex_abs2(z) {
 74 |         return z.re*z.re + z.im*z.im;
 75 |     }
 76 |     function complex_add(z,w) {
 77 |         return new Complex(z.re+w.re, z.im+w.im);
 78 |     }
 79 |     function complex_multiply(z,w) {
 80 |         return new Complex(z.re*w.re-z.im*w.im, z.re*w.im+z.im*w.re);
 81 |     }
 82 | 
 83 |     function mandel(z) {
 84 |         var c, n, maxiter;
 85 |         c = z;
 86 |         maxiter = 80;
 87 |         n = 0;
 88 |         for (n = 0; n < maxiter; n++) {
 89 |             if (complex_abs2(z) > 4) { return n; }
 90 |             z = complex_add(complex_multiply(z,z),c);
 91 |         }
 92 |         return maxiter;
 93 |     }
 94 | 
 95 |     function mandelperf() {
 96 |         var a, r, re, i, im, z;
 97 |         a = new Array(26*21);
 98 |         r = 0;
 99 |         for (r = 0; r < 26; r++) {
100 |             re = -2.0 + r*0.1;
101 |             i = 0;
102 |             for (i = 0; i < 21; i++) {
103 |                 im = -1.0 + i*0.1;
104 |                 z = new Complex(re,im);
105 |                 a[r*21+i] = mandel(z);
106 |             }
107 |         }
108 |         return a;
109 |     }
110 | 
111 |     a = mandelperf();
112 |     i = 0;
113 |     sum = 0;
114 |     for (i = 0; i < a.length; i++) { sum += a[i]; }
115 |     assert(sum === 14791);
116 |     a0 = a[0];
117 | 
118 |     tmin = Number.POSITIVE_INFINITY;
119 |     for (i=0; i < 5; i++) {
120 |         t = (new Date()).getTime();
121 |         for (j=0; j < 1000; j++) {
122 |             a = mandelperf();
123 |             assert(a[0] === a0);
124 |         }
125 |         t = (new Date()).getTime()-t;
126 |         if (t < tmin) { tmin=t; }
127 |     }
128 |     console.log("javascript,userfunc_mandelbrot," + tmin/1000);
129 | 
130 |     // numeric vector sort //
131 | 
132 |     function rand(n) {
133 |         var v, i;
134 |         v = new Array(n);
135 | 
136 |         for (i = 0; i < n; i++) {
137 |             v[i] = Math.random();
138 |         }
139 | 
140 |         return v;
141 |     }
142 | 
143 |     function qsort_kernel(a, lo, hi) {
144 |         var i, j, pivot, t;
145 |         i = lo;
146 |         j = hi;
147 |         while (i < hi) {
148 |             pivot = a[Math.floor((lo+hi)/2)];
149 |             while (i <= j) {
150 |                 while (a[i] < pivot) {
151 |                     i = i + 1;
152 |                 }
153 |                 while (a[j] > pivot) {
154 |                     j = j - 1;
155 |                 }
156 |                 if (i <= j) {
157 |                     t = a[i];
158 |                     a[i] = a[j];
159 |                     a[j] = t;
160 |                     i = i + 1;
161 |                     j = j - 1;
162 |                 }
163 |             }
164 |             if (lo < j) {
165 |                 qsort_kernel(a, lo, j);
166 |             }
167 |             lo = i;
168 |             j = hi;
169 |         }
170 |     }
171 | 
172 |     function sortperf(n) {
173 |         var v = rand(n);
174 |         qsort_kernel(v, 0, n);
175 |         return v;
176 |     }
177 | 
178 |     tmin = Number.POSITIVE_INFINITY;
179 |     for (i=0; i < 5; i++) {
180 |         t = (new Date()).getTime();
181 |         for (j=0; j < 100; j++) {
182 |             v = sortperf(5000);
183 |             assert(a[0] < 0.99);
184 |         }
185 |         t = (new Date()).getTime()-t;
186 |         if (t < tmin) { tmin=t; }
187 |     }
188 |     console.log("javascript,recursion_quicksort," + tmin/100);
189 | 
190 |     // slow pi series //
191 | 
192 |     function pisum() {
193 |         var sum, k;
194 |         sum = 0.0;
195 |         for (i=0; i < 500; i++) {
196 |             sum = 0.0;
197 |             for (k=1; k <= 10000; k++) {
198 |                 sum += 1.0/(k*k);
199 |             }
200 |         }
201 |         return sum;
202 |     }
203 | 
204 |     tmin = Number.POSITIVE_INFINITY;
205 |     for (i=0; i < 5; i++) {
206 |         t = (new Date()).getTime();
207 |         for (j=0; j < 10; j++) {
208 |             assert(Math.abs(pisum()-1.644834071848065) < 1e-12);
209 |         }
210 |         t = (new Date()).getTime()-t;
211 |         if (t < tmin) { tmin=t; }
212 |     }
213 |     console.log("javascript,iteration_pi_sum," + tmin/10);
214 | 
215 |     // random matrix statistics //
216 | 
217 |     function gaussian() {
218 |         var k, i, j;
219 |         k = 2;
220 |         do {
221 |             i = 2*Math.random()-1;
222 |             j = 2*Math.random()-1;
223 |             k = i*i+j*j;
224 |         } while (k >= 1);
225 |         return i*Math.sqrt((-2*Math.log(k))/k);
226 |     }
227 | 
228 |     function randn( a, sub ) {
229 |         var subLen, len, i;
230 |         subLen = sub.length;
231 |         len = a.length;
232 | 
233 |         for (i = 0; i < subLen; i++) {
234 |             a[i] = sub[i] = gaussian();
235 |         }
236 | 
237 |         for (i = subLen; i < len; i++) {
238 |             a[i] = gaussian();
239 |         }
240 | 
241 |         return a;
242 |     }
243 | 
244 |     function transpose(dest, src,m,n) {
245 |         var i, j;
246 |         i = 0;
247 |         j = 0;
248 | 
249 |         for (i = 0; i < m; i++) {
250 |             for (j = 0; j < n; j++) {
251 |                 dest[i*n+j] = src[j*m+i];
252 |             }
253 |         }
254 |     }
255 | 
256 |     function matmulCopy( dest, A,B,m,l,n) {
257 |         var i, j, k, sum;
258 |         i = 0;
259 |         j = 0;
260 |         k = 0;
261 | 
262 |         for (i = 0; i < m; i++) {
263 |             for (j = 0; j < n; j++) {
264 |                 sum = 0.0;
265 | 
266 |                 for (k = 0; k < l; k++) {
267 |                     sum += A[i*l+k]*B[k*n+j];
268 |                 }
269 | 
270 |                 dest[i*n+j] = sum;
271 |             }
272 |         }
273 |     }
274 | 
    /**
     * Random-matrix statistics benchmark kernel.
     *
     * Runs `t` trials. Each trial refills four n*n buffers (n = 5) with
     * gaussian() samples via randn, packs them into P and Q, multiplies each
     * with its transposed copy, squares that product twice with matmulCopy
     * and records the sum of the diagonal entries of the result.
     *
     * @param {number} t - Number of trials.
     * @returns {{s1: number, s2: number}} For the P traces (s1) and the Q
     *     traces (s2): sqrt(t*(t*sum(x^2) - sum(x)^2) / ((t-1)*sum(x)^2)),
     *     i.e. the sample standard deviation divided by the mean.
     */
    function randmatstat(t) {
        var n, P, PTransposed, Pt1P, Pt2P, Q, QTransposed, Pt1Q, Pt2Q,
        a, b, c, d, aSub, bSub, cSub, dSub, v, w, i, j, k,
        trP, trQ, v1, v2, w1, w2;
        n = 5;

        // All work buffers are allocated once and reused by every trial.
        P = new Float64Array( 4*n*n );
        Q = new Float64Array( 4*n*n );

        PTransposed = new Float64Array( P.length );
        QTransposed = new Float64Array( Q.length );

        Pt1P = new Float64Array( (4*n) * (4*n) );
        Pt2P = new Float64Array( (4*n) * (4*n) );
        Pt1Q = new Float64Array( (2*n) * (2*n) );
        Pt2Q = new Float64Array( (2*n) * (2*n) );

        a = new Float64Array( n*n );
        b = new Float64Array( n*n );
        c = new Float64Array( n*n );
        d = new Float64Array( n*n );

        // the first n number of elements of a to d
        aSub = new Float64Array( n );
        bSub = new Float64Array( n );
        cSub = new Float64Array( n );
        dSub = new Float64Array( n );

        // One trace per trial for P (v) and for Q (w).
        v = new Float64Array( t );
        w = new Float64Array( t );

        i = 0;
        j = 0;
        k = 0;

        for (i = 0; i < t; i++) {
            a = randn( a, aSub );
            b = randn( b, bSub );
            c = randn( c, cSub );
            d = randn( d, dSub );

            // P is the four buffers laid end to end.
            P.set( a, 0*n*n );
            P.set( b, 1*n*n );
            P.set( c, 2*n*n );
            P.set( d, 3*n*n );

            // Q is filled from the n-element prefixes only; the same prefix
            // is written again for every j.
            for (j = 0; j < n; j++) {
                Q.set( aSub, 2*n*j         );
                Q.set( bSub, 2*n*j+n       );
                Q.set( cSub, 2*n*(n+j)     );
                Q.set( dSub, 2*n*(n+j)+n   );
                /*
                  for (k = 0; k < n; k++) {
                  Q[ 2*n*j        + k ] = a[k];
                  Q[ 2*n*j+n      + k ] = b[k];
                  Q[ 2*n*(n+j)    + k ] = c[k];
                  Q[ 2*n*(n+j)+n  + k ] = d[k];
                  }
                */
            }

            // Pt1P = PTransposed*P, Pt2P = Pt1P^2, then Pt1P = Pt2P^2.
            transpose( PTransposed, P, n, 4*n );
            matmulCopy( Pt1P, PTransposed, P, 4*n, n, 4*n );
            matmulCopy( Pt2P, Pt1P, Pt1P, 4*n, 4*n, 4*n);
            matmulCopy( Pt1P, Pt2P, Pt2P, 4*n, 4*n, 4*n);

            // Diagonal entries of a (4n x 4n) row-major buffer are 4n+1 apart.
            trP = 0;
            for (j = 0; j < 4*n; j++) {
                trP += Pt1P[(4*n+1)*j];
            }
            v[i] = trP;

            // Same sequence for Q with (2n x 2n) buffers.
            transpose( QTransposed, Q, 2*n, 2*n );
            matmulCopy( Pt1Q, QTransposed, Q, 2*n, 2*n, 2*n );
            matmulCopy( Pt2Q, Pt1Q, Pt1Q, 2*n, 2*n, 2*n);
            matmulCopy( Pt1Q, Pt2Q, Pt2Q, 2*n, 2*n, 2*n);

            trQ = 0;
            for (j = 0; j < 2*n; j++) {
                trQ += Pt1Q[(2*n+1)*j];
            }
            w[i] = trQ;
        }

        // Accumulate sums and sums of squares of the recorded traces.
        v1 = 0.0;
        v2 = 0.0;
        w1 = 0.0;
        w2 = 0.0;
        for (i = 0; i < t; i++) {
            v1 += v[i]; v2 += v[i]*v[i];
            w1 += w[i]; w2 += w[i]*w[i];
        }

        return {
            s1: Math.sqrt((t*(t*v2-v1*v1))/((t-1)*v1*v1)),
            s2: Math.sqrt((t*(t*w2-w1*w1))/((t-1)*w1*w1))
        };
    }
373 | 
374 |     tmin = Number.POSITIVE_INFINITY;
375 |     for (i=0; i < 5; i++) {
376 |         t = (new Date()).getTime();
377 |         for (j=0; j < 10; j++) {
378 |             r = randmatstat(1000);
379 |             // assert(0.5 < r.s1 < 1.0);
380 |             //        assert(0.5 < r.s2 < 1.0);
381 |         }
382 |         t = (new Date()).getTime()-t;
383 |         if (t < tmin) { tmin=t; }
384 |     }
385 |     console.log("javascript,matrix_statistics," + tmin/10);
386 | 
387 |     // random matrix multiply //
388 | 
389 |     function randFloat64(n) {
390 |         var v, i;
391 |         v = new Float64Array(n);
392 | 
393 |         for (i = 0; i < n; i++) {
394 |             v[i] = Math.random();
395 |         }
396 | 
397 |         return v;
398 |     }
399 | 
400 |     // Transpose mxn matrix.
401 |     function mattransp(A, m, n) {
402 |         var i, j, T;
403 |         T = new Float64Array(m * n);
404 | 
405 |         for (i = 0; i < m; ++i) {
406 |             for (j = 0; j < n; ++j) {
407 |                 T[j * m + i] = A[i * n + j];
408 |             }
409 |         }
410 | 
411 |         return T;
412 |     }
413 | 
414 |     function matmul(A,B,m,l,n) {
415 |         var C, i, j, k, total;
416 |         C = new Float64Array(m*n);
417 |         i = 0;
418 |         j = 0;
419 |         k = 0;
420 | 
421 |         // Use the transpose of B so that
422 |         // during the matrix multiplication
423 |         // we access consecutive memory locations.
424 |         // This is a fairer comparison of JS
425 |         // with the other languages which call on
426 |         // custom multiplication routines, which
427 |         // likely make use of such aligned memory.
428 |         B = mattransp(B,l,n);
429 | 
430 |         for (i = 0; i < m; i++) {
431 |             for (j = 0; j < n; j++) {
432 |                 total = 0.0;
433 | 
434 |                 for (k = 0; k < l; k++) {
435 |                     total += A[i*l+k]*B[j*l+k];
436 |                 }
437 | 
438 |                 C[i*n+j] = total;
439 |             }
440 |         }
441 | 
442 |         return C;
443 |     }
444 | 
445 |     function randmatmul(n) {
446 |         var A, B;
447 |         A = randFloat64(n*n);
448 |         B = randFloat64(n*n);
449 | 
450 |         return matmul(A, B, n, n, n);
451 |     }
452 | 
453 |     tmin = Number.POSITIVE_INFINITY;
454 |     t = (new Date()).getTime();
455 |     C = randmatmul(1000);
456 |     assert(0 <= C[0]);
457 |     t = (new Date()).getTime()-t;
458 |     if (t < tmin) { tmin=t; }
459 |     console.log("javascript,matrix_multiply," + tmin);
460 | }());
461 | 


--------------------------------------------------------------------------------
/perf.lua:
--------------------------------------------------------------------------------
  1 | 
-- Print a warning when LuaJIT reports an architecture other than x64; the
-- script carries on either way.
if jit.arch ~= 'x64' then
    print('WARNING: please use BIT=64 for optimal OpenBLAS performance')
end
  5 | 
  6 | local ffi     = require 'ffi'
  7 | local bit     = require 'bit'
  8 | local time    = require 'time'
  9 | local alg     = require 'sci.alg'
 10 | local prng    = require 'sci.prng'
 11 | local stat    = require 'sci.stat'
 12 | local dist    = require 'sci.dist'
 13 | local complex = require 'sci.complex'
 14 | 
 15 | local min, sqrt, random, abs = math.min, math.sqrt, math.random, math.abs
 16 | local cabs = complex.abs
 17 | local rshift = bit.rshift
 18 | local format = string.format
 19 | local nowutc = time.nowutc
 20 | local rng = prng.std()
 21 | local vec, mat, join = alg.vec, alg.mat, alg.join
 22 | local sum, trace = alg.sum, alg.trace
 23 | local var, mean = stat.var, stat.mean
 24 | 
 25 | --------------------------------------------------------------------------------
 26 | local function elapsed(f)
 27 |     local t0 = nowutc()
 28 |     local val1, val2 = f()
 29 |     local t1 = nowutc()
 30 |     return (t1 - t0):tomilliseconds(), val1, val2
 31 | end
 32 | 
 33 | local function timeit(f, name, check)
 34 |     local t, k, s = 1/0, 0, nowutc()
 35 |     while true do
 36 |         k = k + 1
 37 |         local tx, val1, val2 = elapsed(f)
 38 |         t = min(t, tx)
 39 |         if check then
 40 |             check(val1, val2)
 41 |         end
 42 |         if k > 5 and (nowutc() - s):toseconds() >= 2 then break end
 43 |     end
 44 |     io.write(format('lua,%s,%g\n', name, t))
 45 | end
 46 | 
 47 | --------------------------------------------------------------------------------
 48 | local function fib(n)
 49 |     if n < 2 then
 50 |         return n
 51 |     else
 52 |         return fib(n-1) + fib(n-2)
 53 |     end
 54 | end
 55 | 
-- Benchmark fib(20); every run must return 6765.
timeit(function() return fib(20) end, 'recursion_fibonacci', function(x) assert(x == 6765) end)
 57 | 
 58 | local function parseint()
 59 |     local lmt = 2^32 - 1
 60 |     local n, m
 61 |     for i = 1, 1000 do
 62 |         n = random(lmt) -- Between 0 and 2^32 - 1, i.e. uint32_t.
 63 |         local s = format('0x%x', tonumber(n))
 64 |         m = tonumber(s)
 65 |         assert(n == m) -- Done here to be even with Julia benchmark.
 66 |     end
 67 |     return n, m
 68 | end
 69 | 
-- No check callback: parseint asserts each round trip itself.
timeit(parseint, 'parse_integers')
 71 | 
 72 | local function cabs2( z )
 73 |    return z[1]*z[1] + z[2]*z[2]
 74 | end
 75 | 
 76 | local function mandel(z)
 77 |     local c = z
 78 |     local maxiter = 80
 79 |     for n = 1, maxiter do
 80 |         if cabs2(z) > 4 then
 81 |             return n-1
 82 |         end
 83 |         z = z*z + c
 84 |     end
 85 |     return maxiter
 86 | end
 87 | local function mandelperf()
 88 |     local a = mat(26, 21)
 89 |     for r=1,26 do -- Lua's for i=l,u,c doesn't match Julia's for i=l:c:u.
 90 |         for c=1,21 do
 91 |             local re, im = (r - 21)*0.1, (c - 11)*0.1
 92 |             a[{r, c}] = mandel(re + im*1i)
 93 |         end
 94 |     end
 95 |     return a
 96 | end
 97 | 
-- Benchmark mandelperf; the escape counts over the grid must total 14791.
timeit(mandelperf, 'userfunc_mandelbrot', function(a) assert(sum(a) == 14791) end)
 99 | 
-- In-place quicksort of a[lo..hi] (both bounds inclusive); returns a.
-- The left partition is sorted by recursion, the right one by the enclosing
-- loop, which advances lo past the finished part.
local function qsort(a, lo, hi)
    local left, right = lo, hi
    while left < hi do
        local pivot = a[rshift(lo + hi, 1)]
        while left <= right do
            while a[left] < pivot do left = left + 1 end
            while a[right] > pivot do right = right - 1 end
            if left <= right then
                local tmp = a[left]
                a[left] = a[right]
                a[right] = tmp
                left = left + 1
                right = right - 1
            end
        end
        if lo < right then
            qsort(a, lo, right)
        end
        lo = left
        right = hi
    end
    return a
end
117 | 
-- Fill an FFI double array with 5000 samples from rng and sort it.
local function sortperf()
    local count = 5000
    -- One extra slot so indices 1..count can be used; slot 0 is never touched.
    local buf = ffi.new('double[?]', count + 1)
    for k = 1, count do
        buf[k] = rng:sample()
    end
    return qsort(buf, 1, count)
end
126 | 
-- Benchmark sortperf; after every run verify that elements 1..5000 are in
-- non-decreasing order.
timeit(sortperf, 'recursion_quicksort', function(x)
    for i=2,5000 do
        assert(x[i-1] <= x[i])
    end
end
)
133 | 
-- Sum 1/k^2 for k = 1..10000, repeated 500 times; each repeat restarts the
-- sum, so the result is that of a single pass.
local function pisum()
    local total
    for _ = 1, 500 do
        total = 0
        for k = 1, 10000 do
            total = total + 1 / (k*k)
        end
    end
    return total
end
144 | 
-- Benchmark pisum; every run must land within 1e-12 of the reference value.
timeit(pisum, 'iteration_pi_sum', function(x)
    assert(abs(x - 1.644834071848065) < 1e-12)
end)
148 | 
-- Create an r-by-c matrix with mat() and fill every element, addressed
-- 1..#m, with rng:sample().
local function rand(r, c)
    local m = mat(r, c)
    local count = #m
    for k = 1, count do
        m[k] = rng:sample()
    end
    return m
end
156 | 
-- Create an r-by-c matrix with mat() and fill every element, addressed
-- 1..#m, with a sample of dist.normal(0, 1) drawn from rng. A new
-- distribution object is built for each element.
local function randn(r, c)
    local m = mat(r, c)
    local count = #m
    for k = 1, count do
        local gauss = dist.normal(0, 1)
        m[k] = gauss:sample(rng)
    end
    return m
end
164 | 
-- Random-matrix statistics kernel: t trials, each drawing four n-by-n
-- (n = 5) matrices with randn and recording one trace for P and one for Q.
-- Returns sqrt(var(v))/mean(v) and sqrt(var(w))/mean(w).
-- NOTE(review): the `..` joins, `[]` suffix, backtick, `**` and `^^` below are
-- not stock Lua; presumably sci-lang syntax extensions for concatenation,
-- transpose, matrix product and matrix power. Confirm against the syntax
-- extension's documentation.
local function randmatstat(t)
    local n = 5
    local v, w = vec(t), vec(t)
    for i=1,t do
        local a, b, c, d = randn(n, n), randn(n, n), randn(n, n), randn(n, n)
        local P = join(a..b..c..d)
        local Q = join(a..b, c..d)
        v[i] = trace((P[]`**P[])^^4)
        w[i] = trace((Q[]`**Q[])^^4)
    end
    return sqrt(var(v))/mean(v), sqrt(var(w))/mean(w)
end

-- Benchmark randmatstat(1000); both returned ratios must lie in (0.5, 1.0).
timeit(function() return randmatstat(1000) end, 'matrix_statistics',
    function(s1, s2)
        assert( 0.5 < s1 and s1 < 1.0 and 0.5 < s2 and s2 < 1.0 )
    end)
182 | 
-- Build two n-by-n matrices with rand and combine them with `**`.
-- NOTE(review): `a[]**b[]` is not stock Lua; presumably the syntax
-- extension's matrix product. Confirm.
local function randmatmult(n)
    local a, b = rand(n, n), rand(n, n)
    return a[]**b[]
end

-- Benchmark a 1000 x 1000 multiply; the result is not checked.
timeit(function() return randmatmult(1000) end, 'matrix_multiply')
189 | 
-- The print benchmark writes to /dev/null, so it is skipped on Windows.
if jit.os ~= 'Windows' then
    -- Write n lines of the form "i i+1" to /dev/null.
    local function printfd(n)
        local sink = io.open('/dev/null','w')
        for k = 1, n do
            local line = format('%d %d\n', k, k + 1)
            sink:write(line)
        end
        sink:close()
    end

    timeit(function() return printfd(100000) end, 'print_to_file')
end
201 | 


--------------------------------------------------------------------------------
/perf.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | %%  Main function. All the tests are run here.           %%
%%  The function declarations can be found at the end.   %%
  4 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  5 | 
% PERF  Run every benchmark in this file.
%   Most kernels are called once up front, with an assert on the result,
%   before being passed to timeit, which prints one result line per
%   benchmark.
function perf()

	warning off;

	f = fib(20);
	assert(f == 6765)
	timeit('recursion_fibonacci', @fib, 20)

	timeit('parse_integers', @parseintperf, 1000)

	%% array constructors %%

	%o = ones(200,200);
	%assert(all(o) == 1)
	%timeit('ones', @ones, 200, 200)

	%assert(all(matmul(o) == 200))
	%timeit('AtA', @matmul, o)

	% One untimed call to mandel before the checked and timed grid runs.
	mandel(complex(-.53,.68));
	assert(sum(sum(mandelperf(true))) == 14791)
	timeit('userfunc_mandelbrot', @mandelperf, true)

	assert(issorted(sortperf(5000)))
	timeit('recursion_quicksort', @sortperf, 5000)

	s = pisum(true);
	assert(abs(s-1.644834071848065) < 1e-12);
	timeit('iteration_pi_sum',@pisum, true)

	%s = pisumvec(true);
	%assert(abs(s-1.644834071848065) < 1e-12);
	%timeit('pi_sum_vec',@pisumvec, true)

	% Only s1 is range-checked here; s2 is computed but not asserted.
	[s1, s2] = randmatstat(1000);
	assert(round(10*s1) > 5 && round(10*s1) < 10);
	timeit('matrix_statistics', @randmatstat, 1000)

	timeit('matrix_multiply', @randmatmul, 1000);

	% One untimed single-line call before the timed run.
	printfd(1)
	timeit('print_to_file', @printfd, 100000)

end
 50 | 
 51 | 
 52 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%  Function declarations   %%
 54 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 55 | 
 56 | function assert(bool)
 57 |    if ~bool
 58 |      error('Assertion failed')
 59 |    end
 60 | end
 61 | 
 62 | function timeit(name, func, varargin)
 63 |     lang = 'matlab';
 64 |     if exist('OCTAVE_VERSION') ~= 0
 65 |        lang = 'octave';
 66 |     end
 67 | 
 68 |     nexpt = 5;
 69 |     times = zeros(nexpt, 1);
 70 | 
 71 |     for i=1:nexpt
 72 |         tic(); func(varargin{:}); times(i) = toc();
 73 |     end
 74 | 
 75 |     times = sort(times);
 76 |     fprintf ('%s,%s,%.8f\n', lang, name, times(1)*1000);
 77 | end
 78 | 
 79 | %% recursive fib %%
 80 | 
 81 | function f = fib(n)
 82 |     if n < 2
 83 |         f = n;
 84 |         return
 85 |     else
 86 |         f = fib(n-1) + fib(n-2);
 87 |     end
 88 | end
 89 | 
 90 | %% parse int %%
 91 | 
 92 | function n = parseintperf(t)
 93 |     for i = 1:t
 94 |         n = fix(rand*(2^32));
 95 |         s = sprintf('%08x',n);
 96 |         m = sscanf(s,'%x');
 97 |         assert(m == n);
 98 |     end
 99 | end
100 | 
101 | %% matmul and transpose %%
102 | 
103 | %function oo = matmul(o)
104 | %    oo = o * o.';
105 | %end
106 | 
107 | %% mandelbrot set: complex arithmetic and comprehensions %%
108 | 
% ABS2  Squared modulus of z: real(z)^2 + imag(z)^2.
function r = abs2(z)
    re = real(z);
    im = imag(z);
    r = re*re + im*im;
end
112 | 
% MANDEL  Escape-time count for z -> z^2 + c started at z = c.
%   Returns the index (0..79) of the first iteration where abs2(z) > 4,
%   or 80 if that never happens.
function n = mandel(z)
    c = z;
    n = 0;
    while n < 80
        if abs2(z) > 4
            return
        end
        z = z^2 + c;
        n = n + 1;
    end
end
124 | 
% MANDELPERF  Evaluate mandel on the grid -2.0:.1:0.5 by -1:.1:1.
%   M(r,c) holds the count for real part number c and imaginary part
%   number r. The argument is ignored.
function M = mandelperf(ignore)
    re = -2.0:.1:0.5;
    im = -1:.1:1;
    nrows = length(im);
    ncols = length(re);
    M = zeros(nrows, ncols);
    for row = 1:nrows
        for col = 1:ncols
            M(row,col) = mandel(re(col) + im(row)*1i);
        end
    end
end
135 | 
136 | %% numeric vector quicksort %%
137 | 
% QSORT  Return a sorted copy of a, using qsort_kernel over 1..length(a).
function b = qsort(a)
    last = length(a);
    b = qsort_kernel(a, 1, last);
end
141 | 
% QSORT_KERNEL  Quicksort a(lo:hi) (both bounds inclusive); returns the array.
%   The left partition is sorted by recursion, the right one by the
%   enclosing loop, which advances lo past the finished part.
function a = qsort_kernel(a, lo, hi)
    left = lo;
    right = hi;
    while left < hi
        pivot = a(floor((lo+hi)/2));
        while left <= right
            while a(left) < pivot
                left = left + 1;
            end
            while a(right) > pivot
                right = right - 1;
            end
            if left <= right
                tmp = a(left);
                a(left) = a(right);
                a(right) = tmp;
                left = left + 1;
                right = right - 1;
            end
        end
        if lo < right
            a = qsort_kernel(a, lo, right);
        end
        lo = left;
        right = hi;
    end
end
163 | 
% SORTPERF  Draw an n-by-1 rand vector and return it sorted by qsort.
function v = sortperf(n)
    samples = rand(n,1);
    v = qsort(samples);
end
168 | 
169 | %% slow pi series %%
170 | 
% PISUM  Sum 1/k^2 for k = 1..10000, repeated 500 times.
%   Each repeat restarts the sum, so the result is that of a single pass.
%   The argument is ignored.
function total = pisum(ignore)
    total = 0.0;
    for rep = 1:500
        total = 0.0;
        for k = 1:10000
            total = total + 1.0/(k*k);
        end
    end
end
180 | 
181 | %% slow pi series, vectorized %%
182 | 
% PISUMVEC  Vectorised sum of 1/k^2 for k = 1..10000, evaluated 500 times.
%   The argument is ignored.
function s = pisumvec(ignore)
    k = 1:10000;
    for rep = 1:500
        s = sum(1./(k.^2));
    end
end
189 | 
190 | %% random matrix statistics %%
191 | 
% RANDMATSTAT  Random-matrix statistics kernel.
%   Runs t trials. Each draws four 5-by-5 randn matrices a, b, c, d, forms
%   P = [a b c d] and Q = [a b; c d], and records trace((P.'*P)^4) and
%   trace((Q.'*Q)^4). Returns std/mean of the P traces (s1) and of the
%   Q traces (s2).
function [s1, s2] = randmatstat(t)
    n = 5;
    trP = zeros(t,1);
    trQ = zeros(t,1);
    for trial = 1:t
        a = randn(n, n);
        b = randn(n, n);
        c = randn(n, n);
        d = randn(n, n);
        P = horzcat(a, b, c, d);
        Q = vertcat(horzcat(a, b), horzcat(c, d));
        trP(trial) = trace((P.'*P)^4);
        trQ(trial) = trace((Q.'*Q)^4);
    end
    s1 = std(trP)/mean(trP);
    s2 = std(trQ)/mean(trQ);
end
209 | 
% MYTRANSPOSE  Element-by-element transpose of a matrix.
%   Returns an n-by-m matrix t with t(i,j) = x(j,i) for an m-by-n input.
function t = mytranspose(x)
    [rows, cols] = size(x);
    t = zeros(cols, rows);
    for r = 1:rows
        for c = 1:cols
            t(c,r) = x(r,c);
        end
    end
end
219 | 
220 | %% largish random number gen & matmul %%
221 | 
% RANDMATMUL  Product of two n-by-n rand matrices.
function X = randmatmul(n)
    A = rand(n,n);
    B = rand(n,n);
    X = A*B;
end
225 | 
226 | %% printf %%
227 | 
% PRINTFD  Write n lines of the form "i i+1" to /dev/null.
function printfd(n)
    fid = fopen('/dev/null','w');
    for k = 1:n
        fprintf(fid, '%d %d\n', k, k + 1);
    end
    fclose(fid);
end
235 | 


--------------------------------------------------------------------------------
/perf.nb:
--------------------------------------------------------------------------------
  1 | (* Benchmark script *)
  2 | 
  3 | (* Set up output stream *)
  4 | SetOptions[$Output, FormatType -> OutputForm];
  5 | 
  6 | (* Test if system has a C compiler and if so set target to "C"*)
  7 | Needs["CCompilerDriver`"];
  8 | If[ Length[CCompilers[]] > 0,
  9 | 	$CompilationTarget = "C"
 10 | ];
 11 | 
 12 | 
(* Switch for timeit's output: when True, each timing is printed. *)
ClearAll[$printOutput];
$printOutput = True;

(* timeit[ex, name] evaluates ex five times and keeps the smallest
   AbsoluteTiming result. HoldFirst keeps ex unevaluated until it is handed
   to AbsoluteTiming. When $printOutput is True it prints
   "mathematica,<name>,<t*1000>", i.e. the best time in milliseconds. *)
ClearAll[timeit];
SetAttributes[timeit, HoldFirst];
timeit[ex_, name_String] := Module[
	{t},
	t = Infinity;
	Do[
		t = Min[t, N[First[AbsoluteTiming[ex]]]];
		,
		{i, 1, 5}
	];
	If[$printOutput,
		(*Print[OutputForm["mathematica," <> name <> ","], t*1000];*)
		Print["mathematica,", name, ",", t*1000];
	];
];
 31 | 
(* test[ex] forwards ex, held unevaluated by HoldFirst, to Assert.
   On[Assert] switches assertion checking on. *)
ClearAll[test];
SetAttributes[test, HoldFirst];
test[ex_] := Assert[ex];
On[Assert];
 36 | 
 37 | 
 38 | (* recursive fib *)
 39 | 
(* Naive doubly-recursive Fibonacci as a compiled function of one integer.
   The compilation target is pinned to "WVM" for this function. *)
ClearAll[fib];
fib = Compile[{{n, _Integer}},
	If[n < 2, n, fib[n - 1] + fib[n - 2]],
	CompilationTarget -> "WVM"
];

(* Check the result once, then time the same call. *)
test[fib[20] == 6765];
timeit[fib[20], "recursion_fibonacci"];
 48 | 
 49 | (* parse integer *)
 50 | 
(* parseintperf[t] round-trips t random integers from 0..4294967295 through
   base-16 strings, asserting each round trip. Returns the last integer. *)
ClearAll[parseintperf];
parseintperf[t_] := Module[
	{n, m, i, s},
	Do[
		n = RandomInteger[{0, 4294967295}];
		s = IntegerString[n, 16];
		m = FromDigits[s, 16];
		test[ m == n];
		,
		{i, 1, t}
	];
	n
];

timeit[parseintperf[1000], "parse_integers"];
 66 | 
 67 | (* print to file *)
 68 | 
(* printfdperf[t] opens /dev/null for writing, writes t lines formatted
   with StringForm as "i i+1", and closes the stream. *)
ClearAll[printfdperf];
printfdperf[t_] := Module[
	{i,fd,filename},
	filename = "/dev/null";
	fd = OpenWrite[filename];
	For[i=1, i<=t, ++i,
		 WriteString[fd, StringForm["`1` `2`\n", i, i + 1]];
	];
	Close[fd];
];

(* This call is unconditional; it is not guarded by an operating-system test. *)
timeit[printfdperf[100000], "print_to_file"];
 81 | 
 82 | (* array constructors *)
 83 | 
(* Untimed check on a 200 x 200 array of ones compared against 1. *)
test[ And @@ And @@@ Thread /@ Thread[ConstantArray[1, {200, 200}] == 1]];

(* matmul and transpose *)

(* Untimed check: A.ConjugateTranspose[A] for the all-ones A is compared
   against 200. *)
ClearAll[A];
A = ConstantArray[1, {200, 200}];
test[And @@ And @@@ Thread /@ Thread[A.ConjugateTranspose[A] == 200]];
 91 | 
 92 | (* mandelbrot set: complex arithmetic and comprehensions *)
 93 | 
 94 | ClearAll[abs2];
 95 | (*abs2[z_] := Module[
 96 | 	Re(z)*Re(z) + Im(z)*Im(z);
 97 | ];*)
 98 | 
 99 | ClearAll[mandel];
100 | (*mandel[zin_] := Module[
101 | 	{z, c, maxiter, n},
102 | 	z = zin;
103 | 	c = z;
104 | 	maxiter = 80;
105 | 	Do[
106 | 		If[ Abs2[z] > 4,
107 | 			maxiter = n-1;
108 | 			Break[]
109 | 		];
110 | 		z = z^2 + c;
111 | 		,
112 | 		{n, 1, maxiter}
113 | 	];
114 | 	maxiter
115 | ];*)
(* Compiled escape-time count for z -> z^2 + c started at z = c = zin.
   The loop runs n = 1..80; on the first n with Abs[z] > 2 the result is
   set to n-1 and the loop is left, otherwise the result stays 80.
   The escape test here is Abs[z] > 2, not a squared modulus. *)
mandel = Compile[{{zin, _Complex}},
	Module[
		{z = zin, c = zin, maxiter = 80, n = 0},
		Do[
			If[ Abs[z] > 2,
				maxiter = n-1;
				Break[]
			];
			z = z^2 + c;
			,
			{n, 1, maxiter}
		];
		maxiter
	]
];
131 | 
(* mandelperf[] tabulates mandel over imaginary parts -1..1 (outer index)
   and real parts -2..0.5 (inner index), both in steps of 0.1. *)
ClearAll[mandelperf];
mandelperf[] := Table[mandel[r + i*I], {i, -1., 1., 0.1}, {r, -2.0, 0.5, 0.1}];

(* The counts over the whole grid must total 14791. *)
test[ Total[mandelperf[], 2] == 14791];
timeit[mandelperf[], "userfunc_mandelbrot"];
137 | 
138 | (* numeric vector sort *)
139 | 
140 | ClearAll[qsort];
141 | (* qsort[ain_, loin_, hiin_] := Module[
142 | 	{a = ain, i = loin, j = hiin, lo = loin, hi = hiin, pivot},
143 | 	While[ i < hi,
144 | 		pivot = a[[BitShiftRight[lo + hi] ]];
145 | 		While[ i <= j,
146 | 			While[a[[i]] < pivot, i++];
147 | 			While[a[[j]] > pivot, j--];
148 | 			If[ i <= j,
149 | 				a[[{i,j}]] = a[[{j, i}]];
150 | 				i++; j--;
151 | 			];
152 | 		];
153 | 		If[ lo < j, a = qsort[a, lo, j] ];
154 | 		{lo, j} = {i, hi};
155 | 	];
156 | 	a
157 | ]; *)
(* Compiled quicksort of positions loin..hiin (inclusive) of a real vector.
   It works on a local copy `a` and returns that copy. The left partition
   is sorted by calling qsort on the extracted span a[[lo;;j]], re-indexed
   from 1, and writing the result back into the same span; the right
   partition is handled by the enclosing While loop. *)
qsort = Compile[
	{{ain, _Real, 1}, {loin, _Integer}, {hiin, _Integer}},
	Module[
		{a = ain, i = loin, j = hiin, lo = loin, hi = hiin, pivot},
		While[ i < hi,
			pivot = a[[ Floor[(lo + hi)/2] ]];
			While[ i <= j,
				While[a[[i]] < pivot, i++];
				While[a[[j]] > pivot, j--];
				If[ i <= j,
					a[[{i,j}]] = a[[{j, i}]];
					i++; j--;
				];
			];
			If[ lo < j, a[[lo;;j]] = qsort[ a[[lo;;j]], 1, j - lo + 1] ];
			{lo, j} = {i, hi};
		];
		a
	]
];
178 | 
179 | 
(* sortperf[n] draws n values with RandomReal[1, n] and sorts them with
   qsort over positions 1..n. *)
ClearAll[sortperf];
sortperf[n_] := Module[{vec = RandomReal[1, n]}, qsort[vec, 1, n]];

(* Check ordering once, then time the same call. *)
test[OrderedQ[sortperf[5000]] ];
timeit[sortperf[5000], "recursion_quicksort"];
185 | 
186 | (* slow pi series  *)
187 | 
(* Compiled sum of 1/(k*k) for k = 1..10000, evaluated 500 times; each
   evaluation overwrites the previous result. *)
ClearAll[pisum];
pisum = Compile[ {},
	Module[
		{sum = 0.`},
		Do[sum = Sum[1/(k*k), {k, 1, 10000}],
			{500}];
		sum
	]
];


(* The result must lie within 1e-12 of the reference value. *)
test[Abs[pisum[] - 1.644834071848065`] < 1.`*^-12 ];
timeit[pisum[], "iteration_pi_sum"];
201 | 
202 | (* slow pi series, vectorized *)
203 | 
(* Vectorised variant: Total of 1/Range[1, 10000]^2, evaluated 500 times.
   Its test and timing calls below are commented out, so it is defined but
   not run. *)
pisumvec = Compile[{},
	Module[
		{sum = 0.},
		Do[
			sum = Total[1/Range[1, 10000]^2];,
			{500}
		];
		sum
	]
];
214 | 
215 | (* test[Abs[pisumvec[] - 1.644834071848065`] < 1.`*^-12 ];*)
216 | (* timeit[pisumvec[], "pi_sum_vec"];*)
217 | 
218 | (* random matrix statistics *)
219 | 
220 | ClearAll[randmatstat];
221 | (*randmatstat[t_] := Module[
222 | 	{n, v, w, a, b, c, d, P, Q},
223 | 	n = 5;
224 | 	v = w = ConstantArray[0., {t}];
225 | 	Do[
226 | 		a = RandomReal[NormalDistribution[], {n, n}];
227 | 		b = RandomReal[NormalDistribution[], {n, n}];
228 | 		c = RandomReal[NormalDistribution[], {n, n}];
229 | 		d = RandomReal[NormalDistribution[], {n, n}];
230 | 		P = Join[a, b, c, d, 2];
231 | 		Q = ArrayFlatten[{{a, b}, {c, d}}];
232 | 		v[[i]] = Tr[MatrixPower[Transpose[P].P, 4]];
233 | 		w[[i]] = Tr[MatrixPower[Transpose[Q].Q, 4]];
234 | 		,
235 | 		{i, 1, t}
236 | 	];
237 | 	{StandardDeviation[v]/Mean[v], StandardDeviation[w]/Mean[w]}
238 | ];*)
(* Compiled random-matrix statistics kernel.
   Runs t trials. Each draws four n x n (n = 5) normal matrices a, b, c, d,
   joins them side by side into P and as a 2 x 2 block matrix into Q, and
   records Tr[MatrixPower[Transpose[X].X, 4]] for X = P and X = Q.
   Returns {StandardDeviation[v]/Mean[v], StandardDeviation[w]/Mean[w]}.
   The matrix locals start as {{0.}} placeholders. The third argument to
   Compile declares that ArrayFlatten expressions yield rank-2 real arrays. *)
randmatstat = Compile[{{t, _Integer}},
	Module[
		{
			n = 5,
			v = ConstantArray[0., t],
			w = ConstantArray[0., t],
			a = {{0.}}, b = {{0.}},
			c = {{0.}}, d = {{0.}},
			P = {{0.}}, Q = {{0.}}
		},
		Do[
			a = RandomReal[NormalDistribution[], {n, n}];
			b = RandomReal[NormalDistribution[], {n, n}];
			c = RandomReal[NormalDistribution[], {n, n}];
			d = RandomReal[NormalDistribution[], {n, n}];
			P = Join[a, b, c, d, 2];
			Q = ArrayFlatten[{{a, b}, {c, d}}];
			v[[i]] = Tr[MatrixPower[Transpose[P].P, 4]];
			w[[i]] = Tr[MatrixPower[Transpose[Q].Q, 4]];
			,
			{i, 1, t}
		];
		{StandardDeviation[v]/Mean[v], StandardDeviation[w]/Mean[w]}
	],
	{{_ArrayFlatten, _Real, 2}}
];


(* Both ratios must lie strictly between 0.5 and 1.0. *)
ClearAll[s1,s2];
{s1, s2} = randmatstat[1000];
test[0.5 < s1 < 1.0 && 0.5 < s2 < 1.0];

timeit[randmatstat[1000], "matrix_statistics"];
272 | 
273 | (* largish random number gen & matmul *)
274 | 
(* Times generating two 1000 x 1000 uniform random matrices together with
   their Dot product; the result is not checked. *)
timeit[RandomReal[1, {1000, 1000}].RandomReal[1, {1000, 1000}], "matrix_multiply"];
276 | 
277 | (* printfd *)
278 | 
279 | (* only on unix systems *)
(* printfd[n] writes n lines "i i+1" to /dev/null by passing the pieces to
   WriteString as separate arguments.
   NOTE(review): Wolfram documentation lists "Unix", "MacOSX" and "Windows"
   as $OperatingSystem values; confirm whether the "Linux" comparison below
   can ever be True on Linux.
   NOTE(review): printfdperf earlier in this file already reports a
   "print_to_file" row unconditionally, so when this branch runs the label
   is emitted twice; confirm which variant is meant to be reported. *)
If[ $OperatingSystem == "Linux"||$OperatingSystem == "MacOSX",

	ClearAll[printfd];
	printfd[n_] := Module[
		{stream},
		stream = OpenWrite["/dev/null"];
		Do[
			WriteString[stream, i, " ", i+1, "\n" ];
			,
			{i, 1, n}
		];
		Close[stream];
	];

	timeit[printfd[100000], "print_to_file"];

];
297 | 


--------------------------------------------------------------------------------
/perf.py:
--------------------------------------------------------------------------------
  1 | from numpy import *
  2 | from numpy.random import rand, randn
  3 | from numpy.linalg import matrix_power
  4 | import sys
  5 | import time
  6 | import random
  7 | 
# Python 2 compatibility: rebind `range` to xrange so the loops below do not
# build full lists.
if sys.version_info < (3,):
    range = xrange
 10 | 
 11 | ## fibonacci ##
 12 | 
 13 | def fib(n):
 14 |     if n<2:
 15 |         return n
 16 |     return fib(n-1)+fib(n-2)
 17 | 
 18 | ## quicksort ##
 19 | 
 20 | def qsort_kernel(a, lo, hi):
 21 |     i = lo
 22 |     j = hi
 23 |     while i < hi:
 24 |         pivot = a[(lo+hi) // 2]
 25 |         while i <= j:
 26 |             while a[i] < pivot:
 27 |                 i += 1
 28 |             while a[j] > pivot:
 29 |                 j -= 1
 30 |             if i <= j:
 31 |                 a[i], a[j] = a[j], a[i]
 32 |                 i += 1
 33 |                 j -= 1
 34 |         if lo < j:
 35 |             qsort_kernel(a, lo, j)
 36 |         lo = i
 37 |         j = hi
 38 |     return a
 39 | 
 40 | ## randmatstat ##
 41 | 
 42 | def randmatstat(t):
 43 |     n = 5
 44 |     v = zeros(t)
 45 |     w = zeros(t)
 46 |     for i in range(t):
 47 |         a = randn(n, n)
 48 |         b = randn(n, n)
 49 |         c = randn(n, n)
 50 |         d = randn(n, n)
 51 |         P = concatenate((a, b, c, d), axis=1)
 52 |         Q = concatenate((concatenate((a, b), axis=1), concatenate((c, d), axis=1)), axis=0)
 53 |         v[i] = trace(matrix_power(dot(P.T,P), 4))
 54 |         w[i] = trace(matrix_power(dot(Q.T,Q), 4))
 55 |     return (std(v)/mean(v), std(w)/mean(w))
 56 | 
 57 | ## randmatmul ##
 58 | 
 59 | def randmatmul(n):
 60 |     A = rand(n,n)
 61 |     B = rand(n,n)
 62 |     return dot(A,B)
 63 | 
 64 | ## mandelbrot ##
 65 | 
 66 | def abs2(z):
 67 |     return z.real*z.real +  z.imag*z.imag
 68 | 
 69 | def mandel(z):
 70 |     maxiter = 80
 71 |     c = z
 72 |     for n in range(maxiter):
 73 |         if abs2(z) > 4:
 74 |             return n
 75 |         z = z*z + c
 76 |     return maxiter
 77 | 
 78 | def mandelperf():
 79 |     r1 = [-2. + 0.1*i for i in range(26)]
 80 |     r2 = [-1. + 0.1*i for i in range(21)]
 81 |     return [mandel(complex(r, i)) for r in r1 for i in r2]
 82 | 
 83 | def pisum():
 84 |     sum = 0.0
 85 |     for j in range(1, 501):
 86 |         sum = 0.0
 87 |         for k in range(1, 10001):
 88 |             sum += 1.0/(k*k)
 89 |     return sum
 90 | 
 91 | #### Is this single threaded?
 92 | # def pisumvec():
 93 | #     return numpy.sum(1./(numpy.arange(1,10000)**2))
 94 | 
 95 | def parse_int(t):
 96 |     for i in range(1,t):
 97 |         n = random.randint(0,2**32-1)
 98 |         s = hex(n)
 99 |         #s = string(n, base = 16)
100 |         if s[-1]=='L':
101 |             s = s[0:-1]
102 |         m = int(s,16)
103 |         assert m == n
104 |     return n
105 | 
def printfd(t):
    """Write the lines "i i+1" for i in 1..t-1 to /dev/null.

    The file is opened in a ``with`` block so the handle is closed even when a
    write raises; previously an exception skipped the explicit ``close()``.
    Returns None.
    """
    with open("/dev/null", "w") as f:
        for i in range(1,t):
            f.write("{:d} {:d}\n".format(i, i+1))
111 | 
112 | 
def print_perf(name, time):
    """Print one result line of the form ``python,<name>,<time * 1000>``.

    ``time`` is multiplied by 1000 before printing (the callers in this file
    pass a ``time.time()`` difference, so the printed value is milliseconds).
    """
    millis = time*1000
    print(",".join(("python", name, str(millis))))
115 | 
116 | ## run tests ##
117 | 
if __name__=="__main__":

    # Every benchmark below is run `mintrials` times and the fastest
    # wall-clock run (in seconds) is handed to print_perf.
    mintrials = 5

    # Correctness check first, then timing.
    assert fib(20) == 6765
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        f = fib(20)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf("recursion_fibonacci", tmin)

    # parse_int asserts its own round trip internally.
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        n = parse_int(1000)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("parse_integers", tmin)

    assert sum(mandelperf()) == 14791
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        mandelperf()
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("userfunc_mandelbrot", tmin)

    tmin = float('inf')
    for i in range(mintrials):
        # Build the input outside the timed region. The comprehension uses `_`
        # so it cannot clobber the trial counter on Python 2, where
        # list-comprehension variables leak into the enclosing scope.
        lst = [ random.random() for _ in range(1,5000) ]
        t = time.time()
        qsort_kernel(lst, 0, len(lst)-1)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("recursion_quicksort", tmin)

    assert abs(pisum()-1.644834071848065) < 1e-6
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        pisum()
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("iteration_pi_sum", tmin)

    # assert abs(pisumvec()-1.644834071848065) < 1e-6
    # tmin = float('inf')
    # for i in range(mintrials):
    #     t = time.time()
    #     pisumvec()
    #     t = time.time()-t
    #     if t < tmin: tmin = t
    # print_perf ("pi_sum_vec", tmin)

    # Both returned statistics are range-checked; previously only s1 was
    # verified and s2 was computed but never looked at.
    (s1, s2) = randmatstat(1000)
    assert s1 > 0.5 and s1 < 1.0
    assert s2 > 0.5 and s2 < 1.0
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        randmatstat(1000)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("matrix_statistics", tmin)

    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        C = randmatmul(1000)
        # The check sits inside the timed region, as it did originally.
        assert C[0,0] >= 0
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("matrix_multiply", tmin)

    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        printfd(100000)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("print_to_file", tmin)
201 | 


--------------------------------------------------------------------------------
/perfutil.jl:
--------------------------------------------------------------------------------
  1 | # This file was formerly a part of Julia. License is MIT: https://julialang.org/license
  2 | 
  3 | import Printf
  4 | import Random
  5 | import Statistics
  6 | import Base.Sys
  7 | 
# Minimum number of iterations @timeit runs for each benchmark.
const mintrials = 5
# Minimum accumulated run time per benchmark in @timeit; compared against
# sums of `1000*@elapsed` values, i.e. milliseconds.
const mintime = 2000.0
# Print CSV result lines only when the script is started without arguments.
print_output = isempty(ARGS)
# Submission mode: enabled when the first command-line argument is "codespeed".
codespeed = length(ARGS) > 0 && ARGS[1] == "codespeed"

if codespeed
    # Only loaded in submission mode.
    using JSON
    using HTTPClient.HTTPC

    # Ensure that we've got the environment variables we want:
    if !haskey(ENV, "JULIA_FLAVOR")
        error( "You must provide the JULIA_FLAVOR environment variable identifying this julia build!" )
    end

    # Setup codespeed data dict for submissions to codespeed's JSON endpoint.  These parameters
    # are constant across all benchmarks, so we'll just let them sit here for now
    csdata = Dict()
    csdata["commitid"] = Base.GIT_VERSION_INFO.commit
    csdata["project"] = "Julia"
    csdata["branch"] = Base.GIT_VERSION_INFO.branch
    csdata["executable"] = ENV["JULIA_FLAVOR"]
    # Output of the `hostname` command with the trailing newline removed.
    csdata["environment"] = chomp(read(`hostname`, String))
    csdata["result_date"] = join( split(Base.GIT_VERSION_INFO.date_string)[1:2], " " )    #Cut the timezone out
end
 32 | 
 33 | # Takes in the raw array of values in vals, along with the benchmark name, description, unit and whether less is better
 34 | function submit_to_codespeed(vals,name,desc,unit,test_group,lessisbetter=true)
 35 |     # Points to the server
 36 |     codespeed_host = "julia-codespeed.csail.mit.edu"
 37 | 
 38 |     csdata["benchmark"] = name
 39 |     csdata["description"] = desc
 40 |     csdata["result_value"] = Statistics.mean(vals)
 41 |     csdata["std_dev"] = Statistics.std(vals)
 42 |     csdata["min"] = minimum(vals)
 43 |     csdata["max"] = maximum(vals)
 44 |     csdata["units"] = unit
 45 |     csdata["units_title"] = test_group
 46 |     csdata["lessisbetter"] = lessisbetter
 47 | 
 48 |     println( "$name: $(Statistics.mean(vals))" )
 49 |     ret = post( "http://$codespeed_host/result/add/json/", Dict("json" => json([csdata])) )
 50 |     println( json([csdata]) )
 51 |     if ret.http_code != 200 && ret.http_code != 202
 52 |         error("Error submitting $name [HTTP code $(ret.http_code)], dumping headers and text: $(ret.headers)\n$(String(ret.body))\n\n")
 53 |         return false
 54 |     end
 55 |     return true
 56 | end
 57 | 
# Report a vector of timings `t` for the benchmark `name`.
#   t     - expression yielding the collected timings
#   name  - benchmark name used in the output line / submission
#   desc  - description, forwarded to submit_to_codespeed
#   group - collection whose first element, if any, is the test group
# In codespeed mode the timings are submitted; otherwise, when print_output is
# set, one CSV line "julia,<name>,<min>,<max>,<mean>,<std>" is printed.
# A garbage collection is triggered afterwards in either case.
macro output_timings(t,name,desc,group)
    # All four arguments are evaluated in the caller's scope.
    t = esc(t)
    name = esc(name)
    desc = esc(desc)
    group = esc(group)
    quote
        # If we weren't given anything for the test group, infer off of file path!
        test_group = length($group) == 0 ? basename(dirname(Base.source_path())) : $group[1]
        if codespeed
            # NOTE(review): the unit is given as "seconds" while @timeit and @timeit_init
            # record 1000*@elapsed (milliseconds) - confirm which one is intended.
            submit_to_codespeed( $t, $name, $desc, "seconds", test_group )
        elseif print_output
            Printf.@printf "julia,%s,%f,%f,%f,%f\n" $name minimum($t) maximum($t) Statistics.mean($t) Statistics.std($t)
        end
        GC.gc()
    end
end
 74 | 
# Time the expression `ex` repeatedly and report the results via @output_timings.
# The loop keeps going until at least `mintrials` iterations have run AND the
# accumulated time has reached `mintime`. Each sample is 1000*@elapsed, i.e. milliseconds.
# The first iteration is a warm-up: its time counts towards the accumulated
# total but is not stored in the reported samples.
macro timeit(ex,name,desc,group...)
    quote
        let
            t = Float64[]
            tot = 0.0
            i = 0
            while i < mintrials || tot < mintime
                e = 1000*(@elapsed $(esc(ex)))
                tot += e
                if i > 0
                    # warm up on first iteration
                    push!(t, e)
                end
                i += 1
            end
            # `group` is the tuple of trailing macro arguments (possibly empty).
            @output_timings t $(esc(name)) $(esc(desc)) $(esc(group))
        end
    end
end
 94 | 
# Like @timeit, but evaluates `init` (untimed) before every timed evaluation of `ex`
# and uses a fixed iteration count: mintrials + 1 runs (i = 0:mintrials), of which
# the first (i == 0) is a discarded warm-up. Samples are 1000*@elapsed, i.e. milliseconds.
macro timeit_init(ex,init,name,desc,group...)
    quote
        t = zeros(mintrials)
        for i=0:mintrials
            $(esc(init))
            e = 1000*(@elapsed $(esc(ex)))
            if i > 0
                # warm up on first iteration
                t[i] = e
            end
        end
        @output_timings t $(esc(name)) $(esc(desc)) $(esc(group))
    end
end
109 | 
# Print a "julia,<name>.mem,..." line holding a memory figure read via getrusage(2).
# Only active on Linux; on other systems the body is compiled out and nothing happens.
# The value printed is word 5 of the returned buffer divided by 1024
# (presumably ru_maxrss, reported in kilobytes on Linux - TODO confirm).
function maxrss(name)
    # FIXME: call uv_getrusage instead here
    @static if (Sys.islinux())
        # 144-byte result buffer viewed as 18 Int64 words, zero-filled.
        # zeros() replaces `Vector{Int64}(uninitialized, ...)` + fill!: `uninitialized`
        # is not defined on released Julia versions (it became `undef`).
        rus = zeros(Int64, div(144,8))
        # First argument 0 selects the calling process (RUSAGE_SELF on Linux).
        res = ccall(:getrusage, Int32, (Int32, Ptr{Cvoid}), 0, rus)
        if res == 0
            mx = rus[5]/1024
            Printf.@printf "julia,%s.mem,%f,%f,%f,%f\n" name mx mx mx 0
        end
    end
end
122 | 
123 | 
# seed rng for more consistent timings
# Random.seed! is used on 0.7 and later; older versions fall back to srand.
# NOTE(review): the `import Printf` / `import Random` lines at the top of this
# file probably already require 0.7+, which would make the else branch dead - confirm.
if VERSION >= v"0.7.0"
    Random.seed!(1776)
else
    srand(1776)
end
130 | 


--------------------------------------------------------------------------------
/randmtzig.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    A C-program for MT19937, with initialization improved 2002/2/10.
  3 |    Coded by Takuji Nishimura and Makoto Matsumoto.
  4 |    This is a faster version by taking Shawn Cokus's optimization,
  5 |    Matthe Bellew's simplification, Isaku Wada's real version.
  6 |    David Bateman added normal and exponential distributions following
   Marsaglia and Tsang's Ziggurat algorithm.
  8 | 
  9 |    Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
 10 |    Copyright (C) 2004, David Bateman
 11 |    All rights reserved.
 12 | 
 13 |    Redistribution and use in source and binary forms, with or without
 14 |    modification, are permitted provided that the following conditions
 15 |    are met:
 16 | 
 17 |      1. Redistributions of source code must retain the above copyright
 18 |         notice, this list of conditions and the following disclaimer.
 19 | 
 20 |      2. Redistributions in binary form must reproduce the above copyright
 21 |         notice, this list of conditions and the following disclaimer in the
 22 |         documentation and/or other materials provided with the distribution.
 23 | 
 24 |      3. The names of its contributors may not be used to endorse or promote
 25 |         products derived from this software without specific prior written
 26 |         permission.
 27 | 
 28 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 29 |    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 30 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 31 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
 32 |    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 33 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 34 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 35 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 36 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 37 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 38 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 39 | 
 40 | 
 41 |    Any feedback is very welcome.
 42 |    http://www.math.keio.ac.jp/matumoto/emt.html
 43 |    email: matumoto@math.keio.ac.jp
 44 | */
 45 | 
 46 | /*
 47 |   Modified by Viral B. Shah for julia to support dsfmt and only __LP64__
 48 |   systems. 52-bits of randomness are used from the mantissa of random double
 49 |   precision numbers generated by dsfmt.
 50 |  */
 51 | 
 52 | #include <math.h>
 53 | #include <stdio.h>
 54 | #include <stddef.h>
 55 | #include <time.h>
 56 | #ifndef _MSC_VER
 57 | #include <sys/time.h>
 58 | #endif
 59 | 
 60 | #include <stdlib.h>
 61 | #include <string.h>
 62 | 
 63 | #define DSFMT_DO_NOT_USE_OLD_NAMES
 64 | #define DSFMT_MEXP 19937
 65 | #include <dSFMT.h>
 66 | 
/* Local integer type aliases used throughout this file.
   NOTE(review): the 16/32/64-bit names assume short, int and long long have
   exactly those widths on the target platform - confirm. */
typedef ptrdiff_t randmtzig_idx_type;
typedef signed char randmtzig_int8_t;
typedef unsigned char randmtzig_uint8_t;
typedef short randmtzig_int16_t;
typedef unsigned short randmtzig_uint16_t;
typedef int randmtzig_int32_t;
typedef unsigned int randmtzig_uint32_t;
typedef long long randmtzig_int64_t;
typedef unsigned long long randmtzig_uint64_t;
 76 | 
/* Declarations */

/* randmtzig_randn takes an explicit dSFMT state; the two _gv_ variants take no
   state argument (presumably they use dSFMT's global state, like the gv_
   helpers below - confirm against their definitions). */
extern double randmtzig_randn (dsfmt_t *dsfmt);
extern double randmtzig_gv_randn (void);
extern double randmtzig_gv_exprnd (void);
 82 | 
 83 | /* ===== Uniform generators ===== */
 84 | 
 85 | inline static randmtzig_uint64_t gv_randi (void)
 86 | {
 87 |     double r = dsfmt_gv_genrand_close1_open2();
 88 |     return *((uint64_t *) &r) & 0x000fffffffffffff;
 89 | }
 90 | 
 91 | /* generates a random number on (0,1) with 53-bit resolution */
 92 | inline static double gv_randu (void)
 93 | {
 94 |     return dsfmt_gv_genrand_open_open();
 95 | }
 96 | 
 97 | inline static randmtzig_uint64_t randi (dsfmt_t *dsfmt)
 98 | {
 99 |     double r = dsfmt_genrand_close1_open2(dsfmt);
100 |     return *((uint64_t *) &r) & 0x000fffffffffffff;
101 | }
102 | 
103 | /* generates a random number on (0,1) with 53-bit resolution */
104 | inline static double randu (dsfmt_t *dsfmt)
105 | {
106 |     return dsfmt_genrand_open_open(dsfmt);
107 | }
108 | 
/* ===== Ziggurat normal and exponential generators ===== */
/* Integer type of the ziggurat ki/ke tables. */
# define ZIGINT randmtzig_uint64_t
/* EMANTISSA == 2^52: scale factor for the exponential tables. */
# define EMANTISSA 4503599627370496  /* 52 bit mantissa */
# define ERANDI gv_randi() /* 52 bits for mantissa */
/* NMANTISSA == 2^51: scale factor for the normal tables. */
# define NMANTISSA 2251799813685248
# define NRANDI gv_randi() /* 51 bits for mantissa + 1 bit sign */
# define RANDU gv_randu()

/* Number of entries in each ziggurat table. */
#define ZIGGURAT_TABLE_SIZE 256

/* Normal distribution: R is the starting x used when building the tables (see
   the commented-out randmtzig_create_ziggurat_tables below), INV_R its
   reciprocal, and NOR_SECTION_AREA the area v of each ziggurat strip. */
#define ZIGGURAT_NOR_R 3.6541528853610088
#define ZIGGURAT_NOR_INV_R 0.27366123732975828
#define NOR_SECTION_AREA 0.00492867323399

/* Same three quantities for the exponential distribution. */
#define ZIGGURAT_EXP_R 7.69711747013104972
#define ZIGGURAT_EXP_INV_R 0.129918765548341586
#define EXP_SECTION_AREA 0.0039496598225815571993
126 | 
127 | 
128 | /*
129 | This code is based on the paper Marsaglia and Tsang, "The ziggurat method
130 | for generating random variables", Journ. Statistical Software. Code was
131 | presented in this paper for a Ziggurat of 127 levels and using a 32 bit
132 | integer random number generator. This version of the code, uses the
133 | Mersenne Twister as the integer generator and uses 256 levels in the
134 | Ziggurat. This has several advantages.
135 | 
  1) As Marsaglia and Tsang themselves state, the more levels, the fewer
     times the expensive tail algorithm must be called
138 |   2) The cycle time of the generator is determined by the integer
139 |      generator, thus the use of a Mersenne Twister for the core random
140 |      generator makes this cycle extremely long.
141 |   3) The license on the original code was unclear, thus rewriting the code
142 |      from the article means we are free of copyright issues.
143 |   4) Compile flag for full 53-bit random mantissa.
144 | 
145 | It should be stated that the authors made my life easier, by the fact that
146 | the algorithm developed in the text of the article is for a 256 level
147 | ziggurat, even if the code itself isn't...
148 | 
149 | One modification to the algorithm developed in the article, is that it is
150 | assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in
151 | terms like 2^32 in the code. As the normal distribution is defined between
152 | -Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus
153 | in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for
154 | this term.  The exponential distribution is one sided so we use the
155 | full 32 bits.  We use EMANTISSA for this term.
156 | 
157 | It appears that I'm slightly slower than the code in the article, this
158 | is partially due to a better generator of random integers than they
159 | use. But might also be that the case of rapid return was optimized by
160 | inlining the relevant code with a #define. As the basic Mersenne
161 | Twister is only 25% faster than this code I suspect that the main
162 | reason is just the use of the Mersenne Twister and not the inlining,
163 | so I'm not going to try and optimize further.
164 | */
165 | 
166 | 
167 | // void randmtzig_create_ziggurat_tables (void)
168 | // {
169 | //   int i;
170 | //   double x, x1;
171 | 
172 | //   /* Ziggurat tables for the normal distribution */
173 | //   x1 = ZIGGURAT_NOR_R;
174 | //   wi[255] = x1 / NMANTISSA;
175 | //   fi[255] = exp (-0.5 * x1 * x1);
176 | 
177 | //   /* Index zero is special for tail strip, where Marsaglia and Tsang
178 | //    * defines this as
179 | //    * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
180 | //    * where v is the area of each strip of the ziggurat.
181 | //    */
182 | //   ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA);
183 | //   wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA;
184 | //   fi[0] = 1.;
185 | 
186 | //   for (i = 254; i > 0; i--)
187 | //     {
188 | //       /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
189 | //        * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
190 | //        */
191 | //       x = sqrt(-2. * log(NOR_SECTION_AREA / x1 + fi[i+1]));
192 | //       ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA);
193 | //       wi[i] = x / NMANTISSA;
194 | //       fi[i] = exp (-0.5 * x * x);
195 | //       x1 = x;
196 | //     }
197 | 
198 | //   ki[1] = 0;
199 | 
//   /* Ziggurat tables for the exponential distribution */
201 | //   x1 = ZIGGURAT_EXP_R;
202 | //   we[255] = x1 / EMANTISSA;
203 | //   fe[255] = exp (-x1);
204 | 
205 | //   /* Index zero is special for tail strip, where Marsaglia and Tsang
206 | //    * defines this as
207 | //    * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
208 | //    * where v is the area of each strip of the ziggurat.
209 | //    */
210 | //   ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA);
211 | //   we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA;
212 | //   fe[0] = 1.;
213 | 
214 | //   for (i = 254; i > 0; i--)
215 | //     {
216 | //       /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
217 | //        * need inverse operator of y = exp(-x) -> x = -ln(y)
218 | //        */
219 | //       x = - log(EXP_SECTION_AREA / x1 + fe[i+1]);
220 | //       ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA);
221 | //       we[i] = x / EMANTISSA;
222 | //       fe[i] = exp (-x);
223 | //       x1 = x;
224 | //     }
225 | //   ke[1] = 0;
226 | // }
227 | 
// Tables for randn
// ki: precomputed integer table with ZIGGURAT_TABLE_SIZE entries. Presumably
// these are the values the commented-out randmtzig_create_ziggurat_tables()
// above would produce for the normal distribution (it sets ki[1] = 0 and scales
// entries by NMANTISSA) - verify before regenerating.
static ZIGINT ki[ZIGGURAT_TABLE_SIZE] =
230 |     {2104047571230236,               0,1693657211688499,1919380038164751,
231 |      2015384402142420,2068365869415708,2101878624030987,2124958784087614,
232 |      2141808670783638,2154644611559370,2164744887580145,2172897953690771,
233 |      2179616279367521,2185247251864556,2190034623104318,2194154434518163,
234 |      2197736978772008,2200880740889623,2203661538008543,2206138681107245,
235 |      2208359231804928,2210361007256700,2212174742387166,2213825672703393,
236 |      2215334711001466,2216719334486539,2217994262138197,2219171977964129,
237 |      2220263139537873,2221276900116549,2222221164932202,2223102796828387,
238 |      2223927782546019,2224701368169460,2225428170203747,2226112267247709,
239 |      2226757276104752,2227366415327922,2227942558554233,2228488279492093,
240 |      2229005890046815,2229497472774805,2229964908626691,2230409900758245,
241 |      2230833995044249,2231238597815812,2231624991249884,2231994346765634,
242 |      2232347736722468,2232686144665663,2233010474325699,2233321557544631,
243 |      2233620161275830,2233906993781039,2234182710130112,2234447917093281,
244 |      2234703177502812,2234949014149981,2235185913274123,2235414327692697,
245 |      2235634679614740,2235847363174420,2236052746716668,2236251174862705,
246 |      2236442970379808,2236628435876608,2236807855342616,2236981495548416,
247 |      2237149607321006,2237312426707072,2237470176035519,2237623064889274,
248 |      2237771290995262,2237915041040474,2238054491421185,2238189808931596,
249 |      2238321151397547,2238448668260322,2238572501115061,2238692784207837,
250 |      2238809644895031,2238923204068302,2239033576548092,2239140871448347,
251 |      2239245192514865,2239346638439450,2239445303151863,2239541276091355,
252 |      2239634642459413,2239725483455210,2239813876495104,2239899895417414,
253 |      2239983610673598,2240065089506859,2240144396119109,2240221591827156,
254 |      2240296735208897,2240369882240222,2240441086423317,2240510398906937,
255 |      2240577868599239,2240643542273660,2240707464668327,2240769678579424,
256 |      2240830224948918,2240889142947021,2240946470049710,2241002242111632,
257 |      2241056493434688,2241109256832545,2241160563691345,2241210444026824,
258 |      2241258926538069,2241306038658085,2241351806601384,2241396255408737,
259 |      2241439408989263,2241481290159988,2241521920683014,2241561321300414,
260 |      2241599511766981,2241636510880914,2241672336512567,2241707005631317,
261 |      2241740534330669,2241772937851645,2241804230604542,2241834426189118,
262 |      2241863537413270,2241891576310240,2241918554154426,2241944481475803,
263 |      2241969368073032,2241993223025259,2242016054702647,2242037870775672,
264 |      2242058678223188,2242078483339294,2242097291739004,2242115108362739,
265 |      2242131937479636,2242147782689690,2242162646924702,2242176532448058,
266 |      2242189440853303,2242201373061504,2242212329317384,2242222309184204,
267 |      2242231311537365,2242239334556685,2242246375717338,2242252431779384,
268 |      2242257498775863,2242261571999386,2242264645987166,2242266714504423,
269 |      2242267770526080,2242267806216682,2242266812908434,2242264781077261,
270 |      2242261700316790,2242257559310117,2242252345799249,2242246046552055,
271 |      2242238647326588,2242230132832599,2242220486690050,2242209691384432,
272 |      2242197728218658,2242184577261284,2242170217290794,2242154625735654,
273 |      2242137778609814,2242119650443302,2242100214207531,2242079441234882,
274 |      2242057301132111,2242033761687055,2242008788768083,2241982346215658,
275 |      2241954395725333,2241924896721420,2241893806220494,2241861078683807,
276 |      2241826665857576,2241790516600019,2241752576693859,2241712788642894,
277 |      2241671091451056,2241627420382213,2241581706698751,2241533877376746,
278 |      2241483854795259,2241431556397014,2241376894317324,2241319774977796,
279 |      2241260098640839,2241197758920517,2241132642244683,2241064627262631,
280 |      2240993584191722,2240919374095516,2240841848084869,2240760846432212,
281 |      2240676197587764,2240587717084761,2240495206318733,2240398451183547,
282 |      2240297220544145,2240191264522592,2240080312570135,2239964071293311,
283 |      2239842221996510,2239714417896679,2239580280957705,2239439398282173,
284 |      2239291317986176,2239135544468183,2238971532964959,2238798683265249,
285 |      2238616332424332,2238423746288075,2238220109591870,2238004514345197,
286 |      2237775946143192,2237533267957802,2237275200846732,2237000300869931,
287 |      2236706931309079,2236393229029127,2236057063479481,2235695986373225,
288 |      2235307169458838,2234887326941556,2234432617919425,2233938522519742,
289 |      2233399683022654,2232809697779175,2232160850599794,2231443750584617,
290 |      2230646845562145,2229755753817960,2228752329126507,2227613325162477,
291 |      2226308442121145,2224797391720369,2223025347823800,2220915633329775,
292 |      2218357446086993,2215184158448627,2211132412537323,2205758503851011,
293 |      2198248265654920,2186916352102052,2167562552481677,2125549880839429};
294 | 
// wi: precomputed double table with ZIGGURAT_TABLE_SIZE entries. Presumably
// the x / NMANTISSA values (with wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA)
// from the commented-out randmtzig_create_ziggurat_tables() above - verify
// before regenerating.
static double wi[ZIGGURAT_TABLE_SIZE] =
296 |     {17367254121656703e-31,9558660348275145e-32,12708704832820278e-32,
297 |      14909740960986864e-32,16658733630346416e-32,18136120809053487e-32,
298 |      1942972015219358e-31,20589500627632916e-32,21646860576118966e-32,
299 |      2262294039150043e-31,23532718913376864e-32,24387234556800803e-32,
300 |      25194879828681465e-32,2596219977196592e-31,26694407473112964e-32,
301 |      2739572968463095e-31,280696460019946e-30,28719058903642897e-32,
302 |      29346417484275224e-32,29953809336344285e-32,30543030006769113e-32,
303 |      3111563633851158e-31,3167298801818414e-31,3221628035016365e-31,
304 |      32746570407564125e-32,33264798116476e-29,337718034169968e-30,
305 |      34268340352771636e-32,34755088731390227e-32,3523266384567022e-31,
306 |      3570162463362898e-31,3616248057128073e-31,36615697529342477e-32,
307 |      3706170277693123e-31,37500889278448874e-32,3793361940125627e-31,
308 |      38360228129389374e-32,3878102586096749e-31,3919630085297984e-31,
309 |      39606321365983254e-32,40011337552278087e-32,4041158312387907e-31,
310 |      4080727683070036e-31,4119862377455137e-31,41585816580575855e-32,
311 |      41969036444492247e-32,4234845407127582e-31,42724230518658345e-32,
312 |      43096517956924877e-32,4346546035489394e-31,4383119410062289e-31,
313 |      4419384856424202e-31,4455354660935343e-31,4491040505860591e-31,
314 |      4526453511835132e-31,45616042766683e-29,4596502910863464e-31,
315 |      4631159070186941e-31,4665581985579899e-31,469978049067346e-30,
316 |      4733763047137822e-31,4767537768070579e-31,4801112439606964e-31,
317 |      4834494540915173e-31,4867691262722585e-31,4900709524503576e-31,
318 |      4933555990446197e-31,4966237084303158e-31,499875900322208e-30,
319 |      5031127730640677e-31,5063349048324261e-31,5095428547615612e-31,
320 |      5127371639960692e-31,5159183566767805e-31,5190869408652579e-31,
321 |      5222434094116442e-31,52538824077020155e-32,5285218997665102e-31,
322 |      5316448383199491e-31,5347574961247755e-31,5378603012928409e-31,
323 |      5409536709607314e-31,5440380118638932e-31,5471137208800966e-31,
324 |      550181185544408e-30,5532407845376661e-31,5562928881503102e-31,
325 |      5593378587232605e-31,5623760510674315e-31,5654078128633358e-31,
326 |      5684334850421336e-31,5714534021493849e-31,5744678926926726e-31,
327 |      5774772794741848e-31,5804818799092685e-31,5834820063319006e-31,
328 |      5864779662879593e-31,589470062817121e-30,5924585947241581e-31,
329 |      5954438568403615e-31,598426140275769e-30,601405732662843e-30,
330 |      6043829183921996e-31,6073579788409578e-31,6103311925942512e-31,
331 |      6133028356604082e-31,6162731816802865e-31,6192425021312213e-31,
332 |      6222110665260248e-31,6251791426074554e-31,6281469965385542e-31,
333 |      6311148930892342e-31,6340830958194888e-31,6370518672595733e-31,
334 |      640021469087503e-30,6429921623041988e-31,645964207406601e-30,
335 |      648937864559066e-30,6519133937633505e-31,6548910550274845e-31,
336 |      6578711085338253e-31,6608538148065851e-31,6638394348791179e-31,
337 |      6668282304612498e-31,6698204641069389e-31,6728163993825439e-31,
338 |      6758163010359885e-31,6788204351671041e-31,681829069399439e-30,
339 |      6848424730538249e-31,6878609173239948e-31,6908846754545526e-31,
340 |      6939140229215998e-31,696949237616333e-30,6999906000319335e-31,
341 |      7030383934540792e-31,7060929041554193e-31,7091544215943653e-31,
342 |      7122232386185626e-31,7152996516734219e-31,7183839610161045e-31,
343 |      7214764709353755e-31,7245774899777502e-31,72768733118038725e-32,
344 |      7308063123111988e-31,7339347561166714e-31,7370729905779203e-31,
345 |      7402213491755235e-31,7433801711637146e-31,7465498018545449e-31,
346 |      7497305929126601e-31,7529229026613742e-31,7561270964007667e-31,
347 |      7593435467385694e-31,7625726339346621e-31,7658147462600412e-31,
348 |      7690702803711903e-31,7723396417008341e-31,7756232448661274e-31,
349 |      778921514095401e-30,7822348836746627e-31,7855637984151357e-31,
350 |      7889087141432085e-31,7922700982142658e-31,7956484300519808e-31,
351 |      7990442017147628e-31,8024579184911813e-31,8058900995263265e-31,
352 |      8093412784812165e-31,812812004227522e-30,8163028415800651e-31,
353 |      8198143720697359e-31,8233471947596931e-31,8269019271079405e-31,
354 |      8304792058796362e-31,834079688112767e-30,8377040521411316e-31,
355 |      8413529986789175e-31,8450272519715296e-31,8487275610177406e-31,
356 |      85245470086869e-29,8562094740097588e-31,8599927118319072e-31,
357 |      86380527619967175e-32,8676480611237092e-31,8715219945465259e-31,
358 |      8754280402508787e-31,8793671999012706e-31,8833405152300122e-31,
359 |      88734907038049e-29,8913939944215902e-31,8954764640486935e-31,
360 |      8995977064883017e-31,9037590026252085e-31,9079616903732087e-31,
361 |      9122071683126914e-31,9164968996211253e-31,9208324163254476e-31,
362 |      9252153239087913e-31,9296473063078686e-31,9341301313417584e-31,
363 |      938665656617903e-30,9432558359669126e-31,9479027264644209e-31,
364 |      95260849610588e-29,957375432209002e-30,962205950628746e-30,
365 |      9671026058815726e-31,972068102289435e-30,9771053062699983e-31,
366 |      9822172599183368e-31,9874071960473548e-31,9926785548800904e-31,
367 |      9980350026176626e-31,10034804521429213e-31,10090190861630543e-31,
368 |      10146553831460223e-31,10203941464676316e-31,1026240537260681e-30,
369 |      10322001115479755e-31,10382788623508751e-31,10444832675993878e-31,
370 |      10508203448348659e-31,1057297713900341e-30,10639236690670377e-31,
371 |      10707072623626628e-31,107765840026618e-29,10847879564397177e-31,
372 |      10921079038143372e-31,109963147017795e-29,11073733224929686e-31,
373 |      11153497865847152e-31,11235791107104895e-31,11320817840158973e-31,
374 |      11408809242576976e-31,1150002753783406e-30,11594771891443527e-31,
375 |      11693385786905373e-31,1179626635295029e-30,11903876299277459e-31,
376 |      1201675939253847e-30,12135560818661637e-31,12261054417445396e-31,
377 |      12394179789158183e-31,12536093926597603e-31,1268824481425016e-30,
378 |      12852479319091384e-31,13031206634685398e-31,13227655770190893e-31,
379 |      13446300925006917e-31,13693606835124475e-31,13979436672771461e-31,
380 |      14319989869657897e-31,14744848603594667e-31,1531787274160907e-30,
381 |      16227698675312968e-31};
382 | 
383 | static double fi[ZIGGURAT_TABLE_SIZE] =
384 |     {1.,.9771017012827331,.9598790918124174,.9451989534530794,
385 |      .9320600759689914,.9199915050483614,.9087264400605639,.898095921906305,
386 |      .8879846607634008,.8783096558161477,.869008688043794,.8600336212030095,
387 |      .8513462584651245,.842915653118442,.8347162929929313,.8267268339520951,
388 |      .8189291916094156,.8113078743182208,.8038494831763903,.7965423304282554,
389 |      .7893761435711993,.7823418326598627,.775431304986139,.7686373158033355,
390 |      .7619533468415471,.7553735065117552,.7488924472237273,.7425052963446368,
391 |      .7362075981312672,.729995264565803,.7238645334728822,.717811932634902,
392 |      .711834248882359,.7059285013367979,.7000919181404905,.694321916130033,
393 |      .6886160830085275,.6829721616487918,.6773880362225135,.6718617199007669,
394 |      .6663913439123812,.6609751477802419,.6556114705832252,.650298743114295,
395 |      .6450354808242524,.6398202774564395,.6346517992909606,.6295287799281287,
396 |      .6244500155502747,.6194143606090396,.6144207238920772,.6094680649288958,
397 |      .6045553907005499,.599681752622168,.5948462437709915,.5900479963357922,
398 |      .5852861792663006,.5805599961036837,.5758686829752109,.5712115067380753,
399 |      .5665877632589521,.5619967758172782,.5574378936214867,.5529104904285204,
400 |      .5484139632579217,.5439477311926505,.5395112342595453,.5351039323830201,
401 |      .5307253044061945,.5263748471741873,.5220520746747954,.5177565172322012,
402 |      .5134877207497434,.5092452459981365,.5050286679458292,.5008375751284826,
403 |      .49667156905479676,.4925302636461491,.4884132847077125,.48432026942891204,
404 |      .48025086591125016,.4762047327216842,.4721815384698837,.46818096140782267,
405 |      .4642026890502793,.460246417814924,.45631185268077407,.4523987068638829,
406 |      .4485067015092144,.4446355653977281,.4407850346677702,.43695485254992955,
407 |      .43314476911457434,.42935454103134185,.42558393133990086,.4218327092313535,
408 |      .41810064983968476,.4143875340427069,.41069314827198344,.40701728433124823,
409 |      .4033597392228692,.399720314981932,.3960988185175474,.39249506146101104,
410 |      .3889088600204649,.38534003484173424,.3817884108750316,.37825381724723833,
411 |      .37473608713949164,.37123505766982157,.3677505697805964,.36428246813054976,
412 |      .36083060099117586,.3573948201472906,.35397498080156936,.35057094148288126,
413 |      .34718256395825153,.3438097131482915,.3404522570459456,.33711006663841303,
414 |      .33378301583210873,.3304709813805373,.32717384281495887,.32389148237773235,
415 |      .3206237849582305,.3173706380312227,.3141319315976305,.310907558127564,
416 |      .307697412505554,.30450139197789644,.30131939610203423,.29815132669790145,
417 |      .2949970878011627,.2918565856182811,.28872972848335393,.28561642681665805,
418 |      .28251659308484933,.27943014176276515,.2763569892967811,.27329705406967564,
419 |      .2702502563669598,.26721651834463167,.2641957639983174,.2611879191337636,
420 |      .25819291133864797,.2552106699556771,.25224112605694377,.2492842124195167,
421 |      .24633986350223877,.24340801542371202,.24048860594144916,.23758157443217368,
422 |      .2346868618732527,.23180441082524855,.22893416541557743,.22607607132326474,
423 |      .22323007576478943,.22039612748101145,.21757417672517823,.2147641752520084,
424 |      .21196607630785277,.20917983462193548,.20640540639867916,.20364274931112133,
425 |      .20089182249543117,.19815258654653795,.1954250035148854,.19270903690432864,
426 |      .19000465167119293,.18731181422451676,.18463049242750437,.18196065560021638,
427 |      .17930227452353026,.17665532144440646,.17401977008249914,.17139559563815535,
428 |      .16878277480185,.1661812857651097,.1635911082329826,.16101222343811727,
429 |      .1584446141565199,.15588826472506426,.15334316106083742,.15080929068240986,
430 |      .1482866427331284,.14577520800653765,.14327497897404687,.14078594981496803,
431 |      .138308116449064,.13584147657175705,.13338602969216254,.13094177717412792,
432 |      .12850872228047336,.12608687022065,.12367622820205106,.12127680548523516,
433 |      .11888861344334545,.11651166562603685,.11414597782825504,.1117915681642454,
434 |      .10944845714720981,.10711666777507266,.10479622562286683,.10248715894230599,
435 |      .10018949876917177,.09790327903921535,.09562853671335306,.09336531191302634,
436 |      .09111364806670041,.08887359206859394,.08664519445086755,.08442850957065445,
437 |      .0822235958134955,.08003051581494733,.07784933670237201,.07568013035919481,
438 |      .07352297371424082,.07137794905914183,.06924514439725017,.06712465382802392,
439 |      .06501657797147035,.06292102443797778,.06083810834975175,.05876795292113793,
440 |      .056710690106399425,.05466646132507786,.05263541827697361,.05061772386112175,
441 |      .04861355321603513,.04662309490208967,.044646552251446515,.042684144916619336,
442 |      .04073611065607874,.0388027074046569,.03688421568869112,.034980941461833046,
443 |      .033093219458688684,.031221417192023686,.02936593975823011,.027527235669693315,
444 |      .02570580400863265,.023902203305873237,.022117062707379908,.020351096230109344,
445 |      .018605121275783343,.016880083152595836,.015177088307982065,.013497450601780796,
446 |      .0118427578579431,.0102149714397311,.008616582769422912,.007050875471392109,
447 |      .005522403299264755,.0040379725933718715,.002609072746106362,.0012602859304985975};
448 | 
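449 | // Tables for exprnd: ke = fast-accept thresholds, we = integer-to-value scale factors, fe = values used in the slow-path acceptance test (each indexed by the low 8 bits of the random integer)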
450 | static ZIGINT ke[ZIGGURAT_TABLE_SIZE] =
451 |     {3985772928715748,               0,2742928985168065,3438700186803721,
452 |      3744780257810519,3914896975372863,4022625697542798,4096776410635450,
453 |      4150853606149210,4192001604687417,4224344877584101,4250427292531740,
454 |      4271901371161554,4289886428824118,4305167164135199,4318309783140431,
455 |      4329732973408940,4339752937704679,4348612900760388,4356502988721768,
456 |      4363573953227346,4369946852445020,4375720012348349,4380974119031481,
457 |      4385776001930298,4390181484145305,4394237557465219,4397984061535398,
458 |      4401454994146430,4404679543790856,4407682910787985,4410486965794400,
459 |      4413110782053579,4415571068741702,4417882526198713,4420058138987325,
460 |      4422109419110772,4424046609003130,4425878851844253,4427614335173868,
461 |      4429260412563040,4430823707156475,4432310200160197,4433725306767517,
462 |      4435073941555377,4436360575016074,4437589282595121,4438763787369085,
463 |      4439887497305303,4440963537889317,4441994780778252,4442983869033585,
464 |      4443933239400428,4444845142028910,4445721657973833,4446564714759241,
465 |      4447376100252993,4448157475061632,4448910383626429,4449636264176642,
466 |      4450336457674983,4451012215872352,4451664708573597,4452295030203006,
467 |      4452904205747010,4453493196141906,4454062903166143,4454614173889474,
468 |      4455147804725090,4455664545125435,4456165100957688,4456650137590828,
469 |      4457120282722585,4457576128971459,4458018236256245,4458447133983073,
470 |      4458863323057847,4459267277740095,4459659447352586,4460040257859578,
471 |      4460410113325310,4460769397263133,4461118473884710,4461457689257740,
472 |      4461787372379910,4462107836175980,4462419378424319,4462722282618581,
473 |      4463016818769709,4463303244152965,4463581804004301,4463852732169940,
474 |      4464116251712773,4464372575478779,4464621906626490,4464864439122178,
475 |      4465100358203284,4465329840812355,4465553056003596,4465770165323939,
476 |      4465981323170417,4466186677125455,4466386368271563,4466580531486827,
477 |      4466769295722448,4466952784263502,4467131114974006,4467304400527265,
478 |      4467472748622447,4467636262188208,4467795039574164,4467949174730939,
479 |      4468098757379442,4468243873170018,4468384603832024,4468521027314373,
480 |      4468653217917530,4468781246417428,4468905180181701,4469025083278642,
481 |      4469141016579234,4469253037852582,4469361201855066,4469465560413474,
482 |      4469566162502383,4469663054316032,4469756279334881,4469845878387080,
483 |      4469931889704995,4470014348976986,4470093289394551,4470168741694984,
484 |      4470240734199652,4470309292847996,4470374441227332,4470436200598525,
485 |      4470494589917605,4470549625853344,4470601322800852,4470649692891185,
486 |      4470694745996980,4470736489734116,4470774929459349,4470810068263924,
487 |      4470841906963074,4470870444081369,4470895675833821,4470917596102651,
488 |      4470936196409614,4470951465883737,4470963391224346,4470971956659198,
489 |      4470977143897542,4470978932077904,4470977297710362,4470972214613072,
490 |      4470963653842747,4470951583618802,4470935969240827,4470916772999009,
491 |      4470893954077117,4470867468447603,4470837268758338,4470803304210460,
492 |      4470765520426769,4470723859310029,4470678258890503,4470628653161980,
493 |      4470574971905457,4470517140499614,4470455079717082,4470388705505446,
494 |      4470317928751818,4470242655029689,4470162784326669,4470078210751556,
495 |      4469988822219058,4469894500110287,4469795118907000,4469690545797298,
496 |      4469580640250319,4469465253557163,4469344228335006,4469217397991048,
497 |      4469084586142556,4468945605988875,4468800259630802,4468648337332217,
498 |      4468489616718259,4468323861903709,4468150822544456,4467970232804102,
499 |      4467781810226787,4467585254506222,4467380246139658,4467166444954116,
500 |      4466943488490515,4466710990229518,4466468537640691,4466215690034133,
501 |      4465951976190801,4465676891744455,4465389896284247,4465090410142477,
502 |      4464777810826750,4464451429049612,4464110544301482,4463754379904174,
503 |      4463382097472202,4462992790697122,4462585478355953,4462159096427753,
504 |      4461712489182116,4461244399078944,4460753455289386,4460238160612098,
505 |      4459696876515553,4459127805983956,4458528973779075,4457898203649722,
506 |      4457233091920646,4456530976767892,4455788902331217,4455003576616607,
507 |      4454171321891082,4453288015951104,4452349022232651,4451349106194827,
508 |      4450282334707462,4449141954247903,4447920242480611,4446608326137821,
509 |      4445195955871677,4443671225661690,4442020220072463,4440226566619900,
510 |      4438270861888260,4436129927556552,4433775834104270,4431174602388627,
511 |      4428284451100006,4425053392146958,4421415870372502,4417287970124084,
512 |      4412560416174562,4407088078325945,4400673742272494,4393042098597073,
513 |      4383796248451589,4372341169422858,4357740343059956,4338425130125967,
514 |      4311541827049177,4271262897902398,4203411844498905,4061213381260384};
515 | 
516 | static double we[ZIGGURAT_TABLE_SIZE] =
517 |     {19311480126418366e-31,1417802848791084e-32,23278824993382457e-33,
518 |      30487830247064326e-33,3666569771447489e-32,4217930218928974e-32,
519 |      4722256155686277e-32,51911915446217885e-33,5632347108395505e-32,
520 |      6051008260642765e-32,645101650967275e-31,6835264680370054e-32,
521 |      7205993957468906e-32,7564981553739299e-32,7913664396195108e-32,
522 |      8253223556351894e-32,8584643616885051e-32,8908755486564743e-32,
523 |      9226267962966373e-32,9537791450529272e-32,9843856087455926e-32,
524 |      10144925809006294e-32,10441409405585343e-32,10733669323436384e-32,
525 |      1102202874567019e-31,11306777346479334e-32,11588176009705533e-32,
526 |      11866460730417886e-32,1214184586569436e-31,12414526862326387e-32,
527 |      12684682560606153e-32,12952477151912284e-32,1321806185153881e-31,
528 |      13481576335745447e-32,13743149982367625e-32,14002902946807862e-32,
529 |      14260947099321287e-32,14517386844829297e-32,14772319842763584e-32,
530 |      15025837641447456e-32,15278026239101652e-32,15528966581595696e-32,
531 |      1577873500545958e-31,1602740363335091e-31,16275040728083524e-32,
532 |      16521711010420076e-32,16767475945078279e-32,17012393998770646e-32,
533 |      17256520873568226e-32,17499909718432365e-32,17742611321380505e-32,
534 |      17984674284430714e-32,18226145183195818e-32,18467068712763576e-32,
535 |      18707487821298258e-32,18947443832625902e-32,19186976558915997e-32,
536 |      19426124404443042e-32,19664924461299023e-32,19903412597830144e-32,
537 |      20141623540485899e-32,20379590949693882e-32,2061734749030844e-31,
538 |      2085492489712377e-31,21092354035891528e-32,21329664960238294e-32,
539 |      21566886964838972e-32,2180404863516701e-31,22041177894111562e-32,
540 |      2227830204572395e-31,2251544781633135e-31,22752641393233694e-32,
541 |      22989908461180186e-32,23227274236804366e-32,23464763501180916e-32,
542 |      2370240063065339e-31,23940209626069303e-32,2417821414054771e-31,
543 |      24416437505894123e-32,24654902757768304e-32,2489363265970225e-31,
544 |      2513264972605797e-31,2537197624400795e-31,2561163429461499e-31,
545 |      2585164577308239e-31,26092032408240577e-32,2633281578133145e-31,
546 |      2657401734414762e-31,2681565843657999e-31,2705776030362351e-31,
547 |      27300344111887955e-32,27543430965657624e-32,2778704192254128e-31,
548 |      2803119800875143e-31,28275920234049704e-32,2852122960639331e-31,
549 |      28767147146315804e-32,29013693901073754e-32,29260890958589514e-32,
550 |      29508759461219033e-32,2975732061937252e-31,3000659572501474e-31,
551 |      3025660616507079e-31,3050737343476251e-31,3075891915089994e-31,
552 |      31011265065151543e-32,3126443307731675e-31,31518445248623523e-32,
553 |      31773323815073683e-32,32029091200858335e-32,32285770031865573e-32,
554 |      3254338314930261e-31,3280195362345436e-31,3306150476760074e-31,
555 |      3332206015211484e-31,33583643618764577e-32,33846279295240445e-32,
556 |      34109991609932597e-32,34374805306980633e-32,34640745461620167e-32,
557 |      3490783749585068e-31,3517610719444983e-31,3544558072136013e-31,
558 |      3571628463647465e-31,35988245912849274e-32,3626149195437003e-31,
559 |      36536050613905045e-32,36811950211971757e-32,3708921955595139e-31,
560 |      37367887959883854e-32,3764798526487784e-31,37929541860172334e-32,
561 |      3821258870488753e-31,38497157350504876e-32,3878327996411799e-31,
562 |      39070989352498183e-32,3936031898702075e-31,3965130302950038e-31,
563 |      3994397635898684e-31,40238374599574693e-32,40534534149283966e-32,
564 |      4083249221007178e-31,41132286819038357e-32,4143395688089474e-31,
565 |      417375422017632e-30,42043083524385856e-32,4235062256482152e-31,
566 |      4266020205071558e-31,42971865761233266e-32,43285658568752094e-32,
567 |      4360162648241568e-31,43919816693657415e-32,4424027762380992e-31,
568 |      4456305897392361e-31,4488821177692617e-31,4521578845226347e-31,
569 |      4554584286317242e-31,4587843037674623e-31,4621360792696427e-31,
570 |      4655143408087069e-31,4689196910809916e-31,4723527505395548e-31,
571 |      4758141581628553e-31,4793045722637247e-31,4828246713412587e-31,
572 |      4863751549784512e-31,489956744788614e-30,4935701854138577e-31,
573 |      4972162455791703e-31,5008957192059114e-31,5046094265888434e-31,
574 |      5083582156411624e-31,5121429632123542e-31,5159645764841062e-31,
575 |      5198239944499494e-31,5237221894847848e-31,5276601690109886e-31,
576 |      531638977268369e-30,535659697195905e-30,5397234524338979e-31,
577 |      5438314094559637e-31,547984779841163e-30,5521848226975234e-31,
578 |      5564328472492872e-31,5607302156013967e-31,5650783456960506e-31,
579 |      5694787144776348e-31,5739328612839635e-31,5784423914835991e-31,
580 |      5830089803810586e-31,5876343774140057e-31,5923204106690931e-31,
581 |      5970689917460091e-31,6018821210025236e-31,6067618932170007e-31,
582 |      6117105037089722e-31,616730254963062e-30,6218235638068533e-31,
583 |      6269929691993326e-31,6322411406934211e-31,6375708876439426e-31,
584 |      6429851692413595e-31,6484871054618903e-31,6540799890364481e-31,
585 |      6597672985544566e-31,6655527128343343e-31,6714401267106488e-31,
586 |      677433668409101e-30,6835377187051274e-31,6897569320906848e-31,
587 |      6960962602074885e-31,7025609778445959e-31,7091567118449584e-31,
588 |      7158894733208553e-31,7227656936438121e-31,7297922647529085e-31,
589 |      7369765844191243e-31,7443266072160415e-31,7518509020832513e-31,
590 |      7595587175337749e-31,7674600557578427e-31,7755657571215791e-31,
591 |      7838875968622858e-31,792438396157355e-30,8012321502113083e-31,
592 |      8102841765913146e-31,8196112877806125e-31,8292319928581809e-31,
593 |      8391667344146798e-31,849438168364877e-30,8600714963334941e-31,
594 |      8710948629387904e-31,882539833807214e-30,8944419748519865e-31,
595 |      9068415597131669e-31,9197844409811865e-31,9333231329422952e-31,
596 |      9475181706524984e-31,9624398345658476e-31,978170365478442e-30,
597 |      994806847238388e-30,1012465014428832e-30,10312843657756166e-31,
598 |      1051435160404455e-30,10731281954224043e-31,10966288068517408e-31,
599 |      1122277490935032e-30,11505212963006663e-31,11819635283304206e-31,
600 |      12174462832361815e-31,12581958069755114e-31,13060984107128082e-31,
601 |      13642786158057857e-31,14384889932178723e-31,15412190700064194e-31,
602 |      17091034077168055e-31};
603 | 
604 | static double fe[ZIGGURAT_TABLE_SIZE] =
605 |     {              1.0,.9381436808621746,.9004699299257464,.8717043323812036,
606 |      .8477855006239896,.8269932966430503,.8084216515230084,.7915276369724956,
607 |      .7759568520401156,.7614633888498963,.7478686219851951,.7350380924314235,
608 |      .722867659593572,.711274760805076,.7001926550827882,.689566496117078,
609 |      .6793505722647654,.6695063167319247,.6600008410789997,.650805833414571,
610 |      .6418967164272661,.6332519942143661,.6248527387036659,.6166821809152077,
611 |      .608725382079622,.6009689663652322,.5934009016917334,.586010318477268,
612 |      .578787358602845,.5717230486648258,.5648091929124002,.5580382822625874,
613 |      .5514034165406413,.5448982376724396,.5385168720028619,.5322538802630432,
614 |      .5261042139836197,.5200631773682336,.5141263938147486,.5082897764106429,
615 |      .5025495018413477,.49690198724154955,.49134386959403253,.4858719873418849,
616 |      .4804833639304542,.4751751930373774,.46994482528396,.4647897562504262,
617 |      .4597076156421377,.45469615747461545,.449753251162755,.4448768734145485,
618 |      .4400651008423539,.4353161032156366,.43062813728845883,.42599954114303434,
619 |      .4214287289976166,.4169141864330029,.4124544659971612,.4080481831520324,
620 |      .4036940125305303,.3993906844752311,.39513698183329016,.3909317369847971,
621 |      .38677382908413765,.38266218149600983,.3785957594095808,.37457356761590216,
622 |      .370594648435146,.36665807978151416,.3627629733548178,.3589084729487498,
623 |      .35509375286678746,.35131801643748334,.347580494621637,.3438804447045024,
624 |      .34021714906678,.33658991402867755,.332998068761809,.3294409642641363,
625 |      .3259179723935562,.3224284849560891,.31897191284495724,.31554768522712895,
626 |      .31215524877417955,.3087940669345602,.30546361924459026,.3021634006756935,
627 |      .2988929210155818,.2956517042812612,.2924392881618926,.28925522348967775,
628 |      .2860990737370768,.28297041453878075,.27986883323697287,.27679392844851736,
629 |      .27374530965280297,.27072259679906,.2677254199320448,.2647534188350622,
630 |      .2618062426893629,.25888354974901623,.2559850070304154,.25311029001562946,
631 |      .2502590823688623,.24743107566532763,.2446259691318921,.24184346939887721,
632 |      .23908329026244918,.23634515245705964,.23362878343743335,.2309339171696274,
633 |      .2282602939307167,.22560766011668407,.22297576805812017,.2203643758433595,
634 |      .21777324714870053,.21520215107537868,.21265086199297828,.21011915938898826,
635 |      .20760682772422204,.2051136562938377,.20263943909370902,.20018397469191127,
636 |      .19774706610509887,.19532852067956322,.19292814997677132,.1905457696631954,
637 |      .1881811994042543,.1858342627621971,.18350478709776746,.1811926034754963,
638 |      .1788975465724783,.17661945459049488,.1743581691713535,.17211353531532006,
639 |      .16988540130252766,.1676736186172502,.165478041874936,.16329852875190182,
640 |      .16113493991759203,.1589871389693142,.15685499236936523,.15473836938446808,
641 |      .15263714202744286,.1505511850010399,.1484803756438668,.14642459387834494,
642 |      .14438372216063478,.1423576454324722,.14034625107486245,.1383494288635802,
643 |      .13636707092642886,.13439907170221363,.13244532790138752,.13050573846833077,
644 |      .12858020454522817,.12666862943751067,.12477091858083096,.12288697950954514,
645 |      .12101672182667483,.11916005717532768,.11731689921155557,.11548716357863353,
646 |      .11367076788274431,.1118676316700563,.11007767640518538,.1083008254510338,
647 |      .10653700405000166,.10478613930657017,.10304816017125772,.10132299742595363,
648 |      .09961058367063713,.0979108533114922,.0962237425504328,.09454918937605586,
649 |      .09288713355604354,.09123751663104016,.08960028191003286,.08797537446727022,
650 |      .08636274114075691,.08476233053236812,.08317409300963238,.08159798070923742,
651 |      .0800339475423199,.07848194920160642,.0769419431704805,.07541388873405841,
652 |      .07389774699236475,.07239348087570874,.07090105516237183,.06942043649872875,
653 |      .0679515934219366,.06649449638533977,.06504911778675375,.06361543199980733,
654 |      .062193415408540995,.06078304644547963,.059384305633420266,.05799717563120066,
655 |      .05662164128374288,.05525768967669704,.05390531019604609,.05256449459307169,
656 |      .05123523705512628,.04991753428270637,.0486113855733795,.04731679291318155,
657 |      .04603376107617517,.04476229773294328,.04350241356888818,.042254122413316234,
658 |      .04101744138041482,.039792391023374125,.03857899550307486,.03737728277295936,
659 |      .03618728478193142,.03500903769739741,.03384258215087433,.032687963508959535,
660 |      .03154523217289361,.030414443910466604,.029295660224637393,.028188948763978636,
661 |      .0270943837809558,.026012046645134217,.024942026419731783,.02388442051155817,
662 |      .02283933540638524,.02180688750428358,.020787204072578117,.019780424338009743,
663 |      .01878670074469603,.01780620041091136,.016839106826039948,.015885621839973163,
664 |      .014945968011691148,.014020391403181938,.013109164931254991,.012212592426255381,
665 |      .011331013597834597,.010464810181029979,.00961441364250221,.008780314985808975,
666 |      .00796307743801704,.007163353183634984,.006381905937319179,.005619642207205483,
667 |      .004877655983542392,.004157295120833795,.003460264777836904,.002788798793574076,
668 |      .0021459677437189063,.0015362997803015724,.0009672692823271745,.00045413435384149677};
669 | 
670 | 
671 | /*
672 |  * Here are the guts of the algorithm. As Marsaglia and Tsang state the
673 |  * algorithm in their paper:
674 |  *
675 |  * 1) Calculate a random signed integer j and let i be the index
676 |  *     provided by the rightmost 8-bits of j
677 |  * 2) Set x = j * w_i. If j < k_i return x
678 |  * 3) If i = 0, then return x from the tail
679 |  * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x
680 |  * 5) goto step 1
681 |  *
682 |  * Where f is the functional form of the distribution, which for a normal
683 |  * distribution is exp(-0.5*x*x)
684 |  */
685 | 
686 | /* Ziggurat normal sampler driven by the caller's dsfmt state; returns the accepted variate. NOTE: This is identical to randmtzig_gv_randn() below except for the random number generation */
687 | double randmtzig_randn (dsfmt_t *dsfmt)
688 | {
689 |   while (1)   /* rejection loop: only left through one of the three returns below */
690 |     {
691 |         /* arbitrary mantissa (selected by randi, with 1 bit for sign) */
692 |         const randmtzig_uint64_t r = randi(dsfmt);
693 |         const randmtzig_int64_t rabs=r>>1;   /* magnitude: r with bit 0 (the sign bit) shifted out */
694 |         const int idx = (int)(rabs&0xFF);   /* table index taken from the low 8 bits of rabs */
695 |         const double x = ( r&1 ? -rabs : rabs) * wi[idx];   /* signed candidate, scaled by wi[idx] */
696 | 
697 |         if (rabs < (randmtzig_int64_t)ki[idx]) {
698 |             return x;        /* 99.3% of the time we return here 1st try */
699 |         } else if (idx == 0) {
700 |             /* As stated in Marsaglia and Tsang
701 |              *
702 |              * For the normal tail, the method of Marsaglia[5] provides:
703 |              * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x,
704 |              * then return r+x. Except that r+x is always in the positive
705 |              * tail!!!! Anything random might be used to determine the
706 |              * sign, but as we already have r we might as well use it
707 |              *
708 |              * [PAK] but not the bottom 8 bits, since they are all 0 here!
709 |              */
710 |             double xx, yy;
711 |             do {
712 |                 xx = - ZIGGURAT_NOR_INV_R * log (randu(dsfmt));
713 |                 yy = - log (randu(dsfmt));
714 |             }
715 |             while ( yy+yy <= xx*xx);
716 |             return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx);   /* bit 8 of rabs picks the tail's sign */
717 |         } else if ((fi[idx-1] - fi[idx]) * randu(dsfmt) + fi[idx] < exp(-0.5*x*x)) {
718 |             return x;        /* accepted: the randu-interpolated height between fi[idx] and fi[idx-1] is below exp(-0.5*x*x) */
719 |         }
720 |         /* no test accepted the candidate: loop and draw a fresh r */
721 |     }
722 | }
723 | 
724 | /* Ziggurat normal sampler that takes no state argument and draws through NRANDI and RANDU; returns the accepted variate. NOTE: This is identical to randmtzig_randn() above except for the random number generation */
725 | double randmtzig_gv_randn (void)
726 | {
727 |   while (1)   /* rejection loop: only left through one of the three returns below */
728 |     {
729 |         /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */
730 |         const randmtzig_uint64_t r = NRANDI;
731 |         const randmtzig_int64_t rabs=r>>1;   /* magnitude: r with bit 0 (the sign bit) shifted out */
732 |         const int idx = (int)(rabs&0xFF);   /* table index taken from the low 8 bits of rabs */
733 |         const double x = ( r&1 ? -rabs : rabs) * wi[idx];   /* signed candidate, scaled by wi[idx] */
734 | 
735 |         if (rabs < (randmtzig_int64_t)ki[idx]) {
736 |             return x;        /* 99.3% of the time we return here 1st try */
737 |         } else if (idx == 0) {
738 |             /* As stated in Marsaglia and Tsang
739 |              *
740 |              * For the normal tail, the method of Marsaglia[5] provides:
741 |              * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x,
742 |              * then return r+x. Except that r+x is always in the positive
743 |              * tail!!!! Anything random might be used to determine the
744 |              * sign, but as we already have r we might as well use it
745 |              *
746 |              * [PAK] but not the bottom 8 bits, since they are all 0 here!
747 |              */
748 |             double xx, yy;
749 |             do {
750 |                 xx = - ZIGGURAT_NOR_INV_R * log (RANDU);
751 |                 yy = - log (RANDU);
752 |             }
753 |             while ( yy+yy <= xx*xx);
754 |             return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx);   /* bit 8 of rabs picks the tail's sign */
755 |         } else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp(-0.5*x*x)) {
756 |             return x;        /* accepted: the RANDU-interpolated height between fi[idx] and fi[idx-1] is below exp(-0.5*x*x) */
757 |         }
758 |         /* no test accepted the candidate: loop and draw a fresh r */
759 |     }
760 | }
761 | 
762 | double randmtzig_gv_exprnd (void)   /* ziggurat sampler for exprnd using the ke/we/fe tables; returns the accepted variate */
763 | {
764 |     while (1)   /* rejection loop: only left through one of the three returns below */
765 |     {
766 |         ZIGINT ri = ERANDI;   /* raw random integer; its low 8 bits double as the table index */
767 |         const int idx = (int)(ri & 0xFF);
768 |         const double x = ri * we[idx];   /* candidate value, scaled by we[idx] */
769 |         if (ri < ke[idx])   /* fast path: integer comparison against the threshold table */
770 |             return x;        // 98.9% of the time we return here 1st try
771 |         else if (idx == 0)
772 |         {
773 |             /* As stated in Marsaglia and Tsang
774 |              *
775 |              * For the exponential tail, the method of Marsaglia[5] provides:
776 |              * x = r - ln(U);
777 |              */
778 |             return ZIGGURAT_EXP_R - log(RANDU);
779 |     }
780 |     else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp(-x))
781 |         return x;   /* accepted: the RANDU-interpolated height between fe[idx] and fe[idx-1] is below exp(-x) */
782 |     }
783 | }
784 | 
785 | #ifdef STANDALONE
786 | 
787 | /*
788 |  * Standalone timing driver: randmtzig <n>
789 |  *
790 |  * Calls dsfmt_gv_init_gen_rand(0), then prints the CPU time (measured with
791 |  * clock()) spent producing n uniform doubles in bulk, n uniform doubles one
792 |  * at a time, 2*n 32-bit integers and n normal variates, followed by up to
793 |  * the first 10 normal variates.
794 |  *
795 |  * Returns 0 on success and -1 when the argument is missing or is not a
796 |  * positive integer, or when a buffer cannot be allocated.
797 |  */
798 | int main(int ac, char *av[]) {
799 |     if (ac == 1) {
800 |         printf("Usage: randmtzig <n>\n");
801 |         return (-1);
802 |     }
803 | 
804 |     /* atoi() gives 0 for non-numeric text; a zero or negative count would
805 |      * turn into a bogus allocation size below, so refuse it up front. */
806 |     const int n = atoi(av[1]);
807 |     if (n <= 0) {
808 |         fprintf(stderr, "randmtzig: <n> must be a positive integer\n");
809 |         return (-1);
810 |     }
811 |     const size_t count = (size_t)n;   /* size_t copy keeps 2*n from overflowing int */
812 | 
813 |     dsfmt_gv_init_gen_rand(0);
814 | 
815 |     /* posix_memalign() reports failure through its return value, so each
816 |      * call is checked before the buffer is used. */
817 |     double *p = NULL;
818 |     if (posix_memalign((void **)&p, 16, count*sizeof(double)) != 0) {
819 |         fprintf(stderr, "randmtzig: cannot allocate buffer for %d doubles\n", n);
820 |         return (-1);
821 |     }
822 |     uint32_t *u = NULL;
823 |     if (posix_memalign((void **)&u, 16, 2*count*sizeof(uint32_t)) != 0) {
824 |         fprintf(stderr, "randmtzig: cannot allocate buffer for 2*%d 32-bit integers\n", n);
825 |         free(p);
826 |         return (-1);
827 |     }
828 | 
829 |     clock_t t1;   /* clock() returns clock_t, not time_t */
830 | 
831 |     t1 = clock();
832 |     dsfmt_gv_fill_array_close_open(p, n);
833 |     printf("Uniform fill (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);
834 | 
835 |     t1 = clock();
836 |     for (int i = 0; i < n; i++)  p[i] = dsfmt_gv_genrand_close_open();
837 |     printf("Uniform (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);
838 | 
839 |     t1 = clock();
840 |     for (size_t i = 0; i < 2*count; i++)  u[i] = dsfmt_gv_genrand_uint32();
841 |     printf("Uniform 32-bit ints (2*n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);
842 | 
843 |     memset((void *)p, 0, count*sizeof(double));
844 |     t1 = clock();
845 |     for (int i = 0; i < n; i++)  p[i] = randmtzig_gv_randn();
846 |     printf("Normal (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);
847 | 
848 |     /* Print a sample, but never read past the n values that were generated. */
849 |     const int shown = n < 10 ? n : 10;
850 |     for (int i = 0; i < shown; i++)  printf("%lf\n", p[i]);
851 | 
852 |     free(u);
853 |     free(p);
854 |     return 0;
855 | }
856 | 
857 | #endif
858 | 


--------------------------------------------------------------------------------
/rust/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | **/*.rs.bk
3 | 


--------------------------------------------------------------------------------
/rust/Cargo.lock:
--------------------------------------------------------------------------------
  1 | [[package]]
  2 | name = "bitflags"
  3 | version = "1.0.1"
  4 | source = "registry+https://github.com/rust-lang/crates.io-index"
  5 | 
  6 | [[package]]
  7 | name = "blas-src"
  8 | version = "0.1.3"
  9 | source = "registry+https://github.com/rust-lang/crates.io-index"
 10 | dependencies = [
 11 |  "openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
 12 | ]
 13 | 
 14 | [[package]]
 15 | name = "cblas"
 16 | version = "0.1.5"
 17 | source = "registry+https://github.com/rust-lang/crates.io-index"
 18 | dependencies = [
 19 |  "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 20 |  "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)",
 21 |  "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
 22 | ]
 23 | 
 24 | [[package]]
 25 | name = "cblas-sys"
 26 | version = "0.1.4"
 27 | source = "registry+https://github.com/rust-lang/crates.io-index"
 28 | dependencies = [
 29 |  "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)",
 30 | ]
 31 | 
 32 | [[package]]
 33 | name = "either"
 34 | version = "1.5.0"
 35 | source = "registry+https://github.com/rust-lang/crates.io-index"
 36 | 
 37 | [[package]]
 38 | name = "fuchsia-zircon"
 39 | version = "0.3.3"
 40 | source = "registry+https://github.com/rust-lang/crates.io-index"
 41 | dependencies = [
 42 |  "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
 43 |  "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
 44 | ]
 45 | 
 46 | [[package]]
 47 | name = "fuchsia-zircon-sys"
 48 | version = "0.3.3"
 49 | source = "registry+https://github.com/rust-lang/crates.io-index"
 50 | 
 51 | [[package]]
 52 | name = "itertools"
 53 | version = "0.7.8"
 54 | source = "registry+https://github.com/rust-lang/crates.io-index"
 55 | dependencies = [
 56 |  "either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
 57 | ]
 58 | 
 59 | [[package]]
 60 | name = "julia-bench"
 61 | version = "0.1.0"
 62 | dependencies = [
 63 |  "blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 64 |  "cblas 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
 65 |  "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 66 |  "itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)",
 67 |  "mersenne_twister 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 68 |  "ndarray 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)",
 69 |  "num 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
 70 |  "openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
 71 |  "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
 72 | ]
 73 | 
 74 | [[package]]
 75 | name = "libc"
 76 | version = "0.2.40"
 77 | source = "registry+https://github.com/rust-lang/crates.io-index"
 78 | 
 79 | [[package]]
 80 | name = "matrixmultiply"
 81 | version = "0.1.14"
 82 | source = "registry+https://github.com/rust-lang/crates.io-index"
 83 | dependencies = [
 84 |  "rawpointer 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 85 | ]
 86 | 
 87 | [[package]]
 88 | name = "mersenne_twister"
 89 | version = "1.1.1"
 90 | source = "registry+https://github.com/rust-lang/crates.io-index"
 91 | dependencies = [
 92 |  "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
 93 | ]
 94 | 
 95 | [[package]]
 96 | name = "ndarray"
 97 | version = "0.11.2"
 98 | source = "registry+https://github.com/rust-lang/crates.io-index"
 99 | dependencies = [
100 |  "blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
101 |  "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
102 |  "itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)",
103 |  "matrixmultiply 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)",
104 |  "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
105 |  "num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
106 | ]
107 | 
108 | [[package]]
109 | name = "num"
110 | version = "0.1.42"
111 | source = "registry+https://github.com/rust-lang/crates.io-index"
112 | dependencies = [
113 |  "num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
114 |  "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
115 |  "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
116 |  "num-iter 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
117 |  "num-rational 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
118 |  "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
119 | ]
120 | 
121 | [[package]]
122 | name = "num-bigint"
123 | version = "0.1.43"
124 | source = "registry+https://github.com/rust-lang/crates.io-index"
125 | dependencies = [
126 |  "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
127 |  "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
128 |  "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
129 |  "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
130 | ]
131 | 
132 | [[package]]
133 | name = "num-complex"
134 | version = "0.1.43"
135 | source = "registry+https://github.com/rust-lang/crates.io-index"
136 | dependencies = [
137 |  "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
138 |  "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
139 | ]
140 | 
141 | [[package]]
142 | name = "num-integer"
143 | version = "0.1.36"
144 | source = "registry+https://github.com/rust-lang/crates.io-index"
145 | dependencies = [
146 |  "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
147 | ]
148 | 
149 | [[package]]
150 | name = "num-iter"
151 | version = "0.1.35"
152 | source = "registry+https://github.com/rust-lang/crates.io-index"
153 | dependencies = [
154 |  "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
155 |  "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
156 | ]
157 | 
158 | [[package]]
159 | name = "num-rational"
160 | version = "0.1.42"
161 | source = "registry+https://github.com/rust-lang/crates.io-index"
162 | dependencies = [
163 |  "num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
164 |  "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
165 |  "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
166 |  "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
167 | ]
168 | 
169 | [[package]]
170 | name = "num-traits"
171 | version = "0.1.43"
172 | source = "registry+https://github.com/rust-lang/crates.io-index"
173 | dependencies = [
174 |  "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
175 | ]
176 | 
177 | [[package]]
178 | name = "num-traits"
179 | version = "0.2.2"
180 | source = "registry+https://github.com/rust-lang/crates.io-index"
181 | 
182 | [[package]]
183 | name = "openblas-src"
184 | version = "0.5.6"
185 | source = "registry+https://github.com/rust-lang/crates.io-index"
186 | 
187 | [[package]]
188 | name = "rand"
189 | version = "0.4.2"
190 | source = "registry+https://github.com/rust-lang/crates.io-index"
191 | dependencies = [
192 |  "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
193 |  "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)",
194 |  "winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
195 | ]
196 | 
197 | [[package]]
198 | name = "rawpointer"
199 | version = "0.1.0"
200 | source = "registry+https://github.com/rust-lang/crates.io-index"
201 | 
202 | [[package]]
203 | name = "rustc-serialize"
204 | version = "0.3.24"
205 | source = "registry+https://github.com/rust-lang/crates.io-index"
206 | 
207 | [[package]]
208 | name = "winapi"
209 | version = "0.3.4"
210 | source = "registry+https://github.com/rust-lang/crates.io-index"
211 | dependencies = [
212 |  "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
213 |  "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
214 | ]
215 | 
216 | [[package]]
217 | name = "winapi-i686-pc-windows-gnu"
218 | version = "0.4.0"
219 | source = "registry+https://github.com/rust-lang/crates.io-index"
220 | 
221 | [[package]]
222 | name = "winapi-x86_64-pc-windows-gnu"
223 | version = "0.4.0"
224 | source = "registry+https://github.com/rust-lang/crates.io-index"
225 | 
226 | [metadata]
227 | "checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf"
228 | "checksum blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8d3a12d382bd4c40f95c105f1a7074a18bdb0ee140ddb73f6d924a4f7d333bc9"
229 | "checksum cblas 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ce45c2223361cc6077b505f4d203e3b9494d746f39dfbf7627bbcb5aa7f0a13a"
230 | "checksum cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b6feecd82cce51b0204cf063f0041d69f24ce83f680d87514b004248e7b0fa65"
231 | "checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0"
232 | "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
233 | "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
234 | "checksum itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f58856976b776fedd95533137617a02fb25719f40e7d9b01c7043cd65474f450"
235 | "checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b"
236 | "checksum matrixmultiply 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "cac1a66eab356036af85ea093101a14223dc6e3f4c02a59b7d572e5b93270bf7"
237 | "checksum mersenne_twister 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b85dbb2f68dfc026aac8f4c5196579896b10ee45e8b9a1a3b325fab3043d1cb0"
238 | "checksum ndarray 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0e3d24c5ba54015d7d5203ca6f00d4cc16c71042bf7f7be26f091236f390a16a"
239 | "checksum num 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "4703ad64153382334aa8db57c637364c322d3372e097840c72000dabdcf6156e"
240 | "checksum num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "81b483ea42927c463e191802e7334556b48e7875297564c0e9951bd3a0ae53e3"
241 | "checksum num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "b288631d7878aaf59442cffd36910ea604ecd7745c36054328595114001c9656"
242 | "checksum num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "f8d26da319fb45674985c78f1d1caf99aa4941f785d384a2ae36d0740bc3e2fe"
243 | "checksum num-iter 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "4b226df12c5a59b63569dd57fafb926d91b385dfce33d8074a412411b689d593"
244 | "checksum num-rational 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "ee314c74bd753fc86b4780aa9475da469155f3848473a261d2d18e35245a784e"
245 | "checksum num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31"
246 | "checksum num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dee092fcdf725aee04dd7da1d21debff559237d49ef1cb3e69bcb8ece44c7364"
247 | "checksum openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "68d293fca3c73ad377ddd2236d32c828a50a611a5b472bf6a884b9b60a4acd97"
248 | "checksum rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eba5f8cb59cc50ed56be8880a5c7b496bfd9bd26394e176bc67884094145c2c5"
249 | "checksum rawpointer 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ebac11a9d2e11f2af219b8b8d833b76b1ea0e054aa0e8d8e9e4cbde353bdf019"
250 | "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
251 | "checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3"
252 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
253 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
254 | 


--------------------------------------------------------------------------------
/rust/Cargo.toml:
--------------------------------------------------------------------------------
# Manifest for the Rust port of the benchmarks. `publish = false` keeps the
# crate from being uploaded to a registry.
[package]
name = "julia-bench"
publish = false
version = "0.1.0"

[dependencies]
itertools = "0.7.1"
mersenne_twister = "1.1.1"
num = "0.1.37"
rand = "0.4.2"

# Optional dependency: only built when the `direct_blas` feature below is on.
[dependencies.cblas]
version = "0.1.2"
optional = true

[dependencies.cblas-sys]
version = "0.1.4"

# ndarray is built with its `blas` feature enabled.
[dependencies.ndarray]
features = ["blas"]
version = "0.11.1"

# Default features are disabled; only the `openblas` feature is requested.
[dependencies.blas-src]
features = ["openblas"]
default-features = false
version = "0.1.2"

# NOTE(review): `system` presumably links an OpenBLAS already installed on the
# machine instead of building one -- confirm against the openblas-src docs.
[dependencies.openblas-src]
features = ["cblas", "system"]
default-features = false
version = "0.5.6"

# `direct_blas` pulls in the optional `cblas` crate; it is off by default.
[features]
default = []
direct_blas = ["cblas"]
36 | 


--------------------------------------------------------------------------------
/rust/rust-toolchain:
--------------------------------------------------------------------------------
1 | nightly-2018-04-16
2 | 


--------------------------------------------------------------------------------
/rust/src/direct_blas.rs:
--------------------------------------------------------------------------------
  1 | #![allow(unsafe_code)]
  2 | 
  3 | use rand::Rng;
  4 | use std::iter::Sum;
  5 | use util::{gen_rng, fill_rand, myrand};
  6 | use cblas::{dgemm, Layout, Transpose};
  7 | use itertools::Itertools;
  8 | 
  9 | pub fn randmatstat(t: usize) -> (f64, f64) {
 10 |     let mut rng = gen_rng(1234u64);
 11 | 
 12 |     let n = 5;
 13 | 
 14 |     let mut v = vec![0.; t];
 15 |     let mut w = vec![0.; t];
 16 | 
 17 |     {
 18 |         let mut a = vec![0.; n * n];
 19 |         let mut b = vec![0.; n * n];
 20 |         let mut c = vec![0.; n * n];
 21 |         let mut d = vec![0.; n * n];
 22 |         let mut p = vec![0.; (n) * (4 * n)];
 23 |         let mut q = vec![0.; (2 * n) * (2 * n)];
 24 | 
 25 |         let mut pt_p1 = vec![0.; (4 * n) * (4 * n)];
 26 |         let mut pt_p2 = vec![0.; (4 * n) * (4 * n)];
 27 |         let mut qt_q1 = vec![0.; (2 * n) * (2 * n)];
 28 |         let mut qt_q2 = vec![0.; (2 * n) * (2 * n)];
 29 | 
 30 |         for (ve, we) in v.iter_mut().zip(w.iter_mut()) {
 31 |             fill_rand(&mut a, &mut rng);
 32 |             fill_rand(&mut b, &mut rng);
 33 |             fill_rand(&mut c, &mut rng);
 34 |             fill_rand(&mut d, &mut rng);
 35 | 
 36 |             p[0 .. n * n].copy_from_slice(&a);
 37 |             p[n * n .. 2 * n * n].copy_from_slice(&b);
 38 |             p[2 * n * n .. 3 * n * n].copy_from_slice(&c);
 39 |             p[3 * n * n .. 4 * n * n].copy_from_slice(&d);
 40 | 
 41 |             for j in 0..n {
 42 |                 for k in 0..n {
 43 |                     q[2 * n * j + k] = a[k];
 44 |                     q[2 * n * j + n + k] = b[k];
 45 |                     q[2 * n * (n + j) + k] = c[k];
 46 |                     q[2 * n * (n + j) + n + k] = d[k];
 47 |                 }
 48 |             }
 49 | 
 50 |             unsafe {
 51 |                 let n = n as i32;
 52 | 
 53 |                 dgemm(Layout::ColumnMajor, Transpose::Ordinary, Transpose::None,
 54 |                     n , n, 4 * n, 1., &p, 4 * n, &p, 4 * n, 0.,
 55 |                     &mut pt_p1, 4 * n);
 56 |                 dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
 57 |                     4 * n, 4 * n, 4 * n, 1., &pt_p1, 4 * n, &pt_p1, 4 * n, 0.,
 58 |                     &mut pt_p2, 4 * n);
 59 |                 dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
 60 |                     4 * n, 4 * n, 4 * n, 1., &pt_p2, 4 * n, &pt_p2, 4 * n, 0.,
 61 |                     &mut pt_p1, 4 * n);
 62 |             }
 63 | 
 64 |             *ve = trace(&pt_p1, n * 4);
 65 | 
 66 |             unsafe {
 67 |                 let n = n as i32;
 68 | 
 69 |                 dgemm(Layout::ColumnMajor, Transpose::Ordinary, Transpose::None,
 70 |                     2 * n, 2 * n, 2 * n, 1., &q, 2 * n, &q, 2 * n, 0.,
 71 |                     &mut qt_q1, 2 * n);
 72 |                 dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
 73 |                     2 * n, 2 * n, 2 * n, 1., &qt_q1, 2 * n, &qt_q1, 2 * n, 0.,
 74 |                     &mut qt_q2, 2 * n);
 75 |                 dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
 76 |                     2 * n, 2 * n, 2 * n, 1., &qt_q2, 2 * n, &qt_q2, 2 * n, 0.,
 77 |                     &mut qt_q1, 2 * n);
 78 |             }
 79 | 
 80 |             *we = trace(&qt_q1, 2 * n);
 81 |         }
 82 |     }
 83 | 
 84 |     let (v1, v2, w1, w2) = v.iter()
 85 |         .zip(w.iter())
 86 |         .fold((0., 0., 0., 0.), |(v1, v2, w1, w2), (ve, we)| (
 87 |             v1 + *ve,
 88 |             v2 + ve * ve,
 89 |             w1 + *we,
 90 |             w2 + we * we
 91 |         ));
 92 | 
 93 |     let t = t as f64;
 94 | 
 95 |     (
 96 |         f64::sqrt((t * (t * v2 - v1 * v1)) / ((t - 1.) * v1 * v1)),
 97 |         f64::sqrt((t * (t * w2 - w1 * w1)) / ((t - 1.) * w1 * w1)),
 98 |     )
 99 | }
100 | 
101 | /// Calculate the trace of a square matrix
102 | #[inline]
103 | fn trace<'a, T>(m: &'a [T], n: usize) -> T
104 | where
105 |     T: Sum<&'a T>
106 | {
107 |     debug_assert_eq!(m.len(), n * n);
108 |     m.into_iter().step(n + 1).sum()
109 | }
110 | 
111 | pub fn randmatmul<R: Rng>(n: usize, mut rng: R) -> Vec<f64> {
112 |     let a = myrand(n * n, &mut rng);
113 |     let b = myrand(n * n, &mut rng);
114 |     let mut c = vec![0.; n * n];
115 | 
116 |     unsafe {
117 |         let n = n as i32;
118 |         dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
119 |             n, n, n, 1., &a, n, &b, n, 0., &mut c, n);
120 |     }
121 | 
122 |     c
123 | }
124 | 
/// Sanity check on a `randmatmul` result: asserts that the first element of
/// the buffer is non-negative. Takes the buffer by value, so it is dropped here.
/// Panics if the assertion fails or if `m` is empty (indexing `m[0]`).
#[inline]
pub fn check_randmatmul(m: Vec<f64>) {
    assert!(0. <= m[0]);
}
129 | 


--------------------------------------------------------------------------------
/rust/src/main.rs:
--------------------------------------------------------------------------------
  1 | #![feature(test)]
  2 | #![deny(unsafe_code)]
  3 | 
  4 | extern crate itertools;
  5 | extern crate mersenne_twister;
  6 | extern crate num;
  7 | extern crate rand;
  8 | extern crate test;
  9 | 
 10 | // Use BLAS directly
 11 | #[cfg(feature = "direct_blas")]
 12 | extern crate cblas;
 13 | 
 14 | #[cfg(feature = "direct_blas")]
 15 | extern crate blas_src;
 16 | 
 17 | // Use ndarray (with BLAS implementation)
 18 | #[cfg(not(feature = "direct_blas"))]
 19 | #[macro_use(s)]
 20 | extern crate ndarray;
 21 | 
 22 | use std::time::{Duration, Instant};
 23 | use std::u32;
 24 | use std::fs::OpenOptions;
 25 | use std::io::{BufWriter, Write};
 26 | 
 27 | use test::black_box;
 28 | use num::complex::Complex64;
 29 | use rand::Rng;
 30 | 
 31 | mod util;
 32 | use util::{gen_rng, myrand};
 33 | 
 34 | #[cfg(feature = "direct_blas")]
 35 | mod direct_blas;
 36 | #[cfg(feature = "direct_blas")]
 37 | use direct_blas::{randmatstat, randmatmul, check_randmatmul};
 38 | 
 39 | #[cfg(not(feature = "direct_blas"))]
 40 | use ndarray::Array2;
 41 | #[cfg(not(feature = "direct_blas"))]
 42 | use util::fill_rand;
 43 | #[cfg(not(feature = "direct_blas"))]
 44 | use num::Zero;
 45 | 
 46 | const NITER: u32 = 5;
 47 | 
 48 | #[cfg(not(feature = "direct_blas"))]
 49 | fn nrand<R: Rng>(shape: (usize, usize), rng: &mut R) -> Array2<f64> {
 50 |     let mut m = Array2::zeros(shape);
 51 |     fill_rand(&mut m, rng);
 52 |     m
 53 | }
 54 | 
 55 | fn fib(n: i32) -> i32 {
 56 |     let n = black_box(n); // prevent over-optimization
 57 |     if n < 2 {
 58 |         n
 59 |     } else {
 60 |         fib(n - 1) + fib(n - 2)
 61 |     }
 62 | }
 63 | 
/// Escape-time count for the starting point `z`.
///
/// Generates the sequence `z, z*z + c, ...` with `c` equal to the starting
/// `z`, looks at no more than the first 80 terms, and counts the leading run
/// of terms whose squared norm is at most 4. The result is in `0..=80`.
fn mandel(z: Complex64) -> u32 {
    use std::iter;

    // `repeat(z)` supplies the constant `c` on every step; the scan state
    // carries the running value of the sequence.
    iter::repeat(z)
        .scan(z, |z, c| {
            // yield the current term, then advance the state to z^2 + c
            let current = *z;
            *z = current * current + c;
            Some(current)
        })
        .take(80)
        .take_while(|z| z.norm_sqr() <= 4.0)
        .count() as u32
}
 77 | 
 78 | fn mandelperf() -> Vec<u32> {
 79 |     (-10..=10).flat_map(|i| (-20..=5).map(move |j| (i, j)))
 80 |         .map(|(i, j)| (j as f64 / 10., i as f64 / 10.))
 81 |         .map(|(re, im)| mandel(Complex64::new(re, im)))
 82 |         .collect()
 83 | }
 84 | 
 85 | fn pisum() -> f64 {
 86 |     let mut sum = 0.;
 87 |     for _ in 0..500 {
 88 |         sum = (1..10001)
 89 |             .map(|k| {
 90 |                 let k = k as f64;
 91 |                 1. / (k * k)
 92 |             })
 93 |             .sum();
 94 |     }
 95 |     sum
 96 | }
 97 | 
 98 | #[cfg(not(feature = "direct_blas"))]
 99 | fn randmatstat(t: usize) -> (f64, f64) {
100 |     let mut rng = gen_rng(1234u64);
101 | 
102 |     let n = 5;
103 | 
104 |     let mut v = vec![0.; t];
105 |     let mut w = vec![0.; t];
106 | 
107 |     for (ve, we) in v.iter_mut().zip(w.iter_mut()) {
108 |         let a = nrand((n, n), &mut rng);
109 |         let b = nrand((n, n), &mut rng);
110 |         let c = nrand((n, n), &mut rng);
111 |         let d = nrand((n, n), &mut rng);
112 |         let p = { // P = [a b c d]
113 |             let mut p = Array2::<f64>::zeros((n, 4 * n));
114 |             let n = n as isize;
115 |             p.slice_mut(s![.., 0..n]).assign(&a);
116 |             p.slice_mut(s![.., n..2*n]).assign(&b);
117 |             p.slice_mut(s![.., 2*n..3*n]).assign(&c);
118 |             p.slice_mut(s![.., 3*n..4*n]).assign(&d);
119 |             p
120 |         };
121 |         let q = { // Q = [a b ; c d]
122 |             let mut q = Array2::<f64>::zeros((2 * n, 2 * n));
123 |             let n = n as isize;
124 |             q.slice_mut(s![0..n, 0..n]).assign(&a);
125 |             q.slice_mut(s![0..n, n..2*n]).assign(&b);
126 |             q.slice_mut(s![n..2*n, 0..n]).assign(&c);
127 |             q.slice_mut(s![n..2*n, n..2*n]).assign(&d);
128 |             q
129 |         };
130 | 
131 |         let pt = p.t();
132 |         let ptp = pt.dot(&p);
133 |         let ptp2 = ptp.dot(&ptp);
134 |         let ptp4 = ptp2.dot(&ptp2);
135 |         *ve = trace_arr(&ptp4);
136 | 
137 |         let qt = q.t();
138 |         let ptq = qt.dot(&q);
139 |         let ptq2 = ptq.dot(&ptq);
140 |         let ptq4 = ptq2.dot(&ptq2);
141 |         *we = trace_arr(&ptq4);
142 |     }
143 | 
144 |     let (v1, v2, w1, w2) = v.iter()
145 |         .zip(w.iter())
146 |         .fold((0., 0., 0., 0.), |(v1, v2, w1, w2), (ve, we)| (
147 |             v1 + *ve,
148 |             v2 + ve * ve,
149 |             w1 + *we,
150 |             w2 + we * we
151 |         ));
152 | 
153 |     let t = t as f64;
154 | 
155 |     (
156 |         f64::sqrt((t * (t * v2 - v1 * v1)) / ((t - 1.) * v1 * v1)),
157 |         f64::sqrt((t * (t * w2 - w1 * w1)) / ((t - 1.) * w1 * w1)),
158 |     )
159 | }
160 | 
161 | /// Calculate the trace of a square matrix
162 | #[cfg(not(feature = "direct_blas"))]
163 | #[inline]
164 | fn trace_arr<'a, T: 'a>(m: &'a Array2<T>) -> T
165 | where
166 |     T: Zero + Clone
167 | {
168 |     m.diag().scalar_sum()
169 | }
170 | 
171 | #[cfg(not(feature = "direct_blas"))]
172 | fn randmatmul<R: Rng>(n: usize, mut rng: R) -> Array2<f64> {
173 |     let a = nrand((n, n), &mut rng);
174 |     let b = nrand((n, n), &mut rng);
175 | 
176 |     a.dot(&b)
177 | }
178 | 
/// Sanity check on a `randmatmul` result: asserts that the element at
/// `[0, 0]` is non-negative. Takes the matrix by value, so it is dropped here.
#[cfg(not(feature = "direct_blas"))]
#[inline]
fn check_randmatmul(m: Array2<f64>) {
    assert!(0. <= m[[0, 0]]);
}
184 | 
/// Checks `quicksort` on a strictly descending input and on a shuffled one.
#[test]
fn test_quicksort() {
    let expected = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];

    let mut descending = [10., 9., 8., 7., 6., 5., 4., 3., 2., 1.];
    quicksort(&mut descending[..], 0);
    assert_eq!(descending, expected);

    let mut shuffled = [8., 2., 10., 4., 7., 6., 9., 5., 1., 3.];
    quicksort(&mut shuffled[..], 0);
    assert_eq!(shuffled, expected);
}
195 | 
/// Sorts `a` in place in ascending order with a Hoare-partition quicksort.
///
/// `lo` is the index at which the first partition scan starts; pass `0` to
/// sort the whole slice. The left partition is handled by recursion and the
/// right partition by continuing the outer loop.
///
/// Slices with fewer than two elements are returned untouched. Without that
/// guard `a.len() - 1` underflows for an empty slice.
fn quicksort(a: &mut [f64], mut lo: usize) {
    if a.len() < 2 {
        return;
    }

    let hi = a.len() - 1;
    let mut i: usize = lo;
    // j is isize because the downward scan can step to -1
    let mut j: isize = hi as isize;

    while i < hi {
        let pivot = a[(lo + hi) / 2];
        while i as isize <= j {
            while a[i] < pivot {
                i += 1;
            }
            while a[j as usize] > pivot {
                j -= 1;
            }
            if i as isize <= j {
                a.swap(i, j as usize);
                i += 1;
                j -= 1;
            }
        }

        // Left partition is a[..=j]; only recurse when it has more than one
        // element past `lo`.
        if (lo as isize) < j {
            quicksort(&mut a[..(j + 1) as usize], lo);
        }

        // Continue with the right partition a[i..=hi].
        lo = i;
        j = hi as isize;
    }
}
228 | 
/// Writes `n` lines of the form `"<i> <i>"` (for `i` in `0..n`) to
/// `/dev/null` through a buffered writer.
///
/// Panics if `/dev/null` cannot be opened for writing or a write fails.
fn printfd(n: usize) {
    let sink = OpenOptions::new()
        .write(true)
        .open("/dev/null")
        .unwrap();
    let mut out = BufWriter::new(sink);
    (0..n).for_each(|i| writeln!(out, "{} {}", i, i).unwrap());
}
237 | 
/// Prints one result row `rust,<name>,<value>` where the value is `t * 1000`
/// with six decimal places (seconds in, milliseconds out, given the callers
/// in `main` pass `to_float` results).
fn print_perf(name: &str, t: f64) {
    let millis = t * 1000.;
    println!("rust,{},{:.6}", name, millis);
}
241 | 
/// Converts a `Duration` to fractional seconds: whole seconds plus the
/// sub-second nanoseconds scaled by `1e9`.
fn to_float(d: Duration) -> f64 {
    let whole = d.as_secs() as f64;
    let fraction = d.subsec_nanos() as f64 / 1e9;
    whole + fraction
}
246 | 
/// Runs `op` exactly `niters` times, timing each run separately, and returns
/// the shortest elapsed time.
///
/// Panics if `niters` is 0, since there is then no measurement to return.
#[inline]
fn measure_best<F: FnMut()>(niters: u32, mut op: F) -> Duration {
    let mut best: Option<Duration> = None;
    for _ in 0..niters {
        let started = Instant::now();
        op();
        let elapsed = started.elapsed();
        // keep the earlier measurement on ties
        best = match best {
            Some(current) if current <= elapsed => Some(current),
            _ => Some(elapsed),
        };
    }
    best.unwrap()
}
256 | 
/// Benchmark driver: runs every kernel `NITER` times through `measure_best`
/// (which keeps the shortest run) and prints one `rust,<name>,<ms>` row per
/// benchmark via `print_perf`.
fn main() {
    // initialize RNG (fixed seed 0); shared by the parse, sort and matmul benchmarks
    let mut rng = gen_rng(0);

    // fib(20)
    assert_eq!(fib(20), 6765);
    // `f` accumulates the results so the calls have an observable effect
    let mut f = 0i32;
    let fibarg = 20;
    let tmin = measure_best(NITER, || {
        for _ in 0..1000 {
            f = f.wrapping_add(fib(fibarg));
        }
    });
    // each timed run makes 1000 calls; report the time per call
    print_perf("recursion_fibonacci", to_float(tmin) / 1000.0);

    // parse_int: format a random u32 as hex, parse it back, check the round trip
    // NOTE(review): the loop runs 1000 * 100 iterations but the time is divided
    // by 100 -- presumably the reported unit is 1000 conversions; confirm
    // against the other language ports.
    let tmin = measure_best(NITER, || {
        for _ in 0..1000 * 100 {
            let n: u32 = rng.gen();
            let s = format!("{:x}", n);
            let m = u32::from_str_radix(&s, 16).unwrap();
            assert_eq!(m, n);
        }
    });
    print_perf("parse_integers", to_float(tmin) / 100.0);

    // mandelbrot: 100 grid evaluations per timed run; only the first one of
    // each run is checksummed
    let mandel_sum_init = black_box(0u32);
    let mut mandel_sum2 = mandel_sum_init;
    let tmin = measure_best(NITER, || {
        for j in 0..100 {
            let m = mandelperf();
            if j == 0 {
                let mandel_sum: u32 = m.iter().sum();
                assert_eq!(mandel_sum, 14791);
                mandel_sum2 += mandel_sum;
            }
        }
    });
    // one checksum was added per timed run
    assert_eq!(mandel_sum2, 14791 * NITER);
    print_perf("userfunc_mandelbrot", to_float(tmin) / 100.0);

    // sort: generating the 5000 random values happens inside the timed closure
    let tmin = measure_best(NITER, || {
        let mut d = myrand(5000, &mut rng);
        quicksort(&mut d, 0);
    });
    print_perf("recursion_quicksort", to_float(tmin));

    // pi sum
    let mut pi = 0.;
    let tmin = measure_best(NITER, || {
        pi = black_box(pisum());
    });
    assert!(f64::abs(pi - 1.644834071848065) < 1e-12);
    print_perf("iteration_pi_sum", to_float(tmin));

    // rand mat stat: the result is only kept alive via black_box, not checked
    let mut r = (0., 0.);
    let tmin = measure_best(NITER, || {
        r = black_box(randmatstat(1000));
    });
    print_perf("matrix_statistics", to_float(tmin));

    // rand mat mul: 1000 x 1000 operands, generated inside the timed closure
    let tmin = measure_best(NITER, || {
        let c = randmatmul(1000, &mut rng);
        check_randmatmul(c);
    });
    print_perf("matrix_multiply", to_float(tmin));

    // printfd
    let tmin = measure_best(NITER, || {
        printfd(100000);
    });
    print_perf("print_to_file", to_float(tmin));
}
333 | 


--------------------------------------------------------------------------------
/rust/src/util.rs:
--------------------------------------------------------------------------------
 1 | use rand::{Rand, Rng, SeedableRng};
 2 | 
 3 | use mersenne_twister::MT19937_64;
 4 | pub type MTRng = MT19937_64;
 5 | 
 6 | #[inline]
 7 | pub fn gen_rng(seed: u64) -> MTRng {
 8 |     MTRng::from_seed(seed)
 9 | }
10 | 
11 | pub fn fill_rand<'a, I, T: 'a, R>(a: I, rng: &mut R)
12 | where
13 |     I: IntoIterator<Item=&'a mut T>,
14 |     T: Rand,
15 |     R: Rng,
16 | {
17 |     for v in a.into_iter() {
18 |         *v = rng.gen();
19 |     }
20 | }
21 | 
22 | pub fn myrand<R: Rng>(n: usize, rng: &mut R) -> Vec<f64> {
23 |     let mut d: Vec<f64> = vec![0.; n];
24 |     fill_rand(&mut d, rng);
25 |     d
26 | }
27 | 


--------------------------------------------------------------------------------
/scala/.gitignore:
--------------------------------------------------------------------------------
 1 | *.class
 2 | *.log
 3 | 
 4 | # sbt specific
 5 | .cache
 6 | .history
 7 | .lib/
 8 | dist/*
 9 | target/
10 | lib_managed/
11 | src_managed/
12 | project/boot/
13 | project/plugins/project/
14 | 
15 | # Scala-IDE specific
16 | .scala_dependencies
17 | .worksheet
18 | 


--------------------------------------------------------------------------------
/scala/build.sbt:
--------------------------------------------------------------------------------
 1 | // This file was formerly a part of Julia. License is MIT: https://julialang.org/license
 2 | 
 3 | libraryDependencies ++= Seq(
 4 |     "org.scalanlp" %% "breeze" % "0.10",
 5 |     "org.scalanlp" %% "breeze-natives" % "0.10"
 6 | )
 7 | 
 8 | resolvers ++= Seq(
 9 |     "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/"
10 | )
11 | 
12 | scalaVersion := "2.11.1"
13 | 
14 | showSuccess := false
15 | 
16 | onLoadMessage := ""
17 | 
18 | logLevel := Level.Warn
19 | 


--------------------------------------------------------------------------------
/scala/src/main/scala/perf.scala:
--------------------------------------------------------------------------------
  1 | // This file was formerly a part of Julia. License is MIT: https://julialang.org/license
  2 | 
  3 | import scala.util._
  4 | import java.io._
  5 | import breeze.linalg._
  6 | import breeze.numerics._
  7 | import breeze.stats._
  8 | import breeze.math._
  9 | //import com.github.fommil.netlib.{BLAS}
 10 | 
 11 | object PerfBreeze {
 12 |   final val NITER = 5
 13 | 
 14 |   // print results appropriately. times are in milliseconds
 15 |   def print_perf(name:String, t:Double) = {
 16 |     printf("scala,%s,%.9f\n", name, t/1e6)
 17 |   }
 18 | 
 19 |   // time fib
 20 |   def fib(n:Int):Int = {
 21 |     if (n < 2) n else fib(n-1) + fib(n-2)
 22 |   }
 23 | 
 24 |   def time_fib() = {
 25 |     assert(fib(20) == 6765)
 26 |     var tmin = Long.MaxValue
 27 |     var f = 0
 28 | 
 29 |     for(i <- 1 to NITER) {
 30 |       val t1 = System.nanoTime()
 31 |       for(j <- 1 to 1000) {
 32 |         f += fib(20)
 33 |       }
 34 |       val t = System.nanoTime() - t1
 35 |       if(t < tmin) tmin = t
 36 |     }
 37 | 
 38 |     tmin / 1000.0
 39 |   }
 40 | 
 41 |   // time parseint
 42 |   def time_parseint() = {
 43 |     val generator = scala.util.Random
 44 |     var tmin = Long.MaxValue
 45 | 
 46 |     for(i <- 1 to NITER) {
 47 |       var rand:Int = 0
 48 |       var rands:String = "0"
 49 |       var parsed:Int = 0
 50 |       val t1 = System.nanoTime()
 51 |       for(j <- 1 to 1000) {
 52 |         rand = generator.nextInt()
 53 |         rands = if(rand < 0) "-" + abs(rand).toHexString else rand.toHexString
 54 |         parsed = Integer.parseInt(rands, 16)
 55 |         assert(rand == parsed)
 56 |       }
 57 |       val t = System.nanoTime() - t1
 58 |       if(t < tmin) tmin = t
 59 |     }
 60 |     tmin / 1000.0
 61 |   }
 62 | 
 63 |   // time mandel
 64 |   def mandel(zin:Complex):Int = {
 65 |     val c = zin
 66 |     var z = zin
 67 |     val maxiter = 80
 68 |     for(n <- 0 to maxiter) {
 69 |       if(z.abs > 2) return n
 70 |       z = c + (z * z)
 71 |     }
 72 |     maxiter
 73 |   }
 74 | 
 75 |   def mandelperf() = {
 76 |     for(re <- -20 to 5; im <- -10 to 10) yield mandel(re/10.0 + i * im/10.0)
 77 |   }
 78 | 
 79 |   def time_mandel() = {
 80 |     var mandel_sum = 0
 81 |     var mandel_sum2 = 0
 82 |     var tmin = Long.MaxValue
 83 | 
 84 |     for(i <- 1 to NITER) {
 85 |       val t1 = System.nanoTime()
 86 |       for(j <- 1 to 100) {
 87 |         val mandel_arr = mandelperf()
 88 |         if(j == 1) {
 89 |           mandel_sum = sum(mandel_arr)
 90 |           mandel_sum2 += mandel_sum
 91 |         }
 92 |       }
 93 |       val t = System.nanoTime() - t1
 94 |       if(t < tmin) tmin = t
 95 |     }
 96 |     assert(mandel_sum == 14791)
 97 |     assert(mandel_sum2 == mandel_sum * NITER)
 98 |     tmin / 100.0
 99 |   }
100 | 
101 |   // time quicksort
102 |   def quicksort(a:Array[Double], lo:Int, hi:Int):Array[Double] = {
103 |     var i, l = lo
104 |     var j = hi
105 | 
106 |     def _swap(i:Int, j:Int) = {
107 |       val tmp = a(i)
108 |       a(i) = a(j)
109 |       a(j) = tmp
110 |     }
111 | 
112 |     while(i < hi) {
113 |       val pivot = a((l+hi)>>>1)
114 |       while(i <= j) {
115 |         while(a(i) < pivot) i += 1
116 |         while(a(j) > pivot) j -= 1
117 |         if(i <= j) {
118 |           _swap(i, j)
119 |           i += 1
120 |           j -= 1
121 |         }
122 |       }
123 |       if(l < j) quicksort(a, l, j)
124 |       l = j
125 |       j = hi
126 |     }
127 |     a
128 |   }
129 | 
130 |   /*
131 |   def checksorted(a:Array[Double]):Boolean = {
132 |     for(i <- 0 to a.length-2) {
133 |       assert(a(i) < a(i+1))
134 |     }
135 |     true
136 |   }
137 |   */
138 | 
139 |   def time_quicksort() = {
140 |     var tmin = Long.MaxValue
141 | 
142 |     for(i <- 1 to NITER) {
143 |       val t1 = System.nanoTime()
144 |       for(j <- 1 to 1000) {
145 |         val A = DenseVector.rand[Double](5000)
146 |         quicksort(A.data, 0, 4999)
147 |       }
148 |       val t = System.nanoTime() - t1
149 |       if(t < tmin) tmin = t
150 |     }
151 |     tmin / 1000.0
152 |   }
153 | 
154 |   // time pisum
155 |   def pisum() = {
156 |     var sum = 0.0
157 |     for(j <- 1 to 500) {
158 |       sum = 0.0
159 |       for(k <- 1 to 10000) {
160 |         sum += 1.0/(k*k)
161 |       }
162 |     }
163 |     sum
164 |   }
165 | 
166 |   def time_pisum() = {
167 |     var tmin = Long.MaxValue
168 |     var pi = 0:Double
169 |     for(i <- 1 to NITER) {
170 |       val t1 = System.nanoTime()
171 |       pi = pisum()
172 |       val t = System.nanoTime() - t1
173 |       if(t < tmin) tmin = t
174 |       assert(abs(pi-1.644834071848065) < 1e-12)
175 |     }
176 |     tmin
177 |   }
178 | 
179 |   // time printfd
180 |   def printfd(n:Int) = {
181 |     var stream = None: Option[PrintStream]
182 |     try {
183 |       stream = Some(new PrintStream(new BufferedOutputStream(new FileOutputStream("/dev/null"))))
184 |       val valid_stream = stream.get
185 |       for (i <- 1 to n) {
186 |         valid_stream.printf(i + " " + i)
187 |       }
188 |     } catch {
189 |       case e: Exception => println("Exception caught: " + e)
190 |     } finally {
191 |       if(stream.isDefined) stream.get.close()
192 |     }
193 |   }
194 | 
195 |   def time_printfd() = {
196 |     var tmin = Long.MaxValue
197 |     for(i <- 1 to NITER) {
198 |       val t1 = System.nanoTime()
199 |       printfd(100000)
200 |       val t = System.nanoTime() - t1
201 |       if(t < tmin) tmin = t
202 |     }
203 |     tmin
204 |   }
205 | 
206 |   // random matrix statistics
207 |   def randmatstat(t:Int):(Double,Double) = {
208 |     val n = 5
209 |     val v = DenseVector.zeros[Double](t)
210 |     val w = DenseVector.zeros[Double](t)
211 | 
212 |     val g = breeze.stats.distributions.Gaussian(0, 1)
213 |     for(i <- 0 to t-1) {
214 |       val a = DenseMatrix.rand(n, n, g)
215 |       val b = DenseMatrix.rand(n, n, g)
216 |       val c = DenseMatrix.rand(n, n, g)
217 |       val d = DenseMatrix.rand(n, n, g)
218 |       val P = DenseMatrix.horzcat(a, b, c, d)
219 |       val Q = DenseMatrix.vertcat(DenseMatrix.horzcat(a, b), DenseMatrix.horzcat(c, d))
220 |       val V = P.t * P
221 |       val W = Q.t * Q
222 | 
223 |       v(i) = trace(V * V * V * V)
224 |       w(i) = trace(W * W * W * W)
225 |     }
226 |     (stddev(v)/mean(v), stddev(w)/mean(w))
227 |   }
228 | 
229 |   def time_randmatstat() = {
230 |     var tmin = Long.MaxValue
231 |     for(i <- 1 to NITER) {
232 |       val t1 = System.nanoTime()
233 |       val (s1, s2) = randmatstat(1000)
234 |       val t = System.nanoTime() - t1
235 |       assert(0.5 < s1 && s1 < 1.0 && 0.5 < s2 && s2 < 1.0)
236 | 
237 |       if(t < tmin) tmin = t
238 |     }
239 |     tmin
240 |   }
241 | 
242 |   // random matrix multiplication
243 |   def randmatmul(t:Int):DenseMatrix[Double] = {
244 |     val m1 = randomDouble((t, t))
245 |     val m2 = randomDouble((t, t))
246 |     m1 * m2
247 |   }
248 | 
249 |   def time_randmatmul() = {
250 |     var tmin = Long.MaxValue
251 |     for(i <- 1 to NITER) {
252 |       val t1 = System.nanoTime()
253 |       val m = randmatmul(1000)
254 |       val t = System.nanoTime() - t1
255 |       assert(0 <= m(0,0))
256 | 
257 |       if(t < tmin) tmin = t
258 |     }
259 |     tmin
260 |   }
261 | 
262 | 
263 |   def main(args: Array[String]) = {
264 |     //println("BLAS: " + BLAS.getInstance().getClass().getName())
265 |     print_perf("fib", time_fib())
266 |     print_perf("parse_int", time_parseint())
267 |     print_perf("mandel", time_mandel())
268 |     print_perf("quicksort", time_quicksort())
269 |     print_perf("pi_sum", time_pisum())
270 |     print_perf("rand_mat_stat", time_randmatstat())
271 |     print_perf("rand_mat_mul", time_randmatmul())
272 |     print_perf("printfd", time_printfd())
273 |   }
274 | }
275 | 


--------------------------------------------------------------------------------