├── .github
└── workflows
│ └── benchmarks.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── benchmarks
└── .gitignore
├── bin
├── .gitignore
├── benchmarks.csv
├── benchmarks.ipynb
├── collect.jl
├── plot.jl
├── table.jl
└── versions.sh
├── java
├── .gitignore
├── pom.xml
├── setup.sh
└── src
│ └── main
│ └── java
│ ├── Complex.java
│ ├── PerfBLAS.java
│ └── PerfPure.java
├── lua
└── lua-install.sh
├── perf.R
├── perf.c
├── perf.f90
├── perf.go
├── perf.jl
├── perf.js
├── perf.lua
├── perf.m
├── perf.nb
├── perf.py
├── perfutil.jl
├── randmtzig.c
├── rust
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── rust-toolchain
└── src
│ ├── direct_blas.rs
│ ├── main.rs
│ └── util.rs
└── scala
├── .gitignore
├── build.sbt
└── src
└── main
└── scala
└── perf.scala
/.github/workflows/benchmarks.yml:
--------------------------------------------------------------------------------
1 | name: Benchmarks
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches:
7 | - master
8 | tags: '*'
9 | workflow_dispatch:
10 |
11 | concurrency:
12 | # Skip intermediate builds: all builds except for builds on the `master` or `release-*` branches
13 | # Cancel intermediate builds: only pull request builds
14 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref != 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release-') || github.run_number }}
15 | cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
16 |
17 | permissions:
18 | contents: read
19 |
20 | jobs:
21 | test:
22 | runs-on: ${{ matrix.os }}
23 | defaults:
24 | run:
25 | shell: bash
26 | strategy:
27 | matrix:
28 | os: [ubuntu-latest]
29 | java-version: ['17']
30 | julia-version: ['1.9.3']
31 | python-version: ['3.10']
32 | numpy-version: ['1.23.2']
33 | gfortran-version: ['9'] # Note: unused, since gfortran is built-in.
34 | rust-version: ['1.42.0'] # Note: unused since controlled by `rust/rust-toolchain`
35 | js-version: ['18']
36 | r-version: ['4.2.1']
37 | lua-version: ['latest'] # Note: unused since lua distribution manually downloaded
38 | go-version: ['1.19']
39 |
40 | steps:
41 | - uses: actions/checkout@v3
42 | with:
43 | persist-credentials: false
44 |       - name: "Cache Julia"
45 |         id: cache-julia # referenced by the "Build Julia" step through steps.cache-julia.outputs.cache-hit
46 |         uses: actions/cache@v4 # v1/v2 of actions/cache were retired by GitHub; v4 keeps the same path/key inputs
47 |         with:
48 |           path: ~/julia
49 |           key: ${{ runner.os }}-v${{ matrix.julia-version }}
50 | - name: "Build Julia"
51 | if: steps.cache-julia.outputs.cache-hit != 'true'
52 | uses: julia-actions/build-julia@v1
53 | with:
54 | ref: v${{ matrix.julia-version }}
55 | - name: "Set up dSFMT"
56 | run: |
57 | cd ~/
58 | mkdir -p dSFMT
59 | cd dSFMT
60 | wget https://github.com/MersenneTwister-Lab/dSFMT/archive/refs/tags/v2.2.4.tar.gz
61 | echo "39682961ecfba621a98dbb6610b6ae2b7d6add450d4f08d8d4edd0e10abd8174 v2.2.4.tar.gz" | sha256sum --check --status
62 | tar -xzf v2.2.4.tar.gz
63 | mv dSFMT-*/* ./
64 | - name: "Set up OpenBLAS"
65 | run: |
66 | sudo apt-get install -y libopenblas-dev
67 | - name: "Set up Python"
68 | uses: actions/setup-python@v1
69 | with:
70 | python-version: ${{ matrix.python-version }}
71 | - name: "Set up NumPy"
72 | run: pip install numpy==${{ matrix.numpy-version }}
73 | - name: "Set up Rust"
74 | uses: actions-rs/toolchain@v1
75 | with:
76 | toolchain: ${{ matrix.rust-version }}
77 | - name: "Set up Java"
78 | uses: actions/setup-java@v2
79 | with:
80 | distribution: 'temurin'
81 | java-version: ${{ matrix.java-version }}
82 | cache: 'maven'
83 | - name: "Set up JavaScript"
84 | uses: actions/setup-node@v2
85 | with:
86 | node-version: ${{ matrix.js-version }}
87 | - name: "Set up R"
88 | uses: r-lib/actions/setup-r@v2
89 | with:
90 | r-version: ${{ matrix.r-version }}
91 | - name: "Set up LuaJit"
92 | run: |
93 | cd ~/work/Microbenchmarks/Microbenchmarks/lua
94 | ./lua-install.sh
95 | - name: "Set up Go"
96 | uses: actions/setup-go@v3
97 | with:
98 | go-version: ${{ matrix.go-version }}
99 | - name: "Run benchmark"
100 | run: |
101 | JULIAHOME=~/julia DSFMTDIR=~/dSFMT/ make gh_action_benchmarks.html
102 | - name: "Print benchmark data"
103 | run: cat gh_action_benchmarks.csv
104 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /perf.h
2 | /versions.csv
3 | /benchmarks.csv
4 | /benchmarks.txt
5 | /benchmarks.html
6 | /gopath
7 | /mods/*
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2009-2018 Jeff Bezanson, Stefan Karpinski, Viral B. Shah,
4 | and other contributors.
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | ifndef JULIAHOME
2 | $(error JULIAHOME not defined. Set value to the root of the Julia source tree.)
3 | endif
4 | ifndef DSFMTDIR
5 | $(error DSFMTDIR not defined. Set value to the root of the dSFMT source tree.)
6 | endif
7 |
8 |
9 | # Will make multi-line targets work
10 | # (so we can use @for on the second line)
11 | .ONESHELL:
12 |
13 | include $(JULIAHOME)/Make.inc
14 | include $(JULIAHOME)/deps/Versions.make
15 |
16 | NODEJSBIN = node
17 |
18 | ITERATIONS=$(shell seq 1 5)
19 |
20 | #Use python2 for Python 2.x
21 | PYTHON = python3
22 |
23 | OCTAVE = octave-cli
24 |
25 | ifeq ($(OS), WINNT)
26 | MATHEMATICABIN = MathKernel
27 | else ifeq ($(OS), Darwin)
28 | MATHEMATICABIN = MathKernel
29 | else
30 | MATHEMATICABIN = math
31 | endif
32 |
33 | FFLAGS=-fexternal-blas
34 | #gfortran cannot multiply matrices using 64-bit external BLAS.
35 | ifeq ($(findstring gfortran, $(FC)), gfortran)
36 | ifeq ($(USE_BLAS64), 1)
37 | FFLAGS=
38 | endif
39 | FFLAGS+= -static-libgfortran
40 | endif
41 |
42 | #Which libm library am I using?
43 | LIBMDIR = $(JULIAHOME)/usr/lib/
44 | ifeq ($(USE_SYSTEM_LIBM), 0)
45 | ifeq ($(USE_SYSTEM_OPENLIBM), 0)
46 | LIBM = $(LIBMDIR)libopenlibm.a
47 | endif
48 | endif
49 |
50 | default: benchmarks.html
51 |
52 | export OMP_NUM_THREADS=1
53 | export GOTO_NUM_THREADS=1
54 | export OPENBLAS_NUM_THREADS=1
55 |
56 | perf.h: $(JULIAHOME)/deps/Versions.make
57 | echo '#include "cblas.h"' > $@
58 | echo '#include "$(DSFMTDIR)/dSFMT.c"' >> $@
59 |
60 | bin/perf%: perf.c perf.h
61 | $(CC) -std=c99 -O$* $< -o $@ -I$(DSFMTDIR) -lopenblas -L$(LIBMDIR) $(LIBM) $(CFLAGS) -lpthread
62 |
63 | bin/fperf%: perf.f90
64 | mkdir -p mods/$@ #Modules for each binary go in separate directories
65 | $(FC) $(FFLAGS) -Jmods/$@ -O$* $< -o $@ -lopenblas -L$(LIBMDIR) $(LIBM) -lpthread
66 |
67 | benchmarks/c.csv: \
68 | benchmarks/c0.csv \
69 | benchmarks/c1.csv \
70 | benchmarks/c2.csv \
71 | benchmarks/c3.csv
72 | cat $^ > $@
73 |
74 | benchmarks/fortran.csv: \
75 | benchmarks/fortran0.csv \
76 | benchmarks/fortran1.csv \
77 | benchmarks/fortran2.csv \
78 | benchmarks/fortran3.csv
79 | cat $^ > $@
80 |
81 |
82 | benchmarks/c%.csv: bin/perf%
83 | @for t in $(ITERATIONS); do $<; done >$@
84 |
85 | benchmarks/fortran%.csv: bin/fperf%
86 | @for t in $(ITERATIONS); do $<; done >$@
87 |
88 | benchmarks/go.csv: export GOPATH=$(abspath gopath)
89 | benchmarks/go.csv: perf.go
90 | go env -w GO111MODULE=off
91 | export CGO_LDFLAGS="-L${LIBM} -lopenblas"
92 | go get gonum.org/v1/netlib/blas/netlib
93 | go get gonum.org/v1/gonum/mat
94 | go get gonum.org/v1/gonum/stat
95 | @for t in $(ITERATIONS); do go run $<; done >$@
96 |
97 | benchmarks/julia.csv: perf.jl
98 | @for t in $(ITERATIONS); do $(JULIAHOME)/usr/bin/julia $<; done >$@
99 |
100 | benchmarks/python.csv: perf.py
101 | @for t in $(ITERATIONS); do $(PYTHON) $<; done >$@
102 |
103 | benchmarks/matlab.csv: perf.m
104 | @for t in $(ITERATIONS); do matlab -nojvm -singleCompThread -r 'perf; perf; exit' | grep ^matlab | tail -8; done >$@
105 |
106 | benchmarks/octave.csv: perf.m
107 | @for t in $(ITERATIONS); do $(OCTAVE) -q --eval perf 2>/dev/null; done >$@
108 |
109 | benchmarks/r.csv: perf.R
110 | @for t in $(ITERATIONS); do cat $< | R --vanilla --slave 2>/dev/null; done >$@
111 |
112 | benchmarks/javascript.csv: perf.js
113 | @for t in $(ITERATIONS); do $(NODEJSBIN) $<; done >$@
114 |
115 | benchmarks/mathematica.csv: perf.nb
116 | @for t in $(ITERATIONS); do $(MATHEMATICABIN) -noprompt -run "<<$<; Exit[]"; done >$@
117 |
118 | benchmarks/lua.csv: perf.lua
119 | export BIT=64
120 | @for t in $(ITERATIONS); do ./lua/ulua/bin/scilua $<; done >$@
121 |
122 | benchmarks/java.csv: java/src/main/java/PerfBLAS.java
123 | cd java
124 | sh setup.sh
125 | @for t in $(ITERATIONS); do mvn -q exec:java; done >../$@
126 |
127 | benchmarks/scala.csv: scala/src/main/scala/perf.scala scala/build.sbt
128 | cd scala
129 | @for t in $(ITERATIONS); do sbt run; done >../$@
130 |
131 | benchmarks/rust.csv: rust/src/main.rs rust/src/util.rs rust/Cargo.lock
132 | cd rust
133 | @for t in $(ITERATIONS); do cargo run --release -q; done >../$@
134 |
135 | LANGUAGES = c fortran go java javascript julia lua mathematica matlab octave python r rust
136 | GH_ACTION_LANGUAGES = c fortran go java javascript julia lua python r rust
137 |
138 | # These were formerly listed in LANGUAGES, but I can't get them to run
139 | # 2017-09-27 johnfgibson
140 | # scala
141 |
142 | BENCHMARKS = $(foreach lang,$(LANGUAGES),benchmarks/$(lang).csv)
143 | GH_ACTION_BENCHMARKS = $(foreach lang,$(GH_ACTION_LANGUAGES),benchmarks/$(lang).csv)
144 |
145 | COLON_SEPARATED_GHA_LANGUAGES = $(shell echo $(GH_ACTION_LANGUAGES) | sed 's/ /:/g')
146 |
147 | versions.csv: bin/versions.sh
148 | $^ >$@
149 |
150 | gh_action_versions.csv: bin/versions.sh
151 | $^ $(COLON_SEPARATED_GHA_LANGUAGES) >$@
152 |
153 | benchmarks.csv: bin/collect.jl $(BENCHMARKS)
154 | @$(call PRINT_JULIA, $^ >$@)
155 |
156 | gh_action_benchmarks.csv: bin/collect.jl $(GH_ACTION_BENCHMARKS)
157 | @$(call PRINT_JULIA, $^ >$@)
158 |
159 | benchmarks.html: bin/table.jl versions.csv benchmarks.csv
160 | @$(call PRINT_JULIA, $^ >$@)
161 |
162 | gh_action_benchmarks.html: bin/table.jl gh_action_versions.csv gh_action_benchmarks.csv
163 | @$(call PRINT_JULIA, $^ >$@)
164 |
165 | clean:
166 | @rm -rf perf.h bin/perf* bin/fperf* benchmarks/*.csv benchmarks.csv mods *~ octave-core perf.log gopath/*
167 |
168 | .PHONY: default all perf clean # `default` is the entry target and produces no file of that name
169 |
170 | .PRECIOUS: bin/perf0 bin/perf1 bin/perf2 bin/perf3
171 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Microbenchmarks
2 |
3 | This is a collection of micro-benchmarks used to compare Julia's performance against
4 | that of other languages.
5 | It was formerly part of the Julia source tree.
6 | The results of these benchmarks are used to generate the performance graph on the
7 | [Julia benchmarks page](https://julialang.org/benchmarks).
8 |
9 | ## Running benchmarks
10 |
11 | This repository assumes that Julia has been built from source and that two variables are set:
12 | `JULIAHOME`, pointing to the root of the Julia source tree, and `DSFMTDIR`, pointing to the root of the dSFMT source tree.
13 | Both can be environment variables or be set when invoking `make`, e.g. `make JULIAHOME=path/to/julia DSFMTDIR=path/to/dSFMT`.
14 |
15 | To build binaries and run the benchmarks, simply run `make`.
16 | Note that this refers to GNU Make, so BSD users will need to run `gmake`.
17 |
18 | ## Included languages:
19 |
20 | * C
21 | * Fortran
22 | * Go
23 | * Java
24 | * JavaScript
25 | * Julia
26 | * LuaJIT
27 | * Mathematica
28 | * Matlab
29 | * Octave
30 | * Python
31 | * R
32 | * Rust
33 | * Scala (currently not part of the `LANGUAGES` list in the Makefile)
34 |
--------------------------------------------------------------------------------
/benchmarks/.gitignore:
--------------------------------------------------------------------------------
1 | /*.csv
2 |
--------------------------------------------------------------------------------
/bin/.gitignore:
--------------------------------------------------------------------------------
1 | /perf*
2 | /fperf*
3 |
--------------------------------------------------------------------------------
/bin/benchmarks.csv:
--------------------------------------------------------------------------------
1 | c,iteration_pi_sum,27.368069
2 | c,matrix_multiply,72.014809
3 | c,matrix_statistics,4.528999
4 | c,parse_integers,0.099249
5 | c,print_to_file,9.929895
6 | c,recursion_fibonacci,0.022732
7 | c,recursion_quicksort,0.258923
8 | c,userfunc_mandelbrot,0.076702
9 | fortran,iteration_pi_sum,27.368789
10 | fortran,matrix_multiply,83.416557
11 | fortran,matrix_statistics,6.984467
12 | fortran,parse_integers,0.682692
13 | fortran,print_to_file,59.287684
14 | fortran,recursion_fibonacci,0.022466
15 | fortran,recursion_quicksort,0.308118
16 | fortran,userfunc_mandelbrot,0.053836
17 | go,iteration_pi_sum,27.3706417
18 | go,matrix_multiply,102.9984314
19 | go,matrix_statistics,27.57979652
20 | go,parse_integers,0.0953154785
21 | go,print_to_file,17.37055717
22 | go,recursion_fibonacci,0.04101122396666667
23 | go,recursion_quicksort,0.32159370860000003
24 | go,userfunc_mandelbrot,0.059685397066666666
25 | java,iteration_pi_sum,29.723044
26 | java,iteration_sinc_sum,0.118555
27 | java,matrix_multiply,581.467297
28 | java,matrix_statistics,22.776195
29 | java,parse_integers,0.314691
30 | java,print_to_file,95.850461
31 | java,recursion_fibonacci,0.082739
32 | java,recursion_quicksort,0.772211
33 | java,userfunc_mandelbrot,0.109468
34 | javascript,iteration_pi_sum,27.6
35 | javascript,matrix_multiply,2288.0
36 | javascript,matrix_statistics,63.3
37 | javascript,parse_integers,0.5
38 | javascript,print_to_file,72.0
39 | javascript,recursion_fibonacci,0.08
40 | javascript,recursion_quicksort,1.11
41 | javascript,userfunc_mandelbrot,0.087
42 | julia,iteration_pi_sum,27.670768
43 | julia,matrix_multiply,70.249355
44 | julia,matrix_statistics,7.396705
45 | julia,parse_integers,0.217658
46 | julia,print_to_file,10.868424
47 | julia,recursion_fibonacci,0.030162
48 | julia,recursion_quicksort,0.258018
49 | julia,userfunc_mandelbrot,0.052706
50 | lua,iteration_pi_sum,27.368
51 | lua,matrix_multiply,77.87
52 | lua,matrix_statistics,7.731
53 | lua,parse_integers,0.097
54 | lua,print_to_file,5.996
55 | lua,recursion_fibonacci,0.027
56 | lua,recursion_quicksort,0.404
57 | lua,userfunc_mandelbrot,0.077
58 | mathematica,iteration_pi_sum,39.862
59 | mathematica,matrix_multiply,85.409
60 | mathematica,matrix_statistics,33.94
61 | mathematica,parse_integers,2.249
62 | mathematica,print_to_file,664.313
63 | mathematica,recursion_fibonacci,3.002
64 | mathematica,recursion_quicksort,11.518
65 | mathematica,userfunc_mandelbrot,1.403
66 | matlab,iteration_pi_sum,27.56
67 | matlab,matrix_multiply,83.906
68 | matlab,matrix_statistics,36.674
69 | matlab,parse_integers,17.688
70 | matlab,print_to_file,1009.8
71 | matlab,recursion_fibonacci,0.4
72 | matlab,recursion_quicksort,0.612
73 | matlab,userfunc_mandelbrot,0.755
74 | octave,iteration_pi_sum,8691.11084938
75 | octave,matrix_multiply,87.54110336
76 | octave,matrix_statistics,209.43498611
77 | octave,parse_integers,57.0089817
78 | octave,print_to_file,1293.26105118
79 | octave,recursion_fibonacci,228.35707664
80 | octave,recursion_quicksort,575.16098022
81 | octave,userfunc_mandelbrot,445.80197334
82 | python,iteration_pi_sum,404.39462661743164
83 | python,matrix_multiply,84.99646186828613
84 | python,matrix_statistics,80.32107353210449
85 | python,parse_integers,1.9617080688476562
86 | python,print_to_file,47.04570770263672
87 | python,recursion_fibonacci,2.1429061889648438
88 | python,recursion_quicksort,9.729623794555664
89 | python,userfunc_mandelbrot,5.036592483520508
90 | r,iteration_pi_sum,320.0
91 | r,matrix_multiply,595.0
92 | r,matrix_statistics,95.0
93 | r,parse_integers,5.0
94 | r,print_to_file,1009.0
95 | r,recursion_fibonacci,6.0
96 | r,recursion_quicksort,15.0
97 | r,userfunc_mandelbrot,15.0
98 | rust,iteration_pi_sum,27.373818
99 | rust,matrix_multiply,77.615658
100 | rust,matrix_statistics,6.497923
101 | rust,parse_integers,0.121999
102 | rust,print_to_file,8.81542
103 | rust,recursion_fibonacci,0.039227
104 | rust,recursion_quicksort,0.253416
105 | rust,userfunc_mandelbrot,0.057636
106 |
--------------------------------------------------------------------------------
/bin/collect.jl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env julia
2 |
3 | # Best (smallest positive) time seen so far for each (language, benchmark) pair
4 | const data = Dict{Tuple{String,String},Float64}()
5 |
6 | # Every command-line argument is a file whose rows look like `language,benchmark,time`
7 | for path in ARGS
8 |     for row in eachline(path)
9 |         lang, bench, raw = split(row, ',')
10 |         best = get(data, (lang, bench), Inf)
11 |         t = parse(Float64, raw)
12 |         # Ignore non-positive timings and anything that does not beat the current best
13 |         if 0 < t && t < best
14 |             data[lang, bench] = t
15 |         end
16 |     end
17 | end
18 |
19 | # Emit one `language,benchmark,min_time` row per pair, in sorted order
20 | for (key, fastest) in sort!(collect(data))
21 |     println(key[1], ",", key[2], ",", fastest)
22 | end
23 |
--------------------------------------------------------------------------------
/bin/plot.jl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env julia
2 |
3 | # Producing the Julia Microbenchmarks plot
4 |
5 | # Load the required Julia packages
6 | using Base.MathConstants
7 | using CSV
8 | using DataFrames
9 | using Gadfly
10 | using StatsBase
11 |
12 | # Load benchmark data from file
13 | benchmarks =
14 | CSV.read("benchmarks.csv", DataFrame; header = ["language", "benchmark", "time"])
15 |
16 | # Capitalize and decorate language names from datafile
17 | dict = Dict(
18 | "c" => "C",
19 | "julia" => "Julia",
20 | "lua" => "LuaJIT",
21 | "fortran" => "Fortran",
22 | "java" => "Java",
23 | "javascript" => "JavaScript",
24 | "matlab" => "Matlab",
25 | "mathematica" => "Mathematica",
26 | "python" => "Python",
27 | "octave" => "Octave",
28 | "r" => "R",
29 | "rust" => "Rust",
30 | "go" => "Go",
31 | );
32 | benchmarks[!, :language] = [dict[lang] for lang in benchmarks[!, :language]]
33 |
34 | # Normalize benchmark times by C times
35 | ctime = benchmarks[benchmarks[!, :language] .== "C", :]
36 | benchmarks = innerjoin(benchmarks, ctime, on = :benchmark, makeunique = true)
37 | select!(benchmarks, Not(:language_1))
38 | rename!(benchmarks, :time_1 => :ctime)
39 | benchmarks[!, :normtime] = benchmarks[!, :time] ./ benchmarks[!, :ctime];
40 |
41 | # Compute the geometric mean of the C-normalized times, once per language
42 | langs = [];
43 | means = [];
44 | priorities = [];
45 | for lang in unique(benchmarks[!, :language])  # distinct languages only, so `langmean` gets exactly one row each
46 |     data = benchmarks[benchmarks[!, :language] .== lang, :]
47 |     gmean = geomean(data[!, :normtime])
48 |     push!(langs, lang)
49 |     push!(means, gmean)
50 |     if lang == "C"  # sort key: C first, Julia second, every other language after them
51 |         push!(priorities, 1)
52 |     elseif lang == "Julia"
53 |         push!(priorities, 2)
54 |     else
55 |         push!(priorities, 3)
56 |     end
57 | end
58 |
59 | # Add the geometric means back into the benchmarks dataframe
60 | langmean = DataFrame(language = langs, geomean = means, priority = priorities)
61 | benchmarks = innerjoin(benchmarks, langmean, on = :language)
62 |
63 | # Put C first, Julia second, and sort the rest by geometric mean
64 | sort!(benchmarks, [:priority, :geomean]);
65 | sort!(langmean, [:priority, :geomean]);
66 |
67 | p = plot(
68 | benchmarks,
69 | x = :language,
70 | y = :normtime,
71 | color = :benchmark,
72 | Scale.y_log10,
73 | Guide.ylabel(nothing),
74 | Guide.xlabel(nothing),
75 | Theme(
76 | guide_title_position = :left,
77 | colorkey_swatch_shape = :circle,
78 | minor_label_font = "Georgia",
79 | major_label_font = "Georgia",
80 | ),
81 | )
82 |
83 | draw(SVG("benchmarks.svg", 9inch, 9inch / golden), p)
84 |
--------------------------------------------------------------------------------
/bin/table.jl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env julia
2 |
3 | # This script generates an HTML table with the benchmark values and language versions.
4 |
5 | import Statistics
6 | import Printf
7 |
8 | const benchmark_order = [
9 | "iteration_pi_sum",
10 | "recursion_fibonacci",
11 | "recursion_quicksort",
12 | "parse_integers",
13 | "print_to_file",
14 | "matrix_statistics",
15 | "matrix_multiply",
16 | "userfunc_mandelbrot",
17 | ]
18 |
19 | const versions = Dict{String, String}()
20 | const benchmarks = Dict{String, Dict{String, Float64}}()
21 |
22 | # Read versions.csv
23 | for line in eachline(ARGS[1])
24 | lang, version = split(line, ',')
25 | versions[lang] = version
26 | end
27 |
28 | # Read benchmarks.csv
29 | for line in eachline(ARGS[2])
30 | lang, bench, time_str = split(line, ',')
31 | times = get!(benchmarks, lang, Dict{String, Float64}())
32 | times[bench] = parse(Float64, time_str)
33 | end
34 |
35 | const labels = Dict{String, String}(
36 | "c" => "C" ,
37 | "julia" => "Julia" ,
38 | "lua" => "LuaJIT" ,
39 | "fortran" => "Fortran" ,
40 | "java" => "Java" ,
41 | "javascript" => "JavaScript" ,
42 | "matlab" => "Matlab" ,
43 | "python" => "Python" ,
44 | "mathematica" => "Mathematica" ,
45 | "r" => "R" ,
46 | "octave" => "Octave" ,
47 | "go" => "Go" ,
48 | "rust" => "Rust" ,
49 | )
50 |
51 | # Produce the sorting order for the list of languages
52 | function lang_by(lang::String)
53 |     # Sort key for the language columns: C always comes first ...
54 |     lang == "c" && return -Inf
55 |     # ... and Julia is placed immediately after C.
56 |     lang == "julia" && return -floatmax()
57 |     # Every other language is ranked by the geometric mean of its benchmark values, computed as
58 |     # exp(mean(log(x))); see https://en.wikipedia.org/wiki/Geometric_mean#Relationship_with_logarithms
59 |     return exp(Statistics.mean(log.(collect(values(benchmarks[lang])))))
60 | end
61 |
62 | const language_order = sort!(collect(keys(benchmarks)), by=lang_by)
63 |
64 | print("""
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | |
74 | """)
75 |
76 | for lang in language_order
77 | label = get(labels, lang, lang)
78 | println(" $label | ")
79 | end
80 |
81 | print("""
82 |
83 | |
84 | """)
85 |
86 | for lang in language_order
87 | version = get(versions, lang, "???")
88 | println(" $version | ")
89 | end
90 |
91 | print("""
92 |
93 |
94 |
95 | """)
96 |
97 | for benchmark in benchmark_order
98 | println(" $benchmark | ")
99 | c_time = benchmarks["c"][benchmark]
100 | for lang in language_order
101 | rel_time = "n/a"
102 | if haskey(benchmarks[lang], benchmark)
103 | rel_time = Printf.@sprintf "%.2f" benchmarks[lang][benchmark]/c_time
104 | end
105 | println(" $rel_time | ")
106 | end
107 | println("
")
108 | end
109 |
110 | print("""
111 |
112 |
113 |
114 |
115 | """)
116 |
--------------------------------------------------------------------------------
/bin/versions.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # User argument declaring what languages to query:
4 | DEFAULT_LANGUAGES="c:fortran:go:java:javascript:julia:lua:mathematica:matlab:octave:python:r:rust"
5 | LANGUAGES=${1:-DEFAULT_LANGUAGES}
6 |
7 | LANGUAGES=":${LANGUAGES}:"
8 |
9 | # Check if ":c:" in languages:
10 | if [[ $LANGUAGES == *":c:"* ]]; then
11 | echo -n "c,gcc "
12 | gcc -v 2>&1 | grep "gcc version" | cut -f3 -d" "
13 | fi
14 |
15 | if [[ $LANGUAGES == *":fortran:"* ]]; then
16 | echo -n "fortran,gcc "
17 | gfortran -v 2>&1 | grep "gcc version" | cut -f3 -d" "
18 | fi
19 |
20 | if [[ $LANGUAGES == *":go:"* ]]; then
21 | echo -n go,
22 | go version | cut -f3 -d" "
23 | fi
24 |
25 | if [[ $LANGUAGES == *":java:"* ]]; then
26 | echo -n java,
27 | java -version 2>&1 | grep "version" | cut -f2 -d "\""
28 | fi
29 |
30 | if [[ $LANGUAGES == *":javascript:"* ]]; then
31 | echo -n "javascript,V8 "
32 | node -e "console.log(process.versions.v8)"
33 | fi
34 |
35 | if [[ $LANGUAGES == *":julia:"* ]]; then
36 | echo -n "julia,"
37 | $JULIAHOME/usr/bin/julia -v | cut -f3 -d" "
38 | fi
39 |
40 | if [[ $LANGUAGES == *":lua:"* ]]; then
41 | echo -n "lua,"
42 | (cd lua; ./ulua/luajit/*/Linux/x64/luajit -v | cut -f2 -d" ")
43 | fi
44 |
45 | if [[ $LANGUAGES == *":mathematica:"* ]]; then
46 | echo -n "mathematica,"
47 | echo quit | math -version | head -n 1 | cut -f2 -d" "
48 | fi
49 |
50 | if [[ $LANGUAGES == *":matlab:"* ]]; then
51 | echo -n "matlab,R"
52 | matlab -nodisplay -nojvm -nosplash -r "version -release, quit" | tail -n3 | head -n1 | cut -f5 -d" " | sed "s/'//g"
53 | fi
54 |
55 | if [[ $LANGUAGES == *":octave:"* ]]; then
56 | echo -n "octave,"
57 | octave-cli -v | grep version | cut -f4 -d" "
58 | fi
59 |
60 | if [[ $LANGUAGES == *":python:"* ]]; then
61 | echo -n "python,"
62 | python3 -V 2>&1 | cut -f2 -d" "
63 | fi
64 |
65 | if [[ $LANGUAGES == *":r:"* ]]; then
66 | echo -n "r,"
67 | R --version | grep "R version" | cut -f3 -d" "
68 | fi
69 |
70 | if [[ $LANGUAGES == *":rust:"* ]]; then
71 | echo -n "rust,"
72 | (cd rust; rustc --version | cut -c 7- | sed 's/ ([0-9a-f]* /
(/g')
73 | fi
74 |
--------------------------------------------------------------------------------
/java/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 |
--------------------------------------------------------------------------------
/java/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | julialang.org
4 | javaBenchmarks
5 | 0.0.1-SNAPSHOT
6 | javaBenchmarks
7 | micro benchmarks for Julia done in Java
8 |
9 |
10 | org.jblas
11 | jblas
12 | 1.2.3
13 |
14 |
15 | com.googlecode.efficient-java-matrix-library
16 | ejml
17 | 0.23
18 |
19 |
20 |
21 |
22 |
23 | maven-compiler-plugin
24 | 3.0
25 |
26 | 1.7
27 | 1.7
28 |
29 |
30 |
31 | org.codehaus.mojo
32 | exec-maven-plugin
33 | 1.2.1
34 |
35 |
36 |
37 | java
38 |
39 |
40 |
41 |
42 | PerfBLAS
43 |
44 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/java/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # This file was formerly a part of Julia. License is MIT: https://julialang.org/license
3 |
4 | mvn compile exec:java
5 | # requires maven and java 7
6 |
--------------------------------------------------------------------------------
/java/src/main/java/Complex.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Minimal immutable complex number with the static helpers used by the Mandelbrot code.
3 |  */
4 | public class Complex {
5 |     private final double re;
6 |     private final double im;
7 |
8 |     /** Creates the value {@code real + imag*i}. */
9 |     public Complex(double real, double imag) {
10 |         this.re = real;
11 |         this.im = imag;
12 |     }
13 |
14 |     /** Returns the modulus of {@code z}, i.e. sqrt(re^2 + im^2). */
15 |     public static double abs(Complex z) {
16 |         final double squaredNorm = z.re*z.re + z.im*z.im;
17 |         return Math.sqrt(squaredNorm);
18 |     }
19 |
20 |     /** Returns the component-wise sum {@code a + b} as a new instance. */
21 |     public static Complex add(Complex a, Complex b) {
22 |         final double sumRe = a.re + b.re;
23 |         final double sumIm = a.im + b.im;
24 |         return new Complex(sumRe, sumIm);
25 |     }
26 |
27 |     /** Returns the complex product {@code a * b} as a new instance. */
28 |     public static Complex mul(Complex a, Complex b) {
29 |         final double prodRe = a.re*b.re - a.im*b.im;
30 |         final double prodIm = a.re*b.im + a.im*b.re;
31 |         return new Complex(prodRe, prodIm);
32 |     }
33 | }
34 |
--------------------------------------------------------------------------------
/java/src/main/java/PerfBLAS.java:
--------------------------------------------------------------------------------
1 | import java.io.FileNotFoundException;
2 | import java.io.FileOutputStream;
3 | import java.io.PrintStream;
4 | import java.util.List;
5 | import java.util.Random;
6 |
7 | import org.jblas.DoubleMatrix;
8 |
9 | /**
10 |  * Benchmark tests that call BLAS through the jblas {@code DoubleMatrix} API.
11 |  *
12 |  * NOTE(review): randmatmul, randmatstat and mandel are private in this class, so they are
13 |  * never reached from PerfPure.runBenchmarks() (private methods are not dispatched virtually)
14 |  * and nothing in this class calls them. Confirm whether they were meant to override the
15 |  * PerfPure implementations.
16 |  */
17 | public class PerfBLAS extends PerfPure {
18 |
19 |     /** Entry point: runs the benchmark suite inherited from PerfPure. */
20 |     public static void main(String[] args) {
21 |         PerfBLAS p = new PerfBLAS();
22 |         p.runBenchmarks();
23 |     }
24 |
25 |     /** Multiplies two random i-by-i matrices and returns the first entry of the product. */
26 |     private double randmatmul(int i) {
27 |         DoubleMatrix a = DoubleMatrix.randn(i,i);
28 |         DoubleMatrix b = DoubleMatrix.randn(i,i);
29 |         return a.mmul(b).get(0);
30 |     }
31 |
32 |     /**
33 |      * Runs t trials on random 5x5 blocks a, b, c, d. Each trial records
34 |      * trace((P'P)^4) for P = [a b c d] and trace((Q'Q)^4) for Q = [a b; c d].
35 |      *
36 |      * @return {stdev/mean of the P traces, stdev/mean of the Q traces}
37 |      */
38 |     private double[] randmatstat(int t) {
39 |         int n=5;
40 |         DoubleMatrix p;
41 |         DoubleMatrix q;
42 |         DoubleMatrix v = new DoubleMatrix(new double[t][1]); //zeros(t,1);
43 |         DoubleMatrix w = new DoubleMatrix(new double[t][1]); //zeros(t,1);
44 |         for (int i=0; i < t; i++) {
45 |             DoubleMatrix a = DoubleMatrix.randn(n,n);
46 |             DoubleMatrix b = DoubleMatrix.randn(n,n);
47 |             DoubleMatrix c = DoubleMatrix.randn(n,n);
48 |             DoubleMatrix d = DoubleMatrix.randn(n,n);
49 |
50 |             p = DoubleMatrix.concatHorizontally(DoubleMatrix.concatHorizontally(a, b),DoubleMatrix.concatHorizontally(c, d));
51 |             q = DoubleMatrix.concatVertically(DoubleMatrix.concatHorizontally(a, b),DoubleMatrix.concatHorizontally(c, d));
52 |
53 |             // Two successive squarings turn P'P into (P'P)^4.
54 |             DoubleMatrix x = p.transpose().mmul(p);
55 |             x = x.mmul(x);
56 |             x = x.mmul(x);
57 |             v.data[i]=x.diag().sum();
58 |
59 |             x = q.transpose().mmul(q);
60 |             x = x.mmul(x);
61 |             x = x.mmul(x);
62 |             w.data[i]=x.diag().sum();
63 |
64 |         }
65 |
66 |         // Element type restored so the List-based stdev/mean helpers are called without raw types.
67 |         List<Double> vElements = v.elementsAsList();
68 |         List<Double> wElements = w.elementsAsList();
69 |
70 |         return new double[]{stdev(vElements)/mean(vElements),stdev(wElements)/mean(wElements)};
71 |     }
72 |
73 |     /**
74 |      * Iterates z = z*z + c starting from z = c = re + im*i.
75 |      *
76 |      * @return the number of iterations completed before |z| exceeded 2, or 80 when the
77 |      *         point never escapes. The previous code returned 81 in that case, which did
78 |      *         not match the count returned on the escape path.
79 |      */
80 |     private static int mandel(double re, double im) {
81 |         final int maxIter = 80;
82 |         Complex z = new Complex(re, im);
83 |         Complex c = new Complex(re, im);
84 |         for (int n = 0; n < maxIter; ++n) {
85 |             if (Complex.abs(z) > 2.0) {
86 |                 return n;
87 |             }
88 |
89 |             // z = z*z + c
90 |             z = Complex.add(Complex.mul(z, z), c);
91 |         }
92 |         return maxIter;
93 |     }
94 |
95 | }
96 |
97 |
--------------------------------------------------------------------------------
/java/src/main/java/PerfPure.java:
--------------------------------------------------------------------------------
1 | import java.io.FileNotFoundException;
2 | import java.io.FileOutputStream;
3 | import java.io.PrintStream;
4 | import java.util.List;
5 | import java.util.Random;
6 |
7 | import org.ejml.simple.SimpleMatrix;
8 |
9 | /**
10 | * (Below excerpt is printed on the website and repeated here)
11 | *
12 | * These benchmarks, while not comprehensive, do test compiler performance on a range of common code patterns,
13 | * such as function calls, string parsing, sorting, numerical loops, random number generation, and array operations.
14 | * It is important to note that these benchmark implementations are not written for absolute maximal performance
15 | * (the fastest code to compute fib(20) is the constant literal 6765). Rather, all of the benchmarks are written
16 | * to test the performance of specific algorithms, expressed in a reasonable idiom in each language.
17 | * In particular, all languages use the same algorithm: the Fibonacci benchmarks are all recursive while
18 | * the pi summation benchmarks are all iterative; the “algorithm” for random matrix multiplication is to
19 | * call LAPACK, except where that’s not possible, such as in JavaScript. The point of these benchmarks is to
20 | * compare the performance of specific algorithms across language implementations, not to compare the fastest
21 | * means of computing a result, which in most high-level languages relies on calling C code.
22 | *
23 | */
24 | public class PerfPure {
25 |
26 | protected final int NITER = 5;
27 | protected Random rand = new Random(0);
28 |
29 | public static void main(String[] args) {
30 | PerfPure p = new PerfPure();
31 | p.runBenchmarks();
32 | }
33 |
34 | void runBenchmarks() {
35 |
36 | long t, tmin;
37 |
38 | assert(fib(20) == 6765);
39 | int f = 0;
40 | tmin = Long.MAX_VALUE;
41 | for (int i=0; i=0) {
83 | d[j] = rand.nextDouble();
84 | }
85 | quicksort(d, 0, 5000-1);
86 | t = System.nanoTime()-t;
87 | if (t < tmin) tmin = t;
88 | }
89 | print_perf("recursion_quicksort", tmin);
90 |
91 | // pi sum
92 | double pi = 0;
93 | tmin = Long.MAX_VALUE;
94 | for (int i=0; i elements) {
215 | double m = mean(elements);
216 | double total = 0;
217 | for(Double d:elements) {
218 | double dif = (d-m);
219 | total += dif*dif;
220 | }
221 | return Math.sqrt(total/(elements.size()-1));
222 | }
223 | /** Returns the arithmetic mean of {@code elements}; an empty list yields NaN (0.0/0). */
224 | public double mean(List<Double> elements) {
225 | double total = 0;
226 | for (double d : elements) { // each Double is auto-unboxed; the element type makes this loop type-check
227 | total += d;
228 | }
229 | return total/elements.size();
230 | }
231 | /** Sample standard deviation (n-1 denominator) over every element of {@code sm}. */
232 | public double stdev(SimpleMatrix sm) {
233 | final double avg = mean(sm);
234 | final int count = sm.getNumElements();
235 | double sumSq = 0;
236 | // Accumulate squared deviations from the last element index down to 0.
237 | for (int idx = count - 1; idx >= 0; idx--) {
238 | final double delta = sm.get(idx) - avg;
239 | sumSq += delta * delta;
240 | }
241 | return Math.sqrt(sumSq / (count - 1));
242 | }
243 | /** Arithmetic mean of every element of {@code sm}, summed from the last index down to 0. */
244 | public double mean(SimpleMatrix sm) {
245 | final int count = sm.getNumElements();
246 | double sum = 0;
247 | for (int idx = count - 1; idx >= 0; idx--) {
248 | sum += sm.get(idx);
249 | }
250 | return sum / count;
251 | }
252 | /** Sorts a[lo..hi] (both bounds inclusive) in place into ascending order. */
253 | protected void quicksort(double[] a, int lo, int hi) {
254 | int i = lo;
255 | int j = hi;
256 | while (i < hi) { // each pass partitions a[lo..hi]; lo is advanced at the bottom of the loop
257 | double pivot = a[(lo+hi)/2]; // value of the middle element is the pivot
258 | // Partition
259 | while (i <= j) {
260 | while (a[i] < pivot) {
261 | i = i + 1;
262 | }
263 | while (a[j] > pivot) {
264 | j = j - 1;
265 | }
266 | if (i <= j) {
267 | double t = a[i];
268 | a[i] = a[j];
269 | a[j] = t;
270 | i = i + 1;
271 | j = j - 1;
272 | }
273 | }
274 |
275 | // Recursion for quicksort
276 | if (lo < j) {
277 | quicksort(a, lo, j); // only the left part recurses
278 | }
279 | lo = i; // the right part a[i..hi] is handled by the next pass of the outer loop
280 | j = hi;
281 | }
282 | }
283 | /** Recomputes the partial sum of 1/k^2 for k = 1..10000 five hundred times and returns the last sum. */
284 | protected double pisum() {
285 | double acc = 0.0;
286 | for (int rep=0; rep<500; ++rep) {
287 | acc = 0.0; // restart the series on every repetition
288 | for (int k=1; k<=10000; ++k) {
289 | acc += 1.0/(k*k);
290 | }
291 | }
292 | return acc;
293 | }
294 | /** Iterates z = z*z + c starting from z = c = (zReal, zImag) and returns an escape count (see inline notes). */
295 | private int mandel(double zReal, double zImag) {
296 | int n = 0;
297 | double cReal = zReal;
298 | double cImag = zImag;
299 | for (n=0; n<=79; ++n) { // at most 80 iterations
300 | if (complexAbs2(zReal,zImag) > 4.0) { // |z|^2 > 4, i.e. |z| > 2
301 | n -= 1; // cancels the +1 in the return below, so an escape at iteration n returns n
302 | break;
303 | }
304 |
305 | // z^2
306 | double zSquaredReal = zReal*zReal-zImag*zImag;
307 | double zSquaredImag = zReal*zImag+zImag*zReal;
308 |
309 | // +c
310 | zReal = zSquaredReal+cReal;
311 | zImag = zSquaredImag+cImag;
312 |
313 | }
314 | return n+1; // NOTE(review): if the loop runs to completion n == 80, so this returns 81 — confirm that is intended
315 | }
316 | /** Modulus |z| of the complex number zReal + i*zImag. */
317 | private double complexAbs(double zReal, double zImag) {
318 | return Math.sqrt(zReal*zReal + zImag*zImag);
319 | }
320 | /** Squared modulus |z|^2 of zReal + i*zImag (no square root taken). */
321 | private double complexAbs2(double zReal, double zImag) {
322 | return zReal*zReal + zImag*zImag;
323 | }
324 | /** Sums mandel(re, im) over re from -2.0 to 0.5 and im from -1.0 to 1.0, both stepped by 0.1. */
325 | protected int mandelperf() {
326 | int mandel_sum = 0;
327 | for (double re=-2.0; re<=0.5; re+=0.1) { // NOTE(review): repeated += 0.1 accumulates rounding error, so the last grid point may be missed — confirm
328 | for (double im=-1.0; im<=1.0; im+=0.1) {
329 | int m = mandel(re,im);
330 | mandel_sum += m;
331 | }
332 | }
333 | return mandel_sum;
334 | }
335 | /** Prints one CSV row "java,<name>,<value>" where value is t/1E6 with six decimals (callers pass System.nanoTime() differences, so the value is milliseconds). */
336 | protected void print_perf(String name, long t) {
337 | System.out.printf("java,%s,%.6f\n", name, t/(double)1E6);
338 | }
339 | /** Naive doubly-recursive Fibonacci; returns n itself when n is below 2. */
340 | protected int fib(int n) {
341 | return n >= 2 ? fib(n-2+1) + fib(n-2) : n;
342 | }
343 |
344 | }
345 |
346 |
--------------------------------------------------------------------------------
/lua/lua-install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Install lua-sci-lang as recommended via ulua
3 | set -e  # stop at the first failing step instead of continuing with a partial install
4 | wget https://ulua.io/download/ulua~latest.zip
5 | unzip ulua~latest.zip
6 | sed -i 's/noconfirm = false,/noconfirm = true,/g' ulua/host/config.lua  # let upkg run without interactive confirmation
7 | ulua/bin/upkg add time
8 | ulua/bin/upkg add sci
9 | ulua/bin/upkg add sci-lang
10 |
--------------------------------------------------------------------------------
/perf.R:
--------------------------------------------------------------------------------
1 | require(compiler)
2 | # Stop with an error unless `bool` is TRUE.
3 | assert = function(bool) {
4 | if (!bool) stop('Assertion failed')
5 | }
6 | # Run byte-compiled f(...) `times` times and print the fastest run as "r,<name>,<elapsed seconds * 1000>".
7 | timeit = function(name, f, ..., times=5) {
8 | compiled = cmpfun(f)
9 | best = Inf
10 | for (run in 1:times) {
11 | elapsed = system.time(compiled(...))["elapsed"]
12 | if (elapsed < best) best = elapsed
13 | }
14 | cat(sprintf("r,%s,%.8f\n", name, best*1000))
15 | }
16 |
17 | ## fib ##
18 | # Naive doubly-recursive Fibonacci; returns n itself when n < 2.
19 | fib = function(n) {
20 | if (n >= 2) {
21 | return(fib(n-1) + fib(n-2))
22 | } else {
23 | return(n)
24 | }
25 | }
26 |
27 | assert(fib(20) == 6765)
28 | timeit("recursion_fibonacci", fib, 20)
29 |
30 | ## parse_int ##
31 | # Round-trip t random integers through a "0x..." hex string and back, asserting equality each time.
32 | parseintperf = function(t) {
33 | for (i in 1:t) {
34 | # R doesn't support uint32 values
35 | n = floor(runif(1, min=0, max=2^31-1))
36 | s = sprintf("0x%x", n)
37 | m = as.numeric(s)
38 | assert(m == n)
39 | }
40 | }
41 |
42 | timeit("parse_integers", parseintperf, 1000)
43 | # Write t lines of the form "i i+1" to /dev/null through one connection that stays open for the whole loop.
44 | printfdperf = function(t) {
45 | fd<-file("/dev/null", "w")  # open once: writeLines opens and closes an unopened connection on every call
46 | on.exit(close(fd))
47 | for (i in 1:t) {
48 | s = sprintf("%d %d", i, i+1)
49 | writeLines(s, fd)
50 | }
51 | }
52 |
53 | timeit("print_to_file", printfdperf, 100000)
54 |
55 | ## quicksort ##
56 | # Sort numeric vector `a` ascending with a hand-written quicksort and return the sorted vector.
57 | qsort = function(a) {
58 | qsort_kernel = function(lo, hi) {  # sorts a[lo..hi]; `a` is the enclosing function's variable
59 | i = lo
60 | j = hi
61 | while (i < hi) {
62 | pivot = a[floor((lo+hi)/2)]
63 | while (i <= j) {
64 | while (a[i] < pivot) i = i + 1
65 | while (a[j] > pivot) j = j - 1
66 | if (i <= j) {
67 | t = a[i]
68 | a[i] <<- a[j]  # `<<-` assigns into qsort's `a` instead of creating a kernel-local copy
69 | a[j] <<- t
70 | i = i + 1;
71 | j = j - 1;
72 | }
73 | }
74 | if (lo < j) qsort_kernel(lo, j)  # recurse on the left part; the right part is handled by the next pass
75 | lo = i
76 | j = hi
77 | }
78 | }
79 | qsort_kernel(1, length(a))
80 | return(a)
81 | }
82 | # Generate n uniform random numbers and return them sorted by qsort.
83 | sortperf = function(n) {
84 | v = runif(n)
85 | return(qsort(v))
86 | }
87 |
88 | assert(!is.unsorted(sortperf(5000)))
89 | timeit('recursion_quicksort', sortperf, 5000)
90 |
91 | ## mandel ##
92 | Mod2 = function(z) {  # squared modulus |z|^2 of a complex value
93 | return(Re(z)*Re(z) + Im(z)*Im(z))
94 | }
95 | # Iterate z = z^2 + c from z = c; return how many iterations ran before |z|^2 exceeded 4 (maxiter if it never did).
96 | mandel = function(z) {
97 | c = z
98 | maxiter = 80
99 | for (n in 1:maxiter) {
100 | if (Mod2(z) > 4) return(n-1)
101 | z = z^2+c
102 | }
103 | return(maxiter)
104 | }
105 | # Evaluate mandel on the grid re = -2..0.5, im = -1..1 (step 0.1) and return the counts as a matrix.
106 | mandelperf = function() {
107 | re = seq(-2,0.5,.1)
108 | im = seq(-1,1,.1)
109 | M = matrix(0.0,nrow=length(re),ncol=length(im))
110 | count = 1
111 | for (r in re) {
112 | for (i in im) {
113 | M[count] = mandel(complex(real=r,imag=i))  # linear index: cells are filled in storage order, not by (r, i) position
114 | count = count + 1
115 | }
116 | }
117 | return(M)
118 | }
119 |
120 | assert(sum(mandelperf()) == 14791)
121 | timeit("userfunc_mandelbrot", mandelperf)
122 |
123 | ## pi_sum ##
124 | # Recompute sum(1/k^2, k = 1..10000) 500 times with explicit loops; return the last sum.
125 | pisum = function() {
126 | acc = 0.0
127 | for (rep in 1:500) {
128 | acc = 0.0
129 | for (k in 1:10000) {
130 | acc = acc + 1.0/(k*k)
131 | }
132 | }
133 | return(acc)
134 | }
135 |
136 | assert(abs(pisum()-1.644834071848065) < 1e-12);
137 | timeit("iteration_pi_sum", pisum, times=1)
138 |
139 | ## pi_sum_vec ##
140 | # Vectorised pisum: evaluate sum(1/r^2) for r = 1..10000 500 times and return the first result.
141 | pisumvec = function() {
142 | r = 1:10000
143 | return(replicate(500, sum(1/((r)^2)))[1])
144 | }
145 |
146 | #assert(abs(pisumvec()-1.644834071848065) < 1e-12);
147 | #timeit("pi_sum_vec", pisumvec, times=10)
148 |
149 | ## rand_mat_stat ##
150 | # For t trials, build P = [a b c d] and Q = [a b; c d] from four 5x5 normal matrices and return sd/mean of the two trace series.
151 | randmatstat = function(t) {
152 | n = 5
153 | v = matrix(0, nrow=t)
154 | w = matrix(0, nrow=t)
155 | for (i in 1:t) {
156 | a = matrix(rnorm(n*n), ncol=n, nrow=n)
157 | b = matrix(rnorm(n*n), ncol=n, nrow=n)
158 | c = matrix(rnorm(n*n), ncol=n, nrow=n)
159 | d = matrix(rnorm(n*n), ncol=n, nrow=n)
160 | P = cbind(a,b,c,d)
161 | Q = rbind(cbind(a,b),cbind(c,d))
162 | v[i] = sum(diag((t(P)%*%P)^4))  # t(...) is the transpose function here; the numeric argument `t` is skipped in call position
163 | w[i] = sum(diag((t(Q)%*%Q)^4))  # NOTE(review): `^4` is element-wise in R, not a matrix power — confirm this matches the other ports
164 | }
165 | s1 = apply(v,2,sd)/mean(v)
166 | s2 = apply(w,2,sd)/mean(w)
167 | return(c(s1,s2))
168 | }
169 |
170 | timeit("matrix_statistics", randmatstat, 1000)
171 |
172 | ## rand_mat_mul ##
173 | # Multiply two n-by-n matrices of uniform random numbers and return the product.
174 | randmatmul = function(n) {
175 | A = matrix(runif(n*n), ncol=n, nrow=n)
176 | B = matrix(runif(n*n), ncol=n, nrow=n)
177 | return(A %*% B)
178 | }
179 |
180 | assert(randmatmul(1000)[1] >= 0)
181 | timeit("matrix_multiply", randmatmul, 1000)
182 |
--------------------------------------------------------------------------------
/perf.c:
--------------------------------------------------------------------------------
1 | // This file was formerly a part of Julia. License is MIT: https://julialang.org/license
2 |
3 | #include
4 |
5 | // include header file generated by make:
6 | #define DSFMT_MEXP 19937
7 | #include "perf.h"
8 | #include "randmtzig.c"
9 | // Allocate n doubles with malloc and fill them via dsfmt_gv_fill_array_close_open; the buffer is not freed here.
10 | double *myrand(int n) {
11 | double *d = (double *)malloc(n*sizeof(double)); // NOTE(review): result is not checked for NULL
12 | dsfmt_gv_fill_array_close_open(d, n); // presumably uniform values in [0, 1) — confirm against dSFMT
13 | return d;
14 | }
15 |
16 | #define NITER 5
17 | // Current gettimeofday() reading expressed as fractional seconds.
18 | double clock_now()
19 | {
20 | struct timeval tv;
21 |
22 | gettimeofday(&tv, NULL);
23 | return (double)tv.tv_sec + (double)tv.tv_usec/1.0e6;
24 | }
25 | // Naive doubly-recursive Fibonacci; returns n itself when n < 2.
26 | int fib(int n) {
27 | return n >= 2 ? fib(n-1) + fib(n-2) : n;
28 | }
29 | // Parse the NUL-terminated digit string s in the given base; exits the process with -1 on an invalid digit.
30 | long parse_int(const char *s, long base) {
31 | long n = 0;
32 | for (; *s; ++s) {
33 | char c = *s;
34 | long d = 0;
35 | if (c >= '0' && c <= '9') d = c-'0';
36 | else if (c >= 'A' && c <= 'Z') d = c-'A' + (int) 10;
37 | else if (c >= 'a' && c <= 'z') d = c-'a' + (int) 10;
38 | else exit(-1); // not an alphanumeric character
39 |
40 | if (base <= d) exit(-1); // digit value too large for this base
41 | n = n*base + d;
42 | }
43 | return n;
44 | }
45 |
46 | double *ones(int m, int n) {
47 | double *a = (double *) malloc(m*n*sizeof(double));
48 | for (int k=0; k 4.0) {
69 | return n;
70 | }
71 | z = z*z+c;
72 | }
73 | return maxiter;
74 | }
75 | // Fill a malloc'd 21x26 int array with mandel() over re = -2.0..0.5, im = -1.0..1.0 (step 0.1); not freed here.
76 | int *mandelperf() {
77 | int *M = (int*) malloc(21*26*sizeof(int));
78 | for (int i = 0; i < 21; i++) {
79 | for (int j = 0; j < 26; j++) {
80 | M[26*i + j] = mandel((j-20)/10.0 + ((i-10)/10.0)*I); // integer counters divided by 10: real part from j, imaginary part from i
81 | }
82 | }
83 | return M;
84 | }
85 | // Sort a[lo..hi] (both bounds inclusive) in place into ascending order.
86 | void quicksort(double *a, int lo, int hi) {
87 | int i = lo;
88 | int j = hi;
89 | while (i < hi) { // each pass partitions a[lo..hi]; lo is advanced at the bottom of the loop
90 | double pivot = a[(lo+hi)/2]; // value of the middle element is the pivot
91 | // Partition
92 | while (i <= j) {
93 | while (a[i] < pivot) {
94 | i = i + 1;
95 | }
96 | while (a[j] > pivot) {
97 | j = j - 1;
98 | }
99 | if (i <= j) {
100 | double t = a[i];
101 | a[i] = a[j];
102 | a[j] = t;
103 | i = i + 1;
104 | j = j - 1;
105 | }
106 | }
107 |
108 | // Recursion for quicksort
109 | if (lo < j) {
110 | quicksort(a, lo, j); // only the left part recurses
111 | }
112 | lo = i; // the right part a[i..hi] is handled by the next pass of the outer loop
113 | j = hi;
114 | }
115 | }
116 | // Recompute sum(1/k^2, k = 1..10000) 500 times; return the last sum.
117 | double pisum() {
118 | double acc = 0.0;
119 | for (int rep=0; rep<500; ++rep) {
120 | acc = 0.0; // restart the series on every repetition
121 | for (int k=1; k<=10000; ++k) {
122 | acc += 1.0/(k*k);
123 | }
124 | }
125 | return acc;
126 | }
127 |
128 | struct double_pair { double s1, s2; };
129 |
130 | static void randmtzig_fill_randn(dsfmt_t *dsfmt, double *a, int size) {
131 | for (int i=0; i 0)
164 |
165 | hexchar(i:i) = table(iand(quotient,15))
166 | i = i-1
167 |
168 | quotient = ishft(quotient,-4)
169 | end do
170 |
171 | end subroutine hex_string
172 |
173 | end module
174 |
175 |
176 |
177 | module bench
178 | use utils, only: trace, randn, std, mean, stop_error
179 | use types, only: dp
180 | implicit none
181 | private
182 | public fib, parse_int, printfd, quicksort, mandelperf, pisum, randmatstat, randmatmul
183 |
184 | contains
185 | ! Naive doubly-recursive Fibonacci; returns n itself when n < 2.
186 | integer recursive function fib(n) result(r)
187 | integer, intent(in) :: n
188 | if (n < 2) then
189 | r = n
190 | else
191 | r = fib(n-1) + fib(n-2)
192 | end if
193 | end function
194 | ! Parse the digit string s in the given base; calls stop_error on a non-alphanumeric character or an out-of-range digit.
195 | integer function parse_int(s, base) result(n)
196 | character(len=*), intent(in) :: s
197 | integer, intent(in) :: base
198 | integer :: i, d
199 | character :: c
200 | n = 0
201 | do i = 1, len(s)
202 | c = s(i:i)
203 | d = 0
204 | if (ichar(c) >= ichar('0') .and. ichar(c) <= ichar('9')) then
205 | d = ichar(c) - ichar('0')
206 | else if (ichar(c) >= ichar('A') .and. ichar(c) <= ichar('Z')) then
207 | d = ichar(c) - ichar('A') + 10
208 | else if (ichar(c) >= ichar('a') .and. ichar(c) <= ichar('z')) then
209 | d = ichar(c) - ichar('a') + 10
210 | else
211 | call stop_error("parse_int 1")
212 | end if
213 |
214 | if (base <= d) call stop_error("parse_int 2") ! digit value too large for this base
215 | n = n*base + d
216 | end do
217 |
218 | end function
219 | ! Write n list-directed records "i  i+1" to /dev/null on unit 1, then close the unit.
220 | subroutine printfd(n)
221 | integer, intent(in) :: n
222 | integer :: i ! the unused local `unit` was dropped; the unit number below is the literal 1
223 | open(unit=1, file="/dev/null")
224 | do i = 1, n
225 | write(unit=1, fmt=*) i, i+1
226 | end do
227 | close(unit=1)
228 | end subroutine
229 | ! Squared modulus |z|**2 of a complex value (no square root taken).
230 | real(dp) function abs2(z) result(r)
231 | complex(dp), intent(in) :: z
232 | r = real(z)*real(z) + imag(z)*imag(z);
233 | end function
234 | ! Iterate z = z**2 + c from z = c = z0; return the iterations run before |z|**2 exceeded 4 (maxiter if it never did).
235 | integer function mandel(z0) result(r)
236 | complex(dp), intent(in) :: z0
237 | complex(dp) :: c, z
238 | integer :: n, maxiter
239 | maxiter = 80
240 | z = z0
241 | c = z0
242 | do n = 1, maxiter
243 | if (abs2(z) > 4) then
244 | r = n-1
245 | return
246 | end if
247 | z = z**2 + c
248 | end do
249 | r = maxiter
250 | end function
251 | ! Sum mandel() over the grid re = -2.0..0.5, im = -1.0..1.0 (step 0.1), driven by integer counters divided by 10.
252 | integer function mandelperf() result(mandel_sum)
253 | integer :: re, im
254 | volatile :: mandel_sum ! presumably keeps the compiler from optimising the accumulation away — confirm
255 | mandel_sum = 0
256 | re = -20
257 | do while (re <= 5)
258 | im = -10
259 | do while (im <= 10)
260 | mandel_sum = mandel_sum + mandel(cmplx(re/10._dp, im/10._dp, dp))
261 | im = im + 1
262 | end do
263 | re = re + 1
264 | end do
265 | end function
266 | ! Sort a(lo0:hi) in place into ascending order; both bounds are inclusive.
267 | recursive subroutine quicksort(a, lo0, hi)
268 | real(dp), intent(inout) :: a(:)
269 | integer, intent(in) :: lo0, hi
270 | integer :: i, j, lo
271 | real(dp) :: pivot, t
272 | lo = lo0 ! local copy, because lo0 is intent(in) and the lower bound advances below
273 | i = lo
274 | j = hi
275 | do while (i < hi)
276 | pivot = a((lo+hi)/2)
277 | do while (i <= j)
278 | do while (a(i) < pivot)
279 | i = i + 1
280 | end do
281 | do while (a(j) > pivot)
282 | j = j - 1
283 | end do
284 | if (i <= j) then
285 | t = a(i)
286 | a(i) = a(j)
287 | a(j) = t
288 | i = i + 1
289 | j = j - 1
290 | end if
291 | end do
292 | if (lo < j) call quicksort(a, lo, j) ! only the left part recurses
293 | lo = i ! the right part is handled by the next pass of the outer loop
294 | j = hi
295 | end do
296 | end subroutine
297 | ! Recompute sum(1/k**2, k = 1..10000) 500 times; the result is the last sum.
298 | real(dp) function pisum() result(s)
299 | integer :: j, k
300 | do j = 1, 500
301 | s = 0 ! restart the series on every repetition
302 | do k = 1, 10000
303 | s = s + 1._dp / k**2
304 | end do
305 | end do
306 | end function
307 | ! For t trials, build P and Q from four 5x5 randn matrices; s1, s2 = std/mean of trace((P'P)^4) and trace((Q'Q)^4).
308 | subroutine randmatstat(t, s1, s2)
309 | integer, intent(in) :: t
310 | real(dp), intent(out) :: s1, s2
311 | real(dp), allocatable, dimension(:, :) :: a, b, c, d, P, Q, X
312 | real(dp), allocatable :: v(:), w(:)
313 | integer :: n, i
314 | n = 5
315 | allocate(a(n, n), b(n, n), c(n, n), d(n, n))
316 | allocate(P(4*n, n), Q(2*n, 2*n), X(2*n, 2*n))
317 | allocate(v(t), w(t))
318 | do i = 1, t
319 | call randn(a)
320 | call randn(b)
321 | call randn(c)
322 | call randn(d)
323 | P(:n, :)=a; P(n+1:2*n, :)=b; P(2*n+1:3*n, :)=c; P(3*n+1:, :)=d ! the four blocks are stacked by rows: P is 4n x n
324 | Q(:n, :n) = a; Q(n+1:, :n) = b
325 | Q(:n, n+1: ) = c; Q(n+1:, n+1: ) = d
326 | X = matmul(transpose(P), P) ! NOTE(review): this product is n x n while X was allocated 2n x 2n — relies on reallocation on assignment; confirm
327 | X = matmul(X, X)
328 | X = matmul(X, X) ! two squarings give the fourth matrix power
329 | v(i) = trace(X)
330 | X = matmul(transpose(Q), Q)
331 | X = matmul(X, X)
332 | X = matmul(X, X)
333 | w(i) = trace(X)
334 | end do
335 | s1 = std(v) / mean(v)
336 | s2 = std(w) / mean(w)
337 | end subroutine
338 | ! Allocate C and set it to the product of two n x n random_number matrices using dgemm.
339 | subroutine randmatmul(n, C)
340 | integer, intent(in) :: n
341 | real(dp), intent(out), allocatable :: C(:, :)
342 | real(dp), allocatable :: A(:, :), B(:, :)
343 | allocate(A(n, n), B(n, n), C(n, n))
344 | call random_number(A)
345 | call random_number(B)
346 | !C = matmul(A, B)
347 | call dgemm('N','N',n,n,n,1.0d0,A,n,B,n,0.0d0,C,n) ! C = 1.0*A*B + 0.0*C, no transposes
348 | end subroutine
349 |
350 | end module
351 |
352 | program perf
353 | use types, only: dp, i64
354 | use utils, only: assert, init_random_seed, sysclock2ms, hex_string
355 | use bench, only: fib, parse_int, printfd, quicksort, mandelperf, pisum, randmatstat, &
356 | randmatmul
357 | implicit none
358 |
359 | integer, parameter :: NRUNS = 1000
360 | integer :: i, f, n, m, k, k2
361 | integer(i64) :: t1, t2, tmin
362 | real(dp) :: pi, s1, s2
363 | real(dp), allocatable :: C(:, :), d(:)
364 | character(len=11) :: s
365 |
366 | call init_random_seed()
367 |
368 | tmin = huge(0_i64)
369 | do i = 1, 5
370 | call system_clock(t1)
371 | do k = 1, NRUNS
372 | f = fib(20)
373 | end do
374 | call system_clock(t2)
375 | if (t2-t1 < tmin) tmin = t2-t1
376 | end do
377 | call assert(f == 6765)
378 | print "('fortran,recursion_fibonacci,',f0.6)", sysclock2ms(tmin) / NRUNS
379 |
380 | tmin = huge(0_i64)
381 | do i = 1, 5
382 | call system_clock(t1)
383 | do k2 = 1, NRUNS
384 | do k = 1, 1000
385 | call random_number(s1)
386 | n = int(s1*huge(n))
387 | call hex_string(n,s)
388 | m = parse_int(s(:len_trim(s)), 16)
389 | call assert(m == n)
390 | end do
391 | end do
392 | call system_clock(t2)
393 | if (t2-t1 < tmin) tmin = t2-t1
394 | end do
395 | print "('fortran,parse_integers,',f0.6)", sysclock2ms(tmin) / NRUNS
396 |
397 | tmin = huge(0_i64)
398 | do i = 1, 5
399 | call system_clock(t1)
400 | call printfd(100000)
401 | call system_clock(t2)
402 | if (t2-t1 < tmin) tmin = t2-t1
403 | end do
404 | print "('fortran,print_to_file,',f0.6)", sysclock2ms(tmin)
405 |
406 |
407 | tmin = huge(0_i64)
408 | do i = 1, 5
409 | call system_clock(t1)
410 | do k = 1, NRUNS
411 | f = mandelperf()
412 | end do
413 | call system_clock(t2)
414 | if (t2-t1 < tmin) tmin = t2-t1
415 | end do
416 | call assert(f == 14791)
417 | print "('fortran,userfunc_mandelbrot,',f0.6)", sysclock2ms(tmin) / NRUNS
418 |
419 | tmin = huge(0_i64)
420 | do i = 1, 5
421 | call system_clock(t1)
422 | do k = 1, NRUNS
423 | allocate(d(5000))
424 | call random_number(d)
425 | call quicksort(d, 1, size(d))
426 | deallocate(d)
427 | end do
428 | call system_clock(t2)
429 | if (t2-t1 < tmin) tmin = t2-t1
430 | end do
431 | print "('fortran,recursion_quicksort,',f0.6)", sysclock2ms(tmin) / NRUNS
432 |
433 | tmin = huge(0_i64)
434 | do i = 1, 5
435 | call system_clock(t1)
436 | pi = pisum()
437 | call system_clock(t2)
438 | if (t2-t1 < tmin) tmin = t2-t1
439 | end do
440 | call assert(abs(pi - 1.644834071848065_dp) < 1e-6_dp)
441 | print "('fortran,iteration_pi_sum,',f0.6)", sysclock2ms(tmin)
442 |
443 | tmin = huge(0_i64)
444 | do i = 1, 5
445 | call system_clock(t1)
446 | call randmatstat(1000, s1, s2)
447 | call system_clock(t2)
448 | if (t2-t1 < tmin) tmin = t2-t1
449 | end do
450 | ! call assert(s1 > 0.5_dp .and. s1 < 1)
451 | ! call assert(s2 > 0.5_dp .and. s2 < 1)
452 | print "('fortran,matrix_statistics,',f0.6)", sysclock2ms(tmin)
453 |
454 | tmin = huge(0_i64)
455 | do i = 1, 5
456 | call system_clock(t1)
457 | call randmatmul(1000, C)
458 | call assert(C(1, 1) >= 0)
459 | call system_clock(t2)
460 | if (t2-t1 < tmin) tmin = t2-t1
461 | end do
462 | print "('fortran,matrix_multiply,',f0.6)", sysclock2ms(tmin)
463 |
464 | end program
465 |
--------------------------------------------------------------------------------
/perf.go:
--------------------------------------------------------------------------------
1 | // Implementation of the Julia benchmark suite in Go.
2 | //
3 | // Three gonum packages must be installed, and then an additional environment
4 | // variable must be set to use the BLAS installation.
5 | // To install the gonum packages, run:
6 | // go get gonum.org/v1/netlib/blas/netlib
7 | // go get gonum.org/v1/gonum/mat
8 | // go get gonum.org/v1/gonum/stat
9 | // The cgo ldflags must then be set to use the BLAS implementation. As an example,
10 | // download OpenBLAS to ~/software
11 | // git clone https://github.com/xianyi/OpenBLAS
12 | // cd OpenBLAS
13 | // make
14 | // Then edit the environment variable to have
15 | // export CGO_LDFLAGS="-L/$HOME/software/OpenBLAS -lopenblas"
16 | package main
17 |
18 | import (
19 | "bufio"
20 | "errors"
21 | "fmt"
22 | "log"
23 | "math"
24 | "math/rand"
25 | "os"
26 | "strconv"
27 | "testing"
28 |
29 | "gonum.org/v1/gonum/mat"
30 | "gonum.org/v1/gonum/stat"
31 | "gonum.org/v1/netlib/blas/netlib"
32 | )
33 | // init only references the netlib package so its import stays used; no BLAS implementation is selected while the Use call is commented out.
34 | func init() {
35 | // Use the BLAS implementation specified in CGO_LDFLAGS. This line can be
36 | // commented out to use the native Go BLAS implementation found in
37 | // gonum.org/v1/gonum/blas/gonum.
38 | //blas64.Use(gonum.Implementation{})
39 |
40 | // These are here so that toggling the BLAS implementation does not make imports unused
41 | _ = netlib.Implementation{}
42 | }
43 |
44 | // fibonacci
45 | // fib returns the n-th Fibonacci number by naive double recursion; n is returned as-is when n < 2.
46 | func fib(n int) int {
47 | if n >= 2 {
48 | return fib(n-1) + fib(n-2)
49 | }
50 | return n
51 | }
52 |
53 | // print to file descriptor
54 | // printfd writes n lines of the form "i i+1" to /dev/null through a buffered writer and panics on I/O failure.
55 | func printfd(n int) {
56 | f, err := os.Create("/dev/null")
57 | if err != nil {
58 | panic(err)
59 | }
60 | defer f.Close() // the only Close: closing explicitly as well made the deferred call hit an already-closed file
61 | w := bufio.NewWriter(f)
62 | for i := 0; i < n; i++ {
63 | fmt.Fprintf(w, "%d %d\n", i, i+1) // bufio.Writer remembers the first write error; Flush reports it
64 | }
65 | if err := w.Flush(); err != nil {
66 | panic(err)
67 | }
68 | }
69 |
70 | // quicksort
71 | // qsort_kernel sorts a[lo..hi] (inclusive bounds) in place into ascending order and returns a.
72 | func qsort_kernel(a []float64, lo, hi int) []float64 {
73 | i := lo
74 | j := hi
75 | for i < hi { // each pass partitions a[lo..hi]; lo is advanced at the bottom of the loop
76 | pivot := a[(lo+hi)/2]
77 | for i <= j {
78 | for a[i] < pivot {
79 | i += 1
80 | }
81 | for a[j] > pivot {
82 | j -= 1
83 | }
84 | if i <= j {
85 | a[i], a[j] = a[j], a[i]
86 | i += 1
87 | j -= 1
88 | }
89 | }
90 | if lo < j {
91 | qsort_kernel(a, lo, j) // only the left part recurses
92 | }
93 | lo = i // the right part is handled by the next pass of the outer loop
94 | j = hi
95 | }
96 | return a
97 | }
98 |
99 | var rnd = rand.New(rand.NewSource(1))
100 |
101 | // randmatstat
102 | // randmatstat runs t trials on four random 5x5 normal matrices and returns std/mean of trace((P'P)^4) and of trace((Q'Q)^4).
103 | func randmatstat(t int) (float64, float64) {
104 | n := 5
105 | v := make([]float64, t)
106 | w := make([]float64, t)
107 | ad := make([]float64, n*n)
108 | bd := make([]float64, n*n)
109 | cd := make([]float64, n*n)
110 | dd := make([]float64, n*n)
111 | P := mat.NewDense(n, 4*n, nil)
112 | Q := mat.NewDense(2*n, 2*n, nil)
113 | pTmp := mat.NewDense(4*n, 4*n, nil)
114 | qTmp := mat.NewDense(2*n, 2*n, nil)
115 | for i := 0; i < t; i++ {
116 | for k := range ad { // k, not i: the inner index no longer shadows the trial counter
117 | ad[k] = rnd.NormFloat64()
118 | bd[k] = rnd.NormFloat64()
119 | cd[k] = rnd.NormFloat64()
120 | dd[k] = rnd.NormFloat64()
121 | }
122 | a := mat.NewDense(n, n, ad)
123 | b := mat.NewDense(n, n, bd)
124 | c := mat.NewDense(n, n, cd)
125 | d := mat.NewDense(n, n, dd)
126 | P.Copy(a) // P = [a b c d], blocks laid side by side
127 | P.Slice(0, n, n, n+n).(*mat.Dense).Copy(b)
128 | P.Slice(0, n, 2*n, 3*n).(*mat.Dense).Copy(c)
129 | P.Slice(0, n, 3*n, 4*n).(*mat.Dense).Copy(d)
130 |
131 | Q.Copy(a) // Q = [a b; c d]
132 | Q.Slice(0, n, n, 2*n).(*mat.Dense).Copy(b)
133 | Q.Slice(n, 2*n, 0, n).(*mat.Dense).Copy(c)
134 | Q.Slice(n, 2*n, n, 2*n).(*mat.Dense).Copy(d)
135 |
136 | pTmp.Mul(P.T(), P)
137 | pTmp.Pow(pTmp, 4)
138 |
139 | qTmp.Mul(Q.T(), Q)
140 | qTmp.Pow(qTmp, 4)
141 |
142 | v[i] = mat.Trace(pTmp)
143 | w[i] = mat.Trace(qTmp)
144 | }
145 | mv, stdv := stat.MeanStdDev(v, nil)
146 | mw, stdw := stat.MeanStdDev(w, nil) // was computed on v, so the second result merely duplicated the first
147 | return stdv / mv, stdw / mw
148 | }
149 |
150 | // randmatmul
151 | // randmatmul multiplies two n x n matrices filled from rnd.Float64 and returns the product.
152 | func randmatmul(n int) *mat.Dense {
153 | aData := make([]float64, n*n)
154 | for i := range aData {
155 | aData[i] = rnd.Float64()
156 | }
157 | a := mat.NewDense(n, n, aData)
158 |
159 | bData := make([]float64, n*n)
160 | for i := range bData {
161 | bData[i] = rnd.Float64()
162 | }
163 | b := mat.NewDense(n, n, bData)
164 | var c mat.Dense // zero-value receiver for the product
165 | c.Mul(a, b)
166 | return &c
167 | }
168 |
169 | // mandelbrot
170 | func abs2(z complex128) float64 { // squared modulus |z|^2, no square root taken
171 | return real(z)*real(z) + imag(z)*imag(z)
172 | }
173 | func mandel(z complex128) int { // escape count of z = z*z + c started at z = c; maxiter if |z|^2 never exceeds 4
174 | maxiter := 80
175 | c := z
176 | for n := 0; n < maxiter; n++ {
177 | if abs2(z) > 4 {
178 | return n
179 | }
180 | z = z*z + c
181 | }
182 | return maxiter
183 | }
184 |
185 | // mandelperf
186 | // mandelperf sums mandel over the grid re = -2.0..0.5, im = -1.0..1.0 with step 0.1.
187 | func mandelperf() int {
188 | mandel_sum := 0
189 | // These loops are constructed as such because mandel is very sensitive to
190 | // its input and this avoids very small floating point issues.
191 | for re := -20.0; re <= 5; re += 1 {
192 | for im := -10.0; im <= 10; im += 1 {
193 | m := mandel(complex(re/10, im/10)) // whole-number counters are divided by 10 here instead of stepping by 0.1
194 | mandel_sum += m
195 | }
196 | }
197 | return mandel_sum
198 | }
199 |
200 | // pisum
201 | // pisum recomputes sum(1/k^2, k = 1..10000) 500 times and returns the final sum.
202 | func pisum() float64 {
203 | var total float64
204 | for rep := 0; rep < 500; rep++ {
205 | total = 0.0 // restart the series on every repetition
206 | for k := 1.0; k <= 10000; k += 1 {
207 | total += 1.0 / (k * k)
208 | }
209 | }
210 | return total
211 | }
212 | // print_perf prints one CSV row "go,<name>,<time*1000>"; main passes seconds, so the value is milliseconds.
213 | func print_perf(name string, time float64) {
214 | fmt.Printf("go,%v,%v\n", name, time*1000)
215 | }
216 |
217 | // run tests
218 | // assert fails the running benchmark through b.Fatal when t is false.
219 | func assert(b *testing.B, t bool) {
220 | if !t {
221 | b.Fatal("assert failed")
222 | }
223 | }
224 | // main runs every entry of benchmarks in order and prints its timing; the first failure aborts via log.Fatalf.
225 | func main() {
226 | for _, bm := range benchmarks {
227 | seconds, err := runBenchmarkFor(bm.fn)
228 | if err != nil {
229 | log.Fatalf("%s %s", bm.name, err)
230 | }
231 | print_perf(bm.name, seconds)
232 | }
233 | }
234 | // runBenchmarkFor runs fn under testing.Benchmark and returns the mean seconds per iteration, or an error if N is 0.
235 | func runBenchmarkFor(fn func(*testing.B)) (seconds float64, err error) {
236 | result := testing.Benchmark(fn)
237 | if result.N == 0 {
238 | return 0, errors.New("failed")
239 | }
240 | return result.T.Seconds() / float64(result.N), nil
241 | }
242 |
243 | var benchmarks = []struct {
244 | name string
245 | fn func(*testing.B)
246 | }{
247 | {
248 | name: "recursion_fibonacci",
249 | fn: func(b *testing.B) {
250 | for i := 0; i < b.N; i++ {
251 | if fib(20) != 6765 {
252 | b.Fatal("unexpected value for fib(20)")
253 | }
254 | }
255 | },
256 | },
257 |
258 | {
259 | name: "parse_integers",
260 | fn: func(b *testing.B) {
261 | for i := 0; i < b.N; i++ {
262 | for k := 0; k < 1000; k++ {
263 | n := rnd.Uint32()
264 | m, _ := strconv.ParseUint(strconv.FormatUint(uint64(n), 16), 16, 32)
265 | if uint32(m) != n {
266 | b.Fatal("incorrect value for m")
267 | }
268 | }
269 | }
270 | },
271 | },
272 |
273 | {
274 | name: "userfunc_mandelbrot",
275 | fn: func(b *testing.B) {
276 | for i := 0; i < b.N; i++ {
277 | if mandelperf() != 14791 {
278 | b.Fatal("unexpected value for mandelperf")
279 | }
280 | }
281 | },
282 | },
283 |
284 | {
285 | name: "print_to_file",
286 | fn: func(b *testing.B) {
287 | for i := 0; i < b.N; i++ {
288 | printfd(100000)
289 | }
290 | },
291 | },
292 |
293 | {
294 | name: "recursion_quicksort",
295 | fn: func(b *testing.B) {
296 | lst := make([]float64, 5000)
297 | b.ResetTimer()
298 | for i := 0; i < b.N; i++ {
299 | for k := range lst {
300 | lst[k] = rnd.Float64()
301 | }
302 | qsort_kernel(lst, 0, len(lst)-1)
303 | }
304 | },
305 | },
306 |
307 | {
308 | name: "iteration_pi_sum",
309 | fn: func(b *testing.B) {
310 | for i := 0; i < b.N; i++ {
311 | if math.Abs(pisum()-1.644834071848065) >= 1e-6 {
312 | b.Fatal("pi_sum out of range")
313 | }
314 | }
315 | },
316 | },
317 |
318 | {
319 | name: "matrix_statistics",
320 | fn: func(b *testing.B) {
321 | for i := 0; i < b.N; i++ {
322 | c1, c2 := randmatstat(1000)
323 | assert(b, 0.5 < c1)
324 | assert(b, c1 < 1.0)
325 | assert(b, 0.5 < c2)
326 | assert(b, c2 < 1.0)
327 | }
328 | },
329 | },
330 |
331 | {
332 | name: "matrix_multiply",
333 | fn: func(b *testing.B) {
334 | for i := 0; i < b.N; i++ {
335 | c := randmatmul(1000)
336 | assert(b, c.At(0, 0) >= 0)
337 | }
338 | },
339 | },
340 | }
341 |
--------------------------------------------------------------------------------
/perf.jl:
--------------------------------------------------------------------------------
1 | # This file was formerly a part of Julia. License is MIT: https://julialang.org/license
2 |
3 | import LinearAlgebra
4 | import Test
5 | import Printf
6 | import Statistics
7 | import Base.Sys
8 |
9 | include("./perfutil.jl")
10 |
11 | ## recursive fib ##
12 | # Naive doubly-recursive Fibonacci; returns n itself when n < 2.
13 | fib(n) = n < 2 ? n : fib(n-1) + fib(n-2)
14 |
15 | Test.@test fib(20) == 6765
16 | @timeit fib(20) "recursion_fibonacci" "Recursive fibonacci"
17 |
18 | ## parse integer ##
19 | # Round-trip t random UInt32 values through a base-16 string and back; returns the last value generated.
20 | function parseintperf(t)
21 | local n, m
22 | for i=1:t
23 | n = rand(UInt32)
24 | @static if VERSION >= v"0.7.0-DEV.4446"
25 | s = string(n, base = 16)
26 | m = UInt32(parse(Int64, s, base = 16))
27 | else
28 | s = hex(n)
29 | m = UInt32(parse(Int64, s, 16))
30 | end
31 | @assert m == n
32 | end
33 | return n
34 | end
35 |
36 | @timeit parseintperf(1000) "parse_integers" "Integer parsing"
37 |
38 | ## array constructors ##
39 |
40 | Test.@test all(fill(1.,200,200) .== 1)
41 |
42 | ## matmul and transpose ##
43 |
44 | A = fill(1.,200,200)
45 | Test.@test all(A*A' .== 200)
46 | # @timeit A*A' "AtA" "description"
47 |
48 | ## mandelbrot set: complex arithmetic and comprehensions ##
49 | # Squared modulus |z|^2 of a complex value (no square root taken).
50 | function myabs2(z)
51 | return real(z)*real(z) + imag(z)*imag(z)
52 | end
53 | # Iterate z = z^2 + c from z = c; return the iterations run before |z|^2 exceeded 4 (maxiter if it never did).
54 | function mandel(z)
55 | c = z
56 | maxiter = 80
57 | for n = 1:maxiter
58 | if myabs2(z) > 4
59 | return n-1
60 | end
61 | z = z^2 + c
62 | end
63 | return maxiter
64 | end
65 | # Matrix of mandel() counts over the grid i = -1:0.1:1 (imaginary part) by r = -2:0.1:0.5 (real part).
66 | mandelperf() = [ mandel(complex(r,i)) for i=-1.:.1:1., r=-2.0:.1:0.5 ]
67 | Test.@test sum(mandelperf()) == 14791
68 | @timeit mandelperf() "userfunc_mandelbrot" "Calculation of mandelbrot set"
69 |
70 | ## numeric vector sort ##
71 | # Sort a[lo:hi] (inclusive bounds) in place into ascending order and return a.
72 | function qsort!(a,lo,hi)
73 | i, j = lo, hi
74 | while i < hi
75 | pivot = a[(lo+hi)>>>1]  # middle element; >>>1 halves the index sum
76 | while i <= j
77 | while a[i] < pivot; i += 1; end
78 | while a[j] > pivot; j -= 1; end
79 | if i <= j
80 | a[i], a[j] = a[j], a[i]
81 | i, j = i+1, j-1
82 | end
83 | end
84 | if lo < j; qsort!(a,lo,j); end  # only the left part recurses
85 | lo, j = i, hi  # the right part is handled by the next pass of the outer loop
86 | end
87 | return a
88 | end
89 |
90 | sortperf(n) = qsort!(rand(n), 1, n)
91 | Test.@test issorted(sortperf(5000))
92 | @timeit sortperf(5000) "recursion_quicksort" "Sorting of random numbers using quicksort"
93 |
94 | ## slow pi series ##
95 | # Recompute sum(1/k^2, k = 1..10000) 500 times with explicit loops; return the last sum.
96 | function pisum()
97 | acc = 0.0
98 | for rep = 1:500
99 | acc = 0.0
100 | for k = 1:10000
101 | acc += 1.0/(k*k)
102 | end
103 | end
104 | return acc
105 | end
106 |
107 | Test.@test abs(pisum()-1.644834071848065) < 1e-12
108 | @timeit pisum() "iteration_pi_sum" "Summation of a power series"
109 |
110 | ## slow pi series, vectorized ##
111 | # Vectorised pisum: evaluate sum(1/k^2) over k = 1..10000 500 times and return the last result.
112 | function pisumvec()
113 | s = 0.0
114 | a = 1:10000  # was `[1:10000]`, a one-element vector holding the range, on which `a.^2` throws a MethodError
115 | for j = 1:500
116 | s = sum(1 ./ (a.^2))
117 | end
118 | s
119 | end
120 |
121 | #@test abs(pisumvec()-1.644834071848065) < 1e-12
122 | #@timeit pisumvec() "pi_sum_vec"
123 |
124 | ## random matrix statistics ##
125 | # For t trials, build P = [a b c d] and Q = [a b; c d] from four 5x5 randn matrices; return std/mean of tr((P'P)^4) and tr((Q'Q)^4).
126 | function randmatstat(t)
127 | n = 5
128 | v = zeros(t)
129 | w = zeros(t)
130 | for i=1:t
131 | a = randn(n,n)
132 | b = randn(n,n)
133 | c = randn(n,n)
134 | d = randn(n,n)
135 | P = [a b c d]
136 | Q = [a b; c d]
137 | @static if VERSION >= v"0.7.0"
138 | v[i] = LinearAlgebra.tr((P'*P)^4)
139 | w[i] = LinearAlgebra.tr((Q'*Q)^4)
140 | else
141 | v[i] = trace((P'*P)^4)
142 | w[i] = trace((Q'*Q)^4)
143 | end
144 | end
145 | return (Statistics.std(v)/Statistics.mean(v), Statistics.std(w)/Statistics.mean(w))
146 | end
147 |
148 | (s1, s2) = randmatstat(1000)
149 | Test.@test 0.5 < s1 < 1.0 && 0.5 < s2 < 1.0
150 | @timeit randmatstat(1000) "matrix_statistics" "Statistics on a random matrix"
151 |
152 | ## largish random number gen & matmul ##
153 |
154 | @timeit rand(1000,1000)*rand(1000,1000) "matrix_multiply" "Multiplication of random matrices"
155 |
156 | ## printfd ##
157 |
158 | if Sys.isunix()
159 | function printfd(n)  # writes n lines "i i+1" to /dev/null
160 | open("/dev/null", "w") do io  # the do-block form closes io when the block finishes
161 | for i = 1:n
162 | Printf.@printf(io, "%d %d\n", i, i + 1)
163 | end
164 | end
165 | end
166 |
167 | printfd(1)
168 | @timeit printfd(100000) "print_to_file" "Printing to a file descriptor"
169 | end
170 |
171 | #maxrss("micro")
172 |
--------------------------------------------------------------------------------
/perf.js:
--------------------------------------------------------------------------------
1 | const fs = require('fs'); // for print to file benchmark
2 |
3 | (function () {
4 | 'use strict';
5 |
6 | var tmin, i, j, t, n, m, s, a, sum, a0, v, r, C, filename, fd;
7 |
8 | function assert(t) { if (!t) { throw new Error("assertion failed"); } }
9 |
10 | // recursive fib //
11 |
12 | function fib(n) {
13 | if (n < 2) { return n; }
14 | return fib(n-1) + fib(n-2);
15 | }
16 |
17 | tmin = Number.POSITIVE_INFINITY;
18 | for (i=0; i < 5; i++) {
19 | t = (new Date()).getTime();
20 | for (j=0; j < 1000; j++) {
21 | assert(fib(20) === 6765);
22 | }
23 | t = (new Date()).getTime()-t;
24 | if (t < tmin) { tmin = t; }
25 | }
26 | console.log("javascript,recursion_fibonacci," + tmin/1000);
27 |
28 | // parse int //
29 |
30 | tmin = Number.POSITIVE_INFINITY;
31 | for (i=0; i < 5; i++) {
32 | t = (new Date()).getTime();
33 | for (j=0; j < 1000*100; j++) {
34 | n = Math.floor(4294967295*Math.random());
35 | s = n.toString(16);
36 | m = parseInt(s,16);
37 | assert(m === n);
38 | }
39 | t = (new Date()).getTime()-t;
40 | if (t < tmin) { tmin = t; }
41 | }
42 | console.log("javascript,parse_integers," + tmin/100);
43 |
44 |
45 | // print to file
46 |
/**
 * Write n lines of the form "i i+1" to /dev/null using synchronous fs calls.
 * The descriptor is closed in a `finally` block so it is released even if a
 * write throws.
 * @param {number} n number of lines to write (i runs from 1 to n inclusive)
 */
function printfd(n) {
    const fd = fs.openSync("/dev/null", "w");
    try {
        for (let i = 1; i <= n; i++) {
            fs.writeSync(fd, `${i} ${i + 1}\n`);
        }
    } finally {
        fs.closeSync(fd);
    }
}
54 |
// Best of 5 trials of one printfd(100000) call; reported in milliseconds.
tmin = Number.POSITIVE_INFINITY;
for (i = 0; i < 5; i += 1) {
    t = Date.now();
    printfd(100000);
    t = Date.now() - t;
    tmin = Math.min(tmin, t);
}
console.log("javascript,print_to_file," + tmin);
63 |
64 | // mandelbrot set //
65 |
/** Minimal complex number: `re` is the real part, `im` the imaginary part. */
function Complex(real, imag) {
    this.re = real;
    this.im = imag;
}

/** Modulus |z|. */
function complex_abs(z) {
    var squared = z.re*z.re + z.im*z.im;
    return Math.sqrt(squared);
}

/** Squared modulus |z|^2 (no square root). */
function complex_abs2(z) {
    var re = z.re, im = z.im;
    return re*re + im*im;
}

/** Sum z + w as a new Complex. */
function complex_add(z, w) {
    var re = z.re + w.re,
        im = z.im + w.im;
    return new Complex(re, im);
}

/** Product z * w as a new Complex. */
function complex_multiply(z, w) {
    var re = z.re*w.re - z.im*w.im,
        im = z.re*w.im + z.im*w.re;
    return new Complex(re, im);
}
82 |
/**
 * Escape-time count for the Mandelbrot iteration z <- z*z + c with c = the
 * initial z.
 * @param {Complex} z starting point (also used as the constant c)
 * @returns {number} the first step (0-based) at which |z|^2 > 4, or 80 if the
 *   orbit has not escaped after 80 steps
 */
function mandel(z) {
    var limit = 80,
        c = z,
        step = 0;
    while (step < limit) {
        if (complex_abs2(z) > 4) { return step; }
        z = complex_add(complex_multiply(z, z), c);
        step += 1;
    }
    return limit;
}
94 |
/**
 * Evaluate mandel() on a 26 x 21 grid: real parts -2.0 + 0.1*row for
 * row = 0..25, imaginary parts -1.0 + 0.1*col for col = 0..20.
 * @returns {Array<number>} escape counts, stored at index row*21 + col
 */
function mandelperf() {
    var grid = new Array(26*21),
        row, col, re, im;
    for (row = 0; row < 26; row += 1) {
        re = -2.0 + row*0.1;
        for (col = 0; col < 21; col += 1) {
            im = -1.0 + col*0.1;
            grid[row*21 + col] = mandel(new Complex(re, im));
        }
    }
    return grid;
}
110 |
// Validate one full grid before timing: the escape counts must total 14791.
a = mandelperf();
sum = 0;
for (i = 0; i < a.length; i += 1) { sum += a[i]; }
assert(sum === 14791);
a0 = a[0];

// Best of 5 trials of 1000 grid evaluations each; tmin/1000 is ms per grid.
tmin = Number.POSITIVE_INFINITY;
for (i = 0; i < 5; i += 1) {
    t = Date.now();
    for (j = 0; j < 1000; j += 1) {
        a = mandelperf();
        assert(a[0] === a0);
    }
    t = Date.now() - t;
    tmin = Math.min(tmin, t);
}
console.log("javascript,userfunc_mandelbrot," + tmin/1000);
129 |
130 | // numeric vector sort //
131 |
/**
 * Build a plain Array of n values drawn from Math.random().
 * @param {number} n number of elements
 * @returns {Array<number>}
 */
function rand(n) {
    var out = new Array(n),
        k = 0;
    while (k < n) {
        out[k] = Math.random();
        k += 1;
    }
    return out;
}
142 |
/**
 * In-place quicksort of a[lo..hi]; BOTH bounds are inclusive indices.
 * The left partition is sorted by recursion, the right one by continuing the
 * outer loop with lo moved up.
 * @param {Array<number>} a array to sort in place
 * @param {number} lo first index of the range
 * @param {number} hi last index of the range (inclusive)
 */
function qsort_kernel(a, lo, hi) {
    var left = lo,
        right = hi,
        pivot, tmp;
    while (left < hi) {
        pivot = a[Math.floor((lo + hi)/2)];
        while (left <= right) {
            while (a[left] < pivot) { left += 1; }
            while (a[right] > pivot) { right -= 1; }
            if (left <= right) {
                tmp = a[left];
                a[left] = a[right];
                a[right] = tmp;
                left += 1;
                right -= 1;
            }
        }
        if (lo < right) {
            qsort_kernel(a, lo, right);
        }
        // Continue with the right-hand partition.
        lo = left;
        right = hi;
    }
}
171 |
/**
 * Generate n random values and quicksort them.
 * @param {number} n number of elements
 * @returns {Array<number>} the sorted array
 */
function sortperf(n) {
    var v = rand(n);
    // qsort_kernel takes an INCLUSIVE upper index, so the last valid position
    // is n-1. Passing n reads v[n] (undefined) and swaps it into the array.
    qsort_kernel(v, 0, n - 1);
    return v;
}

// Best of 5 trials of 100 sorts each; tmin/100 is ms per sort.
tmin = Number.POSITIVE_INFINITY;
for (i=0; i < 5; i++) {
    t = (new Date()).getTime();
    for (j=0; j < 100; j++) {
        v = sortperf(5000);
        // Check the array just sorted (the smallest of 5000 uniform samples),
        // not `a`, which still holds the Mandelbrot grid from the previous test.
        assert(v[0] < 0.99);
    }
    t = (new Date()).getTime()-t;
    if (t < tmin) { tmin=t; }
}
console.log("javascript,recursion_quicksort," + tmin/100);
189 |
190 | // slow pi series //
191 |
/**
 * Sum 1/k^2 for k = 1..10000, repeated 500 times (each repetition restarts
 * from zero, so only the last pass determines the result).
 * @returns {number} the partial sum, approximately 1.644834071848065
 */
function pisum() {
    // `rep` must be local: using the enclosing scope's `i` here left it at 500
    // on return and ended the caller's 5-trial timing loop after one trial.
    var sum, rep, k;
    sum = 0.0;
    for (rep = 0; rep < 500; rep++) {
        sum = 0.0;
        for (k = 1; k <= 10000; k++) {
            sum += 1.0/(k*k);
        }
    }
    return sum;
}
203 |
// Best of 5 trials of 10 checked pisum() calls each; tmin/10 is ms per call.
tmin = Number.POSITIVE_INFINITY;
for (i = 0; i < 5; i += 1) {
    t = Date.now();
    for (j = 0; j < 10; j += 1) {
        assert(Math.abs(pisum() - 1.644834071848065) < 1e-12);
    }
    t = Date.now() - t;
    tmin = Math.min(tmin, t);
}
console.log("javascript,iteration_pi_sum," + tmin/10);
214 |
215 | // random matrix statistics //
216 |
/**
 * Draw one normally distributed sample by rejection: pick (u, v) uniformly in
 * [-1, 1)^2 until u^2 + v^2 < 1, then scale u.
 * @returns {number}
 */
function gaussian() {
    var u, v, s;
    do {
        u = 2*Math.random() - 1;
        v = 2*Math.random() - 1;
        s = u*u + v*v;
    } while (s >= 1);
    return u*Math.sqrt((-2*Math.log(s))/s);
}
227 |
/**
 * Fill `a` with gaussian() samples; the first sub.length samples are also
 * stored in `sub` at the same positions.
 * @param {Float64Array|Array<number>} a destination, filled over its whole length
 * @param {Float64Array|Array<number>} sub receives a copy of the leading samples
 * @returns the same `a` that was passed in
 */
function randn( a, sub ) {
    var headLen = sub.length,
        total = a.length,
        idx, sample;

    for (idx = 0; idx < headLen; idx += 1) {
        sample = gaussian();
        sub[idx] = sample;
        a[idx] = sample;
    }

    for (idx = headLen; idx < total; idx += 1) {
        a[idx] = gaussian();
    }

    return a;
}
243 |
/**
 * Write the transpose of `src` into `dest`. With row-major storage, `dest` is
 * m x n and `src` is read as n x m: dest[i][j] = src[j][i].
 * @param dest output buffer with at least m*n elements
 * @param src input buffer with at least m*n elements
 * @param {number} m row count of dest
 * @param {number} n column count of dest
 */
function transpose(dest, src, m, n) {
    var row, col;

    for (row = 0; row < m; row += 1) {
        for (col = 0; col < n; col += 1) {
            dest[row*n + col] = src[col*m + row];
        }
    }
}
255 |
/**
 * Row-major matrix product written into a caller-supplied buffer:
 * dest (m x n) = A (m x l) * B (l x n).
 * @param dest output buffer with at least m*n elements
 * @param A left operand, m x l
 * @param B right operand, l x n
 * @param {number} m rows of A and dest
 * @param {number} l columns of A / rows of B
 * @param {number} n columns of B and dest
 */
function matmulCopy( dest, A, B, m, l, n ) {
    var row, col, inner, acc;

    for (row = 0; row < m; row += 1) {
        for (col = 0; col < n; col += 1) {
            acc = 0.0;

            for (inner = 0; inner < l; inner += 1) {
                acc += A[row*l + inner]*B[inner*n + col];
            }

            dest[row*n + col] = acc;
        }
    }
}
274 |
/**
 * Random-matrix statistics benchmark. For each of t rounds, draw four n x n
 * Gaussian blocks a, b, c, d (n = 5), build P (n x 4n) and Q = [a b; c d]
 * (2n x 2n), and record trace((P'P)^4) and trace((Q'Q)^4). Returns the
 * coefficient of variation (sample std / mean) of each trace series.
 *
 * Two defects of the previous version are fixed here:
 *  - the transpose of P was requested with its dimensions swapped, so the
 *    first product was not P'P;
 *  - Q was filled with only the first n elements of each block, repeated on
 *    every row, which made Q a rank <= 2 matrix and not [a b; c d].
 *
 * @param {number} t number of rounds
 * @returns {{s1: number, s2: number}} std/mean for the P and Q series
 */
function randmatstat(t) {
    var n, P, PTransposed, Pt1P, Pt2P, Q, QTransposed, Pt1Q, Pt2Q,
        a, b, c, d, noSub, v, w, i, j, k,
        trP, trQ, v1, v2, w1, w2;
    n = 5;

    // P is consumed below as an n x 4n row-major matrix, Q as 2n x 2n.
    P = new Float64Array( 4*n*n );
    Q = new Float64Array( 4*n*n );

    PTransposed = new Float64Array( P.length );
    QTransposed = new Float64Array( Q.length );

    Pt1P = new Float64Array( (4*n) * (4*n) );
    Pt2P = new Float64Array( (4*n) * (4*n) );
    Pt1Q = new Float64Array( (2*n) * (2*n) );
    Pt2Q = new Float64Array( (2*n) * (2*n) );

    a = new Float64Array( n*n );
    b = new Float64Array( n*n );
    c = new Float64Array( n*n );
    d = new Float64Array( n*n );

    // randn() requires a "sub" buffer; a zero-length one makes it fill only
    // its first argument.
    noSub = new Float64Array( 0 );

    v = new Float64Array( t );
    w = new Float64Array( t );

    for (i = 0; i < t; i++) {
        a = randn( a, noSub );
        b = randn( b, noSub );
        c = randn( c, noSub );
        d = randn( d, noSub );

        // All 4*n*n entries of P are independent Gaussian samples.
        P.set( a, 0*n*n );
        P.set( b, 1*n*n );
        P.set( c, 2*n*n );
        P.set( d, 3*n*n );

        // Q = [a b; c d]: row j of each block goes to the matching row of Q.
        for (j = 0; j < n; j++) {
            for (k = 0; k < n; k++) {
                Q[ 2*n*j + k ]       = a[ j*n + k ];
                Q[ 2*n*j + n + k ]   = b[ j*n + k ];
                Q[ 2*n*(n+j) + k ]   = c[ j*n + k ];
                Q[ 2*n*(n+j) + n + k ] = d[ j*n + k ];
            }
        }

        // transpose(dest, src, m, n) produces an m x n dest from an n x m src,
        // so the 4n x n transpose of the n x 4n matrix P needs (4*n, n).
        transpose( PTransposed, P, 4*n, n );
        matmulCopy( Pt1P, PTransposed, P, 4*n, n, 4*n );   // P'P
        matmulCopy( Pt2P, Pt1P, Pt1P, 4*n, 4*n, 4*n);      // (P'P)^2
        matmulCopy( Pt1P, Pt2P, Pt2P, 4*n, 4*n, 4*n);      // (P'P)^4

        trP = 0;
        for (j = 0; j < 4*n; j++) {
            trP += Pt1P[(4*n+1)*j];
        }
        v[i] = trP;

        transpose( QTransposed, Q, 2*n, 2*n );
        matmulCopy( Pt1Q, QTransposed, Q, 2*n, 2*n, 2*n ); // Q'Q
        matmulCopy( Pt2Q, Pt1Q, Pt1Q, 2*n, 2*n, 2*n);      // (Q'Q)^2
        matmulCopy( Pt1Q, Pt2Q, Pt2Q, 2*n, 2*n, 2*n);      // (Q'Q)^4

        trQ = 0;
        for (j = 0; j < 2*n; j++) {
            trQ += Pt1Q[(2*n+1)*j];
        }
        w[i] = trQ;
    }

    // Running sums and sums of squares for both series.
    v1 = 0.0;
    v2 = 0.0;
    w1 = 0.0;
    w2 = 0.0;
    for (i = 0; i < t; i++) {
        v1 += v[i]; v2 += v[i]*v[i];
        w1 += w[i]; w2 += w[i]*w[i];
    }

    // std/mean with the (t-1)-denominator sample variance.
    return {
        s1: Math.sqrt((t*(t*v2-v1*v1))/((t-1)*v1*v1)),
        s2: Math.sqrt((t*(t*w2-w1*w1))/((t-1)*w1*w1))
    };
}
373 |
// Best of 5 trials of 10 randmatstat(1000) calls each; tmin/10 is ms per call.
// The range checks on r.s1 and r.s2 are left disabled. If they are re-enabled,
// note that a chained `0.5 < x < 1.0` does not test a range in JavaScript;
// write `0.5 < x && x < 1.0`.
tmin = Number.POSITIVE_INFINITY;
for (i = 0; i < 5; i += 1) {
    t = Date.now();
    for (j = 0; j < 10; j += 1) {
        r = randmatstat(1000);
    }
    t = Date.now() - t;
    tmin = Math.min(tmin, t);
}
console.log("javascript,matrix_statistics," + tmin/10);
386 |
387 | // random matrix multiply //
388 |
/**
 * Build a Float64Array of n values drawn from Math.random().
 * @param {number} n number of elements
 * @returns {Float64Array}
 */
function randFloat64(n) {
    var out = new Float64Array(n),
        k = 0;
    while (k < n) {
        out[k] = Math.random();
        k += 1;
    }
    return out;
}
399 |
/**
 * Return a new Float64Array holding the transpose of the row-major m x n
 * matrix A (the result is n x m, row-major).
 * @param A input matrix, m x n
 * @param {number} m rows of A
 * @param {number} n columns of A
 * @returns {Float64Array}
 */
function mattransp(A, m, n) {
    var out = new Float64Array(m * n),
        row, col;

    for (row = 0; row < m; ++row) {
        for (col = 0; col < n; ++col) {
            out[col * m + row] = A[row * n + col];
        }
    }

    return out;
}
413 |
/**
 * Row-major matrix product C (m x n) = A (m x l) * B (l x n), returned as a
 * new Float64Array.
 * @param A left operand, m x l
 * @param B right operand, l x n
 * @param {number} m rows of A
 * @param {number} l columns of A / rows of B
 * @param {number} n columns of B
 * @returns {Float64Array}
 */
function matmul(A, B, m, l, n) {
    var out = new Float64Array(m*n),
        Bt, row, col, inner, acc;

    // Work on the transpose of B so that the innermost loop walks both
    // operands through consecutive memory locations.
    Bt = mattransp(B, l, n);

    for (row = 0; row < m; row += 1) {
        for (col = 0; col < n; col += 1) {
            acc = 0.0;

            for (inner = 0; inner < l; inner += 1) {
                acc += A[row*l + inner]*Bt[col*l + inner];
            }

            out[row*n + col] = acc;
        }
    }

    return out;
}
444 |
/**
 * Multiply two freshly generated n x n matrices of Math.random() values.
 * @param {number} n matrix dimension
 * @returns {Float64Array} the n x n product, row-major
 */
function randmatmul(n) {
    var left = randFloat64(n*n),
        right = randFloat64(n*n);
    return matmul(left, right, n, n, n);
}
452 |
// A single timed trial (no repeat loop) of one 1000 x 1000 random matrix
// product; reported in milliseconds.
tmin = Number.POSITIVE_INFINITY;
t = Date.now();
C = randmatmul(1000);
assert(0 <= C[0]);
t = Date.now() - t;
tmin = Math.min(tmin, t);
console.log("javascript,matrix_multiply," + tmin);
460 | }());
461 |
--------------------------------------------------------------------------------
/perf.lua:
--------------------------------------------------------------------------------
1 |
-- Print a warning (the run still continues) when LuaJIT reports an
-- architecture other than x64.
if jit.arch ~= 'x64' then
  print('WARNING: please use BIT=64 for optimal OpenBLAS performance')
end
5 |
6 | local ffi = require 'ffi'
7 | local bit = require 'bit'
8 | local time = require 'time'
9 | local alg = require 'sci.alg'
10 | local prng = require 'sci.prng'
11 | local stat = require 'sci.stat'
12 | local dist = require 'sci.dist'
13 | local complex = require 'sci.complex'
14 |
15 | local min, sqrt, random, abs = math.min, math.sqrt, math.random, math.abs
16 | local cabs = complex.abs
17 | local rshift = bit.rshift
18 | local format = string.format
19 | local nowutc = time.nowutc
20 | local rng = prng.std()
21 | local vec, mat, join = alg.vec, alg.mat, alg.join
22 | local sum, trace = alg.sum, alg.trace
23 | local var, mean = stat.var, stat.mean
24 |
25 | --------------------------------------------------------------------------------
-- Call f() once and return the elapsed time in milliseconds followed by the
-- first two values f returned.
local function elapsed(f)
  local started = nowutc()
  local r1, r2 = f()
  local finished = nowutc()
  return (finished - started):tomilliseconds(), r1, r2
end
32 |
-- Run f repeatedly and print "lua,<name>,<best time in ms>".
-- Repeats until it has run more than 5 times AND at least 2 seconds have
-- passed since the start. When `check` is given it is called with the first
-- two results of every run.
local function timeit(f, name, check)
  local best = math.huge
  local runs = 0
  local began = nowutc()
  repeat
    runs = runs + 1
    local ms, val1, val2 = elapsed(f)
    best = min(best, ms)
    if check then
      check(val1, val2)
    end
  until runs > 5 and (nowutc() - began):toseconds() >= 2
  io.write(format('lua,%s,%g\n', name, best))
end
46 |
47 | --------------------------------------------------------------------------------
-- Naive doubly-recursive Fibonacci: returns n for n < 2, otherwise
-- fib(n-1) + fib(n-2).
local function fib(n)
  if n < 2 then return n end
  return fib(n-1) + fib(n-2)
end
55 |
56 | timeit(function() return fib(20) end, 'recursion_fibonacci', function(x) assert(x == 6765) end)
57 |
-- Round-trip 1000 random integers through a '0x...' hex string and back,
-- asserting each round trip inside the loop. Returns the last value drawn and
-- its parsed counterpart.
local function parseint()
  local upper = 2^32 - 1
  local n, m
  for _ = 1, 1000 do
    n = random(upper)
    local hex = format('0x%x', tonumber(n))
    m = tonumber(hex)
    assert(n == m)
  end
  return n, m
end
69 |
70 | timeit(parseint, 'parse_integers')
71 |
-- Sum of the squares of z[1] and z[2].
-- NOTE(review): assumes the complex value exposes its real part at index 1 and
-- its imaginary part at index 2 -- confirm against the complex type in use.
local function cabs2( z )
  local p, q = z[1], z[2]
  return p*p + q*q
end
75 |
-- Escape-time count for z <- z*z + c with c = the initial z: returns the
-- 0-based step at which cabs2(z) first exceeds 4, or 80 if that never happens
-- within 80 steps.
local function mandel(z)
  local limit = 80
  local c = z
  for step = 0, limit - 1 do
    if cabs2(z) > 4 then
      return step
    end
    z = z*z + c
  end
  return limit
end
-- Evaluate mandel() on a 26 x 21 grid: real part (row - 21)*0.1 for
-- row = 1..26, imaginary part (col - 11)*0.1 for col = 1..21.
local function mandelperf()
  local grid = mat(26, 21)
  for row = 1, 26 do
    for col = 1, 21 do
      local re = (row - 21)*0.1
      local im = (col - 11)*0.1
      grid[{row, col}] = mandel(re + im*1i)
    end
  end
  return grid
end

-- Every timed run is checked: the escape counts must total 14791.
timeit(mandelperf, 'userfunc_mandelbrot', function(a) assert(sum(a) == 14791) end)
99 |
-- In-place quicksort of a[lo..hi] (both bounds inclusive). The left partition
-- is sorted recursively, the right one by continuing the outer loop.
-- Returns the same array `a`.
local function qsort(a, lo, hi)
  local left, right = lo, hi
  while left < hi do
    local pivot = a[rshift(lo+hi, 1)]
    while left <= right do
      while a[left] < pivot do left = left + 1 end
      while a[right] > pivot do right = right - 1 end
      if left <= right then
        local tmp = a[left]
        a[left] = a[right]
        a[right] = tmp
        left = left + 1
        right = right - 1
      end
    end
    if lo < right then qsort(a, lo, right) end
    lo = left
    right = hi
  end
  return a
end
117 |
-- Fill positions 1..5000 of an FFI double array with rng samples and
-- quicksort that range. The array is allocated with n+1 slots because the
-- code indexes it from 1.
local function sortperf()
  local count = 5000
  local data = ffi.new('double[?]', count+1)
  for k = 1, count do
    data[k] = rng:sample()
  end
  return qsort(data, 1, count)
end

-- Every timed run is checked for non-decreasing order.
timeit(sortperf, 'recursion_quicksort', function(x)
  for k = 2, 5000 do
    assert(x[k-1] <= x[k])
  end
end
)
133 |
-- Sum 1/k^2 for k = 1..10000, repeated 500 times; the accumulator restarts at
-- zero on every repetition, so the last pass determines the result.
local function pisum()
  local acc
  for _ = 1, 500 do
    acc = 0
    for k = 1, 10000 do
      acc = acc + 1 / (k*k)
    end
  end
  return acc
end
144 |
145 | timeit(pisum, 'iteration_pi_sum', function(x)
146 | assert(abs(x - 1.644834071848065) < 1e-12)
147 | end)
148 |
-- Build an r x c matrix and fill elements 1..#m with rng samples.
local function rand(r, c)
  local m = mat(r, c)
  for k = 1, #m do
    m[k] = rng:sample()
  end
  return m
end
156 |
-- Build an r x c matrix and fill elements 1..#m with samples drawn from
-- dist.normal(0, 1); the distribution object is created anew for each element.
local function randn(r, c)
  local m = mat(r, c)
  for k = 1, #m do
    m[k] = dist.normal(0, 1):sample(rng)
  end
  return m
end
164 |
-- Random-matrix statistics benchmark: for each of t rounds, draw four 5 x 5
-- randn blocks, combine them into P and Q, and record a trace per matrix.
-- Returns sqrt(var)/mean for each of the two trace series.
local function randmatstat(t)
  local n = 5
  local v, w = vec(t), vec(t)
  for i=1,t do
    local a, b, c, d = randn(n, n), randn(n, n), randn(n, n), randn(n, n)
    -- NOTE(review): presumably P is the four blocks side by side and Q the
    -- 2 x 2 block matrix [a b; c d] -- confirm against the alg.join API.
    local P = join(a..b..c..d)
    local Q = join(a..b, c..d)
    -- NOTE(review): these lines use syntax extensions that are not standard
    -- Lua; presumably they compute trace((P'P)^4) and trace((Q'Q)^4) -- confirm.
    v[i] = trace((P[]`**P[])^^4)
    w[i] = trace((Q[]`**Q[])^^4)
  end
  return sqrt(var(v))/mean(v), sqrt(var(w))/mean(w)
end

-- Every timed run is checked: both ratios must lie strictly between 0.5 and 1.0.
timeit(function() return randmatstat(1000) end, 'matrix_statistics',
  function(s1, s2)
    assert( 0.5 < s1 and s1 < 1.0 and 0.5 < s2 and s2 < 1.0 )
  end)
182 |
-- Multiply two freshly generated n x n rand() matrices.
local function randmatmult(n)
  local a, b = rand(n, n), rand(n, n)
  -- NOTE(review): `a[]**b[]` is a syntax extension, not standard Lua;
  -- presumably the matrix product of a and b -- confirm.
  return a[]**b[]
end

-- The timed function includes generating both matrices; no result check is passed.
timeit(function() return randmatmult(1000) end, 'matrix_multiply')
189 |
-- Skipped on Windows because the benchmark writes to /dev/null.
if jit.os ~= 'Windows' then
  -- Write n lines of the form "i i+1" to /dev/null.
  local function printfd(n)
    local sink = io.open('/dev/null','w')
    for k = 1, n do
      sink:write(format('%d %d\n', k, k+1))
    end
    sink:close()
  end

  timeit(function() return printfd(100000) end, 'print_to_file')
end
201 |
--------------------------------------------------------------------------------
/perf.m:
--------------------------------------------------------------------------------
1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 | %% Main function. All the tests are run here. %%
3 | %% The functions declarations can be found at the end. %%
4 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5 |
function perf()
    % PERF  Run every micro-benchmark once for validation, then time it.
    % Each timeit call prints one "<lang>,<name>,<milliseconds>" line.

    warning off;

    f = fib(20);
    assert(f == 6765)
    timeit('recursion_fibonacci', @fib, 20)

    timeit('parse_integers', @parseintperf, 1000)

    %% array constructors %%

    %o = ones(200,200);
    %assert(all(o) == 1)
    %timeit('ones', @ones, 200, 200)

    %assert(all(matmul(o) == 200))
    %timeit('AtA', @matmul, o)

    mandel(complex(-.53,.68));
    assert(sum(sum(mandelperf(true))) == 14791)
    timeit('userfunc_mandelbrot', @mandelperf, true)

    assert(issorted(sortperf(5000)))
    timeit('recursion_quicksort', @sortperf, 5000)

    s = pisum(true);
    assert(abs(s-1.644834071848065) < 1e-12);
    timeit('iteration_pi_sum',@pisum, true)

    %s = pisumvec(true);
    %assert(abs(s-1.644834071848065) < 1e-12);
    %timeit('pi_sum_vec',@pisumvec, true)

    % Check BOTH statistics against the open interval (0.5, 1.0). Previously
    % s2 was computed but never checked, and the s1 bound was expressed
    % through round(10*s1).
    [s1, s2] = randmatstat(1000);
    assert(0.5 < s1 && s1 < 1.0 && 0.5 < s2 && s2 < 1.0);
    timeit('matrix_statistics', @randmatstat, 1000)

    timeit('matrix_multiply', @randmatmul, 1000);

    printfd(1)
    timeit('print_to_file', @printfd, 100000)

end
50 |
51 |
52 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
53 | %% Functions declarations %%
54 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
55 |
function assert(bool)
    % ASSERT  Raise the error 'Assertion failed' when BOOL is false.
    if ~bool, error('Assertion failed'); end
end
61 |
function timeit(name, func, varargin)
    % TIMEIT  Call FUNC(VARARGIN{:}) five times and print the fastest run.
    %   Output line: "<lang>,<name>,<milliseconds with 8 decimals>", where
    %   <lang> is 'octave' when OCTAVE_VERSION exists and 'matlab' otherwise.
    if exist('OCTAVE_VERSION') ~= 0
        lang = 'octave';
    else
        lang = 'matlab';
    end

    nexpt = 5;
    times = zeros(nexpt, 1);

    for trial = 1:nexpt
        tic(); func(varargin{:}); times(trial) = toc();
    end

    % toc() is in seconds; report the minimum in milliseconds.
    fprintf ('%s,%s,%.8f\n', lang, name, min(times)*1000);
end
78 |
79 | %% recursive fib %%
80 |
function f = fib(n)
    % FIB  Naive doubly-recursive Fibonacci: n for n < 2, else fib(n-1)+fib(n-2).
    if n >= 2
        f = fib(n-1) + fib(n-2);
    else
        f = n;
    end
end
89 |
90 | %% parse int %%
91 |
function n = parseintperf(t)
    % PARSEINTPERF  Round-trip T random integers through an 8-digit hex string.
    %   Each value is fix(rand*2^32); the parsed result is asserted equal to
    %   the original inside the loop. Returns the last value drawn.
    for k = 1:t
        n = fix(rand*(2^32));
        hexstr = sprintf('%08x',n);
        parsed = sscanf(hexstr,'%x');
        assert(parsed == n);
    end
end
100 |
101 | %% matmul and transpose %%
102 |
103 | %function oo = matmul(o)
104 | % oo = o * o.';
105 | %end
106 |
107 | %% mandelbrot set: complex arithmetic and comprehensions %%
108 |
function r = abs2(z)
    % ABS2  Squared modulus of z: real(z)^2 + imag(z)^2 (no square root).
    re = real(z);
    im = imag(z);
    r = re*re + im*im;
end
112 |
function n = mandel(z)
    % MANDEL  Escape-time count for z <- z^2 + c, with c = the initial z.
    %   Returns the 0-based step at which abs2(z) first exceeds 4, or 80 if
    %   that does not happen within 80 steps.
    c = z;
    for n = 0:79
        if abs2(z) > 4
            return
        end
        z = z^2 + c;
    end
    n = 80;
end
124 |
function M = mandelperf(ignore)
    % MANDELPERF  Evaluate mandel on the grid x = -2:0.1:0.5, y = -1:0.1:1.
    %   M(r,c) holds the count for the point x(c) + y(r)*1i. The argument is
    %   not used by the body.
    x = -2.0:.1:0.5;
    y = -1:.1:1;
    nrows = length(y);
    ncols = length(x);
    M = zeros(nrows, ncols);
    for r = 1:nrows
        for c = 1:ncols
            M(r,c) = mandel(x(c) + y(r)*1i);
        end
    end
end
135 |
136 | %% numeric vector quicksort %%
137 |
function b = qsort(a)
    % QSORT  Return a sorted copy of vector A using qsort_kernel.
    b = qsort_kernel(a, 1, length(a));
end

function a = qsort_kernel(a, lo, hi)
    % QSORT_KERNEL  Quicksort a(lo:hi), both bounds inclusive; returns the array.
    %   The left partition is sorted recursively, the right one by continuing
    %   the outer loop.
    left = lo;
    right = hi;
    while left < hi
        pivot = a(floor((lo+hi)/2));
        while left <= right
            while a(left) < pivot, left = left + 1; end
            while a(right) > pivot, right = right - 1; end
            if left <= right
                tmp = a(left);
                a(left) = a(right);
                a(right) = tmp;
                left = left + 1;
                right = right - 1;
            end
        end
        if lo < right
            a = qsort_kernel(a, lo, right);
        end
        lo = left;
        right = hi;
    end
end
163 |
function v = sortperf(n)
    % SORTPERF  Sort an n-by-1 vector of rand values with qsort.
    unsorted = rand(n,1);
    v = qsort(unsorted);
end
168 |
169 | %% slow pi series %%
170 |
function total = pisum(ignore)
    % PISUM  Sum 1/k^2 for k = 1..10000, repeated 500 times.
    %   The accumulator restarts at zero on each repetition, so the last pass
    %   determines the result. The argument is not used by the body.
    total = 0.0;
    for rep = 1:500
        total = 0.0;
        for k = 1:10000
            total = total + 1.0/(k*k);
        end
    end
end
180 |
181 | %% slow pi series, vectorized %%
182 |
function s = pisumvec(ignore)
    % PISUMVEC  Vectorized variant of pisum: sum(1./k.^2) for k = 1..10000,
    %   evaluated 500 times. The argument is not used by the body.
    k = 1:10000;
    for rep = 1:500
        s = sum(1 ./ (k.^2));
    end
end
189 |
190 | %% random matrix statistics %%
191 |
function [s1, s2] = randmatstat(t)
    % RANDMATSTAT  Random-matrix statistics benchmark.
    %   For each of T rounds, draws four 5x5 randn blocks a, b, c, d, forms
    %   P = [a b c d] and Q = [a b; c d], and records trace((P.'*P)^4) and
    %   trace((Q.'*Q)^4). Returns std/mean of each trace series.
    n = 5;
    trP = zeros(t,1);
    trQ = zeros(t,1);
    for k = 1:t
        a = randn(n, n);
        b = randn(n, n);
        c = randn(n, n);
        d = randn(n, n);
        P = [a b c d];
        Q = [a b; c d];
        trP(k) = trace((P.'*P)^4);
        trQ(k) = trace((Q.'*Q)^4);
    end
    s1 = std(trP)/mean(trP);
    s2 = std(trQ)/mean(trQ);
end
209 |
function t = mytranspose(x)
    % MYTRANSPOSE  Element-by-element transpose of matrix X (no conjugation).
    [nrows, ncols] = size(x);
    t = zeros(ncols, nrows);
    for r = 1:ncols
        for c = 1:nrows
            t(r,c) = x(c,r);
        end
    end
end
219 |
220 | %% largish random number gen & matmul %%
221 |
function X = randmatmul(n)
    % RANDMATMUL  Product of two freshly generated n-by-n rand matrices.
    A = rand(n,n);
    B = rand(n,n);
    X = A*B;
end
225 |
226 | %% printf %%
227 |
function printfd(n)
    % PRINTFD  Write N lines of the form "i i+1" to /dev/null.
    fid = fopen('/dev/null','w');
    for k = 1:n
        fprintf(fid, '%d %d\n', k, k + 1);
    end
    fclose(fid);
end
235 |
--------------------------------------------------------------------------------
/perf.nb:
--------------------------------------------------------------------------------
1 | (* Benchmark script *)
2 |
3 | (* Set up output stream *)
4 | SetOptions[$Output, FormatType -> OutputForm];
5 |
6 | (* Test if system has a C compiler and if so set target to "C"*)
7 | Needs["CCompilerDriver`"];
8 | If[ Length[CCompilers[]] > 0,
9 | $CompilationTarget = "C"
10 | ];
11 |
12 |
13 | ClearAll[$printOutput];
14 | $printOutput = True;
15 |
(* timeit[ex, name]: evaluate the held expression ex five times and, when
   $printOutput is True, print "mathematica,<name>,<best time in ms>".
   HoldFirst keeps ex unevaluated until it is timed. *)
ClearAll[timeit];
SetAttributes[timeit, HoldFirst];
timeit[ex_, name_String] := Module[
    {best},
    (* AbsoluteTiming reports seconds; keep the minimum of the five runs. *)
    best = Min[Table[N[First[AbsoluteTiming[ex]]], {5}]];
    If[$printOutput,
        Print["mathematica,", name, ",", best*1000];
    ];
];
31 |
(* test[ex]: forward the held expression to Assert. On[Assert] turns on the
   messages that Assert issues. *)
ClearAll[test];
SetAttributes[test, HoldFirst];
test[ex_] := Assert[ex];
On[Assert];
36 |
37 |
38 | (* recursive fib *)
39 |
(* Compiled, naive doubly-recursive Fibonacci. The explicit
   CompilationTarget -> "WVM" overrides, for this function only, any
   $CompilationTarget = "C" chosen at the top of the script. *)
ClearAll[fib];
fib = Compile[{{n, _Integer}},
    If[n < 2, n, fib[n - 1] + fib[n - 2]],
    CompilationTarget -> "WVM"
];

(* Validate once, then time the same call. *)
test[fib[20] == 6765];
timeit[fib[20], "recursion_fibonacci"];
48 |
49 | (* parse integer *)
50 |
(* parseintperf[t]: t times, draw a random integer in 0..4294967295, convert
   it to a base-16 string and back, and assert the round trip. Returns the
   last integer drawn. *)
ClearAll[parseintperf];
parseintperf[t_] := Module[
    {n, m, s},
    Do[
        n = RandomInteger[{0, 4294967295}];
        s = IntegerString[n, 16];
        m = FromDigits[s, 16];
        test[ m == n];
        ,
        {t}
    ];
    n
];

timeit[parseintperf[1000], "parse_integers"];
66 |
67 | (* print to file *)
68 |
(* printfdperf[t]: write t lines built with StringForm["`1` `2`\n", i, i + 1]
   to /dev/null.
   NOTE(review): the "print_to_file" label used here is emitted a second time
   by the printfd benchmark at the end of this script, and unlike that one
   this block is not guarded by an operating-system check. *)
ClearAll[printfdperf];
printfdperf[t_] := Module[
    {i,fd,filename},
    filename = "/dev/null";
    fd = OpenWrite[filename];
    For[i=1, i<=t, ++i,
        WriteString[fd, StringForm["`1` `2`\n", i, i + 1]];
    ];
    Close[fd];
];

timeit[printfdperf[100000], "print_to_file"];
81 |
82 | (* array constructors *)
83 |
(* Check only (nothing is timed): every entry of a 200 x 200 array of ones equals 1. *)
test[ And @@ And @@@ Thread /@ Thread[ConstantArray[1, {200, 200}] == 1]];

(* matmul and transpose *)

(* Check only: every entry of A.ConjugateTranspose[A] equals 200 for the
   200 x 200 all-ones matrix A. *)
ClearAll[A];
A = ConstantArray[1, {200, 200}];
test[And @@ And @@@ Thread /@ Thread[A.ConjugateTranspose[A] == 200]];
91 |
92 | (* mandelbrot set: complex arithmetic and comprehensions *)
93 |
94 | ClearAll[abs2];
95 | (*abs2[z_] := Module[
96 | Re(z)*Re(z) + Im(z)*Im(z);
97 | ];*)
98 |
99 | ClearAll[mandel];
100 | (*mandel[zin_] := Module[
101 | {z, c, maxiter, n},
102 | z = zin;
103 | c = z;
104 | maxiter = 80;
105 | Do[
106 | If[ Abs2[z] > 4,
107 | maxiter = n-1;
108 | Break[]
109 | ];
110 | z = z^2 + c;
111 | ,
112 | {n, 1, maxiter}
113 | ];
114 | maxiter
115 | ];*)
(* Compiled escape-time count for z <- z^2 + c with c = zin.
   The loop variable n runs from 1 to 80; at the first n where Abs[z] > 2 the
   result is set to n-1 and the loop is left. If that never happens the
   result stays 80. *)
mandel = Compile[{{zin, _Complex}},
    Module[
        {z = zin, c = zin, maxiter = 80, n = 0},
        Do[
            If[ Abs[z] > 2,
                maxiter = n-1;
                Break[]
            ];
            z = z^2 + c;
            ,
            {n, 1, maxiter}
        ];
        maxiter
    ]
];
131 |
(* mandelperf[]: table of mandel values over imaginary parts -1..1 (outer
   index) and real parts -2..0.5 (inner index), both in steps of 0.1. *)
ClearAll[mandelperf];
mandelperf[] := Table[mandel[r + i*I], {i, -1., 1., 0.1}, {r, -2.0, 0.5, 0.1}];

(* Validate the grand total once, then time the grid evaluation. *)
test[ Total[mandelperf[], 2] == 14791];
timeit[mandelperf[], "userfunc_mandelbrot"];
137 |
138 | (* numeric vector sort *)
139 |
140 | ClearAll[qsort];
141 | (* qsort[ain_, loin_, hiin_] := Module[
142 | {a = ain, i = loin, j = hiin, lo = loin, hi = hiin, pivot},
143 | While[ i < hi,
144 | pivot = a[[BitShiftRight[lo + hi] ]];
145 | While[ i <= j,
146 | While[a[[i]] < pivot, i++];
147 | While[a[[j]] > pivot, j--];
148 | If[ i <= j,
149 | a[[{i,j}]] = a[[{j, i}]];
150 | i++; j--;
151 | ];
152 | ];
153 | If[ lo < j, a = qsort[a, lo, j] ];
154 | {lo, j} = {i, hi};
155 | ];
156 | a
157 | ]; *)
(* Compiled quicksort of positions loin..hiin (inclusive) of a real vector.
   It works on a local copy `a` of the argument and returns that copy. The
   recursive step passes the sub-list a[[lo;;j]] with bounds renumbered to
   1..(j - lo + 1) and writes the sorted sub-list back into a. *)
qsort = Compile[
    {{ain, _Real, 1}, {loin, _Integer}, {hiin, _Integer}},
    Module[
        {a = ain, i = loin, j = hiin, lo = loin, hi = hiin, pivot},
        While[ i < hi,
            pivot = a[[ Floor[(lo + hi)/2] ]];
            While[ i <= j,
                While[a[[i]] < pivot, i++];
                While[a[[j]] > pivot, j--];
                If[ i <= j,
                    (* swap elements i and j *)
                    a[[{i,j}]] = a[[{j, i}]];
                    i++; j--;
                ];
            ];
            If[ lo < j, a[[lo;;j]] = qsort[ a[[lo;;j]], 1, j - lo + 1] ];
            (* continue with the right-hand partition *)
            {lo, j} = {i, hi};
        ];
        a
    ]
];
178 |
179 |
(* sortperf[n]: quicksort a vector of n values from RandomReal[1, n]. *)
ClearAll[sortperf];
sortperf[n_] := Module[{vec = RandomReal[1, n]}, qsort[vec, 1, n]];

(* Validate ordering once, then time the same call. *)
test[OrderedQ[sortperf[5000]] ];
timeit[sortperf[5000], "recursion_quicksort"];
185 |
186 | (* slow pi series *)
187 |
(* Compiled pi-sum benchmark: evaluates Sum[1/(k*k), {k, 1, 10000}] 500 times,
   overwriting `sum` each time, and returns the last value. *)
ClearAll[pisum];
pisum = Compile[ {},
    Module[
        {sum = 0.`},
        Do[sum = Sum[1/(k*k), {k, 1, 10000}],
            {500}];
        sum
    ]
];


(* Validate to within 1e-12, then time the same call. *)
test[Abs[pisum[] - 1.644834071848065`] < 1.`*^-12 ];
timeit[pisum[], "iteration_pi_sum"];
201 |
202 | (* slow pi series, vectorized *)
203 |
(* Vectorized variant of pisum: Total[1/Range[1, 10000]^2], evaluated 500
   times. It is defined here, but its check and timing below are commented out. *)
pisumvec = Compile[{},
    Module[
        {sum = 0.},
        Do[
            sum = Total[1/Range[1, 10000]^2];,
            {500}
        ];
        sum
    ]
];
214 |
215 | (* test[Abs[pisumvec[] - 1.644834071848065`] < 1.`*^-12 ];*)
216 | (* timeit[pisumvec[], "pi_sum_vec"];*)
217 |
218 | (* random matrix statistics *)
219 |
220 | ClearAll[randmatstat];
221 | (*randmatstat[t_] := Module[
222 | {n, v, w, a, b, c, d, P, Q},
223 | n = 5;
224 | v = w = ConstantArray[0., {t}];
225 | Do[
226 | a = RandomReal[NormalDistribution[], {n, n}];
227 | b = RandomReal[NormalDistribution[], {n, n}];
228 | c = RandomReal[NormalDistribution[], {n, n}];
229 | d = RandomReal[NormalDistribution[], {n, n}];
230 | P = Join[a, b, c, d, 2];
231 | Q = ArrayFlatten[{{a, b}, {c, d}}];
232 | v[[i]] = Tr[MatrixPower[Transpose[P].P, 4]];
233 | w[[i]] = Tr[MatrixPower[Transpose[Q].Q, 4]];
234 | ,
235 | {i, 1, t}
236 | ];
237 | {StandardDeviation[v]/Mean[v], StandardDeviation[w]/Mean[w]}
238 | ];*)
(* Compiled random-matrix statistics benchmark. For each of t rounds it draws
   four n x n normal blocks (n = 5), forms P = Join[a, b, c, d, 2] and
   Q = ArrayFlatten[{{a, b}, {c, d}}], and records Tr of the 4th matrix power
   of Transpose[P].P and of Transpose[Q].Q. Returns
   {StandardDeviation/Mean of v, StandardDeviation/Mean of w}. *)
randmatstat = Compile[{{t, _Integer}},
    Module[
        {
            n = 5,
            v = ConstantArray[0., t],
            w = ConstantArray[0., t],
            (* Placeholder initial values; each of these is reassigned inside the loop. *)
            a = {{0.}}, b = {{0.}},
            c = {{0.}}, d = {{0.}},
            P = {{0.}}, Q = {{0.}}
        },
        Do[
            a = RandomReal[NormalDistribution[], {n, n}];
            b = RandomReal[NormalDistribution[], {n, n}];
            c = RandomReal[NormalDistribution[], {n, n}];
            d = RandomReal[NormalDistribution[], {n, n}];
            P = Join[a, b, c, d, 2];
            Q = ArrayFlatten[{{a, b}, {c, d}}];
            v[[i]] = Tr[MatrixPower[Transpose[P].P, 4]];
            w[[i]] = Tr[MatrixPower[Transpose[Q].Q, 4]];
            ,
            {i, 1, t}
        ];
        {StandardDeviation[v]/Mean[v], StandardDeviation[w]/Mean[w]}
    ],
    (* Third argument of Compile: presumably declares that ArrayFlatten
       expressions yield a rank-2 real array -- confirm against the Compile docs. *)
    {{_ArrayFlatten, _Real, 2}}
];


(* Validate once (both ratios strictly between 0.5 and 1.0), then time. *)
ClearAll[s1,s2];
{s1, s2} = randmatstat[1000];
test[0.5 < s1 < 1.0 && 0.5 < s2 < 1.0];

timeit[randmatstat[1000], "matrix_statistics"];
272 |
273 | (* largish random number gen & matmul *)
274 |
(* The timed expression includes generating both 1000 x 1000 matrices as well as the product. *)
timeit[RandomReal[1, {1000, 1000}].RandomReal[1, {1000, 1000}], "matrix_multiply"];
276 |
277 | (* printfd *)
278 |
279 | (* only on unix systems *)
(* Defined and timed only when $OperatingSystem is "Linux" or "MacOSX",
   because the benchmark writes to /dev/null. *)
If[ $OperatingSystem == "Linux"||$OperatingSystem == "MacOSX",

    (* printfd[n]: write n lines "i i+1" to /dev/null, passing the pieces of
       each line to WriteString as separate arguments. *)
    ClearAll[printfd];
    printfd[n_] := Module[
        {stream},
        stream = OpenWrite["/dev/null"];
        Do[
            WriteString[stream, i, " ", i+1, "\n" ];
            ,
            {i, 1, n}
        ];
        Close[stream];
    ];

    timeit[printfd[100000], "print_to_file"];

];
297 |
--------------------------------------------------------------------------------
/perf.py:
--------------------------------------------------------------------------------
1 | from numpy import *
2 | from numpy.random import rand, randn
3 | from numpy.linalg import matrix_power
4 | import sys
5 | import time
6 | import random
7 |
8 | if sys.version_info < (3,):
9 | range = xrange
10 |
11 | ## fibonacci ##
12 |
def fib(n):
    """Naive doubly-recursive Fibonacci: n for n < 2, else fib(n-1) + fib(n-2)."""
    return n if n < 2 else fib(n-1) + fib(n-2)
17 |
18 | ## quicksort ##
19 |
def qsort_kernel(a, lo, hi):
    """Quicksort a[lo..hi] in place (both bounds inclusive) and return `a`.

    The left partition is sorted by recursion; the right partition is handled
    by continuing the outer loop with `lo` moved up.
    """
    left, right = lo, hi
    while left < hi:
        pivot = a[(lo + hi) // 2]
        while left <= right:
            while a[left] < pivot:
                left += 1
            while a[right] > pivot:
                right -= 1
            if left <= right:
                a[left], a[right] = a[right], a[left]
                left += 1
                right -= 1
        if lo < right:
            qsort_kernel(a, lo, right)
        lo, right = left, hi
    return a
39 |
40 | ## randmatstat ##
41 |
def randmatstat(t):
    """Random-matrix statistics benchmark.

    For each of `t` rounds, draw four 5x5 standard-normal blocks a, b, c, d,
    form P = [a b c d] (5x20) and Q = [[a, b], [c, d]] (10x10), and record
    trace((P^T P)^4) and trace((Q^T Q)^4).

    Returns a tuple (std/mean of the P traces, std/mean of the Q traces).
    """
    n = 5
    v = zeros(t)
    w = zeros(t)
    for idx in range(t):
        a, b, c, d = randn(n, n), randn(n, n), randn(n, n), randn(n, n)
        P = hstack((a, b, c, d))
        Q = vstack((hstack((a, b)), hstack((c, d))))
        PtP = dot(P.T, P)
        QtQ = dot(Q.T, Q)
        v[idx] = trace(matrix_power(PtP, 4))
        w[idx] = trace(matrix_power(QtQ, 4))
    return (std(v)/mean(v), std(w)/mean(w))
56 |
57 | ## randmatmul ##
58 |
def randmatmul(n):
    """Return the product of two freshly generated n x n uniform random matrices."""
    return dot(rand(n, n), rand(n, n))
63 |
64 | ## mandelbrot ##
65 |
def abs2(z):
    """Squared modulus of z: z.real**2 + z.imag**2, with no square root."""
    re, im = z.real, z.imag
    return re*re + im*im
68 |
def mandel(z):
    """Escape-time count for z <- z*z + c, with c = the initial z.

    Returns the 0-based step at which abs2(z) first exceeds 4, or 80 if that
    does not happen within 80 steps.
    """
    limit = 80
    c = z
    for step in range(limit):
        if abs2(z) > 4:
            return step
        z = z*z + c
    return limit
77 |
def mandelperf():
    """Evaluate mandel() on a 26 x 21 grid and return the counts as a flat list.

    Real parts are -2 + 0.1*k for k = 0..25 (outer loop); imaginary parts are
    -1 + 0.1*k for k = 0..20 (inner loop).
    """
    reals = [-2. + 0.1*k for k in range(26)]
    imags = [-1. + 0.1*k for k in range(21)]
    return [mandel(complex(re, im)) for re in reals for im in imags]
82 |
def pisum():
    """Sum 1/k**2 for k = 1..10000, repeated 500 times.

    The accumulator restarts at zero on each repetition, so the last pass
    determines the returned value.
    """
    total = 0.0
    for _ in range(1, 501):
        total = 0.0
        for k in range(1, 10001):
            total += 1.0/(k*k)
    return total
90 |
91 | #### Is this single threaded?
92 | # def pisumvec():
93 | # return numpy.sum(1./(numpy.arange(1,10000)**2))
94 |
def parse_int(t):
    """Round-trip `t` random 32-bit integers through their hex representation.

    Each value is formatted with hex(), parsed back with int(s, 16) and
    asserted equal to the original.

    Returns the last integer drawn, or None when t is 0.
    """
    n = None
    # range(t), not range(1, t): the latter ran only t-1 iterations.
    for _ in range(t):
        n = random.randint(0, 2**32-1)
        s = hex(n)
        # Python 2 appends 'L' to the hex string of a long; strip it before parsing.
        if s[-1] == 'L':
            s = s[0:-1]
        m = int(s, 16)
        assert m == n
    return n
105 |
def printfd(t):
    """Write `t` lines of the form "i i+1" (i = 1..t) to /dev/null.

    The file is opened in a `with` block so it is closed even if a write raises.
    """
    with open("/dev/null", "w") as f:
        # range(1, t+1), not range(1, t): the latter wrote only t-1 lines.
        for i in range(1, t + 1):
            f.write("{:d} {:d}\n".format(i, i+1))
111 |
112 |
def print_perf(name, time):
    """Print one result line "python,<name>,<milliseconds>"; `time` is in seconds."""
    millis = time*1000
    print(",".join(["python", name, str(millis)]))
115 |
116 | ## run tests ##
117 |
if __name__=="__main__":
    # Benchmark driver: each benchmark is validated where a check exists, then
    # timed `mintrials` times; the fastest trial is printed in milliseconds.
    #
    # NOTE: `from numpy import *` at the top of the file shadows built-ins such
    # as sum, abs, all and any, so the checks below avoid generator-based all().

    mintrials = 5

    assert fib(20) == 6765
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        f = fib(20)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf("recursion_fibonacci", tmin)

    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        n = parse_int(1000)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("parse_integers", tmin)

    assert sum(mandelperf()) == 14791
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        mandelperf()
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("userfunc_mandelbrot", tmin)

    tmin = float('inf')
    for i in range(mintrials):
        # 5000 elements, matching the other language implementations;
        # range(1,5000) produced only 4999. Built outside the timed region.
        lst = [ random.random() for _ in range(5000) ]
        t = time.time()
        qsort_kernel(lst, 0, len(lst)-1)
        t = time.time()-t
        if t < tmin: tmin = t
        # Validate outside the timed region that the sort actually sorted.
        assert lst == sorted(lst)
    print_perf ("recursion_quicksort", tmin)

    assert abs(pisum()-1.644834071848065) < 1e-6
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        pisum()
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("iteration_pi_sum", tmin)

    # assert abs(pisumvec()-1.644834071848065) < 1e-6
    # tmin = float('inf')
    # for i in range(mintrials):
    #     t = time.time()
    #     pisumvec()
    #     t = time.time()-t
    #     if t < tmin: tmin = t
    # print_perf ("pi_sum_vec", tmin)

    # Check both statistics; previously s2 was computed but never validated.
    (s1, s2) = randmatstat(1000)
    assert s1 > 0.5 and s1 < 1.0
    assert s2 > 0.5 and s2 < 1.0
    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        randmatstat(1000)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("matrix_statistics", tmin)

    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        C = randmatmul(1000)
        assert C[0,0] >= 0
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("matrix_multiply", tmin)

    tmin = float('inf')
    for i in range(mintrials):
        t = time.time()
        printfd(100000)
        t = time.time()-t
        if t < tmin: tmin = t
    print_perf ("print_to_file", tmin)
201 |
--------------------------------------------------------------------------------
/perfutil.jl:
--------------------------------------------------------------------------------
1 | # This file was formerly a part of Julia. License is MIT: https://julialang.org/license
2 |
3 | import Printf
4 | import Random
5 | import Statistics
6 | import Base.Sys
7 |
# Benchmark configuration read by the timing macros in this file.
# Minimum number of loop iterations per benchmark.
const mintrials = 5
# Minimum accumulated run time; @timeit compares it against a sum of
# 1000*@elapsed samples, i.e. the value is in milliseconds.
const mintime = 2000.0
# Print CSV result rows only when the script was started without arguments.
print_output = isempty(ARGS)
# Submit results to a codespeed server when the first argument is "codespeed".
codespeed = length(ARGS) > 0 && ARGS[1] == "codespeed"

if codespeed
    # These packages are only loaded in codespeed mode.
    using JSON
    using HTTPClient.HTTPC

    # Ensure that we've got the environment variables we want:
    if !haskey(ENV, "JULIA_FLAVOR")
        error( "You must provide the JULIA_FLAVOR environment variable identifying this julia build!" )
    end

    # Setup codespeed data dict for submissions to codespeed's JSON endpoint.  These parameters
    # are constant across all benchmarks, so we'll just let them sit here for now
    csdata = Dict()
    csdata["commitid"] = Base.GIT_VERSION_INFO.commit
    csdata["project"] = "Julia"
    csdata["branch"] = Base.GIT_VERSION_INFO.branch
    csdata["executable"] = ENV["JULIA_FLAVOR"]
    # The machine's hostname identifies the benchmarking environment.
    csdata["environment"] = chomp(read(`hostname`, String))
    csdata["result_date"] = join( split(Base.GIT_VERSION_INFO.date_string)[1:2], " " )    #Cut the timezone out
end
32 |
# Submit one benchmark result to the codespeed server and echo it to stdout.
#
# vals         - raw array of samples; their mean, std, minimum and maximum are reported
# name, desc   - benchmark name and description
# unit         - unit label, stored under "units"
# test_group   - stored under "units_title"
# lessisbetter - whether smaller values are better (default: true)
#
# Fills the global `csdata` dict with the per-benchmark fields before posting.
# Returns true on success; throws (via `error`) when the server answers with
# any HTTP code other than 200 or 202.
function submit_to_codespeed(vals,name,desc,unit,test_group,lessisbetter=true)
    # Points to the server
    codespeed_host = "julia-codespeed.csail.mit.edu"

    # Computed once; used both in the payload and in the progress line.
    avg = Statistics.mean(vals)

    csdata["benchmark"] = name
    csdata["description"] = desc
    csdata["result_value"] = avg
    csdata["std_dev"] = Statistics.std(vals)
    csdata["min"] = minimum(vals)
    csdata["max"] = maximum(vals)
    csdata["units"] = unit
    csdata["units_title"] = test_group
    csdata["lessisbetter"] = lessisbetter

    println( "$name: $avg" )
    # Serialize once so the text that is posted is exactly the text that is echoed.
    payload = json([csdata])
    ret = post( "http://$codespeed_host/result/add/json/", Dict("json" => payload) )
    println( payload )
    if ret.http_code != 200 && ret.http_code != 202
        # `error` throws, so no failure return value can ever be produced here.
        error("Error submitting $name [HTTP code $(ret.http_code)], dumping headers and text: $(ret.headers)\n$(String(ret.body))\n\n")
    end
    return true
end
57 |
# Report the samples collected for one benchmark, then run the garbage collector.
#
# t     - expression yielding the collection of samples
# name  - benchmark name
# desc  - benchmark description (only used for codespeed submissions)
# group - collection of optional test-group values; its first element is used
#         when it is non-empty
macro output_timings(t,name,desc,group)
    # Escape every argument so it is evaluated in the caller's context.
    t = esc(t)
    name = esc(name)
    desc = esc(desc)
    group = esc(group)
    quote
        # If we weren't given anything for the test group, infer off of file path!
        test_group = length($group) == 0 ? basename(dirname(Base.source_path())) : $group[1]
        if codespeed
            # NOTE(review): the unit label is "seconds", but @timeit and
            # @timeit_init in this file pass samples of 1000*@elapsed
            # (milliseconds) -- confirm which is intended.
            submit_to_codespeed( $t, $name, $desc, "seconds", test_group )
        elseif print_output
            # CSV row: julia,<name>,<min>,<max>,<mean>,<std>
            Printf.@printf "julia,%s,%f,%f,%f,%f\n" $name minimum($t) maximum($t) Statistics.mean($t) Statistics.std($t)
        end
        # Force a garbage collection once the result has been reported.
        GC.gc()
    end
end
74 |
# Time the expression `ex` repeatedly and hand the samples to @output_timings.
#
# The loop keeps running until at least `mintrials` iterations have executed
# AND the accumulated time has reached `mintime`.  Each sample is
# 1000*@elapsed, i.e. milliseconds.  The first iteration counts toward the
# accumulated time but its sample is discarded as warm-up.
#
# name, desc - benchmark name and description, forwarded unchanged
# group...   - optional test group, forwarded as a tuple
macro timeit(ex,name,desc,group...)
    quote
        let
            t = Float64[]   # recorded samples (warm-up excluded)
            tot = 0.0       # accumulated time over all iterations, warm-up included
            i = 0           # iterations executed so far
            while i < mintrials || tot < mintime
                e = 1000*(@elapsed $(esc(ex)))
                tot += e
                if i > 0
                    # warm up on first iteration
                    push!(t, e)
                end
                i += 1
            end
            @output_timings t $(esc(name)) $(esc(desc)) $(esc(group))
        end
    end
end
94 |
# Time the expression `ex`, evaluating `init` before every run; `init` sits
# outside the @elapsed measurement, so its cost is not included in the samples.
#
# Runs exactly mintrials+1 iterations (i = 0:mintrials).  Iteration 0 is a
# warm-up whose sample is not stored; iterations 1..mintrials fill `t` with
# 1000*@elapsed values (milliseconds).  Unlike @timeit, `mintime` is not consulted.
#
# name, desc - benchmark name and description, forwarded unchanged
# group...   - optional test group, forwarded as a tuple
macro timeit_init(ex,init,name,desc,group...)
    quote
        t = zeros(mintrials)
        for i=0:mintrials
            $(esc(init))
            e = 1000*(@elapsed $(esc(ex)))
            if i > 0
                # warm up on first iteration
                t[i] = e
            end
        end
        @output_timings t $(esc(name)) $(esc(desc)) $(esc(group))
    end
end
109 |
# Print a benchmark row named "<name>.mem" derived from getrusage(2).
# Does nothing on platforms other than Linux, and nothing when getrusage fails.
#
# name - benchmark label; ".mem" is appended in the output row
function maxrss(name)
    # FIXME: call uv_getrusage instead here
    @static if (Sys.islinux())
        # Zero-filled buffer of 144 bytes (18 Int64 words) for getrusage to fill.
        # `zeros` replaces the former `Vector{Int64}(uninitialized, ...)` + fill!:
        # `uninitialized` is not defined in Julia 1.x (the name became `undef`),
        # so the old code threw UndefVarError.
        # NOTE(review): 144 is presumably sizeof(struct rusage) on 64-bit Linux -- confirm.
        rus = zeros(Int64, div(144,8))
        # First argument 0 selects the calling process (RUSAGE_SELF).
        res = ccall(:getrusage, Int32, (Int32, Ptr{Cvoid}), 0, rus)
        if res == 0
            # NOTE(review): word 5 is presumably ru_maxrss, which Linux reports
            # in kilobytes, making mx megabytes -- confirm against getrusage(2).
            mx = rus[5]/1024
            # Same column layout as the timing rows: min, max, mean, std.
            Printf.@printf "julia,%s.mem,%f,%f,%f,%f\n" name mx mx mx 0
        end
    end
end
122 |
123 |
# Fix the RNG seed so that timings are more consistent from run to run.
# From Julia 0.7 on the seeding function is Random.seed!; older releases use srand.
VERSION < v"0.7.0" ? srand(1776) : Random.seed!(1776)
130 |
--------------------------------------------------------------------------------
/randmtzig.c:
--------------------------------------------------------------------------------
1 | /*
2 | A C-program for MT19937, with initialization improved 2002/2/10.
3 | Coded by Takuji Nishimura and Makoto Matsumoto.
4 | This is a faster version by taking Shawn Cokus's optimization,
5 | Matthew Bellew's simplification, Isaku Wada's real version.
6 | David Bateman added normal and exponential distributions following
7 | Marsaglia and Tsang's Ziggurat algorithm.
8 |
9 | Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
10 | Copyright (C) 2004, David Bateman
11 | All rights reserved.
12 |
13 | Redistribution and use in source and binary forms, with or without
14 | modification, are permitted provided that the following conditions
15 | are met:
16 |
17 | 1. Redistributions of source code must retain the above copyright
18 | notice, this list of conditions and the following disclaimer.
19 |
20 | 2. Redistributions in binary form must reproduce the above copyright
21 | notice, this list of conditions and the following disclaimer in the
22 | documentation and/or other materials provided with the distribution.
23 |
24 | 3. The names of its contributors may not be used to endorse or promote
25 | products derived from this software without specific prior written
26 | permission.
27 |
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
32 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
35 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
36 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
37 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 |
40 |
41 | Any feedback is very welcome.
42 | http://www.math.keio.ac.jp/matumoto/emt.html
43 | email: matumoto@math.keio.ac.jp
44 | */
45 |
46 | /*
47 | Modified by Viral B. Shah for julia to support dsfmt and only __LP64__
48 | systems. 52-bits of randomness are used from the mantissa of random double
49 | precision numbers generated by dsfmt.
50 | */
51 |
52 | #include
53 | #include
54 | #include
55 | #include
56 | #ifndef _MSC_VER
57 | #include
58 | #endif
59 |
60 | #include
61 | #include
62 |
63 | #define DSFMT_DO_NOT_USE_OLD_NAMES
64 | #define DSFMT_MEXP 19937
65 | #include
66 |
/* Integer aliases local to this file.  The widths suggested by the names are
   not enforced here; they rely on the platform's sizes for char, short, int
   and long long (the header comment says only __LP64__ systems are supported). */
typedef ptrdiff_t randmtzig_idx_type;
typedef signed char randmtzig_int8_t;
typedef unsigned char randmtzig_uint8_t;
typedef short randmtzig_int16_t;
typedef unsigned short randmtzig_uint16_t;
typedef int randmtzig_int32_t;
typedef unsigned int randmtzig_uint32_t;
typedef long long randmtzig_int64_t;
typedef unsigned long long randmtzig_uint64_t;

/* Declarations */

/* Generators exported by this file.  randmtzig_randn takes an explicit dSFMT
   state; the randmtzig_gv_* variants take none.
   NOTE(review): "gv" presumably refers to dSFMT's global state -- confirm. */
extern double randmtzig_randn (dsfmt_t *dsfmt);
extern double randmtzig_gv_randn (void);
extern double randmtzig_gv_exprnd (void);
82 |
83 | /* ===== Uniform generators ===== */
84 |
85 | inline static randmtzig_uint64_t gv_randi (void)
86 | {
87 | double r = dsfmt_gv_genrand_close1_open2();
88 | return *((uint64_t *) &r) & 0x000fffffffffffff;
89 | }
90 |
/* Draws a random double on the open interval (0,1) with
   dsfmt_gv_genrand_open_open(), which takes no explicit state argument.
   NOTE(review): the earlier comment advertised 53-bit resolution, while the
   file header says 52 mantissa bits are taken from dSFMT doubles -- confirm. */
inline static double gv_randu (void)
{
    const double u = dsfmt_gv_genrand_open_open();
    return u;
}
96 |
97 | inline static randmtzig_uint64_t randi (dsfmt_t *dsfmt)
98 | {
99 | double r = dsfmt_genrand_close1_open2(dsfmt);
100 | return *((uint64_t *) &r) & 0x000fffffffffffff;
101 | }
102 |
103 | /* generates a random number on (0,1) with 53-bit resolution */
104 | inline static double randu (dsfmt_t *dsfmt)
105 | {
106 | return dsfmt_genrand_open_open(dsfmt);
107 | }
108 |
109 | /* ===== Ziggurat normal and exponential generators ===== */
/* Integer type of the ziggurat k-tables and the random sources the ziggurat
   code draws from; both integer sources expand to gv_randi(). */
# define ZIGINT randmtzig_uint64_t
# define EMANTISSA 4503599627370496 /* 52 bit mantissa */ /* = 2^52 */
# define ERANDI gv_randi() /* 52 bits for mantissa */
# define NMANTISSA 2251799813685248 /* = 2^51 */
# define NRANDI gv_randi() /* 51 bits for mantissa + 1 bit sign */
# define RANDU gv_randu()

/* Number of entries in each ziggurat table. */
#define ZIGGURAT_TABLE_SIZE 256

/* Normal distribution: ZIGGURAT_NOR_R is the starting x of the (commented-out)
   table generator, ZIGGURAT_NOR_INV_R its reciprocal, and NOR_SECTION_AREA the
   area v of each ziggurat strip. */
#define ZIGGURAT_NOR_R 3.6541528853610088
#define ZIGGURAT_NOR_INV_R 0.27366123732975828
#define NOR_SECTION_AREA 0.00492867323399

/* Exponential distribution: same three quantities as above. */
#define ZIGGURAT_EXP_R 7.69711747013104972
#define ZIGGURAT_EXP_INV_R 0.129918765548341586
#define EXP_SECTION_AREA 0.0039496598225815571993
126 |
127 |
128 | /*
129 | This code is based on the paper Marsaglia and Tsang, "The ziggurat method
130 | for generating random variables", Journ. Statistical Software. Code was
131 | presented in this paper for a Ziggurat of 127 levels and using a 32 bit
132 | integer random number generator. This version of the code, uses the
133 | Mersenne Twister as the integer generator and uses 256 levels in the
134 | Ziggurat. This has several advantages.
135 |
136 | 1) As Marsaglia and Tsang themselves state, the more levels there are, the fewer
137 | times the expensive tail algorithm must be called.
138 | 2) The cycle time of the generator is determined by the integer
139 | generator, thus the use of a Mersenne Twister for the core random
140 | generator makes this cycle extremely long.
141 | 3) The license on the original code was unclear, thus rewriting the code
142 | from the article means we are free of copyright issues.
143 | 4) Compile flag for full 53-bit random mantissa.
144 |
145 | It should be stated that the authors made my life easier, by the fact that
146 | the algorithm developed in the text of the article is for a 256 level
147 | ziggurat, even if the code itself isn't...
148 |
149 | One modification to the algorithm developed in the article, is that it is
150 | assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in
151 | terms like 2^32 in the code. As the normal distribution is defined between
152 | -Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus
153 | in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for
154 | this term. The exponential distribution is one sided so we use the
155 | full 32 bits. We use EMANTISSA for this term.
156 |
157 | It appears that I'm slightly slower than the code in the article, this
158 | is partially due to a better generator of random integers than they
159 | use. But might also be that the case of rapid return was optimized by
160 | inlining the relevant code with a #define. As the basic Mersenne
161 | Twister is only 25% faster than this code I suspect that the main
162 | reason is just the use of the Mersenne Twister and not the inlining,
163 | so I'm not going to try and optimize further.
164 | */
165 |
166 |
167 | // void randmtzig_create_ziggurat_tables (void)
168 | // {
169 | // int i;
170 | // double x, x1;
171 |
172 | // /* Ziggurat tables for the normal distribution */
173 | // x1 = ZIGGURAT_NOR_R;
174 | // wi[255] = x1 / NMANTISSA;
175 | // fi[255] = exp (-0.5 * x1 * x1);
176 |
177 | // /* Index zero is special for tail strip, where Marsaglia and Tsang
178 | // * defines this as
179 | // * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
180 | // * where v is the area of each strip of the ziggurat.
181 | // */
182 | // ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA);
183 | // wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA;
184 | // fi[0] = 1.;
185 |
186 | // for (i = 254; i > 0; i--)
187 | // {
188 | // /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
189 | // * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
190 | // */
191 | // x = sqrt(-2. * log(NOR_SECTION_AREA / x1 + fi[i+1]));
192 | // ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA);
193 | // wi[i] = x / NMANTISSA;
194 | // fi[i] = exp (-0.5 * x * x);
195 | // x1 = x;
196 | // }
197 |
198 | // ki[1] = 0;
199 |
200 | // /* Ziggurat tables for the exponential distribution */
201 | // x1 = ZIGGURAT_EXP_R;
202 | // we[255] = x1 / EMANTISSA;
203 | // fe[255] = exp (-x1);
204 |
205 | // /* Index zero is special for tail strip, where Marsaglia and Tsang
206 | // * defines this as
207 | // * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
208 | // * where v is the area of each strip of the ziggurat.
209 | // */
210 | // ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA);
211 | // we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA;
212 | // fe[0] = 1.;
213 |
214 | // for (i = 254; i > 0; i--)
215 | // {
216 | // /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
217 | // * need inverse operator of y = exp(-x) -> x = -ln(y)
218 | // */
219 | // x = - log(EXP_SECTION_AREA / x1 + fe[i+1]);
220 | // ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA);
221 | // we[i] = x / EMANTISSA;
222 | // fe[i] = exp (-x);
223 | // x1 = x;
224 | // }
225 | // ke[1] = 0;
226 | // }
227 |
228 | // Precomputed ziggurat tables for randn (normal distribution): ki, wi, fi
229 | static ZIGINT ki[ZIGGURAT_TABLE_SIZE] =
230 | {2104047571230236, 0,1693657211688499,1919380038164751,
231 | 2015384402142420,2068365869415708,2101878624030987,2124958784087614,
232 | 2141808670783638,2154644611559370,2164744887580145,2172897953690771,
233 | 2179616279367521,2185247251864556,2190034623104318,2194154434518163,
234 | 2197736978772008,2200880740889623,2203661538008543,2206138681107245,
235 | 2208359231804928,2210361007256700,2212174742387166,2213825672703393,
236 | 2215334711001466,2216719334486539,2217994262138197,2219171977964129,
237 | 2220263139537873,2221276900116549,2222221164932202,2223102796828387,
238 | 2223927782546019,2224701368169460,2225428170203747,2226112267247709,
239 | 2226757276104752,2227366415327922,2227942558554233,2228488279492093,
240 | 2229005890046815,2229497472774805,2229964908626691,2230409900758245,
241 | 2230833995044249,2231238597815812,2231624991249884,2231994346765634,
242 | 2232347736722468,2232686144665663,2233010474325699,2233321557544631,
243 | 2233620161275830,2233906993781039,2234182710130112,2234447917093281,
244 | 2234703177502812,2234949014149981,2235185913274123,2235414327692697,
245 | 2235634679614740,2235847363174420,2236052746716668,2236251174862705,
246 | 2236442970379808,2236628435876608,2236807855342616,2236981495548416,
247 | 2237149607321006,2237312426707072,2237470176035519,2237623064889274,
248 | 2237771290995262,2237915041040474,2238054491421185,2238189808931596,
249 | 2238321151397547,2238448668260322,2238572501115061,2238692784207837,
250 | 2238809644895031,2238923204068302,2239033576548092,2239140871448347,
251 | 2239245192514865,2239346638439450,2239445303151863,2239541276091355,
252 | 2239634642459413,2239725483455210,2239813876495104,2239899895417414,
253 | 2239983610673598,2240065089506859,2240144396119109,2240221591827156,
254 | 2240296735208897,2240369882240222,2240441086423317,2240510398906937,
255 | 2240577868599239,2240643542273660,2240707464668327,2240769678579424,
256 | 2240830224948918,2240889142947021,2240946470049710,2241002242111632,
257 | 2241056493434688,2241109256832545,2241160563691345,2241210444026824,
258 | 2241258926538069,2241306038658085,2241351806601384,2241396255408737,
259 | 2241439408989263,2241481290159988,2241521920683014,2241561321300414,
260 | 2241599511766981,2241636510880914,2241672336512567,2241707005631317,
261 | 2241740534330669,2241772937851645,2241804230604542,2241834426189118,
262 | 2241863537413270,2241891576310240,2241918554154426,2241944481475803,
263 | 2241969368073032,2241993223025259,2242016054702647,2242037870775672,
264 | 2242058678223188,2242078483339294,2242097291739004,2242115108362739,
265 | 2242131937479636,2242147782689690,2242162646924702,2242176532448058,
266 | 2242189440853303,2242201373061504,2242212329317384,2242222309184204,
267 | 2242231311537365,2242239334556685,2242246375717338,2242252431779384,
268 | 2242257498775863,2242261571999386,2242264645987166,2242266714504423,
269 | 2242267770526080,2242267806216682,2242266812908434,2242264781077261,
270 | 2242261700316790,2242257559310117,2242252345799249,2242246046552055,
271 | 2242238647326588,2242230132832599,2242220486690050,2242209691384432,
272 | 2242197728218658,2242184577261284,2242170217290794,2242154625735654,
273 | 2242137778609814,2242119650443302,2242100214207531,2242079441234882,
274 | 2242057301132111,2242033761687055,2242008788768083,2241982346215658,
275 | 2241954395725333,2241924896721420,2241893806220494,2241861078683807,
276 | 2241826665857576,2241790516600019,2241752576693859,2241712788642894,
277 | 2241671091451056,2241627420382213,2241581706698751,2241533877376746,
278 | 2241483854795259,2241431556397014,2241376894317324,2241319774977796,
279 | 2241260098640839,2241197758920517,2241132642244683,2241064627262631,
280 | 2240993584191722,2240919374095516,2240841848084869,2240760846432212,
281 | 2240676197587764,2240587717084761,2240495206318733,2240398451183547,
282 | 2240297220544145,2240191264522592,2240080312570135,2239964071293311,
283 | 2239842221996510,2239714417896679,2239580280957705,2239439398282173,
284 | 2239291317986176,2239135544468183,2238971532964959,2238798683265249,
285 | 2238616332424332,2238423746288075,2238220109591870,2238004514345197,
286 | 2237775946143192,2237533267957802,2237275200846732,2237000300869931,
287 | 2236706931309079,2236393229029127,2236057063479481,2235695986373225,
288 | 2235307169458838,2234887326941556,2234432617919425,2233938522519742,
289 | 2233399683022654,2232809697779175,2232160850599794,2231443750584617,
290 | 2230646845562145,2229755753817960,2228752329126507,2227613325162477,
291 | 2226308442121145,2224797391720369,2223025347823800,2220915633329775,
292 | 2218357446086993,2215184158448627,2211132412537323,2205758503851011,
293 | 2198248265654920,2186916352102052,2167562552481677,2125549880839429};
294 |
295 | static double wi[ZIGGURAT_TABLE_SIZE] =
296 | {17367254121656703e-31,9558660348275145e-32,12708704832820278e-32,
297 | 14909740960986864e-32,16658733630346416e-32,18136120809053487e-32,
298 | 1942972015219358e-31,20589500627632916e-32,21646860576118966e-32,
299 | 2262294039150043e-31,23532718913376864e-32,24387234556800803e-32,
300 | 25194879828681465e-32,2596219977196592e-31,26694407473112964e-32,
301 | 2739572968463095e-31,280696460019946e-30,28719058903642897e-32,
302 | 29346417484275224e-32,29953809336344285e-32,30543030006769113e-32,
303 | 3111563633851158e-31,3167298801818414e-31,3221628035016365e-31,
304 | 32746570407564125e-32,33264798116476e-29,337718034169968e-30,
305 | 34268340352771636e-32,34755088731390227e-32,3523266384567022e-31,
306 | 3570162463362898e-31,3616248057128073e-31,36615697529342477e-32,
307 | 3706170277693123e-31,37500889278448874e-32,3793361940125627e-31,
308 | 38360228129389374e-32,3878102586096749e-31,3919630085297984e-31,
309 | 39606321365983254e-32,40011337552278087e-32,4041158312387907e-31,
310 | 4080727683070036e-31,4119862377455137e-31,41585816580575855e-32,
311 | 41969036444492247e-32,4234845407127582e-31,42724230518658345e-32,
312 | 43096517956924877e-32,4346546035489394e-31,4383119410062289e-31,
313 | 4419384856424202e-31,4455354660935343e-31,4491040505860591e-31,
314 | 4526453511835132e-31,45616042766683e-29,4596502910863464e-31,
315 | 4631159070186941e-31,4665581985579899e-31,469978049067346e-30,
316 | 4733763047137822e-31,4767537768070579e-31,4801112439606964e-31,
317 | 4834494540915173e-31,4867691262722585e-31,4900709524503576e-31,
318 | 4933555990446197e-31,4966237084303158e-31,499875900322208e-30,
319 | 5031127730640677e-31,5063349048324261e-31,5095428547615612e-31,
320 | 5127371639960692e-31,5159183566767805e-31,5190869408652579e-31,
321 | 5222434094116442e-31,52538824077020155e-32,5285218997665102e-31,
322 | 5316448383199491e-31,5347574961247755e-31,5378603012928409e-31,
323 | 5409536709607314e-31,5440380118638932e-31,5471137208800966e-31,
324 | 550181185544408e-30,5532407845376661e-31,5562928881503102e-31,
325 | 5593378587232605e-31,5623760510674315e-31,5654078128633358e-31,
326 | 5684334850421336e-31,5714534021493849e-31,5744678926926726e-31,
327 | 5774772794741848e-31,5804818799092685e-31,5834820063319006e-31,
328 | 5864779662879593e-31,589470062817121e-30,5924585947241581e-31,
329 | 5954438568403615e-31,598426140275769e-30,601405732662843e-30,
330 | 6043829183921996e-31,6073579788409578e-31,6103311925942512e-31,
331 | 6133028356604082e-31,6162731816802865e-31,6192425021312213e-31,
332 | 6222110665260248e-31,6251791426074554e-31,6281469965385542e-31,
333 | 6311148930892342e-31,6340830958194888e-31,6370518672595733e-31,
334 | 640021469087503e-30,6429921623041988e-31,645964207406601e-30,
335 | 648937864559066e-30,6519133937633505e-31,6548910550274845e-31,
336 | 6578711085338253e-31,6608538148065851e-31,6638394348791179e-31,
337 | 6668282304612498e-31,6698204641069389e-31,6728163993825439e-31,
338 | 6758163010359885e-31,6788204351671041e-31,681829069399439e-30,
339 | 6848424730538249e-31,6878609173239948e-31,6908846754545526e-31,
340 | 6939140229215998e-31,696949237616333e-30,6999906000319335e-31,
341 | 7030383934540792e-31,7060929041554193e-31,7091544215943653e-31,
342 | 7122232386185626e-31,7152996516734219e-31,7183839610161045e-31,
343 | 7214764709353755e-31,7245774899777502e-31,72768733118038725e-32,
344 | 7308063123111988e-31,7339347561166714e-31,7370729905779203e-31,
345 | 7402213491755235e-31,7433801711637146e-31,7465498018545449e-31,
346 | 7497305929126601e-31,7529229026613742e-31,7561270964007667e-31,
347 | 7593435467385694e-31,7625726339346621e-31,7658147462600412e-31,
348 | 7690702803711903e-31,7723396417008341e-31,7756232448661274e-31,
349 | 778921514095401e-30,7822348836746627e-31,7855637984151357e-31,
350 | 7889087141432085e-31,7922700982142658e-31,7956484300519808e-31,
351 | 7990442017147628e-31,8024579184911813e-31,8058900995263265e-31,
352 | 8093412784812165e-31,812812004227522e-30,8163028415800651e-31,
353 | 8198143720697359e-31,8233471947596931e-31,8269019271079405e-31,
354 | 8304792058796362e-31,834079688112767e-30,8377040521411316e-31,
355 | 8413529986789175e-31,8450272519715296e-31,8487275610177406e-31,
356 | 85245470086869e-29,8562094740097588e-31,8599927118319072e-31,
357 | 86380527619967175e-32,8676480611237092e-31,8715219945465259e-31,
358 | 8754280402508787e-31,8793671999012706e-31,8833405152300122e-31,
359 | 88734907038049e-29,8913939944215902e-31,8954764640486935e-31,
360 | 8995977064883017e-31,9037590026252085e-31,9079616903732087e-31,
361 | 9122071683126914e-31,9164968996211253e-31,9208324163254476e-31,
362 | 9252153239087913e-31,9296473063078686e-31,9341301313417584e-31,
363 | 938665656617903e-30,9432558359669126e-31,9479027264644209e-31,
364 | 95260849610588e-29,957375432209002e-30,962205950628746e-30,
365 | 9671026058815726e-31,972068102289435e-30,9771053062699983e-31,
366 | 9822172599183368e-31,9874071960473548e-31,9926785548800904e-31,
367 | 9980350026176626e-31,10034804521429213e-31,10090190861630543e-31,
368 | 10146553831460223e-31,10203941464676316e-31,1026240537260681e-30,
369 | 10322001115479755e-31,10382788623508751e-31,10444832675993878e-31,
370 | 10508203448348659e-31,1057297713900341e-30,10639236690670377e-31,
371 | 10707072623626628e-31,107765840026618e-29,10847879564397177e-31,
372 | 10921079038143372e-31,109963147017795e-29,11073733224929686e-31,
373 | 11153497865847152e-31,11235791107104895e-31,11320817840158973e-31,
374 | 11408809242576976e-31,1150002753783406e-30,11594771891443527e-31,
375 | 11693385786905373e-31,1179626635295029e-30,11903876299277459e-31,
376 | 1201675939253847e-30,12135560818661637e-31,12261054417445396e-31,
377 | 12394179789158183e-31,12536093926597603e-31,1268824481425016e-30,
378 | 12852479319091384e-31,13031206634685398e-31,13227655770190893e-31,
379 | 13446300925006917e-31,13693606835124475e-31,13979436672771461e-31,
380 | 14319989869657897e-31,14744848603594667e-31,1531787274160907e-30,
381 | 16227698675312968e-31};
382 |
383 | static double fi[ZIGGURAT_TABLE_SIZE] =
384 | {1.,.9771017012827331,.9598790918124174,.9451989534530794,
385 | .9320600759689914,.9199915050483614,.9087264400605639,.898095921906305,
386 | .8879846607634008,.8783096558161477,.869008688043794,.8600336212030095,
387 | .8513462584651245,.842915653118442,.8347162929929313,.8267268339520951,
388 | .8189291916094156,.8113078743182208,.8038494831763903,.7965423304282554,
389 | .7893761435711993,.7823418326598627,.775431304986139,.7686373158033355,
390 | .7619533468415471,.7553735065117552,.7488924472237273,.7425052963446368,
391 | .7362075981312672,.729995264565803,.7238645334728822,.717811932634902,
392 | .711834248882359,.7059285013367979,.7000919181404905,.694321916130033,
393 | .6886160830085275,.6829721616487918,.6773880362225135,.6718617199007669,
394 | .6663913439123812,.6609751477802419,.6556114705832252,.650298743114295,
395 | .6450354808242524,.6398202774564395,.6346517992909606,.6295287799281287,
396 | .6244500155502747,.6194143606090396,.6144207238920772,.6094680649288958,
397 | .6045553907005499,.599681752622168,.5948462437709915,.5900479963357922,
398 | .5852861792663006,.5805599961036837,.5758686829752109,.5712115067380753,
399 | .5665877632589521,.5619967758172782,.5574378936214867,.5529104904285204,
400 | .5484139632579217,.5439477311926505,.5395112342595453,.5351039323830201,
401 | .5307253044061945,.5263748471741873,.5220520746747954,.5177565172322012,
402 | .5134877207497434,.5092452459981365,.5050286679458292,.5008375751284826,
403 | .49667156905479676,.4925302636461491,.4884132847077125,.48432026942891204,
404 | .48025086591125016,.4762047327216842,.4721815384698837,.46818096140782267,
405 | .4642026890502793,.460246417814924,.45631185268077407,.4523987068638829,
406 | .4485067015092144,.4446355653977281,.4407850346677702,.43695485254992955,
407 | .43314476911457434,.42935454103134185,.42558393133990086,.4218327092313535,
408 | .41810064983968476,.4143875340427069,.41069314827198344,.40701728433124823,
409 | .4033597392228692,.399720314981932,.3960988185175474,.39249506146101104,
410 | .3889088600204649,.38534003484173424,.3817884108750316,.37825381724723833,
411 | .37473608713949164,.37123505766982157,.3677505697805964,.36428246813054976,
412 | .36083060099117586,.3573948201472906,.35397498080156936,.35057094148288126,
413 | .34718256395825153,.3438097131482915,.3404522570459456,.33711006663841303,
414 | .33378301583210873,.3304709813805373,.32717384281495887,.32389148237773235,
415 | .3206237849582305,.3173706380312227,.3141319315976305,.310907558127564,
416 | .307697412505554,.30450139197789644,.30131939610203423,.29815132669790145,
417 | .2949970878011627,.2918565856182811,.28872972848335393,.28561642681665805,
418 | .28251659308484933,.27943014176276515,.2763569892967811,.27329705406967564,
419 | .2702502563669598,.26721651834463167,.2641957639983174,.2611879191337636,
420 | .25819291133864797,.2552106699556771,.25224112605694377,.2492842124195167,
421 | .24633986350223877,.24340801542371202,.24048860594144916,.23758157443217368,
422 | .2346868618732527,.23180441082524855,.22893416541557743,.22607607132326474,
423 | .22323007576478943,.22039612748101145,.21757417672517823,.2147641752520084,
424 | .21196607630785277,.20917983462193548,.20640540639867916,.20364274931112133,
425 | .20089182249543117,.19815258654653795,.1954250035148854,.19270903690432864,
426 | .19000465167119293,.18731181422451676,.18463049242750437,.18196065560021638,
427 | .17930227452353026,.17665532144440646,.17401977008249914,.17139559563815535,
428 | .16878277480185,.1661812857651097,.1635911082329826,.16101222343811727,
429 | .1584446141565199,.15588826472506426,.15334316106083742,.15080929068240986,
430 | .1482866427331284,.14577520800653765,.14327497897404687,.14078594981496803,
431 | .138308116449064,.13584147657175705,.13338602969216254,.13094177717412792,
432 | .12850872228047336,.12608687022065,.12367622820205106,.12127680548523516,
433 | .11888861344334545,.11651166562603685,.11414597782825504,.1117915681642454,
434 | .10944845714720981,.10711666777507266,.10479622562286683,.10248715894230599,
435 | .10018949876917177,.09790327903921535,.09562853671335306,.09336531191302634,
436 | .09111364806670041,.08887359206859394,.08664519445086755,.08442850957065445,
437 | .0822235958134955,.08003051581494733,.07784933670237201,.07568013035919481,
438 | .07352297371424082,.07137794905914183,.06924514439725017,.06712465382802392,
439 | .06501657797147035,.06292102443797778,.06083810834975175,.05876795292113793,
440 | .056710690106399425,.05466646132507786,.05263541827697361,.05061772386112175,
441 | .04861355321603513,.04662309490208967,.044646552251446515,.042684144916619336,
442 | .04073611065607874,.0388027074046569,.03688421568869112,.034980941461833046,
443 | .033093219458688684,.031221417192023686,.02936593975823011,.027527235669693315,
444 | .02570580400863265,.023902203305873237,.022117062707379908,.020351096230109344,
445 | .018605121275783343,.016880083152595836,.015177088307982065,.013497450601780796,
446 | .0118427578579431,.0102149714397311,.008616582769422912,.007050875471392109,
447 | .005522403299264755,.0040379725933718715,.002609072746106362,.0012602859304985975};
448 |
449 | // Precomputed ziggurat tables for exprnd (exponential distribution), starting with ke
450 | static ZIGINT ke[ZIGGURAT_TABLE_SIZE] =
451 | {3985772928715748, 0,2742928985168065,3438700186803721,
452 | 3744780257810519,3914896975372863,4022625697542798,4096776410635450,
453 | 4150853606149210,4192001604687417,4224344877584101,4250427292531740,
454 | 4271901371161554,4289886428824118,4305167164135199,4318309783140431,
455 | 4329732973408940,4339752937704679,4348612900760388,4356502988721768,
456 | 4363573953227346,4369946852445020,4375720012348349,4380974119031481,
457 | 4385776001930298,4390181484145305,4394237557465219,4397984061535398,
458 | 4401454994146430,4404679543790856,4407682910787985,4410486965794400,
459 | 4413110782053579,4415571068741702,4417882526198713,4420058138987325,
460 | 4422109419110772,4424046609003130,4425878851844253,4427614335173868,
461 | 4429260412563040,4430823707156475,4432310200160197,4433725306767517,
462 | 4435073941555377,4436360575016074,4437589282595121,4438763787369085,
463 | 4439887497305303,4440963537889317,4441994780778252,4442983869033585,
464 | 4443933239400428,4444845142028910,4445721657973833,4446564714759241,
465 | 4447376100252993,4448157475061632,4448910383626429,4449636264176642,
466 | 4450336457674983,4451012215872352,4451664708573597,4452295030203006,
467 | 4452904205747010,4453493196141906,4454062903166143,4454614173889474,
468 | 4455147804725090,4455664545125435,4456165100957688,4456650137590828,
469 | 4457120282722585,4457576128971459,4458018236256245,4458447133983073,
470 | 4458863323057847,4459267277740095,4459659447352586,4460040257859578,
471 | 4460410113325310,4460769397263133,4461118473884710,4461457689257740,
472 | 4461787372379910,4462107836175980,4462419378424319,4462722282618581,
473 | 4463016818769709,4463303244152965,4463581804004301,4463852732169940,
474 | 4464116251712773,4464372575478779,4464621906626490,4464864439122178,
475 | 4465100358203284,4465329840812355,4465553056003596,4465770165323939,
476 | 4465981323170417,4466186677125455,4466386368271563,4466580531486827,
477 | 4466769295722448,4466952784263502,4467131114974006,4467304400527265,
478 | 4467472748622447,4467636262188208,4467795039574164,4467949174730939,
479 | 4468098757379442,4468243873170018,4468384603832024,4468521027314373,
480 | 4468653217917530,4468781246417428,4468905180181701,4469025083278642,
481 | 4469141016579234,4469253037852582,4469361201855066,4469465560413474,
482 | 4469566162502383,4469663054316032,4469756279334881,4469845878387080,
483 | 4469931889704995,4470014348976986,4470093289394551,4470168741694984,
484 | 4470240734199652,4470309292847996,4470374441227332,4470436200598525,
485 | 4470494589917605,4470549625853344,4470601322800852,4470649692891185,
486 | 4470694745996980,4470736489734116,4470774929459349,4470810068263924,
487 | 4470841906963074,4470870444081369,4470895675833821,4470917596102651,
488 | 4470936196409614,4470951465883737,4470963391224346,4470971956659198,
489 | 4470977143897542,4470978932077904,4470977297710362,4470972214613072,
490 | 4470963653842747,4470951583618802,4470935969240827,4470916772999009,
491 | 4470893954077117,4470867468447603,4470837268758338,4470803304210460,
492 | 4470765520426769,4470723859310029,4470678258890503,4470628653161980,
493 | 4470574971905457,4470517140499614,4470455079717082,4470388705505446,
494 | 4470317928751818,4470242655029689,4470162784326669,4470078210751556,
495 | 4469988822219058,4469894500110287,4469795118907000,4469690545797298,
496 | 4469580640250319,4469465253557163,4469344228335006,4469217397991048,
497 | 4469084586142556,4468945605988875,4468800259630802,4468648337332217,
498 | 4468489616718259,4468323861903709,4468150822544456,4467970232804102,
499 | 4467781810226787,4467585254506222,4467380246139658,4467166444954116,
500 | 4466943488490515,4466710990229518,4466468537640691,4466215690034133,
501 | 4465951976190801,4465676891744455,4465389896284247,4465090410142477,
502 | 4464777810826750,4464451429049612,4464110544301482,4463754379904174,
503 | 4463382097472202,4462992790697122,4462585478355953,4462159096427753,
504 | 4461712489182116,4461244399078944,4460753455289386,4460238160612098,
505 | 4459696876515553,4459127805983956,4458528973779075,4457898203649722,
506 | 4457233091920646,4456530976767892,4455788902331217,4455003576616607,
507 | 4454171321891082,4453288015951104,4452349022232651,4451349106194827,
508 | 4450282334707462,4449141954247903,4447920242480611,4446608326137821,
509 | 4445195955871677,4443671225661690,4442020220072463,4440226566619900,
510 | 4438270861888260,4436129927556552,4433775834104270,4431174602388627,
511 | 4428284451100006,4425053392146958,4421415870372502,4417287970124084,
512 | 4412560416174562,4407088078325945,4400673742272494,4393042098597073,
513 | 4383796248451589,4372341169422858,4357740343059956,4338425130125967,
514 | 4311541827049177,4271262897902398,4203411844498905,4061213381260384};
515 |
516 | static double we[ZIGGURAT_TABLE_SIZE] =
517 | {19311480126418366e-31,1417802848791084e-32,23278824993382457e-33,
518 | 30487830247064326e-33,3666569771447489e-32,4217930218928974e-32,
519 | 4722256155686277e-32,51911915446217885e-33,5632347108395505e-32,
520 | 6051008260642765e-32,645101650967275e-31,6835264680370054e-32,
521 | 7205993957468906e-32,7564981553739299e-32,7913664396195108e-32,
522 | 8253223556351894e-32,8584643616885051e-32,8908755486564743e-32,
523 | 9226267962966373e-32,9537791450529272e-32,9843856087455926e-32,
524 | 10144925809006294e-32,10441409405585343e-32,10733669323436384e-32,
525 | 1102202874567019e-31,11306777346479334e-32,11588176009705533e-32,
526 | 11866460730417886e-32,1214184586569436e-31,12414526862326387e-32,
527 | 12684682560606153e-32,12952477151912284e-32,1321806185153881e-31,
528 | 13481576335745447e-32,13743149982367625e-32,14002902946807862e-32,
529 | 14260947099321287e-32,14517386844829297e-32,14772319842763584e-32,
530 | 15025837641447456e-32,15278026239101652e-32,15528966581595696e-32,
531 | 1577873500545958e-31,1602740363335091e-31,16275040728083524e-32,
532 | 16521711010420076e-32,16767475945078279e-32,17012393998770646e-32,
533 | 17256520873568226e-32,17499909718432365e-32,17742611321380505e-32,
534 | 17984674284430714e-32,18226145183195818e-32,18467068712763576e-32,
535 | 18707487821298258e-32,18947443832625902e-32,19186976558915997e-32,
536 | 19426124404443042e-32,19664924461299023e-32,19903412597830144e-32,
537 | 20141623540485899e-32,20379590949693882e-32,2061734749030844e-31,
538 | 2085492489712377e-31,21092354035891528e-32,21329664960238294e-32,
539 | 21566886964838972e-32,2180404863516701e-31,22041177894111562e-32,
540 | 2227830204572395e-31,2251544781633135e-31,22752641393233694e-32,
541 | 22989908461180186e-32,23227274236804366e-32,23464763501180916e-32,
542 | 2370240063065339e-31,23940209626069303e-32,2417821414054771e-31,
543 | 24416437505894123e-32,24654902757768304e-32,2489363265970225e-31,
544 | 2513264972605797e-31,2537197624400795e-31,2561163429461499e-31,
545 | 2585164577308239e-31,26092032408240577e-32,2633281578133145e-31,
546 | 2657401734414762e-31,2681565843657999e-31,2705776030362351e-31,
547 | 27300344111887955e-32,27543430965657624e-32,2778704192254128e-31,
548 | 2803119800875143e-31,28275920234049704e-32,2852122960639331e-31,
549 | 28767147146315804e-32,29013693901073754e-32,29260890958589514e-32,
550 | 29508759461219033e-32,2975732061937252e-31,3000659572501474e-31,
551 | 3025660616507079e-31,3050737343476251e-31,3075891915089994e-31,
552 | 31011265065151543e-32,3126443307731675e-31,31518445248623523e-32,
553 | 31773323815073683e-32,32029091200858335e-32,32285770031865573e-32,
554 | 3254338314930261e-31,3280195362345436e-31,3306150476760074e-31,
555 | 3332206015211484e-31,33583643618764577e-32,33846279295240445e-32,
556 | 34109991609932597e-32,34374805306980633e-32,34640745461620167e-32,
557 | 3490783749585068e-31,3517610719444983e-31,3544558072136013e-31,
558 | 3571628463647465e-31,35988245912849274e-32,3626149195437003e-31,
559 | 36536050613905045e-32,36811950211971757e-32,3708921955595139e-31,
560 | 37367887959883854e-32,3764798526487784e-31,37929541860172334e-32,
561 | 3821258870488753e-31,38497157350504876e-32,3878327996411799e-31,
562 | 39070989352498183e-32,3936031898702075e-31,3965130302950038e-31,
563 | 3994397635898684e-31,40238374599574693e-32,40534534149283966e-32,
564 | 4083249221007178e-31,41132286819038357e-32,4143395688089474e-31,
565 | 417375422017632e-30,42043083524385856e-32,4235062256482152e-31,
566 | 4266020205071558e-31,42971865761233266e-32,43285658568752094e-32,
567 | 4360162648241568e-31,43919816693657415e-32,4424027762380992e-31,
568 | 4456305897392361e-31,4488821177692617e-31,4521578845226347e-31,
569 | 4554584286317242e-31,4587843037674623e-31,4621360792696427e-31,
570 | 4655143408087069e-31,4689196910809916e-31,4723527505395548e-31,
571 | 4758141581628553e-31,4793045722637247e-31,4828246713412587e-31,
572 | 4863751549784512e-31,489956744788614e-30,4935701854138577e-31,
573 | 4972162455791703e-31,5008957192059114e-31,5046094265888434e-31,
574 | 5083582156411624e-31,5121429632123542e-31,5159645764841062e-31,
575 | 5198239944499494e-31,5237221894847848e-31,5276601690109886e-31,
576 | 531638977268369e-30,535659697195905e-30,5397234524338979e-31,
577 | 5438314094559637e-31,547984779841163e-30,5521848226975234e-31,
578 | 5564328472492872e-31,5607302156013967e-31,5650783456960506e-31,
579 | 5694787144776348e-31,5739328612839635e-31,5784423914835991e-31,
580 | 5830089803810586e-31,5876343774140057e-31,5923204106690931e-31,
581 | 5970689917460091e-31,6018821210025236e-31,6067618932170007e-31,
582 | 6117105037089722e-31,616730254963062e-30,6218235638068533e-31,
583 | 6269929691993326e-31,6322411406934211e-31,6375708876439426e-31,
584 | 6429851692413595e-31,6484871054618903e-31,6540799890364481e-31,
585 | 6597672985544566e-31,6655527128343343e-31,6714401267106488e-31,
586 | 677433668409101e-30,6835377187051274e-31,6897569320906848e-31,
587 | 6960962602074885e-31,7025609778445959e-31,7091567118449584e-31,
588 | 7158894733208553e-31,7227656936438121e-31,7297922647529085e-31,
589 | 7369765844191243e-31,7443266072160415e-31,7518509020832513e-31,
590 | 7595587175337749e-31,7674600557578427e-31,7755657571215791e-31,
591 | 7838875968622858e-31,792438396157355e-30,8012321502113083e-31,
592 | 8102841765913146e-31,8196112877806125e-31,8292319928581809e-31,
593 | 8391667344146798e-31,849438168364877e-30,8600714963334941e-31,
594 | 8710948629387904e-31,882539833807214e-30,8944419748519865e-31,
595 | 9068415597131669e-31,9197844409811865e-31,9333231329422952e-31,
596 | 9475181706524984e-31,9624398345658476e-31,978170365478442e-30,
597 | 994806847238388e-30,1012465014428832e-30,10312843657756166e-31,
598 | 1051435160404455e-30,10731281954224043e-31,10966288068517408e-31,
599 | 1122277490935032e-30,11505212963006663e-31,11819635283304206e-31,
600 | 12174462832361815e-31,12581958069755114e-31,13060984107128082e-31,
601 | 13642786158057857e-31,14384889932178723e-31,15412190700064194e-31,
602 | 17091034077168055e-31};
603 |
604 | static double fe[ZIGGURAT_TABLE_SIZE] =
605 | { 1.0,.9381436808621746,.9004699299257464,.8717043323812036,
606 | .8477855006239896,.8269932966430503,.8084216515230084,.7915276369724956,
607 | .7759568520401156,.7614633888498963,.7478686219851951,.7350380924314235,
608 | .722867659593572,.711274760805076,.7001926550827882,.689566496117078,
609 | .6793505722647654,.6695063167319247,.6600008410789997,.650805833414571,
610 | .6418967164272661,.6332519942143661,.6248527387036659,.6166821809152077,
611 | .608725382079622,.6009689663652322,.5934009016917334,.586010318477268,
612 | .578787358602845,.5717230486648258,.5648091929124002,.5580382822625874,
613 | .5514034165406413,.5448982376724396,.5385168720028619,.5322538802630432,
614 | .5261042139836197,.5200631773682336,.5141263938147486,.5082897764106429,
615 | .5025495018413477,.49690198724154955,.49134386959403253,.4858719873418849,
616 | .4804833639304542,.4751751930373774,.46994482528396,.4647897562504262,
617 | .4597076156421377,.45469615747461545,.449753251162755,.4448768734145485,
618 | .4400651008423539,.4353161032156366,.43062813728845883,.42599954114303434,
619 | .4214287289976166,.4169141864330029,.4124544659971612,.4080481831520324,
620 | .4036940125305303,.3993906844752311,.39513698183329016,.3909317369847971,
621 | .38677382908413765,.38266218149600983,.3785957594095808,.37457356761590216,
622 | .370594648435146,.36665807978151416,.3627629733548178,.3589084729487498,
623 | .35509375286678746,.35131801643748334,.347580494621637,.3438804447045024,
624 | .34021714906678,.33658991402867755,.332998068761809,.3294409642641363,
625 | .3259179723935562,.3224284849560891,.31897191284495724,.31554768522712895,
626 | .31215524877417955,.3087940669345602,.30546361924459026,.3021634006756935,
627 | .2988929210155818,.2956517042812612,.2924392881618926,.28925522348967775,
628 | .2860990737370768,.28297041453878075,.27986883323697287,.27679392844851736,
629 | .27374530965280297,.27072259679906,.2677254199320448,.2647534188350622,
630 | .2618062426893629,.25888354974901623,.2559850070304154,.25311029001562946,
631 | .2502590823688623,.24743107566532763,.2446259691318921,.24184346939887721,
632 | .23908329026244918,.23634515245705964,.23362878343743335,.2309339171696274,
633 | .2282602939307167,.22560766011668407,.22297576805812017,.2203643758433595,
634 | .21777324714870053,.21520215107537868,.21265086199297828,.21011915938898826,
635 | .20760682772422204,.2051136562938377,.20263943909370902,.20018397469191127,
636 | .19774706610509887,.19532852067956322,.19292814997677132,.1905457696631954,
637 | .1881811994042543,.1858342627621971,.18350478709776746,.1811926034754963,
638 | .1788975465724783,.17661945459049488,.1743581691713535,.17211353531532006,
639 | .16988540130252766,.1676736186172502,.165478041874936,.16329852875190182,
640 | .16113493991759203,.1589871389693142,.15685499236936523,.15473836938446808,
641 | .15263714202744286,.1505511850010399,.1484803756438668,.14642459387834494,
642 | .14438372216063478,.1423576454324722,.14034625107486245,.1383494288635802,
643 | .13636707092642886,.13439907170221363,.13244532790138752,.13050573846833077,
644 | .12858020454522817,.12666862943751067,.12477091858083096,.12288697950954514,
645 | .12101672182667483,.11916005717532768,.11731689921155557,.11548716357863353,
646 | .11367076788274431,.1118676316700563,.11007767640518538,.1083008254510338,
647 | .10653700405000166,.10478613930657017,.10304816017125772,.10132299742595363,
648 | .09961058367063713,.0979108533114922,.0962237425504328,.09454918937605586,
649 | .09288713355604354,.09123751663104016,.08960028191003286,.08797537446727022,
650 | .08636274114075691,.08476233053236812,.08317409300963238,.08159798070923742,
651 | .0800339475423199,.07848194920160642,.0769419431704805,.07541388873405841,
652 | .07389774699236475,.07239348087570874,.07090105516237183,.06942043649872875,
653 | .0679515934219366,.06649449638533977,.06504911778675375,.06361543199980733,
654 | .062193415408540995,.06078304644547963,.059384305633420266,.05799717563120066,
655 | .05662164128374288,.05525768967669704,.05390531019604609,.05256449459307169,
656 | .05123523705512628,.04991753428270637,.0486113855733795,.04731679291318155,
657 | .04603376107617517,.04476229773294328,.04350241356888818,.042254122413316234,
658 | .04101744138041482,.039792391023374125,.03857899550307486,.03737728277295936,
659 | .03618728478193142,.03500903769739741,.03384258215087433,.032687963508959535,
660 | .03154523217289361,.030414443910466604,.029295660224637393,.028188948763978636,
661 | .0270943837809558,.026012046645134217,.024942026419731783,.02388442051155817,
662 | .02283933540638524,.02180688750428358,.020787204072578117,.019780424338009743,
663 | .01878670074469603,.01780620041091136,.016839106826039948,.015885621839973163,
664 | .014945968011691148,.014020391403181938,.013109164931254991,.012212592426255381,
665 | .011331013597834597,.010464810181029979,.00961441364250221,.008780314985808975,
666 | .00796307743801704,.007163353183634984,.006381905937319179,.005619642207205483,
667 | .004877655983542392,.004157295120833795,.003460264777836904,.002788798793574076,
668 | .0021459677437189063,.0015362997803015724,.0009672692823271745,.00045413435384149677};
669 |
670 |
671 | /*
672 | * Here are the guts of the algorithm, as Marsaglia and Tsang state it
673 | * in their paper:
674 | *
675 | * 1) Calculate a random signed integer j and let i be the index
676 | * provided by the rightmost 8 bits of j
677 | * 2) Set x = j * w_i. If j < k_i return x
678 | * 3) If i = 0, then return x from the tail
679 | * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x
680 | * 5) goto step 1
681 | *
682 | * Where f is the functional form of the distribution, which for a normal
683 | * distribution is exp(-0.5*x*x)
684 | */
685 |
686 | /* NOTE: This is identical to randmtzig_gv_randn() below except for the random number generation */
687 | double randmtzig_randn (dsfmt_t *dsfmt)
688 | {
689 |   /* Ziggurat sampler for the normal distribution; all randomness comes from `dsfmt`. */
690 |   for (;;)
691 |     {
692 |       /* One randi() word supplies everything: bit 0 = sign, the rest = magnitude,
693 |        * and the low 8 bits of the magnitude pick the table entry. */
694 |       const randmtzig_uint64_t bits = randi(dsfmt);
695 |       const randmtzig_int64_t mag = bits >> 1;
696 |       const int layer = (int)(mag & 0xFF);
697 |       const randmtzig_int64_t signed_mag = (bits & 1) ? -mag : mag;
698 |       const double x = signed_mag * wi[layer];
699 |
700 |       /* Fast path: 99.3% of the time we return here on the first try. */
701 |       if (mag < (randmtzig_int64_t)ki[layer])
702 |         return x;
703 |
704 |       if (layer == 0)
705 |         {
706 |           /* Normal tail, as stated in Marsaglia and Tsang: generate x = -ln(U_1)/r,
707 |            * y = -ln(U_2) until y+y > x*x, then return r+x.  r+x is always positive, so
708 |            * the sign comes from bit 8 of the magnitude ([PAK] the bottom 8 bits are 0 here). */
709 |           double tx, ty;
710 |           do {
711 |             tx = - ZIGGURAT_NOR_INV_R * log (randu(dsfmt));
712 |             ty = - log (randu(dsfmt));
713 |           } while (ty+ty <= tx*tx);
714 |           return (mag & 0x100) ? -ZIGGURAT_NOR_R-tx : ZIGGURAT_NOR_R+tx;
715 |         }
716 |
717 |       /* Wedge: accept x when a uniform point between fi[layer] and fi[layer-1]
718 |        * falls below exp(-0.5*x*x); otherwise go round again with a fresh draw. */
719 |       if ((fi[layer-1] - fi[layer]) * randu(dsfmt) + fi[layer] < exp(-0.5*x*x))
720 |         return x;
721 |     }
722 | }
723 |
724 | /* NOTE: This is identical to randmtzig_randn() above except for the random number generation */
725 | double randmtzig_gv_randn (void)
726 | {
727 |   /* Ziggurat sampler for the normal distribution; randomness comes from NRANDI and RANDU. */
728 |   for (;;)
729 |     {
730 |       /* One NRANDI word supplies everything: bit 0 = sign, the rest = magnitude,
731 |        * and the low 8 bits of the magnitude pick the table entry. */
732 |       const randmtzig_uint64_t bits = NRANDI;
733 |       const randmtzig_int64_t mag = bits >> 1;
734 |       const int layer = (int)(mag & 0xFF);
735 |       const randmtzig_int64_t signed_mag = (bits & 1) ? -mag : mag;
736 |       const double x = signed_mag * wi[layer];
737 |
738 |       /* Fast path: 99.3% of the time we return here on the first try. */
739 |       if (mag < (randmtzig_int64_t)ki[layer])
740 |         return x;
741 |
742 |       if (layer == 0)
743 |         {
744 |           /* Normal tail, as stated in Marsaglia and Tsang: generate x = -ln(U_1)/r,
745 |            * y = -ln(U_2) until y+y > x*x, then return r+x.  r+x is always positive, so
746 |            * the sign comes from bit 8 of the magnitude ([PAK] the bottom 8 bits are 0 here). */
747 |           double tx, ty;
748 |           do {
749 |             tx = - ZIGGURAT_NOR_INV_R * log (RANDU);
750 |             ty = - log (RANDU);
751 |           } while (ty+ty <= tx*tx);
752 |           return (mag & 0x100) ? -ZIGGURAT_NOR_R-tx : ZIGGURAT_NOR_R+tx;
753 |         }
754 |
755 |       /* Wedge: accept x when a uniform point between fi[layer] and fi[layer-1]
756 |        * falls below exp(-0.5*x*x); otherwise go round again with a fresh draw. */
757 |       if ((fi[layer-1] - fi[layer]) * RANDU + fi[layer] < exp(-0.5*x*x))
758 |         return x;
759 |     }
760 | }
761 |
762 | double randmtzig_gv_exprnd (void)
763 | {
764 |   /* Ziggurat sampler for exponential deviates; randomness comes from ERANDI and RANDU. */
765 |   for (;;)
766 |     {
767 |       /* The low 8 bits of the integer draw select the table entry. */
768 |       ZIGINT draw = ERANDI;
769 |       const int layer = (int)(draw & 0xFF);
770 |       const double x = draw * we[layer];
771 |
772 |       if (draw < ke[layer])
773 |         return x;   /* 98.9% of the time we return here on the first try */
774 |
775 |       /* Entry 0 is the tail; as stated in Marsaglia and Tsang, x = r - ln(U). */
776 |       if (layer == 0)
777 |         return ZIGGURAT_EXP_R - log(RANDU);
778 |
779 |       /* Wedge test against exp(-x); on failure go round again with a fresh draw. */
780 |       if ((fe[layer-1] - fe[layer]) * RANDU + fe[layer] < exp(-x))
781 |         return x;
782 |     }
783 | }
784 |
785 | #ifdef STANDALONE
786 |
787 | /* STANDALONE timing driver.  Usage: randmtzig <n>, with n a positive sample count.  Returns 0, or -1 on bad usage or allocation failure. */
788 | int main(int ac, char *av[]) {
789 |     const int n = (ac > 1) ? atoi(av[1]) : 0;
790 |     if (n <= 0) {   /* missing, non-numeric or non-positive count */
791 |         printf("Usage: randmtzig <n>\n");
792 |         return (-1);
793 |     }
794 |     const size_t nd = (size_t)n, nu = 2 * nd;   /* size_t counts: 2*n could overflow an int */
795 |     double *p = NULL; uint32_t *u = NULL;       /* nd doubles and nu 32-bit integers */
796 |     if (posix_memalign((void **)&p, 16, nd*sizeof(double))) p = NULL;   /* failure is reported only via the return value */
797 |     if (!p || posix_memalign((void **)&u, 16, nu*sizeof(uint32_t))) {
798 |         fprintf(stderr, "randmtzig: cannot allocate buffers for n = %d\n", n);
799 |         free(p);
800 |         return (-1);
801 |     }
802 |     dsfmt_gv_init_gen_rand(0);
803 |
804 |     clock_t t1 = clock();   /* clock() returns clock_t, not time_t */
805 |     dsfmt_gv_fill_array_close_open(p, n);   /* NOTE(review): dSFMT array fills may restrict the size (even, minimum length) - confirm */
806 |     printf("Uniform fill (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);
807 |     t1 = clock();
808 |     for (size_t i = 0; i < nd; i++) p[i] = dsfmt_gv_genrand_close_open();
809 |     printf("Uniform (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);
810 |     t1 = clock();
811 |     for (size_t i = 0; i < nu; i++) u[i] = dsfmt_gv_genrand_uint32();
812 |     printf("Uniform 32-bit ints (2*n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);
813 |     memset((void *)p, 0, nd*sizeof(double));
814 |     t1 = clock();
815 |     for (size_t i = 0; i < nd; i++) p[i] = randmtzig_gv_randn();
816 |     printf("Normal (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);
817 |     for (size_t i = 0; i < 10 && i < nd; i++) printf("%lf\n", p[i]);   /* show at most the first 10; never read past p[n-1] */
818 |     free(p); free(u);
819 |     return 0;
820 | }
821 |
822 | #endif
823 |
--------------------------------------------------------------------------------
/rust/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | **/*.rs.bk
3 |
--------------------------------------------------------------------------------
/rust/Cargo.lock:
--------------------------------------------------------------------------------
1 | [[package]]
2 | name = "bitflags"
3 | version = "1.0.1"
4 | source = "registry+https://github.com/rust-lang/crates.io-index"
5 |
6 | [[package]]
7 | name = "blas-src"
8 | version = "0.1.3"
9 | source = "registry+https://github.com/rust-lang/crates.io-index"
10 | dependencies = [
11 | "openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
12 | ]
13 |
14 | [[package]]
15 | name = "cblas"
16 | version = "0.1.5"
17 | source = "registry+https://github.com/rust-lang/crates.io-index"
18 | dependencies = [
19 | "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
20 | "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)",
21 | "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
22 | ]
23 |
24 | [[package]]
25 | name = "cblas-sys"
26 | version = "0.1.4"
27 | source = "registry+https://github.com/rust-lang/crates.io-index"
28 | dependencies = [
29 | "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)",
30 | ]
31 |
32 | [[package]]
33 | name = "either"
34 | version = "1.5.0"
35 | source = "registry+https://github.com/rust-lang/crates.io-index"
36 |
37 | [[package]]
38 | name = "fuchsia-zircon"
39 | version = "0.3.3"
40 | source = "registry+https://github.com/rust-lang/crates.io-index"
41 | dependencies = [
42 | "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
43 | "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
44 | ]
45 |
46 | [[package]]
47 | name = "fuchsia-zircon-sys"
48 | version = "0.3.3"
49 | source = "registry+https://github.com/rust-lang/crates.io-index"
50 |
51 | [[package]]
52 | name = "itertools"
53 | version = "0.7.8"
54 | source = "registry+https://github.com/rust-lang/crates.io-index"
55 | dependencies = [
56 | "either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
57 | ]
58 |
59 | [[package]]
60 | name = "julia-bench"
61 | version = "0.1.0"
62 | dependencies = [
63 | "blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
64 | "cblas 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
65 | "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
66 | "itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)",
67 | "mersenne_twister 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
68 | "ndarray 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)",
69 | "num 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
70 | "openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
71 | "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
72 | ]
73 |
74 | [[package]]
75 | name = "libc"
76 | version = "0.2.40"
77 | source = "registry+https://github.com/rust-lang/crates.io-index"
78 |
79 | [[package]]
80 | name = "matrixmultiply"
81 | version = "0.1.14"
82 | source = "registry+https://github.com/rust-lang/crates.io-index"
83 | dependencies = [
84 | "rawpointer 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
85 | ]
86 |
87 | [[package]]
88 | name = "mersenne_twister"
89 | version = "1.1.1"
90 | source = "registry+https://github.com/rust-lang/crates.io-index"
91 | dependencies = [
92 | "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
93 | ]
94 |
95 | [[package]]
96 | name = "ndarray"
97 | version = "0.11.2"
98 | source = "registry+https://github.com/rust-lang/crates.io-index"
99 | dependencies = [
100 | "blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
101 | "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
102 | "itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)",
103 | "matrixmultiply 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)",
104 | "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
105 | "num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
106 | ]
107 |
108 | [[package]]
109 | name = "num"
110 | version = "0.1.42"
111 | source = "registry+https://github.com/rust-lang/crates.io-index"
112 | dependencies = [
113 | "num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
114 | "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
115 | "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
116 | "num-iter 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
117 | "num-rational 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
118 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
119 | ]
120 |
121 | [[package]]
122 | name = "num-bigint"
123 | version = "0.1.43"
124 | source = "registry+https://github.com/rust-lang/crates.io-index"
125 | dependencies = [
126 | "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
127 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
128 | "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
129 | "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
130 | ]
131 |
132 | [[package]]
133 | name = "num-complex"
134 | version = "0.1.43"
135 | source = "registry+https://github.com/rust-lang/crates.io-index"
136 | dependencies = [
137 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
138 | "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
139 | ]
140 |
141 | [[package]]
142 | name = "num-integer"
143 | version = "0.1.36"
144 | source = "registry+https://github.com/rust-lang/crates.io-index"
145 | dependencies = [
146 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
147 | ]
148 |
149 | [[package]]
150 | name = "num-iter"
151 | version = "0.1.35"
152 | source = "registry+https://github.com/rust-lang/crates.io-index"
153 | dependencies = [
154 | "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
155 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
156 | ]
157 |
158 | [[package]]
159 | name = "num-rational"
160 | version = "0.1.42"
161 | source = "registry+https://github.com/rust-lang/crates.io-index"
162 | dependencies = [
163 | "num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
164 | "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
165 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
166 | "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
167 | ]
168 |
169 | [[package]]
170 | name = "num-traits"
171 | version = "0.1.43"
172 | source = "registry+https://github.com/rust-lang/crates.io-index"
173 | dependencies = [
174 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
175 | ]
176 |
177 | [[package]]
178 | name = "num-traits"
179 | version = "0.2.2"
180 | source = "registry+https://github.com/rust-lang/crates.io-index"
181 |
182 | [[package]]
183 | name = "openblas-src"
184 | version = "0.5.6"
185 | source = "registry+https://github.com/rust-lang/crates.io-index"
186 |
187 | [[package]]
188 | name = "rand"
189 | version = "0.4.2"
190 | source = "registry+https://github.com/rust-lang/crates.io-index"
191 | dependencies = [
192 | "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
193 | "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)",
194 | "winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
195 | ]
196 |
197 | [[package]]
198 | name = "rawpointer"
199 | version = "0.1.0"
200 | source = "registry+https://github.com/rust-lang/crates.io-index"
201 |
202 | [[package]]
203 | name = "rustc-serialize"
204 | version = "0.3.24"
205 | source = "registry+https://github.com/rust-lang/crates.io-index"
206 |
207 | [[package]]
208 | name = "winapi"
209 | version = "0.3.4"
210 | source = "registry+https://github.com/rust-lang/crates.io-index"
211 | dependencies = [
212 | "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
213 | "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
214 | ]
215 |
216 | [[package]]
217 | name = "winapi-i686-pc-windows-gnu"
218 | version = "0.4.0"
219 | source = "registry+https://github.com/rust-lang/crates.io-index"
220 |
221 | [[package]]
222 | name = "winapi-x86_64-pc-windows-gnu"
223 | version = "0.4.0"
224 | source = "registry+https://github.com/rust-lang/crates.io-index"
225 |
226 | [metadata]
227 | "checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf"
228 | "checksum blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8d3a12d382bd4c40f95c105f1a7074a18bdb0ee140ddb73f6d924a4f7d333bc9"
229 | "checksum cblas 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ce45c2223361cc6077b505f4d203e3b9494d746f39dfbf7627bbcb5aa7f0a13a"
230 | "checksum cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b6feecd82cce51b0204cf063f0041d69f24ce83f680d87514b004248e7b0fa65"
231 | "checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0"
232 | "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
233 | "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
234 | "checksum itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f58856976b776fedd95533137617a02fb25719f40e7d9b01c7043cd65474f450"
235 | "checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b"
236 | "checksum matrixmultiply 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "cac1a66eab356036af85ea093101a14223dc6e3f4c02a59b7d572e5b93270bf7"
237 | "checksum mersenne_twister 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b85dbb2f68dfc026aac8f4c5196579896b10ee45e8b9a1a3b325fab3043d1cb0"
238 | "checksum ndarray 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0e3d24c5ba54015d7d5203ca6f00d4cc16c71042bf7f7be26f091236f390a16a"
239 | "checksum num 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "4703ad64153382334aa8db57c637364c322d3372e097840c72000dabdcf6156e"
240 | "checksum num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "81b483ea42927c463e191802e7334556b48e7875297564c0e9951bd3a0ae53e3"
241 | "checksum num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "b288631d7878aaf59442cffd36910ea604ecd7745c36054328595114001c9656"
242 | "checksum num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "f8d26da319fb45674985c78f1d1caf99aa4941f785d384a2ae36d0740bc3e2fe"
243 | "checksum num-iter 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "4b226df12c5a59b63569dd57fafb926d91b385dfce33d8074a412411b689d593"
244 | "checksum num-rational 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "ee314c74bd753fc86b4780aa9475da469155f3848473a261d2d18e35245a784e"
245 | "checksum num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31"
246 | "checksum num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dee092fcdf725aee04dd7da1d21debff559237d49ef1cb3e69bcb8ece44c7364"
247 | "checksum openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "68d293fca3c73ad377ddd2236d32c828a50a611a5b472bf6a884b9b60a4acd97"
248 | "checksum rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eba5f8cb59cc50ed56be8880a5c7b496bfd9bd26394e176bc67884094145c2c5"
249 | "checksum rawpointer 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ebac11a9d2e11f2af219b8b8d833b76b1ea0e054aa0e8d8e9e4cbde353bdf019"
250 | "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
251 | "checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3"
252 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
253 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
254 |
--------------------------------------------------------------------------------
/rust/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "julia-bench"
3 | publish = false
4 | version = "0.1.0"
5 |
6 | [dependencies]
7 | itertools = "0.7.1"
8 | mersenne_twister = "1.1.1"
9 | num = "0.1.37"
10 | rand = "0.4.2"
11 |
12 | [dependencies.cblas]
13 | version = "0.1.2"
14 | optional = true
15 |
16 | [dependencies.cblas-sys]
17 | version = "0.1.4"
18 |
19 | [dependencies.ndarray]
20 | features = ["blas"]
21 | version = "0.11.1"
22 |
23 | [dependencies.blas-src]
24 | features = ["openblas"]
25 | default-features = false
26 | version = "0.1.2"
27 |
28 | [dependencies.openblas-src]
29 | features = ["cblas", "system"]
30 | default-features = false
31 | version = "0.5.6"
32 |
33 | [features]
34 | default = []
35 | direct_blas = ["cblas"]
36 |
--------------------------------------------------------------------------------
/rust/rust-toolchain:
--------------------------------------------------------------------------------
1 | nightly-2018-04-16
2 |
--------------------------------------------------------------------------------
/rust/src/direct_blas.rs:
--------------------------------------------------------------------------------
1 | #![allow(unsafe_code)]
2 |
3 | use rand::Rng;
4 | use std::iter::Sum;
5 | use util::{gen_rng, fill_rand, myrand};
6 | use cblas::{dgemm, Layout, Transpose};
7 | use itertools::Itertools;
8 |
9 | pub fn randmatstat(t: usize) -> (f64, f64) {
10 | let mut rng = gen_rng(1234u64);
11 |
12 | let n = 5;
13 |
14 | let mut v = vec![0.; t];
15 | let mut w = vec![0.; t];
16 |
17 | {
18 | let mut a = vec![0.; n * n];
19 | let mut b = vec![0.; n * n];
20 | let mut c = vec![0.; n * n];
21 | let mut d = vec![0.; n * n];
22 | let mut p = vec![0.; (n) * (4 * n)];
23 | let mut q = vec![0.; (2 * n) * (2 * n)];
24 |
25 | let mut pt_p1 = vec![0.; (4 * n) * (4 * n)];
26 | let mut pt_p2 = vec![0.; (4 * n) * (4 * n)];
27 | let mut qt_q1 = vec![0.; (2 * n) * (2 * n)];
28 | let mut qt_q2 = vec![0.; (2 * n) * (2 * n)];
29 |
30 | for (ve, we) in v.iter_mut().zip(w.iter_mut()) {
31 | fill_rand(&mut a, &mut rng);
32 | fill_rand(&mut b, &mut rng);
33 | fill_rand(&mut c, &mut rng);
34 | fill_rand(&mut d, &mut rng);
35 |
36 | p[0 .. n * n].copy_from_slice(&a);
37 | p[n * n .. 2 * n * n].copy_from_slice(&b);
38 | p[2 * n * n .. 3 * n * n].copy_from_slice(&c);
39 | p[3 * n * n .. 4 * n * n].copy_from_slice(&d);
40 |
41 | for j in 0..n {
42 | for k in 0..n {
43 | q[2 * n * j + k] = a[k];
44 | q[2 * n * j + n + k] = b[k];
45 | q[2 * n * (n + j) + k] = c[k];
46 | q[2 * n * (n + j) + n + k] = d[k];
47 | }
48 | }
49 |
50 | unsafe {
51 | let n = n as i32;
52 |
53 | dgemm(Layout::ColumnMajor, Transpose::Ordinary, Transpose::None,
54 | n , n, 4 * n, 1., &p, 4 * n, &p, 4 * n, 0.,
55 | &mut pt_p1, 4 * n);
56 | dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
57 | 4 * n, 4 * n, 4 * n, 1., &pt_p1, 4 * n, &pt_p1, 4 * n, 0.,
58 | &mut pt_p2, 4 * n);
59 | dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
60 | 4 * n, 4 * n, 4 * n, 1., &pt_p2, 4 * n, &pt_p2, 4 * n, 0.,
61 | &mut pt_p1, 4 * n);
62 | }
63 |
64 | *ve = trace(&pt_p1, n * 4);
65 |
66 | unsafe {
67 | let n = n as i32;
68 |
69 | dgemm(Layout::ColumnMajor, Transpose::Ordinary, Transpose::None,
70 | 2 * n, 2 * n, 2 * n, 1., &q, 2 * n, &q, 2 * n, 0.,
71 | &mut qt_q1, 2 * n);
72 | dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
73 | 2 * n, 2 * n, 2 * n, 1., &qt_q1, 2 * n, &qt_q1, 2 * n, 0.,
74 | &mut qt_q2, 2 * n);
75 | dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
76 | 2 * n, 2 * n, 2 * n, 1., &qt_q2, 2 * n, &qt_q2, 2 * n, 0.,
77 | &mut qt_q1, 2 * n);
78 | }
79 |
80 | *we = trace(&qt_q1, 2 * n);
81 | }
82 | }
83 |
84 | let (v1, v2, w1, w2) = v.iter()
85 | .zip(w.iter())
86 | .fold((0., 0., 0., 0.), |(v1, v2, w1, w2), (ve, we)| (
87 | v1 + *ve,
88 | v2 + ve * ve,
89 | w1 + *we,
90 | w2 + we * we
91 | ));
92 |
93 | let t = t as f64;
94 |
95 | (
96 | f64::sqrt((t * (t * v2 - v1 * v1)) / ((t - 1.) * v1 * v1)),
97 | f64::sqrt((t * (t * w2 - w1 * w1)) / ((t - 1.) * w1 * w1)),
98 | )
99 | }
100 |
/// Sum of the diagonal of an `n x n` matrix stored contiguously in `m`.
///
/// The diagonal entries sit `n + 1` elements apart, so the slice is cut into
/// chunks of that length and the first element of every chunk is summed.
#[inline]
fn trace<'a, T>(m: &'a [T], n: usize) -> T
where
    T: Sum<&'a T>
{
    debug_assert_eq!(m.len(), n * n);
    let stride = n + 1;
    m.chunks(stride).map(|chunk| &chunk[0]).sum()
}
110 |
111 | pub fn randmatmul(n: usize, mut rng: R) -> Vec {
112 | let a = myrand(n * n, &mut rng);
113 | let b = myrand(n * n, &mut rng);
114 | let mut c = vec![0.; n * n];
115 |
116 | unsafe {
117 | let n = n as i32;
118 | dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
119 | n, n, n, 1., &a, n, &b, n, 0., &mut c, n);
120 | }
121 |
122 | c
123 | }
124 |
/// Sanity check on a `randmatmul` result: the first element must be
/// non-negative. Panics otherwise (and on an empty vector, via indexing).
#[inline]
pub fn check_randmatmul(m: Vec<f64>) {
    assert!(0. <= m[0]);
}
129 |
--------------------------------------------------------------------------------
/rust/src/main.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 | #![deny(unsafe_code)]
3 |
4 | extern crate itertools;
5 | extern crate mersenne_twister;
6 | extern crate num;
7 | extern crate rand;
8 | extern crate test;
9 |
10 | // Use BLAS directly
11 | #[cfg(feature = "direct_blas")]
12 | extern crate cblas;
13 |
14 | #[cfg(feature = "direct_blas")]
15 | extern crate blas_src;
16 |
17 | // Use ndarray (with BLAS implementation)
18 | #[cfg(not(feature = "direct_blas"))]
19 | #[macro_use(s)]
20 | extern crate ndarray;
21 |
22 | use std::time::{Duration, Instant};
23 | use std::u32;
24 | use std::fs::OpenOptions;
25 | use std::io::{BufWriter, Write};
26 |
27 | use test::black_box;
28 | use num::complex::Complex64;
29 | use rand::Rng;
30 |
31 | mod util;
32 | use util::{gen_rng, myrand};
33 |
34 | #[cfg(feature = "direct_blas")]
35 | mod direct_blas;
36 | #[cfg(feature = "direct_blas")]
37 | use direct_blas::{randmatstat, randmatmul, check_randmatmul};
38 |
39 | #[cfg(not(feature = "direct_blas"))]
40 | use ndarray::Array2;
41 | #[cfg(not(feature = "direct_blas"))]
42 | use util::fill_rand;
43 | #[cfg(not(feature = "direct_blas"))]
44 | use num::Zero;
45 |
46 | const NITER: u32 = 5;
47 |
48 | #[cfg(not(feature = "direct_blas"))]
49 | fn nrand(shape: (usize, usize), rng: &mut R) -> Array2 {
50 | let mut m = Array2::zeros(shape);
51 | fill_rand(&mut m, rng);
52 | m
53 | }
54 |
/// Naive doubly-recursive Fibonacci.
///
/// The argument is passed through `black_box` on every call so the optimizer
/// cannot fold the recursion away.
fn fib(n: i32) -> i32 {
    match black_box(n) {
        k if k < 2 => k,
        k => fib(k - 1) + fib(k - 2),
    }
}
63 |
64 | fn mandel(z: Complex64) -> u32 {
65 | use std::iter;
66 |
67 | iter::repeat(z)
68 | .scan(z, |z, c| {
69 | let current = *z;
70 | *z = current * current + c;
71 | Some(current)
72 | })
73 | .take(80)
74 | .take_while(|z| z.norm_sqr() <= 4.0)
75 | .count() as u32
76 | }
77 |
78 | fn mandelperf() -> Vec {
79 | (-10..=10).flat_map(|i| (-20..=5).map(move |j| (i, j)))
80 | .map(|(i, j)| (j as f64 / 10., i as f64 / 10.))
81 | .map(|(re, im)| mandel(Complex64::new(re, im)))
82 | .collect()
83 | }
84 |
/// Computes the partial sum of `1 / k^2` for `k = 1..=10000`, repeating the
/// whole summation 500 times and returning the result of the last pass.
fn pisum() -> f64 {
    let mut total = 0.;
    for _ in 0..500 {
        // every pass starts again from zero
        let mut partial = 0.;
        for k in 1..10001 {
            let kf = k as f64;
            partial += 1. / (kf * kf);
        }
        total = partial;
    }
    total
}
97 |
98 | #[cfg(not(feature = "direct_blas"))]
99 | fn randmatstat(t: usize) -> (f64, f64) {
100 | let mut rng = gen_rng(1234u64);
101 |
102 | let n = 5;
103 |
104 | let mut v = vec![0.; t];
105 | let mut w = vec![0.; t];
106 |
107 | for (ve, we) in v.iter_mut().zip(w.iter_mut()) {
108 | let a = nrand((n, n), &mut rng);
109 | let b = nrand((n, n), &mut rng);
110 | let c = nrand((n, n), &mut rng);
111 | let d = nrand((n, n), &mut rng);
112 | let p = { // P = [a b c d]
113 | let mut p = Array2::::zeros((n, 4 * n));
114 | let n = n as isize;
115 | p.slice_mut(s![.., 0..n]).assign(&a);
116 | p.slice_mut(s![.., n..2*n]).assign(&b);
117 | p.slice_mut(s![.., 2*n..3*n]).assign(&c);
118 | p.slice_mut(s![.., 3*n..4*n]).assign(&d);
119 | p
120 | };
121 | let q = { // Q = [a b ; c d]
122 | let mut q = Array2::::zeros((2 * n, 2 * n));
123 | let n = n as isize;
124 | q.slice_mut(s![0..n, 0..n]).assign(&a);
125 | q.slice_mut(s![0..n, n..2*n]).assign(&b);
126 | q.slice_mut(s![n..2*n, 0..n]).assign(&c);
127 | q.slice_mut(s![n..2*n, n..2*n]).assign(&d);
128 | q
129 | };
130 |
131 | let pt = p.t();
132 | let ptp = pt.dot(&p);
133 | let ptp2 = ptp.dot(&ptp);
134 | let ptp4 = ptp2.dot(&ptp2);
135 | *ve = trace_arr(&ptp4);
136 |
137 | let qt = q.t();
138 | let ptq = qt.dot(&q);
139 | let ptq2 = ptq.dot(&ptq);
140 | let ptq4 = ptq2.dot(&ptq2);
141 | *we = trace_arr(&ptq4);
142 | }
143 |
144 | let (v1, v2, w1, w2) = v.iter()
145 | .zip(w.iter())
146 | .fold((0., 0., 0., 0.), |(v1, v2, w1, w2), (ve, we)| (
147 | v1 + *ve,
148 | v2 + ve * ve,
149 | w1 + *we,
150 | w2 + we * we
151 | ));
152 |
153 | let t = t as f64;
154 |
155 | (
156 | f64::sqrt((t * (t * v2 - v1 * v1)) / ((t - 1.) * v1 * v1)),
157 | f64::sqrt((t * (t * w2 - w1 * w1)) / ((t - 1.) * w1 * w1)),
158 | )
159 | }
160 |
161 | /// Calculate the trace of a square matrix
162 | #[cfg(not(feature = "direct_blas"))]
163 | #[inline]
164 | fn trace_arr<'a, T: 'a>(m: &'a Array2) -> T
165 | where
166 | T: Zero + Clone
167 | {
168 | m.diag().scalar_sum()
169 | }
170 |
171 | #[cfg(not(feature = "direct_blas"))]
172 | fn randmatmul(n: usize, mut rng: R) -> Array2 {
173 | let a = nrand((n, n), &mut rng);
174 | let b = nrand((n, n), &mut rng);
175 |
176 | a.dot(&b)
177 | }
178 |
179 | #[cfg(not(feature = "direct_blas"))]
180 | #[inline]
181 | fn check_randmatmul(m: Array2) {
182 | assert!(0. <= m[[0, 0]]);
183 | }
184 |
#[test]
fn test_quicksort() {
    let mut a = [10., 9., 8., 7., 6., 5., 4., 3., 2., 1.];
    quicksort(a.as_mut(), 0);
    assert_eq!(a, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]);

    let mut a = [8., 2., 10., 4., 7., 6., 9., 5., 1., 3.];
    quicksort(a.as_mut(), 0);
    assert_eq!(a, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]);

    // degenerate inputs must be accepted without panicking
    let mut empty: [f64; 0] = [];
    quicksort(empty.as_mut(), 0);

    let mut single = [42.];
    quicksort(single.as_mut(), 0);
    assert_eq!(single, [42.]);
}

/// Sorts `a[lo..]` in ascending order, in place.
///
/// Each pass partitions the remaining range around its middle element, sorts
/// the left part with a recursive call and then continues with the right part
/// in the same loop. Slices with fewer than two elements are returned
/// unchanged.
fn quicksort(a: &mut [f64], mut lo: usize) {
    // Nothing to sort; this also keeps `len() - 1` from underflowing on an
    // empty slice.
    if a.len() < 2 {
        return;
    }

    let hi = a.len() - 1;
    let mut i: usize = lo;
    // j is isize because it can be -1
    let mut j: isize = hi as isize;

    while i < hi {
        let pivot = a[(lo + hi) / 2];
        // partition: move elements smaller than the pivot to the left and
        // larger ones to the right
        while i as isize <= j {
            while a[i] < pivot {
                i += 1;
            }
            while a[j as usize] > pivot {
                j -= 1;
            }
            if i as isize <= j {
                a.swap(i, j as usize);
                i += 1;
                j -= 1;
            }
        }

        // left part is everything up to and including index j
        let (l, _r) = a.split_at_mut((j + 1) as usize);

        if (lo as isize) < j {
            quicksort(l, lo);
        }

        // continue with the right part
        lo = i;
        j = hi as isize;
    }
}
228 |
/// Writes `n` lines of the form `"i i"` (for `i` in `0..n`) to `/dev/null`
/// through a buffered writer. Panics if the file cannot be opened or written.
fn printfd(n: usize) {
    let sink = OpenOptions::new()
        .write(true)
        .open("/dev/null")
        .unwrap();
    let mut out = BufWriter::new(sink);
    (0..n).for_each(|i| writeln!(out, "{} {}", i, i).unwrap());
}
237 |
/// Prints one CSV result row `rust,<name>,<time>`; `t` is given in seconds
/// and printed in milliseconds with six decimals.
fn print_perf(name: &str, t: f64) {
    let millis = t * 1000.;
    println!("rust,{},{:.6}", name, millis);
}
241 |
/// Converts a duration to fractional seconds (whole seconds plus the
/// sub-second nanoseconds scaled by 1e-9).
fn to_float(d: Duration) -> f64 {
    let whole = d.as_secs() as f64;
    let fraction = d.subsec_nanos() as f64 / 1e9;
    whole + fraction
}
246 |
/// Runs `op` exactly `niters` times, timing each run separately, and returns
/// the shortest elapsed time.
///
/// Panics if `niters` is zero, because there is then no measurement to return.
#[inline]
fn measure_best<F: FnMut()>(niters: u32, mut op: F) -> Duration {
    (0..niters)
        .map(move |_| {
            let t = Instant::now();
            op();
            t.elapsed()
        })
        .min()
        .expect("measure_best needs at least one iteration")
}
256 |
257 | fn main() {
258 | // initialize RNG
259 | let mut rng = gen_rng(0);
260 |
261 | // fib(20)
262 | assert_eq!(fib(20), 6765);
263 | let mut f = 0i32;
264 | let fibarg = 20;
265 | let tmin = measure_best(NITER, || {
266 | for _ in 0..1000 {
267 | f = f.wrapping_add(fib(fibarg));
268 | }
269 | });
270 | print_perf("recursion_fibonacci", to_float(tmin) / 1000.0);
271 |
272 | // parse_int
273 | let tmin = measure_best(NITER, || {
274 | for _ in 0..1000 * 100 {
275 | let n: u32 = rng.gen();
276 | let s = format!("{:x}", n);
277 | let m = u32::from_str_radix(&s, 16).unwrap();
278 | assert_eq!(m, n);
279 | }
280 | });
281 | print_perf("parse_integers", to_float(tmin) / 100.0);
282 |
283 | let mandel_sum_init = black_box(0u32);
284 | let mut mandel_sum2 = mandel_sum_init;
285 | let tmin = measure_best(NITER, || {
286 | for j in 0..100 {
287 | let m = mandelperf();
288 | if j == 0 {
289 | let mandel_sum: u32 = m.iter().sum();
290 | assert_eq!(mandel_sum, 14791);
291 | mandel_sum2 += mandel_sum;
292 | }
293 | }
294 | });
295 | assert_eq!(mandel_sum2, 14791 * NITER);
296 | print_perf("userfunc_mandelbrot", to_float(tmin) / 100.0);
297 |
298 | // sort
299 | let tmin = measure_best(NITER, || {
300 | let mut d = myrand(5000, &mut rng);
301 | quicksort(&mut d, 0);
302 | });
303 | print_perf("recursion_quicksort", to_float(tmin));
304 |
305 | // pi sum
306 | let mut pi = 0.;
307 | let tmin = measure_best(NITER, || {
308 | pi = black_box(pisum());
309 | });
310 | assert!(f64::abs(pi - 1.644834071848065) < 1e-12);
311 | print_perf("iteration_pi_sum", to_float(tmin));
312 |
313 | // rand mat stat
314 | let mut r = (0., 0.);
315 | let tmin = measure_best(NITER, || {
316 | r = black_box(randmatstat(1000));
317 | });
318 | print_perf("matrix_statistics", to_float(tmin));
319 |
320 | // rand mat mul
321 | let tmin = measure_best(NITER, || {
322 | let c = randmatmul(1000, &mut rng);
323 | check_randmatmul(c);
324 | });
325 | print_perf("matrix_multiply", to_float(tmin));
326 |
327 | // printfd
328 | let tmin = measure_best(NITER, || {
329 | printfd(100000);
330 | });
331 | print_perf("print_to_file", to_float(tmin));
332 | }
333 |
--------------------------------------------------------------------------------
/rust/src/util.rs:
--------------------------------------------------------------------------------
1 | use rand::{Rand, Rng, SeedableRng};
2 |
3 | use mersenne_twister::MT19937_64;
4 | pub type MTRng = MT19937_64;
5 |
6 | #[inline]
7 | pub fn gen_rng(seed: u64) -> MTRng {
8 | MTRng::from_seed(seed)
9 | }
10 |
11 | pub fn fill_rand<'a, I, T: 'a, R>(a: I, rng: &mut R)
12 | where
13 | I: IntoIterator- ,
14 | T: Rand,
15 | R: Rng,
16 | {
17 | for v in a.into_iter() {
18 | *v = rng.gen();
19 | }
20 | }
21 |
22 | pub fn myrand(n: usize, rng: &mut R) -> Vec {
23 | let mut d: Vec = vec![0.; n];
24 | fill_rand(&mut d, rng);
25 | d
26 | }
27 |
--------------------------------------------------------------------------------
/scala/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | *.log
3 |
4 | # sbt specific
5 | .cache
6 | .history
7 | .lib/
8 | dist/*
9 | target/
10 | lib_managed/
11 | src_managed/
12 | project/boot/
13 | project/plugins/project/
14 |
15 | # Scala-IDE specific
16 | .scala_dependencies
17 | .worksheet
18 |
--------------------------------------------------------------------------------
/scala/build.sbt:
--------------------------------------------------------------------------------
1 | // This file was formerly a part of Julia. License is MIT: https://julialang.org/license
2 |
3 | libraryDependencies ++= Seq(
4 | "org.scalanlp" %% "breeze" % "0.10",
5 | "org.scalanlp" %% "breeze-natives" % "0.10"
6 | )
7 |
8 | resolvers ++= Seq(
9 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/"
10 | )
11 |
12 | scalaVersion := "2.11.1"
13 |
14 | showSuccess := false
15 |
16 | onLoadMessage := ""
17 |
18 | logLevel := Level.Warn
19 |
--------------------------------------------------------------------------------
/scala/src/main/scala/perf.scala:
--------------------------------------------------------------------------------
1 | // This file was formerly a part of Julia. License is MIT: https://julialang.org/license
2 |
3 | import scala.util._
4 | import java.io._
5 | import breeze.linalg._
6 | import breeze.numerics._
7 | import breeze.stats._
8 | import breeze.math._
9 | //import com.github.fommil.netlib.{BLAS}
10 |
// Micro-benchmark suite implemented with Breeze. Every `time_*` method runs its
// workload NITER times and returns the fastest run in nanoseconds (divided by
// the inner repetition count where there is one); `main` prints one CSV row per
// benchmark.
object PerfBreeze {
  // number of timed repetitions per benchmark; the minimum is reported
  final val NITER = 5

  // print results appropriately. times are in milliseconds
  // (`t` is given in nanoseconds, hence the division by 1e6)
  def print_perf(name:String, t:Double) = {
    printf("scala,%s,%.9f\n", name, t/1e6)
  }

  // time fib
  // naive doubly-recursive Fibonacci
  def fib(n:Int):Int = {
    if (n < 2) n else fib(n-1) + fib(n-2)
  }

  // fastest of NITER runs of 1000 evaluations of fib(20), per evaluation
  def time_fib() = {
    assert(fib(20) == 6765)
    var tmin = Long.MaxValue
    var f = 0

    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      for(j <- 1 to 1000) {
        f += fib(20)
      }
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }

    tmin / 1000.0
  }

  // time parseint
  // formats a random Int as signed hex and parses it back, 1000 times per run
  // NOTE(review): this returns the time of a single parse, whereas
  // rust/src/main.rs reports the time per 1000 parses -- confirm which unit
  // the collection scripts expect.
  def time_parseint() = {
    val generator = scala.util.Random
    var tmin = Long.MaxValue

    for(i <- 1 to NITER) {
      var rand:Int = 0
      var rands:String = "0"
      var parsed:Int = 0
      val t1 = System.nanoTime()
      for(j <- 1 to 1000) {
        rand = generator.nextInt()
        // toHexString yields the unsigned representation, so negative values
        // are written as "-" followed by the magnitude
        rands = if(rand < 0) "-" + abs(rand).toHexString else rand.toHexString
        parsed = Integer.parseInt(rands, 16)
        assert(rand == parsed)
      }
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }
    tmin / 1000.0
  }

  // time mandel
  // returns the first n in 0..maxiter at which |z| exceeds 2, or maxiter if
  // that never happens
  def mandel(zin:Complex):Int = {
    val c = zin
    var z = zin
    val maxiter = 80
    for(n <- 0 to maxiter) {
      if(z.abs > 2) return n
      z = c + (z * z)
    }
    maxiter
  }

  // evaluates mandel on the grid re = -2.0..0.5, im = -1.0..1.0, step 0.1
  def mandelperf() = {
    for(re <- -20 to 5; im <- -10 to 10) yield mandel(re/10.0 + i * im/10.0)
  }

  // fastest of NITER runs of 100 grid evaluations, per evaluation
  def time_mandel() = {
    var mandel_sum = 0
    var mandel_sum2 = 0
    var tmin = Long.MaxValue

    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      for(j <- 1 to 100) {
        val mandel_arr = mandelperf()
        // only the first evaluation of each run is checksummed
        if(j == 1) {
          mandel_sum = sum(mandel_arr)
          mandel_sum2 += mandel_sum
        }
      }
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }
    assert(mandel_sum == 14791)
    assert(mandel_sum2 == mandel_sum * NITER)
    tmin / 100.0
  }

  // time quicksort
  // sorts a(lo..hi) (both inclusive) in place and returns the same array
  def quicksort(a:Array[Double], lo:Int, hi:Int):Array[Double] = {
    var i, l = lo
    var j = hi

    def _swap(i:Int, j:Int) = {
      val tmp = a(i)
      a(i) = a(j)
      a(j) = tmp
    }

    while(i < hi) {
      val pivot = a((l+hi)>>>1)
      // partition around the pivot
      while(i <= j) {
        while(a(i) < pivot) i += 1
        while(a(j) > pivot) j -= 1
        if(i <= j) {
          _swap(i, j)
          i += 1
          j -= 1
        }
      }
      // sort the left part recursively, then continue with the right part,
      // which starts at i (not at j, which is the end of the left part)
      if(l < j) quicksort(a, l, j)
      l = i
      j = hi
    }
    a
  }

  /*
  def checksorted(a:Array[Double]):Boolean = {
    for(i <- 0 to a.length-2) {
      assert(a(i) < a(i+1))
    }
    true
  }
  */

  // fastest of NITER runs of 1000 sorts of 5000 random values, per sort
  def time_quicksort() = {
    var tmin = Long.MaxValue

    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      for(j <- 1 to 1000) {
        val A = DenseVector.rand[Double](5000)
        quicksort(A.data, 0, 4999)
      }
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }
    tmin / 1000.0
  }

  // time pisum
  // partial sum of 1/k^2 for k = 1..10000, recomputed 500 times
  def pisum() = {
    var sum = 0.0
    for(j <- 1 to 500) {
      sum = 0.0
      for(k <- 1 to 10000) {
        sum += 1.0/(k*k)
      }
    }
    sum
  }

  def time_pisum() = {
    var tmin = Long.MaxValue
    var pi = 0:Double
    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      pi = pisum()
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
      assert(abs(pi-1.644834071848065) < 1e-12)
    }
    tmin
  }

  // time printfd
  // writes n lines "i i" to /dev/null through a buffered PrintStream
  def printfd(n:Int) = {
    var stream = None: Option[PrintStream]
    try {
      stream = Some(new PrintStream(new BufferedOutputStream(new FileOutputStream("/dev/null"))))
      val valid_stream = stream.get
      for (i <- 1 to n) {
        // the values are passed as arguments, not as the format string, and
        // every record ends with a newline
        valid_stream.printf("%d %d\n", Int.box(i), Int.box(i))
      }
    } catch {
      case e: Exception => println("Exception caught: " + e)
    } finally {
      if(stream.isDefined) stream.get.close()
    }
  }

  def time_printfd() = {
    var tmin = Long.MaxValue
    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      printfd(100000)
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }
    tmin
  }

  // random matrix statistics
  // for t trials of random 5x5 blocks a, b, c, d: trace((P'P)^4) with
  // P = [a b c d] and trace((Q'Q)^4) with Q = [a b; c d]; returns
  // stddev/mean of both trace sequences
  def randmatstat(t:Int):(Double,Double) = {
    val n = 5
    val v = DenseVector.zeros[Double](t)
    val w = DenseVector.zeros[Double](t)

    val g = breeze.stats.distributions.Gaussian(0, 1)
    for(i <- 0 to t-1) {
      val a = DenseMatrix.rand(n, n, g)
      val b = DenseMatrix.rand(n, n, g)
      val c = DenseMatrix.rand(n, n, g)
      val d = DenseMatrix.rand(n, n, g)
      val P = DenseMatrix.horzcat(a, b, c, d)
      val Q = DenseMatrix.vertcat(DenseMatrix.horzcat(a, b), DenseMatrix.horzcat(c, d))
      val V = P.t * P
      val W = Q.t * Q

      v(i) = trace(V * V * V * V)
      w(i) = trace(W * W * W * W)
    }
    (stddev(v)/mean(v), stddev(w)/mean(w))
  }

  def time_randmatstat() = {
    var tmin = Long.MaxValue
    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      val (s1, s2) = randmatstat(1000)
      val t = System.nanoTime() - t1
      assert(0.5 < s1 && s1 < 1.0 && 0.5 < s2 && s2 < 1.0)

      if(t < tmin) tmin = t
    }
    tmin
  }

  // random matrix multiplication
  // product of two random t x t matrices
  def randmatmul(t:Int):DenseMatrix[Double] = {
    val m1 = randomDouble((t, t))
    val m2 = randomDouble((t, t))
    m1 * m2
  }

  def time_randmatmul() = {
    var tmin = Long.MaxValue
    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      val m = randmatmul(1000)
      val t = System.nanoTime() - t1
      assert(0 <= m(0,0))

      if(t < tmin) tmin = t
    }
    tmin
  }


  // NOTE(review): rust/src/main.rs labels the same benchmarks
  // "recursion_fibonacci", "parse_integers", "userfunc_mandelbrot", ... --
  // confirm which labels the collection scripts expect.
  def main(args: Array[String]) = {
    //println("BLAS: " + BLAS.getInstance().getClass().getName())
    print_perf("fib", time_fib())
    print_perf("parse_int", time_parseint())
    print_perf("mandel", time_mandel())
    print_perf("quicksort", time_quicksort())
    print_perf("pi_sum", time_pisum())
    print_perf("rand_mat_stat", time_randmatstat())
    print_perf("rand_mat_mul", time_randmatmul())
    print_perf("printfd", time_printfd())
  }
}
275 |
--------------------------------------------------------------------------------