├── .github └── workflows │ └── benchmarks.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── benchmarks └── .gitignore ├── bin ├── .gitignore ├── benchmarks.csv ├── benchmarks.ipynb ├── collect.jl ├── plot.jl ├── table.jl └── versions.sh ├── java ├── .gitignore ├── pom.xml ├── setup.sh └── src │ └── main │ └── java │ ├── Complex.java │ ├── PerfBLAS.java │ └── PerfPure.java ├── lua └── lua-install.sh ├── perf.R ├── perf.c ├── perf.f90 ├── perf.go ├── perf.jl ├── perf.js ├── perf.lua ├── perf.m ├── perf.nb ├── perf.py ├── perfutil.jl ├── randmtzig.c ├── rust ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── rust-toolchain └── src │ ├── direct_blas.rs │ ├── main.rs │ └── util.rs └── scala ├── .gitignore ├── build.sbt └── src └── main └── scala └── perf.scala /.github/workflows/benchmarks.yml: -------------------------------------------------------------------------------- 1 | name: Benchmarks 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | tags: '*' 9 | workflow_dispatch: 10 | 11 | concurrency: 12 | # Skip intermediate builds: all builds except for builds on the `master` or `release-*` branches 13 | # Cancel intermediate builds: only pull request builds 14 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref != 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release-') || github.run_number }} 15 | cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} 16 | 17 | permissions: 18 | contents: read 19 | 20 | jobs: 21 | test: 22 | runs-on: ${{ matrix.os }} 23 | defaults: 24 | run: 25 | shell: bash 26 | strategy: 27 | matrix: 28 | os: [ubuntu-latest] 29 | java-version: ['17'] 30 | julia-version: ['1.9.3'] 31 | python-version: ['3.10'] 32 | numpy-version: ['1.23.2'] 33 | gfortran-version: ['9'] # Note: unused since is built-in. 
34 | rust-version: ['1.42.0'] # Note: unused since controlled by `rust/rust-toolchain` 35 | js-version: ['18'] 36 | r-version: ['4.2.1'] 37 | lua-version: ['latest'] # Note: unused since lua distribution manually downloaded 38 | go-version: ['1.19'] 39 | 40 | steps: 41 | - uses: actions/checkout@v3 42 | with: 43 | persist-credentials: false 44 | - name: "Cache Julia" 45 | id: cache-julia 46 | uses: actions/cache@v2 47 | with: 48 | path: ~/julia 49 | key: ${{ runner.os }}-v${{ matrix.julia-version }} 50 | - name: "Build Julia" 51 | if: steps.cache-julia.outputs.cache-hit != 'true' 52 | uses: julia-actions/build-julia@v1 53 | with: 54 | ref: v${{ matrix.julia-version }} 55 | - name: "Set up dSFMT" 56 | run: | 57 | cd ~/ 58 | mkdir -p dSFMT 59 | cd dSFMT 60 | wget https://github.com/MersenneTwister-Lab/dSFMT/archive/refs/tags/v2.2.4.tar.gz 61 | echo "39682961ecfba621a98dbb6610b6ae2b7d6add450d4f08d8d4edd0e10abd8174 v2.2.4.tar.gz" | sha256sum --check --status 62 | tar -xzf v2.2.4.tar.gz 63 | mv dSFMT-*/* ./ 64 | - name: "Set up OpenBLAS" 65 | run: | 66 | sudo apt-get install -y libopenblas-dev 67 | - name: "Set up Python" 68 | uses: actions/setup-python@v1 69 | with: 70 | python-version: ${{ matrix.python-version }} 71 | - name: "Set up NumPy" 72 | run: pip install numpy==${{ matrix.numpy-version }} 73 | - name: "Set up Rust" 74 | uses: actions-rs/toolchain@v1 75 | with: 76 | toolchain: ${{ matrix.rust-version }} 77 | - name: "Set up Java" 78 | uses: actions/setup-java@v2 79 | with: 80 | distribution: 'temurin' 81 | java-version: ${{ matrix.java-version }} 82 | cache: 'maven' 83 | - name: "Set up JavaScript" 84 | uses: actions/setup-node@v2 85 | with: 86 | node-version: ${{ matrix.js-version }} 87 | - name: "Set up R" 88 | uses: r-lib/actions/setup-r@v2 89 | with: 90 | r-version: ${{ matrix.r-version }} 91 | - name: "Set up LuaJit" 92 | run: | 93 | cd ~/work/Microbenchmarks/Microbenchmarks/lua 94 | ./lua-install.sh 95 | - name: "Set up Go" 96 | uses: actions/setup-go@v3 
97 | with: 98 | go-version: ${{ matrix.go-version }} 99 | - name: "Run benchmark" 100 | run: | 101 | JULIAHOME=~/julia DSFMTDIR=~/dSFMT/ make gh_action_benchmarks.html 102 | - name: "Print benchmark data" 103 | run: cat gh_action_benchmarks.csv 104 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /perf.h 2 | /versions.csv 3 | /benchmarks.csv 4 | /benchmarks.txt 5 | /benchmarks.html 6 | /gopath 7 | /mods/* 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2009-2018 Jeff Bezanson, Stefan Karpinski, Viral B. Shah, 4 | and other contributors. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifndef JULIAHOME 2 | $(error JULIAHOME not defined. Set value to the root of the Julia source tree.) 3 | endif 4 | ifndef DSFMTDIR 5 | $(error DSFMTDIR not defined. Set value to the root of the dSFMT source tree.) 6 | endif 7 | 8 | 9 | # Will make multi-line targets work 10 | # (so we can use @for on the second line) 11 | .ONESHELL: 12 | 13 | include $(JULIAHOME)/Make.inc 14 | include $(JULIAHOME)/deps/Versions.make 15 | 16 | NODEJSBIN = node 17 | 18 | ITERATIONS=$(shell seq 1 5) 19 | 20 | #Use python2 for Python 2.x 21 | PYTHON = python3 22 | 23 | OCTAVE = octave-cli 24 | 25 | ifeq ($(OS), WINNT) 26 | MATHEMATICABIN = MathKernel 27 | else ifeq ($(OS), Darwin) 28 | MATHEMATICABIN = MathKernel 29 | else 30 | MATHEMATICABIN = math 31 | endif 32 | 33 | FFLAGS=-fexternal-blas 34 | #gfortran cannot multiply matrices using 64-bit external BLAS. 35 | ifeq ($(findstring gfortran, $(FC)), gfortran) 36 | ifeq ($(USE_BLAS64), 1) 37 | FFLAGS= 38 | endif 39 | FFLAGS+= -static-libgfortran 40 | endif 41 | 42 | #Which libm library am I using? 
43 | LIBMDIR = $(JULIAHOME)/usr/lib/ 44 | ifeq ($(USE_SYSTEM_LIBM), 0) 45 | ifeq ($(USE_SYSTEM_OPENLIBM), 0) 46 | LIBM = $(LIBMDIR)libopenlibm.a 47 | endif 48 | endif 49 | 50 | default: benchmarks.html 51 | 52 | export OMP_NUM_THREADS=1 53 | export GOTO_NUM_THREADS=1 54 | export OPENBLAS_NUM_THREADS=1 55 | 56 | perf.h: $(JULIAHOME)/deps/Versions.make 57 | echo '#include "cblas.h"' > $@ 58 | echo '#include "$(DSFMTDIR)/dSFMT.c"' >> $@ 59 | 60 | bin/perf%: perf.c perf.h 61 | $(CC) -std=c99 -O$* $< -o $@ -I$(DSFMTDIR) -lopenblas -L$(LIBMDIR) $(LIBM) $(CFLAGS) -lpthread 62 | 63 | bin/fperf%: perf.f90 64 | mkdir -p mods/$@ #Modules for each binary go in separate directories 65 | $(FC) $(FFLAGS) -Jmods/$@ -O$* $< -o $@ -lopenblas -L$(LIBMDIR) $(LIBM) -lpthread 66 | 67 | benchmarks/c.csv: \ 68 | benchmarks/c0.csv \ 69 | benchmarks/c1.csv \ 70 | benchmarks/c2.csv \ 71 | benchmarks/c3.csv 72 | cat $^ > $@ 73 | 74 | benchmarks/fortran.csv: \ 75 | benchmarks/fortran0.csv \ 76 | benchmarks/fortran1.csv \ 77 | benchmarks/fortran2.csv \ 78 | benchmarks/fortran3.csv 79 | cat $^ > $@ 80 | 81 | 82 | benchmarks/c%.csv: bin/perf% 83 | @for t in $(ITERATIONS); do $<; done >$@ 84 | 85 | benchmarks/fortran%.csv: bin/fperf% 86 | @for t in $(ITERATIONS); do $<; done >$@ 87 | 88 | benchmarks/go.csv: export GOPATH=$(abspath gopath) 89 | benchmarks/go.csv: perf.go 90 | go env -w GO111MODULE=off 91 | export CGO_LDFLAGS="-L${LIBM} -lopenblas" 92 | go get gonum.org/v1/netlib/blas/netlib 93 | go get gonum.org/v1/gonum/mat 94 | go get gonum.org/v1/gonum/stat 95 | @for t in $(ITERATIONS); do go run $<; done >$@ 96 | 97 | benchmarks/julia.csv: perf.jl 98 | @for t in $(ITERATIONS); do $(JULIAHOME)/usr/bin/julia $<; done >$@ 99 | 100 | benchmarks/python.csv: perf.py 101 | @for t in $(ITERATIONS); do $(PYTHON) $<; done >$@ 102 | 103 | benchmarks/matlab.csv: perf.m 104 | @for t in $(ITERATIONS); do matlab -nojvm -singleCompThread -r 'perf; perf; exit' | grep ^matlab | tail -8; done >$@ 105 | 106 | 
benchmarks/octave.csv: perf.m 107 | @for t in $(ITERATIONS); do $(OCTAVE) -q --eval perf 2>/dev/null; done >$@ 108 | 109 | benchmarks/r.csv: perf.R 110 | @for t in $(ITERATIONS); do cat $< | R --vanilla --slave 2>/dev/null; done >$@ 111 | 112 | benchmarks/javascript.csv: perf.js 113 | @for t in $(ITERATIONS); do $(NODEJSBIN) $<; done >$@ 114 | 115 | benchmarks/mathematica.csv: perf.nb 116 | @for t in $(ITERATIONS); do $(MATHEMATICABIN) -noprompt -run "<<$<; Exit[]"; done >$@ 117 | 118 | benchmarks/lua.csv: perf.lua 119 | export BIT=64 120 | @for t in $(ITERATIONS); do ./lua/ulua/bin/scilua $<; done >$@ 121 | 122 | benchmarks/java.csv: java/src/main/java/PerfBLAS.java 123 | cd java 124 | sh setup.sh 125 | @for t in $(ITERATIONS); do mvn -q exec:java; done >../$@ 126 | 127 | benchmarks/scala.csv: scala/src/main/scala/perf.scala scala/build.sbt 128 | cd scala 129 | @for t in $(ITERATIONS); do sbt run; done >../$@ 130 | 131 | benchmarks/rust.csv: rust/src/main.rs rust/src/util.rs rust/Cargo.lock 132 | cd rust 133 | @for t in $(ITERATIONS); do cargo run --release -q; done >../$@ 134 | 135 | LANGUAGES = c fortran go java javascript julia lua mathematica matlab octave python r rust 136 | GH_ACTION_LANGUAGES = c fortran go java javascript julia lua python r rust 137 | 138 | # These were formerly listed in LANGUAGES, but I can't get them to run 139 | # 2017-09-27 johnfgibson 140 | # scala 141 | 142 | BENCHMARKS = $(foreach lang,$(LANGUAGES),benchmarks/$(lang).csv) 143 | GH_ACTION_BENCHMARKS = $(foreach lang,$(GH_ACTION_LANGUAGES),benchmarks/$(lang).csv) 144 | 145 | COLON_SEPARATED_GHA_LANGUAGES = $(shell echo $(GH_ACTION_LANGUAGES) | sed 's/ /:/g') 146 | 147 | versions.csv: bin/versions.sh 148 | $^ >$@ 149 | 150 | gh_action_versions.csv: bin/versions.sh 151 | $^ $(COLON_SEPARATED_GHA_LANGUAGES) >$@ 152 | 153 | benchmarks.csv: bin/collect.jl $(BENCHMARKS) 154 | @$(call PRINT_JULIA, $^ >$@) 155 | 156 | gh_action_benchmarks.csv: bin/collect.jl $(GH_ACTION_BENCHMARKS) 157 | 
@$(call PRINT_JULIA, $^ >$@) 158 | 159 | benchmarks.html: bin/table.jl versions.csv benchmarks.csv 160 | @$(call PRINT_JULIA, $^ >$@) 161 | 162 | gh_action_benchmarks.html: bin/table.jl gh_action_versions.csv gh_action_benchmarks.csv 163 | @$(call PRINT_JULIA, $^ >$@) 164 | 165 | clean: 166 | @rm -rf perf.h bin/perf* bin/fperf* benchmarks/*.csv benchmarks.csv mods *~ octave-core perf.log gopath/* 167 | 168 | .PHONY: all perf clean 169 | 170 | .PRECIOUS: bin/perf0 bin/perf1 bin/perf2 bin/perf3 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Microbenchmarks 2 | 3 | This is a collection of micro-benchmarks used to compare Julia's performance against 4 | that of other languages. 5 | It was formerly part of the Julia source tree. 6 | The results of these benchmarks are used to generate the performance graph on the 7 | [Julia benchmarks page](https://julialang.org/benchmarks). 8 | 9 | ## Running benchmarks 10 | 11 | This repository assumes that Julia has been built from source and that the environment 12 | variables `JULIAHOME` and `DSFMTDIR` point to the roots of the Julia and dSFMT source trees. 13 | Both can also be set when invoking `make`, e.g. `make JULIAHOME=path/to/julia DSFMTDIR=path/to/dSFMT`. 14 | 15 | To build binaries and run the benchmarks, simply run `make`. 16 | Note that this refers to GNU Make, so BSD users will need to run `gmake`. 
17 | 18 | ## Included languages: 19 | 20 | * C 21 | * Fortran 22 | * Go 23 | * Java 24 | * JavaScript 25 | * Julia 26 | * LuaJIT 27 | * Mathematica 28 | * Matlab 29 | * Python 30 | * R 31 | * Rust 32 | * Scala 33 | -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | /*.csv 2 | -------------------------------------------------------------------------------- /bin/.gitignore: -------------------------------------------------------------------------------- 1 | /perf* 2 | /fperf* 3 | -------------------------------------------------------------------------------- /bin/benchmarks.csv: -------------------------------------------------------------------------------- 1 | c,iteration_pi_sum,27.368069 2 | c,matrix_multiply,72.014809 3 | c,matrix_statistics,4.528999 4 | c,parse_integers,0.099249 5 | c,print_to_file,9.929895 6 | c,recursion_fibonacci,0.022732 7 | c,recursion_quicksort,0.258923 8 | c,userfunc_mandelbrot,0.076702 9 | fortran,iteration_pi_sum,27.368789 10 | fortran,matrix_multiply,83.416557 11 | fortran,matrix_statistics,6.984467 12 | fortran,parse_integers,0.682692 13 | fortran,print_to_file,59.287684 14 | fortran,recursion_fibonacci,0.022466 15 | fortran,recursion_quicksort,0.308118 16 | fortran,userfunc_mandelbrot,0.053836 17 | go,iteration_pi_sum,27.3706417 18 | go,matrix_multiply,102.9984314 19 | go,matrix_statistics,27.57979652 20 | go,parse_integers,0.0953154785 21 | go,print_to_file,17.37055717 22 | go,recursion_fibonacci,0.04101122396666667 23 | go,recursion_quicksort,0.32159370860000003 24 | go,userfunc_mandelbrot,0.059685397066666666 25 | java,iteration_pi_sum,29.723044 26 | java,iteration_sinc_sum,0.118555 27 | java,matrix_multiply,581.467297 28 | java,matrix_statistics,22.776195 29 | java,parse_integers,0.314691 30 | java,print_to_file,95.850461 31 | java,recursion_fibonacci,0.082739 32 | 
java,recursion_quicksort,0.772211 33 | java,userfunc_mandelbrot,0.109468 34 | javascript,iteration_pi_sum,27.6 35 | javascript,matrix_multiply,2288.0 36 | javascript,matrix_statistics,63.3 37 | javascript,parse_integers,0.5 38 | javascript,print_to_file,72.0 39 | javascript,recursion_fibonacci,0.08 40 | javascript,recursion_quicksort,1.11 41 | javascript,userfunc_mandelbrot,0.087 42 | julia,iteration_pi_sum,27.670768 43 | julia,matrix_multiply,70.249355 44 | julia,matrix_statistics,7.396705 45 | julia,parse_integers,0.217658 46 | julia,print_to_file,10.868424 47 | julia,recursion_fibonacci,0.030162 48 | julia,recursion_quicksort,0.258018 49 | julia,userfunc_mandelbrot,0.052706 50 | lua,iteration_pi_sum,27.368 51 | lua,matrix_multiply,77.87 52 | lua,matrix_statistics,7.731 53 | lua,parse_integers,0.097 54 | lua,print_to_file,5.996 55 | lua,recursion_fibonacci,0.027 56 | lua,recursion_quicksort,0.404 57 | lua,userfunc_mandelbrot,0.077 58 | mathematica,iteration_pi_sum,39.862 59 | mathematica,matrix_multiply,85.409 60 | mathematica,matrix_statistics,33.94 61 | mathematica,parse_integers,2.249 62 | mathematica,print_to_file,664.313 63 | mathematica,recursion_fibonacci,3.002 64 | mathematica,recursion_quicksort,11.518 65 | mathematica,userfunc_mandelbrot,1.403 66 | matlab,iteration_pi_sum,27.56 67 | matlab,matrix_multiply,83.906 68 | matlab,matrix_statistics,36.674 69 | matlab,parse_integers,17.688 70 | matlab,print_to_file,1009.8 71 | matlab,recursion_fibonacci,0.4 72 | matlab,recursion_quicksort,0.612 73 | matlab,userfunc_mandelbrot,0.755 74 | octave,iteration_pi_sum,8691.11084938 75 | octave,matrix_multiply,87.54110336 76 | octave,matrix_statistics,209.43498611 77 | octave,parse_integers,57.0089817 78 | octave,print_to_file,1293.26105118 79 | octave,recursion_fibonacci,228.35707664 80 | octave,recursion_quicksort,575.16098022 81 | octave,userfunc_mandelbrot,445.80197334 82 | python,iteration_pi_sum,404.39462661743164 83 | python,matrix_multiply,84.99646186828613 84 | 
python,matrix_statistics,80.32107353210449 85 | python,parse_integers,1.9617080688476562 86 | python,print_to_file,47.04570770263672 87 | python,recursion_fibonacci,2.1429061889648438 88 | python,recursion_quicksort,9.729623794555664 89 | python,userfunc_mandelbrot,5.036592483520508 90 | r,iteration_pi_sum,320.0 91 | r,matrix_multiply,595.0 92 | r,matrix_statistics,95.0 93 | r,parse_integers,5.0 94 | r,print_to_file,1009.0 95 | r,recursion_fibonacci,6.0 96 | r,recursion_quicksort,15.0 97 | r,userfunc_mandelbrot,15.0 98 | rust,iteration_pi_sum,27.373818 99 | rust,matrix_multiply,77.615658 100 | rust,matrix_statistics,6.497923 101 | rust,parse_integers,0.121999 102 | rust,print_to_file,8.81542 103 | rust,recursion_fibonacci,0.039227 104 | rust,recursion_quicksort,0.253416 105 | rust,userfunc_mandelbrot,0.057636 106 | -------------------------------------------------------------------------------- /bin/collect.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | 3 | const data = Dict{Tuple{String,String},Float64}() 4 | 5 | for arg in ARGS, line in eachline(arg) 6 | lang, bench, time_str = split(line, ',') 7 | old_time = get(data, (lang, bench), Inf) 8 | new_time = parse(Float64, time_str) 9 | 0 < new_time < old_time || continue 10 | data[lang, bench] = new_time 11 | end 12 | 13 | for ((lang, bench), min_time) in sort!(collect(data)) 14 | println("$lang,$bench,$min_time") 15 | end 16 | -------------------------------------------------------------------------------- /bin/plot.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | 3 | # Producing the Julia Microbenchmarks plot 4 | 5 | # Load the required Julia packages 6 | using Base.MathConstants 7 | using CSV 8 | using DataFrames 9 | using Gadfly 10 | using StatsBase 11 | 12 | # Load benchmark data from file 13 | benchmarks = 14 | CSV.read("benchmarks.csv", DataFrame; header = ["language", 
"benchmark", "time"]) 15 | 16 | # Capitalize and decorate language names from datafile 17 | dict = Dict( 18 | "c" => "C", 19 | "julia" => "Julia", 20 | "lua" => "LuaJIT", 21 | "fortran" => "Fortran", 22 | "java" => "Java", 23 | "javascript" => "JavaScript", 24 | "matlab" => "Matlab", 25 | "mathematica" => "Mathematica", 26 | "python" => "Python", 27 | "octave" => "Octave", 28 | "r" => "R", 29 | "rust" => "Rust", 30 | "go" => "Go", 31 | ); 32 | benchmarks[!, :language] = [dict[lang] for lang in benchmarks[!, :language]] 33 | 34 | # Normalize benchmark times by C times 35 | ctime = benchmarks[benchmarks[!, :language] .== "C", :] 36 | benchmarks = innerjoin(benchmarks, ctime, on = :benchmark, makeunique = true) 37 | select!(benchmarks, Not(:language_1)) 38 | rename!(benchmarks, :time_1 => :ctime) 39 | benchmarks[!, :normtime] = benchmarks[!, :time] ./ benchmarks[!, :ctime]; 40 | 41 | # Compute the geometric mean once per distinct language (one langmean row each, so the join below stays one-to-many) 42 | langs = []; 43 | means = []; 44 | priorities = []; 45 | for lang in unique(benchmarks[!, :language]) 46 | data = benchmarks[benchmarks[!, :language] .== lang, :] 47 | gmean = geomean(data[!, :normtime]) 48 | push!(langs, lang) 49 | push!(means, gmean) 50 | if (lang == "C") 51 | push!(priorities, 1) 52 | elseif (lang == "Julia") 53 | push!(priorities, 2) 54 | else 55 | push!(priorities, 3) 56 | end 57 | end 58 | 59 | # Add the geometric means back into the benchmarks dataframe 60 | langmean = DataFrame(language = langs, geomean = means, priority = priorities) 61 | benchmarks = innerjoin(benchmarks, langmean, on = :language) 62 | 63 | # Put C first, Julia second, and sort the rest by geometric mean 64 | sort!(benchmarks, [:priority, :geomean]); 65 | sort!(langmean, [:priority, :geomean]); 66 | 67 | p = plot( 68 | benchmarks, 69 | x = :language, 70 | y = :normtime, 71 | color = :benchmark, 72 | Scale.y_log10, 73 | Guide.ylabel(nothing), 74 | Guide.xlabel(nothing), 75 | Theme( 76 | guide_title_position = :left, 77 | colorkey_swatch_shape = :circle, 
78 | minor_label_font = "Georgia", 79 | major_label_font = "Georgia", 80 | ), 81 | ) 82 | 83 | draw(SVG("benchmarks.svg", 9inch, 9inch / golden), p) 84 | -------------------------------------------------------------------------------- /bin/table.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | 3 | # This script generates an HTML table with the benchmark values and language versions. 4 | 5 | import Statistics 6 | import Printf 7 | 8 | const benchmark_order = [ 9 | "iteration_pi_sum", 10 | "recursion_fibonacci", 11 | "recursion_quicksort", 12 | "parse_integers", 13 | "print_to_file", 14 | "matrix_statistics", 15 | "matrix_multiply", 16 | "userfunc_mandelbrot", 17 | ] 18 | 19 | const versions = Dict{String, String}() 20 | const benchmarks = Dict{String, Dict{String, Float64}}() 21 | 22 | # Read versions.csv 23 | for line in eachline(ARGS[1]) 24 | lang, version = split(line, ',') 25 | versions[lang] = version 26 | end 27 | 28 | # Read benchmarks.csv 29 | for line in eachline(ARGS[2]) 30 | lang, bench, time_str = split(line, ',') 31 | times = get!(benchmarks, lang, Dict{String, Float64}()) 32 | times[bench] = parse(Float64, time_str) 33 | end 34 | 35 | const labels = Dict{String, String}( 36 | "c" => "C" , 37 | "julia" => "Julia" , 38 | "lua" => "LuaJIT" , 39 | "fortran" => "Fortran" , 40 | "java" => "Java" , 41 | "javascript" => "JavaScript" , 42 | "matlab" => "Matlab" , 43 | "python" => "Python" , 44 | "mathematica" => "Mathe­matica" , 45 | "r" => "R" , 46 | "octave" => "Octave" , 47 | "go" => "Go" , 48 | "rust" => "Rust" , 49 | ) 50 | 51 | # Produce the sorting order for the list of languages 52 | function lang_by(lang::String) 53 | # C is placed at the start of the list 54 | lang == "c" ? -Inf : 55 | # Julia is sorted immediately after C 56 | lang == "julia" ? 
-floatmax() : 57 | # The rest of the languages are sorted by the geometric mean of their benchmark values 58 | # See https://en.wikipedia.org/wiki/Geometric_mean#Relationship_with_logarithms for details 59 | exp(Statistics.mean(log.(collect(values(benchmarks[lang]))))) 60 | end 61 | 62 | const language_order = sort!(collect(keys(benchmarks)), by=lang_by) 63 | 64 | print(""" 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | """) 75 | 76 | for lang in language_order 77 | label = get(labels, lang, lang) 78 | println(" ") 79 | end 80 | 81 | print(""" 82 | 83 | 84 | """) 85 | 86 | for lang in language_order 87 | version = get(versions, lang, "???") 88 | println(" ") 89 | end 90 | 91 | print(""" 92 | 93 | 94 | 95 | """) 96 | 97 | for benchmark in benchmark_order 98 | println(" ") 99 | c_time = benchmarks["c"][benchmark] 100 | for lang in language_order 101 | rel_time = "n/a" 102 | if haskey(benchmarks[lang], benchmark) 103 | rel_time = Printf.@sprintf "%.2f" benchmarks[lang][benchmark]/c_time 104 | end 105 | println(" ") 106 | end 107 | println(" ") 108 | end 109 | 110 | print(""" 111 | 112 |
$label
$version
$benchmark$rel_time
113 | 114 | 115 | """) 116 | -------------------------------------------------------------------------------- /bin/versions.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Colon-separated list of languages to query (first argument); defaults to every known language: 4 | DEFAULT_LANGUAGES="c:fortran:go:java:javascript:julia:lua:mathematica:matlab:octave:python:r:rust" 5 | LANGUAGES=${1:-$DEFAULT_LANGUAGES} 6 | 7 | LANGUAGES=":${LANGUAGES}:" 8 | 9 | # Check if ":c:" in languages: 10 | if [[ $LANGUAGES == *":c:"* ]]; then 11 | echo -n "c,gcc " 12 | gcc -v 2>&1 | grep "gcc version" | cut -f3 -d" " 13 | fi 14 | 15 | if [[ $LANGUAGES == *":fortran:"* ]]; then 16 | echo -n "fortran,gcc " 17 | gfortran -v 2>&1 | grep "gcc version" | cut -f3 -d" " 18 | fi 19 | 20 | if [[ $LANGUAGES == *":go:"* ]]; then 21 | echo -n go, 22 | go version | cut -f3 -d" " 23 | fi 24 | 25 | if [[ $LANGUAGES == *":java:"* ]]; then 26 | echo -n java, 27 | java -version 2>&1 | grep "version" | cut -f2 -d "\"" 28 | fi 29 | 30 | if [[ $LANGUAGES == *":javascript:"* ]]; then 31 | echo -n "javascript,V8 " 32 | node -e "console.log(process.versions.v8)" 33 | fi 34 | 35 | if [[ $LANGUAGES == *":julia:"* ]]; then 36 | echo -n "julia," 37 | $JULIAHOME/usr/bin/julia -v | cut -f3 -d" " 38 | fi 39 | 40 | if [[ $LANGUAGES == *":lua:"* ]]; then 41 | echo -n "lua," 42 | (cd lua; ./ulua/luajit/*/Linux/x64/luajit -v | cut -f2 -d" ") 43 | fi 44 | 45 | if [[ $LANGUAGES == *":mathematica:"* ]]; then 46 | echo -n "mathematica," 47 | echo quit | math -version | head -n 1 | cut -f2 -d" " 48 | fi 49 | 50 | if [[ $LANGUAGES == *":matlab:"* ]]; then 51 | echo -n "matlab,R" 52 | matlab -nodisplay -nojvm -nosplash -r "version -release, quit" | tail -n3 | head -n1 | cut -f5 -d" " | sed "s/'//g" 53 | fi 54 | 55 | if [[ $LANGUAGES == *":octave:"* ]]; then 56 | echo -n "octave," 57 | octave-cli -v | grep version | cut -f4 -d" " 58 | fi 59 | 60 | if [[ $LANGUAGES == *":python:"* ]]; then 61 | echo -n 
"python," 62 | python3 -V 2>&1 | cut -f2 -d" " 63 | fi 64 | 65 | if [[ $LANGUAGES == *":r:"* ]]; then 66 | echo -n "r," 67 | R --version | grep "R version" | cut -f3 -d" " 68 | fi 69 | 70 | if [[ $LANGUAGES == *":rust:"* ]]; then 71 | echo -n "rust," 72 | (cd rust; rustc --version | cut -c 7- | sed 's/ ([0-9a-f]* /
(/g') 73 | fi 74 | -------------------------------------------------------------------------------- /java/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /java/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | julialang.org 4 | javaBenchmarks 5 | 0.0.1-SNAPSHOT 6 | javaBenchmarks 7 | micro benchmarks for Julia done in Java 8 | 9 | 10 | org.jblas 11 | jblas 12 | 1.2.3 13 | 14 | 15 | com.googlecode.efficient-java-matrix-library 16 | ejml 17 | 0.23 18 | 19 | 20 | 21 | 22 | 23 | maven-compiler-plugin 24 | 3.0 25 | 26 | 1.7 27 | 1.7 28 | 29 | 30 | 31 | org.codehaus.mojo 32 | exec-maven-plugin 33 | 1.2.1 34 | 35 | 36 | 37 | java 38 | 39 | 40 | 41 | 42 | PerfBLAS 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /java/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # This file was formerly a part of Julia. 
License is MIT: https://julialang.org/license 3 | 4 | mvn compile exec:java 5 | # requires maven and java 7 6 | -------------------------------------------------------------------------------- /java/src/main/java/Complex.java: -------------------------------------------------------------------------------- 1 | public class Complex { 2 | private final double re; 3 | private final double im; 4 | 5 | public Complex(double real, double imag) { 6 | re = real; 7 | im = imag; 8 | } 9 | 10 | public static double abs(Complex z) { 11 | return Math.sqrt(z.re*z.re + z.im*z.im); 12 | } 13 | 14 | public static Complex add(Complex a, Complex b) { 15 | return new Complex(a.re + b.re, a.im + b.im); 16 | } 17 | 18 | public static Complex mul(Complex a, Complex b) { 19 | return new Complex(a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /java/src/main/java/PerfBLAS.java: -------------------------------------------------------------------------------- 1 | import java.io.FileNotFoundException; 2 | import java.io.FileOutputStream; 3 | import java.io.PrintStream; 4 | import java.util.List; 5 | import java.util.Random; 6 | 7 | import org.jblas.DoubleMatrix; 8 | 9 | /** 10 | * Benchmark tests that call BLAS. 
11 | */ 12 | public class PerfBLAS extends PerfPure { 13 | 14 | public static void main(String[] args) { 15 | PerfBLAS p = new PerfBLAS(); 16 | p.runBenchmarks(); 17 | } 18 | 19 | private double randmatmul(int i) { 20 | DoubleMatrix a = DoubleMatrix.randn(i,i); 21 | DoubleMatrix b = DoubleMatrix.randn(i,i); 22 | return a.mmul(b).get(0); 23 | } 24 | 25 | private double[] randmatstat(int t) { 26 | int n=5; 27 | DoubleMatrix p; 28 | DoubleMatrix q; 29 | DoubleMatrix v = new DoubleMatrix(new double[t][1]); //zeros(t,1); 30 | DoubleMatrix w = new DoubleMatrix(new double[t][1]); //zeros(t,1); 31 | for (int i=0; i < t; i++) { 32 | DoubleMatrix a = DoubleMatrix.randn(n,n); 33 | DoubleMatrix b = DoubleMatrix.randn(n,n); 34 | DoubleMatrix c = DoubleMatrix.randn(n,n); 35 | DoubleMatrix d = DoubleMatrix.randn(n,n); 36 | 37 | p = DoubleMatrix.concatHorizontally(DoubleMatrix.concatHorizontally(a, b),DoubleMatrix.concatHorizontally(c, d)); 38 | q = DoubleMatrix.concatVertically(DoubleMatrix.concatHorizontally(a, b),DoubleMatrix.concatHorizontally(c, d)); 39 | 40 | DoubleMatrix x = p.transpose().mmul(p); 41 | x = x.mmul(x); 42 | x = x.mmul(x); 43 | v.data[i]=x.diag().sum(); 44 | 45 | x = q.transpose().mmul(q); 46 | x = x.mmul(x); 47 | x = x.mmul(x); 48 | w.data[i]=x.diag().sum(); 49 | 50 | } 51 | 52 | List vElements = v.elementsAsList(); 53 | List wElements = w.elementsAsList(); 54 | 55 | return new double[]{stdev(vElements)/mean(vElements),stdev(wElements)/mean(wElements)}; 56 | } 57 | 58 | private static int mandel(double re, double im) { 59 | int n = 0; 60 | Complex z = new Complex(re, im); 61 | Complex c = new Complex(re, im); 62 | for (n=0; n<=79; ++n) { 63 | if (Complex.abs(z) > 2.0) { 64 | n -= 1; 65 | break; 66 | } 67 | 68 | // z = z*z + c 69 | z = Complex.add(Complex.mul(z, z), c); 70 | } 71 | return n+1; 72 | } 73 | 74 | } 75 | 76 | -------------------------------------------------------------------------------- /java/src/main/java/PerfPure.java: 
-------------------------------------------------------------------------------- 1 | import java.io.FileNotFoundException; 2 | import java.io.FileOutputStream; 3 | import java.io.PrintStream; 4 | import java.util.List; 5 | import java.util.Random; 6 | 7 | import org.ejml.simple.SimpleMatrix; 8 | 9 | /** 10 | * (Below excerpt is printed on the website and repeated here) 11 | * 12 | * These benchmarks, while not comprehensive, do test compiler performance on a range of common code patterns, 13 | * such as function calls, string parsing, sorting, numerical loops, random number generation, and array operations. 14 | * It is important to note that these benchmark implementations are not written for absolute maximal performance 15 | * (the fastest code to compute fib(20) is the constant literal 6765). Rather, all of the benchmarks are written 16 | * to test the performance of specific algorithms, expressed in a reasonable idiom in each language. 17 | * In particular, all languages use the same algorithm: the Fibonacci benchmarks are all recursive while 18 | * the pi summation benchmarks are all iterative; the “algorithm” for random matrix multiplication is to 19 | * call LAPACK, except where that’s not possible, such as in JavaScript. The point of these benchmarks is to 20 | * compare the performance of specific algorithms across language implementations, not to compare the fastest 21 | * means of computing a result, which in most high-level languages relies on calling C code. 
22 | * 23 | */ 24 | public class PerfPure { 25 | 26 | protected final int NITER = 5; 27 | protected Random rand = new Random(0); 28 | 29 | public static void main(String[] args) { 30 | PerfPure p = new PerfPure(); 31 | p.runBenchmarks(); 32 | } 33 | 34 | void runBenchmarks() { 35 | 36 | long t, tmin; 37 | 38 | assert(fib(20) == 6765); 39 | int f = 0; 40 | tmin = Long.MAX_VALUE; 41 | for (int i=0; i=0) { 83 | d[j] = rand.nextDouble(); 84 | } 85 | quicksort(d, 0, 5000-1); 86 | t = System.nanoTime()-t; 87 | if (t < tmin) tmin = t; 88 | } 89 | print_perf("recursion_quicksort", tmin); 90 | 91 | // pi sum 92 | double pi = 0; 93 | tmin = Long.MAX_VALUE; 94 | for (int i=0; i elements) { 215 | double m = mean(elements); 216 | double total = 0; 217 | for(Double d:elements) { 218 | double dif = (d-m); 219 | total += dif*dif; 220 | } 221 | return Math.sqrt(total/(elements.size()-1)); 222 | } 223 | 224 | public double mean(List elements) { 225 | double total = 0; 226 | for(Double d:elements) { 227 | total += d; 228 | } 229 | return total/elements.size(); 230 | } 231 | 232 | public double stdev(SimpleMatrix sm) { 233 | double m = mean(sm); 234 | double total = 0; 235 | 236 | int i = sm.getNumElements(); 237 | while (--i>=0) { 238 | double dif = (sm.get(i)-m); 239 | total += dif*dif; 240 | } 241 | return Math.sqrt(total/(sm.getNumElements()-1)); 242 | } 243 | 244 | public double mean(SimpleMatrix sm) { 245 | double total = 0; 246 | int i = sm.getNumElements(); 247 | while (--i>=0) { 248 | total += sm.get(i); 249 | } 250 | return total/sm.getNumElements(); 251 | } 252 | 253 | protected void quicksort(double[] a, int lo, int hi) { 254 | int i = lo; 255 | int j = hi; 256 | while (i < hi) { 257 | double pivot = a[(lo+hi)/2]; 258 | // Partition 259 | while (i <= j) { 260 | while (a[i] < pivot) { 261 | i = i + 1; 262 | } 263 | while (a[j] > pivot) { 264 | j = j - 1; 265 | } 266 | if (i <= j) { 267 | double t = a[i]; 268 | a[i] = a[j]; 269 | a[j] = t; 270 | i = i + 1; 271 | j = j - 1; 
272 | } 273 | } 274 | 275 | // Recursion for quicksort 276 | if (lo < j) { 277 | quicksort(a, lo, j); 278 | } 279 | lo = i; 280 | j = hi; 281 | } 282 | } 283 | 284 | protected double pisum() { 285 | double sum = 0.0; 286 | for (int j=0; j<500; ++j) { 287 | sum = 0.0; 288 | for (int k=1; k<=10000; ++k) { 289 | sum += 1.0/(k*k); 290 | } 291 | } 292 | return sum; 293 | } 294 | 295 | private int mandel(double zReal, double zImag) { 296 | int n = 0; 297 | double cReal = zReal; 298 | double cImag = zImag; 299 | for (n=0; n<=79; ++n) { 300 | if (complexAbs2(zReal,zImag) > 4.0) { 301 | n -= 1; 302 | break; 303 | } 304 | 305 | // z^2 306 | double zSquaredReal = zReal*zReal-zImag*zImag; 307 | double zSquaredImag = zReal*zImag+zImag*zReal; 308 | 309 | // +c 310 | zReal = zSquaredReal+cReal; 311 | zImag = zSquaredImag+cImag; 312 | 313 | } 314 | return n+1; 315 | } 316 | 317 | private double complexAbs(double zReal, double zImag) { 318 | return Math.sqrt(zReal*zReal + zImag*zImag); 319 | } 320 | 321 | private double complexAbs2(double zReal, double zImag) { 322 | return zReal*zReal + zImag*zImag; 323 | } 324 | 325 | protected int mandelperf() { 326 | int mandel_sum = 0; 327 | for (double re=-2.0; re<=0.5; re+=0.1) { 328 | for (double im=-1.0; im<=1.0; im+=0.1) { 329 | int m = mandel(re,im); 330 | mandel_sum += m; 331 | } 332 | } 333 | return mandel_sum; 334 | } 335 | 336 | protected void print_perf(String name, long t) { 337 | System.out.printf("java,%s,%.6f\n", name, t/(double)1E6); 338 | } 339 | 340 | protected int fib(int n) { 341 | return n < 2 ? 
n : fib(n-1) + fib(n-2); 342 | } 343 | 344 | } 345 | 346 | -------------------------------------------------------------------------------- /lua/lua-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Install lua-sci-lang as recommended via ulua 3 | 4 | wget https://ulua.io/download/ulua~latest.zip 5 | unzip ulua~latest.zip 6 | sed -i 's/noconfirm = false,/noconfirm = true,/g' ulua/host/config.lua 7 | ulua/bin/upkg add time 8 | ulua/bin/upkg add sci 9 | ulua/bin/upkg add sci-lang 10 | -------------------------------------------------------------------------------- /perf.R: -------------------------------------------------------------------------------- 1 | require(compiler) 2 | 3 | assert = function(bool) { 4 | if (!bool) stop('Assertion failed') 5 | } 6 | 7 | timeit = function(name, f, ..., times=5) { 8 | tmin = Inf 9 | f = cmpfun(f) 10 | for (t in 1:times) { 11 | t = system.time(f(...))["elapsed"] 12 | if (t < tmin) tmin = t 13 | } 14 | cat(sprintf("r,%s,%.8f\n", name, tmin*1000)) 15 | } 16 | 17 | ## fib ## 18 | 19 | fib = function(n) { 20 | if (n < 2) { 21 | return(n) 22 | } else { 23 | return(fib(n-1) + fib(n-2)) 24 | } 25 | } 26 | 27 | assert(fib(20) == 6765) 28 | timeit("recursion_fibonacci", fib, 20) 29 | 30 | ## parse_int ## 31 | 32 | parseintperf = function(t) { 33 | for (i in 1:t) { 34 | # R doesn't support uint32 values 35 | n = floor(runif(1, min=0, max=2^31-1)) 36 | s = sprintf("0x%x", n) 37 | m = as.numeric(s) 38 | assert(m == n) 39 | } 40 | } 41 | 42 | timeit("parse_integers", parseintperf, 1000) 43 | 44 | printfdperf = function(t) { 45 | fd<-file("/dev/null") 46 | on.exit(close(fd)) 47 | for (i in 1:t) { 48 | s = sprintf("%d %d", i, i+1) 49 | writeLines(s, fd) 50 | } 51 | } 52 | 53 | timeit("print_to_file", printfdperf, 100000) 54 | 55 | ## quicksort ## 56 | 57 | qsort = function(a) { 58 | qsort_kernel = function(lo, hi) { 59 | i = lo 60 | j = hi 61 | while (i < hi) { 62 | pivot = 
a[floor((lo+hi)/2)] 63 | while (i <= j) { 64 | while (a[i] < pivot) i = i + 1 65 | while (a[j] > pivot) j = j - 1 66 | if (i <= j) { 67 | t = a[i] 68 | a[i] <<- a[j] 69 | a[j] <<- t 70 | i = i + 1; 71 | j = j - 1; 72 | } 73 | } 74 | if (lo < j) qsort_kernel(lo, j) 75 | lo = i 76 | j = hi 77 | } 78 | } 79 | qsort_kernel(1, length(a)) 80 | return(a) 81 | } 82 | 83 | sortperf = function(n) { 84 | v = runif(n) 85 | return(qsort(v)) 86 | } 87 | 88 | assert(!is.unsorted(sortperf(5000))) 89 | timeit('recursion_quicksort', sortperf, 5000) 90 | 91 | ## mandel ## 92 | Mod2 = function(z) { 93 | return(Re(z)*Re(z) + Im(z)*Im(z)) 94 | } 95 | 96 | mandel = function(z) { 97 | c = z 98 | maxiter = 80 99 | for (n in 1:maxiter) { 100 | if (Mod2(z) > 4) return(n-1) 101 | z = z^2+c 102 | } 103 | return(maxiter) 104 | } 105 | 106 | mandelperf = function() { 107 | re = seq(-2,0.5,.1) 108 | im = seq(-1,1,.1) 109 | M = matrix(0.0,nrow=length(re),ncol=length(im)) 110 | count = 1 111 | for (r in re) { 112 | for (i in im) { 113 | M[count] = mandel(complex(real=r,imag=i)) 114 | count = count + 1 115 | } 116 | } 117 | return(M) 118 | } 119 | 120 | assert(sum(mandelperf()) == 14791) 121 | timeit("userfunc_mandelbrot", mandelperf) 122 | 123 | ## pi_sum ## 124 | 125 | pisum = function() { 126 | t = 0.0 127 | for (j in 1:500) { 128 | t = 0.0 129 | for (k in 1:10000) { 130 | t = t + 1.0/(k*k) 131 | } 132 | } 133 | return(t) 134 | } 135 | 136 | assert(abs(pisum()-1.644834071848065) < 1e-12); 137 | timeit("iteration_pi_sum", pisum, times=1) 138 | 139 | ## pi_sum_vec ## 140 | 141 | pisumvec = function() { 142 | r = 1:10000 143 | return(replicate(500, sum(1/((r)^2)))[1]) 144 | } 145 | 146 | #assert(abs(pisumvec()-1.644834071848065) < 1e-12); 147 | #timeit("pi_sum_vec", pisumvec, times=10) 148 | 149 | ## rand_mat_stat ## 150 | 151 | randmatstat = function(t) { 152 | n = 5 153 | v = matrix(0, nrow=t) 154 | w = matrix(0, nrow=t) 155 | for (i in 1:t) { 156 | a = matrix(rnorm(n*n), ncol=n, nrow=n) 157 | b = 
matrix(rnorm(n*n), ncol=n, nrow=n) 158 | c = matrix(rnorm(n*n), ncol=n, nrow=n) 159 | d = matrix(rnorm(n*n), ncol=n, nrow=n) 160 | P = cbind(a,b,c,d) 161 | Q = rbind(cbind(a,b),cbind(c,d)) 162 | v[i] = sum(diag((t(P)%*%P)^4)) 163 | w[i] = sum(diag((t(Q)%*%Q)^4)) 164 | } 165 | s1 = apply(v,2,sd)/mean(v) 166 | s2 = apply(w,2,sd)/mean(w) 167 | return(c(s1,s2)) 168 | } 169 | 170 | timeit("matrix_statistics", randmatstat, 1000) 171 | 172 | ## rand_mat_mul ## 173 | 174 | randmatmul = function(n) { 175 | A = matrix(runif(n*n), ncol=n, nrow=n) 176 | B = matrix(runif(n*n), ncol=n, nrow=n) 177 | return(A %*% B) 178 | } 179 | 180 | assert(randmatmul(1000)[1] >= 0) 181 | timeit("matrix_multiply", randmatmul, 1000) 182 | -------------------------------------------------------------------------------- /perf.c: -------------------------------------------------------------------------------- 1 | // This file was formerly a part of Julia. License is MIT: https://julialang.org/license 2 | 3 | #include 4 | 5 | // include header file generated by make: 6 | #define DSFMT_MEXP 19937 7 | #include "perf.h" 8 | #include "randmtzig.c" 9 | 10 | double *myrand(int n) { 11 | double *d = (double *)malloc(n*sizeof(double)); 12 | dsfmt_gv_fill_array_close_open(d, n); 13 | return d; 14 | } 15 | 16 | #define NITER 5 17 | 18 | double clock_now() 19 | { 20 | struct timeval now; 21 | 22 | gettimeofday(&now, NULL); 23 | return (double)now.tv_sec + (double)now.tv_usec/1.0e6; 24 | } 25 | 26 | int fib(int n) { 27 | return n < 2 ? 
n : fib(n-1) + fib(n-2); 28 | } 29 | 30 | long parse_int(const char *s, long base) { 31 | long n = 0; 32 | for (; *s; ++s) { 33 | char c = *s; 34 | long d = 0; 35 | if (c >= '0' && c <= '9') d = c-'0'; 36 | else if (c >= 'A' && c <= 'Z') d = c-'A' + (int) 10; 37 | else if (c >= 'a' && c <= 'z') d = c-'a' + (int) 10; 38 | else exit(-1); 39 | 40 | if (base <= d) exit(-1); 41 | n = n*base + d; 42 | } 43 | return n; 44 | } 45 | 46 | double *ones(int m, int n) { 47 | double *a = (double *) malloc(m*n*sizeof(double)); 48 | for (int k=0; k 4.0) { 69 | return n; 70 | } 71 | z = z*z+c; 72 | } 73 | return maxiter; 74 | } 75 | 76 | int *mandelperf() { 77 | int *M = (int*) malloc(21*26*sizeof(int)); 78 | for (int i = 0; i < 21; i++) { 79 | for (int j = 0; j < 26; j++) { 80 | M[26*i + j] = mandel((j-20)/10.0 + ((i-10)/10.0)*I); 81 | } 82 | } 83 | return M; 84 | } 85 | 86 | void quicksort(double *a, int lo, int hi) { 87 | int i = lo; 88 | int j = hi; 89 | while (i < hi) { 90 | double pivot = a[(lo+hi)/2]; 91 | // Partition 92 | while (i <= j) { 93 | while (a[i] < pivot) { 94 | i = i + 1; 95 | } 96 | while (a[j] > pivot) { 97 | j = j - 1; 98 | } 99 | if (i <= j) { 100 | double t = a[i]; 101 | a[i] = a[j]; 102 | a[j] = t; 103 | i = i + 1; 104 | j = j - 1; 105 | } 106 | } 107 | 108 | // Recursion for quicksort 109 | if (lo < j) { 110 | quicksort(a, lo, j); 111 | } 112 | lo = i; 113 | j = hi; 114 | } 115 | } 116 | 117 | double pisum() { 118 | double sum = 0.0; 119 | for (int j=0; j<500; ++j) { 120 | sum = 0.0; 121 | for (int k=1; k<=10000; ++k) { 122 | sum += 1.0/(k*k); 123 | } 124 | } 125 | return sum; 126 | } 127 | 128 | struct double_pair { double s1, s2; }; 129 | 130 | static void randmtzig_fill_randn(dsfmt_t *dsfmt, double *a, int size) { 131 | for (int i=0; i 0) 164 | 165 | hexchar(i:i) = table(iand(quotient,15)) 166 | i = i-1 167 | 168 | quotient = ishft(quotient,-4) 169 | end do 170 | 171 | end subroutine hex_string 172 | 173 | end module 174 | 175 | 176 | 177 | module bench 
178 | use utils, only: trace, randn, std, mean, stop_error 179 | use types, only: dp 180 | implicit none 181 | private 182 | public fib, parse_int, printfd, quicksort, mandelperf, pisum, randmatstat, randmatmul 183 | 184 | contains 185 | 186 | integer recursive function fib(n) result(r) 187 | integer, intent(in) :: n 188 | if (n < 2) then 189 | r = n 190 | else 191 | r = fib(n-1) + fib(n-2) 192 | end if 193 | end function 194 | 195 | integer function parse_int(s, base) result(n) 196 | character(len=*), intent(in) :: s 197 | integer, intent(in) :: base 198 | integer :: i, d 199 | character :: c 200 | n = 0 201 | do i = 1, len(s) 202 | c = s(i:i) 203 | d = 0 204 | if (ichar(c) >= ichar('0') .and. ichar(c) <= ichar('9')) then 205 | d = ichar(c) - ichar('0') 206 | else if (ichar(c) >= ichar('A') .and. ichar(c) <= ichar('Z')) then 207 | d = ichar(c) - ichar('A') + 10 208 | else if (ichar(c) >= ichar('a') .and. ichar(c) <= ichar('z')) then 209 | d = ichar(c) - ichar('a') + 10 210 | else 211 | call stop_error("parse_int 1") 212 | end if 213 | 214 | if (base <= d) call stop_error("parse_int 2") 215 | n = n*base + d 216 | end do 217 | 218 | end function 219 | 220 | subroutine printfd(n) 221 | integer, intent(in) :: n 222 | integer :: i , unit 223 | open(unit=1, file="/dev/null") 224 | do i = 1, n 225 | write(unit=1, fmt=*) i, i+1 226 | end do 227 | close(unit=1) 228 | end subroutine 229 | 230 | real(dp) function abs2(z) result(r) 231 | complex(dp), intent(in) :: z 232 | r = real(z)*real(z) + imag(z)*imag(z); 233 | end function 234 | 235 | integer function mandel(z0) result(r) 236 | complex(dp), intent(in) :: z0 237 | complex(dp) :: c, z 238 | integer :: n, maxiter 239 | maxiter = 80 240 | z = z0 241 | c = z0 242 | do n = 1, maxiter 243 | if (abs2(z) > 4) then 244 | r = n-1 245 | return 246 | end if 247 | z = z**2 + c 248 | end do 249 | r = maxiter 250 | end function 251 | 252 | integer function mandelperf() result(mandel_sum) 253 | integer :: re, im 254 | volatile :: 
mandel_sum 255 | mandel_sum = 0 256 | re = -20 257 | do while (re <= 5) 258 | im = -10 259 | do while (im <= 10) 260 | mandel_sum = mandel_sum + mandel(cmplx(re/10._dp, im/10._dp, dp)) 261 | im = im + 1 262 | end do 263 | re = re + 1 264 | end do 265 | end function 266 | 267 | recursive subroutine quicksort(a, lo0, hi) 268 | real(dp), intent(inout) :: a(:) 269 | integer, intent(in) :: lo0, hi 270 | integer :: i, j, lo 271 | real(dp) :: pivot, t 272 | lo = lo0 273 | i = lo 274 | j = hi 275 | do while (i < hi) 276 | pivot = a((lo+hi)/2) 277 | do while (i <= j) 278 | do while (a(i) < pivot) 279 | i = i + 1 280 | end do 281 | do while (a(j) > pivot) 282 | j = j - 1 283 | end do 284 | if (i <= j) then 285 | t = a(i) 286 | a(i) = a(j) 287 | a(j) = t 288 | i = i + 1 289 | j = j - 1 290 | end if 291 | end do 292 | if (lo < j) call quicksort(a, lo, j) 293 | lo = i 294 | j = hi 295 | end do 296 | end subroutine 297 | 298 | real(dp) function pisum() result(s) 299 | integer :: j, k 300 | do j = 1, 500 301 | s = 0 302 | do k = 1, 10000 303 | s = s + 1._dp / k**2 304 | end do 305 | end do 306 | end function 307 | 308 | subroutine randmatstat(t, s1, s2) 309 | integer, intent(in) :: t 310 | real(dp), intent(out) :: s1, s2 311 | real(dp), allocatable, dimension(:, :) :: a, b, c, d, P, Q, X 312 | real(dp), allocatable :: v(:), w(:) 313 | integer :: n, i 314 | n = 5 315 | allocate(a(n, n), b(n, n), c(n, n), d(n, n)) 316 | allocate(P(4*n, n), Q(2*n, 2*n), X(2*n, 2*n)) 317 | allocate(v(t), w(t)) 318 | do i = 1, t 319 | call randn(a) 320 | call randn(b) 321 | call randn(c) 322 | call randn(d) 323 | P(:n, :)=a; P(n+1:2*n, :)=b; P(2*n+1:3*n, :)=c; P(3*n+1:, :)=d 324 | Q(:n, :n) = a; Q(n+1:, :n) = b 325 | Q(:n, n+1: ) = c; Q(n+1:, n+1: ) = d 326 | X = matmul(transpose(P), P) 327 | X = matmul(X, X) 328 | X = matmul(X, X) 329 | v(i) = trace(X) 330 | X = matmul(transpose(Q), Q) 331 | X = matmul(X, X) 332 | X = matmul(X, X) 333 | w(i) = trace(X) 334 | end do 335 | s1 = std(v) / mean(v) 336 | 
s2 = std(w) / mean(w) 337 | end subroutine 338 | 339 | subroutine randmatmul(n, C) 340 | integer, intent(in) :: n 341 | real(dp), intent(out), allocatable :: C(:, :) 342 | real(dp), allocatable :: A(:, :), B(:, :) 343 | allocate(A(n, n), B(n, n), C(n, n)) 344 | call random_number(A) 345 | call random_number(B) 346 | !C = matmul(A, B) 347 | call dgemm('N','N',n,n,n,1.0d0,A,n,B,n,0.0d0,C,n) 348 | end subroutine 349 | 350 | end module 351 | 352 | program perf 353 | use types, only: dp, i64 354 | use utils, only: assert, init_random_seed, sysclock2ms, hex_string 355 | use bench, only: fib, parse_int, printfd, quicksort, mandelperf, pisum, randmatstat, & 356 | randmatmul 357 | implicit none 358 | 359 | integer, parameter :: NRUNS = 1000 360 | integer :: i, f, n, m, k, k2 361 | integer(i64) :: t1, t2, tmin 362 | real(dp) :: pi, s1, s2 363 | real(dp), allocatable :: C(:, :), d(:) 364 | character(len=11) :: s 365 | 366 | call init_random_seed() 367 | 368 | tmin = huge(0_i64) 369 | do i = 1, 5 370 | call system_clock(t1) 371 | do k = 1, NRUNS 372 | f = fib(20) 373 | end do 374 | call system_clock(t2) 375 | if (t2-t1 < tmin) tmin = t2-t1 376 | end do 377 | call assert(f == 6765) 378 | print "('fortran,recursion_fibonacci,',f0.6)", sysclock2ms(tmin) / NRUNS 379 | 380 | tmin = huge(0_i64) 381 | do i = 1, 5 382 | call system_clock(t1) 383 | do k2 = 1, NRUNS 384 | do k = 1, 1000 385 | call random_number(s1) 386 | n = int(s1*huge(n)) 387 | call hex_string(n,s) 388 | m = parse_int(s(:len_trim(s)), 16) 389 | call assert(m == n) 390 | end do 391 | end do 392 | call system_clock(t2) 393 | if (t2-t1 < tmin) tmin = t2-t1 394 | end do 395 | print "('fortran,parse_integers,',f0.6)", sysclock2ms(tmin) / NRUNS 396 | 397 | tmin = huge(0_i64) 398 | do i = 1, 5 399 | call system_clock(t1) 400 | call printfd(100000) 401 | call system_clock(t2) 402 | if (t2-t1 < tmin) tmin = t2-t1 403 | end do 404 | print "('fortran,print_to_file,',f0.6)", sysclock2ms(tmin) 405 | 406 | 407 | tmin = huge(0_i64) 
408 | do i = 1, 5 409 | call system_clock(t1) 410 | do k = 1, NRUNS 411 | f = mandelperf() 412 | end do 413 | call system_clock(t2) 414 | if (t2-t1 < tmin) tmin = t2-t1 415 | end do 416 | call assert(f == 14791) 417 | print "('fortran,userfunc_mandelbrot,',f0.6)", sysclock2ms(tmin) / NRUNS 418 | 419 | tmin = huge(0_i64) 420 | do i = 1, 5 421 | call system_clock(t1) 422 | do k = 1, NRUNS 423 | allocate(d(5000)) 424 | call random_number(d) 425 | call quicksort(d, 1, size(d)) 426 | deallocate(d) 427 | end do 428 | call system_clock(t2) 429 | if (t2-t1 < tmin) tmin = t2-t1 430 | end do 431 | print "('fortran,recursion_quicksort,',f0.6)", sysclock2ms(tmin) / NRUNS 432 | 433 | tmin = huge(0_i64) 434 | do i = 1, 5 435 | call system_clock(t1) 436 | pi = pisum() 437 | call system_clock(t2) 438 | if (t2-t1 < tmin) tmin = t2-t1 439 | end do 440 | call assert(abs(pi - 1.644834071848065_dp) < 1e-6_dp) 441 | print "('fortran,iteration_pi_sum,',f0.6)", sysclock2ms(tmin) 442 | 443 | tmin = huge(0_i64) 444 | do i = 1, 5 445 | call system_clock(t1) 446 | call randmatstat(1000, s1, s2) 447 | call system_clock(t2) 448 | if (t2-t1 < tmin) tmin = t2-t1 449 | end do 450 | ! call assert(s1 > 0.5_dp .and. s1 < 1) 451 | ! call assert(s2 > 0.5_dp .and. s2 < 1) 452 | print "('fortran,matrix_statistics,',f0.6)", sysclock2ms(tmin) 453 | 454 | tmin = huge(0_i64) 455 | do i = 1, 5 456 | call system_clock(t1) 457 | call randmatmul(1000, C) 458 | call assert(C(1, 1) >= 0) 459 | call system_clock(t2) 460 | if (t2-t1 < tmin) tmin = t2-t1 461 | end do 462 | print "('fortran,matrix_multiply,',f0.6)", sysclock2ms(tmin) 463 | 464 | end program 465 | -------------------------------------------------------------------------------- /perf.go: -------------------------------------------------------------------------------- 1 | // Implementation of the Julia benchmark suite in Go. 
2 | // 3 | // Three gonum packages must be installed, and then an additional environment 4 | // variable must be set to use the BLAS installation. 5 | // To install the gonum packages, run: 6 | // go get gonum.org/v1/netlib/blas/netlib 7 | // go get gonum.org/v1/gonum/mat 8 | // go get gonum.org/v1/gonum/stat 9 | // The cgo ldflags must then be set to use the BLAS implementation. As an example, 10 | // download OpenBLAS to ~/software 11 | // git clone https://github.com/xianyi/OpenBLAS 12 | // cd OpenBLAS 13 | // make 14 | // Then edit the environment variable to have 15 | // export CGO_LDFLAGS="-L/$HOME/software/OpenBLAS -lopenblas" 16 | package main 17 | 18 | import ( 19 | "bufio" 20 | "errors" 21 | "fmt" 22 | "log" 23 | "math" 24 | "math/rand" 25 | "os" 26 | "strconv" 27 | "testing" 28 | 29 | "gonum.org/v1/gonum/mat" 30 | "gonum.org/v1/gonum/stat" 31 | "gonum.org/v1/netlib/blas/netlib" 32 | ) 33 | 34 | func init() { 35 | // Use the BLAS implementation specified in CGO_LDFLAGS. This line can be 36 | // commented out to use the native Go BLAS implementation found in 37 | // gonum.org/v1/gonum/blas/gonum. 
38 | //blas64.Use(gonum.Implementation{}) 39 | 40 | // These are here so that toggling the BLAS implementation does not make imports unused 41 | _ = netlib.Implementation{} 42 | } 43 | 44 | // fibonacci 45 | 46 | func fib(n int) int { 47 | if n < 2 { 48 | return n 49 | } 50 | return fib(n-1) + fib(n-2) 51 | } 52 | 53 | // print to file descriptor 54 | 55 | func printfd(n int) { 56 | f, err := os.Create("/dev/null") 57 | if err != nil { 58 | panic(err) 59 | } 60 | defer f.Close() 61 | w := bufio.NewWriter(f) 62 | 63 | for i := 0; i < n; i++ { 64 | _, err = fmt.Fprintf(w, "%d %d\n", i, i+1) 65 | } 66 | w.Flush() 67 | f.Close() 68 | } 69 | 70 | // quicksort 71 | 72 | func qsort_kernel(a []float64, lo, hi int) []float64 { 73 | i := lo 74 | j := hi 75 | for i < hi { 76 | pivot := a[(lo+hi)/2] 77 | for i <= j { 78 | for a[i] < pivot { 79 | i += 1 80 | } 81 | for a[j] > pivot { 82 | j -= 1 83 | } 84 | if i <= j { 85 | a[i], a[j] = a[j], a[i] 86 | i += 1 87 | j -= 1 88 | } 89 | } 90 | if lo < j { 91 | qsort_kernel(a, lo, j) 92 | } 93 | lo = i 94 | j = hi 95 | } 96 | return a 97 | } 98 | 99 | var rnd = rand.New(rand.NewSource(1)) 100 | 101 | // randmatstat 102 | 103 | func randmatstat(t int) (float64, float64) { 104 | n := 5 105 | v := make([]float64, t) 106 | w := make([]float64, t) 107 | ad := make([]float64, n*n) 108 | bd := make([]float64, n*n) 109 | cd := make([]float64, n*n) 110 | dd := make([]float64, n*n) 111 | P := mat.NewDense(n, 4*n, nil) 112 | Q := mat.NewDense(2*n, 2*n, nil) 113 | pTmp := mat.NewDense(4*n, 4*n, nil) 114 | qTmp := mat.NewDense(2*n, 2*n, nil) 115 | for i := 0; i < t; i++ { 116 | for i := range ad { 117 | ad[i] = rnd.NormFloat64() 118 | bd[i] = rnd.NormFloat64() 119 | cd[i] = rnd.NormFloat64() 120 | dd[i] = rnd.NormFloat64() 121 | } 122 | a := mat.NewDense(n, n, ad) 123 | b := mat.NewDense(n, n, bd) 124 | c := mat.NewDense(n, n, cd) 125 | d := mat.NewDense(n, n, dd) 126 | P.Copy(a) 127 | P.Slice(0, n, n, n+n).(*mat.Dense).Copy(b) 128 | P.Slice(0, 
n, 2*n, 3*n).(*mat.Dense).Copy(c) 129 | P.Slice(0, n, 3*n, 4*n).(*mat.Dense).Copy(d) 130 | 131 | Q.Copy(a) 132 | Q.Slice(0, n, n, 2*n).(*mat.Dense).Copy(b) 133 | Q.Slice(n, 2*n, 0, n).(*mat.Dense).Copy(c) 134 | Q.Slice(n, 2*n, n, 2*n).(*mat.Dense).Copy(d) 135 | 136 | pTmp.Mul(P.T(), P) 137 | pTmp.Pow(pTmp, 4) 138 | 139 | qTmp.Mul(Q.T(), Q) 140 | qTmp.Pow(qTmp, 4) 141 | 142 | v[i] = mat.Trace(pTmp) 143 | w[i] = mat.Trace(qTmp) 144 | } 145 | mv, stdv := stat.MeanStdDev(v, nil) 146 | mw, stdw := stat.MeanStdDev(v, nil) 147 | return stdv / mv, stdw / mw 148 | } 149 | 150 | // randmatmul 151 | 152 | func randmatmul(n int) *mat.Dense { 153 | aData := make([]float64, n*n) 154 | for i := range aData { 155 | aData[i] = rnd.Float64() 156 | } 157 | a := mat.NewDense(n, n, aData) 158 | 159 | bData := make([]float64, n*n) 160 | for i := range bData { 161 | bData[i] = rnd.Float64() 162 | } 163 | b := mat.NewDense(n, n, bData) 164 | var c mat.Dense 165 | c.Mul(a, b) 166 | return &c 167 | } 168 | 169 | // mandelbrot 170 | func abs2(z complex128) float64 { 171 | return real(z)*real(z) + imag(z)*imag(z) 172 | } 173 | func mandel(z complex128) int { 174 | maxiter := 80 175 | c := z 176 | for n := 0; n < maxiter; n++ { 177 | if abs2(z) > 4 { 178 | return n 179 | } 180 | z = z*z + c 181 | } 182 | return maxiter 183 | } 184 | 185 | // mandelperf 186 | 187 | func mandelperf() int { 188 | mandel_sum := 0 189 | // These loops are constructed as such because mandel is very sensitive to 190 | // its input and this avoids very small floating point issues. 
191 | for re := -20.0; re <= 5; re += 1 { 192 | for im := -10.0; im <= 10; im += 1 { 193 | m := mandel(complex(re/10, im/10)) 194 | mandel_sum += m 195 | } 196 | } 197 | return mandel_sum 198 | } 199 | 200 | // pisum 201 | 202 | func pisum() float64 { 203 | var sum float64 204 | for i := 0; i < 500; i++ { 205 | sum = 0.0 206 | for k := 1.0; k <= 10000; k += 1 { 207 | sum += 1.0 / (k * k) 208 | } 209 | } 210 | return sum 211 | } 212 | 213 | func print_perf(name string, time float64) { 214 | fmt.Printf("go,%v,%v\n", name, time*1000) 215 | } 216 | 217 | // run tests 218 | 219 | func assert(b *testing.B, t bool) { 220 | if t != true { 221 | b.Fatal("assert failed") 222 | } 223 | } 224 | 225 | func main() { 226 | for _, bm := range benchmarks { 227 | seconds, err := runBenchmarkFor(bm.fn) 228 | if err != nil { 229 | log.Fatalf("%s %s", bm.name, err) 230 | } 231 | print_perf(bm.name, seconds) 232 | } 233 | } 234 | 235 | func runBenchmarkFor(fn func(*testing.B)) (seconds float64, err error) { 236 | bm := testing.Benchmark(fn) 237 | if (bm.N == 0) { 238 | return 0, errors.New("failed") 239 | } 240 | return bm.T.Seconds() / float64(bm.N), nil 241 | } 242 | 243 | var benchmarks = []struct { 244 | name string 245 | fn func(*testing.B) 246 | }{ 247 | { 248 | name: "recursion_fibonacci", 249 | fn: func(b *testing.B) { 250 | for i := 0; i < b.N; i++ { 251 | if fib(20) != 6765 { 252 | b.Fatal("unexpected value for fib(20)") 253 | } 254 | } 255 | }, 256 | }, 257 | 258 | { 259 | name: "parse_integers", 260 | fn: func(b *testing.B) { 261 | for i := 0; i < b.N; i++ { 262 | for k := 0; k < 1000; k++ { 263 | n := rnd.Uint32() 264 | m, _ := strconv.ParseUint(strconv.FormatUint(uint64(n), 16), 16, 32) 265 | if uint32(m) != n { 266 | b.Fatal("incorrect value for m") 267 | } 268 | } 269 | } 270 | }, 271 | }, 272 | 273 | { 274 | name: "userfunc_mandelbrot", 275 | fn: func(b *testing.B) { 276 | for i := 0; i < b.N; i++ { 277 | if mandelperf() != 14791 { 278 | b.Fatal("unexpected value for 
mandelperf") 279 | } 280 | } 281 | }, 282 | }, 283 | 284 | { 285 | name: "print_to_file", 286 | fn: func(b *testing.B) { 287 | for i := 0; i < b.N; i++ { 288 | printfd(100000) 289 | } 290 | }, 291 | }, 292 | 293 | { 294 | name: "recursion_quicksort", 295 | fn: func(b *testing.B) { 296 | lst := make([]float64, 5000) 297 | b.ResetTimer() 298 | for i := 0; i < b.N; i++ { 299 | for k := range lst { 300 | lst[k] = rnd.Float64() 301 | } 302 | qsort_kernel(lst, 0, len(lst)-1) 303 | } 304 | }, 305 | }, 306 | 307 | { 308 | name: "iteration_pi_sum", 309 | fn: func(b *testing.B) { 310 | for i := 0; i < b.N; i++ { 311 | if math.Abs(pisum()-1.644834071848065) >= 1e-6 { 312 | b.Fatal("pi_sum out of range") 313 | } 314 | } 315 | }, 316 | }, 317 | 318 | { 319 | name: "matrix_statistics", 320 | fn: func(b *testing.B) { 321 | for i := 0; i < b.N; i++ { 322 | c1, c2 := randmatstat(1000) 323 | assert(b, 0.5 < c1) 324 | assert(b, c1 < 1.0) 325 | assert(b, 0.5 < c2) 326 | assert(b, c2 < 1.0) 327 | } 328 | }, 329 | }, 330 | 331 | { 332 | name: "matrix_multiply", 333 | fn: func(b *testing.B) { 334 | for i := 0; i < b.N; i++ { 335 | c := randmatmul(1000) 336 | assert(b, c.At(0, 0) >= 0) 337 | } 338 | }, 339 | }, 340 | } 341 | -------------------------------------------------------------------------------- /perf.jl: -------------------------------------------------------------------------------- 1 | # This file was formerly a part of Julia. License is MIT: https://julialang.org/license 2 | 3 | import LinearAlgebra 4 | import Test 5 | import Printf 6 | import Statistics 7 | import Base.Sys 8 | 9 | include("./perfutil.jl") 10 | 11 | ## recursive fib ## 12 | 13 | fib(n) = n < 2 ? 
n : fib(n-1) + fib(n-2) 14 | 15 | Test.@test fib(20) == 6765 16 | @timeit fib(20) "recursion_fibonacci" "Recursive fibonacci" 17 | 18 | ## parse integer ## 19 | 20 | function parseintperf(t) 21 | local n, m 22 | for i=1:t 23 | n = rand(UInt32) 24 | @static if VERSION >= v"0.7.0-DEV.4446" 25 | s = string(n, base = 16) 26 | m = UInt32(parse(Int64, s, base = 16)) 27 | else 28 | s = hex(n) 29 | m = UInt32(parse(Int64, s, 16)) 30 | end 31 | @assert m == n 32 | end 33 | return n 34 | end 35 | 36 | @timeit parseintperf(1000) "parse_integers" "Integer parsing" 37 | 38 | ## array constructors ## 39 | 40 | Test.@test all(fill(1.,200,200) .== 1) 41 | 42 | ## matmul and transpose ## 43 | 44 | A = fill(1.,200,200) 45 | Test.@test all(A*A' .== 200) 46 | # @timeit A*A' "AtA" "description" 47 | 48 | ## mandelbrot set: complex arithmetic and comprehensions ## 49 | 50 | function myabs2(z) 51 | return real(z)*real(z) + imag(z)*imag(z) 52 | end 53 | 54 | function mandel(z) 55 | c = z 56 | maxiter = 80 57 | for n = 1:maxiter 58 | if myabs2(z) > 4 59 | return n-1 60 | end 61 | z = z^2 + c 62 | end 63 | return maxiter 64 | end 65 | 66 | mandelperf() = [ mandel(complex(r,i)) for i=-1.:.1:1., r=-2.0:.1:0.5 ] 67 | Test.@test sum(mandelperf()) == 14791 68 | @timeit mandelperf() "userfunc_mandelbrot" "Calculation of mandelbrot set" 69 | 70 | ## numeric vector sort ## 71 | 72 | function qsort!(a,lo,hi) 73 | i, j = lo, hi 74 | while i < hi 75 | pivot = a[(lo+hi)>>>1] 76 | while i <= j 77 | while a[i] < pivot; i += 1; end 78 | while a[j] > pivot; j -= 1; end 79 | if i <= j 80 | a[i], a[j] = a[j], a[i] 81 | i, j = i+1, j-1 82 | end 83 | end 84 | if lo < j; qsort!(a,lo,j); end 85 | lo, j = i, hi 86 | end 87 | return a 88 | end 89 | 90 | sortperf(n) = qsort!(rand(n), 1, n) 91 | Test.@test issorted(sortperf(5000)) 92 | @timeit sortperf(5000) "recursion_quicksort" "Sorting of random numbers using quicksort" 93 | 94 | ## slow pi series ## 95 | 96 | function pisum() 97 | sum = 0.0 98 | for j = 1:500 99 
| sum = 0.0 100 | for k = 1:10000 101 | sum += 1.0/(k*k) 102 | end 103 | end 104 | sum 105 | end 106 | 107 | Test.@test abs(pisum()-1.644834071848065) < 1e-12 108 | @timeit pisum() "iteration_pi_sum" "Summation of a power series" 109 | 110 | ## slow pi series, vectorized ## 111 | 112 | function pisumvec() 113 | s = 0.0 114 | a = [1:10000] 115 | for j = 1:500 116 | s = sum(1 ./ (a.^2)) 117 | end 118 | s 119 | end 120 | 121 | #@test abs(pisumvec()-1.644834071848065) < 1e-12 122 | #@timeit pisumvec() "pi_sum_vec" 123 | 124 | ## random matrix statistics ## 125 | 126 | function randmatstat(t) 127 | n = 5 128 | v = zeros(t) 129 | w = zeros(t) 130 | for i=1:t 131 | a = randn(n,n) 132 | b = randn(n,n) 133 | c = randn(n,n) 134 | d = randn(n,n) 135 | P = [a b c d] 136 | Q = [a b; c d] 137 | @static if VERSION >= v"0.7.0" 138 | v[i] = LinearAlgebra.tr((P'*P)^4) 139 | w[i] = LinearAlgebra.tr((Q'*Q)^4) 140 | else 141 | v[i] = trace((P'*P)^4) 142 | w[i] = trace((Q'*Q)^4) 143 | end 144 | end 145 | return (Statistics.std(v)/Statistics.mean(v), Statistics.std(w)/Statistics.mean(w)) 146 | end 147 | 148 | (s1, s2) = randmatstat(1000) 149 | Test.@test 0.5 < s1 < 1.0 && 0.5 < s2 < 1.0 150 | @timeit randmatstat(1000) "matrix_statistics" "Statistics on a random matrix" 151 | 152 | ## largish random number gen & matmul ## 153 | 154 | @timeit rand(1000,1000)*rand(1000,1000) "matrix_multiply" "Multiplication of random matrices" 155 | 156 | ## printfd ## 157 | 158 | if Sys.isunix() 159 | function printfd(n) 160 | open("/dev/null", "w") do io 161 | for i = 1:n 162 | Printf.@printf(io, "%d %d\n", i, i + 1) 163 | end 164 | end 165 | end 166 | 167 | printfd(1) 168 | @timeit printfd(100000) "print_to_file" "Printing to a file descriptor" 169 | end 170 | 171 | #maxrss("micro") 172 | -------------------------------------------------------------------------------- /perf.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); // for print to 
file benchmark 2 | 3 | (function () { 4 | 'use strict'; 5 | 6 | var tmin, i, j, t, n, m, s, a, sum, a0, v, r, C, filename, fd; 7 | 8 | function assert(t) { if (!t) { throw new Error("assertion failed"); } } 9 | 10 | // recursive fib // 11 | 12 | function fib(n) { 13 | if (n < 2) { return n; } 14 | return fib(n-1) + fib(n-2); 15 | } 16 | 17 | tmin = Number.POSITIVE_INFINITY; 18 | for (i=0; i < 5; i++) { 19 | t = (new Date()).getTime(); 20 | for (j=0; j < 1000; j++) { 21 | assert(fib(20) === 6765); 22 | } 23 | t = (new Date()).getTime()-t; 24 | if (t < tmin) { tmin = t; } 25 | } 26 | console.log("javascript,recursion_fibonacci," + tmin/1000); 27 | 28 | // parse int // 29 | 30 | tmin = Number.POSITIVE_INFINITY; 31 | for (i=0; i < 5; i++) { 32 | t = (new Date()).getTime(); 33 | for (j=0; j < 1000*100; j++) { 34 | n = Math.floor(4294967295*Math.random()); 35 | s = n.toString(16); 36 | m = parseInt(s,16); 37 | assert(m === n); 38 | } 39 | t = (new Date()).getTime()-t; 40 | if (t < tmin) { tmin = t; } 41 | } 42 | console.log("javascript,parse_integers," + tmin/100); 43 | 44 | 45 | // print to file 46 | 47 | function printfd(n) { 48 | let f = fs.openSync("/dev/null", "w"); 49 | for (let i = 1; i <= n; i++) { 50 | fs.writeSync(f, `${i} ${i + 1}\n`); 51 | } 52 | fs.closeSync(f); 53 | } 54 | 55 | tmin = Number.POSITIVE_INFINITY; 56 | for (i=0; i < 5; i++) { 57 | t = (new Date()).getTime(); 58 | printfd(100000) 59 | t = (new Date()).getTime()-t; 60 | if (t < tmin) { tmin = t; } 61 | } 62 | console.log("javascript,print_to_file," + tmin); 63 | 64 | // mandelbrot set // 65 | 66 | function Complex(real, imag) { 67 | this.re = real; 68 | this.im = imag; 69 | } 70 | function complex_abs(z) { 71 | return Math.sqrt(z.re*z.re + z.im*z.im); 72 | } 73 | function complex_abs2(z) { 74 | return z.re*z.re + z.im*z.im; 75 | } 76 | function complex_add(z,w) { 77 | return new Complex(z.re+w.re, z.im+w.im); 78 | } 79 | function complex_multiply(z,w) { 80 | return new 
Complex(z.re*w.re-z.im*w.im, z.re*w.im+z.im*w.re); 81 | } 82 | 83 | function mandel(z) { 84 | var c, n, maxiter; 85 | c = z; 86 | maxiter = 80; 87 | n = 0; 88 | for (n = 0; n < maxiter; n++) { 89 | if (complex_abs2(z) > 4) { return n; } 90 | z = complex_add(complex_multiply(z,z),c); 91 | } 92 | return maxiter; 93 | } 94 | 95 | function mandelperf() { 96 | var a, r, re, i, im, z; 97 | a = new Array(26*21); 98 | r = 0; 99 | for (r = 0; r < 26; r++) { 100 | re = -2.0 + r*0.1; 101 | i = 0; 102 | for (i = 0; i < 21; i++) { 103 | im = -1.0 + i*0.1; 104 | z = new Complex(re,im); 105 | a[r*21+i] = mandel(z); 106 | } 107 | } 108 | return a; 109 | } 110 | 111 | a = mandelperf(); 112 | i = 0; 113 | sum = 0; 114 | for (i = 0; i < a.length; i++) { sum += a[i]; } 115 | assert(sum === 14791); 116 | a0 = a[0]; 117 | 118 | tmin = Number.POSITIVE_INFINITY; 119 | for (i=0; i < 5; i++) { 120 | t = (new Date()).getTime(); 121 | for (j=0; j < 1000; j++) { 122 | a = mandelperf(); 123 | assert(a[0] === a0); 124 | } 125 | t = (new Date()).getTime()-t; 126 | if (t < tmin) { tmin=t; } 127 | } 128 | console.log("javascript,userfunc_mandelbrot," + tmin/1000); 129 | 130 | // numeric vector sort // 131 | 132 | function rand(n) { 133 | var v, i; 134 | v = new Array(n); 135 | 136 | for (i = 0; i < n; i++) { 137 | v[i] = Math.random(); 138 | } 139 | 140 | return v; 141 | } 142 | 143 | function qsort_kernel(a, lo, hi) { 144 | var i, j, pivot, t; 145 | i = lo; 146 | j = hi; 147 | while (i < hi) { 148 | pivot = a[Math.floor((lo+hi)/2)]; 149 | while (i <= j) { 150 | while (a[i] < pivot) { 151 | i = i + 1; 152 | } 153 | while (a[j] > pivot) { 154 | j = j - 1; 155 | } 156 | if (i <= j) { 157 | t = a[i]; 158 | a[i] = a[j]; 159 | a[j] = t; 160 | i = i + 1; 161 | j = j - 1; 162 | } 163 | } 164 | if (lo < j) { 165 | qsort_kernel(a, lo, j); 166 | } 167 | lo = i; 168 | j = hi; 169 | } 170 | } 171 | 172 | function sortperf(n) { 173 | var v = rand(n); 174 | qsort_kernel(v, 0, n); 175 | return v; 176 | } 177 | 
178 | tmin = Number.POSITIVE_INFINITY; 179 | for (i=0; i < 5; i++) { 180 | t = (new Date()).getTime(); 181 | for (j=0; j < 100; j++) { 182 | v = sortperf(5000); 183 | assert(a[0] < 0.99); 184 | } 185 | t = (new Date()).getTime()-t; 186 | if (t < tmin) { tmin=t; } 187 | } 188 | console.log("javascript,recursion_quicksort," + tmin/100); 189 | 190 | // slow pi series // 191 | 192 | function pisum() { 193 | var sum, k; 194 | sum = 0.0; 195 | for (i=0; i < 500; i++) { 196 | sum = 0.0; 197 | for (k=1; k <= 10000; k++) { 198 | sum += 1.0/(k*k); 199 | } 200 | } 201 | return sum; 202 | } 203 | 204 | tmin = Number.POSITIVE_INFINITY; 205 | for (i=0; i < 5; i++) { 206 | t = (new Date()).getTime(); 207 | for (j=0; j < 10; j++) { 208 | assert(Math.abs(pisum()-1.644834071848065) < 1e-12); 209 | } 210 | t = (new Date()).getTime()-t; 211 | if (t < tmin) { tmin=t; } 212 | } 213 | console.log("javascript,iteration_pi_sum," + tmin/10); 214 | 215 | // random matrix statistics // 216 | 217 | function gaussian() { 218 | var k, i, j; 219 | k = 2; 220 | do { 221 | i = 2*Math.random()-1; 222 | j = 2*Math.random()-1; 223 | k = i*i+j*j; 224 | } while (k >= 1); 225 | return i*Math.sqrt((-2*Math.log(k))/k); 226 | } 227 | 228 | function randn( a, sub ) { 229 | var subLen, len, i; 230 | subLen = sub.length; 231 | len = a.length; 232 | 233 | for (i = 0; i < subLen; i++) { 234 | a[i] = sub[i] = gaussian(); 235 | } 236 | 237 | for (i = subLen; i < len; i++) { 238 | a[i] = gaussian(); 239 | } 240 | 241 | return a; 242 | } 243 | 244 | function transpose(dest, src,m,n) { 245 | var i, j; 246 | i = 0; 247 | j = 0; 248 | 249 | for (i = 0; i < m; i++) { 250 | for (j = 0; j < n; j++) { 251 | dest[i*n+j] = src[j*m+i]; 252 | } 253 | } 254 | } 255 | 256 | function matmulCopy( dest, A,B,m,l,n) { 257 | var i, j, k, sum; 258 | i = 0; 259 | j = 0; 260 | k = 0; 261 | 262 | for (i = 0; i < m; i++) { 263 | for (j = 0; j < n; j++) { 264 | sum = 0.0; 265 | 266 | for (k = 0; k < l; k++) { 267 | sum += A[i*l+k]*B[k*n+j]; 
268 | } 269 | 270 | dest[i*n+j] = sum; 271 | } 272 | } 273 | } 274 | 275 | function randmatstat(t) { 276 | var n, P, PTransposed, Pt1P, Pt2P, Q, QTransposed, Pt1Q, Pt2Q, 277 | a, b, c, d, aSub, bSub, cSub, dSub, v, w, i, j, k, 278 | trP, trQ, v1, v2, w1, w2; 279 | n = 5; 280 | 281 | P = new Float64Array( 4*n*n ); 282 | Q = new Float64Array( 4*n*n ); 283 | 284 | PTransposed = new Float64Array( P.length ); 285 | QTransposed = new Float64Array( Q.length ); 286 | 287 | Pt1P = new Float64Array( (4*n) * (4*n) ); 288 | Pt2P = new Float64Array( (4*n) * (4*n) ); 289 | Pt1Q = new Float64Array( (2*n) * (2*n) ); 290 | Pt2Q = new Float64Array( (2*n) * (2*n) ); 291 | 292 | a = new Float64Array( n*n ); 293 | b = new Float64Array( n*n ); 294 | c = new Float64Array( n*n ); 295 | d = new Float64Array( n*n ); 296 | 297 | // the first n number of elements of a to d 298 | aSub = new Float64Array( n ); 299 | bSub = new Float64Array( n ); 300 | cSub = new Float64Array( n ); 301 | dSub = new Float64Array( n ); 302 | 303 | v = new Float64Array( t ); 304 | w = new Float64Array( t ); 305 | 306 | i = 0; 307 | j = 0; 308 | k = 0; 309 | 310 | for (i = 0; i < t; i++) { 311 | a = randn( a, aSub ); 312 | b = randn( b, bSub ); 313 | c = randn( c, cSub ); 314 | d = randn( d, dSub ); 315 | 316 | P.set( a, 0*n*n ); 317 | P.set( b, 1*n*n ); 318 | P.set( c, 2*n*n ); 319 | P.set( d, 3*n*n ); 320 | 321 | for (j = 0; j < n; j++) { 322 | Q.set( aSub, 2*n*j ); 323 | Q.set( bSub, 2*n*j+n ); 324 | Q.set( cSub, 2*n*(n+j) ); 325 | Q.set( dSub, 2*n*(n+j)+n ); 326 | /* 327 | for (k = 0; k < n; k++) { 328 | Q[ 2*n*j + k ] = a[k]; 329 | Q[ 2*n*j+n + k ] = b[k]; 330 | Q[ 2*n*(n+j) + k ] = c[k]; 331 | Q[ 2*n*(n+j)+n + k ] = d[k]; 332 | } 333 | */ 334 | } 335 | 336 | transpose( PTransposed, P, n, 4*n ); 337 | matmulCopy( Pt1P, PTransposed, P, 4*n, n, 4*n ); 338 | matmulCopy( Pt2P, Pt1P, Pt1P, 4*n, 4*n, 4*n); 339 | matmulCopy( Pt1P, Pt2P, Pt2P, 4*n, 4*n, 4*n); 340 | 341 | trP = 0; 342 | for (j = 0; j < 4*n; j++) { 343 | 
trP += Pt1P[(4*n+1)*j]; 344 | } 345 | v[i] = trP; 346 | 347 | transpose( QTransposed, Q, 2*n, 2*n ); 348 | matmulCopy( Pt1Q, QTransposed, Q, 2*n, 2*n, 2*n ); 349 | matmulCopy( Pt2Q, Pt1Q, Pt1Q, 2*n, 2*n, 2*n); 350 | matmulCopy( Pt1Q, Pt2Q, Pt2Q, 2*n, 2*n, 2*n); 351 | 352 | trQ = 0; 353 | for (j = 0; j < 2*n; j++) { 354 | trQ += Pt1Q[(2*n+1)*j]; 355 | } 356 | w[i] = trQ; 357 | } 358 | 359 | v1 = 0.0; 360 | v2 = 0.0; 361 | w1 = 0.0; 362 | w2 = 0.0; 363 | for (i = 0; i < t; i++) { 364 | v1 += v[i]; v2 += v[i]*v[i]; 365 | w1 += w[i]; w2 += w[i]*w[i]; 366 | } 367 | 368 | return { 369 | s1: Math.sqrt((t*(t*v2-v1*v1))/((t-1)*v1*v1)), 370 | s2: Math.sqrt((t*(t*w2-w1*w1))/((t-1)*w1*w1)) 371 | }; 372 | } 373 | 374 | tmin = Number.POSITIVE_INFINITY; 375 | for (i=0; i < 5; i++) { 376 | t = (new Date()).getTime(); 377 | for (j=0; j < 10; j++) { 378 | r = randmatstat(1000); 379 | // assert(0.5 < r.s1 < 1.0); 380 | // assert(0.5 < r.s2 < 1.0); 381 | } 382 | t = (new Date()).getTime()-t; 383 | if (t < tmin) { tmin=t; } 384 | } 385 | console.log("javascript,matrix_statistics," + tmin/10); 386 | 387 | // random matrix multiply // 388 | 389 | function randFloat64(n) { 390 | var v, i; 391 | v = new Float64Array(n); 392 | 393 | for (i = 0; i < n; i++) { 394 | v[i] = Math.random(); 395 | } 396 | 397 | return v; 398 | } 399 | 400 | // Transpose mxn matrix. 401 | function mattransp(A, m, n) { 402 | var i, j, T; 403 | T = new Float64Array(m * n); 404 | 405 | for (i = 0; i < m; ++i) { 406 | for (j = 0; j < n; ++j) { 407 | T[j * m + i] = A[i * n + j]; 408 | } 409 | } 410 | 411 | return T; 412 | } 413 | 414 | function matmul(A,B,m,l,n) { 415 | var C, i, j, k, total; 416 | C = new Float64Array(m*n); 417 | i = 0; 418 | j = 0; 419 | k = 0; 420 | 421 | // Use the transpose of B so that 422 | // during the matrix multiplication 423 | // we access consecutive memory locations. 
424 | // This is a fairer comparison of JS 425 | // with the other languages which call on 426 | // custom multiplication routines, which 427 | // likely make use of such aligned memory. 428 | B = mattransp(B,l,n); 429 | 430 | for (i = 0; i < m; i++) { 431 | for (j = 0; j < n; j++) { 432 | total = 0.0; 433 | 434 | for (k = 0; k < l; k++) { 435 | total += A[i*l+k]*B[j*l+k]; 436 | } 437 | 438 | C[i*n+j] = total; 439 | } 440 | } 441 | 442 | return C; 443 | } 444 | 445 | function randmatmul(n) { 446 | var A, B; 447 | A = randFloat64(n*n); 448 | B = randFloat64(n*n); 449 | 450 | return matmul(A, B, n, n, n); 451 | } 452 | 453 | tmin = Number.POSITIVE_INFINITY; 454 | t = (new Date()).getTime(); 455 | C = randmatmul(1000); 456 | assert(0 <= C[0]); 457 | t = (new Date()).getTime()-t; 458 | if (t < tmin) { tmin=t; } 459 | console.log("javascript,matrix_multiply," + tmin); 460 | }()); 461 | -------------------------------------------------------------------------------- /perf.lua: -------------------------------------------------------------------------------- 1 | 2 | if jit.arch ~= 'x64' then 3 | print('WARNING: please use BIT=64 for optimal OpenBLAS performance') 4 | end 5 | 6 | local ffi = require 'ffi' 7 | local bit = require 'bit' 8 | local time = require 'time' 9 | local alg = require 'sci.alg' 10 | local prng = require 'sci.prng' 11 | local stat = require 'sci.stat' 12 | local dist = require 'sci.dist' 13 | local complex = require 'sci.complex' 14 | 15 | local min, sqrt, random, abs = math.min, math.sqrt, math.random, math.abs 16 | local cabs = complex.abs 17 | local rshift = bit.rshift 18 | local format = string.format 19 | local nowutc = time.nowutc 20 | local rng = prng.std() 21 | local vec, mat, join = alg.vec, alg.mat, alg.join 22 | local sum, trace = alg.sum, alg.trace 23 | local var, mean = stat.var, stat.mean 24 | 25 | -------------------------------------------------------------------------------- 26 | local function elapsed(f) 27 | local t0 = nowutc() 28 | 
local val1, val2 = f() 29 | local t1 = nowutc() 30 | return (t1 - t0):tomilliseconds(), val1, val2 31 | end 32 | 33 | local function timeit(f, name, check) 34 | local t, k, s = 1/0, 0, nowutc() 35 | while true do 36 | k = k + 1 37 | local tx, val1, val2 = elapsed(f) 38 | t = min(t, tx) 39 | if check then 40 | check(val1, val2) 41 | end 42 | if k > 5 and (nowutc() - s):toseconds() >= 2 then break end 43 | end 44 | io.write(format('lua,%s,%g\n', name, t)) 45 | end 46 | 47 | -------------------------------------------------------------------------------- 48 | local function fib(n) 49 | if n < 2 then 50 | return n 51 | else 52 | return fib(n-1) + fib(n-2) 53 | end 54 | end 55 | 56 | timeit(function() return fib(20) end, 'recursion_fibonacci', function(x) assert(x == 6765) end) 57 | 58 | local function parseint() 59 | local lmt = 2^32 - 1 60 | local n, m 61 | for i = 1, 1000 do 62 | n = random(lmt) -- Between 0 and 2^32 - 1, i.e. uint32_t. 63 | local s = format('0x%x', tonumber(n)) 64 | m = tonumber(s) 65 | assert(n == m) -- Done here to be even with Julia benchmark. 66 | end 67 | return n, m 68 | end 69 | 70 | timeit(parseint, 'parse_integers') 71 | 72 | local function cabs2( z ) 73 | return z[1]*z[1] + z[2]*z[2] 74 | end 75 | 76 | local function mandel(z) 77 | local c = z 78 | local maxiter = 80 79 | for n = 1, maxiter do 80 | if cabs2(z) > 4 then 81 | return n-1 82 | end 83 | z = z*z + c 84 | end 85 | return maxiter 86 | end 87 | local function mandelperf() 88 | local a = mat(26, 21) 89 | for r=1,26 do -- Lua's for i=l,u,c doesn't match Julia's for i=l:c:u. 
90 | for c=1,21 do 91 | local re, im = (r - 21)*0.1, (c - 11)*0.1 92 | a[{r, c}] = mandel(re + im*1i) 93 | end 94 | end 95 | return a 96 | end 97 | 98 | timeit(mandelperf, 'userfunc_mandelbrot', function(a) assert(sum(a) == 14791) end) 99 | 100 | local function qsort(a, lo, hi) 101 | local i, j = lo, hi 102 | while i < hi do 103 | local pivot = a[rshift(lo+hi, 1)] 104 | while i <= j do 105 | while a[i] < pivot do i = i+1 end 106 | while a[j] > pivot do j = j-1 end 107 | if i <= j then 108 | a[i], a[j] = a[j], a[i] 109 | i, j = i+1, j-1 110 | end 111 | end 112 | if lo < j then qsort(a, lo, j) end 113 | lo, j = i, hi 114 | end 115 | return a 116 | end 117 | 118 | local function sortperf() 119 | local n = 5000 120 | local v = ffi.new('double[?]', n+1) 121 | for i=1,n do 122 | v[i] = rng:sample() 123 | end 124 | return qsort(v, 1, n) 125 | end 126 | 127 | timeit(sortperf, 'recursion_quicksort', function(x) 128 | for i=2,5000 do 129 | assert(x[i-1] <= x[i]) 130 | end 131 | end 132 | ) 133 | 134 | local function pisum() 135 | local s 136 | for j = 1, 500 do 137 | s = 0 138 | for k = 1, 10000 do 139 | s = s + 1 / (k*k) 140 | end 141 | end 142 | return s 143 | end 144 | 145 | timeit(pisum, 'iteration_pi_sum', function(x) 146 | assert(abs(x - 1.644834071848065) < 1e-12) 147 | end) 148 | 149 | local function rand(r, c) 150 | local x = mat(r, c) 151 | for i=1,#x do 152 | x[i] = rng:sample() 153 | end 154 | return x 155 | end 156 | 157 | local function randn(r, c) 158 | local x = mat(r, c) 159 | for i=1,#x do 160 | x[i] = dist.normal(0, 1):sample(rng) 161 | end 162 | return x 163 | end 164 | 165 | local function randmatstat(t) 166 | local n = 5 167 | local v, w = vec(t), vec(t) 168 | for i=1,t do 169 | local a, b, c, d = randn(n, n), randn(n, n), randn(n, n), randn(n, n) 170 | local P = join(a..b..c..d) 171 | local Q = join(a..b, c..d) 172 | v[i] = trace((P[]`**P[])^^4) 173 | w[i] = trace((Q[]`**Q[])^^4) 174 | end 175 | return sqrt(var(v))/mean(v), sqrt(var(w))/mean(w) 176 | 
end 177 | 178 | timeit(function() return randmatstat(1000) end, 'matrix_statistics', 179 | function(s1, s2) 180 | assert( 0.5 < s1 and s1 < 1.0 and 0.5 < s2 and s2 < 1.0 ) 181 | end) 182 | 183 | local function randmatmult(n) 184 | local a, b = rand(n, n), rand(n, n) 185 | return a[]**b[] 186 | end 187 | 188 | timeit(function() return randmatmult(1000) end, 'matrix_multiply') 189 | 190 | if jit.os ~= 'Windows' then 191 | local function printfd(n) 192 | local f = io.open('/dev/null','w') 193 | for i = 1, n do 194 | f:write(format('%d %d\n', i, i+1)) 195 | end 196 | f:close() 197 | end 198 | 199 | timeit(function() return printfd(100000) end, 'print_to_file') 200 | end 201 | -------------------------------------------------------------------------------- /perf.m: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | %% Main function. All the tests are run here. %% 3 | %% The functions declarations can be found at the end. 
%% 4 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 5 | 6 | function perf() 7 | 8 | warning off; 9 | 10 | f = fib(20); 11 | assert(f == 6765) 12 | timeit('recursion_fibonacci', @fib, 20) 13 | 14 | timeit('parse_integers', @parseintperf, 1000) 15 | 16 | %% array constructors %% 17 | 18 | %o = ones(200,200); 19 | %assert(all(o) == 1) 20 | %timeit('ones', @ones, 200, 200) 21 | 22 | %assert(all(matmul(o) == 200)) 23 | %timeit('AtA', @matmul, o) 24 | 25 | mandel(complex(-.53,.68)); 26 | assert(sum(sum(mandelperf(true))) == 14791) 27 | timeit('userfunc_mandelbrot', @mandelperf, true) 28 | 29 | assert(issorted(sortperf(5000))) 30 | timeit('recursion_quicksort', @sortperf, 5000) 31 | 32 | s = pisum(true); 33 | assert(abs(s-1.644834071848065) < 1e-12); 34 | timeit('iteration_pi_sum',@pisum, true) 35 | 36 | %s = pisumvec(true); 37 | %assert(abs(s-1.644834071848065) < 1e-12); 38 | %timeit('pi_sum_vec',@pisumvec, true) 39 | 40 | [s1, s2] = randmatstat(1000); 41 | assert(round(10*s1) > 5 && round(10*s1) < 10); 42 | timeit('matrix_statistics', @randmatstat, 1000) 43 | 44 | timeit('matrix_multiply', @randmatmul, 1000); 45 | 46 | printfd(1) 47 | timeit('print_to_file', @printfd, 100000) 48 | 49 | end 50 | 51 | 52 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 53 | %% Functions declarations %% 54 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 55 | 56 | function assert(bool) 57 | if ~bool 58 | error('Assertion failed') 59 | end 60 | end 61 | 62 | function timeit(name, func, varargin) 63 | lang = 'matlab'; 64 | if exist('OCTAVE_VERSION') ~= 0 65 | lang = 'octave'; 66 | end 67 | 68 | nexpt = 5; 69 | times = zeros(nexpt, 1); 70 | 71 | for i=1:nexpt 72 | tic(); func(varargin{:}); times(i) = toc(); 73 | end 74 | 75 | times = sort(times); 76 | fprintf ('%s,%s,%.8f\n', lang, name, times(1)*1000); 77 | end 78 | 79 | %% recursive fib %% 80 | 81 | function f = fib(n) 82 | if n < 2 83 | f = n; 84 | return 85 | else 86 | f = fib(n-1) + fib(n-2); 87 | end 88 | end 89 | 90 | %% parse int %% 91 | 92 | function 
n = parseintperf(t) 93 | for i = 1:t 94 | n = fix(rand*(2^32)); 95 | s = sprintf('%08x',n); 96 | m = sscanf(s,'%x'); 97 | assert(m == n); 98 | end 99 | end 100 | 101 | %% matmul and transpose %% 102 | 103 | %function oo = matmul(o) 104 | % oo = o * o.'; 105 | %end 106 | 107 | %% mandelbrot set: complex arithmetic and comprehensions %% 108 | 109 | function r = abs2(z) 110 | r = real(z)*real(z) + imag(z)*imag(z); 111 | end 112 | 113 | function n = mandel(z) 114 | n = 0; 115 | c = z; 116 | for n=0:79 117 | if abs2(z)>4 118 | return 119 | end 120 | z = z^2+c; 121 | end 122 | n = 80; 123 | end 124 | 125 | function M = mandelperf(ignore) 126 | x=-2.0:.1:0.5; 127 | y=-1:.1:1; 128 | M=zeros(length(y),length(x)); 129 | for r=1:size(M,1) 130 | for c=1:size(M,2) 131 | M(r,c) = mandel(x(c)+y(r)*i); 132 | end 133 | end 134 | end 135 | 136 | %% numeric vector quicksort %% 137 | 138 | function b = qsort(a) 139 | b = qsort_kernel(a, 1, length(a)); 140 | end 141 | 142 | function a = qsort_kernel(a, lo, hi) 143 | i = lo; 144 | j = hi; 145 | while i < hi 146 | pivot = a(floor((lo+hi)/2)); 147 | while i <= j 148 | while a(i) < pivot, i = i + 1; end 149 | while a(j) > pivot, j = j - 1; end 150 | if i <= j 151 | t = a(i); 152 | a(i) = a(j); 153 | a(j) = t; 154 | i = i + 1; 155 | j = j - 1; 156 | end 157 | end 158 | if lo < j; a=qsort_kernel(a, lo, j); end 159 | lo = i; 160 | j = hi; 161 | end 162 | end 163 | 164 | function v = sortperf(n) 165 | v = rand(n,1); 166 | v = qsort(v); 167 | end 168 | 169 | %% slow pi series %% 170 | 171 | function sum = pisum(ignore) 172 | sum = 0.0; 173 | for j=1:500 174 | sum = 0.0; 175 | for k=1:10000 176 | sum = sum + 1.0/(k*k); 177 | end 178 | end 179 | end 180 | 181 | %% slow pi series, vectorized %% 182 | 183 | function s = pisumvec(ignore) 184 | a = [1:10000]; 185 | for j=1:500 186 | s = sum( 1./(a.^2)); 187 | end 188 | end 189 | 190 | %% random matrix statistics %% 191 | 192 | function [s1, s2] = randmatstat(t) 193 | n=5; 194 | v = zeros(t,1); 195 | 
w = zeros(t,1); 196 | for i=1:t 197 | a = randn(n, n); 198 | b = randn(n, n); 199 | c = randn(n, n); 200 | d = randn(n, n); 201 | P = [a b c d]; 202 | Q = [a b;c d]; 203 | v(i) = trace((P.'*P)^4); 204 | w(i) = trace((Q.'*Q)^4); 205 | end 206 | s1 = std(v)/mean(v); 207 | s2 = std(w)/mean(w); 208 | end 209 | 210 | function t = mytranspose(x) 211 | [m, n] = size(x); 212 | t = zeros(n, m); 213 | for i=1:n 214 | for j=1:m 215 | t(i,j) = x(j,i); 216 | end 217 | end 218 | end 219 | 220 | %% largish random number gen & matmul %% 221 | 222 | function X = randmatmul(n) 223 | X = rand(n,n)*rand(n,n); 224 | end 225 | 226 | %% printf %% 227 | 228 | function printfd(n) 229 | f = fopen('/dev/null','w'); 230 | for i = 1:n 231 | fprintf(f, '%d %d\n', i, i + 1); 232 | end 233 | fclose(f); 234 | end 235 | -------------------------------------------------------------------------------- /perf.nb: -------------------------------------------------------------------------------- 1 | (* Benchmark script *) 2 | 3 | (* Set up output stream *) 4 | SetOptions[$Output, FormatType -> OutputForm]; 5 | 6 | (* Test if system has a C compiler and if so set target to "C"*) 7 | Needs["CCompilerDriver`"]; 8 | If[ Length[CCompilers[]] > 0, 9 | $CompilationTarget = "C" 10 | ]; 11 | 12 | 13 | ClearAll[$printOutput]; 14 | $printOutput = True; 15 | 16 | ClearAll[timeit]; 17 | SetAttributes[timeit, HoldFirst]; 18 | timeit[ex_, name_String] := Module[ 19 | {t}, 20 | t = Infinity; 21 | Do[ 22 | t = Min[t, N[First[AbsoluteTiming[ex]]]]; 23 | , 24 | {i, 1, 5} 25 | ]; 26 | If[$printOutput, 27 | (*Print[OutputForm["mathematica," <> name <> ","], t*1000];*) 28 | Print["mathematica,", name, ",", t*1000]; 29 | ]; 30 | ]; 31 | 32 | ClearAll[test]; 33 | SetAttributes[test, HoldFirst]; 34 | test[ex_] := Assert[ex]; 35 | On[Assert]; 36 | 37 | 38 | (* recursive fib *) 39 | 40 | ClearAll[fib]; 41 | fib = Compile[{{n, _Integer}}, 42 | If[n < 2, n, fib[n - 1] + fib[n - 2]], 43 | CompilationTarget -> "WVM" 44 | ]; 45 | 46 | 
test[fib[20] == 6765]; 47 | timeit[fib[20], "recursion_fibonacci"]; 48 | 49 | (* parse integer *) 50 | 51 | ClearAll[parseintperf]; 52 | parseintperf[t_] := Module[ 53 | {n, m, i, s}, 54 | Do[ 55 | n = RandomInteger[{0, 4294967295}]; 56 | s = IntegerString[n, 16]; 57 | m = FromDigits[s, 16]; 58 | test[ m == n]; 59 | , 60 | {i, 1, t} 61 | ]; 62 | n 63 | ]; 64 | 65 | timeit[parseintperf[1000], "parse_integers"]; 66 | 67 | (* print to file *) 68 | 69 | ClearAll[printfdperf]; 70 | printfdperf[t_] := Module[ 71 | {i,fd,filename}, 72 | filename = "/dev/null"; 73 | fd = OpenWrite[filename]; 74 | For[i=1, i<=t, ++i, 75 | WriteString[fd, StringForm["`1` `2`\n", i, i + 1]]; 76 | ]; 77 | Close[fd]; 78 | ]; 79 | 80 | timeit[printfdperf[100000], "print_to_file"]; 81 | 82 | (* array constructors *) 83 | 84 | test[ And @@ And @@@ Thread /@ Thread[ConstantArray[1, {200, 200}] == 1]]; 85 | 86 | (* matmul and transpose *) 87 | 88 | ClearAll[A]; 89 | A = ConstantArray[1, {200, 200}]; 90 | test[And @@ And @@@ Thread /@ Thread[A.ConjugateTranspose[A] == 200]]; 91 | 92 | (* mandelbrot set: complex arithmetic and comprehensions *) 93 | 94 | ClearAll[abs2]; 95 | (*abs2[z_] := Module[ 96 | Re(z)*Re(z) + Im(z)*Im(z); 97 | ];*) 98 | 99 | ClearAll[mandel]; 100 | (*mandel[zin_] := Module[ 101 | {z, c, maxiter, n}, 102 | z = zin; 103 | c = z; 104 | maxiter = 80; 105 | Do[ 106 | If[ Abs2[z] > 4, 107 | maxiter = n-1; 108 | Break[] 109 | ]; 110 | z = z^2 + c; 111 | , 112 | {n, 1, maxiter} 113 | ]; 114 | maxiter 115 | ];*) 116 | mandel = Compile[{{zin, _Complex}}, 117 | Module[ 118 | {z = zin, c = zin, maxiter = 80, n = 0}, 119 | Do[ 120 | If[ Abs[z] > 2, 121 | maxiter = n-1; 122 | Break[] 123 | ]; 124 | z = z^2 + c; 125 | , 126 | {n, 1, maxiter} 127 | ]; 128 | maxiter 129 | ] 130 | ]; 131 | 132 | ClearAll[mandelperf]; 133 | mandelperf[] := Table[mandel[r + i*I], {i, -1., 1., 0.1}, {r, -2.0, 0.5, 0.1}]; 134 | 135 | test[ Total[mandelperf[], 2] == 14791]; 136 | timeit[mandelperf[], 
"userfunc_mandelbrot"]; 137 | 138 | (* numeric vector sort *) 139 | 140 | ClearAll[qsort]; 141 | (* qsort[ain_, loin_, hiin_] := Module[ 142 | {a = ain, i = loin, j = hiin, lo = loin, hi = hiin, pivot}, 143 | While[ i < hi, 144 | pivot = a[[BitShiftRight[lo + hi] ]]; 145 | While[ i <= j, 146 | While[a[[i]] < pivot, i++]; 147 | While[a[[j]] > pivot, j--]; 148 | If[ i <= j, 149 | a[[{i,j}]] = a[[{j, i}]]; 150 | i++; j--; 151 | ]; 152 | ]; 153 | If[ lo < j, a = qsort[a, lo, j] ]; 154 | {lo, j} = {i, hi}; 155 | ]; 156 | a 157 | ]; *) 158 | qsort = Compile[ 159 | {{ain, _Real, 1}, {loin, _Integer}, {hiin, _Integer}}, 160 | Module[ 161 | {a = ain, i = loin, j = hiin, lo = loin, hi = hiin, pivot}, 162 | While[ i < hi, 163 | pivot = a[[ Floor[(lo + hi)/2] ]]; 164 | While[ i <= j, 165 | While[a[[i]] < pivot, i++]; 166 | While[a[[j]] > pivot, j--]; 167 | If[ i <= j, 168 | a[[{i,j}]] = a[[{j, i}]]; 169 | i++; j--; 170 | ]; 171 | ]; 172 | If[ lo < j, a[[lo;;j]] = qsort[ a[[lo;;j]], 1, j - lo + 1] ]; 173 | {lo, j} = {i, hi}; 174 | ]; 175 | a 176 | ] 177 | ]; 178 | 179 | 180 | ClearAll[sortperf]; 181 | sortperf[n_] := Module[{vec = RandomReal[1, n]}, qsort[vec, 1, n]]; 182 | 183 | test[OrderedQ[sortperf[5000]] ]; 184 | timeit[sortperf[5000], "recursion_quicksort"]; 185 | 186 | (* slow pi series *) 187 | 188 | ClearAll[pisum]; 189 | pisum = Compile[ {}, 190 | Module[ 191 | {sum = 0.`}, 192 | Do[sum = Sum[1/(k*k), {k, 1, 10000}], 193 | {500}]; 194 | sum 195 | ] 196 | ]; 197 | 198 | 199 | test[Abs[pisum[] - 1.644834071848065`] < 1.`*^-12 ]; 200 | timeit[pisum[], "iteration_pi_sum"]; 201 | 202 | (* slow pi series, vectorized *) 203 | 204 | pisumvec = Compile[{}, 205 | Module[ 206 | {sum = 0.}, 207 | Do[ 208 | sum = Total[1/Range[1, 10000]^2];, 209 | {500} 210 | ]; 211 | sum 212 | ] 213 | ]; 214 | 215 | (* test[Abs[pisumvec[] - 1.644834071848065`] < 1.`*^-12 ];*) 216 | (* timeit[pisumvec[], "pi_sum_vec"];*) 217 | 218 | (* random matrix statistics *) 219 | 220 | ClearAll[randmatstat]; 
221 | (*randmatstat[t_] := Module[ 222 | {n, v, w, a, b, c, d, P, Q}, 223 | n = 5; 224 | v = w = ConstantArray[0., {t}]; 225 | Do[ 226 | a = RandomReal[NormalDistribution[], {n, n}]; 227 | b = RandomReal[NormalDistribution[], {n, n}]; 228 | c = RandomReal[NormalDistribution[], {n, n}]; 229 | d = RandomReal[NormalDistribution[], {n, n}]; 230 | P = Join[a, b, c, d, 2]; 231 | Q = ArrayFlatten[{{a, b}, {c, d}}]; 232 | v[[i]] = Tr[MatrixPower[Transpose[P].P, 4]]; 233 | w[[i]] = Tr[MatrixPower[Transpose[Q].Q, 4]]; 234 | , 235 | {i, 1, t} 236 | ]; 237 | {StandardDeviation[v]/Mean[v], StandardDeviation[w]/Mean[w]} 238 | ];*) 239 | randmatstat = Compile[{{t, _Integer}}, 240 | Module[ 241 | { 242 | n = 5, 243 | v = ConstantArray[0., t], 244 | w = ConstantArray[0., t], 245 | a = {{0.}}, b = {{0.}}, 246 | c = {{0.}}, d = {{0.}}, 247 | P = {{0.}}, Q = {{0.}} 248 | }, 249 | Do[ 250 | a = RandomReal[NormalDistribution[], {n, n}]; 251 | b = RandomReal[NormalDistribution[], {n, n}]; 252 | c = RandomReal[NormalDistribution[], {n, n}]; 253 | d = RandomReal[NormalDistribution[], {n, n}]; 254 | P = Join[a, b, c, d, 2]; 255 | Q = ArrayFlatten[{{a, b}, {c, d}}]; 256 | v[[i]] = Tr[MatrixPower[Transpose[P].P, 4]]; 257 | w[[i]] = Tr[MatrixPower[Transpose[Q].Q, 4]]; 258 | , 259 | {i, 1, t} 260 | ]; 261 | {StandardDeviation[v]/Mean[v], StandardDeviation[w]/Mean[w]} 262 | ], 263 | {{_ArrayFlatten, _Real, 2}} 264 | ]; 265 | 266 | 267 | ClearAll[s1,s2]; 268 | {s1, s2} = randmatstat[1000]; 269 | test[0.5 < s1 < 1.0 && 0.5 < s2 < 1.0]; 270 | 271 | timeit[randmatstat[1000], "matrix_statistics"]; 272 | 273 | (* largish random number gen & matmul *) 274 | 275 | timeit[RandomReal[1, {1000, 1000}].RandomReal[1, {1000, 1000}], "matrix_multiply"]; 276 | 277 | (* printfd *) 278 | 279 | (* only on unix systems *) 280 | If[ $OperatingSystem == "Linux"||$OperatingSystem == "MacOSX", 281 | 282 | ClearAll[printfd]; 283 | printfd[n_] := Module[ 284 | {stream}, 285 | stream = OpenWrite["/dev/null"]; 286 | Do[ 
287 | WriteString[stream, i, " ", i+1, "\n" ]; 288 | , 289 | {i, 1, n} 290 | ]; 291 | Close[stream]; 292 | ]; 293 | 294 | timeit[printfd[100000], "print_to_file"]; 295 | 296 | ]; 297 | -------------------------------------------------------------------------------- /perf.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | from numpy.random import rand, randn 3 | from numpy.linalg import matrix_power 4 | import sys 5 | import time 6 | import random 7 | 8 | if sys.version_info < (3,): 9 | range = xrange 10 | 11 | ## fibonacci ## 12 | 13 | def fib(n): 14 | if n<2: 15 | return n 16 | return fib(n-1)+fib(n-2) 17 | 18 | ## quicksort ## 19 | 20 | def qsort_kernel(a, lo, hi): 21 | i = lo 22 | j = hi 23 | while i < hi: 24 | pivot = a[(lo+hi) // 2] 25 | while i <= j: 26 | while a[i] < pivot: 27 | i += 1 28 | while a[j] > pivot: 29 | j -= 1 30 | if i <= j: 31 | a[i], a[j] = a[j], a[i] 32 | i += 1 33 | j -= 1 34 | if lo < j: 35 | qsort_kernel(a, lo, j) 36 | lo = i 37 | j = hi 38 | return a 39 | 40 | ## randmatstat ## 41 | 42 | def randmatstat(t): 43 | n = 5 44 | v = zeros(t) 45 | w = zeros(t) 46 | for i in range(t): 47 | a = randn(n, n) 48 | b = randn(n, n) 49 | c = randn(n, n) 50 | d = randn(n, n) 51 | P = concatenate((a, b, c, d), axis=1) 52 | Q = concatenate((concatenate((a, b), axis=1), concatenate((c, d), axis=1)), axis=0) 53 | v[i] = trace(matrix_power(dot(P.T,P), 4)) 54 | w[i] = trace(matrix_power(dot(Q.T,Q), 4)) 55 | return (std(v)/mean(v), std(w)/mean(w)) 56 | 57 | ## randmatmul ## 58 | 59 | def randmatmul(n): 60 | A = rand(n,n) 61 | B = rand(n,n) 62 | return dot(A,B) 63 | 64 | ## mandelbrot ## 65 | 66 | def abs2(z): 67 | return z.real*z.real + z.imag*z.imag 68 | 69 | def mandel(z): 70 | maxiter = 80 71 | c = z 72 | for n in range(maxiter): 73 | if abs2(z) > 4: 74 | return n 75 | z = z*z + c 76 | return maxiter 77 | 78 | def mandelperf(): 79 | r1 = [-2. + 0.1*i for i in range(26)] 80 | r2 = [-1. 
+ 0.1*i for i in range(21)] 81 | return [mandel(complex(r, i)) for r in r1 for i in r2] 82 | 83 | def pisum(): 84 | sum = 0.0 85 | for j in range(1, 501): 86 | sum = 0.0 87 | for k in range(1, 10001): 88 | sum += 1.0/(k*k) 89 | return sum 90 | 91 | #### Is this single threaded? 92 | # def pisumvec(): 93 | # return numpy.sum(1./(numpy.arange(1,10000)**2)) 94 | 95 | def parse_int(t): 96 | for i in range(1,t): 97 | n = random.randint(0,2**32-1) 98 | s = hex(n) 99 | #s = string(n, base = 16) 100 | if s[-1]=='L': 101 | s = s[0:-1] 102 | m = int(s,16) 103 | assert m == n 104 | return n 105 | 106 | def printfd(t): 107 | f = open("/dev/null", "w") 108 | for i in range(1,t): 109 | f.write("{:d} {:d}\n".format(i, i+1)) 110 | f.close() 111 | 112 | 113 | def print_perf(name, time): 114 | print("python," + name + "," + str(time*1000)) 115 | 116 | ## run tests ## 117 | 118 | if __name__=="__main__": 119 | 120 | mintrials = 5 121 | 122 | assert fib(20) == 6765 123 | tmin = float('inf') 124 | for i in range(mintrials): 125 | t = time.time() 126 | f = fib(20) 127 | t = time.time()-t 128 | if t < tmin: tmin = t 129 | print_perf("recursion_fibonacci", tmin) 130 | 131 | tmin = float('inf') 132 | for i in range(mintrials): 133 | t = time.time() 134 | n = parse_int(1000) 135 | t = time.time()-t 136 | if t < tmin: tmin = t 137 | print_perf ("parse_integers", tmin) 138 | 139 | assert sum(mandelperf()) == 14791 140 | tmin = float('inf') 141 | for i in range(mintrials): 142 | t = time.time() 143 | mandelperf() 144 | t = time.time()-t 145 | if t < tmin: tmin = t 146 | print_perf ("userfunc_mandelbrot", tmin) 147 | 148 | tmin = float('inf') 149 | for i in range(mintrials): 150 | lst = [ random.random() for i in range(1,5000) ] 151 | t = time.time() 152 | qsort_kernel(lst, 0, len(lst)-1) 153 | t = time.time()-t 154 | if t < tmin: tmin = t 155 | print_perf ("recursion_quicksort", tmin) 156 | 157 | assert abs(pisum()-1.644834071848065) < 1e-6 158 | tmin = float('inf') 159 | for i in 
range(mintrials): 160 | t = time.time() 161 | pisum() 162 | t = time.time()-t 163 | if t < tmin: tmin = t 164 | print_perf ("iteration_pi_sum", tmin) 165 | 166 | # assert abs(pisumvec()-1.644834071848065) < 1e-6 167 | # tmin = float('inf') 168 | # for i in range(mintrials): 169 | # t = time.time() 170 | # pisumvec() 171 | # t = time.time()-t 172 | # if t < tmin: tmin = t 173 | # print_perf ("pi_sum_vec", tmin) 174 | 175 | (s1, s2) = randmatstat(1000) 176 | assert s1 > 0.5 and s1 < 1.0 177 | tmin = float('inf') 178 | for i in range(mintrials): 179 | t = time.time() 180 | randmatstat(1000) 181 | t = time.time()-t 182 | if t < tmin: tmin = t 183 | print_perf ("matrix_statistics", tmin) 184 | 185 | tmin = float('inf') 186 | for i in range(mintrials): 187 | t = time.time() 188 | C = randmatmul(1000) 189 | assert C[0,0] >= 0 190 | t = time.time()-t 191 | if t < tmin: tmin = t 192 | print_perf ("matrix_multiply", tmin) 193 | 194 | tmin = float('inf') 195 | for i in range(mintrials): 196 | t = time.time() 197 | printfd(100000) 198 | t = time.time()-t 199 | if t < tmin: tmin = t 200 | print_perf ("print_to_file", tmin) 201 | -------------------------------------------------------------------------------- /perfutil.jl: --------------------------------------------------------------------------------
# This file was formerly a part of Julia. License is MIT: https://julialang.org/license

import Printf
import Random
import Statistics
import Base.Sys

# Lower bound on the number of evaluations done by `@timeit`; also the number of
# samples recorded by `@timeit_init`.
const mintrials = 5
# Lower bound on the accumulated run time of `@timeit`, in milliseconds
# (each sample is computed as 1000 * @elapsed).
const mintime = 2000.0
# Print CSV rows when the script is started without command-line arguments.
print_output = isempty(ARGS)
# Submit results to codespeed when the first command-line argument is "codespeed".
codespeed = length(ARGS) > 0 && ARGS[1] == "codespeed"

if codespeed
    using JSON
    using HTTPClient.HTTPC

    # Ensure that we've got the environment variables we want:
    if !haskey(ENV, "JULIA_FLAVOR")
        error( "You must provide the JULIA_FLAVOR environment variable identifying this julia build!" )
    end

    # Setup codespeed data dict for submissions to codespeed's JSON endpoint.  These parameters
    # are constant across all benchmarks, so we'll just let them sit here for now
    csdata = Dict()
    csdata["commitid"] = Base.GIT_VERSION_INFO.commit
    csdata["project"] = "Julia"
    csdata["branch"] = Base.GIT_VERSION_INFO.branch
    csdata["executable"] = ENV["JULIA_FLAVOR"]
    csdata["environment"] = chomp(read(`hostname`, String))
    csdata["result_date"] = join( split(Base.GIT_VERSION_INFO.date_string)[1:2], " " )    #Cut the timezone out
end

"""
    submit_to_codespeed(vals, name, desc, unit, test_group, lessisbetter=true)

Fill the global `csdata` dict with summary statistics of the raw samples `vals`
(mean, standard deviation, minimum, maximum) together with the benchmark `name`,
description `desc`, `unit`, `test_group` and the `lessisbetter` flag, then POST it
as JSON to the codespeed server.

Returns `true` when the server answers with HTTP 200 or 202 and throws (via
`error`) for any other status code. Only usable when the script was started in
codespeed mode, because `csdata`, `post` and `json` are set up in that branch.
"""
function submit_to_codespeed(vals,name,desc,unit,test_group,lessisbetter=true)
    # Points to the server
    codespeed_host = "julia-codespeed.csail.mit.edu"

    csdata["benchmark"] = name
    csdata["description"] = desc
    csdata["result_value"] = Statistics.mean(vals)
    csdata["std_dev"] = Statistics.std(vals)
    csdata["min"] = minimum(vals)
    csdata["max"] = maximum(vals)
    csdata["units"] = unit
    csdata["units_title"] = test_group
    csdata["lessisbetter"] = lessisbetter

    println( "$name: $(Statistics.mean(vals))" )
    ret = post( "http://$codespeed_host/result/add/json/", Dict("json" => json([csdata])) )
    println( json([csdata]) )
    if ret.http_code != 200 && ret.http_code != 202
        # `error` throws, so no statement placed after it in this branch can run.
        error("Error submitting $name [HTTP code $(ret.http_code)], dumping headers and text: $(ret.headers)\n$(String(ret.body))\n\n")
    end
    return true
end

"""
    @output_timings t name desc group

Report the samples in `t` for benchmark `name`: submit them to codespeed when
`codespeed` is set, otherwise print one CSV row
`julia,name,min,max,mean,std` when `print_output` is set. `group` is a
collection; when it is empty the test group is derived from the directory of the
file being run. A garbage collection is forced afterwards.
"""
macro output_timings(t,name,desc,group)
    t = esc(t)
    name = esc(name)
    desc = esc(desc)
    group = esc(group)
    quote
        # If we weren't given anything for the test group, infer off of file path!
        test_group = length($group) == 0 ? basename(dirname(Base.source_path())) : $group[1]
        if codespeed
            # NOTE(review): the samples produced by @timeit/@timeit_init are
            # 1000 * @elapsed, i.e. milliseconds, while the unit label sent here
            # is "seconds" -- confirm which one the codespeed side expects.
            submit_to_codespeed( $t, $name, $desc, "seconds", test_group )
        elseif print_output
            Printf.@printf "julia,%s,%f,%f,%f,%f\n" $name minimum($t) maximum($t) Statistics.mean($t) Statistics.std($t)
        end
        GC.gc()
    end
end

"""
    @timeit ex name desc [group...]

Evaluate `ex` repeatedly until it has run at least `mintrials` times and the
accumulated time has reached `mintime` milliseconds. The first evaluation is a
warm-up and is not recorded; all later timings (in milliseconds) are handed to
`@output_timings`.
"""
macro timeit(ex,name,desc,group...)
    quote
        let
            t = Float64[]
            tot = 0.0
            i = 0
            while i < mintrials || tot < mintime
                e = 1000*(@elapsed $(esc(ex)))
                tot += e
                if i > 0
                    # warm up on first iteration
                    push!(t, e)
                end
                i += 1
            end
            @output_timings t $(esc(name)) $(esc(desc)) $(esc(group))
        end
    end
end

"""
    @timeit_init ex init name desc [group...]

Like `@timeit`, but evaluate `init` (untimed) before every timed evaluation of
`ex`, and use a fixed number of runs: `mintrials + 1` evaluations, of which the
first is a discarded warm-up and the remaining `mintrials` are recorded.
"""
macro timeit_init(ex,init,name,desc,group...)
    quote
        t = zeros(mintrials)
        for i=0:mintrials
            $(esc(init))
            e = 1000*(@elapsed $(esc(ex)))
            if i > 0
                # warm up on first iteration
                t[i] = e
            end
        end
        @output_timings t $(esc(name)) $(esc(desc)) $(esc(group))
    end
end

"""
    maxrss(name)

On Linux, call `getrusage` for the current process and print a CSV row
`julia,name.mem,mx,mx,mx,0`, where `mx` is the fifth 64-bit word of the result
divided by 1024. Does nothing on other platforms or when `getrusage` fails.
"""
function maxrss(name)
    # FIXME: call uv_getrusage instead here
    @static if (Sys.islinux())
        # Zero-filled 144-byte buffer for getrusage to write into (presumably
        # sizeof(struct rusage) on 64-bit Linux -- confirm). `zeros` replaces the
        # former `Vector{Int64}(uninitialized, n)` + `fill!`: `uninitialized`
        # no longer exists in Julia >= 0.7 and raised an UndefVarError.
        rus = zeros(Int64, div(144,8))
        res = ccall(:getrusage, Int32, (Int32, Ptr{Cvoid}), 0, rus)
        if res == 0
            # Fifth word is presumably ru_maxrss (kilobytes on Linux) -- confirm.
            mx = rus[5]/1024
            Printf.@printf "julia,%s.mem,%f,%f,%f,%f\n" name mx mx mx 0
        end
    end
end


# seed rng for more consistent timings
if VERSION >= v"0.7.0"
    Random.seed!(1776)
else
    srand(1776)
end
-------------------------------------------------------------------------------- /randmtzig.c: -------------------------------------------------------------------------------- 1 | /* 2 | A C-program for MT19937, with initialization improved 2002/2/10. 3 | Coded by Takuji Nishimura and Makoto Matsumoto.
4 | This is a faster version by taking Shawn Cokus's optimization, 5 | Matthe Bellew's simplification, Isaku Wada's real version. 6 | David Bateman added normal and exponential distributions following 7 | Marsaglia and Tsang's Ziggurat algorithm. 8 | 9 | Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, 10 | Copyright (C) 2004, David Bateman 11 | All rights reserved. 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions 15 | are met: 16 | 17 | 1. Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 20 | 2. Redistributions in binary form must reproduce the above copyright 21 | notice, this list of conditions and the following disclaimer in the 22 | documentation and/or other materials provided with the distribution. 23 | 24 | 3. The names of its contributors may not be used to endorse or promote 25 | products derived from this software without specific prior written 26 | permission. 27 | 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 32 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 33 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 34 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 35 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 36 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 37 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 38 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 | 40 | 41 | Any feedback is very welcome.
42 | http://www.math.keio.ac.jp/matumoto/emt.html 43 | email: matumoto@math.keio.ac.jp 44 | */ 45 | 46 | /* 47 | Modified by Viral B. Shah for julia to support dsfmt and only __LP64__ 48 | systems. 52-bits of randomness are used from the mantissa of random double 49 | precision numbers generated by dsfmt. 50 | */ 51 | 52 | #include 53 | #include 54 | #include 55 | #include 56 | #ifndef _MSC_VER 57 | #include 58 | #endif 59 | 60 | #include 61 | #include 62 | 63 | #define DSFMT_DO_NOT_USE_OLD_NAMES 64 | #define DSFMT_MEXP 19937 65 | #include 66 | 67 | typedef ptrdiff_t randmtzig_idx_type; 68 | typedef signed char randmtzig_int8_t; 69 | typedef unsigned char randmtzig_uint8_t; 70 | typedef short randmtzig_int16_t; 71 | typedef unsigned short randmtzig_uint16_t; 72 | typedef int randmtzig_int32_t; 73 | typedef unsigned int randmtzig_uint32_t; 74 | typedef long long randmtzig_int64_t; 75 | typedef unsigned long long randmtzig_uint64_t; 76 | 77 | /* Declarations */ 78 | 79 | extern double randmtzig_randn (dsfmt_t *dsfmt); 80 | extern double randmtzig_gv_randn (void); 81 | extern double randmtzig_gv_exprnd (void); 82 | 83 | /* ===== Uniform generators ===== */ 84 | 85 | inline static randmtzig_uint64_t gv_randi (void) 86 | { 87 | double r = dsfmt_gv_genrand_close1_open2(); 88 | return *((uint64_t *) &r) & 0x000fffffffffffff; 89 | } 90 | 91 | /* generates a random number on (0,1) with 53-bit resolution */ 92 | inline static double gv_randu (void) 93 | { 94 | return dsfmt_gv_genrand_open_open(); 95 | } 96 | 97 | inline static randmtzig_uint64_t randi (dsfmt_t *dsfmt) 98 | { 99 | double r = dsfmt_genrand_close1_open2(dsfmt); 100 | return *((uint64_t *) &r) & 0x000fffffffffffff; 101 | } 102 | 103 | /* generates a random number on (0,1) with 53-bit resolution */ 104 | inline static double randu (dsfmt_t *dsfmt) 105 | { 106 | return dsfmt_genrand_open_open(dsfmt); 107 | } 108 | 109 | /* ===== Ziggurat normal and exponential generators ===== */ 110 | # define ZIGINT 
randmtzig_uint64_t 111 | # define EMANTISSA 4503599627370496 /* 52 bit mantissa */ 112 | # define ERANDI gv_randi() /* 52 bits for mantissa */ 113 | # define NMANTISSA 2251799813685248 114 | # define NRANDI gv_randi() /* 51 bits for mantissa + 1 bit sign */ 115 | # define RANDU gv_randu() 116 | 117 | #define ZIGGURAT_TABLE_SIZE 256 118 | 119 | #define ZIGGURAT_NOR_R 3.6541528853610088 120 | #define ZIGGURAT_NOR_INV_R 0.27366123732975828 121 | #define NOR_SECTION_AREA 0.00492867323399 122 | 123 | #define ZIGGURAT_EXP_R 7.69711747013104972 124 | #define ZIGGURAT_EXP_INV_R 0.129918765548341586 125 | #define EXP_SECTION_AREA 0.0039496598225815571993 126 | 127 | 128 | /* 129 | This code is based on the paper Marsaglia and Tsang, "The ziggurat method 130 | for generating random variables", Journ. Statistical Software. Code was 131 | presented in this paper for a Ziggurat of 127 levels and using a 32 bit 132 | integer random number generator. This version of the code, uses the 133 | Mersenne Twister as the integer generator and uses 256 levels in the 134 | Ziggurat. This has several advantages. 135 | 136 | 1) As Marsaglia and Tsang themselves states, the more levels the few 137 | times the expensive tail algorithm must be called 138 | 2) The cycle time of the generator is determined by the integer 139 | generator, thus the use of a Mersenne Twister for the core random 140 | generator makes this cycle extremely long. 141 | 3) The license on the original code was unclear, thus rewriting the code 142 | from the article means we are free of copyright issues. 143 | 4) Compile flag for full 53-bit random mantissa. 144 | 145 | It should be stated that the authors made my life easier, by the fact that 146 | the algorithm developed in the text of the article is for a 256 level 147 | ziggurat, even if the code itself isn't... 
148 | 149 | One modification to the algorithm developed in the article, is that it is 150 | assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in 151 | terms like 2^32 in the code. As the normal distribution is defined between 152 | -Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus 153 | in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for 154 | this term. The exponential distribution is one sided so we use the 155 | full 32 bits. We use EMANTISSA for this term. 156 | 157 | It appears that I'm slightly slower than the code in the article, this 158 | is partially due to a better generator of random integers than they 159 | use. But might also be that the case of rapid return was optimized by 160 | inlining the relevant code with a #define. As the basic Mersenne 161 | Twister is only 25% faster than this code I suspect that the main 162 | reason is just the use of the Mersenne Twister and not the inlining, 163 | so I'm not going to try and optimize further. 164 | */ 165 | 166 | 167 | // void randmtzig_create_ziggurat_tables (void) 168 | // { 169 | // int i; 170 | // double x, x1; 171 | 172 | // /* Ziggurat tables for the normal distribution */ 173 | // x1 = ZIGGURAT_NOR_R; 174 | // wi[255] = x1 / NMANTISSA; 175 | // fi[255] = exp (-0.5 * x1 * x1); 176 | 177 | // /* Index zero is special for tail strip, where Marsaglia and Tsang 178 | // * defines this as 179 | // * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, 180 | // * where v is the area of each strip of the ziggurat. 181 | // */ 182 | // ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA); 183 | // wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA; 184 | // fi[0] = 1.; 185 | 186 | // for (i = 254; i > 0; i--) 187 | // { 188 | // /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus 189 | // * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) 190 | // */ 191 | // x = sqrt(-2. 
* log(NOR_SECTION_AREA / x1 + fi[i+1])); 192 | // ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA); 193 | // wi[i] = x / NMANTISSA; 194 | // fi[i] = exp (-0.5 * x * x); 195 | // x1 = x; 196 | // } 197 | 198 | // ki[1] = 0; 199 | 200 | // /* Zigurrat tables for the exponential distribution */ 201 | // x1 = ZIGGURAT_EXP_R; 202 | // we[255] = x1 / EMANTISSA; 203 | // fe[255] = exp (-x1); 204 | 205 | // /* Index zero is special for tail strip, where Marsaglia and Tsang 206 | // * defines this as 207 | // * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, 208 | // * where v is the area of each strip of the ziggurat. 209 | // */ 210 | // ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA); 211 | // we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA; 212 | // fe[0] = 1.; 213 | 214 | // for (i = 254; i > 0; i--) 215 | // { 216 | // /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus 217 | // * need inverse operator of y = exp(-x) -> x = -ln(y) 218 | // */ 219 | // x = - log(EXP_SECTION_AREA / x1 + fe[i+1]); 220 | // ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA); 221 | // we[i] = x / EMANTISSA; 222 | // fe[i] = exp (-x); 223 | // x1 = x; 224 | // } 225 | // ke[1] = 0; 226 | // } 227 | 228 | // Tables for randn 229 | static ZIGINT ki[ZIGGURAT_TABLE_SIZE] = 230 | {2104047571230236, 0,1693657211688499,1919380038164751, 231 | 2015384402142420,2068365869415708,2101878624030987,2124958784087614, 232 | 2141808670783638,2154644611559370,2164744887580145,2172897953690771, 233 | 2179616279367521,2185247251864556,2190034623104318,2194154434518163, 234 | 2197736978772008,2200880740889623,2203661538008543,2206138681107245, 235 | 2208359231804928,2210361007256700,2212174742387166,2213825672703393, 236 | 2215334711001466,2216719334486539,2217994262138197,2219171977964129, 237 | 2220263139537873,2221276900116549,2222221164932202,2223102796828387, 238 | 2223927782546019,2224701368169460,2225428170203747,2226112267247709, 239 | 
2226757276104752,2227366415327922,2227942558554233,2228488279492093, 240 | 2229005890046815,2229497472774805,2229964908626691,2230409900758245, 241 | 2230833995044249,2231238597815812,2231624991249884,2231994346765634, 242 | 2232347736722468,2232686144665663,2233010474325699,2233321557544631, 243 | 2233620161275830,2233906993781039,2234182710130112,2234447917093281, 244 | 2234703177502812,2234949014149981,2235185913274123,2235414327692697, 245 | 2235634679614740,2235847363174420,2236052746716668,2236251174862705, 246 | 2236442970379808,2236628435876608,2236807855342616,2236981495548416, 247 | 2237149607321006,2237312426707072,2237470176035519,2237623064889274, 248 | 2237771290995262,2237915041040474,2238054491421185,2238189808931596, 249 | 2238321151397547,2238448668260322,2238572501115061,2238692784207837, 250 | 2238809644895031,2238923204068302,2239033576548092,2239140871448347, 251 | 2239245192514865,2239346638439450,2239445303151863,2239541276091355, 252 | 2239634642459413,2239725483455210,2239813876495104,2239899895417414, 253 | 2239983610673598,2240065089506859,2240144396119109,2240221591827156, 254 | 2240296735208897,2240369882240222,2240441086423317,2240510398906937, 255 | 2240577868599239,2240643542273660,2240707464668327,2240769678579424, 256 | 2240830224948918,2240889142947021,2240946470049710,2241002242111632, 257 | 2241056493434688,2241109256832545,2241160563691345,2241210444026824, 258 | 2241258926538069,2241306038658085,2241351806601384,2241396255408737, 259 | 2241439408989263,2241481290159988,2241521920683014,2241561321300414, 260 | 2241599511766981,2241636510880914,2241672336512567,2241707005631317, 261 | 2241740534330669,2241772937851645,2241804230604542,2241834426189118, 262 | 2241863537413270,2241891576310240,2241918554154426,2241944481475803, 263 | 2241969368073032,2241993223025259,2242016054702647,2242037870775672, 264 | 2242058678223188,2242078483339294,2242097291739004,2242115108362739, 265 | 
2242131937479636,2242147782689690,2242162646924702,2242176532448058, 266 | 2242189440853303,2242201373061504,2242212329317384,2242222309184204, 267 | 2242231311537365,2242239334556685,2242246375717338,2242252431779384, 268 | 2242257498775863,2242261571999386,2242264645987166,2242266714504423, 269 | 2242267770526080,2242267806216682,2242266812908434,2242264781077261, 270 | 2242261700316790,2242257559310117,2242252345799249,2242246046552055, 271 | 2242238647326588,2242230132832599,2242220486690050,2242209691384432, 272 | 2242197728218658,2242184577261284,2242170217290794,2242154625735654, 273 | 2242137778609814,2242119650443302,2242100214207531,2242079441234882, 274 | 2242057301132111,2242033761687055,2242008788768083,2241982346215658, 275 | 2241954395725333,2241924896721420,2241893806220494,2241861078683807, 276 | 2241826665857576,2241790516600019,2241752576693859,2241712788642894, 277 | 2241671091451056,2241627420382213,2241581706698751,2241533877376746, 278 | 2241483854795259,2241431556397014,2241376894317324,2241319774977796, 279 | 2241260098640839,2241197758920517,2241132642244683,2241064627262631, 280 | 2240993584191722,2240919374095516,2240841848084869,2240760846432212, 281 | 2240676197587764,2240587717084761,2240495206318733,2240398451183547, 282 | 2240297220544145,2240191264522592,2240080312570135,2239964071293311, 283 | 2239842221996510,2239714417896679,2239580280957705,2239439398282173, 284 | 2239291317986176,2239135544468183,2238971532964959,2238798683265249, 285 | 2238616332424332,2238423746288075,2238220109591870,2238004514345197, 286 | 2237775946143192,2237533267957802,2237275200846732,2237000300869931, 287 | 2236706931309079,2236393229029127,2236057063479481,2235695986373225, 288 | 2235307169458838,2234887326941556,2234432617919425,2233938522519742, 289 | 2233399683022654,2232809697779175,2232160850599794,2231443750584617, 290 | 2230646845562145,2229755753817960,2228752329126507,2227613325162477, 291 | 
2226308442121145,2224797391720369,2223025347823800,2220915633329775, 292 | 2218357446086993,2215184158448627,2211132412537323,2205758503851011, 293 | 2198248265654920,2186916352102052,2167562552481677,2125549880839429}; 294 | 295 | static double wi[ZIGGURAT_TABLE_SIZE] = 296 | {17367254121656703e-31,9558660348275145e-32,12708704832820278e-32, 297 | 14909740960986864e-32,16658733630346416e-32,18136120809053487e-32, 298 | 1942972015219358e-31,20589500627632916e-32,21646860576118966e-32, 299 | 2262294039150043e-31,23532718913376864e-32,24387234556800803e-32, 300 | 25194879828681465e-32,2596219977196592e-31,26694407473112964e-32, 301 | 2739572968463095e-31,280696460019946e-30,28719058903642897e-32, 302 | 29346417484275224e-32,29953809336344285e-32,30543030006769113e-32, 303 | 3111563633851158e-31,3167298801818414e-31,3221628035016365e-31, 304 | 32746570407564125e-32,33264798116476e-29,337718034169968e-30, 305 | 34268340352771636e-32,34755088731390227e-32,3523266384567022e-31, 306 | 3570162463362898e-31,3616248057128073e-31,36615697529342477e-32, 307 | 3706170277693123e-31,37500889278448874e-32,3793361940125627e-31, 308 | 38360228129389374e-32,3878102586096749e-31,3919630085297984e-31, 309 | 39606321365983254e-32,40011337552278087e-32,4041158312387907e-31, 310 | 4080727683070036e-31,4119862377455137e-31,41585816580575855e-32, 311 | 41969036444492247e-32,4234845407127582e-31,42724230518658345e-32, 312 | 43096517956924877e-32,4346546035489394e-31,4383119410062289e-31, 313 | 4419384856424202e-31,4455354660935343e-31,4491040505860591e-31, 314 | 4526453511835132e-31,45616042766683e-29,4596502910863464e-31, 315 | 4631159070186941e-31,4665581985579899e-31,469978049067346e-30, 316 | 4733763047137822e-31,4767537768070579e-31,4801112439606964e-31, 317 | 4834494540915173e-31,4867691262722585e-31,4900709524503576e-31, 318 | 4933555990446197e-31,4966237084303158e-31,499875900322208e-30, 319 | 5031127730640677e-31,5063349048324261e-31,5095428547615612e-31, 320 | 
5127371639960692e-31,5159183566767805e-31,5190869408652579e-31, 321 | 5222434094116442e-31,52538824077020155e-32,5285218997665102e-31, 322 | 5316448383199491e-31,5347574961247755e-31,5378603012928409e-31, 323 | 5409536709607314e-31,5440380118638932e-31,5471137208800966e-31, 324 | 550181185544408e-30,5532407845376661e-31,5562928881503102e-31, 325 | 5593378587232605e-31,5623760510674315e-31,5654078128633358e-31, 326 | 5684334850421336e-31,5714534021493849e-31,5744678926926726e-31, 327 | 5774772794741848e-31,5804818799092685e-31,5834820063319006e-31, 328 | 5864779662879593e-31,589470062817121e-30,5924585947241581e-31, 329 | 5954438568403615e-31,598426140275769e-30,601405732662843e-30, 330 | 6043829183921996e-31,6073579788409578e-31,6103311925942512e-31, 331 | 6133028356604082e-31,6162731816802865e-31,6192425021312213e-31, 332 | 6222110665260248e-31,6251791426074554e-31,6281469965385542e-31, 333 | 6311148930892342e-31,6340830958194888e-31,6370518672595733e-31, 334 | 640021469087503e-30,6429921623041988e-31,645964207406601e-30, 335 | 648937864559066e-30,6519133937633505e-31,6548910550274845e-31, 336 | 6578711085338253e-31,6608538148065851e-31,6638394348791179e-31, 337 | 6668282304612498e-31,6698204641069389e-31,6728163993825439e-31, 338 | 6758163010359885e-31,6788204351671041e-31,681829069399439e-30, 339 | 6848424730538249e-31,6878609173239948e-31,6908846754545526e-31, 340 | 6939140229215998e-31,696949237616333e-30,6999906000319335e-31, 341 | 7030383934540792e-31,7060929041554193e-31,7091544215943653e-31, 342 | 7122232386185626e-31,7152996516734219e-31,7183839610161045e-31, 343 | 7214764709353755e-31,7245774899777502e-31,72768733118038725e-32, 344 | 7308063123111988e-31,7339347561166714e-31,7370729905779203e-31, 345 | 7402213491755235e-31,7433801711637146e-31,7465498018545449e-31, 346 | 7497305929126601e-31,7529229026613742e-31,7561270964007667e-31, 347 | 7593435467385694e-31,7625726339346621e-31,7658147462600412e-31, 348 | 
7690702803711903e-31,7723396417008341e-31,7756232448661274e-31, 349 | 778921514095401e-30,7822348836746627e-31,7855637984151357e-31, 350 | 7889087141432085e-31,7922700982142658e-31,7956484300519808e-31, 351 | 7990442017147628e-31,8024579184911813e-31,8058900995263265e-31, 352 | 8093412784812165e-31,812812004227522e-30,8163028415800651e-31, 353 | 8198143720697359e-31,8233471947596931e-31,8269019271079405e-31, 354 | 8304792058796362e-31,834079688112767e-30,8377040521411316e-31, 355 | 8413529986789175e-31,8450272519715296e-31,8487275610177406e-31, 356 | 85245470086869e-29,8562094740097588e-31,8599927118319072e-31, 357 | 86380527619967175e-32,8676480611237092e-31,8715219945465259e-31, 358 | 8754280402508787e-31,8793671999012706e-31,8833405152300122e-31, 359 | 88734907038049e-29,8913939944215902e-31,8954764640486935e-31, 360 | 8995977064883017e-31,9037590026252085e-31,9079616903732087e-31, 361 | 9122071683126914e-31,9164968996211253e-31,9208324163254476e-31, 362 | 9252153239087913e-31,9296473063078686e-31,9341301313417584e-31, 363 | 938665656617903e-30,9432558359669126e-31,9479027264644209e-31, 364 | 95260849610588e-29,957375432209002e-30,962205950628746e-30, 365 | 9671026058815726e-31,972068102289435e-30,9771053062699983e-31, 366 | 9822172599183368e-31,9874071960473548e-31,9926785548800904e-31, 367 | 9980350026176626e-31,10034804521429213e-31,10090190861630543e-31, 368 | 10146553831460223e-31,10203941464676316e-31,1026240537260681e-30, 369 | 10322001115479755e-31,10382788623508751e-31,10444832675993878e-31, 370 | 10508203448348659e-31,1057297713900341e-30,10639236690670377e-31, 371 | 10707072623626628e-31,107765840026618e-29,10847879564397177e-31, 372 | 10921079038143372e-31,109963147017795e-29,11073733224929686e-31, 373 | 11153497865847152e-31,11235791107104895e-31,11320817840158973e-31, 374 | 11408809242576976e-31,1150002753783406e-30,11594771891443527e-31, 375 | 11693385786905373e-31,1179626635295029e-30,11903876299277459e-31, 376 | 
1201675939253847e-30,12135560818661637e-31,12261054417445396e-31, 377 | 12394179789158183e-31,12536093926597603e-31,1268824481425016e-30, 378 | 12852479319091384e-31,13031206634685398e-31,13227655770190893e-31, 379 | 13446300925006917e-31,13693606835124475e-31,13979436672771461e-31, 380 | 14319989869657897e-31,14744848603594667e-31,1531787274160907e-30, 381 | 16227698675312968e-31}; 382 | 383 | static double fi[ZIGGURAT_TABLE_SIZE] = 384 | {1.,.9771017012827331,.9598790918124174,.9451989534530794, 385 | .9320600759689914,.9199915050483614,.9087264400605639,.898095921906305, 386 | .8879846607634008,.8783096558161477,.869008688043794,.8600336212030095, 387 | .8513462584651245,.842915653118442,.8347162929929313,.8267268339520951, 388 | .8189291916094156,.8113078743182208,.8038494831763903,.7965423304282554, 389 | .7893761435711993,.7823418326598627,.775431304986139,.7686373158033355, 390 | .7619533468415471,.7553735065117552,.7488924472237273,.7425052963446368, 391 | .7362075981312672,.729995264565803,.7238645334728822,.717811932634902, 392 | .711834248882359,.7059285013367979,.7000919181404905,.694321916130033, 393 | .6886160830085275,.6829721616487918,.6773880362225135,.6718617199007669, 394 | .6663913439123812,.6609751477802419,.6556114705832252,.650298743114295, 395 | .6450354808242524,.6398202774564395,.6346517992909606,.6295287799281287, 396 | .6244500155502747,.6194143606090396,.6144207238920772,.6094680649288958, 397 | .6045553907005499,.599681752622168,.5948462437709915,.5900479963357922, 398 | .5852861792663006,.5805599961036837,.5758686829752109,.5712115067380753, 399 | .5665877632589521,.5619967758172782,.5574378936214867,.5529104904285204, 400 | .5484139632579217,.5439477311926505,.5395112342595453,.5351039323830201, 401 | .5307253044061945,.5263748471741873,.5220520746747954,.5177565172322012, 402 | .5134877207497434,.5092452459981365,.5050286679458292,.5008375751284826, 403 | .49667156905479676,.4925302636461491,.4884132847077125,.48432026942891204, 404 
| .48025086591125016,.4762047327216842,.4721815384698837,.46818096140782267, 405 | .4642026890502793,.460246417814924,.45631185268077407,.4523987068638829, 406 | .4485067015092144,.4446355653977281,.4407850346677702,.43695485254992955, 407 | .43314476911457434,.42935454103134185,.42558393133990086,.4218327092313535, 408 | .41810064983968476,.4143875340427069,.41069314827198344,.40701728433124823, 409 | .4033597392228692,.399720314981932,.3960988185175474,.39249506146101104, 410 | .3889088600204649,.38534003484173424,.3817884108750316,.37825381724723833, 411 | .37473608713949164,.37123505766982157,.3677505697805964,.36428246813054976, 412 | .36083060099117586,.3573948201472906,.35397498080156936,.35057094148288126, 413 | .34718256395825153,.3438097131482915,.3404522570459456,.33711006663841303, 414 | .33378301583210873,.3304709813805373,.32717384281495887,.32389148237773235, 415 | .3206237849582305,.3173706380312227,.3141319315976305,.310907558127564, 416 | .307697412505554,.30450139197789644,.30131939610203423,.29815132669790145, 417 | .2949970878011627,.2918565856182811,.28872972848335393,.28561642681665805, 418 | .28251659308484933,.27943014176276515,.2763569892967811,.27329705406967564, 419 | .2702502563669598,.26721651834463167,.2641957639983174,.2611879191337636, 420 | .25819291133864797,.2552106699556771,.25224112605694377,.2492842124195167, 421 | .24633986350223877,.24340801542371202,.24048860594144916,.23758157443217368, 422 | .2346868618732527,.23180441082524855,.22893416541557743,.22607607132326474, 423 | .22323007576478943,.22039612748101145,.21757417672517823,.2147641752520084, 424 | .21196607630785277,.20917983462193548,.20640540639867916,.20364274931112133, 425 | .20089182249543117,.19815258654653795,.1954250035148854,.19270903690432864, 426 | .19000465167119293,.18731181422451676,.18463049242750437,.18196065560021638, 427 | .17930227452353026,.17665532144440646,.17401977008249914,.17139559563815535, 428 | 
.16878277480185,.1661812857651097,.1635911082329826,.16101222343811727, 429 | .1584446141565199,.15588826472506426,.15334316106083742,.15080929068240986, 430 | .1482866427331284,.14577520800653765,.14327497897404687,.14078594981496803, 431 | .138308116449064,.13584147657175705,.13338602969216254,.13094177717412792, 432 | .12850872228047336,.12608687022065,.12367622820205106,.12127680548523516, 433 | .11888861344334545,.11651166562603685,.11414597782825504,.1117915681642454, 434 | .10944845714720981,.10711666777507266,.10479622562286683,.10248715894230599, 435 | .10018949876917177,.09790327903921535,.09562853671335306,.09336531191302634, 436 | .09111364806670041,.08887359206859394,.08664519445086755,.08442850957065445, 437 | .0822235958134955,.08003051581494733,.07784933670237201,.07568013035919481, 438 | .07352297371424082,.07137794905914183,.06924514439725017,.06712465382802392, 439 | .06501657797147035,.06292102443797778,.06083810834975175,.05876795292113793, 440 | .056710690106399425,.05466646132507786,.05263541827697361,.05061772386112175, 441 | .04861355321603513,.04662309490208967,.044646552251446515,.042684144916619336, 442 | .04073611065607874,.0388027074046569,.03688421568869112,.034980941461833046, 443 | .033093219458688684,.031221417192023686,.02936593975823011,.027527235669693315, 444 | .02570580400863265,.023902203305873237,.022117062707379908,.020351096230109344, 445 | .018605121275783343,.016880083152595836,.015177088307982065,.013497450601780796, 446 | .0118427578579431,.0102149714397311,.008616582769422912,.007050875471392109, 447 | .005522403299264755,.0040379725933718715,.002609072746106362,.0012602859304985975}; 448 | 449 | // Tables for exprnd 450 | static ZIGINT ke[ZIGGURAT_TABLE_SIZE] = 451 | {3985772928715748, 0,2742928985168065,3438700186803721, 452 | 3744780257810519,3914896975372863,4022625697542798,4096776410635450, 453 | 4150853606149210,4192001604687417,4224344877584101,4250427292531740, 454 | 
4271901371161554,4289886428824118,4305167164135199,4318309783140431, 455 | 4329732973408940,4339752937704679,4348612900760388,4356502988721768, 456 | 4363573953227346,4369946852445020,4375720012348349,4380974119031481, 457 | 4385776001930298,4390181484145305,4394237557465219,4397984061535398, 458 | 4401454994146430,4404679543790856,4407682910787985,4410486965794400, 459 | 4413110782053579,4415571068741702,4417882526198713,4420058138987325, 460 | 4422109419110772,4424046609003130,4425878851844253,4427614335173868, 461 | 4429260412563040,4430823707156475,4432310200160197,4433725306767517, 462 | 4435073941555377,4436360575016074,4437589282595121,4438763787369085, 463 | 4439887497305303,4440963537889317,4441994780778252,4442983869033585, 464 | 4443933239400428,4444845142028910,4445721657973833,4446564714759241, 465 | 4447376100252993,4448157475061632,4448910383626429,4449636264176642, 466 | 4450336457674983,4451012215872352,4451664708573597,4452295030203006, 467 | 4452904205747010,4453493196141906,4454062903166143,4454614173889474, 468 | 4455147804725090,4455664545125435,4456165100957688,4456650137590828, 469 | 4457120282722585,4457576128971459,4458018236256245,4458447133983073, 470 | 4458863323057847,4459267277740095,4459659447352586,4460040257859578, 471 | 4460410113325310,4460769397263133,4461118473884710,4461457689257740, 472 | 4461787372379910,4462107836175980,4462419378424319,4462722282618581, 473 | 4463016818769709,4463303244152965,4463581804004301,4463852732169940, 474 | 4464116251712773,4464372575478779,4464621906626490,4464864439122178, 475 | 4465100358203284,4465329840812355,4465553056003596,4465770165323939, 476 | 4465981323170417,4466186677125455,4466386368271563,4466580531486827, 477 | 4466769295722448,4466952784263502,4467131114974006,4467304400527265, 478 | 4467472748622447,4467636262188208,4467795039574164,4467949174730939, 479 | 4468098757379442,4468243873170018,4468384603832024,4468521027314373, 480 | 
4468653217917530,4468781246417428,4468905180181701,4469025083278642, 481 | 4469141016579234,4469253037852582,4469361201855066,4469465560413474, 482 | 4469566162502383,4469663054316032,4469756279334881,4469845878387080, 483 | 4469931889704995,4470014348976986,4470093289394551,4470168741694984, 484 | 4470240734199652,4470309292847996,4470374441227332,4470436200598525, 485 | 4470494589917605,4470549625853344,4470601322800852,4470649692891185, 486 | 4470694745996980,4470736489734116,4470774929459349,4470810068263924, 487 | 4470841906963074,4470870444081369,4470895675833821,4470917596102651, 488 | 4470936196409614,4470951465883737,4470963391224346,4470971956659198, 489 | 4470977143897542,4470978932077904,4470977297710362,4470972214613072, 490 | 4470963653842747,4470951583618802,4470935969240827,4470916772999009, 491 | 4470893954077117,4470867468447603,4470837268758338,4470803304210460, 492 | 4470765520426769,4470723859310029,4470678258890503,4470628653161980, 493 | 4470574971905457,4470517140499614,4470455079717082,4470388705505446, 494 | 4470317928751818,4470242655029689,4470162784326669,4470078210751556, 495 | 4469988822219058,4469894500110287,4469795118907000,4469690545797298, 496 | 4469580640250319,4469465253557163,4469344228335006,4469217397991048, 497 | 4469084586142556,4468945605988875,4468800259630802,4468648337332217, 498 | 4468489616718259,4468323861903709,4468150822544456,4467970232804102, 499 | 4467781810226787,4467585254506222,4467380246139658,4467166444954116, 500 | 4466943488490515,4466710990229518,4466468537640691,4466215690034133, 501 | 4465951976190801,4465676891744455,4465389896284247,4465090410142477, 502 | 4464777810826750,4464451429049612,4464110544301482,4463754379904174, 503 | 4463382097472202,4462992790697122,4462585478355953,4462159096427753, 504 | 4461712489182116,4461244399078944,4460753455289386,4460238160612098, 505 | 4459696876515553,4459127805983956,4458528973779075,4457898203649722, 506 | 
4457233091920646,4456530976767892,4455788902331217,4455003576616607, 507 | 4454171321891082,4453288015951104,4452349022232651,4451349106194827, 508 | 4450282334707462,4449141954247903,4447920242480611,4446608326137821, 509 | 4445195955871677,4443671225661690,4442020220072463,4440226566619900, 510 | 4438270861888260,4436129927556552,4433775834104270,4431174602388627, 511 | 4428284451100006,4425053392146958,4421415870372502,4417287970124084, 512 | 4412560416174562,4407088078325945,4400673742272494,4393042098597073, 513 | 4383796248451589,4372341169422858,4357740343059956,4338425130125967, 514 | 4311541827049177,4271262897902398,4203411844498905,4061213381260384}; 515 | 516 | static double we[ZIGGURAT_TABLE_SIZE] = 517 | {19311480126418366e-31,1417802848791084e-32,23278824993382457e-33, 518 | 30487830247064326e-33,3666569771447489e-32,4217930218928974e-32, 519 | 4722256155686277e-32,51911915446217885e-33,5632347108395505e-32, 520 | 6051008260642765e-32,645101650967275e-31,6835264680370054e-32, 521 | 7205993957468906e-32,7564981553739299e-32,7913664396195108e-32, 522 | 8253223556351894e-32,8584643616885051e-32,8908755486564743e-32, 523 | 9226267962966373e-32,9537791450529272e-32,9843856087455926e-32, 524 | 10144925809006294e-32,10441409405585343e-32,10733669323436384e-32, 525 | 1102202874567019e-31,11306777346479334e-32,11588176009705533e-32, 526 | 11866460730417886e-32,1214184586569436e-31,12414526862326387e-32, 527 | 12684682560606153e-32,12952477151912284e-32,1321806185153881e-31, 528 | 13481576335745447e-32,13743149982367625e-32,14002902946807862e-32, 529 | 14260947099321287e-32,14517386844829297e-32,14772319842763584e-32, 530 | 15025837641447456e-32,15278026239101652e-32,15528966581595696e-32, 531 | 1577873500545958e-31,1602740363335091e-31,16275040728083524e-32, 532 | 16521711010420076e-32,16767475945078279e-32,17012393998770646e-32, 533 | 17256520873568226e-32,17499909718432365e-32,17742611321380505e-32, 534 | 
17984674284430714e-32,18226145183195818e-32,18467068712763576e-32, 535 | 18707487821298258e-32,18947443832625902e-32,19186976558915997e-32, 536 | 19426124404443042e-32,19664924461299023e-32,19903412597830144e-32, 537 | 20141623540485899e-32,20379590949693882e-32,2061734749030844e-31, 538 | 2085492489712377e-31,21092354035891528e-32,21329664960238294e-32, 539 | 21566886964838972e-32,2180404863516701e-31,22041177894111562e-32, 540 | 2227830204572395e-31,2251544781633135e-31,22752641393233694e-32, 541 | 22989908461180186e-32,23227274236804366e-32,23464763501180916e-32, 542 | 2370240063065339e-31,23940209626069303e-32,2417821414054771e-31, 543 | 24416437505894123e-32,24654902757768304e-32,2489363265970225e-31, 544 | 2513264972605797e-31,2537197624400795e-31,2561163429461499e-31, 545 | 2585164577308239e-31,26092032408240577e-32,2633281578133145e-31, 546 | 2657401734414762e-31,2681565843657999e-31,2705776030362351e-31, 547 | 27300344111887955e-32,27543430965657624e-32,2778704192254128e-31, 548 | 2803119800875143e-31,28275920234049704e-32,2852122960639331e-31, 549 | 28767147146315804e-32,29013693901073754e-32,29260890958589514e-32, 550 | 29508759461219033e-32,2975732061937252e-31,3000659572501474e-31, 551 | 3025660616507079e-31,3050737343476251e-31,3075891915089994e-31, 552 | 31011265065151543e-32,3126443307731675e-31,31518445248623523e-32, 553 | 31773323815073683e-32,32029091200858335e-32,32285770031865573e-32, 554 | 3254338314930261e-31,3280195362345436e-31,3306150476760074e-31, 555 | 3332206015211484e-31,33583643618764577e-32,33846279295240445e-32, 556 | 34109991609932597e-32,34374805306980633e-32,34640745461620167e-32, 557 | 3490783749585068e-31,3517610719444983e-31,3544558072136013e-31, 558 | 3571628463647465e-31,35988245912849274e-32,3626149195437003e-31, 559 | 36536050613905045e-32,36811950211971757e-32,3708921955595139e-31, 560 | 37367887959883854e-32,3764798526487784e-31,37929541860172334e-32, 561 | 3821258870488753e-31,38497157350504876e-32,3878327996411799e-31, 
562 | 39070989352498183e-32,3936031898702075e-31,3965130302950038e-31, 563 | 3994397635898684e-31,40238374599574693e-32,40534534149283966e-32, 564 | 4083249221007178e-31,41132286819038357e-32,4143395688089474e-31, 565 | 417375422017632e-30,42043083524385856e-32,4235062256482152e-31, 566 | 4266020205071558e-31,42971865761233266e-32,43285658568752094e-32, 567 | 4360162648241568e-31,43919816693657415e-32,4424027762380992e-31, 568 | 4456305897392361e-31,4488821177692617e-31,4521578845226347e-31, 569 | 4554584286317242e-31,4587843037674623e-31,4621360792696427e-31, 570 | 4655143408087069e-31,4689196910809916e-31,4723527505395548e-31, 571 | 4758141581628553e-31,4793045722637247e-31,4828246713412587e-31, 572 | 4863751549784512e-31,489956744788614e-30,4935701854138577e-31, 573 | 4972162455791703e-31,5008957192059114e-31,5046094265888434e-31, 574 | 5083582156411624e-31,5121429632123542e-31,5159645764841062e-31, 575 | 5198239944499494e-31,5237221894847848e-31,5276601690109886e-31, 576 | 531638977268369e-30,535659697195905e-30,5397234524338979e-31, 577 | 5438314094559637e-31,547984779841163e-30,5521848226975234e-31, 578 | 5564328472492872e-31,5607302156013967e-31,5650783456960506e-31, 579 | 5694787144776348e-31,5739328612839635e-31,5784423914835991e-31, 580 | 5830089803810586e-31,5876343774140057e-31,5923204106690931e-31, 581 | 5970689917460091e-31,6018821210025236e-31,6067618932170007e-31, 582 | 6117105037089722e-31,616730254963062e-30,6218235638068533e-31, 583 | 6269929691993326e-31,6322411406934211e-31,6375708876439426e-31, 584 | 6429851692413595e-31,6484871054618903e-31,6540799890364481e-31, 585 | 6597672985544566e-31,6655527128343343e-31,6714401267106488e-31, 586 | 677433668409101e-30,6835377187051274e-31,6897569320906848e-31, 587 | 6960962602074885e-31,7025609778445959e-31,7091567118449584e-31, 588 | 7158894733208553e-31,7227656936438121e-31,7297922647529085e-31, 589 | 7369765844191243e-31,7443266072160415e-31,7518509020832513e-31, 590 | 
7595587175337749e-31,7674600557578427e-31,7755657571215791e-31, 591 | 7838875968622858e-31,792438396157355e-30,8012321502113083e-31, 592 | 8102841765913146e-31,8196112877806125e-31,8292319928581809e-31, 593 | 8391667344146798e-31,849438168364877e-30,8600714963334941e-31, 594 | 8710948629387904e-31,882539833807214e-30,8944419748519865e-31, 595 | 9068415597131669e-31,9197844409811865e-31,9333231329422952e-31, 596 | 9475181706524984e-31,9624398345658476e-31,978170365478442e-30, 597 | 994806847238388e-30,1012465014428832e-30,10312843657756166e-31, 598 | 1051435160404455e-30,10731281954224043e-31,10966288068517408e-31, 599 | 1122277490935032e-30,11505212963006663e-31,11819635283304206e-31, 600 | 12174462832361815e-31,12581958069755114e-31,13060984107128082e-31, 601 | 13642786158057857e-31,14384889932178723e-31,15412190700064194e-31, 602 | 17091034077168055e-31}; 603 | 604 | static double fe[ZIGGURAT_TABLE_SIZE] = 605 | { 1.0,.9381436808621746,.9004699299257464,.8717043323812036, 606 | .8477855006239896,.8269932966430503,.8084216515230084,.7915276369724956, 607 | .7759568520401156,.7614633888498963,.7478686219851951,.7350380924314235, 608 | .722867659593572,.711274760805076,.7001926550827882,.689566496117078, 609 | .6793505722647654,.6695063167319247,.6600008410789997,.650805833414571, 610 | .6418967164272661,.6332519942143661,.6248527387036659,.6166821809152077, 611 | .608725382079622,.6009689663652322,.5934009016917334,.586010318477268, 612 | .578787358602845,.5717230486648258,.5648091929124002,.5580382822625874, 613 | .5514034165406413,.5448982376724396,.5385168720028619,.5322538802630432, 614 | .5261042139836197,.5200631773682336,.5141263938147486,.5082897764106429, 615 | .5025495018413477,.49690198724154955,.49134386959403253,.4858719873418849, 616 | .4804833639304542,.4751751930373774,.46994482528396,.4647897562504262, 617 | .4597076156421377,.45469615747461545,.449753251162755,.4448768734145485, 618 | 
.4400651008423539,.4353161032156366,.43062813728845883,.42599954114303434, 619 | .4214287289976166,.4169141864330029,.4124544659971612,.4080481831520324, 620 | .4036940125305303,.3993906844752311,.39513698183329016,.3909317369847971, 621 | .38677382908413765,.38266218149600983,.3785957594095808,.37457356761590216, 622 | .370594648435146,.36665807978151416,.3627629733548178,.3589084729487498, 623 | .35509375286678746,.35131801643748334,.347580494621637,.3438804447045024, 624 | .34021714906678,.33658991402867755,.332998068761809,.3294409642641363, 625 | .3259179723935562,.3224284849560891,.31897191284495724,.31554768522712895, 626 | .31215524877417955,.3087940669345602,.30546361924459026,.3021634006756935, 627 | .2988929210155818,.2956517042812612,.2924392881618926,.28925522348967775, 628 | .2860990737370768,.28297041453878075,.27986883323697287,.27679392844851736, 629 | .27374530965280297,.27072259679906,.2677254199320448,.2647534188350622, 630 | .2618062426893629,.25888354974901623,.2559850070304154,.25311029001562946, 631 | .2502590823688623,.24743107566532763,.2446259691318921,.24184346939887721, 632 | .23908329026244918,.23634515245705964,.23362878343743335,.2309339171696274, 633 | .2282602939307167,.22560766011668407,.22297576805812017,.2203643758433595, 634 | .21777324714870053,.21520215107537868,.21265086199297828,.21011915938898826, 635 | .20760682772422204,.2051136562938377,.20263943909370902,.20018397469191127, 636 | .19774706610509887,.19532852067956322,.19292814997677132,.1905457696631954, 637 | .1881811994042543,.1858342627621971,.18350478709776746,.1811926034754963, 638 | .1788975465724783,.17661945459049488,.1743581691713535,.17211353531532006, 639 | .16988540130252766,.1676736186172502,.165478041874936,.16329852875190182, 640 | .16113493991759203,.1589871389693142,.15685499236936523,.15473836938446808, 641 | .15263714202744286,.1505511850010399,.1484803756438668,.14642459387834494, 642 | 
.14438372216063478,.1423576454324722,.14034625107486245,.1383494288635802, 643 | .13636707092642886,.13439907170221363,.13244532790138752,.13050573846833077, 644 | .12858020454522817,.12666862943751067,.12477091858083096,.12288697950954514, 645 | .12101672182667483,.11916005717532768,.11731689921155557,.11548716357863353, 646 | .11367076788274431,.1118676316700563,.11007767640518538,.1083008254510338, 647 | .10653700405000166,.10478613930657017,.10304816017125772,.10132299742595363, 648 | .09961058367063713,.0979108533114922,.0962237425504328,.09454918937605586, 649 | .09288713355604354,.09123751663104016,.08960028191003286,.08797537446727022, 650 | .08636274114075691,.08476233053236812,.08317409300963238,.08159798070923742, 651 | .0800339475423199,.07848194920160642,.0769419431704805,.07541388873405841, 652 | .07389774699236475,.07239348087570874,.07090105516237183,.06942043649872875, 653 | .0679515934219366,.06649449638533977,.06504911778675375,.06361543199980733, 654 | .062193415408540995,.06078304644547963,.059384305633420266,.05799717563120066, 655 | .05662164128374288,.05525768967669704,.05390531019604609,.05256449459307169, 656 | .05123523705512628,.04991753428270637,.0486113855733795,.04731679291318155, 657 | .04603376107617517,.04476229773294328,.04350241356888818,.042254122413316234, 658 | .04101744138041482,.039792391023374125,.03857899550307486,.03737728277295936, 659 | .03618728478193142,.03500903769739741,.03384258215087433,.032687963508959535, 660 | .03154523217289361,.030414443910466604,.029295660224637393,.028188948763978636, 661 | .0270943837809558,.026012046645134217,.024942026419731783,.02388442051155817, 662 | .02283933540638524,.02180688750428358,.020787204072578117,.019780424338009743, 663 | .01878670074469603,.01780620041091136,.016839106826039948,.015885621839973163, 664 | .014945968011691148,.014020391403181938,.013109164931254991,.012212592426255381, 665 | .011331013597834597,.010464810181029979,.00961441364250221,.008780314985808975, 666 
| .00796307743801704,.007163353183634984,.006381905937319179,.005619642207205483, 667 | .004877655983542392,.004157295120833795,.003460264777836904,.002788798793574076, 668 | .0021459677437189063,.0015362997803015724,.0009672692823271745,.00045413435384149677}; 669 | 670 | 671 | /* 672 | * Here is the guts of the algorithm. As Marsaglia and Tsang state the 673 | * algorithm in their paper 674 | * 675 | * 1) Calculate a random signed integer j and let i be the index 676 | * provided by the rightmost 8-bits of j 677 | * 2) Set x = j * w_i. If j < k_i return x 678 | * 3) If i = 0, then return x from the tail 679 | * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x 680 | * 5) goto step 1 681 | * 682 | * Where f is the functional form of the distribution, which for a normal 683 | * distribution is exp(-0.5*x*x) 684 | */ 685 | 686 | /* NOTE: This is identical to randmtzig_gv_randn() below except for the random number generation */ 687 | double randmtzig_randn (dsfmt_t *dsfmt) 688 | { 689 | while (1) 690 | { 691 | /* arbitrary mantissa (selected by randi, with 1 bit for sign) */ 692 | const randmtzig_uint64_t r = randi(dsfmt); 693 | const randmtzig_int64_t rabs=r>>1; 694 | const int idx = (int)(rabs&0xFF); 695 | const double x = ( r&1 ? -rabs : rabs) * wi[idx]; 696 | 697 | if (rabs < (randmtzig_int64_t)ki[idx]) { 698 | return x; /* 99.3% of the time we return here 1st try */ 699 | } else if (idx == 0) { 700 | /* As stated in Marsaglia and Tsang 701 | * 702 | * For the normal tail, the method of Marsaglia[5] provides: 703 | * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, 704 | * then return r+x. Except that r+x is always in the positive 705 | * tail!!!! Any thing random might be used to determine the 706 | * sign, but as we already have r we might as well use it 707 | * 708 | * [PAK] but not the bottom 8 bits, since they are all 0 here! 
709 | */ 710 | double xx, yy; 711 | do { 712 | xx = - ZIGGURAT_NOR_INV_R * log (randu(dsfmt)); 713 | yy = - log (randu(dsfmt)); 714 | } 715 | while ( yy+yy <= xx*xx); 716 | return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); 717 | } else if ((fi[idx-1] - fi[idx]) * randu(dsfmt) + fi[idx] < exp(-0.5*x*x)) { 718 | return x; 719 | } 720 | 721 | } 722 | } 723 | 724 | /* NOTE: This is identical to randmtzig_randn() above except for the random number generation */ 725 | double randmtzig_gv_randn (void) 726 | { 727 | while (1) 728 | { 729 | /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */ 730 | const randmtzig_uint64_t r = NRANDI; 731 | const randmtzig_int64_t rabs=r>>1; 732 | const int idx = (int)(rabs&0xFF); 733 | const double x = ( r&1 ? -rabs : rabs) * wi[idx]; 734 | 735 | if (rabs < (randmtzig_int64_t)ki[idx]) { 736 | return x; /* 99.3% of the time we return here 1st try */ 737 | } else if (idx == 0) { 738 | /* As stated in Marsaglia and Tsang 739 | * 740 | * For the normal tail, the method of Marsaglia[5] provides: 741 | * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, 742 | * then return r+x. Except that r+x is always in the positive 743 | * tail!!!! Any thing random might be used to determine the 744 | * sign, but as we already have r we might as well use it 745 | * 746 | * [PAK] but not the bottom 8 bits, since they are all 0 here! 747 | */ 748 | double xx, yy; 749 | do { 750 | xx = - ZIGGURAT_NOR_INV_R * log (RANDU); 751 | yy = - log (RANDU); 752 | } 753 | while ( yy+yy <= xx*xx); 754 | return (rabs&0x100 ? 
-ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); 755 | } else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp(-0.5*x*x)) { 756 | return x; 757 | } 758 | 759 | } 760 | } 761 | 762 | double randmtzig_gv_exprnd (void) 763 | { 764 | while (1) 765 | { 766 | ZIGINT ri = ERANDI; 767 | const int idx = (int)(ri & 0xFF); 768 | const double x = ri * we[idx]; 769 | if (ri < ke[idx]) 770 | return x; // 98.9% of the time we return here 1st try 771 | else if (idx == 0) 772 | { 773 | /* As stated in Marsaglia and Tsang 774 | * 775 | * For the exponential tail, the method of Marsaglia[5] provides: 776 | * x = r - ln(U); 777 | */ 778 | return ZIGGURAT_EXP_R - log(RANDU); 779 | } 780 | else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp(-x)) 781 | return x; 782 | } 783 | } 784 | 785 | #ifdef STANDALONE 786 | 787 | int main(int ac, char *av[]) { 788 | if (ac == 1) { 789 | printf("Usage: randmtzig \n"); 790 | return (-1); 791 | } 792 | 793 | int n = atoi(av[1]); 794 | time_t t1; 795 | 796 | dsfmt_gv_init_gen_rand(0); 797 | 798 | double *p; posix_memalign((void **)&p, 16, n*sizeof(double)); 799 | uint32_t *u; posix_memalign((void **)&u, 16, 2*n*sizeof(uint32_t)); 800 | 801 | t1 = clock(); 802 | dsfmt_gv_fill_array_close_open(p, n); 803 | printf("Uniform fill (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC); 804 | 805 | t1 = clock(); 806 | for (int i = 0; i < n; i++) p[i] = dsfmt_gv_genrand_close_open(); 807 | printf("Uniform (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC); 808 | 809 | t1 = clock(); 810 | for (int i = 0; i < 2*n; i++) u[i] = dsfmt_gv_genrand_uint32(); 811 | printf("Uniform 32-bit ints (2*n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC); 812 | 813 | memset((void *)p, 0, n*sizeof(double)); 814 | t1 = clock(); 815 | for (int i = 0; i < n; i++) p[i] = randmtzig_gv_randn(); 816 | printf("Normal (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC); 817 | for (int i = 0; i < 10; i++) printf("%lf\n", p[i]); 818 | 819 | return 0; 820 | } 821 | 822 | #endif 823 | 
-------------------------------------------------------------------------------- /rust/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | **/*.rs.bk 3 | -------------------------------------------------------------------------------- /rust/Cargo.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "bitflags" 3 | version = "1.0.1" 4 | source = "registry+https://github.com/rust-lang/crates.io-index" 5 | 6 | [[package]] 7 | name = "blas-src" 8 | version = "0.1.3" 9 | source = "registry+https://github.com/rust-lang/crates.io-index" 10 | dependencies = [ 11 | "openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", 12 | ] 13 | 14 | [[package]] 15 | name = "cblas" 16 | version = "0.1.5" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | dependencies = [ 19 | "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", 20 | "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", 21 | "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", 22 | ] 23 | 24 | [[package]] 25 | name = "cblas-sys" 26 | version = "0.1.4" 27 | source = "registry+https://github.com/rust-lang/crates.io-index" 28 | dependencies = [ 29 | "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", 30 | ] 31 | 32 | [[package]] 33 | name = "either" 34 | version = "1.5.0" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | 37 | [[package]] 38 | name = "fuchsia-zircon" 39 | version = "0.3.3" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | dependencies = [ 42 | "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 43 | "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", 44 | ] 45 | 46 | [[package]] 47 | name = "fuchsia-zircon-sys" 48 | version = "0.3.3" 49 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 50 | 51 | [[package]] 52 | name = "itertools" 53 | version = "0.7.8" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | dependencies = [ 56 | "either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 57 | ] 58 | 59 | [[package]] 60 | name = "julia-bench" 61 | version = "0.1.0" 62 | dependencies = [ 63 | "blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 64 | "cblas 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", 65 | "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", 66 | "itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)", 67 | "mersenne_twister 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 68 | "ndarray 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)", 69 | "num 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", 70 | "openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", 71 | "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", 72 | ] 73 | 74 | [[package]] 75 | name = "libc" 76 | version = "0.2.40" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | 79 | [[package]] 80 | name = "matrixmultiply" 81 | version = "0.1.14" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | dependencies = [ 84 | "rawpointer 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 85 | ] 86 | 87 | [[package]] 88 | name = "mersenne_twister" 89 | version = "1.1.1" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | dependencies = [ 92 | "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", 93 | ] 94 | 95 | [[package]] 96 | name = "ndarray" 97 | version = "0.11.2" 98 | source = "registry+https://github.com/rust-lang/crates.io-index" 99 | dependencies = [ 100 | "blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 101 
| "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", 102 | "itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)", 103 | "matrixmultiply 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", 104 | "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", 105 | "num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", 106 | ] 107 | 108 | [[package]] 109 | name = "num" 110 | version = "0.1.42" 111 | source = "registry+https://github.com/rust-lang/crates.io-index" 112 | dependencies = [ 113 | "num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", 114 | "num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", 115 | "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", 116 | "num-iter 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)", 117 | "num-rational 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", 118 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 119 | ] 120 | 121 | [[package]] 122 | name = "num-bigint" 123 | version = "0.1.43" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | dependencies = [ 126 | "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", 127 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 128 | "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", 129 | "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", 130 | ] 131 | 132 | [[package]] 133 | name = "num-complex" 134 | version = "0.1.43" 135 | source = "registry+https://github.com/rust-lang/crates.io-index" 136 | dependencies = [ 137 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 138 | "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", 139 | ] 140 | 141 | [[package]] 142 | name = "num-integer" 143 
| version = "0.1.36" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | dependencies = [ 146 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 147 | ] 148 | 149 | [[package]] 150 | name = "num-iter" 151 | version = "0.1.35" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | dependencies = [ 154 | "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", 155 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 156 | ] 157 | 158 | [[package]] 159 | name = "num-rational" 160 | version = "0.1.42" 161 | source = "registry+https://github.com/rust-lang/crates.io-index" 162 | dependencies = [ 163 | "num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", 164 | "num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", 165 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 166 | "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", 167 | ] 168 | 169 | [[package]] 170 | name = "num-traits" 171 | version = "0.1.43" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | dependencies = [ 174 | "num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 175 | ] 176 | 177 | [[package]] 178 | name = "num-traits" 179 | version = "0.2.2" 180 | source = "registry+https://github.com/rust-lang/crates.io-index" 181 | 182 | [[package]] 183 | name = "openblas-src" 184 | version = "0.5.6" 185 | source = "registry+https://github.com/rust-lang/crates.io-index" 186 | 187 | [[package]] 188 | name = "rand" 189 | version = "0.4.2" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | dependencies = [ 192 | "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", 193 | "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", 194 | "winapi 0.3.4 
(registry+https://github.com/rust-lang/crates.io-index)", 195 | ] 196 | 197 | [[package]] 198 | name = "rawpointer" 199 | version = "0.1.0" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | 202 | [[package]] 203 | name = "rustc-serialize" 204 | version = "0.3.24" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | 207 | [[package]] 208 | name = "winapi" 209 | version = "0.3.4" 210 | source = "registry+https://github.com/rust-lang/crates.io-index" 211 | dependencies = [ 212 | "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 213 | "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 214 | ] 215 | 216 | [[package]] 217 | name = "winapi-i686-pc-windows-gnu" 218 | version = "0.4.0" 219 | source = "registry+https://github.com/rust-lang/crates.io-index" 220 | 221 | [[package]] 222 | name = "winapi-x86_64-pc-windows-gnu" 223 | version = "0.4.0" 224 | source = "registry+https://github.com/rust-lang/crates.io-index" 225 | 226 | [metadata] 227 | "checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf" 228 | "checksum blas-src 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8d3a12d382bd4c40f95c105f1a7074a18bdb0ee140ddb73f6d924a4f7d333bc9" 229 | "checksum cblas 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ce45c2223361cc6077b505f4d203e3b9494d746f39dfbf7627bbcb5aa7f0a13a" 230 | "checksum cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b6feecd82cce51b0204cf063f0041d69f24ce83f680d87514b004248e7b0fa65" 231 | "checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0" 232 | "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" 233 | "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" 234 | "checksum itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f58856976b776fedd95533137617a02fb25719f40e7d9b01c7043cd65474f450" 235 | "checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b" 236 | "checksum matrixmultiply 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "cac1a66eab356036af85ea093101a14223dc6e3f4c02a59b7d572e5b93270bf7" 237 | "checksum mersenne_twister 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b85dbb2f68dfc026aac8f4c5196579896b10ee45e8b9a1a3b325fab3043d1cb0" 238 | "checksum ndarray 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0e3d24c5ba54015d7d5203ca6f00d4cc16c71042bf7f7be26f091236f390a16a" 239 | "checksum num 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "4703ad64153382334aa8db57c637364c322d3372e097840c72000dabdcf6156e" 240 | "checksum num-bigint 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "81b483ea42927c463e191802e7334556b48e7875297564c0e9951bd3a0ae53e3" 241 | "checksum num-complex 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "b288631d7878aaf59442cffd36910ea604ecd7745c36054328595114001c9656" 242 | "checksum num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "f8d26da319fb45674985c78f1d1caf99aa4941f785d384a2ae36d0740bc3e2fe" 243 | "checksum num-iter 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "4b226df12c5a59b63569dd57fafb926d91b385dfce33d8074a412411b689d593" 244 | "checksum num-rational 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "ee314c74bd753fc86b4780aa9475da469155f3848473a261d2d18e35245a784e" 245 | 
"checksum num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31" 246 | "checksum num-traits 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dee092fcdf725aee04dd7da1d21debff559237d49ef1cb3e69bcb8ece44c7364" 247 | "checksum openblas-src 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "68d293fca3c73ad377ddd2236d32c828a50a611a5b472bf6a884b9b60a4acd97" 248 | "checksum rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eba5f8cb59cc50ed56be8880a5c7b496bfd9bd26394e176bc67884094145c2c5" 249 | "checksum rawpointer 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ebac11a9d2e11f2af219b8b8d833b76b1ea0e054aa0e8d8e9e4cbde353bdf019" 250 | "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" 251 | "checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3" 252 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 253 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 254 | -------------------------------------------------------------------------------- /rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "julia-bench" 3 | publish = false 4 | version = "0.1.0" 5 | 6 | [dependencies] 7 | itertools = "0.7.1" 8 | mersenne_twister = "1.1.1" 9 | num = "0.1.37" 10 | rand = "0.4.2" 11 | 12 | [dependencies.cblas] 13 | version = "0.1.2" 14 | optional = true 15 | 16 | [dependencies.cblas-sys] 17 | version = "0.1.4" 18 | 19 | 
[dependencies.ndarray]
features = ["blas"]
version = "0.11.1"

[dependencies.blas-src]
features = ["openblas"]
default-features = false
version = "0.1.2"

[dependencies.openblas-src]
features = ["cblas", "system"]
default-features = false
version = "0.5.6"

[features]
default = []
direct_blas = ["cblas"]

--------------------------------------------------------------------------------
/rust/rust-toolchain:
--------------------------------------------------------------------------------
nightly-2018-04-16

--------------------------------------------------------------------------------
/rust/src/direct_blas.rs:
--------------------------------------------------------------------------------
#![allow(unsafe_code)]

use rand::Rng;
use std::iter::Sum;
use util::{gen_rng, fill_rand, myrand};
use cblas::{dgemm, Layout, Transpose};
use itertools::Itertools;

/// Run `t` trials of the random-matrix statistics benchmark.
///
/// Each trial fills four `n * n` buffers (`n = 5`) with random values,
/// assembles them into `p` (`n * 4n` elements) and `q` (`2n * 2n` elements),
/// forms a transposed product of each with itself, raises that product to
/// the fourth power by squaring twice, and records the trace.
///
/// Returns, for the `p` traces and the `q` traces respectively,
/// `sqrt(t * (t * sum_sq - sum^2) / ((t - 1) * sum^2))`, i.e. the sample
/// standard deviation divided by the mean. `t` must be at least 2 for the
/// result to be finite.
pub fn randmatstat(t: usize) -> (f64, f64) {
    let mut rng = gen_rng(1234u64);

    let n = 5;

    let mut v = vec![0.; t];
    let mut w = vec![0.; t];

    {
        // All work buffers are allocated once and reused by every trial.
        let mut a = vec![0.; n * n];
        let mut b = vec![0.; n * n];
        let mut c = vec![0.; n * n];
        let mut d = vec![0.; n * n];
        let mut p = vec![0.; (n) * (4 * n)];
        let mut q = vec![0.; (2 * n) * (2 * n)];

        let mut pt_p1 = vec![0.; (4 * n) * (4 * n)];
        let mut pt_p2 = vec![0.; (4 * n) * (4 * n)];
        let mut qt_q1 = vec![0.; (2 * n) * (2 * n)];
        let mut qt_q2 = vec![0.; (2 * n) * (2 * n)];

        for (ve, we) in v.iter_mut().zip(w.iter_mut()) {
            fill_rand(&mut a, &mut rng);
            fill_rand(&mut b, &mut rng);
            fill_rand(&mut c, &mut rng);
            fill_rand(&mut d, &mut rng);

            // p is the four blocks laid out back to back.
            p[0 .. n * n].copy_from_slice(&a);
            p[n * n .. 2 * n * n].copy_from_slice(&b);
            p[2 * n * n .. 3 * n * n].copy_from_slice(&c);
            p[3 * n * n .. 4 * n * n].copy_from_slice(&d);

            // q is a 2n x 2n arrangement of the four blocks. The source
            // index must advance with `j`; indexing with `k` alone would
            // copy only the first n elements of each block over and over.
            for j in 0..n {
                for k in 0..n {
                    q[2 * n * j + k]           = a[n * j + k];
                    q[2 * n * j + n + k]       = b[n * j + k];
                    q[2 * n * (n + j) + k]     = c[n * j + k];
                    q[2 * n * (n + j) + n + k] = d[n * j + k];
                }
            }

            unsafe {
                let n = n as i32;

                // pt_p1 <- transposed product of p with itself (m = n, n = n, k = 4n,
                // leading dimension 4n), then two squarings for the fourth power.
                dgemm(Layout::ColumnMajor, Transpose::Ordinary, Transpose::None,
                      n , n, 4 * n, 1., &p, 4 * n, &p, 4 * n, 0.,
                      &mut pt_p1, 4 * n);
                dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
                      4 * n, 4 * n, 4 * n, 1., &pt_p1, 4 * n, &pt_p1, 4 * n, 0.,
                      &mut pt_p2, 4 * n);
                dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
                      4 * n, 4 * n, 4 * n, 1., &pt_p2, 4 * n, &pt_p2, 4 * n, 0.,
                      &mut pt_p1, 4 * n);
            }

            *ve = trace(&pt_p1, n * 4);

            unsafe {
                let n = n as i32;

                // Same sequence for q: transposed product, then square twice.
                dgemm(Layout::ColumnMajor, Transpose::Ordinary, Transpose::None,
                      2 * n, 2 * n, 2 * n, 1., &q, 2 * n, &q, 2 * n, 0.,
                      &mut qt_q1, 2 * n);
                dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
                      2 * n, 2 * n, 2 * n, 1., &qt_q1, 2 * n, &qt_q1, 2 * n, 0.,
                      &mut qt_q2, 2 * n);
                dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
                      2 * n, 2 * n, 2 * n, 1., &qt_q2, 2 * n, &qt_q2, 2 * n, 0.,
                      &mut qt_q1, 2 * n);
            }

            *we = trace(&qt_q1, 2 * n);
        }
    }

    // Accumulate sum and sum of squares of both trace series in one pass.
    let (v1, v2, w1, w2) = v.iter()
        .zip(w.iter())
        .fold((0., 0., 0., 0.), |(v1, v2, w1, w2), (ve, we)| (
            v1 + *ve,
            v2 + ve * ve,
            w1 + *we,
            w2 + we * we
        ));

    let t = t as f64;

    (
        f64::sqrt((t * (t * v2 - v1 * v1)) / ((t - 1.) * v1 * v1)),
        f64::sqrt((t * (t * w2 - w1 * w1)) / ((t - 1.) * w1 * w1)),
    )
}

/// Calculate the trace of a square matrix
///
/// `m` holds the `n * n` elements contiguously; the diagonal is every
/// `(n + 1)`-th element starting at index 0.
#[inline]
fn trace<'a, T>(m: &'a [T], n: usize) -> T
where
    T: Sum<&'a T>
{
    debug_assert_eq!(m.len(), n * n);
    m.into_iter().step(n + 1).sum()
}

/// Multiply two `n * n` buffers obtained from `myrand` with `dgemm` and
/// return the `n * n` product buffer.
pub fn randmatmul<R: Rng>(n: usize, mut rng: R) -> Vec<f64> {
    let a = myrand(n * n, &mut rng);
    let b = myrand(n * n, &mut rng);
    let mut c = vec![0.; n * n];

    unsafe {
        let n = n as i32;
        dgemm(Layout::ColumnMajor, Transpose::None, Transpose::None,
              n, n, n, 1., &a, n, &b, n, 0., &mut c, n);
    }

    c
}

/// Sanity check on a `randmatmul` result: the first element must be
/// non-negative. Panics otherwise (or if `m` is empty).
#[inline]
pub fn check_randmatmul(m: Vec<f64>) {
    assert!(0. <= m[0]);
}

--------------------------------------------------------------------------------
/rust/src/main.rs:
--------------------------------------------------------------------------------
#![feature(test)]
#![deny(unsafe_code)]

extern crate itertools;
extern crate mersenne_twister;
extern crate num;
extern crate rand;
extern crate test;

// Use BLAS directly
#[cfg(feature = "direct_blas")]
extern crate cblas;

#[cfg(feature = "direct_blas")]
extern crate blas_src;

// Use ndarray (with BLAS implementation)
#[cfg(not(feature = "direct_blas"))]
#[macro_use(s)]
extern crate ndarray;

use std::time::{Duration, Instant};
use std::u32;
use std::fs::OpenOptions;
use std::io::{BufWriter, Write};

use test::black_box;
use num::complex::Complex64;
use rand::Rng;

mod util;
use util::{gen_rng, myrand};

#[cfg(feature = "direct_blas")]
mod direct_blas;
#[cfg(feature = "direct_blas")]
use direct_blas::{randmatstat, randmatmul, check_randmatmul};

#[cfg(not(feature = "direct_blas"))]
use ndarray::Array2;
#[cfg(not(feature = "direct_blas"))]
use util::fill_rand;
#[cfg(not(feature = "direct_blas"))] 44 | use num::Zero; 45 | 46 | const NITER: u32 = 5; 47 | 48 | #[cfg(not(feature = "direct_blas"))] 49 | fn nrand(shape: (usize, usize), rng: &mut R) -> Array2 { 50 | let mut m = Array2::zeros(shape); 51 | fill_rand(&mut m, rng); 52 | m 53 | } 54 | 55 | fn fib(n: i32) -> i32 { 56 | let n = black_box(n); // prevent over-optimization 57 | if n < 2 { 58 | n 59 | } else { 60 | fib(n - 1) + fib(n - 2) 61 | } 62 | } 63 | 64 | fn mandel(z: Complex64) -> u32 { 65 | use std::iter; 66 | 67 | iter::repeat(z) 68 | .scan(z, |z, c| { 69 | let current = *z; 70 | *z = current * current + c; 71 | Some(current) 72 | }) 73 | .take(80) 74 | .take_while(|z| z.norm_sqr() <= 4.0) 75 | .count() as u32 76 | } 77 | 78 | fn mandelperf() -> Vec { 79 | (-10..=10).flat_map(|i| (-20..=5).map(move |j| (i, j))) 80 | .map(|(i, j)| (j as f64 / 10., i as f64 / 10.)) 81 | .map(|(re, im)| mandel(Complex64::new(re, im))) 82 | .collect() 83 | } 84 | 85 | fn pisum() -> f64 { 86 | let mut sum = 0.; 87 | for _ in 0..500 { 88 | sum = (1..10001) 89 | .map(|k| { 90 | let k = k as f64; 91 | 1. 
/ (k * k) 92 | }) 93 | .sum(); 94 | } 95 | sum 96 | } 97 | 98 | #[cfg(not(feature = "direct_blas"))] 99 | fn randmatstat(t: usize) -> (f64, f64) { 100 | let mut rng = gen_rng(1234u64); 101 | 102 | let n = 5; 103 | 104 | let mut v = vec![0.; t]; 105 | let mut w = vec![0.; t]; 106 | 107 | for (ve, we) in v.iter_mut().zip(w.iter_mut()) { 108 | let a = nrand((n, n), &mut rng); 109 | let b = nrand((n, n), &mut rng); 110 | let c = nrand((n, n), &mut rng); 111 | let d = nrand((n, n), &mut rng); 112 | let p = { // P = [a b c d] 113 | let mut p = Array2::::zeros((n, 4 * n)); 114 | let n = n as isize; 115 | p.slice_mut(s![.., 0..n]).assign(&a); 116 | p.slice_mut(s![.., n..2*n]).assign(&b); 117 | p.slice_mut(s![.., 2*n..3*n]).assign(&c); 118 | p.slice_mut(s![.., 3*n..4*n]).assign(&d); 119 | p 120 | }; 121 | let q = { // Q = [a b ; c d] 122 | let mut q = Array2::::zeros((2 * n, 2 * n)); 123 | let n = n as isize; 124 | q.slice_mut(s![0..n, 0..n]).assign(&a); 125 | q.slice_mut(s![0..n, n..2*n]).assign(&b); 126 | q.slice_mut(s![n..2*n, 0..n]).assign(&c); 127 | q.slice_mut(s![n..2*n, n..2*n]).assign(&d); 128 | q 129 | }; 130 | 131 | let pt = p.t(); 132 | let ptp = pt.dot(&p); 133 | let ptp2 = ptp.dot(&ptp); 134 | let ptp4 = ptp2.dot(&ptp2); 135 | *ve = trace_arr(&ptp4); 136 | 137 | let qt = q.t(); 138 | let ptq = qt.dot(&q); 139 | let ptq2 = ptq.dot(&ptq); 140 | let ptq4 = ptq2.dot(&ptq2); 141 | *we = trace_arr(&ptq4); 142 | } 143 | 144 | let (v1, v2, w1, w2) = v.iter() 145 | .zip(w.iter()) 146 | .fold((0., 0., 0., 0.), |(v1, v2, w1, w2), (ve, we)| ( 147 | v1 + *ve, 148 | v2 + ve * ve, 149 | w1 + *we, 150 | w2 + we * we 151 | )); 152 | 153 | let t = t as f64; 154 | 155 | ( 156 | f64::sqrt((t * (t * v2 - v1 * v1)) / ((t - 1.) * v1 * v1)), 157 | f64::sqrt((t * (t * w2 - w1 * w1)) / ((t - 1.) 
* w1 * w1)), 158 | ) 159 | } 160 | 161 | /// Calculate the trace of a square matrix 162 | #[cfg(not(feature = "direct_blas"))] 163 | #[inline] 164 | fn trace_arr<'a, T: 'a>(m: &'a Array2) -> T 165 | where 166 | T: Zero + Clone 167 | { 168 | m.diag().scalar_sum() 169 | } 170 | 171 | #[cfg(not(feature = "direct_blas"))] 172 | fn randmatmul(n: usize, mut rng: R) -> Array2 { 173 | let a = nrand((n, n), &mut rng); 174 | let b = nrand((n, n), &mut rng); 175 | 176 | a.dot(&b) 177 | } 178 | 179 | #[cfg(not(feature = "direct_blas"))] 180 | #[inline] 181 | fn check_randmatmul(m: Array2) { 182 | assert!(0. <= m[[0, 0]]); 183 | } 184 | 185 | #[test] 186 | fn test_quicksort() { 187 | let mut a = [10., 9., 8., 7., 6., 5., 4., 3., 2., 1.]; 188 | quicksort(a.as_mut(), 0); 189 | assert_eq!(a, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]); 190 | 191 | let mut a = [8., 2., 10., 4., 7., 6., 9., 5., 1., 3.]; 192 | quicksort(a.as_mut(), 0); 193 | assert_eq!(a, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]); 194 | } 195 | 196 | fn quicksort(a: &mut [f64], mut lo: usize) { 197 | let hi = a.len() as usize - 1; 198 | let mut i: usize = lo; 199 | // j is isize because it can be -1 200 | let mut j: isize = hi as isize; 201 | 202 | while i < hi { 203 | let pivot = a[(lo + hi) / 2]; 204 | while i as isize <= j { 205 | while a[i] < pivot { 206 | i += 1; 207 | } 208 | while a[j as usize] > pivot { 209 | j -= 1; 210 | } 211 | if i as isize <= j { 212 | a.swap(i, j as usize); 213 | i += 1; 214 | j -= 1; 215 | } 216 | } 217 | 218 | let (l, _r) = a.split_at_mut((j + 1) as usize); 219 | 220 | if (lo as isize) < j { 221 | quicksort(l, lo); 222 | } 223 | 224 | lo = i; 225 | j = hi as isize; 226 | } 227 | } 228 | 229 | fn printfd(n: usize) { 230 | let f = OpenOptions::new() 231 | .write(true).open("/dev/null").unwrap(); 232 | let mut f = BufWriter::new(f); 233 | for i in 0..n { 234 | writeln!(f, "{} {}", i, i).unwrap(); 235 | } 236 | } 237 | 238 | fn print_perf(name: &str, t: f64) { 239 | 
println!("rust,{},{:.6}", name, t * 1000.); 240 | } 241 | 242 | /// convert duration to float in seconds 243 | fn to_float(d: Duration) -> f64 { 244 | d.as_secs() as f64 + d.subsec_nanos() as f64 / 1e9 245 | } 246 | 247 | #[inline] 248 | fn measure_best(niters: u32, mut op: F) -> Duration { 249 | (0..niters) 250 | .map(move |_| { 251 | let t = Instant::now(); 252 | op(); 253 | t.elapsed() 254 | }).min().unwrap() 255 | } 256 | 257 | fn main() { 258 | // initialize RNG 259 | let mut rng = gen_rng(0); 260 | 261 | // fib(20) 262 | assert_eq!(fib(20), 6765); 263 | let mut f = 0i32; 264 | let fibarg = 20; 265 | let tmin = measure_best(NITER, || { 266 | for _ in 0..1000 { 267 | f = f.wrapping_add(fib(fibarg)); 268 | } 269 | }); 270 | print_perf("recursion_fibonacci", to_float(tmin) / 1000.0); 271 | 272 | // parse_int 273 | let tmin = measure_best(NITER, || { 274 | for _ in 0..1000 * 100 { 275 | let n: u32 = rng.gen(); 276 | let s = format!("{:x}", n); 277 | let m = u32::from_str_radix(&s, 16).unwrap(); 278 | assert_eq!(m, n); 279 | } 280 | }); 281 | print_perf("parse_integers", to_float(tmin) / 100.0); 282 | 283 | let mandel_sum_init = black_box(0u32); 284 | let mut mandel_sum2 = mandel_sum_init; 285 | let tmin = measure_best(NITER, || { 286 | for j in 0..100 { 287 | let m = mandelperf(); 288 | if j == 0 { 289 | let mandel_sum: u32 = m.iter().sum(); 290 | assert_eq!(mandel_sum, 14791); 291 | mandel_sum2 += mandel_sum; 292 | } 293 | } 294 | }); 295 | assert_eq!(mandel_sum2, 14791 * NITER); 296 | print_perf("userfunc_mandelbrot", to_float(tmin) / 100.0); 297 | 298 | // sort 299 | let tmin = measure_best(NITER, || { 300 | let mut d = myrand(5000, &mut rng); 301 | quicksort(&mut d, 0); 302 | }); 303 | print_perf("recursion_quicksort", to_float(tmin)); 304 | 305 | // pi sum 306 | let mut pi = 0.; 307 | let tmin = measure_best(NITER, || { 308 | pi = black_box(pisum()); 309 | }); 310 | assert!(f64::abs(pi - 1.644834071848065) < 1e-12); 311 | print_perf("iteration_pi_sum", 
to_float(tmin)); 312 | 313 | // rand mat stat 314 | let mut r = (0., 0.); 315 | let tmin = measure_best(NITER, || { 316 | r = black_box(randmatstat(1000)); 317 | }); 318 | print_perf("matrix_statistics", to_float(tmin)); 319 | 320 | // rand mat mul 321 | let tmin = measure_best(NITER, || { 322 | let c = randmatmul(1000, &mut rng); 323 | check_randmatmul(c); 324 | }); 325 | print_perf("matrix_multiply", to_float(tmin)); 326 | 327 | // printfd 328 | let tmin = measure_best(NITER, || { 329 | printfd(100000); 330 | }); 331 | print_perf("print_to_file", to_float(tmin)); 332 | } 333 | -------------------------------------------------------------------------------- /rust/src/util.rs: -------------------------------------------------------------------------------- 1 | use rand::{Rand, Rng, SeedableRng}; 2 | 3 | use mersenne_twister::MT19937_64; 4 | pub type MTRng = MT19937_64; 5 | 6 | #[inline] 7 | pub fn gen_rng(seed: u64) -> MTRng { 8 | MTRng::from_seed(seed) 9 | } 10 | 11 | pub fn fill_rand<'a, I, T: 'a, R>(a: I, rng: &mut R) 12 | where 13 | I: IntoIterator, 14 | T: Rand, 15 | R: Rng, 16 | { 17 | for v in a.into_iter() { 18 | *v = rng.gen(); 19 | } 20 | } 21 | 22 | pub fn myrand(n: usize, rng: &mut R) -> Vec { 23 | let mut d: Vec = vec![0.; n]; 24 | fill_rand(&mut d, rng); 25 | d 26 | } 27 | -------------------------------------------------------------------------------- /scala/.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache 6 | .history 7 | .lib/ 8 | dist/* 9 | target/ 10 | lib_managed/ 11 | src_managed/ 12 | project/boot/ 13 | project/plugins/project/ 14 | 15 | # Scala-IDE specific 16 | .scala_dependencies 17 | .worksheet 18 | -------------------------------------------------------------------------------- /scala/build.sbt: -------------------------------------------------------------------------------- 1 | // This file was formerly a part of Julia. 
// License is MIT: https://julialang.org/license

libraryDependencies ++= Seq(
  "org.scalanlp" %% "breeze" % "0.10",
  "org.scalanlp" %% "breeze-natives" % "0.10"
)

resolvers ++= Seq(
  "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/"
)

scalaVersion := "2.11.1"

showSuccess := false

onLoadMessage := ""

logLevel := Level.Warn

--------------------------------------------------------------------------------
/scala/src/main/scala/perf.scala:
--------------------------------------------------------------------------------
// This file was formerly a part of Julia. License is MIT: https://julialang.org/license

import scala.util._
import java.io._
import breeze.linalg._
import breeze.numerics._
import breeze.stats._
import breeze.math._
//import com.github.fommil.netlib.{BLAS}

// Micro-benchmarks; each time_* method runs its workload NITER times and
// returns the smallest elapsed time in nanoseconds (scaled per batch where
// the workload is repeated inside the timed region).
object PerfBreeze {
  final val NITER = 5

  // print results appropriately. times are in milliseconds
  // (t is given in nanoseconds and divided by 1e6 here)
  def print_perf(name:String, t:Double) = {
    printf("scala,%s,%.9f\n", name, t/1e6)
  }

  // time fib
  // Naive doubly-recursive Fibonacci; returns n itself for n < 2.
  def fib(n:Int):Int = {
    if (n < 2) n else fib(n-1) + fib(n-2)
  }

  // Best time of NITER batches of 1000 fib(20) calls, divided by 1000.
  def time_fib() = {
    assert(fib(20) == 6765)
    var tmin = Long.MaxValue
    var f = 0

    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      for(j <- 1 to 1000) {
        f += fib(20)
      }
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }

    tmin / 1000.0
  }

  // time parseint
  // Round-trips 1000 random Ints through a signed hex string per batch;
  // returns the best batch time divided by 1000.
  def time_parseint() = {
    val generator = scala.util.Random
    var tmin = Long.MaxValue

    for(i <- 1 to NITER) {
      var rand:Int = 0
      var rands:String = "0"
      var parsed:Int = 0
      val t1 = System.nanoTime()
      for(j <- 1 to 1000) {
        rand = generator.nextInt()
        // toHexString is unsigned, so negatives are written as "-" + |rand|
        rands = if(rand < 0) "-" + abs(rand).toHexString else rand.toHexString
        parsed = Integer.parseInt(rands, 16)
        assert(rand == parsed)
      }
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }
    tmin / 1000.0
  }

  // time mandel
  // Iterates z <- c + z*z from z = c and returns the first step index at
  // which |z| > 2, or maxiter (80) if that never happens.
  def mandel(zin:Complex):Int = {
    val c = zin
    var z = zin
    val maxiter = 80
    // `until` (exclusive): an 81st pass could only return maxiter as well,
    // so the result is the same as with `0 to maxiter`.
    for(n <- 0 until maxiter) {
      if(z.abs > 2) return n
      z = c + (z * z)
    }
    maxiter
  }

  // Evaluates mandel on the grid re = -2.0..0.5, im = -1.0..1.0, step 0.1.
  // NOTE(review): `i` here is presumably the imaginary unit brought in by
  // `import breeze.math._` -- confirm.
  def mandelperf() = {
    for(re <- -20 to 5; im <- -10 to 10) yield mandel(re/10.0 + i * im/10.0)
  }

  // Best time of NITER batches of 100 mandelperf() runs, divided by 100.
  // The checksum is taken on the first run of every batch.
  def time_mandel() = {
    var mandel_sum = 0
    var mandel_sum2 = 0
    var tmin = Long.MaxValue

    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      for(j <- 1 to 100) {
        val mandel_arr = mandelperf()
        if(j == 1) {
          mandel_sum = sum(mandel_arr)
          mandel_sum2 += mandel_sum
        }
      }
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }
    assert(mandel_sum == 14791)
    assert(mandel_sum2 == mandel_sum * NITER)
    tmin / 100.0
  }

  // time quicksort
  // Sorts a(lo..hi) (both inclusive) in place, ascending, and returns `a`.
  // Partitions around the middle element, recurses into the left part and
  // loops on the right part.
  def quicksort(a:Array[Double], lo:Int, hi:Int):Array[Double] = {
    var i, l = lo
    var j = hi

    def _swap(i:Int, j:Int) = {
      val tmp = a(i)
      a(i) = a(j)
      a(j) = tmp
    }

    while(i < hi) {
      val pivot = a((l+hi)>>>1)
      while(i <= j) {
        while(a(i) < pivot) i += 1
        while(a(j) > pivot) j -= 1
        if(i <= j) {
          _swap(i, j)
          i += 1
          j -= 1
        }
      }
      if(l < j) quicksort(a, l, j)
      // Continue with the right part a(i..hi). The lower bound must move to
      // i (as in the Rust version's `lo = i`), not j: with `l = j` the next
      // pivot index could fall outside the range being partitioned.
      l = i
      j = hi
    }
    a
  }

  /*
  def checksorted(a:Array[Double]):Boolean = {
    for(i <- 0 to a.length-2) {
      assert(a(i) < a(i+1))
    }
    true
  }
  */

  // Best time of NITER batches, each generating and sorting 1000 random
  // vectors of length 5000, divided by 1000.
  def time_quicksort() = {
    var tmin = Long.MaxValue

    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      for(j <- 1 to 1000) {
        val A = DenseVector.rand[Double](5000)
        quicksort(A.data, 0, 4999)
      }
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }
    tmin / 1000.0
  }

  // time pisum
  // Computes the partial sum of 1/k^2 for k = 1..10000, 500 times over, and
  // returns the last result.
  def pisum() = {
    var sum = 0.0
    for(j <- 1 to 500) {
      sum = 0.0
      for(k <- 1 to 10000) {
        sum += 1.0/(k*k)
      }
    }
    sum
  }

  // Best time of NITER pisum() calls; each result is checked.
  def time_pisum() = {
    var tmin = Long.MaxValue
    var pi = 0:Double
    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      pi = pisum()
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
      assert(abs(pi-1.644834071848065) < 1e-12)
    }
    tmin
  }

  // time printfd
  // Writes n lines "<i> <i>" to /dev/null through a buffered PrintStream.
  // Exceptions are reported on stdout; the stream is always closed.
  def printfd(n:Int) = {
    var stream = None: Option[PrintStream]
    try {
      stream = Some(new PrintStream(new BufferedOutputStream(new FileOutputStream("/dev/null"))))
      val valid_stream = stream.get
      for (i <- 1 to n) {
        // Use a real format string with a trailing newline rather than
        // passing the data itself as the format; the arguments are boxed
        // because printf takes Object varargs.
        valid_stream.printf("%d %d\n", Int.box(i), Int.box(i))
      }
    } catch {
      case e: Exception => println("Exception caught: " + e)
    } finally {
      if(stream.isDefined) stream.get.close()
    }
  }

  // Best time of NITER printfd(100000) calls.
  def time_printfd() = {
    var tmin = Long.MaxValue
    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      printfd(100000)
      val t = System.nanoTime() - t1
      if(t < tmin) tmin = t
    }
    tmin
  }

  // random matrix statistics
  // For t trials with four Gaussian 5x5 matrices a, b, c, d: builds
  // P = [a b c d] and Q = [a b; c d] and records trace((P^T P)^4) and
  // trace((Q^T Q)^4). Returns stddev/mean of each series.
  def randmatstat(t:Int):(Double,Double) = {
    val n = 5
    val v = DenseVector.zeros[Double](t)
    val w = DenseVector.zeros[Double](t)

    val g = breeze.stats.distributions.Gaussian(0, 1)
    for(i <- 0 to t-1) {
      val a = DenseMatrix.rand(n, n, g)
      val b = DenseMatrix.rand(n, n, g)
      val c = DenseMatrix.rand(n, n, g)
      val d = DenseMatrix.rand(n, n, g)
      val P = DenseMatrix.horzcat(a, b, c, d)
      val Q = DenseMatrix.vertcat(DenseMatrix.horzcat(a, b), DenseMatrix.horzcat(c, d))
      val V = P.t * P
      val W = Q.t * Q

      v(i) = trace(V * V * V * V)
      w(i) = trace(W * W * W * W)
    }
    (stddev(v)/mean(v), stddev(w)/mean(w))
  }

  // Best time of NITER randmatstat(1000) calls; both ratios must lie
  // strictly between 0.5 and 1.0.
  def time_randmatstat() = {
    var tmin = Long.MaxValue
    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      val (s1, s2) = randmatstat(1000)
      val t = System.nanoTime() - t1
      assert(0.5 < s1 && s1 < 1.0 && 0.5 < s2 && s2 < 1.0)

      if(t < tmin) tmin = t
    }
    tmin
  }

  // random matrix multiplication
  // Multiplies two random t x t matrices and returns the product.
  def randmatmul(t:Int):DenseMatrix[Double] = {
    val m1 = randomDouble((t, t))
    val m2 = randomDouble((t, t))
    m1 * m2
  }

  // Best time of NITER randmatmul(1000) calls; element (0,0) of each
  // product must be non-negative.
  def time_randmatmul() = {
    var tmin = Long.MaxValue
    for(i <- 1 to NITER) {
      val t1 = System.nanoTime()
      val m = randmatmul(1000)
      val t = System.nanoTime() - t1
      assert(0 <= m(0,0))

      if(t < tmin) tmin = t
    }
    tmin
  }


  // Runs every benchmark and prints one result line per benchmark.
  def main(args: Array[String]) = {
    //println("BLAS: " + BLAS.getInstance().getClass().getName())
    print_perf("fib", time_fib())
    print_perf("parse_int", time_parseint())
    print_perf("mandel", time_mandel())
    print_perf("quicksort", time_quicksort())
    print_perf("pi_sum", time_pisum())
    print_perf("rand_mat_stat", time_randmatstat())
    print_perf("rand_mat_mul", time_randmatmul())
    print_perf("printfd", time_printfd())
  }
}

--------------------------------------------------------------------------------