├── benches ├── slow │ ├── bigint │ │ ├── Project.toml │ │ └── pidigits.jl │ └── rb_tree │ │ ├── Project.toml │ │ └── rb_tree.jl ├── serial │ ├── append │ │ ├── Project.toml │ │ └── append.jl │ ├── bigint │ │ ├── Project.toml │ │ └── pollard.jl │ ├── linked │ │ ├── Project.toml │ │ ├── list.jl │ │ └── tree.jl │ ├── strings │ │ ├── Project.toml │ │ └── strings.jl │ ├── big_arrays │ │ ├── Project.toml │ │ ├── single_ref.jl │ │ └── many_refs.jl │ └── TimeZones │ │ ├── Project.toml │ │ └── TimeZones.jl ├── fragmentation │ └── synthetic │ │ ├── Project.toml │ │ └── exploit_free_list.jl ├── multithreaded │ ├── binary_tree │ │ ├── Project.toml │ │ ├── tree_immutable.jl │ │ └── tree_mutable.jl │ ├── mergesort_parallel │ │ ├── Project.toml │ │ └── mergesort_parallel.jl │ ├── mm_divide_and_conquer │ │ ├── Project.toml │ │ └── mm_divide_and_conquer.jl │ └── big_arrays │ │ ├── Project.toml │ │ ├── objarray.jl │ │ └── issue-52937.jl └── compiler │ └── inference │ ├── Project.toml │ └── inference_benchmarks.jl ├── .gitignore ├── vis ├── Project.toml └── plot.jl ├── Project.toml ├── LICENSE ├── .buildkite └── pipeline.yml ├── util ├── compare_bins.jl ├── find-all-min-heaps.jl └── utils.jl ├── README.md └── run_benchmarks.jl /benches/slow/bigint/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/append/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/bigint/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/linked/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 
| -------------------------------------------------------------------------------- /benches/serial/strings/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/big_arrays/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/fragmentation/synthetic/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/multithreaded/binary_tree/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/multithreaded/mergesort_parallel/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/multithreaded/mm_divide_and_conquer/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/TimeZones/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53" 3 | -------------------------------------------------------------------------------- /benches/multithreaded/big_arrays/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 3 | -------------------------------------------------------------------------------- 
/benches/slow/rb_tree/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 3 | -------------------------------------------------------------------------------- /benches/compiler/inference/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temporary 2 | *.DS_Store 3 | *.swp 4 | *.jl.cov 5 | *.jl.*.cov 6 | *.jl.mem 7 | *~ 8 | TAGS 9 | *Manifest.toml 10 | results.csv 11 | .vscode 12 | -------------------------------------------------------------------------------- /vis/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 3 | CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" 4 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 5 | TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" 6 | -------------------------------------------------------------------------------- /benches/serial/TimeZones/TimeZones.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using TimeZones 4 | 5 | zdts = [now(tz"UTC") for _ in 1:100_000_000]; 6 | 7 | @gctime sum(hash, ["trashfire"^min(1000, i) for i in 1:500_000]) 8 | 9 | -------------------------------------------------------------------------------- /benches/serial/append/append.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | # simulates allocation profile of some dataframes benchmarks 4 | # by repeatedly append to a vector 5 
| function append_lots(iters=100*1024, size=1596) 6 | v = Float64[] 7 | for i = 1:iters 8 | append!(v,rand(size)) 9 | end 10 | return v 11 | end 12 | 13 | @gctime append_lots()[end] 14 | -------------------------------------------------------------------------------- /Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 3 | DocOpt = "968ba79b-81e4-546f-ab3a-2eecfa62a9db" 4 | Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" 5 | JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 6 | PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" 7 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 8 | Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" 9 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 10 | TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" 11 | -------------------------------------------------------------------------------- /benches/serial/linked/list.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | mutable struct ListNode 4 | key::Int64 5 | next::ListNode 6 | ListNode() = new() 7 | ListNode(x)= new(x) 8 | ListNode(x,y) = new(x,y); 9 | end 10 | 11 | function list(n=128) 12 | start::ListNode = ListNode(1) 13 | current::ListNode = start 14 | for i = 2:(n*1024^2) 15 | current = ListNode(i,current) 16 | end 17 | return current.key 18 | end 19 | 20 | @gctime list() 21 | 22 | -------------------------------------------------------------------------------- /benches/multithreaded/big_arrays/objarray.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using Random: seed! 
4 | seed!(1) 5 | 6 | abstract type Cell end 7 | 8 | struct CellA<:Cell 9 | a::Ref{Int} 10 | end 11 | 12 | struct CellB<:Cell 13 | b::String 14 | end 15 | 16 | function fillcells!(mc::Array{Cell}) 17 | for ind in eachindex(mc) 18 | mc[ind] = ifelse(rand() > 0.5, CellA(ind), CellB(string(ind))) 19 | end 20 | return mc 21 | end 22 | 23 | function work(size) 24 | mcells = Array{Cell}(undef, size, size) 25 | mc = fillcells!(mcells) 26 | end 27 | 28 | function run(maxsize) 29 | Threads.@threads for i in 1:maxsize 30 | work(i*1000) 31 | end 32 | end 33 | 34 | @gctime run(8) -------------------------------------------------------------------------------- /benches/serial/big_arrays/single_ref.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | module SingleRef 4 | 5 | """ 6 | This benchmark stresses the array handling in the GC. 7 | We allocate a large arrays that all contain a reference to a singular object. 8 | The mark-queue of the GC should not overflow. 9 | """ 10 | function construct(array_length) 11 | obj = Ref{Int}(0) 12 | arr = Array{Ref{Int}}(undef, array_length) 13 | fill!(arr, obj) 14 | GC.gc(true) 15 | GC.gc(true) 16 | Core.donotdelete(arr) 17 | return nothing 18 | end 19 | 20 | end #module 21 | 22 | using .SingleRef 23 | 24 | const GB = 1<<30 25 | const MAX_MEMORY = round(Int, 0.8 * GB) 26 | const array_length = div(MAX_MEMORY, sizeof(Ptr{C_NULL})) 27 | 28 | @gctime SingleRef.construct(array_length) 29 | -------------------------------------------------------------------------------- /benches/serial/big_arrays/many_refs.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | module ManyRef 4 | 5 | """ 6 | This benchmark stresses the array handling in the GC. 7 | We allocate a large arrays that all contain a reference to a many small objects. 
8 | The mark-queue of the GC should not overflow. 9 | """ 10 | function construct(array_length) 11 | GC.enable(false) 12 | arr = Array{Ref{Int}}(undef, array_length) 13 | for i in eachindex(arr) 14 | arr[i] = Ref{Int}(0) 15 | end 16 | GC.enable(true) 17 | GC.gc(true) 18 | GC.gc(true) 19 | Core.donotdelete(arr) 20 | return nothing 21 | end 22 | 23 | end #module 24 | 25 | using .ManyRef 26 | 27 | const GB = 1<<30 28 | const MAX_MEMORY = round(Int, 0.8 * GB) 29 | const array_length = div(MAX_MEMORY, 3*sizeof(Ptr{C_NULL})) 30 | 31 | @gctime ManyRef.construct(array_length) 32 | -------------------------------------------------------------------------------- /benches/multithreaded/big_arrays/issue-52937.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using Base.Threads: @threads 4 | using Random: shuffle 5 | 6 | function sample_vote!(_rb, chop_counts) 7 | pts = rand(length(chop_counts)) 8 | N = length(_rb) 9 | _srt = 4245 10 | partialsortperm!(_rb, pts, 1:_srt; lt = <, rev = true) 11 | while sum(@views chop_counts[_rb[1:_srt]]) ≤ 5660 12 | _srt = min(2 * _srt, N) 13 | partialsortperm!(_rb, pts, 1:_srt; lt = <, rev = true) 14 | end 15 | end 16 | 17 | function parallel_scores(chop_counts) 18 | @threads for i in 1:8 19 | _rb = collect(1:length(chop_counts)) 20 | # the bigger this number, the more % GC time 21 | for _ ∈ 1:1000 22 | sample_vote!(_rb, chop_counts) 23 | end 24 | end 25 | end 26 | 27 | # kind of arbitrary, but approximates my data 28 | chop_counts = shuffle(trunc.(Int, 6500 ./ (50:100_000))) 29 | @gctime parallel_scores(chop_counts) 30 | -------------------------------------------------------------------------------- /benches/slow/bigint/pidigits.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | # The Computer Language Benchmarks Game 4 | # 
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ 5 | 6 | # based on Mario Pernici Python's program 7 | 8 | function pidigits(N=68470) 9 | i = k = ns = 0 10 | k1 = 1 11 | n,a,d,t,u = map(BigInt,(1,0,1,0,0)) 12 | 13 | while true 14 | k += 1 15 | t = n << 1 16 | n *= k 17 | a += t 18 | k1 += 2 19 | a *= k1 20 | d *= k1 21 | 22 | if a >= n 23 | t,u = divrem(n*3 +a, d) 24 | u += n 25 | if d > u 26 | if i >= N 27 | return ns 28 | end 29 | ns = ns*10 + t 30 | i += 1 31 | if mod(i,10) == 0 32 | ns = 0 33 | end 34 | a -= d*t 35 | a *= 10 36 | n *= 10 37 | 38 | end 39 | end 40 | end 41 | end 42 | 43 | @gctime pidigits() 44 | -------------------------------------------------------------------------------- /benches/serial/bigint/pollard.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | function pollardfactor(n::T=big(1208925819691594988651321)) where T<:Integer 4 | for c in T(1):(n - 3) 5 | G, r, q = ones(T,3) 6 | y = 2 7 | m::T = 1900 8 | ys::T = 0 9 | x::T = 0 10 | while G == 1 11 | x = y 12 | for i in 1:r 13 | y = (y^2 + c) % n 14 | end 15 | k = T(0) 16 | G = T(1) 17 | while k < r && G == 1 18 | for i in 1:min(r - k, m) 19 | ys = y 20 | y = (y^2 + c) % n 21 | q = (q * abs(x - y)) % n 22 | end 23 | G = gcd(q, n) 24 | k += m 25 | end 26 | r *= 2 27 | end 28 | G == n && (G = T(1)) 29 | while G == 1 30 | ys = (ys^2 + c) % n 31 | G = gcd(abs(x - ys), n) 32 | end 33 | if G != n 34 | return G 35 | end 36 | end 37 | end 38 | 39 | @gctime pollardfactor() 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Christine H. 
Flood 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.buildkite/pipeline.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - label: "Julia {{matrix.version}} -- {{matrix.benchmark}} -- {{matrix.category}}" 3 | plugins: 4 | - JuliaCI/julia#v1: 5 | version: "{{matrix.version}}" 6 | matrix: 7 | setup: 8 | version: 9 | - "nightly" 10 | benchmark: 11 | - "serial" 12 | - "multithreaded" 13 | category: 14 | - "all" 15 | adjustments: 16 | # - with: 17 | # version: "nightly" 18 | # benchmark: "slow" 19 | # category: "rb_tree" 20 | - with: 21 | version: "nightly" 22 | benchmark: "slow" 23 | category: "bigint" 24 | agents: 25 | queue: "juliaecosystem" 26 | arch: "x86_64" 27 | os: "linux" 28 | commands: | 29 | echo "--- Instantiate" 30 | julia --project=. 
-e 'using Pkg; Pkg.instantiate(); Pkg.build()' 31 | 32 | echo "+++ Run benchmarks {{matrix.benchmark}}" 33 | julia --threads=auto --project=. run_benchmarks.jl {{matrix.benchmark}} {{matrix.category}} 34 | artifact_paths: 35 | - "results.csv" 36 | 37 | if: build.message !~ /\[skip tests\]/ 38 | timeout_in_minutes: 60 -------------------------------------------------------------------------------- /benches/serial/linked/tree.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using Random 4 | 5 | mutable struct TreeNode 6 | key::Int 7 | left::TreeNode 8 | right::TreeNode 9 | TreeNode() = new() 10 | TreeNode(x) = new(x) 11 | TreeNode(x,y,z) = new(x,y,z) 12 | end 13 | 14 | function insert(key, n::TreeNode) 15 | if key < n.key 16 | if !isdefined(n,:left) 17 | n.left = TreeNode(key) 18 | else 19 | insert(key, n.left) 20 | end 21 | elseif key > n.key 22 | if !isdefined(n,:right) 23 | n.right = TreeNode(key) 24 | else 25 | insert(key, n.right) 26 | end 27 | end 28 | end 29 | 30 | function sumTree(n::TreeNode) 31 | sum = n.key 32 | if isdefined(n,:left) 33 | sum += sumTree(n.left) 34 | end 35 | if isdefined(n,:right) 36 | sum += sumTree(n.right) 37 | end 38 | 39 | return sum 40 | end 41 | 42 | # tree_size is the number of elements in mb 43 | function tree(n=8) 44 | n *= 1024^2 45 | rng = Xoshiro(12345) 46 | temp = rand(rng, Int, n) 47 | root::TreeNode = TreeNode(temp[1]) 48 | for i = 2:n 49 | insert(temp[i], root) 50 | end 51 | return sumTree(root) 52 | end 53 | 54 | @gctime tree() 55 | -------------------------------------------------------------------------------- /util/compare_bins.jl: -------------------------------------------------------------------------------- 1 | const doc = """compare_bin.jl -- Cross binary comparison between GC benchmarks 2 | Usage: 3 | compare_bins.jl <file1> <file2> 4 | Options: 5 | -h, --help Show this screen. 
6 | """ 7 | 8 | using DocOpt 9 | using JSON 10 | using PrettyTables 11 | 12 | const args = docopt(doc, version = v"0.1.1") 13 | 14 | function main(args) # compare the medians of two benchmark JSON result files side by side 15 | f1 = args["<file1>"] 16 | f2 = args["<file2>"] 17 | 18 | js1 = JSON.parsefile(f1) 19 | js2 = JSON.parsefile(f2) 20 | 21 | labels = ["total time [ms]", 22 | "gc time [ms]", 23 | "mark time [ms]", 24 | "sweep time [ms]", 25 | "max pause [ms]", 26 | "max memory [MB]", 27 | "pct gc"] 28 | header = ["", f1, f2] 29 | 30 | # show medians 31 | raw_data = [js1["total time"][2] js2["total time"][2]; 32 | js1["gc time"][2] js2["gc time"][2]; 33 | js1["mark time"][2] js2["mark time"][2]; 34 | js1["sweep time"][2] js2["sweep time"][2]; 35 | js1["max pause"][2] js2["max pause"][2]; 36 | js1["max memory"][2] js2["max memory"][2]; 37 | js1["pct gc"][2] js2["pct gc"][2]] 38 | 39 | data = hcat(labels, raw_data) 40 | pretty_table(data; header, formatters=ft_printf("%0.0f")) 41 | end 42 | 43 | main(args) 44 | -------------------------------------------------------------------------------- /benches/multithreaded/binary_tree/tree_immutable.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | module BinaryTreeImmutable 4 | 5 | # Adopted from 6 | # https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/binarytrees.html#binarytrees 7 | 8 | using Base.Threads 9 | using Printf 10 | 11 | struct Node 12 | l::Union{Nothing, Node} 13 | r::Union{Nothing, Node} 14 | end 15 | 16 | function make(n::Int) 17 | return n === 0 ? Node(nothing, nothing) : Node(make(n-1), make(n-1)) 18 | end 19 | 20 | function check(node::Node) 21 | return 1 + (node.l === nothing ? 
0 : check(node.l) + check(node.r)) 22 | end 23 | 24 | function binary_trees(io, n::Int) 25 | @printf io "stretch tree of depth %jd\t check: %jd\n" n+1 check(make(n+1)) 26 | 27 | long_tree = make(n) 28 | minDepth = 4 29 | resultSize = div((n - minDepth), 2) + 1 30 | results = Vector{String}(undef, resultSize) 31 | Threads.@threads for depth in minDepth:2:n 32 | c = 0 33 | niter = 1 << (n - depth + minDepth) 34 | for _ in 1:niter 35 | c += check(make(depth)) 36 | end 37 | index = div((depth - minDepth),2) + 1 38 | results[index] = @sprintf "%jd\t trees of depth %jd\t check: %jd\n" niter depth c 39 | end 40 | 41 | for i in results 42 | write(io, i) 43 | end 44 | 45 | @printf io "long lived tree of depth %jd\t check: %jd\n" n check(long_tree) 46 | end 47 | 48 | end #module 49 | 50 | using .BinaryTreeImmutable 51 | 52 | @gctime BinaryTreeImmutable.binary_trees(devnull, 21) 53 | -------------------------------------------------------------------------------- /benches/multithreaded/binary_tree/tree_mutable.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | module BinaryTreeMutable 4 | 5 | # Adopted from 6 | # https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/binarytrees.html#binarytrees 7 | 8 | using Base.Threads 9 | using Printf 10 | 11 | mutable struct Node 12 | l::Union{Nothing, Node} 13 | r::Union{Nothing, Node} 14 | end 15 | 16 | function make(n::Int) 17 | return n === 0 ? Node(nothing, nothing) : Node(make(n-1), make(n-1)) 18 | end 19 | 20 | function check(node::Node) 21 | return 1 + (node.l === nothing ? 
0 : check(node.l) + check(node.r)) 22 | end 23 | 24 | function binary_trees(io, n::Int) 25 | @printf io "stretch tree of depth %jd\t check: %jd\n" n+1 check(make(n+1)) 26 | 27 | long_tree = make(n) 28 | minDepth = 4 29 | resultSize = div((n - minDepth), 2) + 1 30 | results = Vector{String}(undef, resultSize) 31 | Threads.@threads for depth in minDepth:2:n 32 | c = 0 33 | niter = 1 << (n - depth + minDepth) 34 | for _ in 1:niter 35 | c += check(make(depth)) 36 | end 37 | index = div((depth - minDepth),2) + 1 38 | results[index] = @sprintf "%jd\t trees of depth %jd\t check: %jd\n" niter depth c 39 | end 40 | 41 | for i in results 42 | write(io, i) 43 | end 44 | 45 | @printf io "long lived tree of depth %jd\t check: %jd\n" n check(long_tree) 46 | end 47 | 48 | end #module 49 | 50 | using .BinaryTreeMutable 51 | 52 | @gctime BinaryTreeMutable.binary_trees(devnull, 21) 53 | -------------------------------------------------------------------------------- /benches/multithreaded/mergesort_parallel/mergesort_parallel.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | const N = 32 * (1 << 20) 4 | const BUBBLE_SORT_THRESHOLD = 32 5 | 6 | using Random 7 | Random.seed!(42) 8 | a = rand(1:N, N) 9 | 10 | function bubble_sort(a, start, limit) 11 | for i = start:limit-2 12 | for j = i+1:limit-1 13 | if a[j] < a[i] 14 | a[i], a[j] = a[j], a[i] 15 | end 16 | end 17 | end 18 | end 19 | 20 | function merge(src, dst, start, split, limit) 21 | dst_pos = start 22 | i = start 23 | j = split 24 | while i < split && j < limit 25 | if src[i] <= src[j] 26 | dst[dst_pos] = src[i] 27 | i += 1 28 | else 29 | dst[dst_pos] = src[j] 30 | j += 1 31 | end 32 | dst_pos += 1 33 | end 34 | 35 | while i < split 36 | dst[dst_pos] = src[i] 37 | i += 1 38 | dst_pos += 1 39 | end 40 | 41 | while j < limit 42 | dst[dst_pos] = src[j] 43 | j += 1 44 | dst_pos += 1 45 | end 46 | end 47 | 48 | function 
merge_sort(move, a, b, start, limit) 49 | if move || limit - start > BUBBLE_SORT_THRESHOLD 50 | split = (start + limit) ÷ 2 51 | r1 = Threads.@spawn merge_sort(!move, a, b, start, split) 52 | r2 = Threads.@spawn merge_sort(!move, a, b, split, limit) 53 | wait(r1) 54 | wait(r2) 55 | if move 56 | merge(a, b, start, split, limit) 57 | else 58 | merge(b, a, start, split, limit) 59 | end 60 | else 61 | bubble_sort(a, start, limit) 62 | end 63 | end 64 | 65 | function sort(a) 66 | b = similar(a) 67 | merge_sort(false, a, b, 1, length(a) + 1) 68 | end 69 | 70 | @gctime sort(a) 71 | -------------------------------------------------------------------------------- /util/find-all-min-heaps.jl: -------------------------------------------------------------------------------- 1 | import Glob 2 | function rdir(dir::AbstractString, pat::Glob.FilenameMatch) 3 | result = String[] 4 | for (root, _, files) in walkdir(dir) 5 | append!(result, filter!(f -> occursin(pat, f), joinpath.(root, files))) 6 | end 7 | return result 8 | end 9 | rdir(dir::AbstractString, pat::AbstractString) = rdir(dir, Glob.FilenameMatch(pat)) 10 | 11 | benches = rdir("benches", "*.jl") 12 | 13 | function find_min_size(bench_path) 14 | @info "Finding heap size for $bench_path" 15 | bench_path_parent = dirname(bench_path) 16 | min_heap = 4 17 | max_heap = min(24 * 1024) # 24GB is more than enough so we don't waste time 18 | heap_size = min_heap 19 | while min_heap <= max_heap 20 | @info "Attempting heap size $(heap_size)MB" 21 | proc = run( 22 | pipeline( 23 | `$(Base.julia_cmd()) --project=$(bench_path_parent) --hard-heap-limit=$(heap_size)M --gc-sweep-always-full $bench_path`, 24 | stdout = stdout, 25 | stderr = stderr, 26 | ); 27 | wait = false, 28 | ) 29 | if success(proc) 30 | max_heap = heap_size 31 | heap_size = round(Int, (max_heap + min_heap) / 2) 32 | else 33 | min_heap = heap_size 34 | heap_size = round(Int, (max_heap + min_heap) / 2) 35 | end 36 | if (max_heap - min_heap) <= 16 37 | break 38 | end 39 
| end 40 | @info "Heap size for $bench_path is $(heap_size)MB" 41 | heap_size 42 | end 43 | 44 | results = [bench => find_min_size(bench) for bench in benches] 45 | open("heap_sizes.csv", "w") do io 46 | println(io, "bench,heap_size") 47 | for (bench, heap_size) in results 48 | println(io, "$bench,$heap_size") 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /vis/plot.jl: -------------------------------------------------------------------------------- 1 | using CSV 2 | using CairoMakie 3 | using TypedTables 4 | using Statistics 5 | 6 | struct IntegerTicks end 7 | CairoMakie.Makie.get_tickvalues(::IntegerTicks, vmin, vmax) = ceil(Int, vmin) : floor(Int, vmax) 8 | 9 | function plot_results(table; log2_axes = true, violin = true) 10 | kwargs = (;) 11 | if log2_axes 12 | kwargs = (; xscale = log2, yscale = log2, xticks = LogTicks(IntegerTicks()), kwargs...) 13 | end 14 | 15 | benches = TypedTables.group(getproperty(:file), table) 16 | f = Figure(resolution = (1000, 500*length(benches))) 17 | idx = 1 18 | for (file, bench) in pairs(benches) 19 | mean_data = Any[] 20 | for (gcthreads, t) in pairs(TypedTables.group(getproperty(:gcthreads), bench)) 21 | push!(mean_data, (; file, gcthreads, gc_time = mean(t.gc_time), threads=first(t.threads))) 22 | end 23 | mean_table = Table(row for row in mean_data) 24 | t0 = filter(r -> r.gcthreads == 1, mean_table).gc_time 25 | speedup = t0 ./ mean_table.gc_time 26 | mean_table = Table(mean_table; speedup) 27 | 28 | Label(f[idx, 1:2, Top()], 29 | "$file -- $(first(mean_table.threads)) Threads", 30 | valign = :bottom,font = :bold, padding = (0, 0, 15, 0)) 31 | 32 | ax = Axis(f[idx, 1]; title="Speedup", kwargs...) 33 | scatterlines!(ax, mean_table.gcthreads, mean_table.speedup) 34 | lines!(ax, mean_table.gcthreads, mean_table.gcthreads, color=:lightblue) 35 | 36 | ax = Axis(f[idx, 2]; title="GC times (ms)", kwargs...) 
37 | gcthreads = bench.gcthreads 38 | gc_times = bench.gc_time ./ 1_000_000 39 | if violin 40 | violin!(ax, gcthreads, gc_times; 41 | show_median=true) 42 | else 43 | rainclouds!(ax,gcthreads, gc_times; 44 | orientation = :vertical, clouds=hist, cloud_width=0.5) 45 | end 46 | 47 | idx +=1 48 | end 49 | save("plot.png", f, px_per_unit = 2) 50 | f 51 | end 52 | 53 | 54 | if !isinteractive() 55 | table = Table(CSV.File(joinpath(@__DIR__, "..", "results.csv"))) 56 | plot_results(table) 57 | end 58 | -------------------------------------------------------------------------------- /util/utils.jl: -------------------------------------------------------------------------------- 1 | using Pkg 2 | Pkg.instantiate() # It is dumb that I have to do this 3 | using Serialization 4 | 5 | idx = Ref{Int}(0) 6 | thrashing_stamps = zeros(UInt64, 3) 7 | 8 | function gc_cb_on_pressure() 9 | t = time_ns() 10 | thrashing_stamps[idx[] % 3 + 1] = t 11 | idx[] += 1 12 | if idx[] >= 3 13 | # three thrashing stamps in ten seconds: abort 14 | if t - thrashing_stamps[idx[] % 3 + 1] <= 10_000_000_000 15 | @ccall abort()::Cvoid 16 | end 17 | end 18 | nothing 19 | end 20 | 21 | @info "Setting GC memory pressure callback" 22 | ccall(:jl_gc_set_cb_notify_gc_pressure, Cvoid, (Ptr{Cvoid}, Cint), 23 | @cfunction(gc_cb_on_pressure, Cvoid, ()), true) 24 | 25 | macro gctime(ex) 26 | fc = isdefined(Base.Experimental, Symbol("@force_compile")) ? 
27 | :(Base.Experimental.@force_compile) : 28 | :() 29 | quote 30 | $fc 31 | local result 32 | local start_gc_num = Base.gc_num() 33 | local end_gc_num = start_gc_num 34 | local start_time = time_ns() 35 | local end_time = start_time 36 | try 37 | local val = $(esc(ex)) 38 | end_time = time_ns() 39 | end_gc_num = Base.gc_num() 40 | result = (; 41 | value = val, 42 | times = (end_time - start_time), 43 | gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), 44 | gc_start = start_gc_num, 45 | gc_end = end_gc_num 46 | ) 47 | catch e 48 | @show e 49 | result = (; 50 | value = e, 51 | times = NaN, 52 | gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), 53 | gc_start = start_gc_num, 54 | gc_end = end_gc_num 55 | ) 56 | end 57 | 58 | #run(`ps uxww`) 59 | #run(`pmap $(getpid())`) 60 | 61 | if "SERIALIZE" in ARGS 62 | # uglyness to communicate over non stdout (specifically file descriptor 3) 63 | @invokelatest serialize(open(RawFD(3)), result) 64 | else 65 | @invokelatest display(result) 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /benches/multithreaded/mm_divide_and_conquer/mm_divide_and_conquer.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | function matrix_multiply(res, x, y) 4 | i_n = size(x, 1) 5 | j_n = size(y, 2) 6 | k_n = size(y, 1) 7 | 8 | for i = 1:i_n 9 | for j = 1:j_n 10 | w = 0 11 | for k = 1:k_n 12 | w += x[i, k] * y[k, j] 13 | end 14 | res[i, j] = w 15 | end 16 | end 17 | end 18 | 19 | function matrix_multiply_recursive(res, x, y) 20 | i_n = size(x, 1) 21 | j_n = size(y, 2) 22 | k_n = size(y, 1) 23 | 24 | if i_n <= 128 || j_n <= 128 || k_n <= 128 25 | # Base case: use a simple matrix multiplication 26 | matrix_multiply(res, x, y) 27 | else 28 | # Divide matrices into submatrices 29 | i_half = i_n ÷ 2 30 | j_half = j_n ÷ 2 31 | k_half = k_n ÷ 2 32 | 33 | a11 = x[1:i_half, 1:k_half] 34 | a12 = 
x[1:i_half, (k_half+1):k_n] 35 | a21 = x[(i_half+1):i_n, 1:k_half] 36 | a22 = x[(i_half+1):i_n, (k_half+1):k_n] 37 | 38 | b11 = y[1:k_half, 1:j_half] 39 | b12 = y[1:k_half, (j_half+1):j_n] 40 | b21 = y[(k_half+1):k_n, 1:j_half] 41 | b22 = y[(k_half+1):k_n, (j_half+1):j_n] 42 | 43 | c11 = zeros(Int, i_half, j_half) 44 | c12 = zeros(Int, i_half, (j_n - j_half)) 45 | c21 = zeros(Int, (i_n - i_half), j_half) 46 | c22 = zeros(Int, (i_n - i_half), (j_n - j_half)) 47 | 48 | # Recursive matrix multiplication on submatrices 49 | t1 = Threads.@spawn matrix_multiply_recursive(c11, a11 + a22, b11 + b22) 50 | t2 = Threads.@spawn matrix_multiply_recursive(c12, a21 + a22, b11) 51 | t3 = Threads.@spawn matrix_multiply_recursive(c21, a11, b12 - b22) 52 | matrix_multiply_recursive(c22, a22, b21 - b11) 53 | 54 | # Wait for the spawned threads to complete 55 | wait(t1) 56 | wait(t2) 57 | wait(t3) 58 | 59 | # Combine submatrices to get the result 60 | res[1:i_half, 1:j_half] .= c11 .+ c12 61 | res[1:i_half, (j_half+1):j_n] .= c11 .+ c22 62 | res[(i_half+1):i_n, 1:j_half] .= c21 .+ c12 63 | res[(i_half+1):i_n, (j_half+1):j_n] .= c21 .+ c22 64 | end 65 | end 66 | 67 | const M_SIZE = (1 << 12) 68 | 69 | function main_recursive() 70 | m1 = rand(1:100, M_SIZE, M_SIZE) 71 | m2 = rand(1:100, M_SIZE, M_SIZE) 72 | res = zeros(Int, M_SIZE, M_SIZE) 73 | 74 | matrix_multiply_recursive(res, m1, m2) 75 | 76 | return sum(res) 77 | end 78 | 79 | @gctime main_recursive() 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Garbage Collection Test Suite 2 | 3 | This package contains various test programs which measure the efficiency of Garbage 4 | Collection (GC) in Julia. 
5 | 6 | ## Running 7 | 8 | ``` 9 | Usage: 10 | run_benchmarks.jl (serial|multithreaded|slow) (all|<category> [<name>]) [options] 11 | run_benchmarks.jl -h | --help 12 | run_benchmarks.jl --version 13 | Options: 14 | -n <runs>, --runs=<runs> Number of runs for each benchmark [default: 10]. 15 | -t <threads>, --threads=<threads> Number of mutator threads to use [default: 1]. 16 | --gcthreads=<gcthreads> Number of GC threads to use [default: 1]. 17 | -s <scale>, --scale=<scale> Maximum number of GC threads for scaling test. 18 | -h, --help Show this screen. 19 | --version Show version. 20 | ``` 21 | 22 | ## Classes 23 | 24 | There are three classes of benchmarks: 25 | - *Serial* benchmarks run on a single mutator thread. 26 | - *Multithreaded* benchmarks may run on multiple mutator threads. 27 | - *Slow* benchmarks are long-running in comparison with the other two classes. 28 | 29 | ## Examples 30 | 31 | - Run all serial benchmarks 5 times each using 1 mutator thread and 1 GC thread: 32 | 33 | `julia --project=. run_benchmarks.jl serial all -n 5` 34 | 35 | - Run the binary tree benchmarks 10 times each with 1, 2, 4 and 8 GC threads (and 8 mutator threads): 36 | 37 | `julia --project=. run_benchmarks.jl multithreaded binary_tree -t 8 -s 8` 38 | 39 | - Run the red-black tree benchmark once using 1 mutator thread and 4 GC threads: 40 | 41 | `julia --project=. run_benchmarks.jl slow rb_tree rb_tree -n 1 --gcthreads 4` 42 | 43 | ## The benchmarks 44 | 45 | | Class | Category | Name | Description | 46 | | --- | --- | --- | --- | 47 | | Serial | TimeZones | TimeZones.jl | Creation of timezones which involve repeated short `String` allocations. | 48 | | | append | append.jl | Repeatedly growing `Vector`s. | 49 | | | bigint | pollard.jl | Tests small `BigInt`s. | 50 | | | linked | list.jl | Small pointer-heavy data structure. | 51 | | | | tree.jl | Small pointer-heavy data structure. | 52 | | | strings | strings.jl | Exercises fragmentation through repeated allocation of short multi-sized strings. 
| 53 | | | big_arrays | many_refs.jl | Forces a mark-phase traversal through a large array of pointers (all distinct). | 54 | | | big_arrays | single_ref.jl | Forces a mark-phase traversal through a large array of pointers (all the same). | 55 | | Multithreaded | binary_tree | tree_immutable.jl | Small pointer-heavy data structure. | 56 | | | | tree_mutable.jl | Small pointer-heavy data structure. | 57 | | | mergesort_parallel | mergesort_parallel.jl | Parallel merge-sort. | 58 | | | mm_divide_and_conquer | mm_divide_and_conquer.jl | Divide-and-conquer matrix multiply. | 59 | | | big_arrays | objarray.jl | Allocates large arrays of boxed objects, each containing a small number of references. | 60 | | | big_arrays | issue-52937.jl | Parallel allocation of arrays of immutable types. | 61 | | Slow | rb\_tree | rb\_tree.jl | Pointer graph whose minimum linear arrangement has cost Θ(n²). | 62 | | | pidigits | pidigits.jl | Tests large `BigInt`s. | 63 | -------------------------------------------------------------------------------- /benches/serial/strings/strings.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using Random 4 | 5 | # This program generates random length strings made up of ACTG characters. 6 | # The idea is that there will be a significant number of repeated strings. 7 | # The repeated strings are counted but the strings themselves are garbage. 8 | # The result should be significant multi-sized fragmentation in the heap. 
9 | 10 | mutable struct TreeNode 11 | key::String 12 | count::Int 13 | left::TreeNode 14 | right::TreeNode 15 | next::TreeNode 16 | TreeNode() = new() 17 | TreeNode(x) = new(x,1) 18 | TreeNode(x,y) = new(x,y) 19 | TreeNode(n::TreeNode) = new(n.key, n.count) 20 | end 21 | 22 | function getKey(x::TreeNode) return x.key end 23 | function getCount(x::TreeNode) return x.count end 24 | 25 | function compare(g, x::TreeNode, y::TreeNode) 26 | if (g(x) < g(y)) 27 | return -1 28 | elseif (g(x) == g(y)) 29 | return 0 30 | else 31 | return 1 32 | end 33 | end 34 | 35 | function compareCount(x::TreeNode, y::TreeNode) 36 | return compare(getCount, x, y) 37 | end 38 | 39 | function compareKey(x::TreeNode, y::TreeNode) 40 | return compare(getKey, x, y) 41 | end 42 | 43 | function duplicateKey(root::TreeNode, n::TreeNode) 44 | root.count = root.count + 1; 45 | end 46 | 47 | function duplicateCount(root::TreeNode,n::TreeNode) 48 | if !isdefined(root,:next) 49 | root.next = n 50 | else 51 | n.next = root.next 52 | root.next = n 53 | end 54 | end 55 | 56 | function insert(root::TreeNode,n::TreeNode,compare,duplicate) 57 | result = compare(root,n) 58 | if result < 0 59 | if !isdefined(root,:left) 60 | root.left = TreeNode(n) 61 | else 62 | insert(root.left, n, compare, duplicate) 63 | end 64 | elseif result > 0 65 | if !isdefined(root,:right) 66 | root.right = TreeNode(n) 67 | else 68 | insert(root.right, n, compare, duplicate) 69 | end 70 | else 71 | duplicate(root,n) 72 | end 73 | end 74 | 75 | function traverse(n::TreeNode, f) 76 | if isdefined(n,:left) 77 | traverse(n.left, f) 78 | end 79 | 80 | f(n) 81 | 82 | if isdefined(n,:right) 83 | traverse(n.right, f) 84 | end 85 | end 86 | 87 | function print(n::TreeNode) 88 | count = 1 89 | while (isdefined(n,:next)) 90 | n = n.next 91 | count = count + 1 92 | end 93 | println("There was/were ", count, " string(s) that was/were repeated ", n.count, " times") 94 | end 95 | 96 | function tree(root::TreeNode, n) 97 | for i in 1:n 98 | 
insert(root, TreeNode(randstring("ACTG", rand(1:32))), compareKey,duplicateKey) 99 | end 100 | end 101 | 102 | resultRoot = TreeNode("end") 103 | 104 | 105 | function insertHelper(n::TreeNode) 106 | insert(resultRoot, n, compareCount, duplicateCount) 107 | end 108 | 109 | #Build the tree sorted by count 110 | function SortTree(n::TreeNode) 111 | traverse(n::TreeNode, insertHelper) 112 | return resultRoot 113 | end 114 | 115 | function test(n) 116 | startroot = TreeNode("start") 117 | buildtree = tree(startroot, n) 118 | result = SortTree(startroot) 119 | # println("done with sort") 120 | # traverse(result, print) 121 | return 6847 122 | end 123 | 124 | @gctime test(1024 * 1024 * 8) 125 | -------------------------------------------------------------------------------- /benches/slow/rb_tree/rb_tree.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | # Simple GC benchmark for performance on pointer graphs whose minimum linear arrangement 4 | # has cost θ(n^2). tvbench() maintains a set of N points each of which has a random (x,y) 5 | # coordinate. The points are indexed by two red-black trees, one ordered by x, the other 6 | # one ordered by y. At each step we create a new point and add it to the indexes. If the 7 | # total number of points is N+1 we delete the oldest point. 8 | # 9 | # Author: Todd Veldhuizen 10 | # 11 | # Example use: 12 | # julia> include("tvgcbench.jl") 13 | # julia> tvbench(100000000,1200) 14 | # 15 | # On my macbook pro (64Gb ram, 2.3GHz 8-Core Intel i9) and Julia 1.7.3-pre.3, 16 | # running the above example with some gc tracing enabled I see gc pauses of 24 seconds 17 | # while julia process memory usage is only 5Gb. 18 | # 19 | # Quoted below are some tracing output from julia src/gc.c. 
20 | # The fields for the #@GC@# lines are: 21 | # #@GC@# jl_gc_pass_count, jl_mark_counter, jl_marked_already_counter, pause, t0, d1, mark_time, d3, d4, d5, sweep_time, d7, sweep_full 22 | # 23 | # #@GC@# 4,59679,312306,9493554,6673304950506491,21985,5996094,1442,474,1186,3466187,6630,0 24 | # #@GC_PAUSE_SECONDS@# 0.009494 25 | # #@GC@# 5,1262302,1707391,155991578,6673305957633691,26669,146731925,2207,404,1357,9224256,5281,0 26 | # #@GC_PAUSE_SECONDS@# 0.155992 27 | # #@GC@# 6,2601172,3271231,309064862,6673307232597647,14024,294877138,1636,2092,498,14167736,2297,0 28 | # #@GC_PAUSE_SECONDS@# 0.309065 29 | # #@GC@# 7,3963665,4837077,484216108,6673308816193910,5398,456634339,1967,137,26211,27546197,2528,1 30 | # #@GC_PAUSE_SECONDS@# 0.484216 31 | # #@GC@# 8,8451979,11612046,948559090,6673312801368228,2831,912492807,4107,836,82544,35974458,2023,1 32 | # #@GC_PAUSE_SECONDS@# 0.948559 33 | # #@GC@# 9,16979033,21842180,1954845268,6673323797397558,7148,1892234116,3718,750,19069,62580629,2001,1 34 | # #@GC_PAUSE_SECONDS@# 1.954845 35 | # #@GC@# 10,38296771,47417640,4937244893,6673355028202042,3998,4802409026,4815,886,83290,134740941,2644,1 36 | # #@GC_PAUSE_SECONDS@# 4.937245 37 | # #@GC@# 11,91590987,111356129,13998846831,6673446463639536,4052,13686747979,3716,1155,49822,312038121,2555,1 38 | # #@GC_PAUSE_SECONDS@# 13.998847 39 | # #@GC@# 12,144885203,175294623,24036204691,6673554514445920,3224,23551304164,4770,949,57853,484831470,2825,1 40 | # #@GC_PAUSE_SECONDS@# 24.036205 41 | # 42 | # In GC pass 12 (which lasts 24 seconds) there are (144885203+175294623) calls to 43 | # gc_try_setmark() and the mark phase takes 23551304164ns, so about 73ns (approx 170 44 | # clock cycles) per mark attempt. My suspicion is that the poor gc performance on this 45 | # benchmark is caused by the mark phase doing inefficient random memory accesses with 46 | # no prefetching, causing cache and TLB misses. 
On the STREAMS benchmark my laptop does 47 | # about 18GB/s, so in the length of that 24 second gc pause it could linearly scan the 48 | # entire julia process memory 85 times. 49 | 50 | using DataStructures 51 | using Random 52 | import Base: isless 53 | 54 | mutable struct Point 55 | x::Int 56 | y::Int 57 | end 58 | 59 | struct PointByX 60 | p::Point 61 | end 62 | Base.isless(a::PointByX, b::PointByX) = isless(a.p.x, b.p.x) 63 | 64 | struct PointByY 65 | p::Point 66 | end 67 | Base.isless(a::PointByY, b::PointByY) = isless(a.p.y, b.p.y) 68 | 69 | function tvbench(; N = 50_000_000) 70 | t0 = time() 71 | queue = Queue{Point}() 72 | xtree = RBTree{PointByX}() 73 | ytree = RBTree{PointByY}() 74 | count = 0 75 | tcheck = 0 76 | i = 0 77 | while true 78 | count = count + 1 79 | p = Point(Random.rand(Int), Random.rand(Int)) 80 | enqueue!(queue, p) 81 | push!(xtree, PointByX(p)) 82 | push!(ytree, PointByY(p)) 83 | 84 | if length(queue) > N 85 | p = dequeue!(queue) 86 | delete!(xtree, PointByX(p)) 87 | delete!(ytree, PointByY(p)) 88 | end 89 | 90 | i = i + 1 91 | if i == 100 92 | i = 0 93 | @assert length(xtree) <= N 94 | elapsed = time() - t0 95 | tcheck2 = floor(elapsed/10) 96 | if tcheck != tcheck2 97 | tcheck = tcheck2 98 | println("elapsed=$(elapsed)s, $(length(queue)) current points, $(count) total, $(floor(count/elapsed)) per second") 99 | end 100 | if count >= 2 * N 101 | break 102 | end 103 | end 104 | #= 105 | nm, nr = fldmod(count, 1_000_000) 106 | if nr == 0 107 | @show nm 108 | @timev GC.gc() 109 | end 110 | elapsed = time() - t0 111 | if (elapsed >= min_seconds) && ((count >= N) || (elapsed >= max_seconds)) 112 | break 113 | end 114 | =# 115 | end 116 | end 117 | 118 | @gctime tvbench() 119 | -------------------------------------------------------------------------------- /benches/fragmentation/synthetic/exploit_free_list.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", 
"utils.jl")) 2 | 3 | mutable struct ListNode{N} 4 | next::Union{ListNode{N}, Nothing} 5 | data::NTuple{N, UInt8} 6 | end 7 | 8 | @static if Sys.WORD_SIZE != 64 9 | error("This script requires a 64-bit version of Julia") 10 | end 11 | const NEXT_PTR_BYTES = 8 12 | const JL_HEADER_BYTES = 8 13 | 14 | @inline function create_node(::Val{N}) where N 15 | # N is now compile-time known 16 | ListNode{N}(nothing, ntuple(_ -> UInt8(0), Val(N))) 17 | end 18 | 19 | # These constants are specific to Julia's pool allocator. If Julia changes the implementation, these value may need to be updated. 20 | const JL_GC_SIZECLASS = [8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 336, 368, 400, 448, 496, 544, 576, 624, 672, 736, 816, 896, 1008, 1088, 1168, 1248, 1360, 1488, 1632, 1808, 2032] 21 | const N_SIZE_CLASSES = length(JL_GC_SIZECLASS) 22 | const PAGE_SIZE = 16384 23 | 24 | # These constants are configurable 25 | # How many pages to allocate for each size class 26 | const PAGES_TO_ALLOCATE = 10240 27 | # The keep alive pattern for the allocated pool objects 28 | # 1 = keep one object alive per page 29 | # 2 = keep every other objects alive 30 | const KEEP_ALIVE_PATTERN = 2 31 | 32 | const KEPT_LISTS = [] 33 | 34 | function process_size_class(sz_class_index::Int, sz::Int) 35 | payload_sz = sz - JL_HEADER_BYTES 36 | tuple_len = payload_sz - NEXT_PTR_BYTES 37 | T = Val(tuple_len) 38 | if tuple_len <= 0 39 | return 40 | end 41 | objs_per_page = div(PAGE_SIZE, sz) 42 | total_objs = Int64(PAGES_TO_ALLOCATE * objs_per_page) 43 | 44 | actual_sz = sizeof(ListNode{tuple_len}) 45 | actual_sz + JL_HEADER_BYTES == sz || error("Actual size does not match expected size") 46 | 47 | println("Processing size class $sz") 48 | println(" Actual payload size: $actual_sz bytes, N: $tuple_len") 49 | println(" Allocating $total_objs objects") 50 | 51 | print_page_utilization("Before", sz_class_index) 52 | 53 | # Create initial 
linked list 54 | head = create_list(total_objs, T) 55 | n_nodes(head) == total_objs || error("Actual number of objects does not match total_objs") 56 | 57 | print_page_utilization("Alloc", sz_class_index) 58 | 59 | # Fragment the linked list 60 | new_head = fragment_list(head, objs_per_page) 61 | # Preserve remaining objects 62 | global KEPT_LISTS 63 | push!(KEPT_LISTS, new_head) 64 | 65 | print_page_utilization("Fragment", sz_class_index) 66 | end 67 | 68 | function create_list(total_objs::Int64, T::Val) 69 | # We need to be careful that the loop does not introduce allocations 70 | head = current = create_node(T) 71 | i = 2 72 | while true 73 | current.next = create_node(T) 74 | current = current.next 75 | i += 1 76 | i > total_objs && break 77 | end 78 | return head 79 | end 80 | 81 | function fragment_list(head::ListNode, objs_per_page::Int) 82 | new_head = head 83 | last = head 84 | current = head 85 | counter = 1 86 | while !isnothing(current) && !isnothing(current.next) 87 | next = current.next 88 | # If pattern == 1, keep one object alive per page 89 | # If pattern == 2, keep every other object alive 90 | if (KEEP_ALIVE_PATTERN == 1 && mod(counter, objs_per_page) == 0) || (KEEP_ALIVE_PATTERN == 2 && mod(counter, 2) == 0) 91 | last.next = current 92 | last = current 93 | end 94 | current = next 95 | counter += 1 96 | end 97 | return new_head 98 | end 99 | 100 | function n_nodes(node::ListNode) 101 | count = 0 102 | current = node 103 | while !isnothing(current) 104 | count += 1 105 | current = current.next 106 | end 107 | return count 108 | end 109 | 110 | function print_page_utilization(msg::String, sz_class_index::Int) 111 | GC.gc(true) 112 | utils = Base.gc_page_utilization_data() 113 | 114 | for (i, sz) in enumerate(JL_GC_SIZECLASS) 115 | if sz_class_index == -1 || i == sz_class_index 116 | println(" $(lpad(msg, 10)): Pool $i: $(round(utils[i]*100, digits=2))%") 117 | end 118 | end 119 | end 120 | 121 | function main() 122 | for (i,sz) in 
enumerate(JL_GC_SIZECLASS) 123 | # Julia aligns up object size to 16 bytes. We only allocate if the size class is 16 bytes aligned. 124 | if mod(sz, 16) != 0 125 | continue 126 | end 127 | process_size_class(i, sz) 128 | end 129 | 130 | println("\nFragmentation complete.") 131 | print_page_utilization("Final", -1) 132 | 133 | sum = 0 134 | println("Kept lists: $(length(KEPT_LISTS))") 135 | for l in KEPT_LISTS 136 | println(" List length: $(n_nodes(l))") 137 | sum += n_nodes(l) 138 | end 139 | println("Total live objects: $sum") 140 | end 141 | 142 | @gctime main() 143 | -------------------------------------------------------------------------------- /run_benchmarks.jl: -------------------------------------------------------------------------------- 1 | const doc = """run_benchmarks.jl -- GC benchmarks test harness 2 | Usage: 3 | run_benchmarks.jl (serial|multithreaded|compiler|fragmentation|slow) (all| []) [options] 4 | run_benchmarks.jl -h | --help 5 | run_benchmarks.jl --version 6 | Options: 7 | -n , --runs= Number of runs for each benchmark [default: 10]. 8 | -t , --threads= Number of threads to use [default: 1]. 9 | -g , --gcthreads= Number of GC threads to use [default: 0]. 10 | -s , --scale= Maximum number of gcthreads for scaling test. 11 | -h, --help Show this screen. 12 | --version Show version. 
13 | --json Serializes output to `json` file 14 | """ 15 | 16 | using DocOpt 17 | using JSON 18 | using PrettyTables 19 | using Printf 20 | using Serialization 21 | using Statistics 22 | using TypedTables 23 | using CSV 24 | 25 | const args = docopt(doc, version = v"0.1.1") 26 | const JULIAVER = Base.julia_cmd()[1] 27 | 28 | # times in ns 29 | # TODO: get better stats 30 | function get_stats(times::Vector) 31 | return [minimum(times), median(times), maximum(times), std(times)] 32 | end 33 | 34 | """ 35 | Highlights cells in a column based on value 36 | green if less than lo 37 | yellow if between lo and hi 38 | red if above hi 39 | """ 40 | function highlight_col(col, lo, hi) 41 | [Highlighter((data,i,j) -> (j == col) && data[i, j] <= lo; foreground=:green), 42 | Highlighter((data,i,j) -> (j == col) && lo < data[i, j] < hi; foreground=:yellow), 43 | Highlighter((data,i,j) -> (j == col) && hi <= data[i, j]; foreground=:red),] 44 | end 45 | 46 | function diff(gc_end, gc_start, p) 47 | v0 = getproperty(gc_start, p) 48 | v1 = getproperty(gc_end, p) 49 | v1-v0 50 | end 51 | 52 | function extract(gc_end, gc_start, p) 53 | map((gc_end, gc_start)->diff(gc_end, gc_start, p), gc_end, gc_start) 54 | end 55 | 56 | function run_bench(runs, threads, gcthreads, file, show_json = false) 57 | value = [] 58 | times = [] 59 | gc_diff = [] 60 | gc_end = [] 61 | gc_start = [] 62 | for _ in 1:runs 63 | # uglyness to communicate over non stdout (specifically file descriptor 3) 64 | p = Base.PipeEndpoint() 65 | _gcthreads = gcthreads == 0 ? `` : `--gcthreads=$gcthreads` 66 | cmd = `$JULIAVER --project=. 
--threads=$threads $_gcthreads $file SERIALIZE` 67 | cmd = run(Base.CmdRedirect(cmd, p, 3), stdin, stdout, stderr, wait=false) 68 | r = deserialize(p) 69 | @assert success(cmd) 70 | # end uglyness 71 | push!(value, r.value) 72 | push!(times, r.times) 73 | push!(gc_diff, r.gc_diff) 74 | push!(gc_end, r.gc_end) 75 | push!(gc_start, r.gc_start) 76 | end 77 | gc_times = extract(gc_end, gc_start, :total_time) 78 | mark_times = extract(gc_end, gc_start, :total_mark_time) 79 | sweep_times = extract(gc_end, gc_start, :total_sweep_time) 80 | times_to_safepoint = extract(gc_end, gc_start, :total_time_to_safepoint) 81 | ncollect = extract(gc_end, gc_start, :collect) 82 | nfull_sweep = extract(gc_end, gc_start, :full_sweep) 83 | 84 | data = Table( 85 | time = times, 86 | gc_time = gc_times, 87 | mark_time = mark_times, 88 | sweep_time = sweep_times, 89 | time_to_safepoint = times_to_safepoint, 90 | ncollections = ncollect, 91 | nfull_sweeps = nfull_sweep, 92 | file = [file for _ in 1:runs], 93 | threads = [threads for _ in 1:runs], 94 | gcthreads = [gcthreads for _ in 1:runs], 95 | version = [string(Base.VERSION) for _ in 1:runs], 96 | ) 97 | results = joinpath(@__DIR__, "results.csv") 98 | CSV.write(results, data; append=isfile(results)) 99 | 100 | total_stats = get_stats(times) ./ 1_000_000 101 | gc_time = get_stats(gc_times) ./ 1_000_000 102 | mark_time = get_stats(mark_times) ./ 1_000_000 103 | sweep_time = get_stats(sweep_times) ./ 1_000_000 104 | time_to_safepoint = get_stats(times_to_safepoint) ./ 1_000 105 | 106 | max_pause = get_stats(map(stat->stat.max_pause, gc_end)) ./ 1_000_000 107 | max_mem = get_stats(map(stat->stat.max_memory, gc_end)) ./ 1024^2 108 | pct_gc = get_stats(map((t,stat)->(stat.total_time/t), times, gc_diff)) .* 100 109 | 110 | header = (["", "total time", "gc time", "mark time", "sweep time", "max GC pause", "time to safepoint", "max heap", "percent gc"], 111 | ["", "ms", "ms", "ms", "ms", "ms", "us", "MB", "%" ]) 112 | labels = ["minimum", 
"median", "maximum", "stdev"] 113 | highlighters = highlight_col(6, 10, 100) # max pause 114 | append!(highlighters, highlight_col(7, 1, 10)) # time to safepoint 115 | append!(highlighters, highlight_col(9, 10, 50)) # pct gc 116 | highlighters = Tuple(highlighters) 117 | if show_json 118 | data = Dict([("total time", total_stats), 119 | ("gc time", gc_time), 120 | ("mark time", mark_time), 121 | ("sweep time", sweep_time), 122 | ("max pause", max_pause), 123 | ("ttsp", time_to_safepoint), 124 | ("max memory", max_mem), 125 | ("pct gc", pct_gc)]) 126 | JSON.print(data) 127 | else 128 | data = hcat(labels, total_stats, gc_time, mark_time, sweep_time, max_pause, time_to_safepoint, max_mem, pct_gc) 129 | pretty_table(data; header, formatters=ft_printf("%0.0f"), highlighters) 130 | end 131 | end 132 | 133 | function run_category_files(benches, args, show_json = false) 134 | local runs = parse(Int, args["--runs"]) 135 | local threads = parse(Int, args["--threads"]) 136 | local gcthreads = parse(Int, args["--gcthreads"]) 137 | local max = if isnothing(args["--scale"]) 0 else parse(Int, args["--scale"]) end 138 | for bench in benches 139 | if !show_json 140 | @show bench 141 | end 142 | if isnothing(args["--scale"]) 143 | run_bench(runs, threads, gcthreads, bench, show_json) 144 | else 145 | local n = 0 146 | while true 147 | gcthreads = 2^n 148 | gcthreads > max && break 149 | @show (gcthreads, threads) 150 | run_bench(runs, threads, gcthreads, bench, show_json) 151 | n += 1 152 | end 153 | end 154 | end 155 | end 156 | 157 | function run_all_categories(args, show_json = false) 158 | for category in readdir() 159 | @show category 160 | cd(category) 161 | benches = filter(f -> endswith(f, ".jl"), readdir()) 162 | run_category_files(benches, args, show_json) 163 | cd("..") 164 | end 165 | end 166 | 167 | function main(args) 168 | rm("results.csv", force=true) 169 | cd(joinpath(@__DIR__, "benches")) 170 | 171 | # validate choices 172 | if !isnothing(args["--scale"]) 173 | 
@assert args["--gcthreads"] == "0" "Specify either --scale or --threads." 174 | end 175 | 176 | # select benchmark class 177 | if args["serial"] 178 | cd("serial") 179 | elseif args["multithreaded"] 180 | cd("multithreaded") 181 | elseif args["compiler"] 182 | cd("compiler") 183 | elseif args["fragmentation"] 184 | cd("fragmentation") 185 | else # slow 186 | cd("slow") 187 | end 188 | 189 | show_json = args["--json"] 190 | 191 | if args["all"] 192 | run_all_categories(args, show_json) 193 | else 194 | cd(args[""]) 195 | benches = if isnothing(args[""]) 196 | filter(f -> endswith(f, ".jl"), readdir()) 197 | else 198 | ["$(args[""]).jl"] 199 | end 200 | run_category_files(benches, args, show_json) 201 | end 202 | end 203 | 204 | main(args) 205 | -------------------------------------------------------------------------------- /benches/compiler/inference/inference_benchmarks.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | # InferenceBenchmarks taken from BaseBenchmarks.jl (https://github.com/JuliaCI/BaseBenchmarks.jl) 4 | 5 | module InferenceBenchmarks 6 | 7 | # InferenceBenchmarker 8 | # ==================== 9 | # this new `AbstractInterpreter` satisfies the minimum interface requirements and manages 10 | # its cache independently in a way it is totally separated from the native code cache 11 | # managed by the runtime system: this allows us to profile Julia-level inference reliably 12 | # without being influenced by previous trials or some native execution 13 | 14 | @static if VERSION ≥ v"1.12.0-DEV.1581" 15 | if Base.REFLECTION_COMPILER[] === nothing 16 | const CC = Base.Compiler 17 | else 18 | const CC = Base.REFLECTION_COMPILER[] 19 | end 20 | else 21 | const CC = Core.Compiler 22 | end 23 | 24 | using Core: 25 | MethodInstance, CodeInstance, MethodTable, SimpleVector 26 | using .CC: 27 | AbstractInterpreter, InferenceParams, InferenceResult, InferenceState, 28 
| OptimizationParams, OptimizationState, WorldRange, WorldView, 29 | specialize_method, unwrap_unionall, rewrap_unionall, copy 30 | @static if VERSION ≥ v"1.11.0-DEV.1498" 31 | import .CC: get_inference_world 32 | else 33 | import .CC: get_world_counter as get_inference_world 34 | end 35 | using Base: get_world_counter 36 | using InteractiveUtils: gen_call_with_extracted_types_and_kwargs 37 | using BenchmarkTools: @benchmarkable, BenchmarkGroup, addgroup! 38 | 39 | struct InferenceBenchmarkerCache 40 | dict::IdDict{MethodInstance,CodeInstance} 41 | InferenceBenchmarkerCache() = new(IdDict{MethodInstance,CodeInstance}()) 42 | end 43 | struct InferenceBenchmarker <: AbstractInterpreter 44 | world::UInt 45 | inf_params::InferenceParams 46 | opt_params::OptimizationParams 47 | optimize::Bool 48 | compress::Bool 49 | discard_trees::Bool 50 | inf_cache::Vector{InferenceResult} 51 | code_cache::InferenceBenchmarkerCache 52 | function InferenceBenchmarker( 53 | world::UInt = get_world_counter(); 54 | inf_params::InferenceParams = InferenceParams(), 55 | opt_params::OptimizationParams = OptimizationParams(), 56 | optimize::Bool = true, 57 | compress::Bool = true, 58 | discard_trees::Bool = true, 59 | inf_cache::Vector{InferenceResult} = InferenceResult[], 60 | code_cache::InferenceBenchmarkerCache = InferenceBenchmarkerCache()) 61 | return new( 62 | world, 63 | inf_params, 64 | opt_params, 65 | optimize, 66 | compress, 67 | discard_trees, 68 | inf_cache, 69 | code_cache) 70 | end 71 | end 72 | 73 | CC.may_optimize(interp::InferenceBenchmarker) = interp.optimize 74 | CC.may_compress(interp::InferenceBenchmarker) = interp.compress 75 | CC.may_discard_trees(interp::InferenceBenchmarker) = interp.discard_trees 76 | CC.InferenceParams(interp::InferenceBenchmarker) = interp.inf_params 77 | CC.OptimizationParams(interp::InferenceBenchmarker) = interp.opt_params 78 | #=CC.=#get_inference_world(interp::InferenceBenchmarker) = interp.world 79 | 
CC.get_inference_cache(interp::InferenceBenchmarker) = interp.inf_cache 80 | CC.code_cache(interp::InferenceBenchmarker) = WorldView(interp.code_cache, WorldRange(get_inference_world(interp))) 81 | CC.get(wvc::WorldView{InferenceBenchmarkerCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default) 82 | CC.getindex(wvc::WorldView{InferenceBenchmarkerCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi) 83 | CC.haskey(wvc::WorldView{InferenceBenchmarkerCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi) 84 | CC.setindex!(wvc::WorldView{InferenceBenchmarkerCache}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi) 85 | @static if isdefined(CC, :cache_owner) 86 | CC.cache_owner(wvc::InferenceBenchmarker) = wvc.code_cache 87 | end 88 | 89 | function inf_gf_by_type!(interp::InferenceBenchmarker, @nospecialize(tt::Type{<:Tuple}); kwargs...) 90 | match = Base._which(tt; world=get_inference_world(interp)) 91 | return inf_method_signature!(interp, match.method, match.spec_types, match.sparams; kwargs...) 92 | end 93 | 94 | inf_method!(interp::InferenceBenchmarker, m::Method; kwargs...) = 95 | inf_method_signature!(interp, m, m.sig, method_sparams(m); kwargs...) 96 | function method_sparams(m::Method) 97 | s = TypeVar[] 98 | sig = m.sig 99 | while isa(sig, UnionAll) 100 | push!(s, sig.var) 101 | sig = sig.body 102 | end 103 | return svec(s...) 104 | end 105 | inf_method_signature!(interp::InferenceBenchmarker, m::Method, @nospecialize(atype), sparams::SimpleVector; kwargs...) = 106 | inf_method_instance!(interp, specialize_method(m, atype, sparams)::MethodInstance; kwargs...) 107 | 108 | function inf_method_instance!(interp::InferenceBenchmarker, mi::MethodInstance; 109 | run_optimizer::Bool = true) 110 | result = InferenceResult(mi) 111 | frame = InferenceState(result, #=cache_mode=#run_optimizer ? 
:global : :no, interp)::InferenceState 112 | CC.typeinf(interp, frame) 113 | return frame 114 | end 115 | 116 | macro inf_call(ex0...) 117 | return gen_call_with_extracted_types_and_kwargs(__module__, :inf_call, ex0) 118 | end 119 | function inf_call(@nospecialize(f), @nospecialize(types = Base.default_tt(f)); 120 | interp::InferenceBenchmarker = InferenceBenchmarker(), 121 | run_optimizer::Bool = true) 122 | ft = Core.Typeof(f) 123 | if isa(types, Type) 124 | u = unwrap_unionall(types) 125 | tt = rewrap_unionall(Tuple{ft, u.parameters...}, types) 126 | else 127 | tt = Tuple{ft, types...} 128 | end 129 | frame = inf_gf_by_type!(interp, tt; run_optimizer) 130 | frame.bestguess !== Union{} || error("invalid inference benchmark found") 131 | return frame 132 | end 133 | 134 | macro abs_call(ex0...) 135 | return gen_call_with_extracted_types_and_kwargs(__module__, :abs_call, ex0) 136 | end 137 | function abs_call(@nospecialize(f), @nospecialize(types = Base.default_tt(f)); 138 | interp::InferenceBenchmarker = InferenceBenchmarker(; optimize = false)) 139 | return inf_call(f, types; interp) 140 | end 141 | 142 | macro opt_call(ex0...) 143 | return gen_call_with_extracted_types_and_kwargs(__module__, :opt_call, ex0) 144 | end 145 | function opt_call(@nospecialize(f), @nospecialize(types = Base.default_tt(f)); 146 | interp::InferenceBenchmarker = InferenceBenchmarker()) 147 | frame = inf_call(f, types; interp, run_optimizer = false) 148 | evals = 0 149 | return function () 150 | @assert (evals += 1) <= 1 151 | # # `optimize` may modify these objects, so need to stash the pre-optimization states, if we want to allow multiple evals 152 | # src, stmt_info, slottypes, ssavalue_uses = copy(frame.src), copy(frame.stmt_info), copy(frame.slottypes), copy(frame.ssavalue_uses) 153 | # cfg = copy(frame.cfg) 154 | # unreachable = @static hasfield(InferenceState, :unreachable) ? copy(frame.unreachable) : nothing 155 | # bb_vartables = @static hasfield(InferenceState, :bb_vartables) ? 
copy(frame.bb_vartables) : nothing 156 | opt = OptimizationState(frame, interp) 157 | CC.optimize(interp, opt, frame.result) 158 | # frame.src, frame.stmt_info, frame.slottypes, frame.ssavalue_uses = src, stmt_info, slottypes, ssavalue_uses 159 | # cfg === nothing || (frame.cfg = cfg) 160 | # unreachable === nothing || (frame.unreachable = unreachable) 161 | # bb_vartables === nothing || (frame.bb_vartables = bb_vartables) 162 | end 163 | end 164 | 165 | function tune_benchmarks!( 166 | g::BenchmarkGroup; 167 | seconds=30, 168 | gcsample=true, 169 | ) 170 | for v in values(g) 171 | v.params.seconds = seconds 172 | v.params.gcsample = gcsample 173 | v.params.evals = 1 # `setup` must be functional 174 | end 175 | end 176 | 177 | # "inference" benchmark targets 178 | # ============================= 179 | 180 | # TODO add TTFP? 181 | # XXX some targets below really depends on the compiler implementation itself 182 | # (e.g. `abstract_call_gf_by_type`) and thus a bit more unreliable -- ideally 183 | # we want to replace them with other functions that have the similar characteristics 184 | # but whose call graph are orthogonal to the Julia's compiler implementation 185 | 186 | using REPL.REPLCompletions: completions 187 | broadcasting(xs, x) = findall(>(x), abs.(xs)) 188 | let # check the compilation behavior for a function with lots of local variables 189 | # (where the sparse state management is critical to get a reasonable performance) 190 | # see https://github.com/JuliaLang/julia/pull/45276 191 | n = 10000 192 | ex = Expr(:block) 193 | var = gensym() 194 | push!(ex.args, :($var = x)) 195 | for _ = 1:n 196 | newvar = gensym() 197 | push!(ex.args, :($newvar = $var + 1)) 198 | var = newvar 199 | end 200 | @eval global function many_local_vars(x) 201 | $ex 202 | end 203 | end 204 | let # benchmark the performance benefit of `CachedMethodTable` 205 | # see https://github.com/JuliaLang/julia/pull/46535 206 | n = 100 207 | ex = Expr(:block) 208 | var = gensym() 209 | 
push!(ex.args, :(y = sum(x))) 210 | for i = 1:n 211 | push!(ex.args, :(x .= $(Float64(i)))) 212 | push!(ex.args, :(y += sum(x))) 213 | end 214 | push!(ex.args, :(return y)) 215 | @eval global function many_method_matches(x) 216 | $ex 217 | end 218 | end 219 | let # check the performance benefit of concrete evaluation 220 | param = 1000 221 | ex = Expr(:block) 222 | var = gensym() 223 | push!(ex.args, :($var = x)) 224 | for _ = 1:param 225 | newvar = gensym() 226 | push!(ex.args, :($newvar = sin($var))) 227 | var = newvar 228 | end 229 | @eval let 230 | sins(x) = $ex 231 | global many_const_calls() = sins(42) 232 | end 233 | end 234 | # check the performance benefit of caching `GlobalRef`-lookup result 235 | # see https://github.com/JuliaLang/julia/pull/46729 236 | using Core.Intrinsics: add_int 237 | const ONE = 1 238 | @eval function many_global_refs(x) 239 | z = 0 240 | $([:(z = add_int(x, add_int(z, ONE))) for _ = 1:10000]...) 241 | return add_int(z, ONE) 242 | end 243 | strangesum(::Vector{Float64}) = error("this should not be called") 244 | strangesum(x::AbstractArray) = sum(x) 245 | let # check performance of invoke call handling 246 | n = 100 247 | ex = Expr(:block) 248 | var = gensym() 249 | push!(ex.args, :(y = sum(x))) 250 | for i = 1:n 251 | push!(ex.args, :(y += Base.@invoke strangesum(x::AbstractArray))) 252 | end 253 | push!(ex.args, :(return y)) 254 | @eval global function many_invoke_calls(x) 255 | $ex 256 | end 257 | end 258 | import Base.Experimental: @opaque 259 | let # check performance of opaque closure handling 260 | n = 100 261 | ex = Expr(:block) 262 | var = gensym() 263 | push!(ex.args, :(y = sum(x))) 264 | for i = 1:n 265 | push!(ex.args, :(oc = @inline @opaque (i, x, y) -> begin 266 | x .= Float64(i) 267 | y += sum(x) 268 | end)) 269 | push!(ex.args, :(oc($i, x, y))) 270 | end 271 | push!(ex.args, :(return y)) 272 | @eval global function many_opaque_closures(x) 273 | $ex 274 | end 275 | end 276 | 277 | 278 | function run_all_benchmarks() 
279 | # abstract interpretation 280 | @abs_call sin(42) 281 | @abs_call rand(Float64) 282 | abs_call(println, (QuoteNode,)) 283 | abs_call(broadcasting, (Vector{Float64},Float64)) 284 | abs_call(completions, (String,Int)) 285 | abs_call(Base.init_stdio, (Ptr{Cvoid},)) 286 | abs_call(many_local_vars, (Int,)) 287 | abs_call(many_method_matches, (Vector{Float64},)) 288 | abs_call(many_const_calls) 289 | abs_call(many_global_refs, (Int,)) 290 | abs_call(many_invoke_calls, (Vector{Float64},)) 291 | abs_call(many_opaque_closures, (Vector{Float64},)) 292 | # optimization 293 | @opt_call sin(42) 294 | @opt_call rand(Float64) 295 | opt_call(println, (QuoteNode,)) 296 | opt_call(broadcasting, (Vector{Float64},Float64)) 297 | opt_call(completions, (String,Int)) 298 | opt_call(Base.init_stdio, (Ptr{Cvoid},)) 299 | opt_call(many_local_vars, (Int,)) 300 | opt_call(many_method_matches, (Vector{Float64},)) 301 | opt_call(many_const_calls) 302 | opt_call(many_global_refs, (Int,)) 303 | opt_call(many_invoke_calls, (Vector{Float64},)) 304 | opt_call(many_opaque_closures, (Vector{Float64},)) 305 | # all inference 306 | @inf_call sin(42) 307 | @inf_call rand(Float64) 308 | inf_call(println, (QuoteNode,)) 309 | inf_call(broadcasting, (Vector{Float64},Float64)) 310 | inf_call(completions, (String,Int)) 311 | inf_call(Base.init_stdio, (Ptr{Cvoid},)) 312 | inf_call(many_local_vars, (Int,)) 313 | inf_call(many_method_matches, (Vector{Float64},)) 314 | inf_call(many_const_calls) 315 | inf_call(many_global_refs, (Int,)) 316 | inf_call(many_invoke_calls, (Vector{Float64},)) 317 | inf_call(many_opaque_closures, (Vector{Float64},)) 318 | return nothing 319 | end 320 | 321 | end # module InferenceBenchmarks 322 | 323 | using .InferenceBenchmarks 324 | 325 | @gctime InferenceBenchmarks.run_all_benchmarks() 326 | --------------------------------------------------------------------------------