├── benches ├── slow │ ├── bigint │ │ ├── Project.toml │ │ └── pidigits.jl │ └── rb_tree │ │ ├── Project.toml │ │ └── rb_tree.jl ├── serial │ ├── append │ │ ├── Project.toml │ │ └── append.jl │ ├── bigint │ │ ├── Project.toml │ │ └── pollard.jl │ ├── linked │ │ ├── Project.toml │ │ ├── list.jl │ │ └── tree.jl │ ├── strings │ │ ├── Project.toml │ │ └── strings.jl │ ├── big_arrays │ │ ├── Project.toml │ │ ├── single_ref.jl │ │ └── many_refs.jl │ └── TimeZones │ │ ├── Project.toml │ │ └── TimeZones.jl ├── fragmentation │ └── synthetic │ │ ├── Project.toml │ │ └── exploit_free_list.jl ├── multithreaded │ ├── binary_tree │ │ ├── Project.toml │ │ ├── tree_immutable.jl │ │ └── tree_mutable.jl │ ├── mergesort_parallel │ │ ├── Project.toml │ │ └── mergesort_parallel.jl │ ├── mm_divide_and_conquer │ │ ├── Project.toml │ │ └── mm_divide_and_conquer.jl │ └── big_arrays │ │ ├── Project.toml │ │ ├── objarray.jl │ │ └── issue-52937.jl └── compiler │ └── inference │ ├── Project.toml │ └── inference_benchmarks.jl ├── .gitignore ├── vis ├── Project.toml └── plot.jl ├── Project.toml ├── LICENSE ├── .buildkite └── pipeline.yml ├── util ├── compare_bins.jl ├── find-all-min-heaps.jl └── utils.jl ├── README.md └── run_benchmarks.jl /benches/slow/bigint/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/append/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/bigint/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/linked/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 
| -------------------------------------------------------------------------------- /benches/serial/strings/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/big_arrays/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/fragmentation/synthetic/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/multithreaded/binary_tree/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/multithreaded/mergesort_parallel/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/multithreaded/mm_divide_and_conquer/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | -------------------------------------------------------------------------------- /benches/serial/TimeZones/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53" 3 | -------------------------------------------------------------------------------- /benches/multithreaded/big_arrays/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 3 | -------------------------------------------------------------------------------- 
/benches/slow/rb_tree/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 3 | -------------------------------------------------------------------------------- /benches/compiler/inference/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temporary 2 | *.DS_Store 3 | *.swp 4 | *.jl.cov 5 | *.jl.*.cov 6 | *.jl.mem 7 | *~ 8 | TAGS 9 | *Manifest.toml 10 | results.csv 11 | .vscode 12 | -------------------------------------------------------------------------------- /vis/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 3 | CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" 4 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 5 | TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" 6 | -------------------------------------------------------------------------------- /benches/serial/TimeZones/TimeZones.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using TimeZones 4 | 5 | zdts = [now(tz"UTC") for _ in 1:100_000_000]; 6 | 7 | @gctime sum(hash, ["trashfire"^min(1000, i) for i in 1:500_000]) 8 | 9 | -------------------------------------------------------------------------------- /benches/serial/append/append.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | # simulates allocation profile of some dataframes benchmarks 4 | # by repeatedly append to a vector 5 
| function append_lots(iters=100*1024, size=1596) 6 | v = Float64[] 7 | for i = 1:iters 8 | append!(v,rand(size)) 9 | end 10 | return v 11 | end 12 | 13 | @gctime append_lots()[end] 14 | -------------------------------------------------------------------------------- /Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 3 | DocOpt = "968ba79b-81e4-546f-ab3a-2eecfa62a9db" 4 | Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" 5 | JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 6 | PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" 7 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 8 | Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" 9 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 10 | TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" 11 | -------------------------------------------------------------------------------- /benches/serial/linked/list.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | mutable struct ListNode 4 | key::Int64 5 | next::ListNode 6 | ListNode() = new() 7 | ListNode(x)= new(x) 8 | ListNode(x,y) = new(x,y); 9 | end 10 | 11 | function list(n=128) 12 | start::ListNode = ListNode(1) 13 | current::ListNode = start 14 | for i = 2:(n*1024^2) 15 | current = ListNode(i,current) 16 | end 17 | return current.key 18 | end 19 | 20 | @gctime list() 21 | 22 | -------------------------------------------------------------------------------- /benches/multithreaded/big_arrays/objarray.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using Random: seed! 
4 | seed!(1) 5 | 6 | abstract type Cell end 7 | 8 | struct CellA<:Cell 9 | a::Ref{Int} 10 | end 11 | 12 | struct CellB<:Cell 13 | b::String 14 | end 15 | 16 | function fillcells!(mc::Array{Cell}) 17 | for ind in eachindex(mc) 18 | mc[ind] = ifelse(rand() > 0.5, CellA(ind), CellB(string(ind))) 19 | end 20 | return mc 21 | end 22 | 23 | function work(size) 24 | mcells = Array{Cell}(undef, size, size) 25 | mc = fillcells!(mcells) 26 | end 27 | 28 | function run(maxsize) 29 | Threads.@threads for i in 1:maxsize 30 | work(i*1000) 31 | end 32 | end 33 | 34 | @gctime run(8) -------------------------------------------------------------------------------- /benches/serial/big_arrays/single_ref.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | module SingleRef 4 | 5 | """ 6 | This benchmark stresses the array handling in the GC. 7 | We allocate a large arrays that all contain a reference to a singular object. 8 | The mark-queue of the GC should not overflow. 9 | """ 10 | function construct(array_length) 11 | obj = Ref{Int}(0) 12 | arr = Array{Ref{Int}}(undef, array_length) 13 | fill!(arr, obj) 14 | GC.gc(true) 15 | GC.gc(true) 16 | Core.donotdelete(arr) 17 | return nothing 18 | end 19 | 20 | end #module 21 | 22 | using .SingleRef 23 | 24 | const GB = 1<<30 25 | const MAX_MEMORY = round(Int, 0.8 * GB) 26 | const array_length = div(MAX_MEMORY, sizeof(Ptr{C_NULL})) 27 | 28 | @gctime SingleRef.construct(array_length) 29 | -------------------------------------------------------------------------------- /benches/serial/big_arrays/many_refs.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | module ManyRef 4 | 5 | """ 6 | This benchmark stresses the array handling in the GC. 7 | We allocate a large arrays that all contain a reference to a many small objects. 
8 | The mark-queue of the GC should not overflow. 9 | """ 10 | function construct(array_length) 11 | GC.enable(false) 12 | arr = Array{Ref{Int}}(undef, array_length) 13 | for i in eachindex(arr) 14 | arr[i] = Ref{Int}(0) 15 | end 16 | GC.enable(true) 17 | GC.gc(true) 18 | GC.gc(true) 19 | Core.donotdelete(arr) 20 | return nothing 21 | end 22 | 23 | end #module 24 | 25 | using .ManyRef 26 | 27 | const GB = 1<<30 28 | const MAX_MEMORY = round(Int, 0.8 * GB) 29 | const array_length = div(MAX_MEMORY, 3*sizeof(Ptr{C_NULL})) 30 | 31 | @gctime ManyRef.construct(array_length) 32 | -------------------------------------------------------------------------------- /benches/multithreaded/big_arrays/issue-52937.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using Base.Threads: @threads 4 | using Random: shuffle 5 | 6 | function sample_vote!(_rb, chop_counts) 7 | pts = rand(length(chop_counts)) 8 | N = length(_rb) 9 | _srt = 4245 10 | partialsortperm!(_rb, pts, 1:_srt; lt = <, rev = true) 11 | while sum(@views chop_counts[_rb[1:_srt]]) ≤ 5660 12 | _srt = min(2 * _srt, N) 13 | partialsortperm!(_rb, pts, 1:_srt; lt = <, rev = true) 14 | end 15 | end 16 | 17 | function parallel_scores(chop_counts) 18 | @threads for i in 1:8 19 | _rb = collect(1:length(chop_counts)) 20 | # the bigger this number, the more % GC time 21 | for _ ∈ 1:1000 22 | sample_vote!(_rb, chop_counts) 23 | end 24 | end 25 | end 26 | 27 | # kind of arbitrary, but approximates my data 28 | chop_counts = shuffle(trunc.(Int, 6500 ./ (50:100_000))) 29 | @gctime parallel_scores(chop_counts) 30 | -------------------------------------------------------------------------------- /benches/slow/bigint/pidigits.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | # The Computer Language Benchmarks Game 4 | # 
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ 5 | 6 | # based on Mario Pernici Python's program 7 | 8 | function pidigits(N=68470) 9 | i = k = ns = 0 10 | k1 = 1 11 | n,a,d,t,u = map(BigInt,(1,0,1,0,0)) 12 | 13 | while true 14 | k += 1 15 | t = n << 1 16 | n *= k 17 | a += t 18 | k1 += 2 19 | a *= k1 20 | d *= k1 21 | 22 | if a >= n 23 | t,u = divrem(n*3 +a, d) 24 | u += n 25 | if d > u 26 | if i >= N 27 | return ns 28 | end 29 | ns = ns*10 + t 30 | i += 1 31 | if mod(i,10) == 0 32 | ns = 0 33 | end 34 | a -= d*t 35 | a *= 10 36 | n *= 10 37 | 38 | end 39 | end 40 | end 41 | end 42 | 43 | @gctime pidigits() 44 | -------------------------------------------------------------------------------- /benches/serial/bigint/pollard.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | function pollardfactor(n::T=big(1208925819691594988651321)) where T<:Integer 4 | for c in T(1):(n - 3) 5 | G, r, q = ones(T,3) 6 | y = 2 7 | m::T = 1900 8 | ys::T = 0 9 | x::T = 0 10 | while G == 1 11 | x = y 12 | for i in 1:r 13 | y = (y^2 + c) % n 14 | end 15 | k = T(0) 16 | G = T(1) 17 | while k < r && G == 1 18 | for i in 1:min(r - k, m) 19 | ys = y 20 | y = (y^2 + c) % n 21 | q = (q * abs(x - y)) % n 22 | end 23 | G = gcd(q, n) 24 | k += m 25 | end 26 | r *= 2 27 | end 28 | G == n && (G = T(1)) 29 | while G == 1 30 | ys = (ys^2 + c) % n 31 | G = gcd(abs(x - ys), n) 32 | end 33 | if G != n 34 | return G 35 | end 36 | end 37 | end 38 | 39 | @gctime pollardfactor() 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Christine H. 
Flood 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.buildkite/pipeline.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - label: "Julia {{matrix.version}} -- {{matrix.benchmark}} -- {{matrix.category}}" 3 | plugins: 4 | - JuliaCI/julia#v1: 5 | version: "{{matrix.version}}" 6 | matrix: 7 | setup: 8 | version: 9 | - "nightly" 10 | benchmark: 11 | - "serial" 12 | - "multithreaded" 13 | category: 14 | - "all" 15 | adjustments: 16 | # - with: 17 | # version: "nightly" 18 | # benchmark: "slow" 19 | # category: "rb_tree" 20 | - with: 21 | version: "nightly" 22 | benchmark: "slow" 23 | category: "bigint" 24 | agents: 25 | queue: "juliaecosystem" 26 | arch: "x86_64" 27 | os: "linux" 28 | commands: | 29 | echo "--- Instantiate" 30 | julia --project=. 
-e 'using Pkg; Pkg.instantiate(); Pkg.build()' 31 | 32 | echo "+++ Run benchmarks {{matrix.benchmark}}" 33 | julia --threads=auto --project=. run_benchmarks.jl {{matrix.benchmark}} {{matrix.category}} 34 | artifact_paths: 35 | - "results.csv" 36 | 37 | if: build.message !~ /\[skip tests\]/ 38 | timeout_in_minutes: 60 -------------------------------------------------------------------------------- /benches/serial/linked/tree.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using Random 4 | 5 | mutable struct TreeNode 6 | key::Int 7 | left::TreeNode 8 | right::TreeNode 9 | TreeNode() = new() 10 | TreeNode(x) = new(x) 11 | TreeNode(x,y,z) = new(x,y,z) 12 | end 13 | 14 | function insert(key, n::TreeNode) 15 | if key < n.key 16 | if !isdefined(n,:left) 17 | n.left = TreeNode(key) 18 | else 19 | insert(key, n.left) 20 | end 21 | elseif key > n.key 22 | if !isdefined(n,:right) 23 | n.right = TreeNode(key) 24 | else 25 | insert(key, n.right) 26 | end 27 | end 28 | end 29 | 30 | function sumTree(n::TreeNode) 31 | sum = n.key 32 | if isdefined(n,:left) 33 | sum += sumTree(n.left) 34 | end 35 | if isdefined(n,:right) 36 | sum += sumTree(n.right) 37 | end 38 | 39 | return sum 40 | end 41 | 42 | # tree_size is the number of elements in mb 43 | function tree(n=8) 44 | n *= 1024^2 45 | rng = Xoshiro(12345) 46 | temp = rand(rng, Int, n) 47 | root::TreeNode = TreeNode(temp[1]) 48 | for i = 2:n 49 | insert(temp[i], root) 50 | end 51 | return sumTree(root) 52 | end 53 | 54 | @gctime tree() 55 | -------------------------------------------------------------------------------- /util/compare_bins.jl: -------------------------------------------------------------------------------- 1 | const doc = """compare_bin.jl -- Cross binary comparison between GC benchmarks 2 | Usage: 3 | compare_bins.jl <file1> <file2> 4 | Options: 5 | -h, --help Show this screen. 
6 | """ 7 | 8 | using DocOpt 9 | using JSON 10 | using PrettyTables 11 | 12 | const args = docopt(doc, version = v"0.1.1") 13 | 14 | function main(args) # compare the medians of two benchmark JSON result files side by side 15 | f1 = args["<file1>"] 16 | f2 = args["<file2>"] 17 | 18 | js1 = JSON.parsefile(f1) 19 | js2 = JSON.parsefile(f2) 20 | 21 | labels = ["total time [ms]", 22 | "gc time [ms]", 23 | "mark time [ms]", 24 | "sweep time [ms]", 25 | "max pause [ms]", 26 | "max memory [MB]", 27 | "pct gc"] 28 | header = ["", f1, f2] 29 | 30 | # show medians 31 | raw_data = [js1["total time"][2] js2["total time"][2]; 32 | js1["gc time"][2] js2["gc time"][2]; 33 | js1["mark time"][2] js2["mark time"][2]; 34 | js1["sweep time"][2] js2["sweep time"][2]; 35 | js1["max pause"][2] js2["max pause"][2]; 36 | js1["max memory"][2] js2["max memory"][2]; 37 | js1["pct gc"][2] js2["pct gc"][2]] 38 | 39 | data = hcat(labels, raw_data) 40 | pretty_table(data; header, formatters=ft_printf("%0.0f")) 41 | end 42 | 43 | main(args) 44 | -------------------------------------------------------------------------------- /benches/multithreaded/binary_tree/tree_immutable.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | module BinaryTreeImmutable 4 | 5 | # Adopted from 6 | # https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/binarytrees.html#binarytrees 7 | 8 | using Base.Threads 9 | using Printf 10 | 11 | struct Node 12 | l::Union{Nothing, Node} 13 | r::Union{Nothing, Node} 14 | end 15 | 16 | function make(n::Int) 17 | return n === 0 ? Node(nothing, nothing) : Node(make(n-1), make(n-1)) 18 | end 19 | 20 | function check(node::Node) 21 | return 1 + (node.l === nothing ? 
0 : check(node.l) + check(node.r)) 22 | end 23 | 24 | function binary_trees(io, n::Int) 25 | @printf io "stretch tree of depth %jd\t check: %jd\n" n+1 check(make(n+1)) 26 | 27 | long_tree = make(n) 28 | minDepth = 4 29 | resultSize = div((n - minDepth), 2) + 1 30 | results = Vector{String}(undef, resultSize) 31 | Threads.@threads for depth in minDepth:2:n 32 | c = 0 33 | niter = 1 << (n - depth + minDepth) 34 | for _ in 1:niter 35 | c += check(make(depth)) 36 | end 37 | index = div((depth - minDepth),2) + 1 38 | results[index] = @sprintf "%jd\t trees of depth %jd\t check: %jd\n" niter depth c 39 | end 40 | 41 | for i in results 42 | write(io, i) 43 | end 44 | 45 | @printf io "long lived tree of depth %jd\t check: %jd\n" n check(long_tree) 46 | end 47 | 48 | end #module 49 | 50 | using .BinaryTreeImmutable 51 | 52 | @gctime BinaryTreeImmutable.binary_trees(devnull, 21) 53 | -------------------------------------------------------------------------------- /benches/multithreaded/binary_tree/tree_mutable.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | module BinaryTreeMutable 4 | 5 | # Adopted from 6 | # https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/binarytrees.html#binarytrees 7 | 8 | using Base.Threads 9 | using Printf 10 | 11 | mutable struct Node 12 | l::Union{Nothing, Node} 13 | r::Union{Nothing, Node} 14 | end 15 | 16 | function make(n::Int) 17 | return n === 0 ? Node(nothing, nothing) : Node(make(n-1), make(n-1)) 18 | end 19 | 20 | function check(node::Node) 21 | return 1 + (node.l === nothing ? 
0 : check(node.l) + check(node.r)) 22 | end 23 | 24 | function binary_trees(io, n::Int) 25 | @printf io "stretch tree of depth %jd\t check: %jd\n" n+1 check(make(n+1)) 26 | 27 | long_tree = make(n) 28 | minDepth = 4 29 | resultSize = div((n - minDepth), 2) + 1 30 | results = Vector{String}(undef, resultSize) 31 | Threads.@threads for depth in minDepth:2:n 32 | c = 0 33 | niter = 1 << (n - depth + minDepth) 34 | for _ in 1:niter 35 | c += check(make(depth)) 36 | end 37 | index = div((depth - minDepth),2) + 1 38 | results[index] = @sprintf "%jd\t trees of depth %jd\t check: %jd\n" niter depth c 39 | end 40 | 41 | for i in results 42 | write(io, i) 43 | end 44 | 45 | @printf io "long lived tree of depth %jd\t check: %jd\n" n check(long_tree) 46 | end 47 | 48 | end #module 49 | 50 | using .BinaryTreeMutable 51 | 52 | @gctime BinaryTreeMutable.binary_trees(devnull, 21) 53 | -------------------------------------------------------------------------------- /benches/multithreaded/mergesort_parallel/mergesort_parallel.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | const N = 32 * (1 << 20) 4 | const BUBBLE_SORT_THRESHOLD = 32 5 | 6 | using Random 7 | Random.seed!(42) 8 | a = rand(1:N, N) 9 | 10 | function bubble_sort(a, start, limit) 11 | for i = start:limit-2 12 | for j = i+1:limit-1 13 | if a[j] < a[i] 14 | a[i], a[j] = a[j], a[i] 15 | end 16 | end 17 | end 18 | end 19 | 20 | function merge(src, dst, start, split, limit) 21 | dst_pos = start 22 | i = start 23 | j = split 24 | while i < split && j < limit 25 | if src[i] <= src[j] 26 | dst[dst_pos] = src[i] 27 | i += 1 28 | else 29 | dst[dst_pos] = src[j] 30 | j += 1 31 | end 32 | dst_pos += 1 33 | end 34 | 35 | while i < split 36 | dst[dst_pos] = src[i] 37 | i += 1 38 | dst_pos += 1 39 | end 40 | 41 | while j < limit 42 | dst[dst_pos] = src[j] 43 | j += 1 44 | dst_pos += 1 45 | end 46 | end 47 | 48 | function 
merge_sort(move, a, b, start, limit) 49 | if move || limit - start > BUBBLE_SORT_THRESHOLD 50 | split = (start + limit) ÷ 2 51 | r1 = Threads.@spawn merge_sort(!move, a, b, start, split) 52 | r2 = Threads.@spawn merge_sort(!move, a, b, split, limit) 53 | wait(r1) 54 | wait(r2) 55 | if move 56 | merge(a, b, start, split, limit) 57 | else 58 | merge(b, a, start, split, limit) 59 | end 60 | else 61 | bubble_sort(a, start, limit) 62 | end 63 | end 64 | 65 | function sort(a) 66 | b = similar(a) 67 | merge_sort(false, a, b, 1, length(a) + 1) 68 | end 69 | 70 | @gctime sort(a) 71 | -------------------------------------------------------------------------------- /util/find-all-min-heaps.jl: -------------------------------------------------------------------------------- 1 | import Glob 2 | function rdir(dir::AbstractString, pat::Glob.FilenameMatch) 3 | result = String[] 4 | for (root, _, files) in walkdir(dir) 5 | append!(result, filter!(f -> occursin(pat, f), joinpath.(root, files))) 6 | end 7 | return result 8 | end 9 | rdir(dir::AbstractString, pat::AbstractString) = rdir(dir, Glob.FilenameMatch(pat)) 10 | 11 | benches = rdir("benches", "*.jl") 12 | 13 | function find_min_size(bench_path) 14 | @info "Finding heap size for $bench_path" 15 | bench_path_parent = dirname(bench_path) 16 | min_heap = 4 17 | max_heap = min(24 * 1024) # 24GB is more than enough so we don't waste time 18 | heap_size = min_heap 19 | while min_heap <= max_heap 20 | @info "Attempting heap size $(heap_size)MB" 21 | proc = run( 22 | pipeline( 23 | `$(Base.julia_cmd()) --project=$(bench_path_parent) --hard-heap-limit=$(heap_size)M --gc-sweep-always-full $bench_path`, 24 | stdout = stdout, 25 | stderr = stderr, 26 | ); 27 | wait = false, 28 | ) 29 | if success(proc) 30 | max_heap = heap_size 31 | heap_size = round(Int, (max_heap + min_heap) / 2) 32 | else 33 | min_heap = heap_size 34 | heap_size = round(Int, (max_heap + min_heap) / 2) 35 | end 36 | if (max_heap - min_heap) <= 16 37 | break 38 | end 39 
| end 40 | @info "Heap size for $bench_path is $(heap_size)MB" 41 | heap_size 42 | end 43 | 44 | results = [bench => find_min_size(bench) for bench in benches] 45 | open("heap_sizes.csv", "w") do io 46 | println(io, "bench,heap_size") 47 | for (bench, heap_size) in results 48 | println(io, "$bench,$heap_size") 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /vis/plot.jl: -------------------------------------------------------------------------------- 1 | using CSV 2 | using CairoMakie 3 | using TypedTables 4 | using Statistics 5 | 6 | struct IntegerTicks end 7 | CairoMakie.Makie.get_tickvalues(::IntegerTicks, vmin, vmax) = ceil(Int, vmin) : floor(Int, vmax) 8 | 9 | function plot_results(table; log2_axes = true, violin = true) 10 | kwargs = (;) 11 | if log2_axes 12 | kwargs = (; xscale = log2, yscale = log2, xticks = LogTicks(IntegerTicks()), kwargs...) 13 | end 14 | 15 | benches = TypedTables.group(getproperty(:file), table) 16 | f = Figure(resolution = (1000, 500*length(benches))) 17 | idx = 1 18 | for (file, bench) in pairs(benches) 19 | mean_data = Any[] 20 | for (gcthreads, t) in pairs(TypedTables.group(getproperty(:gcthreads), bench)) 21 | push!(mean_data, (; file, gcthreads, gc_time = mean(t.gc_time), threads=first(t.threads))) 22 | end 23 | mean_table = Table(row for row in mean_data) 24 | t0 = filter(r -> r.gcthreads == 1, mean_table).gc_time 25 | speedup = t0 ./ mean_table.gc_time 26 | mean_table = Table(mean_table; speedup) 27 | 28 | Label(f[idx, 1:2, Top()], 29 | "$file -- $(first(mean_table.threads)) Threads", 30 | valign = :bottom,font = :bold, padding = (0, 0, 15, 0)) 31 | 32 | ax = Axis(f[idx, 1]; title="Speedup", kwargs...) 33 | scatterlines!(ax, mean_table.gcthreads, mean_table.speedup) 34 | lines!(ax, mean_table.gcthreads, mean_table.gcthreads, color=:lightblue) 35 | 36 | ax = Axis(f[idx, 2]; title="GC times (ms)", kwargs...) 
37 | gcthreads = bench.gcthreads 38 | gc_times = bench.gc_time ./ 1_000_000 39 | if violin 40 | violin!(ax, gcthreads, gc_times; 41 | show_median=true) 42 | else 43 | rainclouds!(ax,gcthreads, gc_times; 44 | orientation = :vertical, clouds=hist, cloud_width=0.5) 45 | end 46 | 47 | idx +=1 48 | end 49 | save("plot.png", f, px_per_unit = 2) 50 | f 51 | end 52 | 53 | 54 | if !isinteractive() 55 | table = Table(CSV.File(joinpath(@__DIR__, "..", "results.csv"))) 56 | plot_results(table) 57 | end 58 | -------------------------------------------------------------------------------- /util/utils.jl: -------------------------------------------------------------------------------- 1 | using Pkg 2 | Pkg.instantiate() # It is dumb that I have to do this 3 | using Serialization 4 | 5 | idx = Ref{Int}(0) 6 | thrashing_stamps = zeros(UInt64, 3) 7 | 8 | function gc_cb_on_pressure() 9 | t = time_ns() 10 | thrashing_stamps[idx[] % 3 + 1] = t 11 | idx[] += 1 12 | if idx[] >= 3 13 | # three thrashing stamps in ten seconds: abort 14 | if t - thrashing_stamps[idx[] % 3 + 1] <= 10_000_000_000 15 | @ccall abort()::Cvoid 16 | end 17 | end 18 | nothing 19 | end 20 | 21 | @info "Setting GC memory pressure callback" 22 | ccall(:jl_gc_set_cb_notify_gc_pressure, Cvoid, (Ptr{Cvoid}, Cint), 23 | @cfunction(gc_cb_on_pressure, Cvoid, ()), true) 24 | 25 | macro gctime(ex) 26 | fc = isdefined(Base.Experimental, Symbol("@force_compile")) ? 
27 | :(Base.Experimental.@force_compile) : 28 | :() 29 | quote 30 | $fc 31 | local result 32 | local start_gc_num = Base.gc_num() 33 | local end_gc_num = start_gc_num 34 | local start_time = time_ns() 35 | local end_time = start_time 36 | try 37 | local val = $(esc(ex)) 38 | end_time = time_ns() 39 | end_gc_num = Base.gc_num() 40 | result = (; 41 | value = val, 42 | times = (end_time - start_time), 43 | gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), 44 | gc_start = start_gc_num, 45 | gc_end = end_gc_num 46 | ) 47 | catch e 48 | @show e 49 | result = (; 50 | value = e, 51 | times = NaN, 52 | gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), 53 | gc_start = start_gc_num, 54 | gc_end = end_gc_num 55 | ) 56 | end 57 | 58 | #run(`ps uxww`) 59 | #run(`pmap $(getpid())`) 60 | 61 | if "SERIALIZE" in ARGS 62 | # uglyness to communicate over non stdout (specifically file descriptor 3) 63 | @invokelatest serialize(open(RawFD(3)), result) 64 | else 65 | @invokelatest display(result) 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /benches/multithreaded/mm_divide_and_conquer/mm_divide_and_conquer.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | function matrix_multiply(res, x, y) 4 | i_n = size(x, 1) 5 | j_n = size(y, 2) 6 | k_n = size(y, 1) 7 | 8 | for i = 1:i_n 9 | for j = 1:j_n 10 | w = 0 11 | for k = 1:k_n 12 | w += x[i, k] * y[k, j] 13 | end 14 | res[i, j] = w 15 | end 16 | end 17 | end 18 | 19 | function matrix_multiply_recursive(res, x, y) 20 | i_n = size(x, 1) 21 | j_n = size(y, 2) 22 | k_n = size(y, 1) 23 | 24 | if i_n <= 128 || j_n <= 128 || k_n <= 128 25 | # Base case: use a simple matrix multiplication 26 | matrix_multiply(res, x, y) 27 | else 28 | # Divide matrices into submatrices 29 | i_half = i_n ÷ 2 30 | j_half = j_n ÷ 2 31 | k_half = k_n ÷ 2 32 | 33 | a11 = x[1:i_half, 1:k_half] 34 | a12 = 
x[1:i_half, (k_half+1):k_n] 35 | a21 = x[(i_half+1):i_n, 1:k_half] 36 | a22 = x[(i_half+1):i_n, (k_half+1):k_n] 37 | 38 | b11 = y[1:k_half, 1:j_half] 39 | b12 = y[1:k_half, (j_half+1):j_n] 40 | b21 = y[(k_half+1):k_n, 1:j_half] 41 | b22 = y[(k_half+1):k_n, (j_half+1):j_n] 42 | 43 | c11 = zeros(Int, i_half, j_half) 44 | c12 = zeros(Int, i_half, (j_n - j_half)) 45 | c21 = zeros(Int, (i_n - i_half), j_half) 46 | c22 = zeros(Int, (i_n - i_half), (j_n - j_half)) 47 | 48 | # Recursive matrix multiplication on submatrices 49 | t1 = Threads.@spawn matrix_multiply_recursive(c11, a11 + a22, b11 + b22) 50 | t2 = Threads.@spawn matrix_multiply_recursive(c12, a21 + a22, b11) 51 | t3 = Threads.@spawn matrix_multiply_recursive(c21, a11, b12 - b22) 52 | matrix_multiply_recursive(c22, a22, b21 - b11) 53 | 54 | # Wait for the spawned threads to complete 55 | wait(t1) 56 | wait(t2) 57 | wait(t3) 58 | 59 | # Combine submatrices to get the result 60 | res[1:i_half, 1:j_half] .= c11 .+ c12 61 | res[1:i_half, (j_half+1):j_n] .= c11 .+ c22 62 | res[(i_half+1):i_n, 1:j_half] .= c21 .+ c12 63 | res[(i_half+1):i_n, (j_half+1):j_n] .= c21 .+ c22 64 | end 65 | end 66 | 67 | const M_SIZE = (1 << 12) 68 | 69 | function main_recursive() 70 | m1 = rand(1:100, M_SIZE, M_SIZE) 71 | m2 = rand(1:100, M_SIZE, M_SIZE) 72 | res = zeros(Int, M_SIZE, M_SIZE) 73 | 74 | matrix_multiply_recursive(res, m1, m2) 75 | 76 | return sum(res) 77 | end 78 | 79 | @gctime main_recursive() 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Garbage Collection Test Suite 2 | 3 | This package contains various test programs which measure the efficiency of Garbage 4 | Collection (GC) in Julia. 
5 | 6 | ## Running 7 | 8 | ``` 9 | Usage: 10 | run_benchmarks.jl (serial|multithreaded|slow) (all|<category> [<name>]) [options] 11 | run_benchmarks.jl -h | --help 12 | run_benchmarks.jl --version 13 | Options: 14 | -n <runs>, --runs=<runs> Number of runs for each benchmark [default: 10]. 15 | -t <threads>, --threads=<threads> Number of mutator threads to use [default: 1]. 16 | --gcthreads=<gcthreads> Number of GC threads to use [default: 1]. 17 | -s <scale>, --scale=<scale> Maximum number of GC threads for scaling test. 18 | -h, --help Show this screen. 19 | --version Show version. 20 | ``` 21 | 22 | ## Classes 23 | 24 | There are three classes of benchmarks: 25 | - *Serial* benchmarks run on a single mutator thread. 26 | - *Multithreaded* benchmarks may run on multiple mutator threads. 27 | - *Slow* benchmarks are long-running in comparison with the other two classes. 28 | 29 | ## Examples 30 | 31 | - Run all serial benchmarks 5 times each using 1 mutator thread and 1 GC thread: 32 | 33 | `julia --project=. run_benchmarks.jl serial all -n 5` 34 | 35 | - Run the binary tree benchmarks 10 times each with 1, 2, 4 and 8 GC threads (and 8 mutator threads): 36 | 37 | `julia --project=. run_benchmarks.jl multithreaded binary_tree -t 8 -s 8` 38 | 39 | - Run the red-black tree benchmark once using 1 mutator thread and 4 GC threads: 40 | 41 | `julia --project=. run_benchmarks.jl slow rb_tree rb_tree -n 1 --gcthreads 4` 42 | 43 | ## The benchmarks 44 | 45 | | Class | Category | Name | Description | 46 | | --- | --- | --- | --- | 47 | | Serial | TimeZones | TimeZones.jl | Creation of timezones which involve repeated short `String` allocations. | 48 | | | append | append.jl | Repeatedly growing `Vector`s. | 49 | | | bigint | pollard.jl | Tests small `BigInt`s. | 50 | | | linked | list.jl | Small pointer-heavy data structure. | 51 | | | | tree.jl | Small pointer-heavy data structure. | 52 | | | strings | strings.jl | Exercises fragmentation through repeated allocation of short multi-sized strings. 
| 53 | | | big_arrays | many_refs.jl | Forces a mark-phase traversal through a large array of pointers (all distinct). | 54 | | | big_arrays | single_ref.jl | Forces a mark-phase traversal through a large array of pointers (all the same). | 55 | | Multithreaded | binary_tree | tree_immutable.jl | Small pointer-heavy data structure. | 56 | | | | tree_mutable.jl | Small pointer-heavy data structure. | 57 | | | mergesort_parallel | mergesort_parallel.jl | Parallel merge-sort. | 58 | | | mm_divide_and_conquer | mm_divide_and_conquer.jl | Divide-and-conquer matrix multiply. | 59 | | | big_arrays | objarray.jl | Allocates large arrays of boxed objects, each containing a small number of references. | 60 | | | big_arrays | issue-52937.jl | Parallel allocation of arrays of immutable types. | 61 | | Slow | rb\_tree | rb\_tree.jl | Pointer graph whose minimum linear arrangement has cost Θ(n²). | 62 | | | pidigits | pidigits.jl | Tests large `BigInt`s. | 63 | -------------------------------------------------------------------------------- /benches/serial/strings/strings.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | using Random 4 | 5 | # This program generates random length strings made up of ACTG characters. 6 | # The idea is that there will be a significant number of repeated strings. 7 | # The repeated strings are counted but the strings themselves are garbage. 8 | # The result should be significant multi-sized fragmentation in the heap. 
9 | 10 | mutable struct TreeNode 11 | key::String 12 | count::Int 13 | left::TreeNode 14 | right::TreeNode 15 | next::TreeNode 16 | TreeNode() = new() 17 | TreeNode(x) = new(x,1) 18 | TreeNode(x,y) = new(x,y) 19 | TreeNode(n::TreeNode) = new(n.key, n.count) 20 | end 21 | 22 | function getKey(x::TreeNode) return x.key end 23 | function getCount(x::TreeNode) return x.count end 24 | 25 | function compare(g, x::TreeNode, y::TreeNode) 26 | if (g(x) < g(y)) 27 | return -1 28 | elseif (g(x) == g(y)) 29 | return 0 30 | else 31 | return 1 32 | end 33 | end 34 | 35 | function compareCount(x::TreeNode, y::TreeNode) 36 | return compare(getCount, x, y) 37 | end 38 | 39 | function compareKey(x::TreeNode, y::TreeNode) 40 | return compare(getKey, x, y) 41 | end 42 | 43 | function duplicateKey(root::TreeNode, n::TreeNode) 44 | root.count = root.count + 1; 45 | end 46 | 47 | function duplicateCount(root::TreeNode,n::TreeNode) 48 | if !isdefined(root,:next) 49 | root.next = n 50 | else 51 | n.next = root.next 52 | root.next = n 53 | end 54 | end 55 | 56 | function insert(root::TreeNode,n::TreeNode,compare,duplicate) 57 | result = compare(root,n) 58 | if result < 0 59 | if !isdefined(root,:left) 60 | root.left = TreeNode(n) 61 | else 62 | insert(root.left, n, compare, duplicate) 63 | end 64 | elseif result > 0 65 | if !isdefined(root,:right) 66 | root.right = TreeNode(n) 67 | else 68 | insert(root.right, n, compare, duplicate) 69 | end 70 | else 71 | duplicate(root,n) 72 | end 73 | end 74 | 75 | function traverse(n::TreeNode, f) 76 | if isdefined(n,:left) 77 | traverse(n.left, f) 78 | end 79 | 80 | f(n) 81 | 82 | if isdefined(n,:right) 83 | traverse(n.right, f) 84 | end 85 | end 86 | 87 | function print(n::TreeNode) 88 | count = 1 89 | while (isdefined(n,:next)) 90 | n = n.next 91 | count = count + 1 92 | end 93 | println("There was/were ", count, " string(s) that was/were repeated ", n.count, " times") 94 | end 95 | 96 | function tree(root::TreeNode, n) 97 | for i in 1:n 98 | 
insert(root, TreeNode(randstring("ACTG", rand(1:32))), compareKey,duplicateKey) 99 | end 100 | end 101 | 102 | resultRoot = TreeNode("end") 103 | 104 | 105 | function insertHelper(n::TreeNode) 106 | insert(resultRoot, n, compareCount, duplicateCount) 107 | end 108 | 109 | #Build the tree sorted by count 110 | function SortTree(n::TreeNode) 111 | traverse(n::TreeNode, insertHelper) 112 | return resultRoot 113 | end 114 | 115 | function test(n) 116 | startroot = TreeNode("start") 117 | buildtree = tree(startroot, n) 118 | result = SortTree(startroot) 119 | # println("done with sort") 120 | # traverse(result, print) 121 | return 6847 122 | end 123 | 124 | @gctime test(1024 * 1024 * 8) 125 | -------------------------------------------------------------------------------- /benches/slow/rb_tree/rb_tree.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | # Simple GC benchmark for performance on pointer graphs whose minimum linear arrangement 4 | # has cost θ(n^2). tvbench() maintains a set of N points each of which has a random (x,y) 5 | # coordinate. The points are indexed by two red-black trees, one ordered by x, the other 6 | # one ordered by y. At each step we create a new point and add it to the indexes. If the 7 | # total number of points is N+1 we delete the oldest point. 8 | # 9 | # Author: Todd Veldhuizen 10 | # 11 | # Example use: 12 | # julia> include("tvgcbench.jl") 13 | # julia> tvbench(100000000,1200) 14 | # 15 | # On my macbook pro (64Gb ram, 2.3GHz 8-Core Intel i9) and Julia 1.7.3-pre.3, 16 | # running the above example with some gc tracing enabled I see gc pauses of 24 seconds 17 | # while julia process memory usage is only 5Gb. 18 | # 19 | # Quoted below are some tracing output from julia src/gc.c. 
20 | # The fields for the #@GC@# lines are: 21 | # #@GC@# jl_gc_pass_count, jl_mark_counter, jl_marked_already_counter, pause, t0, d1, mark_time, d3, d4, d5, sweep_time, d7, sweep_full 22 | # 23 | # #@GC@# 4,59679,312306,9493554,6673304950506491,21985,5996094,1442,474,1186,3466187,6630,0 24 | # #@GC_PAUSE_SECONDS@# 0.009494 25 | # #@GC@# 5,1262302,1707391,155991578,6673305957633691,26669,146731925,2207,404,1357,9224256,5281,0 26 | # #@GC_PAUSE_SECONDS@# 0.155992 27 | # #@GC@# 6,2601172,3271231,309064862,6673307232597647,14024,294877138,1636,2092,498,14167736,2297,0 28 | # #@GC_PAUSE_SECONDS@# 0.309065 29 | # #@GC@# 7,3963665,4837077,484216108,6673308816193910,5398,456634339,1967,137,26211,27546197,2528,1 30 | # #@GC_PAUSE_SECONDS@# 0.484216 31 | # #@GC@# 8,8451979,11612046,948559090,6673312801368228,2831,912492807,4107,836,82544,35974458,2023,1 32 | # #@GC_PAUSE_SECONDS@# 0.948559 33 | # #@GC@# 9,16979033,21842180,1954845268,6673323797397558,7148,1892234116,3718,750,19069,62580629,2001,1 34 | # #@GC_PAUSE_SECONDS@# 1.954845 35 | # #@GC@# 10,38296771,47417640,4937244893,6673355028202042,3998,4802409026,4815,886,83290,134740941,2644,1 36 | # #@GC_PAUSE_SECONDS@# 4.937245 37 | # #@GC@# 11,91590987,111356129,13998846831,6673446463639536,4052,13686747979,3716,1155,49822,312038121,2555,1 38 | # #@GC_PAUSE_SECONDS@# 13.998847 39 | # #@GC@# 12,144885203,175294623,24036204691,6673554514445920,3224,23551304164,4770,949,57853,484831470,2825,1 40 | # #@GC_PAUSE_SECONDS@# 24.036205 41 | # 42 | # In GC pass 12 (which lasts 24 seconds) there are (144885203+175294623) calls to 43 | # gc_try_setmark() and the mark phase takes 23551304164ns, so about 73ns (approx 170 44 | # clock cycles) per mark attempt. My suspicion is that the poor gc performance on this 45 | # benchmark is caused by the mark phase doing inefficient random memory accesses with 46 | # no prefetching, causing cache and TLB misses. 
On the STREAMS benchmark my laptop does 47 | # about 18GB/s, so in the length of that 24 second gc pause it could linearly scan the 48 | # entire julia process memory 85 times. 49 | 50 | using DataStructures 51 | using Random 52 | import Base: isless 53 | 54 | mutable struct Point 55 | x::Int 56 | y::Int 57 | end 58 | 59 | struct PointByX 60 | p::Point 61 | end 62 | Base.isless(a::PointByX, b::PointByX) = isless(a.p.x, b.p.x) 63 | 64 | struct PointByY 65 | p::Point 66 | end 67 | Base.isless(a::PointByY, b::PointByY) = isless(a.p.y, b.p.y) 68 | 69 | function tvbench(; N = 50_000_000) 70 | t0 = time() 71 | queue = Queue{Point}() 72 | xtree = RBTree{PointByX}() 73 | ytree = RBTree{PointByY}() 74 | count = 0 75 | tcheck = 0 76 | i = 0 77 | while true 78 | count = count + 1 79 | p = Point(Random.rand(Int), Random.rand(Int)) 80 | enqueue!(queue, p) 81 | push!(xtree, PointByX(p)) 82 | push!(ytree, PointByY(p)) 83 | 84 | if length(queue) > N 85 | p = dequeue!(queue) 86 | delete!(xtree, PointByX(p)) 87 | delete!(ytree, PointByY(p)) 88 | end 89 | 90 | i = i + 1 91 | if i == 100 92 | i = 0 93 | @assert length(xtree) <= N 94 | elapsed = time() - t0 95 | tcheck2 = floor(elapsed/10) 96 | if tcheck != tcheck2 97 | tcheck = tcheck2 98 | println("elapsed=$(elapsed)s, $(length(queue)) current points, $(count) total, $(floor(count/elapsed)) per second") 99 | end 100 | if count >= 2 * N 101 | break 102 | end 103 | end 104 | #= 105 | nm, nr = fldmod(count, 1_000_000) 106 | if nr == 0 107 | @show nm 108 | @timev GC.gc() 109 | end 110 | elapsed = time() - t0 111 | if (elapsed >= min_seconds) && ((count >= N) || (elapsed >= max_seconds)) 112 | break 113 | end 114 | =# 115 | end 116 | end 117 | 118 | @gctime tvbench() 119 | -------------------------------------------------------------------------------- /benches/fragmentation/synthetic/exploit_free_list.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", 
"utils.jl")) 2 | 3 | mutable struct ListNode{N} 4 | next::Union{ListNode{N}, Nothing} 5 | data::NTuple{N, UInt8} 6 | end 7 | 8 | @static if Sys.WORD_SIZE != 64 9 | error("This script requires a 64-bit version of Julia") 10 | end 11 | const NEXT_PTR_BYTES = 8 12 | const JL_HEADER_BYTES = 8 13 | 14 | @inline function create_node(::Val{N}) where N 15 | # N is now compile-time known 16 | ListNode{N}(nothing, ntuple(_ -> UInt8(0), Val(N))) 17 | end 18 | 19 | # These constants are specific to Julia's pool allocator. If Julia changes the implementation, these value may need to be updated. 20 | const JL_GC_SIZECLASS = [8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 336, 368, 400, 448, 496, 544, 576, 624, 672, 736, 816, 896, 1008, 1088, 1168, 1248, 1360, 1488, 1632, 1808, 2032] 21 | const N_SIZE_CLASSES = length(JL_GC_SIZECLASS) 22 | const PAGE_SIZE = 16384 23 | 24 | # These constants are configurable 25 | # How many pages to allocate for each size class 26 | const PAGES_TO_ALLOCATE = 10240 27 | # The keep alive pattern for the allocated pool objects 28 | # 1 = keep one object alive per page 29 | # 2 = keep every other objects alive 30 | const KEEP_ALIVE_PATTERN = 2 31 | 32 | const KEPT_LISTS = [] 33 | 34 | function process_size_class(sz_class_index::Int, sz::Int) 35 | payload_sz = sz - JL_HEADER_BYTES 36 | tuple_len = payload_sz - NEXT_PTR_BYTES 37 | T = Val(tuple_len) 38 | if tuple_len <= 0 39 | return 40 | end 41 | objs_per_page = div(PAGE_SIZE, sz) 42 | total_objs = Int64(PAGES_TO_ALLOCATE * objs_per_page) 43 | 44 | actual_sz = sizeof(ListNode{tuple_len}) 45 | actual_sz + JL_HEADER_BYTES == sz || error("Actual size does not match expected size") 46 | 47 | println("Processing size class $sz") 48 | println(" Actual payload size: $actual_sz bytes, N: $tuple_len") 49 | println(" Allocating $total_objs objects") 50 | 51 | print_page_utilization("Before", sz_class_index) 52 | 53 | # Create initial 
linked list 54 | head = create_list(total_objs, T) 55 | n_nodes(head) == total_objs || error("Actual number of objects does not match total_objs") 56 | 57 | print_page_utilization("Alloc", sz_class_index) 58 | 59 | # Fragment the linked list 60 | new_head = fragment_list(head, objs_per_page) 61 | # Preserve remaining objects 62 | global KEPT_LISTS 63 | push!(KEPT_LISTS, new_head) 64 | 65 | print_page_utilization("Fragment", sz_class_index) 66 | end 67 | 68 | function create_list(total_objs::Int64, T::Val) 69 | # We need to be careful that the loop does not introduce allocations 70 | head = current = create_node(T) 71 | i = 2 72 | while true 73 | current.next = create_node(T) 74 | current = current.next 75 | i += 1 76 | i > total_objs && break 77 | end 78 | return head 79 | end 80 | 81 | function fragment_list(head::ListNode, objs_per_page::Int) 82 | new_head = head 83 | last = head 84 | current = head 85 | counter = 1 86 | while !isnothing(current) && !isnothing(current.next) 87 | next = current.next 88 | # If pattern == 1, keep one object alive per page 89 | # If pattern == 2, keep every other object alive 90 | if (KEEP_ALIVE_PATTERN == 1 && mod(counter, objs_per_page) == 0) || (KEEP_ALIVE_PATTERN == 2 && mod(counter, 2) == 0) 91 | last.next = current 92 | last = current 93 | end 94 | current = next 95 | counter += 1 96 | end 97 | return new_head 98 | end 99 | 100 | function n_nodes(node::ListNode) 101 | count = 0 102 | current = node 103 | while !isnothing(current) 104 | count += 1 105 | current = current.next 106 | end 107 | return count 108 | end 109 | 110 | function print_page_utilization(msg::String, sz_class_index::Int) 111 | GC.gc(true) 112 | utils = Base.gc_page_utilization_data() 113 | 114 | for (i, sz) in enumerate(JL_GC_SIZECLASS) 115 | if sz_class_index == -1 || i == sz_class_index 116 | println(" $(lpad(msg, 10)): Pool $i: $(round(utils[i]*100, digits=2))%") 117 | end 118 | end 119 | end 120 | 121 | function main() 122 | for (i,sz) in 
enumerate(JL_GC_SIZECLASS) 123 | # Julia aligns up object size to 16 bytes. We only allocate if the size class is 16 bytes aligned. 124 | if mod(sz, 16) != 0 125 | continue 126 | end 127 | process_size_class(i, sz) 128 | end 129 | 130 | println("\nFragmentation complete.") 131 | print_page_utilization("Final", -1) 132 | 133 | sum = 0 134 | println("Kept lists: $(length(KEPT_LISTS))") 135 | for l in KEPT_LISTS 136 | println(" List length: $(n_nodes(l))") 137 | sum += n_nodes(l) 138 | end 139 | println("Total live objects: $sum") 140 | end 141 | 142 | @gctime main() 143 | -------------------------------------------------------------------------------- /run_benchmarks.jl: -------------------------------------------------------------------------------- 1 | const doc = """run_benchmarks.jl -- GC benchmarks test harness 2 | Usage: 3 | run_benchmarks.jl (serial|multithreaded|compiler|fragmentation|slow) (all| []) [options] 4 | run_benchmarks.jl -h | --help 5 | run_benchmarks.jl --version 6 | Options: 7 | -n , --runs= Number of runs for each benchmark [default: 10]. 8 | -t , --threads= Number of threads to use [default: 1]. 9 | -g , --gcthreads= Number of GC threads to use [default: 0]. 10 | -s , --scale= Maximum number of gcthreads for scaling test. 11 | -h, --help Show this screen. 12 | --version Show version. 
13 | --json Serializes output to `json` file 14 | """ 15 | 16 | using DocOpt 17 | using JSON 18 | using PrettyTables 19 | using Printf 20 | using Serialization 21 | using Statistics 22 | using TypedTables 23 | using CSV 24 | 25 | const args = docopt(doc, version = v"0.1.1") 26 | const JULIAVER = Base.julia_cmd()[1] 27 | 28 | # times in ns 29 | # TODO: get better stats 30 | function get_stats(times::Vector) 31 | return [minimum(times), median(times), maximum(times), std(times)] 32 | end 33 | 34 | """ 35 | Highlights cells in a column based on value 36 | green if less than lo 37 | yellow if between lo and hi 38 | red if above hi 39 | """ 40 | function highlight_col(col, lo, hi) 41 | [Highlighter((data,i,j) -> (j == col) && data[i, j] <= lo; foreground=:green), 42 | Highlighter((data,i,j) -> (j == col) && lo < data[i, j] < hi; foreground=:yellow), 43 | Highlighter((data,i,j) -> (j == col) && hi <= data[i, j]; foreground=:red),] 44 | end 45 | 46 | function diff(gc_end, gc_start, p) 47 | v0 = getproperty(gc_start, p) 48 | v1 = getproperty(gc_end, p) 49 | v1-v0 50 | end 51 | 52 | function extract(gc_end, gc_start, p) 53 | map((gc_end, gc_start)->diff(gc_end, gc_start, p), gc_end, gc_start) 54 | end 55 | 56 | function run_bench(runs, threads, gcthreads, file, show_json = false) 57 | value = [] 58 | times = [] 59 | gc_diff = [] 60 | gc_end = [] 61 | gc_start = [] 62 | for _ in 1:runs 63 | # uglyness to communicate over non stdout (specifically file descriptor 3) 64 | p = Base.PipeEndpoint() 65 | _gcthreads = gcthreads == 0 ? `` : `--gcthreads=$gcthreads` 66 | cmd = `$JULIAVER --project=. 
--threads=$threads $_gcthreads $file SERIALIZE` 67 | cmd = run(Base.CmdRedirect(cmd, p, 3), stdin, stdout, stderr, wait=false) 68 | r = deserialize(p) 69 | @assert success(cmd) 70 | # end uglyness 71 | push!(value, r.value) 72 | push!(times, r.times) 73 | push!(gc_diff, r.gc_diff) 74 | push!(gc_end, r.gc_end) 75 | push!(gc_start, r.gc_start) 76 | end 77 | gc_times = extract(gc_end, gc_start, :total_time) 78 | mark_times = extract(gc_end, gc_start, :total_mark_time) 79 | sweep_times = extract(gc_end, gc_start, :total_sweep_time) 80 | times_to_safepoint = extract(gc_end, gc_start, :total_time_to_safepoint) 81 | ncollect = extract(gc_end, gc_start, :collect) 82 | nfull_sweep = extract(gc_end, gc_start, :full_sweep) 83 | 84 | data = Table( 85 | time = times, 86 | gc_time = gc_times, 87 | mark_time = mark_times, 88 | sweep_time = sweep_times, 89 | time_to_safepoint = times_to_safepoint, 90 | ncollections = ncollect, 91 | nfull_sweeps = nfull_sweep, 92 | file = [file for _ in 1:runs], 93 | threads = [threads for _ in 1:runs], 94 | gcthreads = [gcthreads for _ in 1:runs], 95 | version = [string(Base.VERSION) for _ in 1:runs], 96 | ) 97 | results = joinpath(@__DIR__, "results.csv") 98 | CSV.write(results, data; append=isfile(results)) 99 | 100 | total_stats = get_stats(times) ./ 1_000_000 101 | gc_time = get_stats(gc_times) ./ 1_000_000 102 | mark_time = get_stats(mark_times) ./ 1_000_000 103 | sweep_time = get_stats(sweep_times) ./ 1_000_000 104 | time_to_safepoint = get_stats(times_to_safepoint) ./ 1_000 105 | 106 | max_pause = get_stats(map(stat->stat.max_pause, gc_end)) ./ 1_000_000 107 | max_mem = get_stats(map(stat->stat.max_memory, gc_end)) ./ 1024^2 108 | pct_gc = get_stats(map((t,stat)->(stat.total_time/t), times, gc_diff)) .* 100 109 | 110 | header = (["", "total time", "gc time", "mark time", "sweep time", "max GC pause", "time to safepoint", "max heap", "percent gc"], 111 | ["", "ms", "ms", "ms", "ms", "ms", "us", "MB", "%" ]) 112 | labels = ["minimum", 
"median", "maximum", "stdev"] 113 | highlighters = highlight_col(6, 10, 100) # max pause 114 | append!(highlighters, highlight_col(7, 1, 10)) # time to safepoint 115 | append!(highlighters, highlight_col(9, 10, 50)) # pct gc 116 | highlighters = Tuple(highlighters) 117 | if show_json 118 | data = Dict([("total time", total_stats), 119 | ("gc time", gc_time), 120 | ("mark time", mark_time), 121 | ("sweep time", sweep_time), 122 | ("max pause", max_pause), 123 | ("ttsp", time_to_safepoint), 124 | ("max memory", max_mem), 125 | ("pct gc", pct_gc)]) 126 | JSON.print(data) 127 | else 128 | data = hcat(labels, total_stats, gc_time, mark_time, sweep_time, max_pause, time_to_safepoint, max_mem, pct_gc) 129 | pretty_table(data; header, formatters=ft_printf("%0.0f"), highlighters) 130 | end 131 | end 132 | 133 | function run_category_files(benches, args, show_json = false) 134 | local runs = parse(Int, args["--runs"]) 135 | local threads = parse(Int, args["--threads"]) 136 | local gcthreads = parse(Int, args["--gcthreads"]) 137 | local max = if isnothing(args["--scale"]) 0 else parse(Int, args["--scale"]) end 138 | for bench in benches 139 | if !show_json 140 | @show bench 141 | end 142 | if isnothing(args["--scale"]) 143 | run_bench(runs, threads, gcthreads, bench, show_json) 144 | else 145 | local n = 0 146 | while true 147 | gcthreads = 2^n 148 | gcthreads > max && break 149 | @show (gcthreads, threads) 150 | run_bench(runs, threads, gcthreads, bench, show_json) 151 | n += 1 152 | end 153 | end 154 | end 155 | end 156 | 157 | function run_all_categories(args, show_json = false) 158 | for category in readdir() 159 | @show category 160 | cd(category) 161 | benches = filter(f -> endswith(f, ".jl"), readdir()) 162 | run_category_files(benches, args, show_json) 163 | cd("..") 164 | end 165 | end 166 | 167 | function main(args) 168 | rm("results.csv", force=true) 169 | cd(joinpath(@__DIR__, "benches")) 170 | 171 | # validate choices 172 | if !isnothing(args["--scale"]) 173 | 
@assert args["--gcthreads"] == "0" "Specify either --scale or --threads." 174 | end 175 | 176 | # select benchmark class 177 | if args["serial"] 178 | cd("serial") 179 | elseif args["multithreaded"] 180 | cd("multithreaded") 181 | elseif args["compiler"] 182 | cd("compiler") 183 | elseif args["fragmentation"] 184 | cd("fragmentation") 185 | else # slow 186 | cd("slow") 187 | end 188 | 189 | show_json = args["--json"] 190 | 191 | if args["all"] 192 | run_all_categories(args, show_json) 193 | else 194 | cd(args[""]) 195 | benches = if isnothing(args[""]) 196 | filter(f -> endswith(f, ".jl"), readdir()) 197 | else 198 | ["$(args[""]).jl"] 199 | end 200 | run_category_files(benches, args, show_json) 201 | end 202 | end 203 | 204 | main(args) 205 | -------------------------------------------------------------------------------- /benches/compiler/inference/inference_benchmarks.jl: -------------------------------------------------------------------------------- 1 | include(joinpath("..", "..", "..", "util", "utils.jl")) 2 | 3 | # InferenceBenchmarks taken from BaseBenchmarks.jl (https://github.com/JuliaCI/BaseBenchmarks.jl) 4 | 5 | module InferenceBenchmarks 6 | 7 | # InferenceBenchmarker 8 | # ==================== 9 | # this new `AbstractInterpreter` satisfies the minimum interface requirements and manages 10 | # its cache independently in a way it is totally separated from the native code cache 11 | # managed by the runtime system: this allows us to profile Julia-level inference reliably 12 | # without being influenced by previous trials or some native execution 13 | 14 | @static if VERSION ≥ v"1.12.0-DEV.1581" 15 | if Base.REFLECTION_COMPILER[] === nothing 16 | const CC = Base.Compiler 17 | else 18 | const CC = Base.REFLECTION_COMPILER[] 19 | end 20 | else 21 | const CC = Core.Compiler 22 | end 23 | 24 | using Core: 25 | MethodInstance, CodeInstance, MethodTable, SimpleVector 26 | using .CC: 27 | AbstractInterpreter, InferenceParams, InferenceResult, InferenceState, 28 
| OptimizationParams, OptimizationState, WorldRange, WorldView, 29 | specialize_method, unwrap_unionall, rewrap_unionall, copy 30 | @static if VERSION ≥ v"1.11.0-DEV.1498" 31 | import .CC: get_inference_world 32 | else 33 | import .CC: get_world_counter as get_inference_world 34 | end 35 | using Base: get_world_counter 36 | using InteractiveUtils: gen_call_with_extracted_types_and_kwargs 37 | using BenchmarkTools: @benchmarkable, BenchmarkGroup, addgroup! 38 | 39 | struct InferenceBenchmarkerCache 40 | dict::IdDict{MethodInstance,CodeInstance} 41 | InferenceBenchmarkerCache() = new(IdDict{MethodInstance,CodeInstance}()) 42 | end 43 | struct InferenceBenchmarker <: AbstractInterpreter 44 | world::UInt 45 | inf_params::InferenceParams 46 | opt_params::OptimizationParams 47 | optimize::Bool 48 | compress::Bool 49 | discard_trees::Bool 50 | inf_cache::Vector{InferenceResult} 51 | code_cache::InferenceBenchmarkerCache 52 | function InferenceBenchmarker( 53 | world::UInt = get_world_counter(); 54 | inf_params::InferenceParams = InferenceParams(), 55 | opt_params::OptimizationParams = OptimizationParams(), 56 | optimize::Bool = true, 57 | compress::Bool = true, 58 | discard_trees::Bool = true, 59 | inf_cache::Vector{InferenceResult} = InferenceResult[], 60 | code_cache::InferenceBenchmarkerCache = InferenceBenchmarkerCache()) 61 | return new( 62 | world, 63 | inf_params, 64 | opt_params, 65 | optimize, 66 | compress, 67 | discard_trees, 68 | inf_cache, 69 | code_cache) 70 | end 71 | end 72 | 73 | CC.may_optimize(interp::InferenceBenchmarker) = interp.optimize 74 | CC.may_compress(interp::InferenceBenchmarker) = interp.compress 75 | CC.may_discard_trees(interp::InferenceBenchmarker) = interp.discard_trees 76 | CC.InferenceParams(interp::InferenceBenchmarker) = interp.inf_params 77 | CC.OptimizationParams(interp::InferenceBenchmarker) = interp.opt_params 78 | #=CC.=#get_inference_world(interp::InferenceBenchmarker) = interp.world 79 | 
CC.get_inference_cache(interp::InferenceBenchmarker) = interp.inf_cache 80 | CC.code_cache(interp::InferenceBenchmarker) = WorldView(interp.code_cache, WorldRange(get_inference_world(interp))) 81 | CC.get(wvc::WorldView{InferenceBenchmarkerCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default) 82 | CC.getindex(wvc::WorldView{InferenceBenchmarkerCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi) 83 | CC.haskey(wvc::WorldView{InferenceBenchmarkerCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi) 84 | CC.setindex!(wvc::WorldView{InferenceBenchmarkerCache}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi) 85 | @static if isdefined(CC, :cache_owner) 86 | CC.cache_owner(wvc::InferenceBenchmarker) = wvc.code_cache 87 | end 88 | 89 | function inf_gf_by_type!(interp::InferenceBenchmarker, @nospecialize(tt::Type{<:Tuple}); kwargs...) 90 | match = Base._which(tt; world=get_inference_world(interp)) 91 | return inf_method_signature!(interp, match.method, match.spec_types, match.sparams; kwargs...) 92 | end 93 | 94 | inf_method!(interp::InferenceBenchmarker, m::Method; kwargs...) = 95 | inf_method_signature!(interp, m, m.sig, method_sparams(m); kwargs...) 96 | function method_sparams(m::Method) 97 | s = TypeVar[] 98 | sig = m.sig 99 | while isa(sig, UnionAll) 100 | push!(s, sig.var) 101 | sig = sig.body 102 | end 103 | return svec(s...) 104 | end 105 | inf_method_signature!(interp::InferenceBenchmarker, m::Method, @nospecialize(atype), sparams::SimpleVector; kwargs...) = 106 | inf_method_instance!(interp, specialize_method(m, atype, sparams)::MethodInstance; kwargs...) 107 | 108 | function inf_method_instance!(interp::InferenceBenchmarker, mi::MethodInstance; 109 | run_optimizer::Bool = true) 110 | result = InferenceResult(mi) 111 | frame = InferenceState(result, #=cache_mode=#run_optimizer ? 
:global : :no, interp)::InferenceState 112 | CC.typeinf(interp, frame) 113 | return frame 114 | end 115 | 116 | macro inf_call(ex0...) 117 | return gen_call_with_extracted_types_and_kwargs(__module__, :inf_call, ex0) 118 | end 119 | function inf_call(@nospecialize(f), @nospecialize(types = Base.default_tt(f)); 120 | interp::InferenceBenchmarker = InferenceBenchmarker(), 121 | run_optimizer::Bool = true) 122 | ft = Core.Typeof(f) 123 | if isa(types, Type) 124 | u = unwrap_unionall(types) 125 | tt = rewrap_unionall(Tuple{ft, u.parameters...}, types) 126 | else 127 | tt = Tuple{ft, types...} 128 | end 129 | frame = inf_gf_by_type!(interp, tt; run_optimizer) 130 | frame.bestguess !== Union{} || error("invalid inference benchmark found") 131 | return frame 132 | end 133 | 134 | macro abs_call(ex0...) 135 | return gen_call_with_extracted_types_and_kwargs(__module__, :abs_call, ex0) 136 | end 137 | function abs_call(@nospecialize(f), @nospecialize(types = Base.default_tt(f)); 138 | interp::InferenceBenchmarker = InferenceBenchmarker(; optimize = false)) 139 | return inf_call(f, types; interp) 140 | end 141 | 142 | macro opt_call(ex0...) 143 | return gen_call_with_extracted_types_and_kwargs(__module__, :opt_call, ex0) 144 | end 145 | function opt_call(@nospecialize(f), @nospecialize(types = Base.default_tt(f)); 146 | interp::InferenceBenchmarker = InferenceBenchmarker()) 147 | frame = inf_call(f, types; interp, run_optimizer = false) 148 | evals = 0 149 | return function () 150 | @assert (evals += 1) <= 1 151 | # # `optimize` may modify these objects, so need to stash the pre-optimization states, if we want to allow multiple evals 152 | # src, stmt_info, slottypes, ssavalue_uses = copy(frame.src), copy(frame.stmt_info), copy(frame.slottypes), copy(frame.ssavalue_uses) 153 | # cfg = copy(frame.cfg) 154 | # unreachable = @static hasfield(InferenceState, :unreachable) ? copy(frame.unreachable) : nothing 155 | # bb_vartables = @static hasfield(InferenceState, :bb_vartables) ? 
copy(frame.bb_vartables) : nothing 156 | opt = OptimizationState(frame, interp) 157 | CC.optimize(interp, opt, frame.result) 158 | # frame.src, frame.stmt_info, frame.slottypes, frame.ssavalue_uses = src, stmt_info, slottypes, ssavalue_uses 159 | # cfg === nothing || (frame.cfg = cfg) 160 | # unreachable === nothing || (frame.unreachable = unreachable) 161 | # bb_vartables === nothing || (frame.bb_vartables = bb_vartables) 162 | end 163 | end 164 | 165 | function tune_benchmarks!( 166 | g::BenchmarkGroup; 167 | seconds=30, 168 | gcsample=true, 169 | ) 170 | for v in values(g) 171 | v.params.seconds = seconds 172 | v.params.gcsample = gcsample 173 | v.params.evals = 1 # `setup` must be functional 174 | end 175 | end 176 | 177 | # "inference" benchmark targets 178 | # ============================= 179 | 180 | # TODO add TTFP? 181 | # XXX some targets below really depends on the compiler implementation itself 182 | # (e.g. `abstract_call_gf_by_type`) and thus a bit more unreliable -- ideally 183 | # we want to replace them with other functions that have the similar characteristics 184 | # but whose call graph are orthogonal to the Julia's compiler implementation 185 | 186 | using REPL.REPLCompletions: completions 187 | broadcasting(xs, x) = findall(>(x), abs.(xs)) 188 | let # check the compilation behavior for a function with lots of local variables 189 | # (where the sparse state management is critical to get a reasonable performance) 190 | # see https://github.com/JuliaLang/julia/pull/45276 191 | n = 10000 192 | ex = Expr(:block) 193 | var = gensym() 194 | push!(ex.args, :($var = x)) 195 | for _ = 1:n 196 | newvar = gensym() 197 | push!(ex.args, :($newvar = $var + 1)) 198 | var = newvar 199 | end 200 | @eval global function many_local_vars(x) 201 | $ex 202 | end 203 | end 204 | let # benchmark the performance benefit of `CachedMethodTable` 205 | # see https://github.com/JuliaLang/julia/pull/46535 206 | n = 100 207 | ex = Expr(:block) 208 | var = gensym() 209 | 
push!(ex.args, :(y = sum(x))) 210 | for i = 1:n 211 | push!(ex.args, :(x .= $(Float64(i)))) 212 | push!(ex.args, :(y += sum(x))) 213 | end 214 | push!(ex.args, :(return y)) 215 | @eval global function many_method_matches(x) 216 | $ex 217 | end 218 | end 219 | let # check the performance benefit of concrete evaluation 220 | param = 1000 221 | ex = Expr(:block) 222 | var = gensym() 223 | push!(ex.args, :($var = x)) 224 | for _ = 1:param 225 | newvar = gensym() 226 | push!(ex.args, :($newvar = sin($var))) 227 | var = newvar 228 | end 229 | @eval let 230 | sins(x) = $ex 231 | global many_const_calls() = sins(42) 232 | end 233 | end 234 | # check the performance benefit of caching `GlobalRef`-lookup result 235 | # see https://github.com/JuliaLang/julia/pull/46729 236 | using Core.Intrinsics: add_int 237 | const ONE = 1 238 | @eval function many_global_refs(x) 239 | z = 0 240 | $([:(z = add_int(x, add_int(z, ONE))) for _ = 1:10000]...) 241 | return add_int(z, ONE) 242 | end 243 | strangesum(::Vector{Float64}) = error("this should not be called") 244 | strangesum(x::AbstractArray) = sum(x) 245 | let # check performance of invoke call handling 246 | n = 100 247 | ex = Expr(:block) 248 | var = gensym() 249 | push!(ex.args, :(y = sum(x))) 250 | for i = 1:n 251 | push!(ex.args, :(y += Base.@invoke strangesum(x::AbstractArray))) 252 | end 253 | push!(ex.args, :(return y)) 254 | @eval global function many_invoke_calls(x) 255 | $ex 256 | end 257 | end 258 | import Base.Experimental: @opaque 259 | let # check performance of opaque closure handling 260 | n = 100 261 | ex = Expr(:block) 262 | var = gensym() 263 | push!(ex.args, :(y = sum(x))) 264 | for i = 1:n 265 | push!(ex.args, :(oc = @inline @opaque (i, x, y) -> begin 266 | x .= Float64(i) 267 | y += sum(x) 268 | end)) 269 | push!(ex.args, :(oc($i, x, y))) 270 | end 271 | push!(ex.args, :(return y)) 272 | @eval global function many_opaque_closures(x) 273 | $ex 274 | end 275 | end 276 | 277 | 278 | function run_all_benchmarks() 
279 | # abstract interpretation 280 | @abs_call sin(42) 281 | @abs_call rand(Float64) 282 | abs_call(println, (QuoteNode,)) 283 | abs_call(broadcasting, (Vector{Float64},Float64)) 284 | abs_call(completions, (String,Int)) 285 | abs_call(Base.init_stdio, (Ptr{Cvoid},)) 286 | abs_call(many_local_vars, (Int,)) 287 | abs_call(many_method_matches, (Vector{Float64},)) 288 | abs_call(many_const_calls) 289 | abs_call(many_global_refs, (Int,)) 290 | abs_call(many_invoke_calls, (Vector{Float64},)) 291 | abs_call(many_opaque_closures, (Vector{Float64},)) 292 | # optimization 293 | @opt_call sin(42) 294 | @opt_call rand(Float64) 295 | opt_call(println, (QuoteNode,)) 296 | opt_call(broadcasting, (Vector{Float64},Float64)) 297 | opt_call(completions, (String,Int)) 298 | opt_call(Base.init_stdio, (Ptr{Cvoid},)) 299 | opt_call(many_local_vars, (Int,)) 300 | opt_call(many_method_matches, (Vector{Float64},)) 301 | opt_call(many_const_calls) 302 | opt_call(many_global_refs, (Int,)) 303 | opt_call(many_invoke_calls, (Vector{Float64},)) 304 | opt_call(many_opaque_closures, (Vector{Float64},)) 305 | # all inference 306 | @inf_call sin(42) 307 | @inf_call rand(Float64) 308 | inf_call(println, (QuoteNode,)) 309 | inf_call(broadcasting, (Vector{Float64},Float64)) 310 | inf_call(completions, (String,Int)) 311 | inf_call(Base.init_stdio, (Ptr{Cvoid},)) 312 | inf_call(many_local_vars, (Int,)) 313 | inf_call(many_method_matches, (Vector{Float64},)) 314 | inf_call(many_const_calls) 315 | inf_call(many_global_refs, (Int,)) 316 | inf_call(many_invoke_calls, (Vector{Float64},)) 317 | inf_call(many_opaque_closures, (Vector{Float64},)) 318 | return nothing 319 | end 320 | 321 | end # module InferenceBenchmarks 322 | 323 | using .InferenceBenchmarks 324 | 325 | @gctime InferenceBenchmarks.run_all_benchmarks() 326 | --------------------------------------------------------------------------------