├── project
├── build.properties
├── metals.sbt
├── plugins.sbt
└── project
│   ├── metals.sbt
│   └── project
│   └── metals.sbt
├── .vscode
└── settings.json
├── src
├── main
│   ├── resources
│   │   ├── application.conf
│   │   ├── db
│   │   │   └── migration
│   │   │   │   └── V1__Create_items_table.sql
│   │   └── logback.xml
│   ├── java.skip
│   │   └── net
│   │   │   └── degoes
│   │   │   ├── project
│   │   │   ├── dataset1
│   │   │   │   ├── Field.java
│   │   │   │   ├── Row.java
│   │   │   │   ├── Value.java
│   │   │   │   └── Dataset.java
│   │   │   ├── 09-project.java
│   │   │   └── ProjectBenchmark.java
│   │   │   ├── algorithms
│   │   │   ├── FindMostPopularFriendBenchmark.java
│   │   │   └── 00-algorithms.java
│   │   │   ├── tuning
│   │   │   ├── TuningBenchmark2.java
│   │   │   ├── TuningBenchmark3.java
│   │   │   ├── TuningBenchmark1.java
│   │   │   ├── TuningBenchmark4.java
│   │   │   └── 10-tuning.java
│   │   │   ├── collections
│   │   │   ├── IterationBenchmark.java
│   │   │   ├── ConcatBenchmark.java
│   │   │   ├── RandomAccessBenchmark.java
│   │   │   ├── ElementPrependBenchmark.java
│   │   │   ├── LookupBenchmark.java
│   │   │   ├── GraduationBenchmark.java
│   │   │   └── 01-collections.java
│   │   │   ├── exceptions
│   │   │   ├── ThrowExceptionBenchmark.java
│   │   │   ├── FillInStackTraceBenchmark.java
│   │   │   ├── ThrowSameExceptionBenchmark.java
│   │   │   └── 05-exceptions.java
│   │   │   ├── tools
│   │   │   ├── PrintCompilationBenchmark.java
│   │   │   ├── PrintInliningBenchmark.java
│   │   │   ├── ProfilerExample.java
│   │   │   ├── 08-tools.java
│   │   │   └── JavapBenchmark.java
│   │   │   ├── allocation
│   │   │   ├── AllocBenchmark.java
│   │   │   ├── CopyAllocBenchmark.java
│   │   │   ├── 03-allocation.java
│   │   │   └── MarkSweepBenchmark.java
│   │   │   ├── gotchas
│   │   │   ├── MisleadingBenchmark.java
│   │   │   ├── SetupOverheadBenchmark.java
│   │   │   └── 11-gotchas.java
│   │   │   ├── tricks
│   │   │   ├── PrimitivizeReturnBenchmark.java
│   │   │   ├── NoExceptionsBenchmark.java
│   │   │   ├── UseNullBenchmark.java
│   │   │   ├── UseArraysBenchmark.java
│   │   │   ├── FlattenProductsBenchmark.java
│   │   │   ├── SpecializeBenchmark.java
│   │   │   ├── DevirtualizeBenchmark.java
│   │   │   ├── NoAllocationBenchmark.java
│   │   │   ├── StackInterpreterBenchmark.java
│   │   │   ├── MapToArrayBenchmark.java
│   │   │   └── 07-tricks.java
│   │   │   ├── boxing
│   │   │   ├── BoxedComparatorBenchmark.java
│   │   │   ├── BoxedBenchmark.java
│   │   │   └── 04-boxing.java
│   │   │   ├── estimation
│   │   │   ├── Estimation1Benchmark.java
│   │   │   ├── Estimation2Benchmark.java
│   │   │   ├── Estimation3Benchmark.java
│   │   │   ├── 06-estimation.java
│   │   │   └── Estimation4Benchmark.java
│   │   │   └── virtual
│   │   │   ├── PolySimBenchmark.java
│   │   │   ├── PolyBenchmark.java
│   │   │   └── 02-virtual.java
│   ├── scala.skip
│   │   └── net
│   │   │   └── degoes
│   │   │   ├── welcome.scala
│   │   │   ├── JmhExample.scala.skip
│   │   │   ├── 05-exceptions.scala
│   │   │   ├── 04-boxing.scala
│   │   │   ├── 11-gotchas.scala
│   │   │   ├── 00-algorithms.scala
│   │   │   ├── 03-allocation.scala
│   │   │   ├── 09-project.scala
│   │   │   ├── 02-virtual.scala
│   │   │   ├── 10-tuning.scala
│   │   │   └── 01-collections.scala
│   └── kotlin.skip
│   │   └── net
│   │   └── degoes
│   │   ├── 05-exceptions.kt
│   │   ├── 11-gotchas.kt
│   │   ├── 04-boxing.kt
│   │   ├── 00-algorithms.kt
│   │   ├── 03-allocation.kt
│   │   ├── 02-virtual.kt
│   │   ├── 09-project.kt
│   │   ├── 10-tuning.kt
│   │   └── 01-collections.kt
└── test
│   └── scala.skip
│   └── scala
│   └── net
│   └── degoes
│   └── AlgorithmsSpec.scala
├── .sbtopts
├── .gitignore
├── .scalafmt.conf
└── README.md

/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=1.8.2
2 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
-------------------------------------------------------------------------------- 1 | { 2 | "files.watcherExclude": { 3 | "**/target": true 4 | } 5 | } -------------------------------------------------------------------------------- /src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | api { 2 | host = "0.0.0.0" 3 | port = 8080 4 | } 5 | 6 | -------------------------------------------------------------------------------- /.sbtopts: -------------------------------------------------------------------------------- 1 | # -J-XX:+PrintFlagsFinal 2 | # -J-XX:-DoEscapeAnalysis 3 | # -J-XX:-Inline 4 | # -J-XX:-UseCompiler -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .bsp/ 2 | .idea/ 3 | target/ 4 | boot/ 5 | logs/ 6 | lib_managed/ 7 | src_managed/ 8 | project/plugins/project/ 9 | .bloop 10 | .metals -------------------------------------------------------------------------------- /src/main/resources/db/migration/V1__Create_items_table.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE items ( 2 | id serial primary key NOT NULL, 3 | name varchar NOT NULL, 4 | price numeric(21,2) NOT NULL 5 | ); 6 | -------------------------------------------------------------------------------- /project/metals.sbt: -------------------------------------------------------------------------------- 1 | // DO NOT EDIT! This file is auto-generated. 2 | 3 | // This file enables sbt-bloop to create bloop config files. 4 | 5 | addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.5.8") 6 | 7 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.3") 2 | addSbtPlugin("com.hanhuy.sbt" % "kotlin-plugin" % "2.0.0") 3 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6") 4 | -------------------------------------------------------------------------------- /project/project/metals.sbt: -------------------------------------------------------------------------------- 1 | // DO NOT EDIT! This file is auto-generated. 2 | 3 | // This file enables sbt-bloop to create bloop config files. 4 | 5 | addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.5.8") 6 | 7 | -------------------------------------------------------------------------------- /project/project/project/metals.sbt: -------------------------------------------------------------------------------- 1 | // DO NOT EDIT! This file is auto-generated. 2 | 3 | // This file enables sbt-bloop to create bloop config files. 
4 | 5 | addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.5.8") 6 | 7 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/project/dataset1/Field.java: -------------------------------------------------------------------------------- 1 | package net.degoes.project.dataset1; 2 | 3 | public class Field { 4 | String name; 5 | public Field(String name) { 6 | this.name = name; 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.7.3" 2 | runner.dialect = scala213 3 | maxColumn = 100 4 | align.preset = most 5 | continuationIndent.defnSite = 2 6 | assumeStandardLibraryStripMargin = true 7 | docstrings.style = Asterisk 8 | lineEndings = preserve 9 | includeCurlyBraceInSelectChains = false 10 | danglingParentheses.preset = true 11 | spaces { 12 | inImportCurlyBraces = true 13 | } 14 | indentOperator.preset = akka-http 15 | optIn.annotationNewlines = true 16 | 17 | rewrite.rules = [SortImports, RedundantBraces] 18 | -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/welcome.scala: -------------------------------------------------------------------------------- 1 | package net.degoes 2 | 3 | object welcome { 4 | // 5 | // Please join and say hello: 6 | // 7 | // CHAT ROOM: https://discord.gg/VYvFKC8 8 | // 9 | // Please git clone and build: 10 | // 11 | // REPOSITORY: https://github.com/jdegoes/jvm-perf 12 | // 13 | // Daily Schedule: 14 | // 15 | // START : 7:00 London Time 16 | // BIG BREAK : 8:45 London Time 17 | // RESUME : 9:20 London Time 18 | // END : 11:00 London Time 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/algorithms/FindMostPopularFriendBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.algorithms; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.List; 7 | import java.util.Comparator; 8 | import java.util.Optional; 9 | import java.util.Random; 10 | import java.util.stream.IntStream; 11 | import java.util.stream.Collectors; 12 | 13 | public class FindMostPopularFriendBenchmark { 14 | @Benchmark 15 | public void findMostPopularFriend(Blackhole blackHole) { 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/project/dataset1/Row.java: -------------------------------------------------------------------------------- 1 | package net.degoes.project.dataset1; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.function.BinaryOperator; 7 | import java.util.concurrent.atomic.AtomicReference; 8 | import zio.Chunk; 9 | import scala.util.Random; 10 | import io.vavr.collection.Map; 11 | import io.vavr.collection.HashMap; 12 | 13 | public class Row { 14 | Map map; 15 | 16 | public Row(Map map) { 17 | this.map = map; 18 | } 19 | 20 | Value apply(Field field) { 21 | return map.apply(field.name); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/project/09-project.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * GRADUATION PROJECT 3 | * 4 | * In this section, you will tie together everything you have learned in order to significantly 5 | * optimize the performance of JVM-based code. 6 | * 7 | * See project/dataset1/*.java and project/ProjectBenchmark.java 8 | * 9 | * GRADUATION PROJECT 10 | * 11 | * Develop a version of `Dataset` that has a similar API, but which is at least 10x as fast. See how 12 | * far you can push it (can you get to 100x?). 13 | * 14 | * You may assume data is completely homogeneous and that no values are null. However, if ambitious, 15 | * you may solve the same problem under the assumption of undefined values and heterogeneous data. 16 | */ -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tuning/TuningBenchmark2.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tuning; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.SECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(value = 1, jvmArgsAppend = {}) 13 | @Threads(16) 14 | public class TuningBenchmark2 { 15 | @Param({"8000000"}) 16 | int size = 0; 17 | 18 | @Benchmark 19 | public void constantHeap(Blackhole blackhole) { 20 | blackhole.consume(new byte[size]); 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tuning/TuningBenchmark3.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tuning; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(value = 1, jvmArgsAppend = {}) 13 | @Threads(16) 14 | public class TuningBenchmark3 { 15 | @Param({"4000"}) 16 | int size = 0; 17 | 18 | @Benchmark 19 | public void heapBandwidth(Blackhole blackhole) { 20 | blackhole.consume(new byte[size]); 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/collections/IterationBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.collections; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import io.vavr.collection.List; 7 | import java.util.Collections; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(1) 15 | @Threads(16) 16 | public class IterationBenchmark { 17 | @Setup(Level.Trial) 18 | public void setup() { 19 | } 20 | 21 | @Benchmark 22 | public void list(Blackhole blackhole) { 23 | } 24 | } 
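/*
 * One possible way to fill in the empty setup() and list() bodies above (a sketch only;
 * the added @Param values and field are hypothetical, and the intent is simply to measure
 * traversal of an immutable linked List):
 *
 *   @Param({"1000", "10000", "100000"})
 *   int size = 0;
 *
 *   List<Integer> list = null;
 *
 *   @Setup(Level.Trial)
 *   public void setup() {
 *     list = List.range(0, size);
 *   }
 *
 *   // Walk the list and sum its elements; consuming the sum through the Blackhole
 *   // keeps the JIT from eliminating the loop as dead code.
 *   @Benchmark
 *   public void list(Blackhole blackhole) {
 *     int sum = 0;
 *     for (int element : list) sum += element;
 *     blackhole.consume(sum);
 *   }
 */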
-------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/collections/ConcatBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.collections; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import io.vavr.collection.List; 7 | import java.util.Collections; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(1) 15 | @Threads(16) 16 | public class ConcatBenchmark { 17 | @Setup(Level.Trial) 18 | public void setup() { 19 | } 20 | 21 | @Benchmark 22 | public void list(Blackhole blackhole) { 23 | 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/collections/RandomAccessBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.collections; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import io.vavr.collection.List; 7 | import java.util.Collections; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(1) 15 | @Threads(16) 16 | public class RandomAccessBenchmark { 17 | @Setup(Level.Trial) 18 | public void setup() { 19 | } 20 | 21 | @Benchmark 22 | public void list(Blackhole blackhole) { 23 | } 24 | } -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/JmhExample.scala.skip: -------------------------------------------------------------------------------- 1 | package net.degoes 2 | 3 | import org.openjdk.jmh.annotations._ 4 | import org.openjdk.jmh.infra.Blackhole 5 | import java.util.concurrent.TimeUnit 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.SECONDS) 9 | @BenchmarkMode(Array(Mode.Throughput)) 10 | @Warmup(iterations = 5, time = 100, timeUnit = TimeUnit.MILLISECONDS) 11 | @Measurement(iterations = 5, time = 100, timeUnit = TimeUnit.MILLISECONDS) 12 | @Fork(1) 13 | @Threads(16) 14 | class JmhExample { 15 | 16 | @Benchmark 17 | def testMethod(blackHole: Blackhole): Double = { 18 | val list: List[Int] = List.range(1, Integer.MAX_VALUE / 100000) 19 | val sum: Double = list.sum 20 | blackHole.consume(sum) 21 | sum 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/exceptions/ThrowExceptionBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.exceptions; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(value = 1, jvmArgsAppend = {}) 13 | @Threads(16) 14 | public class ThrowExceptionBenchmark { 15 | 
class MyException extends Exception { 16 | public MyException(String message) { 17 | super(message); 18 | } 19 | } 20 | 21 | @Benchmark 22 | public void throwCatchException() { 23 | } 24 | 25 | @Benchmark 26 | public void constructException() { 27 | } 28 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tools/PrintCompilationBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tools; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.ArrayList; 7 | 8 | @org.openjdk.jmh.annotations.State(Scope.Thread) 9 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 10 | @BenchmarkMode({Mode.Throughput}) 11 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Fork(value = 1, jvmArgsAppend = {}) 14 | @Threads(16) 15 | public class PrintCompilationBenchmark { 16 | @Param({"10", "20"}) 17 | int depth = 0; 18 | 19 | int fib(int n) { 20 | if (n <= 1) return 1; 21 | else return fib(n - 1) + fib(n - 2); 22 | } 23 | 24 | @Benchmark 25 | public void fib(Blackhole blackhole) { 26 | blackhole.consume(fib(depth)); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/test/scala.skip/scala/net/degoes/AlgorithmsSpec.scala: -------------------------------------------------------------------------------- 1 | package net.degoes.algorithms 2 | 3 | import zio.test._ 4 | 5 | object AlgorithmsSpec extends ZIOSpecDefault { 6 | def spec = 7 | suite("AlgorithmsSpec")( 8 | suite("SocialNetwork")( 9 | test("findMostPopularFriend - 3 person social network") { 10 | val network = 11 | SocialNetwork( 12 | List( 13 | Person(0, 20, "John", List(1)), 14 | Person(1, 30, "Jane", List(0, 2)), 15 | Person(2, 40, "Fred", List(1)) 16 | ) 17 | ) 18 | val result = network.findMostPopularFriend 19 | 20 | assertTrue(result == Some(1)) 21 | }, 22 | test("findMostPopularFriend - random social network") { 23 | val network = SocialNetwork.random(1000, 10) 24 | val result = network.findMostPopularFriend 25 | 26 | assertTrue(result.isDefined) 27 | } 28 | ) 29 | ) 30 | 31 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/allocation/AllocBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.allocation; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | import zio.Chunk; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(1) 15 | @Threads(16) 16 | public class AllocBenchmark { 17 | @Param({"100", "1000", "10000"}) 18 | int size = 0; 19 | 20 | @Setup 21 | public void setup() { 22 | } 23 | 24 | @Benchmark 25 | public void alloc(Blackhole blackhole) { 26 | int sum = 0; 27 | int i = 0; 28 | while (i < size) { 29 | sum = sum + (new Object().hashCode()); 30 | i = i + 1; 31 | } 32 | blackhole.consume(sum); 33 | } 34 | 35 | @Benchmark 36 | public void noAlloc(Blackhole blackhole) { 37 | } 38 | } -------------------------------------------------------------------------------- 
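One possible shape for the empty noAlloc benchmark in AllocBenchmark.java above (a sketch only, assuming a single Object is pre-allocated once in setup() and reused inside the loop; the field name `preallocated` is hypothetical):

    Object preallocated = null;

    @Setup
    public void setup() {
      preallocated = new Object();
    }

    // Same loop shape as alloc(), but nothing is allocated per iteration.
    @Benchmark
    public void noAlloc(Blackhole blackhole) {
      int sum = 0;
      int i = 0;
      while (i < size) {
        sum = sum + preallocated.hashCode();
        i = i + 1;
      }
      blackhole.consume(sum);
    }

Keeping the loop identical to alloc() isolates the cost of allocation itself (and the garbage-collection pressure it creates) rather than the surrounding arithmetic.
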
/src/main/java.skip/net/degoes/exceptions/FillInStackTraceBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.exceptions; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(value = 1, jvmArgsAppend = {}) 13 | @Threads(16) 14 | public class FillInStackTraceBenchmark { 15 | class MyException extends Exception { 16 | public MyException(String message) { 17 | super(message); 18 | } 19 | } 20 | 21 | MyException exception = new MyException("Hello"); 22 | 23 | @Benchmark 24 | public void fillInStackTrace() { 25 | } 26 | 27 | @Benchmark 28 | public void throwCatchNewException() { 29 | try { 30 | throw new MyException("Hello"); 31 | } catch (Throwable th) { 32 | } 33 | } 34 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/collections/ElementPrependBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.collections; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import io.vavr.collection.List; 7 | import java.util.Collections; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(1) 15 | @Threads(16) 16 | public class ElementPrependBenchmark { 17 | int PrependsPerIteration = 100; 18 | 19 | @Param({"1000", "10000", "100000"}) 20 | int size = 0; 21 | 22 | List startList = null; 23 | 24 | @Setup(Level.Trial) 25 | public void setup() { 26 | startList = List.ofAll(Collections.nCopies(size, "a")); 27 | } 28 | 29 | @Benchmark 30 | public void list(Blackhole blackhole) { 31 | blackhole.consume(startList.prepend("a")); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/exceptions/ThrowSameExceptionBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.exceptions; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(value = 1, jvmArgsAppend = {}) 13 | @Threads(16) 14 | public class ThrowSameExceptionBenchmark { 15 | class MyException extends Exception { 16 | public MyException(String message) { 17 | super(message); 18 | } 19 | } 20 | 21 | MyException exception = new MyException("Hello"); 22 | 23 | @Benchmark 24 | public void throwCatchNewException() { 25 | try { 26 | throw new MyException("Hello"); 27 | } catch (Throwable th) { 28 | } 29 | } 30 | 31 | @Benchmark 32 | public void throwCatchSameException() { 33 | } 34 | } 35 | -------------------------------------------------------------------------------- 
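A sketch of the empty throwCatchSameException benchmark in ThrowSameExceptionBenchmark.java above: rethrowing the pre-constructed `exception` field means each iteration pays only for the throw and the catch, not for constructing a new exception (and the fillInStackTrace call that runs inside the Throwable constructor).

    @Benchmark
    public void throwCatchSameException() {
      try {
        throw exception;
      } catch (Throwable th) {
      }
    }

Comparing this against throwCatchNewException separates the cost of raising an exception from the cost of capturing a stack trace.
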
/src/main/java.skip/net/degoes/project/dataset1/Value.java: -------------------------------------------------------------------------------- 1 | package net.degoes.project.dataset1; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.function.BinaryOperator; 7 | import java.util.concurrent.atomic.AtomicReference; 8 | import zio.Chunk; 9 | import scala.util.Random; 10 | import io.vavr.collection.Map; 11 | import net.degoes.project.dataset1.Field; 12 | import io.vavr.collection.HashMap; 13 | 14 | public abstract class Value { 15 | static Value NA = new Value() {}; 16 | 17 | public static class Text extends Value { 18 | String value; 19 | public Text(String value) { 20 | this.value = value; 21 | } 22 | } 23 | 24 | public static class Integer extends Value { 25 | long value; 26 | public Integer(long value) { 27 | this.value = value; 28 | } 29 | } 30 | 31 | public static class Decimal extends Value { 32 | double value; 33 | public Decimal(double value) { 34 | this.value = value; 35 | } 36 | } 37 | 38 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/gotchas/MisleadingBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.gotchas; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(value = 1, jvmArgsAppend = {}) 13 | @Threads(16) 14 | public class MisleadingBenchmark { 15 | @Param({"100", "1000", "10000"}) 16 | int size = 0; 17 | 18 | Age getBoxedAge(int i) { 19 | return new Age(i); 20 | } 21 | 22 | @Benchmark 23 | public void boxed(Blackhole blackhole) { 24 | int i = 0; 25 | int sum = 0; 26 | 27 | while (i < size) { 28 | Age age = getBoxedAge(i); 29 | sum = sum + age.value; 30 | i = i + 1; 31 | } 32 | blackhole.consume(sum); 33 | } 34 | 35 | class Age { 36 | int value; 37 | Age(int value) { 38 | this.value = value; 39 | } 40 | } 41 | } 42 | 43 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tuning/TuningBenchmark1.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tuning; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(value = 1, jvmArgsAppend = {}) 13 | @Threads(16) 14 | public class TuningBenchmark1 { 15 | @Param({"10000", "1000000"}) 16 | int size = 0; 17 | 18 | @Param({"100000"}) 19 | int numberOfObjects = 0; 20 | 21 | @Benchmark 22 | public void burstHeap(Blackhole blackhole) { 23 | int iter = 0; 24 | while (iter < 4) { 25 | java.util.ArrayList junk = new java.util.ArrayList(numberOfObjects); 26 | int j = 0; 27 | while (j < numberOfObjects) { 28 | junk.add(new byte[size]); 29 | j = j + 1; 30 | } 31 | blackhole.consume(junk); 32 | iter = iter + 1; 33 | } 34 | } 35 | 
} 36 | 37 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/PrimitivizeReturnBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.control.NoStackTrace; 7 | 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(value = 1, jvmArgsAppend = {"-XX:-Inline", "-XX:-DoEscapeAnalysis"}) 15 | @Threads(16) 16 | public class PrimitivizeReturnBenchmark { 17 | class Geolocation { 18 | boolean precise; 19 | int lat; 20 | int lng; 21 | Geolocation(boolean precise, int lat, int lng) { 22 | this.precise = precise; 23 | this.lat = lat; 24 | this.lng = lng; 25 | } 26 | } 27 | 28 | @Benchmark 29 | public void unpacked(Blackhole blackhole) { 30 | blackhole.consume(new Geolocation(true, 1, 2)); 31 | } 32 | 33 | @Benchmark 34 | public void packed(Blackhole blackhole) { 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tools/PrintInliningBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tools; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.ArrayList; 7 | 8 | @org.openjdk.jmh.annotations.State(Scope.Thread) 9 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 10 | @BenchmarkMode({Mode.Throughput}) 11 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Fork(value = 1, jvmArgsAppend = {}) 14 | @Threads(16) 15 | public class PrintInliningBenchmark { 16 | @Param({"100", "1000", "10000"}) 17 | int size = 0; 18 | 19 | Size makeSize(int i) { 20 | return new Size(i); 21 | } 22 | 23 | @Benchmark 24 | public void benchmark(Blackhole blackhole) { 25 | int i = 0; 26 | int sum = 0; 27 | while (i < size) { 28 | Size size = makeSize(i); 29 | sum = sum + size.value; 30 | i = i + 1; 31 | } 32 | blackhole.consume(sum); 33 | } 34 | 35 | class Size { 36 | int value; 37 | Size(int value) { 38 | this.value = value; 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/collections/LookupBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.collections; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import io.vavr.collection.List; 7 | import java.util.Collections; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(1) 15 | @Threads(16) 16 | public class LookupBenchmark { 17 | int Size = 1000; 18 | int IdToLookup = Size - 1; 19 | 20 | class Person { 21 | int id; 22 | int age; 23 | String name; 24 | 25 | Person(int id, int age, String name) { 26 | this.id = id; 27 | 
this.age = age; 28 | this.name = name; 29 | } 30 | } 31 | 32 | List peopleList = List.range(0, Size).map(i -> new Person(i, i, "Person "+i)); 33 | 34 | @Setup(Level.Trial) 35 | public void setup() { 36 | } 37 | 38 | @Benchmark 39 | public void list(Blackhole blackhole) { 40 | blackhole.consume(peopleList.find(x -> x.id == IdToLookup).get()); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/gotchas/SetupOverheadBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.gotchas; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.stream.IntStream; 7 | 8 | @State(Scope.Thread) 9 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 10 | @BenchmarkMode({Mode.Throughput}) 11 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Fork(value = 1, jvmArgsAppend = {}) 14 | @Threads(16) 15 | public class SetupOverheadBenchmark { 16 | @Param({"100", "1000"}) 17 | int maxFib = 0; 18 | 19 | @Param({"10", "100"}) 20 | int fib = 0; 21 | 22 | int fibAcc(int n, int a, int b) { 23 | if (n == 0) return a; 24 | else return fibAcc(n - 1, b, a + b); 25 | } 26 | 27 | int fib(int n) { 28 | return fibAcc(n, 0, 1); 29 | } 30 | 31 | @Benchmark 32 | public void precomputedFib(Blackhole blackhole) { 33 | int[] precomputedFib = IntStream.rangeClosed(0, maxFib).map(n -> fib(n)).toArray(); 34 | 35 | blackhole.consume(precomputedFib[fib]); 36 | } 37 | 38 | @Benchmark 39 | public void dynamicFib(Blackhole blackhole) { 40 | blackhole.consume(fib(fib)); 41 | } 42 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/collections/GraduationBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.collections; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import io.vavr.collection.List; 7 | import java.util.Collections; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(1) 15 | @Threads(16) 16 | public class GraduationBenchmark { 17 | @Param({"100", "1000", "10000"}) 18 | int size = 0; 19 | 20 | @Benchmark 21 | public void concat(Blackhole blackhole) { 22 | int i = 0; 23 | Chain c = Chain.make(1); 24 | 25 | while (i < size) { 26 | c = c.concat(c); 27 | i = i + 1; 28 | } 29 | blackhole.consume(c); 30 | } 31 | 32 | public static class Chain { 33 | public Chain concat(Chain that) { 34 | return Chain.empty(); // TODO 35 | } 36 | 37 | public static Chain empty() { 38 | return new Chain<>(); 39 | } 40 | 41 | public static Chain make(A... 
as) { 42 | return new Chain<>(); // TODO 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/allocation/CopyAllocBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.allocation; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.stream.IntStream; 7 | 8 | import scala.collection.JavaConverters; 9 | 10 | import zio.Chunk; 11 | 12 | @State(Scope.Thread) 13 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 14 | @BenchmarkMode({Mode.Throughput}) 15 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 17 | @Fork(1) 18 | @Threads(16) 19 | public class CopyAllocBenchmark { 20 | @Param({"100", "1000", "10000"}) 21 | int size = 0; 22 | 23 | Chunk people = null; 24 | 25 | @Setup 26 | public void setup() { 27 | people = Chunk.fromIterator(JavaConverters.asScalaIterator(IntStream.range(0, size).boxed().map(i -> new Person(i)).iterator())); 28 | } 29 | 30 | @Benchmark 31 | public void alloc() { 32 | people.map(p -> p.copy(p.age + 1)); 33 | } 34 | 35 | public static class Person { 36 | int age; 37 | 38 | public Person(int age) { 39 | this.age = age; 40 | } 41 | 42 | public static Person copy(int age) { 43 | return new Person(age); 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ${application.home:-.}/logs/application.log 5 | 6 | UTF-8 7 | 8 | %d{yyyy-MM-dd HH:mm:ss} %highlight(%-5level) %cyan(%logger{36}) %magenta(%X{akkaSource}) %msg%n 9 | 10 | 11 | 12 | 13 | 14 | true 15 | 16 | UTF-8 17 | 18 | %d{yyyy-MM-dd HH:mm:ss} %highlight(%-5level) %cyan(%logger{36}) %magenta(%X{akkaSource}) %msg%n 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/exceptions/05-exceptions.java: -------------------------------------------------------------------------------- 1 | /** 2 | * EXCEPTIONS 3 | * 4 | * Exceptions can be a source of overhead in any case where they cease to be "exceptional" (i.e. 5 | * when they occur frequently and are expected to occur as part of the business logic). 6 | * 7 | * In this section, you will explore and isolate the overhead of exceptions. 8 | */ 9 | 10 | /** 11 | * EXERCISE 1 12 | * 13 | * Develop a benchmark to measure the overhead of throwing and catching `MyException` with a fixed 14 | * message. Compare this with the overhead of constructing a new `MyException` without throwing (or 15 | * catching) it. What can you conclude from this benchmark? 16 | */ 17 | 18 | // See exceptions/ThrowExceptionBenchmark.java 19 | 20 | /** 21 | * EXERCISE 2 22 | * 23 | * Develop a benchmark to measure the overhead of throwing and catching the same exception. Compare 24 | * this with the overhead of throwing and catching new exceptions. What can you conclude from this 25 | * comparison, together with the previous exercise? 26 | */ 27 | 28 | // See exceptions/ThrowSameExceptionBenchmark.java 29 | 30 | /** 31 | * EXERCISE 3 32 | * 33 | * Develop a benchmark to measure the overhead of calling Exception#fillInStackTrace. 
What can you 34 | * conclude from this benchmark? 35 | */ 36 | 37 | // See exceptions/FillInStackTraceBenchmark.java 38 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tools/ProfilerExample.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tools; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.ArrayList; 7 | import scala.util.Random; 8 | import zio.Chunk; 9 | import net.degoes.project.dataset1.*; 10 | import io.vavr.collection.Map; 11 | import io.vavr.collection.HashMap; 12 | 13 | class ProfilerExample { 14 | public static void main(String[] args) { 15 | 16 | int Size = 10_000; 17 | 18 | Random rng = new Random(0L); 19 | 20 | Field start = new net.degoes.project.dataset1.Field("start"); 21 | Field end = new Field("end"); 22 | Field netPay = new Field("netPay"); 23 | 24 | Dataset dataset = new Dataset(Chunk.fill(Size, () -> { 25 | int dstart = rng.between(0, 360); 26 | int dend = rng.between(dstart, 360); 27 | int dnetPay = rng.between(20000, 60000); 28 | 29 | return new Row( 30 | HashMap.of( 31 | "start" , new Value.Integer(dstart), 32 | "end" , new Value.Integer(dend), 33 | "netPay", new Value.Integer(dnetPay) 34 | ) 35 | ); 36 | })); 37 | 38 | long i = 0L; 39 | while (i < 1_000_000) { 40 | //(dataset.apply(start) + dataset.apply(end)) / dataset.apply(netPay); 41 | i = i + 1L; 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/boxing/BoxedComparatorBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.boxing; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(1) 13 | @Threads(16) 14 | public class BoxedComparatorBenchmark { 15 | @Param({"10", "100", "1000", "10000", "100000"}) 16 | int size = 0; 17 | 18 | int[] ints = null; 19 | 20 | @Setup 21 | public void setup() { 22 | ints = new int[size]; 23 | int i = 0; 24 | while (i < size) { 25 | ints[i] = 0; 26 | i = i + 1; 27 | } 28 | } 29 | 30 | @Benchmark 31 | public void boxed(Blackhole blackhole) { 32 | int i = 0; 33 | int sum = 0; 34 | while (i < size) { 35 | sum = sum + IntGenericComparator.compare(ints[i], 0); 36 | i = i + 1; 37 | } 38 | blackhole.consume(sum); 39 | } 40 | 41 | abstract class Comparator { 42 | abstract int compare(T l, T r); 43 | } 44 | 45 | Comparator IntGenericComparator = new Comparator() { 46 | int compare(Integer l , Integer r) { 47 | return l - r; 48 | } 49 | }; 50 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/boxing/BoxedBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.boxing; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | 7 | @State(Scope.Thread) 8 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 9 | @BenchmarkMode({Mode.Throughput}) 10 | @Warmup(iterations = 5, time = 1, timeUnit = 
TimeUnit.SECONDS) 11 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 12 | @Fork(1) 13 | @Threads(16) 14 | public class BoxedBenchmark { 15 | @Param({"100", "1000", "10000"}) 16 | int size = 0; 17 | 18 | Boxed[] boxed = null; 19 | 20 | @Setup 21 | public void setup() { 22 | boxed = new Boxed[size]; 23 | int i = 0; 24 | while (i < size) { 25 | boxed[i] = new Boxed(0); 26 | i = i + 1; 27 | } 28 | } 29 | 30 | @Benchmark 31 | public void boxed(Blackhole blackhole) { 32 | int i = 0; 33 | int sum = 0; 34 | while (i < size) { 35 | int newValue = boxed[i].value + 1; 36 | boxed[i] = new Boxed(newValue); 37 | sum = sum + newValue; 38 | i = i + 1; 39 | } 40 | blackhole.consume(sum); 41 | } 42 | 43 | @Benchmark 44 | public void unboxed(Blackhole blackhole) { 45 | } 46 | 47 | public static class Boxed { 48 | T value; 49 | 50 | public Boxed(T value) { 51 | this.value = value; 52 | } 53 | } 54 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/NoExceptionsBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.Random; 7 | import scala.util.control.NoStackTrace; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.SECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis"}) 15 | @Threads(16) 16 | public class NoExceptionsBenchmark { 17 | Random rng = new Random(0L); 18 | 19 | @Param({"10000", "100000"}) 20 | int size = 0; 21 | 22 | class Exception1 extends Exception { 23 | String message; 24 | public Exception1(String message) { 25 | this.message = message; 26 | } 27 | 28 | public String getMessage() { 29 | return message; 30 | } 31 | } 32 | 33 | int maybeException1() throws Exception1 { 34 | if (rng.nextBoolean()) { 35 | throw new Exception1("message"); 36 | } 37 | return 42; 38 | } 39 | 40 | @Benchmark 41 | public void throwException(Blackhole blackhole) { 42 | int i = 0; 43 | while (i < size) { 44 | try { 45 | maybeException1(); 46 | } catch (Exception1 ex) { 47 | blackhole.consume(ex.message); 48 | } 49 | i = i + 1; 50 | } 51 | } 52 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/estimation/Estimation1Benchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.estimation; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.Iterator; 7 | import java.util.stream.IntStream; 8 | import io.vavr.collection.List; 9 | 10 | @State(Scope.Thread) 11 | @OutputTimeUnit(TimeUnit.SECONDS) 12 | @BenchmarkMode({Mode.Throughput}) 13 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 15 | @Fork(value = 1, jvmArgsAppend = {}) 16 | @Threads(16) 17 | public class Estimation1Benchmark { 18 | @Param({"1000", "10000"}) 19 | int size = 0; 20 | 21 | List list = null; 22 | Integer[] array = null; 23 | 24 | @Setup 25 | public void setup() { 26 | list = List.range(0, size); 27 | array = IntStream.range(0, 
size).boxed().toArray(Integer[]::new); 28 | } 29 | 30 | @Benchmark 31 | public void list(Blackhole blackhole) { 32 | Iterator iterator = list.iterator(); 33 | int sum = 0; 34 | 35 | while (iterator.hasNext()) { 36 | var next = iterator.next(); 37 | sum = sum + next; 38 | } 39 | blackhole.consume(sum); 40 | } 41 | 42 | @Benchmark 43 | public void array(Blackhole blackhole) { 44 | int i = 0; 45 | int sum = 0; 46 | while (i < array.length) { 47 | sum += array[i]; 48 | i = i + 1; 49 | } 50 | blackhole.consume(sum); 51 | } 52 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/boxing/04-boxing.java: -------------------------------------------------------------------------------- 1 | /** 2 | * BOXING 3 | * 4 | * The JVM draws a sharp distinction between primitive types (such as integers, floats, and bytes) 5 | * and reference types (such as String and user-defined classes). 6 | * 7 | * Primitive types may be stored on the stack, and when they are stored on the heap (for example, as 8 | * part of a user-defined class), they are stored in a very compact form. Finally, arrays are 9 | * specialized for primitive types, which enable very compact and performant access to their 10 | * elements. 11 | * 12 | * In this section, you will explore the nature and overhead of boxing. 13 | * 14 | * EXERCISE 1 15 | * 16 | * Design a benchmark to measure the overhead of boxing. In order to be fair to the boxing 17 | * benchmark, you should design it to have a similar structure and process. The difference is that 18 | * it will not box the individual integers in an array. 19 | * 20 | * Discuss the overhead of boxing and how it compared with your initial expectations. 21 | * 22 | * See boxing/BoxedBenchmark.java 23 | * 24 | * EXERCISE 2 25 | * 26 | * Boxing is not just something that occurs with generic data structures, such as lists, sets, and 27 | * maps. It occurs also with interfaces that provide generic functionality. 28 | * 29 | * In this exercise, you will explore the cost of boxing with the Comparator interface. The 30 | * Comparator interface is a generic interface that allows you to compare two values of the same 31 | * type. Create a specialized version to see the overhead of boxing in this example. 
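 *
 * A minimal sketch of such a specialized comparator (the IntComparator name is hypothetical;
 * it mirrors the generic Comparator in BoxedComparatorBenchmark.java but operates on primitive
 * ints, so no Integer boxing happens at the call site):
 *
 *   abstract class IntComparator {
 *     abstract int compare(int l, int r);
 *   }
 *
 *   IntComparator IntSpecializedComparator = new IntComparator() {
 *     int compare(int l, int r) {
 *       return l - r;
 *     }
 *   };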
32 | * 33 | * See boxing/BoxedComparatorBenchmark.java 34 | */ -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/UseNullBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.control.NoStackTrace; 7 | import java.util.function.Supplier; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.SECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis"}) 15 | @Threads(16) 16 | public class UseNullBenchmark { 17 | @Param({"10000", "1000000"}) 18 | int size = 0; 19 | 20 | abstract class Optional { 21 | Optional orElse(Supplier> that) { 22 | if (this.equals(None)) { 23 | return that.get(); 24 | } else { 25 | return this; 26 | } 27 | } 28 | } 29 | 30 | class Some extends Optional { 31 | A value; 32 | public Some(A value) { 33 | this.value = value; 34 | } 35 | } 36 | 37 | Optional None = new Optional() { 38 | 39 | }; 40 | 41 | @Benchmark 42 | public void optionals(Blackhole blackhole) { 43 | int i = 0; 44 | Optional current = new Some("a"); 45 | int cutoff = size - 10; 46 | 47 | while (i < size) { 48 | if (i > cutoff) { 49 | current = None; 50 | } else { 51 | current = current.orElse(() -> new Some("a")); 52 | } 53 | i = i + 1; 54 | } 55 | 56 | blackhole.consume(current); 57 | } 58 | 59 | @Benchmark 60 | public void nulls(Blackhole blackhole) { 61 | } 62 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/UseArraysBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.stream.Stream; 7 | import scala.util.control.NoStackTrace; 8 | import io.vavr.collection.List; 9 | import java.util.Random; 10 | 11 | @State(Scope.Thread) 12 | @OutputTimeUnit(TimeUnit.SECONDS) 13 | @BenchmarkMode({Mode.Throughput}) 14 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 15 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Fork(value = 1, jvmArgsAppend = {}) 17 | @Threads(16) 18 | public class UseArraysBenchmark { 19 | private Random rng = new Random(0L); 20 | 21 | @Param({"10000", "100000"}) 22 | int size = 0; 23 | 24 | List list = null; 25 | 26 | java.util.List builder = new java.util.ArrayList(); 27 | 28 | @Setup 29 | public void setup() { 30 | list = List.ofAll(Stream.generate(() -> rng.nextFloat()).limit(size)); 31 | builder.clear(); 32 | } 33 | 34 | @Benchmark 35 | public void list(Blackhole blackhole) { 36 | int i = 0; 37 | List current = list; 38 | float x1 = current.head(); 39 | float x2 = current.head(); 40 | builder.clear(); 41 | 42 | while (i < size) { 43 | float x3 = current.head(); 44 | 45 | builder.add((x1 + x2 + x3) / 3); 46 | 47 | current = current.tail(); 48 | i = i + 1; 49 | x1 = x2; 50 | x2 = x3; 51 | } 52 | 53 | blackhole.consume(List.ofAll(builder)); 54 | } 55 | 56 | @Benchmark 57 | public void array(Blackhole blackhole) { 58 | } 59 | } 60 | 
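/*
 * One possible way to fill in the empty array() benchmark above (a sketch only, assuming a
 * float[] mirror of `list` is populated in setup(); the field name `floats` is hypothetical):
 *
 *   float[] floats = null;  // in setup(): floats = new float[size], filled with rng.nextFloat()
 *
 *   @Benchmark
 *   public void array(Blackhole blackhole) {
 *     float[] result = new float[size];
 *     float x1 = floats[0];
 *     float x2 = floats[0];
 *     for (int i = 0; i < size; i++) {
 *       float x3 = floats[i];
 *       result[i] = (x1 + x2 + x3) / 3;
 *       x1 = x2;
 *       x2 = x3;
 *     }
 *     blackhole.consume(result);
 *   }
 *
 * Keeping the same sliding-window shape as list() makes the comparison fair; the only change
 * is replacing the immutable list and ArrayList builder with flat primitive arrays.
 */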
-------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tuning/TuningBenchmark4.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tuning; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import io.vavr.collection.List; 7 | import io.vavr.Tuple2; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(value = 1, jvmArgsAppend = {}) 15 | @Threads(16) 16 | public class TuningBenchmark4 { 17 | @Param({"2", "4", "8"}) 18 | int n = 0; 19 | 20 | @Benchmark 21 | public void nqueens(Blackhole blackhole) { 22 | queens(n); 23 | } 24 | 25 | boolean isAttacked(Tuple2 q1, Tuple2 q2) { 26 | return q1._1 == q2._1 || 27 | q1._2 == q2._2 || 28 | Math.abs(q2._1 - q1._1) == Math.abs(q2._2 - q1._2); 29 | } 30 | 31 | boolean isSafe(Tuple2 queen, List> others) { 32 | return others.forAll(xy -> !isAttacked(queen, xy)); 33 | } 34 | 35 | List>> placeQueens(int k) { 36 | if (k == 0) return List.of(List.empty()); 37 | else 38 | return placeQueens(k - 1).flatMap(queens -> 39 | List.range(1, n + 1).filter(column -> 40 | isSafe(new Tuple2(k, column), queens) 41 | ).map(column -> { 42 | List> result = queens.prepend(new Tuple2(k, column)); 43 | return result; 44 | }) 45 | ); 46 | 47 | } 48 | 49 | List>> queens(int n) { 50 | return placeQueens(n); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/gotchas/11-gotchas.java: -------------------------------------------------------------------------------- 1 | /** 2 | * GOTCHAS 3 | * 4 | * The JVM is a highly dynamic environment. You may believe that a benchmark shows you one thing, 5 | * when in fact the opposite may be the case in your application code. 6 | * 7 | * It is for this reason that everyone should treat the result of benchmarks and profiling data, 8 | * which can feed into a hypothesis, which can then be tested and either rejected or tenatively 9 | * accepted. 10 | * 11 | * In this section, you will see for yourself reasons to be cautious. 12 | * 13 | * EXERCISE 1 14 | * 15 | * Create an unboxed version of this benchmark, which follows the structure and flow of the boxed 16 | * version (for fairness). What do you expect to happen? What actually happens? 17 | * 18 | * Note that the results you see in this benchmark are NOT generally applicable to your application. 19 | * It would be a gross error to generalize them. 20 | * 21 | * EXERCISE 2 22 | * 23 | * Add the JVM options "-XX:-DoEscapeAnalysis", "-XX:-Inline" and re-run the benchmark. Now guess why 24 | * you see the behavior you are seeing, and come up with a modification to the benchmark that will 25 | * enable you to see the expected behavior (a modification that would accurately reflect some 26 | * application code you might write). 27 | * 28 | * See gotchas/MisleadingBenchmark.java 29 | * 30 | * EXERCISE 3 31 | * 32 | * This benchmark purports to show that precomputing fibonacci numbers is slower than just computing 33 | * them dynamically. However, the benchmark is flawed. Fix the benchmark so that it shows the 34 | * expected result. 
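 *
 * A sketch of the usual fix (field name `table` is illustrative): the precomputedFib benchmark
 * rebuilds the whole table inside the measured method, so every invocation pays the construction
 * cost; moving that work into a @Setup method leaves only the array lookup to be timed.
 *
 *   int[] table = null;
 *
 *   @Setup
 *   public void setup() {
 *     table = IntStream.rangeClosed(0, maxFib).map(n -> fib(n)).toArray();
 *   }
 *
 *   @Benchmark
 *   public void precomputedFib(Blackhole blackhole) {
 *     blackhole.consume(table[fib]);
 *   }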
35 | * 36 | * NOTE: In general, mistakes involving setup overhead will NOT be this easy to identify and fix. 37 | * 38 | * See gotchas/SetupOverheadBenchmark.java 39 | */ -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/project/ProjectBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.project; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.Random; 7 | import net.degoes.project.dataset1.*; 8 | import io.vavr.collection.HashMap; 9 | 10 | import zio.Chunk; 11 | 12 | @State(Scope.Thread) 13 | @OutputTimeUnit(TimeUnit.SECONDS) 14 | @BenchmarkMode({Mode.Throughput}) 15 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 17 | @Fork(1) 18 | @Threads(16) 19 | public class ProjectBenchmark { 20 | @Param({"100", "1000", "10000"}) 21 | int size = 0; 22 | 23 | class benchmark1 { 24 | 25 | static Dataset dataset = null; 26 | 27 | static Field start = new Field("start"); 28 | static Field end = new Field("end"); 29 | static Field netPay = new Field("netPay"); 30 | } 31 | 32 | @Setup 33 | public void setupSlow() { 34 | Random rng = new Random(0L); 35 | 36 | benchmark1.dataset = new Dataset(Chunk.fill(size, () -> { 37 | int start = rng.between(0, 360); 38 | int end = rng.between(start, 360); 39 | int netPay = rng.between(20000, 60000); 40 | 41 | return new Row( 42 | HashMap.of( 43 | "start", new Value.Integer(start), 44 | "end", new Value.Integer(end), 45 | "netPay", new Value.Integer(netPay) 46 | ) 47 | ); 48 | })); 49 | } 50 | 51 | @Benchmark 52 | public void baseline(Blackhole blackhole) { 53 | var result = (benchmark1.dataset.apply(benchmark1.start).plus(benchmark1.dataset.apply(benchmark1.end))).divide(benchmark1.dataset.apply(benchmark1.netPay)); 54 | blackhole.consume(result); 55 | } 56 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/FlattenProductsBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.control.NoStackTrace; 7 | import java.util.Random; 8 | 9 | 10 | @State(Scope.Thread) 11 | @OutputTimeUnit(TimeUnit.SECONDS) 12 | @BenchmarkMode({Mode.Throughput}) 13 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 15 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis"}) 16 | @Threads(16) 17 | public class FlattenProductsBenchmark { 18 | Random rng = new Random(0L); 19 | 20 | class Billing { 21 | int startDay; 22 | int endDay; 23 | double dailyRate; 24 | 25 | public Billing(int startDay, int endDay, double dailyRate) { 26 | this.startDay = startDay; 27 | this.endDay = endDay; 28 | this.dailyRate = dailyRate; 29 | } 30 | } 31 | 32 | @Param({"10000", "100000"}) 33 | int size = 0; 34 | 35 | Billing[] billings = null; 36 | 37 | @Setup 38 | public void setup() { 39 | Random rng = new Random(0L); 40 | 41 | billings = new Billing[size]; 42 | } 43 | 44 | @Benchmark 45 | public void unflattened(Blackhole blackhole) { 46 | int i = 0; 47 | while (i < size) { 48 | Billing billing = new Billing(0, 30, 
300); 49 | billings[i] = billing; 50 | blackhole.consume(billing); 51 | i = i + 1; 52 | } 53 | i = 0; 54 | double total = 0.0; 55 | while (i < size) { 56 | Billing billing = billings[i]; 57 | total = total + (billing.endDay - billing.startDay) * billing.dailyRate; 58 | i = i + 1; 59 | } 60 | blackhole.consume(total); 61 | } 62 | 63 | @Benchmark 64 | public void flattened(Blackhole blackhole) { 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/allocation/03-allocation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * ALLOCATION 3 | * 4 | * In theory, the JVM allocates by merely incrementing a pointer to the next free memory location, 5 | * making allocation extremely cheap. While mostly correct, this model of allocation is misleadingly 6 | * incomplete. 7 | * 8 | * Whatever must be allocated, must also be unallocated. In the JVM, this is the job of the garbage 9 | * collector, which must run to reclaim memory that is no longer in use. The process of garbage 10 | * collection is not free, but rather imposes significant cost on low-latency and high-performance 11 | * applications. 12 | * 13 | * In this section, you will explore the cost of allocation. 14 | * 15 | * EXERCISE 1 16 | * 17 | * Design a 'noAlloc' benchmark that attempts to follow the exact same process as the 'alloc' 18 | * benchmark, but without the allocation. 19 | * 20 | * HINT: Think about pre-allocation. 21 | * 22 | * See allocation/AllocBenchmark.java 23 | * 24 | * EXERCISE 2 25 | * 26 | * Design another 'noAlloc' benchmark that attempts to follow the exact same process as the 'alloc' 27 | * benchmark, but without the allocation. How many times faster is the no allocation benchmark? 28 | * 29 | * See allocation/CopyAllocBenchmark.java 30 | * 31 | * GRADUATION PROJECT 32 | * 33 | * In order to better understand the process of garbage collection, in this exercise, you will 34 | * implement a toy mark/sweep garbage collector. It is only a toy because (a) it only considers on 35 | * -heap objects, and (b) it does not try to encode any information about the object graph into the 36 | * linear raw memory, but rather, uses high-level data structures that are easy to work with. 37 | * 38 | * Implement the mark/sweep algorithm in the `markSweep` benchmark by iterating over all objects in 39 | * the heap twice. In the first iteration, mark all objects that are reachable from the root object. 40 | * In the second iteration, sweep all objects that are not marked. 
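A rough sketch of those two passes (in Java, against the `Obj`/`Data`/`Heap` classes defined in allocation/MarkSweepBenchmark.java; it assumes `heap` and `rootObjects` have been populated in `@Setup`, and it is not the provided solution):

```java
void mark(Obj obj) {
    if (obj.marked) return;                      // already visited (also guards against cycles)
    obj.marked = true;
    for (Data d : obj.data) {
        if (d instanceof Data.Pointer) {
            mark(((Data.Pointer) d).value);      // follow references; a toy heap tolerates recursion
        }
    }
}

@Benchmark
public void markSweep(Blackhole blackhole) {
    // Pass 1: mark everything reachable from the roots.
    for (Obj root : rootObjects) {
        mark(root);
    }
    // Pass 2: sweep. Here "sweeping" just counts unreachable objects and resets marks,
    // since this toy collector does not manage raw memory.
    int reclaimed = 0;
    for (Obj obj : heap.objects) {
        if (!obj.marked) {
            reclaimed++;
        } else {
            obj.marked = false;
        }
    }
    blackhole.consume(reclaimed);
}
```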
41 | * 42 | * See allocation/MarkSweepBenchmark.java 43 | */ -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/estimation/Estimation2Benchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.estimation; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.Arrays; 7 | import io.vavr.collection.List; 8 | import java.util.stream.IntStream; 9 | 10 | @State(Scope.Thread) 11 | @OutputTimeUnit(TimeUnit.SECONDS) 12 | @BenchmarkMode({Mode.Throughput}) 13 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 15 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis", "-XX:-Inline"}) 16 | @Threads(16) 17 | public class Estimation2Benchmark { 18 | @Param({"1000", "10000"}) 19 | int size = 0; 20 | 21 | List list = null; 22 | int[] array = null; 23 | 24 | @Setup 25 | public void setup() { 26 | list = List.range(0, size); 27 | array = IntStream.range(0, size).toArray(); 28 | } 29 | 30 | int plus(int left, int right) { 31 | return left + right; 32 | } 33 | 34 | @Benchmark 35 | public void list(Blackhole blackhole) { 36 | int s = sum(list.map(i -> plus(1, i))); 37 | 38 | blackhole.consume(s); 39 | } 40 | 41 | @Benchmark 42 | public void array_boxing(Blackhole blackhole) { 43 | int s = sum(Arrays.stream(array).map(value -> { 44 | Integer newValue = IntAdder.add(value, 1); 45 | return newValue; 46 | }).toArray()); 47 | 48 | blackhole.consume(s); 49 | } 50 | 51 | int sum(List list) { 52 | int sum = 0; 53 | List cur = list; 54 | while (!cur.isEmpty()) { 55 | sum += cur.head(); 56 | 57 | cur = cur.tail(); 58 | } 59 | return sum; 60 | } 61 | int sum(int[] array) { 62 | int sum = 0; 63 | int i = 0; 64 | int len = array.length; 65 | while (i < len) { 66 | sum += array[i]; 67 | i = i + 1; 68 | } 69 | return sum; 70 | } 71 | 72 | static abstract class Adder { 73 | abstract A add(A left, A right); 74 | } 75 | 76 | Adder IntAdder = new Adder() { 77 | Integer add(Integer left, Integer right) { 78 | return left + right; 79 | } 80 | }; 81 | } 82 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/estimation/Estimation3Benchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.estimation; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.Random; 7 | import java.util.function.IntPredicate; 8 | import java.util.stream.IntStream; 9 | import java.util.stream.Stream; 10 | import java.util.Arrays; 11 | 12 | @State(Scope.Thread) 13 | @OutputTimeUnit(TimeUnit.SECONDS) 14 | @BenchmarkMode({Mode.Throughput}) 15 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 17 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis", "-XX:-Inline"}) 18 | @Threads(16) 19 | public class Estimation3Benchmark { 20 | Random rng = new Random(0L); 21 | 22 | @Param({"1000", "10000"}) 23 | int size = 0; 24 | 25 | String[] maybeInts = null; 26 | 27 | @Setup 28 | public void setup() { 29 | maybeInts = IntStream.range(0, size) 30 | .mapToObj(i -> rng.nextBoolean()?Integer.toString(i):(Integer.toString(i) + "haha")) 31 | 
.toArray(String[]::new); 32 | } 33 | 34 | boolean isInt(String s) { 35 | try { 36 | Integer.parseInt(s); 37 | } catch (NumberFormatException e) { 38 | return false; 39 | } 40 | return true; 41 | } 42 | 43 | @Benchmark 44 | public void checkInt1(Blackhole blackhole) { 45 | int i = 0; 46 | int ints = 0; 47 | 48 | while (i < maybeInts.length) { 49 | if (isInt(maybeInts[i])) { 50 | ints += 1; 51 | } 52 | i = i + 1; 53 | } 54 | blackhole.consume(ints); 55 | } 56 | 57 | IntPredicate isDigit = ch -> Character.isDigit(ch); 58 | 59 | boolean isInt2(String s) { 60 | int i = 0; 61 | int len = s.length(); 62 | while (i < len) { 63 | if (!isDigit.test(s.charAt(i))) return false; 64 | i += 1; 65 | } 66 | return true; 67 | } 68 | 69 | @Benchmark 70 | public void checkInt2(Blackhole blackhole) { 71 | 72 | int i = 0; 73 | int ints = 0; 74 | 75 | while (i < maybeInts.length) { 76 | if (isInt2(maybeInts[i])) ints += 1; 77 | i = i + 1; 78 | } 79 | blackhole.consume(ints); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/SpecializeBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.control.NoStackTrace; 7 | import java.util.Collections; 8 | import java.util.Arrays; 9 | 10 | @State(Scope.Thread) 11 | @OutputTimeUnit(TimeUnit.SECONDS) 12 | @BenchmarkMode({Mode.Throughput}) 13 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 15 | @Fork(value = 1, jvmArgsAppend = {}) 16 | @Threads(16) 17 | public class SpecializeBenchmark { 18 | @Param({"10000", "100000"}) 19 | int size = 0; 20 | 21 | GenericTree genericTree = null; 22 | 23 | @Setup 24 | public void setupGenericTree() { 25 | int count = (int) Math.sqrt(size); 26 | 27 | GenericTree[] leaves = (GenericTree[]) new GenericTree[count]; 28 | Arrays.fill(leaves, new Leaf(0)); 29 | GenericTree current = new Branch(leaves); 30 | 31 | int i = 0; 32 | while (i < count) { 33 | GenericTree[] newArray = new GenericTree[1]; 34 | newArray[0] = current; 35 | current = new Branch(newArray); 36 | i = i + 1; 37 | } 38 | 39 | genericTree = current; 40 | } 41 | 42 | int loop(GenericTree tree) { 43 | if (tree instanceof Leaf) { 44 | return ((Leaf) tree).value; 45 | } else { 46 | GenericTree[] children = ((Branch) tree).children; 47 | 48 | int sum = 0; 49 | int i = 0; 50 | while (i < children.length) { 51 | sum = sum + loop(children[i]); 52 | i = i + 1; 53 | } 54 | return sum; 55 | } 56 | } 57 | 58 | @Benchmark 59 | public void genericTree(Blackhole blackhole) { 60 | blackhole.consume(loop(genericTree)); 61 | } 62 | 63 | @Benchmark 64 | public void intTree(Blackhole blackhole) { 65 | } 66 | 67 | abstract class GenericTree { 68 | } 69 | 70 | class Leaf extends GenericTree { 71 | A value; 72 | Leaf(A value) { 73 | this.value = value; 74 | } 75 | } 76 | 77 | class Branch extends GenericTree { 78 | GenericTree[] children; 79 | Branch(GenericTree[] children) { 80 | this.children = children; 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/virtual/PolySimBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.virtual; 2 | 3 | import 
org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import io.vavr.collection.HashMap; 7 | 8 | import zio.Chunk; 9 | 10 | @State(Scope.Thread) 11 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 12 | @BenchmarkMode({Mode.Throughput}) 13 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 15 | @Fork(1) 16 | @Threads(16) 17 | public class PolySimBenchmark { 18 | JVMObject obj = 19 | new JVMObject(1, new JVMClassMetadata("Dog", HashMap.of(new JVMMethod("Dog", "bark"), new Address(0)))); 20 | Bytecode.InvokeStatic is = new Bytecode.InvokeStatic(new Address(0)); 21 | Bytecode.InvokeVirtual iv = new Bytecode.InvokeVirtual(new JVMMethod("Dog", "bark")); 22 | 23 | @Benchmark 24 | public void invokeStatic(Blackhole blackhole) { 25 | blackhole.consume(is.address.value); 26 | } 27 | 28 | static class JVMObject { 29 | Object data; 30 | JVMClassMetadata meta; 31 | 32 | public JVMObject(Object data, JVMClassMetadata meta) { 33 | this.data = data; 34 | this.meta = meta; 35 | } 36 | } 37 | 38 | public static class JVMClassMetadata { 39 | String clazz; 40 | HashMap vtable; 41 | 42 | public JVMClassMetadata(String clazz, HashMap vtable) { 43 | this.clazz = clazz; 44 | this.vtable = vtable; 45 | } 46 | } 47 | 48 | public static class JVMMethod { 49 | String clazz; 50 | String name; 51 | 52 | public JVMMethod(String clazz, String name) { 53 | this.clazz = clazz; 54 | this.name = name; 55 | } 56 | } 57 | 58 | public static class Address { 59 | int value; 60 | 61 | public Address(int value) { 62 | this.value = value; 63 | } 64 | } 65 | 66 | public static class Bytecode { 67 | static class InvokeStatic extends Bytecode { 68 | Address address; 69 | 70 | public InvokeStatic(Address address) { 71 | this.address = address; 72 | } 73 | } 74 | 75 | static class InvokeVirtual extends Bytecode { 76 | JVMMethod method; 77 | 78 | public InvokeVirtual(JVMMethod method) { 79 | this.method = method; 80 | } 81 | } 82 | } 83 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/DevirtualizeBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.control.NoStackTrace; 7 | import java.util.Random; 8 | import java.util.stream.Stream; 9 | 10 | @State(Scope.Thread) 11 | @OutputTimeUnit(TimeUnit.SECONDS) 12 | @BenchmarkMode({Mode.Throughput}) 13 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 15 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis"}) 16 | @Threads(16) 17 | public class DevirtualizeBenchmark { 18 | Random rng = new Random(0L); 19 | 20 | @Param({"10000", "100000"}) 21 | int size = 0; 22 | 23 | Op[] virtual_ops = null; 24 | 25 | @Setup 26 | public void setup() { 27 | Random rng = new Random(0L); 28 | virtual_ops = Stream.generate(() -> { 29 | Op result = null; 30 | switch (rng.nextInt(6)) { 31 | case 0: 32 | result = Inc; 33 | break; 34 | case 1: 35 | result = Dec; 36 | break; 37 | case 2: 38 | result = Mul2; 39 | break; 40 | case 3: 41 | result = Div2; 42 | break; 43 | case 4: 44 | result = Neg; 45 | break; 46 | case 5: 47 | result = Abs; 48 | break; 49 | } 50 | return result; 51 | 
}).limit(size).toArray(Op[]::new); 52 | } 53 | 54 | @Benchmark 55 | public void virtualized(Blackhole blackhole) { 56 | int current = 0; 57 | int i = 0; 58 | while (i < size) { 59 | Op op = virtual_ops[i]; 60 | current = op.apply(current); 61 | i = i + 1; 62 | } 63 | } 64 | 65 | @Benchmark 66 | public void devirtualized(Blackhole blackhole) { 67 | } 68 | 69 | abstract class Op { 70 | abstract int apply(int x); 71 | } 72 | 73 | Op Inc = new Op() { 74 | int apply(int x) { return x + 1; } 75 | }; 76 | Op Dec = new Op() { 77 | int apply(int x) { return x - 1; } 78 | }; 79 | Op Mul2 = new Op() { 80 | int apply(int x) { return x * 2; } 81 | }; 82 | Op Div2 = new Op() { 83 | int apply(int x) { return x / 2; } 84 | }; 85 | Op Neg = new Op() { 86 | int apply(int x) { return -x; } 87 | }; 88 | Op Abs = new Op() { 89 | int apply(int x) { return Math.abs(x); } 90 | }; 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/estimation/06-estimation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * ESTIMATION 3 | * 4 | * Nothing can replace benchmarking and profiling. However, over time, you can gain an intuition 5 | * about how quickly or slowly code might execute compared to its theoretical optimum. This is 6 | * especially true both as you come to appreciate the different features that combine to reduce the 7 | * performance of code, as well as learn to notice where those different features are introduced 8 | * into your application through use of high-level language features and libraries. 9 | * 10 | * In this section, you will work on your skills of estimation as you work through a variety of 11 | * benchmarks. As you will see, your estimation, even if honed properly, does not always correspond 12 | * to the performance reality on the JVM. 13 | * 14 | * EXERCISE 1 15 | * 16 | * Study both benchmarks and estimate which one you believe will execute more quickly. Then run the 17 | * benchmark. If the results match your expectations, then try to explain why that might be the 18 | * case. If the results do not match your expectations, then hypothesize and test until you can come 19 | * up with an explanation for why. 20 | * 21 | * See estimation/Estimation1Benchmark.java 22 | * 23 | * EXERCISE 2 24 | * 25 | * Study both benchmarks and estimate which one you believe will execute more quickly. Then run the 26 | * benchmark. If the results match your expectations, then try to explain why that might be the 27 | * case. If the results do not match your expectations, then hypothesize and test until you can come 28 | * up with an explanation for why. 29 | * 30 | * See estimation/Estimation2Benchmark.java 31 | * 32 | * EXERCISE 3 33 | * 34 | * Study both benchmarks and estimate which one you believe will execute more quickly. Then run the 35 | * benchmark. If the results match your expectations, then try to explain why that might be the 36 | * case. If the results do not match your expectations, then hypothesize and test until you can come 37 | * up with an explanation for why. 38 | * 39 | * See estimation/Estimation3Benchmark.java 40 | * 41 | * EXERCISE 4 42 | * 43 | * Study both benchmarks and estimate which one you believe will execute more quickly. Then run the 44 | * benchmark. If the results match your expectations, then try to explain why that might be the 45 | * case. 
If the results do not match your expectations, then hypothesize and test until you can come 46 | * up with an explanation for why. 47 | * 48 | * See estimation/Estimation4Benchmark.java 49 | */ 50 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/virtual/PolyBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.virtual; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.Random; 7 | import java.util.stream.Stream; 8 | import scala.collection.JavaConverters; 9 | 10 | import zio.Chunk; 11 | 12 | @org.openjdk.jmh.annotations.State(Scope.Thread) 13 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 14 | @BenchmarkMode({Mode.Throughput}) 15 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 17 | @Fork(1) 18 | @Threads(16) 19 | public class PolyBenchmark { 20 | @Param({"1000", "10000", "100000"}) 21 | int size = 0; 22 | 23 | Chunk poly_operators = null; 24 | // Chunk mono_operators = null; 25 | 26 | @Setup 27 | public void setupPoly() { 28 | poly_operators = Operator.randomN(size); 29 | } 30 | 31 | @Benchmark 32 | public void poly(Blackhole blackhole) { 33 | int i = 0; 34 | int result = 0; 35 | 36 | while (i < size) { 37 | Operator operator = poly_operators.apply(i); 38 | 39 | result = operator.apply(result, i + 1); 40 | 41 | i = i + 1; 42 | } 43 | blackhole.consume(result); 44 | } 45 | 46 | interface Operator { 47 | int apply(int l, int r); 48 | 49 | // Deterministic RNG: 50 | static Random rng = new Random(0L); 51 | 52 | static Operator Plus = new Operator() { 53 | public int apply(int l, int r) { 54 | return l + r; 55 | } 56 | }; 57 | 58 | static Operator Times = new Operator() { 59 | public int apply(int l, int r) { 60 | return l + r; 61 | } 62 | }; 63 | 64 | static Operator DividedBy = new Operator() { 65 | public int apply(int l, int r) { 66 | return l + r; 67 | } 68 | }; 69 | 70 | static Operator Max = new Operator() { 71 | public int apply(int l, int r) { 72 | return l + r; 73 | } 74 | }; 75 | 76 | static Operator Min = new Operator() { 77 | public int apply(int l, int r) { 78 | return l + r; 79 | } 80 | }; 81 | 82 | static Operator[] All = {Plus, Times, DividedBy, Max, Min}; 83 | 84 | static Operator random() { 85 | return All[rng.nextInt(All.length)]; 86 | } 87 | 88 | static Chunk randomN(int n) { 89 | return Chunk.fromIterator(JavaConverters.asScalaIterator(Stream.generate(() -> random()).limit(n).iterator())); 90 | } 91 | } 92 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/allocation/MarkSweepBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.allocation; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.Random; 7 | import java.util.Arrays; 8 | 9 | @State(Scope.Thread) 10 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 11 | @BenchmarkMode({Mode.Throughput}) 12 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 13 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 14 | @Fork(1) 15 | @Threads(16) 16 | public class MarkSweepBenchmark { 17 | Random rng = new Random(0L); 18 | 19 | int ObjSize = 10; 20 | 21 | @Param({"1000", 
"10000", "100000"}) 22 | int size = 0; 23 | 24 | Heap heap = null; 25 | Obj[] rootObjects = null; 26 | 27 | // @Setup 28 | // public void setup() { 29 | // Obj[] objects = new Obj[size]; 30 | // int i = 0; 31 | // while (i < size) { 32 | // Data.Integer[] data = new Data.Integer[ObjSize]; 33 | // int j = 0; 34 | 35 | // while (j < ObjSize) { 36 | // data[j] = new Data.Integer(0); 37 | // j = j + 1; 38 | // } 39 | 40 | // objects[i] = new Obj(false, data); 41 | // i = i + 1; 42 | // } 43 | 44 | // heap = new Heap(objects); 45 | 46 | // while (i < size) { 47 | // Obj obj = heap.objects[i]; 48 | // int j = 0; 49 | // while (j < ObjSize) { 50 | // if (rng.nextBoolean()) { 51 | // int pointerObjIndex = rng.nextInt(size); 52 | // obj.data[j] = new Data.Pointer(heap.objects[pointerObjIndex]); 53 | // } 54 | // j = j + 1; 55 | // } 56 | 57 | // i = i + 1; 58 | // } 59 | 60 | // rootObjects = Arrays.copyOfRange(objects, 0, 10); 61 | // } 62 | 63 | @Benchmark 64 | public void markSweep(Blackhole blackhole) { 65 | } 66 | 67 | public static class Data { 68 | public static class Integer extends Data { 69 | int value; 70 | public Integer(int value) { 71 | this.value = value; 72 | } 73 | } 74 | 75 | public static class Pointer extends Data { 76 | Obj value; 77 | public Pointer(Obj value) { 78 | this.value = value; 79 | } 80 | } 81 | } 82 | 83 | static class Obj { 84 | boolean marked; 85 | Data[] data; 86 | 87 | public Obj(boolean marked, Data[] data) { 88 | this.marked = marked; 89 | this.data = data; 90 | } 91 | } 92 | 93 | static class Heap { 94 | Obj[] objects; 95 | public Heap(Obj[] objects) { 96 | this.objects = objects; 97 | } 98 | } 99 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tools/08-tools.java: -------------------------------------------------------------------------------- 1 | /** 2 | * TOOLS 3 | * 4 | * JMH is an incredibly useful tool for benchmarking and optimizing code. However, although JMH is 5 | * qute useful, it cannot tell you why your code is slow or tell you which parts of your code you 6 | * should benchmark. 7 | * 8 | * In this section, you will explore several tools that you can use both to help identify 9 | * performance bottlenecks, as well as to understand why an identified section of code is slow. 10 | * 11 | * EXERCISE 1 12 | * 13 | * Use the flag "-XX:+PrintCompilation" to print out the JIT compilation of the benchmark. Is the 14 | * `fib` method compiled to native code by HotSpot? 15 | * 16 | * See tools/PrintCompilationBenchmark.java 17 | * 18 | * EXERCISE 2 19 | * 20 | * Use the flag "-XX:+PrintInlining" (together with "-XX:+UnlockDiagnosticVMOptions") to print out 21 | * the inlining of the benchmark. 22 | * 23 | * Is the `makeSize` method inlined by HotSpot? 24 | * 25 | * See tools/PrintInliningBenchmark.java 26 | * 27 | * EXERCISE 3 28 | * 29 | * Profilers can be incredibly useful for identifying performance bottlenecks. Even though it is 30 | * hard to optimize against a profiling, a profiler can help you identify the most expensive 31 | * sections of code (in terms of CPU or memory), which you can then benchmark and optimize. 32 | * 33 | * In this exercise, you will take your benchmark tool of choice to identify performance bottlenecks 34 | * in the provided code. You can use this information in the next module. 
35 | * 36 | * See tools/ProfilerExample.java 37 | * 38 | * GRADUATION PROJECT 39 | * 40 | * Sometimes, you need to see something closer to the raw bytecode that your compiler generates. 41 | * This is especially true when you are using higher-level languages like Kotlin, Scala, and 42 | * Clojure, because these languages have features that do not map directly to JVM bytecode. 43 | * 44 | * In order to do this, you can use the `javap` method with the following flags: 45 | * 46 | * - `-c` prints out the bytecode 47 | * - `-l` prints out line numbers 48 | * - `-p` prints out private methods 49 | * - `-s` prints out internal type signatures 50 | * - `-v` prints out verbose information 51 | * 52 | * In this exercise, you will use `javap` to see the bytecode generated by the Scala compiler for 53 | * the provided benchmark. Walk through the reverse-engineered code and try to understand any 54 | * sources of inefficiency that you see. Revise the inefficient code until `javap` shows you cleanly 55 | * generated code that you would expect to be fast. 56 | * 57 | * See tools/JavapBenchmark.java 58 | */ -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/estimation/Estimation4Benchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.estimation; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.Arrays; 7 | import java.util.stream.IntStream; 8 | import java.util.function.IntUnaryOperator; 9 | 10 | 11 | @State(Scope.Thread) 12 | @OutputTimeUnit(TimeUnit.SECONDS) 13 | @BenchmarkMode({Mode.Throughput}) 14 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 15 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis", "-XX:-Inline"}) 17 | @Threads(16) 18 | public class Estimation4Benchmark { 19 | @Param({"1000", "10000"}) 20 | int size = 0; 21 | 22 | IntUnaryOperator[] Adders = {i -> i + 1, i -> i + 2, i -> i + 3, i -> i + 4, i -> i + 5}; 23 | 24 | IntUnaryOperator[] operations1 = null; 25 | ElementChanger[] operations2 = null; 26 | IntegerChanger[] operations3 = null; 27 | 28 | @Setup 29 | public void setup() { 30 | operations1 = IntStream.range(0, size).mapToObj(index -> Adders[index % Adders.length]).toArray(IntUnaryOperator[]::new); 31 | operations2 = new ElementChanger[size]; 32 | Arrays.fill(operations2, Adder); 33 | operations3 = new IntegerChanger[size]; 34 | Arrays.fill(operations3, Adder2); 35 | } 36 | 37 | @Benchmark 38 | public void ops1(Blackhole blackhole) { 39 | int i = 0; 40 | int result = 0; 41 | while (i < size) { 42 | IntUnaryOperator op = operations1[i]; 43 | result = op.applyAsInt(result); 44 | i = i + 1; 45 | } 46 | blackhole.consume(result); 47 | } 48 | 49 | @Benchmark 50 | public void ops2(Blackhole blackhole) { 51 | int i = 0; 52 | int result = 0; 53 | while (i < size) { 54 | var op = operations2[i]; 55 | result = op.change(result); 56 | i = i + 1; 57 | } 58 | blackhole.consume(result); 59 | } 60 | 61 | @Benchmark 62 | public void ops3(Blackhole blackhole) { 63 | int i = 0; 64 | int result = 0; 65 | while (i < size) { 66 | var op = operations3[i]; 67 | result = op.change(result); 68 | i = i + 1; 69 | } 70 | blackhole.consume(result); 71 | } 72 | 73 | @FunctionalInterface 74 | interface ElementChanger { 75 | abstract T change(T t); 76 | } 77 | ElementChanger Adder = i -> i + 
1; 78 | 79 | abstract class IntegerChanger { 80 | abstract int change(int t); 81 | } 82 | 83 | IntegerChanger Adder2 = new IntegerChanger() { 84 | int change(int t) { 85 | return t + 1; 86 | } 87 | }; 88 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/virtual/02-virtual.java: -------------------------------------------------------------------------------- 1 | /** 2 | * VIRTUAL DISPATCH 3 | * 4 | * Surprisingly, not all methods are equal: calling some methods can be quite fast, and calling 5 | * other methods can be dangerously slow, even if their implementations are *exactly* the same. 6 | * 7 | * This surprising fact is due to the way that object-oriented languages implement polymorphism. 8 | * Polymorphism allows us to write code that is generic over a type. For example, we might have some 9 | * business logic that can work with any key/value store, whether backed by a database, an in-memory 10 | * hash map, or a cloud API. 11 | * 12 | * In object-oriented programming languages, we achieve this type of polymorphism with inheritance, 13 | * and then implementing or overriding methods in a subtype. 14 | * 15 | * In this section, you will learn more about how this works, its impact on performance, and 16 | * potential workarounds for performance sensitive code. 17 | * 18 | * EXERCISE 1 19 | * 20 | * Every method invocation potentially goes through virtual dispatch, which is a process involving 21 | * looking up which concrete non-final method invocation is potentially a virtual dispatch. 22 | * 23 | * In this exercise, you will explore the cost of virtual dispatch. The current benchmark creates a 24 | * chunk of operators, each one of which is a random operator chosen from among the provided set. At 25 | * runtime, the JVM does not know which element of the chunk has which concrete type, so it must 26 | * lookup the correct method to invoke on an object-by-object basis. This results in lower 27 | * performance. 28 | * 29 | * Augment this benchmark with another benchmark, which uses another chunk, where every element of 30 | * the chunk uses the same concrete operator (e.g. Operator.DividedBy.type). In your new benchmark, 31 | * because the JVM knows the concrete type of the object, when it invokes the apply method, it knows 32 | * exactly where the code for that function is, and does not need to perform a preliminary lookup. 33 | * This should result in faster performance. 34 | * 35 | * See virtual/PolyBenchmark.java 36 | * 37 | * EXERCISE 2 38 | * 39 | * In this exercise, you will simulate the cost of a virtual dispatch by creating a benchark that 40 | * must lookup the correct method based on the virtual method table stored together with the data 41 | * for an object. 42 | * 43 | * Create an invokeVirtual benchmark that uses `obj.meta` to find the address of the method to be 44 | * invoked. Compare the performance of this benchmark to the invokeStatic benchmark. 45 | * 46 | * Note that this benchmark is not that realistic. There is no hash map lookup with invoke dynamic. 47 | * Nonetheless, getting a feel for the extra work the JVM must do to perform a virtual dispatch is 48 | * useful. 
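A hedged Java sketch of the invokeVirtual benchmark for PolySimBenchmark. Because `JVMMethod` does not override equals/hashCode in the file as written, this sketch resolves the target by comparing class and method names rather than calling `vtable.get`, which is itself a reminder of the extra resolution work virtual dispatch implies:

```java
@Benchmark
public void invokeVirtual(Blackhole blackhole) {
    Address target = null;
    // Walk the object's vtable to resolve the address of the method being invoked.
    for (Object entry : obj.meta.vtable) {
        io.vavr.Tuple2 kv = (io.vavr.Tuple2) entry;
        JVMMethod key = (JVMMethod) kv._1;
        if (key.clazz.equals(iv.method.clazz) && key.name.equals(iv.method.name)) {
            target = (Address) kv._2;
        }
    }
    blackhole.consume(target.value);
}
```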
49 | */ 50 | 51 | // See virtual/PolySimBenchmark.java -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/05-exceptions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * EXCEPTIONS 3 | * 4 | * Exceptions can be a source of overhead in any case where they cease to be "exceptional" (i.e. 5 | * when they occur frequently and are expected to occur as part of the business logic). 6 | * 7 | * In this section, you will explore and isolate the overhead of exceptions. 8 | */ 9 | package net.degoes.exceptions 10 | 11 | import org.openjdk.jmh.annotations._ 12 | import org.openjdk.jmh.infra.Blackhole 13 | import java.util.concurrent.TimeUnit 14 | 15 | /** 16 | * EXERCISE 1 17 | * 18 | * Develop a benchmark to measure the overhead of throwing and catching `MyException` with a fixed 19 | * message. Compare this with the overhead of constructing a new `MyException` without throwing (or 20 | * catching) it. What can you conclude from this benchmark? 21 | */ 22 | @State(Scope.Thread) 23 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 24 | @BenchmarkMode(Array(Mode.Throughput)) 25 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 26 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 27 | @Fork(value = 1, jvmArgsAppend = Array()) 28 | @Threads(16) 29 | class ThrowExceptionBenchmark { 30 | case class MyException(message: String) extends Exception(message) 31 | 32 | @Benchmark 33 | def throwCatchException(): Unit = ??? 34 | 35 | @Benchmark 36 | def constructException(): Unit = ??? 37 | } 38 | 39 | /** 40 | * EXERCISE 2 41 | * 42 | * Develop a benchmark to measure the overhead of throwing and catching the same exception. Compare 43 | * this with the overhead of throwing and catching new exceptions. What can you conclude from this 44 | * comparison, together with the previous exercise? 45 | */ 46 | @State(Scope.Thread) 47 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 48 | @BenchmarkMode(Array(Mode.Throughput)) 49 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 50 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 51 | @Fork(value = 1, jvmArgsAppend = Array()) 52 | @Threads(16) 53 | class ThrowSameExceptionBenchmark { 54 | case class MyException(message: String) extends Exception(message) 55 | 56 | val exception = MyException("Hello") 57 | 58 | @Benchmark 59 | def throwCatchNewException(): Unit = try 60 | throw MyException("Hello") 61 | catch { case _: Throwable => () } 62 | 63 | @Benchmark 64 | def throwCatchSameException(): Unit = ??? // TODO 65 | } 66 | 67 | /** 68 | * EXERCISE 3 69 | * 70 | * Develop a benchmark to measure the overhead of calling Exception#fillInStackTrace. What can you 71 | * conclude from this benchmark? 72 | */ 73 | @State(Scope.Thread) 74 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 75 | @BenchmarkMode(Array(Mode.Throughput)) 76 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 77 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 78 | @Fork(value = 1, jvmArgsAppend = Array()) 79 | @Threads(16) 80 | class FillInStackTraceBenchmark { 81 | case class MyException(message: String) extends Exception(message) 82 | 83 | val exception = MyException("Hello") 84 | 85 | @Benchmark 86 | def fillInStackTrace(): Unit = ??? 
// TODO 87 | 88 | @Benchmark 89 | def throwCatchNewException(): Unit = try 90 | throw MyException("Hello") 91 | catch { case _: Throwable => () } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/kotlin.skip/net/degoes/05-exceptions.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * EXCEPTIONS 3 | * 4 | * Exceptions can be a source of overhead in any case where they cease to be "exceptional" (i.e. 5 | * when they occur frequently and are expected to occur as part of the business logic). 6 | * 7 | * In this section, you will explore and isolate the overhead of exceptions. 8 | */ 9 | package net.degoes.exceptions 10 | 11 | import org.openjdk.jmh.annotations.* 12 | import org.openjdk.jmh.infra.Blackhole 13 | import java.util.concurrent.TimeUnit 14 | 15 | /** 16 | * EXERCISE 1 17 | * 18 | * Develop a benchmark to measure the overhead of throwing and catching `MyException` with a fixed 19 | * message. Compare this with the overhead of constructing a new `MyException` without throwing (or 20 | * catching) it. What can you conclude from this benchmark? 21 | */ 22 | @State(Scope.Thread) 23 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 24 | @BenchmarkMode(Mode.Throughput) 25 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 26 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 27 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 28 | @Threads(16) 29 | open class ThrowExceptionBenchmark { 30 | data class MyException(override val message: String) : Exception(message) 31 | 32 | @Benchmark 33 | fun throwCatchException(): Unit { 34 | } 35 | 36 | @Benchmark 37 | fun constructException(): Unit { 38 | } 39 | } 40 | 41 | /** 42 | * EXERCISE 2 43 | * 44 | * Develop a benchmark to measure the overhead of throwing and catching the same exception. Compare 45 | * this with the overhead of throwing and catching new exceptions. What can you conclude from this 46 | * comparison, together with the previous exercise? 47 | */ 48 | @State(Scope.Thread) 49 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 50 | @BenchmarkMode(Mode.Throughput) 51 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 52 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 53 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 54 | @Threads(16) 55 | open class ThrowSameExceptionBenchmark { 56 | data class MyException(override val message: String) : Exception(message) 57 | 58 | val exception = MyException("Hello") 59 | 60 | @Benchmark 61 | fun throwCatchNewException(): Unit { 62 | try { throw MyException("Hello") } 63 | catch (th: Throwable) {} 64 | } 65 | 66 | @Benchmark 67 | fun throwCatchSameException(): Unit {} // TODO 68 | } 69 | 70 | /** 71 | * EXERCISE 3 72 | * 73 | * Develop a benchmark to measure the overhead of calling Exception#fillInStackTrace. What can you 74 | * conclude from this benchmark? 
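Hedged Java sketches of what the two missing bodies might measure (mirroring the Scala and Kotlin stubs above; `exception` is the single pre-constructed MyException instance already present in those benchmarks):

```java
@Benchmark
public void throwCatchSameException(Blackhole blackhole) {
    try {
        throw exception;              // reuse one instance: no construction, no new stack trace
    } catch (Throwable t) {
        blackhole.consume(t);
    }
}

@Benchmark
public void fillInStackTrace(Blackhole blackhole) {
    // Re-capture the stack trace on the shared instance without throwing it.
    blackhole.consume(exception.fillInStackTrace());
}
```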
75 | */ 76 | @State(Scope.Thread) 77 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 78 | @BenchmarkMode(Mode.Throughput) 79 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 80 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 81 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 82 | @Threads(16) 83 | open class FillInStackTraceBenchmark { 84 | data class MyException(override val message: String) : Exception(message) 85 | 86 | val exception = MyException("Hello") 87 | 88 | @Benchmark 89 | fun fillInStackTrace(): Unit {} // TODO 90 | 91 | @Benchmark 92 | fun throwCatchNewException(): Unit { 93 | try { 94 | throw MyException("Hello") 95 | } catch (th: Throwable) {} 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | The JVM is a rock-solid, battle-proven platform for developing and deploying applications. Thousands of engineering years have been invested into the facilities available on it, resulting in sophisticated garbage collection and just-in-time compilation mechanisms. The interaction between this platform and our applications is nuanced and often misunderstood. 4 | 5 | In this 5-day workshop, the participants will learn about the two major runtime mechanisms of the JVM affecting performance - the JIT compiler and the garbage collector; techniques for writing performant JVM code and effective use of tools for analyzing performance. 6 | 7 | ## Who Should Attend 8 | 9 | Engineers, SREs and tech leads responsible for production JVM applications. 10 | 11 | ## Prerequisites 12 | 13 | Basic knowledge of the JVM; basic experience running JVM applications in production. 14 | 15 | ## Topics 16 | 17 | - Overview of the JVM 18 | - Garbage collection on the JVM 19 | - Types of garbage collectors and choosing a garbage collector for a workload 20 | - Analyzing allocations in JVM code 21 | - Writing JVM code that is lean on allocations 22 | - Analyzing bytecode to pinpoint allocations 23 | - Monitoring garbage collector performance at runtime 24 | - Understanding JIT compilation on the JVM 25 | - Benchmarking code using JMH 26 | - Analyzing JIT behaviour at runtime 27 | - Writing JIT-friendly JVM code 28 | 29 | ## Daily Structure 30 | 31 | 5 days, 4 hours a day starting. 32 | 33 | ## Attendance 34 | 35 | Attendance at this workshop is fully remote. Attendees will be provided with a link to a remote meeting session the day before the event, in which they can see and hear the workshop, ask the instructor questions, and chat with other attendees. 36 | 37 | ## Usage 38 | 39 | ### From the UI 40 | 41 | 1. Download the repository as a [zip archive](https://github.com/jdegoes/jvm-perf/archive/master.zip). 42 | 2. Unzip the archive, usually by double-clicking on the file. 43 | 3. Configure the source code files in the IDE or text editor of your choice. 44 | 45 | ### From the Command Line 46 | 47 | 1. Open up a terminal window. 48 | 49 | 2. Clone the repository. 50 | 51 | ```bash 52 | git clone https://github.com/jdegoes/jvm-perf 53 | ``` 54 | 3. Enable your target language, `scala`, `kotlin` or `java`: 55 | 56 | ```bash 57 | mv src/main/{language}.skip src/main/{language} 58 | ``` 59 | 60 | For example, `mv src/main/java.skip src/main/java`. 61 | 62 | 5. Launch project provided `sbt`. 63 | 64 | ```bash 65 | cd jvm-perf; ./sbt 66 | ``` 67 | 6. Enter continuous compilation mode. 
68 | 69 | ```bash 70 | sbt:jvm-perf> ~ test:compile 71 | ``` 72 | 73 | Hint: You might get the following error when starting sbt: 74 | 75 | > [error] typesafe-ivy-releases: unable to get resource for com.geirsson#sbt-scalafmt;1.6.0-RC4: res=https://repo.typesafe.com/typesafe/ivy-releases/com.geirsson/sbt-scalafmt/1.6.0-RC4/jars/sbt-scalafmt.jar: javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested targe 76 | 77 | It's because you have an outdated Java version, missing some newer certificates. Install a newer Java version, e.g. using [Jabba](https://github.com/shyiko/jabba), a Java version manager. See [Stackoverflow](https://stackoverflow.com/a/58669704/1885392) for more details about the error. 78 | 79 | ## Legal 80 | 81 | Copyright© 2023 John A. De Goes. All rights reserved. 82 | -------------------------------------------------------------------------------- /src/main/kotlin.skip/net/degoes/11-gotchas.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * GOTCHAS 3 | * 4 | * The JVM is a highly dynamic environment. You may believe that a benchmark shows you one thing, 5 | * when in fact the opposite may be the case in your application code. 6 | * 7 | * It is for this reason that everyone should treat the result of benchmarks and profiling data, 8 | * which can feed into a hypothesis, which can then be tested and either rejected or tenatively 9 | * accepted. 10 | * 11 | * In this section, you will see for yourself reasons to be cautious. 12 | */ 13 | package net.degoes.gotchas 14 | 15 | import org.openjdk.jmh.annotations.* 16 | import org.openjdk.jmh.infra.Blackhole 17 | import java.util.concurrent.TimeUnit 18 | 19 | /** 20 | * EXERCISE 1 21 | * 22 | * Create an unboxed version of this benchmark, which follows the structure and flow of the boxed 23 | * version (for fairness). What do you expect to happen? What actually happens? 24 | * 25 | * Note that the results you see in this benchmark are NOT generally applicable to your application. 26 | * It would be a gross error to generalize them. 27 | * 28 | * EXERCISE 2 29 | * 30 | * Add the JVM options "-XX:-DoEscapeAnalysis", "-XX:-Inline" and re-run the benchmark. Now guess why 31 | * you see the behavior you are seeing, and come up with a modification to the benchmark that will 32 | * enable you to see the expected behavior (a modification that would accurately reflect some 33 | * application code you might write). 34 | */ 35 | @State(Scope.Thread) 36 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 37 | @BenchmarkMode(Mode.Throughput) 38 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 39 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 40 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 41 | @Threads(16) 42 | open class MisleadingBenchmark { 43 | @Param("100", "1000", "10000") 44 | var size: Int = 0 45 | 46 | fun getBoxedAge(i: Int): Age = Age(i) 47 | 48 | @Benchmark 49 | fun boxed(blackhole: Blackhole): Unit { 50 | var i = 0 51 | var sum = 0 52 | while (i < size) { 53 | val age = getBoxedAge(i) 54 | sum = sum + age.value 55 | i = i + 1 56 | } 57 | blackhole.consume(sum) 58 | } 59 | 60 | data class Age(val value: Int) 61 | } 62 | 63 | /** 64 | * EXERCISE 3 65 | * 66 | * This benchmark purports to show that precomputing fibonacci numbers is slower than just computing 67 | * them dynamically. 
However, the benchmark is flawed. Fix the benchmark so that it shows the 68 | * expected result. 69 | * 70 | * NOTE: In general, mistakes involving setup overhead will NOT be this easy to identify and fix. 71 | */ 72 | @State(Scope.Thread) 73 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 74 | @BenchmarkMode(Mode.Throughput) 75 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 76 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 77 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 78 | @Threads(16) 79 | open class SetupOverheadBenchmark { 80 | @Param("100", "1000") 81 | var maxFib: Int = 0 82 | 83 | @Param("10", "100") 84 | var fib: Int = 0 85 | 86 | fun fib(n: Int): Int { 87 | tailrec fun fibAcc(n: Int, a: Int, b: Int): Int = 88 | if (n == 0) a 89 | else fibAcc(n - 1, b, a + b) 90 | 91 | return fibAcc(n, 0, 1) 92 | } 93 | 94 | @Benchmark 95 | fun precomputedFib(blackhole: Blackhole): Unit { 96 | var precomputedFib: Array = Array(maxFib + 1) { fib(it) } 97 | 98 | blackhole.consume(precomputedFib[fib]) 99 | } 100 | 101 | @Benchmark 102 | fun dynamicFib(blackhole: Blackhole): Unit = 103 | blackhole.consume(fib(fib)) 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/collections/01-collections.java: -------------------------------------------------------------------------------- 1 | /** 2 | * COLLECTIONS 3 | * 4 | * Thanks to powerful abstractions on the JVM, including java.util Collections, or standard library 5 | * collections in Scala, Kotlin, and other JVM-based languages, it is easy to write code that 6 | * processes data in bulk. 7 | * 8 | * With this ease comes a danger: it is easy to write code that is not performant. This performance 9 | * cost comes about because of several factors: 10 | * 11 | * 1. Wrong collection type. Different collection types have different overhead on different kinds 12 | * of operations. For example, doubly-linked linked lists are good at prepending and appending 13 | * single elements, but are terrible at random access. 14 | * 15 | * 2. Boxing of primitives. On the JVM, primitives are not objects, and so they must be boxed into 16 | * objects in order to be stored in collections. This boxing and unboxing can be expensive. 17 | * 18 | * 3. Cache locality. Modern CPUs are very fast, but they are also very complex. One of the ways 19 | * that CPUs achieve their speed is by caching data in memory. Most collection types do not store 20 | * their elements contiguously in memory, even if they are primitives, and so cannot take advantage 21 | * of the CPU cache, resulting in slower performance. 22 | * 23 | * In this section, you will use the JMH benchmarking tool in order to explore collection 24 | * performance across a range of collections, and then you will discover not only how to use the 25 | * fastest collection type but how to increase its applicability to a wider range of use cases. 26 | * 27 | * EXERCISE 1 28 | * 29 | * This benchmark is currently configured with a Vavr List, which is a singly-linked list data type. Add two 30 | * other collection types to this benchmark such as Vector and Array; if completing these 31 | * exercises in another programming language, be sure to at least choose Array). 32 | * 33 | * EXERCISE 2 34 | * 35 | * Identify which collection is the fastest for prepending a single element, and explain why. 
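As a concrete starting point, here is a sketch of how such a prepend comparison might be structured (an illustration only, not the contents of collections/ElementPrependBenchmark.java; the class name and sizes are made up, and the usual warm-up/measurement annotations are omitted):

```java
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
import io.vavr.collection.List;
import io.vavr.collection.Vector;
import java.util.stream.IntStream;

@State(Scope.Thread)
public class ElementPrependSketch {
    @Param({"1000", "100000"})
    int size;

    List<Integer> list;
    Vector<Integer> vector;
    Integer[] array;

    @Setup
    public void setup() {
        list = List.range(0, size);
        vector = Vector.range(0, size);
        array = IntStream.range(0, size).boxed().toArray(Integer[]::new);
    }

    @Benchmark
    public void listPrepend(Blackhole blackhole) {
        blackhole.consume(list.prepend(42));      // O(1): allocate a single new head cell
    }

    @Benchmark
    public void vectorPrepend(Blackhole blackhole) {
        blackhole.consume(vector.prepend(42));    // effectively constant: copies a small path of the trie
    }

    @Benchmark
    public void arrayPrepend(Blackhole blackhole) {
        Integer[] bigger = new Integer[size + 1]; // O(n): every element must be copied
        bigger[0] = 42;
        System.arraycopy(array, 0, bigger, 1, size);
        blackhole.consume(bigger);
    }
}
```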
36 | * 37 | * See collections/ElementPrependBenchmark.java 38 | * 39 | * EXERCISE 3 40 | * 41 | * Create a benchmark for concatenation across lists, vectors or any other collection type, and arrays. 42 | * 43 | * See collections/ConcatBenchmark.java 44 | * 45 | * EXERCISE 4 46 | * 47 | * Create a benchmark for random access across lists, vectors or any other collection type, and arrays. 48 | * 49 | * See collections/RandomAccessBenchmark.java 50 | * 51 | * EXERCISE 5 52 | * 53 | * Create a benchmark for iteration, which sums all the elements in a collection, across lists, 54 | * vectors or any other collection type, and arrays. 55 | * 56 | * NOTE: Arrays of primitives are specialized on the JVM. Which means they do not incur overhead of 57 | * "boxing", a topic we will return to later. For now, just make sure to store java.lang.Integer 58 | * values in the Array in order to ensure the benchmark is fair. 59 | * 60 | * See collections/IterationBenchmark.java 61 | * 62 | * EXERCISE 6 63 | * 64 | * Create a benchmark for lookup of an element by a property of the element, across lists, arrays, 65 | * and maps. 66 | * 67 | * See collections/LookupBenchmark.java 68 | * 69 | * GRADUATION PROJECT 70 | * 71 | * Develop a new immutable collection type (`Chain`) that has O(1) for concatenation. Compare its 72 | * performance to at least two other collection types. Then augment this collection type with 73 | * iteration, so you can benchmark iteration against the other collection types. 74 | * 75 | * Think carefully about whether or not it is possible to have a single collection type that has 76 | * best-in-class performance across all operations. Why or why not? 77 | */ 78 | -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/04-boxing.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * BOXING 3 | * 4 | * The JVM draws a sharp distinction between primitive types (such as integers, floats, and bytes) 5 | * and reference types (such as String and user-defined classes). 6 | * 7 | * Primitive types may be stored on the stack, and when they are stored on the heap (for example, as 8 | * part of a user-defined class), they are stored in a very compact form. Finally, arrays are 9 | * specialized for primitive types, which enable very compact and performant access to their 10 | * elements. 11 | * 12 | * In this section, you will explore the nature and overhead of boxing. 13 | */ 14 | package net.degoes.boxing 15 | 16 | import org.openjdk.jmh.annotations._ 17 | import org.openjdk.jmh.infra.Blackhole 18 | import java.util.concurrent.TimeUnit 19 | 20 | /** 21 | * EXERCISE 1 22 | * 23 | * Design a benchmark to measure the overhead of boxing. In order to be fair to the boxing 24 | * benchmark, you should design it to have a similar structure and process. The difference is that 25 | * it will not box the individual integers in an array. 26 | * 27 | * Discuss the overhead of boxing and how it compared with your initial expectations. 
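A minimal sketch (in Java) of the kind of unboxed counterpart the exercise asks for: the same read, increment, write-back, sum loop, but over a plain int[] so no wrapper objects are allocated. The `unboxedValues` field is an assumption, filled with zeros in @Setup just like the boxed array:

```java
int[] unboxedValues;   // assumed: new int[size], initialized in @Setup

@Benchmark
public void unboxed(Blackhole blackhole) {
    int i = 0;
    int sum = 0;
    while (i < size) {
        int newValue = unboxedValues[i] + 1;   // no Boxed allocation, no unboxing
        unboxedValues[i] = newValue;
        sum = sum + newValue;
        i = i + 1;
    }
    blackhole.consume(sum);
}
```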
28 | */ 29 | @State(Scope.Thread) 30 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 31 | @BenchmarkMode(Array(Mode.Throughput)) 32 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 33 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 34 | @Fork(1) 35 | @Threads(16) 36 | class BoxedBenchmark { 37 | @Param(Array("100", "1000", "10000")) 38 | var size: Int = _ 39 | 40 | var boxed: Array[Boxed[Int]] = _ 41 | 42 | @Setup 43 | def setup(): Unit = 44 | boxed = Array.fill(size)(Boxed[Int](0)) 45 | 46 | @Benchmark 47 | def boxed(blackhole: Blackhole): Unit = { 48 | var i = 0 49 | var sum = 0 50 | while (i < size) { 51 | val newValue = boxed(i).value + 1 52 | boxed(i) = Boxed(newValue) 53 | sum = sum + newValue 54 | i = i + 1 55 | } 56 | blackhole.consume(sum) 57 | } 58 | 59 | @Benchmark 60 | def unboxed(blackhole: Blackhole): Unit = () 61 | 62 | case class Boxed[T](value: T) 63 | } 64 | 65 | /** 66 | * EXERCISE 2 67 | * 68 | * Boxing is not just something that occurs with generic data structures, such as lists, sets, and 69 | * maps. It occurs also with interfaces that provide generic functionality. 70 | * 71 | * In this exercise, you will explore the cost of boxing with the Comparator interface. The 72 | * Comparator interface is a generic interface that allows you to compare two values of the same 73 | * type. Create a specialized version to see the overhead of boxing in this example. 74 | */ 75 | @State(Scope.Thread) 76 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 77 | @BenchmarkMode(Array(Mode.Throughput)) 78 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 79 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 80 | @Fork(1) 81 | @Threads(16) 82 | class BoxedComparatorBenchmark { 83 | @Param(Array("100", "1000", "10000")) 84 | var size: Int = _ 85 | 86 | var ints: Array[Int] = _ 87 | 88 | @Setup 89 | def setup(): Unit = 90 | ints = Array.fill(size)(0) 91 | 92 | @Benchmark 93 | def boxed(blackhole: Blackhole): Unit = { 94 | var i = 0 95 | var sum = 0 96 | while (i < size) { 97 | sum += IntGenericComparator.compare(ints(i), 0) 98 | i = i + 1 99 | } 100 | blackhole.consume(sum) 101 | } 102 | 103 | trait Comparator[T] { 104 | def compare(l: T, r: T): Int 105 | } 106 | val IntGenericComparator: Comparator[Int] = new Comparator[Int] { 107 | def compare(l: Int, r: Int): Int = l - r 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/11-gotchas.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * GOTCHAS 3 | * 4 | * The JVM is a highly dynamic environment. You may believe that a benchmark shows you one thing, 5 | * when in fact the opposite may be the case in your application code. 6 | * 7 | * It is for this reason that everyone should treat the result of benchmarks and profiling data, 8 | * which can feed into a hypothesis, which can then be tested and either rejected or tenatively 9 | * accepted. 10 | * 11 | * In this section, you will see for yourself reasons to be cautious. 12 | */ 13 | package net.degoes.gotchas 14 | 15 | import org.openjdk.jmh.annotations._ 16 | import org.openjdk.jmh.infra.Blackhole 17 | import java.util.concurrent.TimeUnit 18 | 19 | /** 20 | * EXERCISE 1 21 | * 22 | * Create an unboxed version of this benchmark, which follows the structure and flow of the boxed 23 | * version (for fairness). What do you expect to happen? What actually happens? 
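For orientation only, one shape the unboxed variant of MisleadingBenchmark often takes is sketched below (in Java; `getUnboxedAge` is a hypothetical helper, not part of the repository). Keep in mind the point of the exercise: with escape analysis and inlining enabled, the JIT may eliminate the Age allocation entirely, so the boxed and unboxed versions may benchmark very similarly until those optimizations are disabled.

```java
int getUnboxedAge(int i) {
    return i;                        // same flow as getBoxedAge, minus the wrapper
}

@Benchmark
public void unboxed(Blackhole blackhole) {
    int i = 0;
    int sum = 0;
    while (i < size) {
        sum = sum + getUnboxedAge(i);
        i = i + 1;
    }
    blackhole.consume(sum);
}
```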
24 | * 25 | * Note that the results you see in this benchmark are NOT generally applicable to your application. 26 | * It would be a gross error to generalize them. 27 | * 28 | * EXERCISE 2 29 | * 30 | * Add the JVM options "-XX:-DoEscapeAnalysis", "-XX:-Inline" and re-run the benchmark. Now guess why 31 | * you see the behavior you are seeing, and come up with a modification to the benchmark that will 32 | * enable you to see the expected behavior (a modification that would accurately reflect some 33 | * application code you might write). 34 | */ 35 | @State(Scope.Thread) 36 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 37 | @BenchmarkMode(Array(Mode.Throughput)) 38 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 39 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 40 | @Fork(value = 1, jvmArgsAppend = Array()) 41 | @Threads(16) 42 | class MisleadingBenchmark { 43 | @Param(Array("100", "1000", "10000")) 44 | var size: Int = _ 45 | 46 | def getBoxedAge(i: Int): Age = Age(i) 47 | 48 | @Benchmark 49 | def boxed(blackhole: Blackhole): Unit = { 50 | var i = 0 51 | var sum = 0 52 | while (i < size) { 53 | val age = getBoxedAge(i) 54 | sum = sum + age.value 55 | i = i + 1 56 | } 57 | blackhole.consume(sum) 58 | } 59 | 60 | case class Age(value: Int) 61 | } 62 | 63 | /** 64 | * EXERCISE 3 65 | * 66 | * This benchmark purports to show that precomputing fibonacci numbers is slower than just computing 67 | * them dynamically. However, the benchmark is flawed. Fix the benchmark so that it shows the 68 | * expected result. 69 | * 70 | * NOTE: In general, mistakes involving setup overhead will NOT be this easy to identify and fix. 71 | */ 72 | @State(Scope.Thread) 73 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 74 | @BenchmarkMode(Array(Mode.Throughput)) 75 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 76 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 77 | @Fork(value = 1, jvmArgsAppend = Array()) 78 | @Threads(16) 79 | class SetupOverheadBenchmark { 80 | @Param(Array("100", "1000")) 81 | var maxFib: Int = _ 82 | 83 | @Param(Array("10", "100")) 84 | var fib: Int = _ 85 | 86 | def fib(n: Int): Int = { 87 | @annotation.tailrec 88 | def fibAcc(n: Int, a: Int, b: Int): Int = 89 | if (n == 0) a 90 | else fibAcc(n - 1, b, a + b) 91 | 92 | fibAcc(n, 0, 1) 93 | } 94 | 95 | @Benchmark 96 | def precomputedFib(blackhole: Blackhole): Unit = { 97 | var precomputedFib: Array[Int] = Array.from(0 to maxFib).map(fib(_)) 98 | 99 | blackhole.consume(precomputedFib(fib)) 100 | } 101 | 102 | @Benchmark 103 | def dynamicFib(blackhole: Blackhole): Unit = 104 | blackhole.consume(fib(fib)) 105 | } 106 | -------------------------------------------------------------------------------- /src/main/kotlin.skip/net/degoes/04-boxing.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * BOXING 3 | * 4 | * The JVM draws a sharp distinction between primitive types (such as integers, floats, and bytes) 5 | * and reference types (such as String and user-defined classes). 6 | * 7 | * Primitive types may be stored on the stack, and when they are stored on the heap (for example, as 8 | * part of a user-defined class), they are stored in a very compact form. Finally, arrays are 9 | * specialized for primitive types, which enable very compact and performant access to their 10 | * elements. 11 | * 12 | * In this section, you will explore the nature and overhead of boxing. 
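For the comparator exercise (EXERCISE 2 of the boxing section), here is a small Java sketch of what a specialized, primitive-only comparator could look like; the `ints` array and `size` param mirror the existing benchmarks and are assumed to be set up the same way:

```java
interface IntComparator {
    int compare(int l, int r);       // primitive arguments: nothing to box per call
}

IntComparator intComparator = (l, r) -> l - r;

@Benchmark
public void specialized(Blackhole blackhole) {
    int i = 0;
    int sum = 0;
    while (i < size) {
        sum += intComparator.compare(ints[i], 0);
        i = i + 1;
    }
    blackhole.consume(sum);
}
```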
13 | */ 14 | package net.degoes.boxing 15 | 16 | import org.openjdk.jmh.annotations.* 17 | import org.openjdk.jmh.infra.Blackhole 18 | import java.util.concurrent.TimeUnit 19 | 20 | /** 21 | * EXERCISE 1 22 | * 23 | * Design a benchmark to measure the overhead of boxing. In order to be fair to the boxing 24 | * benchmark, you should design it to have a similar structure and process. The difference is that 25 | * it will not box the individual integers in an array. 26 | * 27 | * Discuss the overhead of boxing and how it compared with your initial expectations. 28 | */ 29 | @State(Scope.Thread) 30 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 31 | @BenchmarkMode(Mode.Throughput) 32 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 33 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 34 | @Fork(1) 35 | @Threads(16) 36 | open class BoxedBenchmark { 37 | @Param("100", "1000", "10000") 38 | var size: Int = 0 39 | 40 | var boxed: Array> = emptyArray() 41 | 42 | @Setup 43 | fun setup(): Unit { 44 | boxed = Array(size) { Boxed(0) } 45 | } 46 | 47 | @Benchmark 48 | fun boxed(blackhole: Blackhole): Unit { 49 | var i = 0 50 | var sum = 0 51 | while (i < size) { 52 | val newValue = boxed[i].value + 1 53 | boxed[i] = Boxed(newValue) 54 | sum = sum + newValue 55 | i = i + 1 56 | } 57 | blackhole.consume(sum) 58 | } 59 | 60 | @Benchmark 61 | fun unboxed(blackhole: Blackhole): Unit { 62 | } 63 | 64 | data class Boxed(val value: T) 65 | } 66 | 67 | /** 68 | * EXERCISE 2 69 | * 70 | * Boxing is not just something that occurs with generic data structures, such as lists, sets, and 71 | * maps. It occurs also with interfaces that provide generic functionality. 72 | * 73 | * In this exercise, you will explore the cost of boxing with the Compare interface. The 74 | * Compare interface is a generic interface that allows you to compare two values of the same 75 | * type. Create a specialized version to see the overhead of boxing in this example. 76 | */ 77 | @State(Scope.Thread) 78 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 79 | @BenchmarkMode(Mode.Throughput) 80 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 81 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 82 | @Fork(1) 83 | @Threads(16) 84 | open class BoxedComparatorBenchmark { 85 | @Param("100", "1000", "10000") 86 | var size: Int = 0 87 | 88 | var ints: Array = emptyArray() 89 | 90 | @Setup 91 | fun setup(): Unit { 92 | ints = Array(size) { _ -> 0 } 93 | } 94 | 95 | @Benchmark 96 | fun boxed(blackhole: Blackhole): Unit { 97 | blackhole.consume(comparison(ints, 0, IntGenericComparator)) 98 | } 99 | 100 | fun comparison(array: Array, n: T, cmp: Compare) { 101 | var i = 0 102 | var sum = 0 103 | while (i < size) { 104 | sum += cmp.compare(array[i], n) 105 | i = i + 1 106 | } 107 | sum 108 | } 109 | 110 | interface Compare { 111 | abstract fun compare(l: T, r: T): Int 112 | } 113 | 114 | val IntGenericComparator: Compare = object : Compare { 115 | override fun compare(l: Int, r: Int): Int = l - r 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tuning/10-tuning.java: -------------------------------------------------------------------------------- 1 | /** 2 | * TUNING 3 | * 4 | * The JVM exposes several knobs that you can use to tweak and tune performance for your 5 | * applications. 6 | * 7 | * In this section, you will explore these knobs, with a special emphasis on garbage collection. 
8 | * 9 | * Garbage collection is all about tradeoffs. Broadly speaking, the main tradeoffs are as follows: 10 | * 11 | * Throughput versus latency. Throughput is the amount of work that can be done in a given amount of 12 | * time. Latency is the amount of time it takes to complete a single unit of work. Garbage 13 | * collection can be tuned to maximize throughput, at the expense of latency, or to maximize 14 | * latency, at the expense of throughput. 15 | * 16 | * Memory usage versus throughput. Garbage collection can be tuned to use less memory, at the 17 | * expense of throughput. Alternately, throughput can be maximized, at the expense of memory usage. 18 | * Running JVM applications on memory-constrained environments will require tuning for memory usage. 19 | * 20 | * EXERCISE 1 21 | * 22 | * Execute the benchmarks using the default garbage collector. 23 | * 24 | * See tuning/TuningBenchmark1.java 25 | * 26 | * EXERCISE 2 27 | * 28 | * Execute the benchmarks using the parallel garbage collector by using the JVM flag 29 | * -XX:+UseParallelGC. 30 | * 31 | * Experiment with the following settings to see the effect on performance: 32 | * 33 | * -XX:ParallelGCThreads (default: # of CPU cores) 34 | * -XX:MaxGCPauseMillis (default: 100) 35 | * -XX:GCTimeRatio (default: 99) 36 | * -XX:YoungGenerationSizeIncrement (default: 20) 37 | * -XX:TenuredGenerationSizeIncrement (default: 20) 38 | * -XX:AdaptiveSizeDecrementScaleFactor (default: 4) 39 | * -XX:UseGCOverheadLimit (default: true) 40 | * 41 | * See tuning/TuningBenchmark2.java 42 | * 43 | * EXERCISE 3 44 | * 45 | * Execute the benchmarks using the concurrent mark sweep garbage collector by using the JVM flag 46 | * -XX:+UseConcMarkSweepGC. 47 | * 48 | * Experiment with the following settings to see the effect on performance: 49 | * 50 | * -XX:CMSInitiatingOccupancyFraction (default: 68) 51 | * -XX:UseCMSInitiatingOccupancyOnly (default: false) 52 | * -XX:CMSInitiatingOccupancyFraction (default: 68) 53 | * -XX:CMSScavengeBeforeRemark (default: false) 54 | * -XX:ScavengeBeforeFullGC (default: false) 55 | * -XX:CMSParallelRemarkEnabled (default: true) 56 | * -XX:UseGCOverheadLimit (default: true) 57 | * 58 | * See tuning/TuningBenchmark3.java 59 | * 60 | * EXERCISE 4 61 | * 62 | * Execute the benchmarks using the G1 garbage collector by using the JVM flag -XX:+UseG1GC. 63 | * 64 | * Experiment with the following settings to see the effect on performance: 65 | * 66 | * -XX:InitiatingHeapOccupancyPercent (default: 45) 67 | * -XX:G1UseAdaptiveIHOP (default: true) 68 | * -XX:G1HeapWastePercent (default: 5) 69 | * -XX:G1PeriodicGCSystemLoadThreshold (default: 120) 70 | * -XX:MinHeapFreeRatio (default: 40) 71 | * -XX:MaxHeapFreeRatio (default: 70) 72 | * -XX:G1NewSizePercent (default: 5) 73 | * -XX:G1MaxNewSizePercent (default: 60) 74 | * -XX:NewSize (default: 1/2 of the heap) 75 | * -XX:MaxNewSize (default: 1/2 of the heap) 76 | * -XX:+AlwaysPreTouch (default: false) 77 | * 78 | * See tuning/TuningBenchmark4.java 79 | * 80 | * EXERCISE 5 81 | * 82 | * Execute the benchmarks using the Z garbage collector by using the JVM flag -XX:+UseZGC, 83 | * and -XX:+UnlockExperimentalVMOptions depending on the JVM version you are using. 
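 *
 * One possible way to drive these experiments, assuming this build's sbt-jmh plugin (the task
 * name and benchmark selectors below are assumptions and may differ), is to pass the flags from
 * each exercise through JMH's -jvmArgsAppend option from the sbt shell, for example:
 *
 *   Jmh/run .*TuningBenchmark2.* -jvmArgsAppend -XX:+UseParallelGC
 *   Jmh/run .*TuningBenchmark4.* -jvmArgsAppend -XX:+UseG1GC -prof gc
 *
 * Adding -prof gc reports allocation rate and collection counts alongside throughput, which makes
 * the throughput/latency/footprint tradeoffs described above easier to see.
 *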
84 | * 85 | * Experiment with the following settings to see the effect on performance: 86 | * 87 | * -XX:ConcGCThreads (default: # of CPU cores) 88 | * 89 | */ 90 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/NoAllocationBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.control.NoStackTrace; 7 | import scala.util.Random; 8 | import io.vavr.collection.Map; 9 | import io.vavr.collection.HashMap; 10 | import java.util.function.BinaryOperator; 11 | import java.util.stream.Stream; 12 | 13 | @State(Scope.Thread) 14 | @OutputTimeUnit(TimeUnit.SECONDS) 15 | @BenchmarkMode({Mode.Throughput}) 16 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 17 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 18 | @Fork(value = 1, jvmArgsAppend = {}) 19 | @Threads(16) 20 | public class NoAllocationBenchmark { 21 | private Random rng = new Random(0L); 22 | 23 | String[] users = Stream.generate(() -> rng.nextString(10)).limit(1000).toArray(String[]::new); 24 | 25 | @Param({"1000", "10000"}) 26 | int size = 0; 27 | 28 | Event[] events = null; 29 | 30 | @Setup 31 | public void setup() { 32 | events = Stream.generate(() -> { 33 | int userIdx = rng.nextInt(users.length); 34 | String userId = users[userIdx]; 35 | Event event = null; 36 | 37 | switch(rng.between(0, 3)) { 38 | case 0: 39 | event = new AdView(userId); 40 | case 1: 41 | event = new AdClick(userId); 42 | case 2: 43 | event = new AdConversion(userId); 44 | } 45 | return event; 46 | }).limit(size).toArray(Event[]::new); 47 | } 48 | 49 | @Benchmark 50 | public void immutable(Blackhole blackhole) { 51 | int i = 0; 52 | MetricsMap current = new MetricsMap(HashMap.empty()); 53 | while (i < size) { 54 | Event event = events[i]; 55 | current = current.aggregate(MetricsMap.apply(event)); 56 | i = i + 1; 57 | } 58 | blackhole.consume(current); 59 | } 60 | 61 | @Benchmark 62 | public void mutable(Blackhole blackhole) { 63 | } 64 | 65 | static class Metrics { 66 | int adViews; 67 | int adClicks; 68 | int adConversions; 69 | 70 | Metrics(int adViews, int adClicks, int adConversions) { 71 | this.adViews = adViews; 72 | this.adClicks = adClicks; 73 | this.adConversions = adConversions; 74 | } 75 | 76 | Metrics aggregate(Metrics that) { 77 | return new Metrics( 78 | this.adViews + that.adViews, 79 | this.adClicks + that.adClicks, 80 | this.adConversions + that.adConversions 81 | ); 82 | } 83 | } 84 | 85 | static class MetricsMap { 86 | Map map; 87 | 88 | MetricsMap(Map map) { 89 | this.map = map; 90 | } 91 | 92 | MetricsMap add(Event event) { 93 | return this.aggregate(MetricsMap.apply(event)); 94 | } 95 | 96 | MetricsMap aggregate(MetricsMap that) { 97 | return new MetricsMap(combineWith(this.map, that.map, (left, right) -> left.aggregate(right))); 98 | } 99 | 100 | static MetricsMap apply(Event event) { 101 | Map map = null; 102 | if (event instanceof AdView) map = HashMap.of(event.userId, new Metrics(1, 0, 0)); 103 | else if (event instanceof AdClick) map = HashMap.of(event.userId, new Metrics(0, 1, 0)); 104 | else map = HashMap.of(event.userId, new Metrics(0, 0, 1)); 105 | 106 | return new MetricsMap(map); 107 | } 108 | } 109 | 110 | static Map combineWith(Map left, Map right, BinaryOperator f) { 111 | return left.foldLeft(right, 
(acc, kv) -> acc.put(kv._1, acc.get(kv._1).fold(() -> kv._2, x -> f.apply(x, kv._2)))); 112 | } 113 | 114 | abstract class Event { 115 | String userId; 116 | } 117 | 118 | class AdView extends Event { 119 | AdView(String userId) { 120 | this.userId = userId; 121 | } 122 | } 123 | class AdClick extends Event { 124 | AdClick(String userId) { 125 | this.userId = userId; 126 | } 127 | } 128 | class AdConversion extends Event { 129 | AdConversion(String userId) { 130 | this.userId = userId; 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/main/kotlin.skip/net/degoes/00-algorithms.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * ALGORITHMS 3 | * 4 | * It does not matter how aggressively you microptimize code, if the algorithms you are using are 5 | * pathological. 6 | * 7 | * In this introductory session, we will establish the importance of using correct algorithms, and 8 | * see how you can use benchmarking to identify pathological behavior. 9 | * 10 | * As we explore this important subject, you will gain familiarity with JMH, a benchmarking harness 11 | * that is commonly used on the JVM. 12 | * 13 | * The principles you will learn in this workshop apply uniformly to all JVM languages, and most of 14 | * them apply even more broadly, to other languages beyond the JVM. 15 | */ 16 | package net.degoes.algorithms 17 | 18 | import org.openjdk.jmh.annotations.* 19 | import org.openjdk.jmh.infra.Blackhole 20 | import java.util.concurrent.TimeUnit 21 | 22 | data class Person(val id: Int, val age: Int, val name: String, val follows: List) 23 | 24 | final data class SocialNetwork(val people: List) { 25 | fun getFriendsOf(id: Int): List { 26 | // Retrieve all the people that $id follows: 27 | val follows = people[id].follows 28 | 29 | // Return only the people that follow $id back ("friends"): 30 | return follows.filter { candidateId -> 31 | val candidate = people[candidateId] 32 | 33 | candidate.follows.contains(id) 34 | } 35 | } 36 | 37 | fun findMostPopularFriend(): Int? { 38 | val personAndFriendCount: List> = 39 | people.map { person -> 40 | // Map to tuple of person id and number of friends: 41 | Pair(person.id, getFriendsOf(person.id).size) 42 | } 43 | 44 | val mostPopular: Pair? = personAndFriendCount.maxBy { it.second } 45 | 46 | return mostPopular?.let { it.first } 47 | } 48 | 49 | companion object { 50 | // Deterministic RNG: 51 | private val rng = scala.util.Random(0L) 52 | 53 | fun random(people: Int, friendsPerPerson: Int): SocialNetwork = 54 | SocialNetwork( 55 | List(people) { id -> 56 | Person( 57 | id, 58 | rng.nextInt(100), 59 | "Person "+id, 60 | List(friendsPerPerson) { _ -> 61 | rng.nextInt(people) 62 | } 63 | ) 64 | } 65 | ) 66 | } 67 | } 68 | 69 | /** 70 | * EXERCISE 1 71 | * 72 | * Make a real benchmark for the `findMostPopularFriend` method. Initially, just create a social 73 | * network inside the benchmark, and then call `findMostPopularFriend` on it. 74 | * 75 | * EXERCISE 2 76 | * 77 | * In your previous benchmark, the overhead of creating the social network is included in the 78 | * benchmark of the method. This is not ideal, because it means that the benchmark is not measuring 79 | * the performance of the method in isolation. Take advantage of the @Setup annotation to create the 80 | * social network outside the benchmark. 
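 *
 * A possible shape for the empty `mutable` benchmark in NoAllocationBenchmark above (a sketch in
 * Scala rather than Java, since the workshop allows any JVM language; `MutableMetrics` and
 * `mutableMap` are illustrative names). The point of the trick is to allocate the per-user
 * counters at most once and then update them in place, so the hot loop allocates nothing per
 * event:
 *
 *   final class MutableMetrics { var adViews = 0; var adClicks = 0; var adConversions = 0 }
 *
 *   val mutableMap = new java.util.HashMap[String, MutableMetrics]()   // reused across events
 *
 *   @Benchmark
 *   def mutable(blackhole: Blackhole): Unit = {
 *     mutableMap.clear()
 *     var i = 0
 *     while (i < size) {
 *       val event   = events(i)
 *       var metrics = mutableMap.get(event.userId)
 *       if (metrics == null) {                        // allocate at most once per user
 *         metrics = new MutableMetrics
 *         mutableMap.put(event.userId, metrics)
 *       }
 *       event match {                                 // mutate counters in place
 *         case _: AdView       => metrics.adViews += 1
 *         case _: AdClick      => metrics.adClicks += 1
 *         case _: AdConversion => metrics.adConversions += 1
 *       }
 *       i = i + 1
 *     }
 *     blackhole.consume(mutableMap)
 *   }
 *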
81 | * 82 | * EXERCISE 3 83 | * 84 | * When benchmarking algorithms, a single data point is not useful: it gives you no idea of how the 85 | * performance of the algorithm changes with the size of the input. Use the @Param annotation on a 86 | * new field, `networkSize`, to see how the algorithm performs with differing network sizes. 87 | * 88 | * EXERCISE 4 89 | * 90 | * In our case, the social network has two parameters: the size of the network, and the number of 91 | * friends per person. Use the @Param annotation to create a second parameter, `friendsPerPerson`, 92 | * and see how the algorithm performs with differing numbers of friends per person. 93 | * 94 | * EXERCISE 5 95 | * 96 | * At this point, you should have an idea of how the algorithm performs, both with different network 97 | * sizes, and different numbers of friends per person. Now you will need to analyze the algorithm, 98 | * paying attention to nested loops, in order to figure out why the algorithm performs the way it 99 | * does. 100 | * 101 | * EXERCISE 6 102 | * 103 | * Now that you have some idea of why the algorithm performs the way it does, it is time to 104 | * investigate alternative methods of solving the problem that have improved algorithmic 105 | * performance. Test your potential improvements using the benchmark, and do not stop iterating 106 | * until you have found a solution that scales better with both network size and friend count. 107 | */ 108 | open class FindMostPopularFriendBenchmark { 109 | @Benchmark 110 | fun findMostPopularFriend(blackHole: Blackhole): Unit { 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tools/JavapBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tools; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.concurrent.atomic.AtomicReference; 7 | import java.util.ArrayList; 8 | import java.util.function.Function; 9 | import io.vavr.collection.List; 10 | import io.vavr.Tuple2; 11 | 12 | @org.openjdk.jmh.annotations.State(Scope.Thread) 13 | @OutputTimeUnit(TimeUnit.SECONDS) 14 | @BenchmarkMode(Mode.Throughput) 15 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 17 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis", "-XX:-Inline"}) 18 | @Threads(16) 19 | public class JavapBenchmark { 20 | 21 | @Param({"1000", "10000", "100000"}) 22 | int size = 0; 23 | 24 | State program = null; 25 | 26 | @Setup(Level.Trial) 27 | public void setup() { 28 | program = List.range(0, size).foldLeft(State.succeed(0), ((acc, x) -> { 29 | return acc.flatMap(a -> { 30 | State state = State.getState(); 31 | return state.flatMap(i -> { 32 | return State.setState(i + 1).map(j -> i + 1); 33 | }); 34 | }); 35 | })); 36 | } 37 | 38 | @Benchmark 39 | public void benchmark(Blackhole blackhole) { 40 | program.execute(0); 41 | } 42 | } 43 | 44 | class State { 45 | 46 | State flatMap(Function> f) { 47 | return new FlatMap(this, f); 48 | } 49 | 50 | State map(Function f) { 51 | return flatMap(v -> succeed(f.apply(v))); 52 | } 53 | 54 | static State succeed(A a) { 55 | return new Succeed(a); 56 | } 57 | 58 | static State getState() { 59 | return new GetState(); 60 | } 61 | 62 | static State setState(S s) { 63 | return new SetState(s); 64 | } 65 | 66 | Tuple2 execute(S 
state0) { 67 | return loop(state0); 68 | } 69 | 70 | State continueWith( 71 | Object value, 72 | AtomicReference>>> stack, 73 | AtomicReference result 74 | ) { 75 | if (stack.get().isEmpty()) { 76 | result.set((A) value); 77 | return null; 78 | } else { 79 | return stack.getAndUpdate(list -> list.tail()).head().apply(value); 80 | } 81 | } 82 | 83 | Function> eraseK(Function f) { 84 | return (x -> (State) f.apply(x)); 85 | } 86 | 87 | Tuple2 loop(S state0) { 88 | AtomicReference> next = new AtomicReference((State) this); 89 | AtomicReference state = new AtomicReference(state0); 90 | AtomicReference result = new AtomicReference(null); 91 | AtomicReference>>> stack = new AtomicReference(List.of()); 92 | 93 | while (next.get() != null) { 94 | Object current = next.get(); 95 | if (current instanceof GetState) { 96 | next.set(continueWith(state.get(), stack, result)); 97 | } else if (current instanceof SetState nextState) { 98 | state.set((S) nextState.s); 99 | next.set(continueWith(Unit.getInstance(), stack, result)); 100 | } else if (current instanceof FlatMap nextState) { 101 | stack.updateAndGet(list -> list.prepend((Function>) eraseK(nextState.f))); 102 | next.set((State) nextState.state); 103 | } else if (current instanceof Succeed nextState) { 104 | next.set(continueWith(nextState.a, stack, result)); 105 | } else throw new IllegalArgumentException(); 106 | } 107 | 108 | return new Tuple2(state.get(), result.get()); 109 | } 110 | } 111 | 112 | class GetState extends State { 113 | } 114 | 115 | class SetState extends State { 116 | S s; 117 | SetState(S s) { 118 | this.s = s; 119 | } 120 | } 121 | 122 | class Succeed extends State { 123 | A a; 124 | Succeed(A a) { 125 | this.a = a; 126 | } 127 | } 128 | class FlatMap extends State { 129 | State state; 130 | Function> f; 131 | FlatMap(State state, Function> f) { 132 | this.state = state; 133 | this.f = f; 134 | } 135 | } 136 | 137 | class Unit { 138 | private static Unit instance; 139 | static Unit getInstance() { 140 | if (instance == null) instance = new Unit(); 141 | return instance; 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/00-algorithms.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * ALGORITHMS 3 | * 4 | * It does not matter how aggressively you microptimize code, if the algorithms you are using are 5 | * pathological. 6 | * 7 | * In this introductory session, we will establish the importance of using correct algorithms, and 8 | * see how you can use benchmarking to identify pathological behavior. 9 | * 10 | * As we explore this important subject, you will gain familiarity with JMH, a benchmarking harness 11 | * that is commonly used on the JVM. Note that while these code examples are written in Scala, you 12 | * are free to work exercises in the language of your choice, including Kotlin, Java, or Clojure. 13 | * The principles you will learn in this workshop apply uniformly to all JVM languages, and most of 14 | * them apply even more broadly, to other languages beyond the JVM. 
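 *
 * (A possible way to study the JavapBenchmark interpreter above, assuming an sbt layout whose
 * exact output path may differ: after compiling, disassemble the generated classes with javap to
 * see the virtual dispatch and boxing that the State.loop interpreter performs, for example
 *
 *   javap -c -p target/scala-2.13/classes/net/degoes/tools/State.class
 *
 * where -c prints the bytecode and -p includes private members.)
 *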
15 | */ 16 | package net.degoes.algorithms 17 | 18 | import org.openjdk.jmh.annotations._ 19 | import org.openjdk.jmh.infra.Blackhole 20 | import java.util.concurrent.TimeUnit 21 | 22 | final case class Person(id: Int, age: Int, name: String, follows: List[Int]) 23 | 24 | final case class SocialNetwork(people: Seq[Person]) { 25 | def getFriendsOf(id: Int): Seq[Int] = { 26 | // Retrieve all the people that $id follows: 27 | val follows = people(id).follows 28 | 29 | // Return only the people that follow $id back ("friends"): 30 | follows.filter { candidateId => 31 | val candidate = people(candidateId) 32 | 33 | candidate.follows.contains(id) 34 | } 35 | } 36 | 37 | def findMostPopularFriend: Option[Int] = { 38 | val personAndFriendCount = 39 | people.map { person => 40 | // Map to tuple of person id and number of friends: 41 | (person.id, getFriendsOf(person.id).size) 42 | } 43 | 44 | val mostPopular = personAndFriendCount.maxByOption(_._2) 45 | 46 | mostPopular.map(_._1) 47 | } 48 | } 49 | object SocialNetwork { 50 | // Deterministic RNG: 51 | private val rng = new scala.util.Random(0L) 52 | 53 | def random(people: Int, friendsPerPerson: Int): SocialNetwork = 54 | SocialNetwork { 55 | (0 until people).map { id => 56 | Person( 57 | id, 58 | rng.nextInt(100), 59 | s"Person $id", 60 | (0 until friendsPerPerson).map { _ => 61 | rng.nextInt(people) 62 | }.toList 63 | ) 64 | } 65 | } 66 | } 67 | 68 | /** 69 | * EXERCISE 1 70 | * 71 | * Make a real benchmark for the `findMostPopularFriend` method. Initially, just create a social 72 | * network inside the benchmark, and then call `findMostPopularFriend` on it. 73 | * 74 | * EXERCISE 2 75 | * 76 | * In your previous benchmark, the overhead of creating the social network is included in the 77 | * benchmark of the method. This is not ideal, because it means that the benchmark is not measuring 78 | * the performance of the method in isolation. Take advantage of the @Setup annotation to create the 79 | * social network outside the benchmark. 80 | * 81 | * EXERCISE 3 82 | * 83 | * When benchmarking algorithms, a single data point is not useful: it gives you no idea of how the 84 | * performance of the algorithm changes with the size of the input. Use the @Param annotation on a 85 | * new field, `networkSize`, to see how the algorithm performs with differing network sizes. 86 | * 87 | * EXERCISE 4 88 | * 89 | * In our case, the social network has two parameters: the size of the network, and the number of 90 | * friends per person. Use the @Param annotation to create a second parameter, `friendsPerPerson`, 91 | * and see how the algorithm performs with differing numbers of friends per person. 92 | * 93 | * EXERCISE 5 94 | * 95 | * At this point, you should have an idea of how the algorithm performs, both with different network 96 | * sizes, and different numbers of friends per person. Now you will need to analyze the algorithm, 97 | * paying attention to nested loops, in order to figure out why the algorithm performs the way it 98 | * does. 99 | * 100 | * EXERCISE 6 101 | * 102 | * Now that you have some idea of why the algorithm performs the way it does, it is time to 103 | * investigate alternative methods of solving the problem that have improved algorithmic 104 | * performance. Test your potential improvements using the benchmark, and do not stop iterating 105 | * until you have found a solution that scales better with both network size and friend count. 
106 | */ 107 | class FindMostPopularFriendBenchmark { 108 | @Benchmark 109 | def findMostPopularFriend(blackHole: Blackhole): Unit = () 110 | } 111 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/algorithms/00-algorithms.java: -------------------------------------------------------------------------------- 1 | /** 2 | * ALGORITHMS 3 | * 4 | * It does not matter how aggressively you microptimize code, if the algorithms you are using are 5 | * pathological. 6 | * 7 | * In this introductory session, we will establish the importance of using correct algorithms, and 8 | * see how you can use benchmarking to identify pathological behavior. 9 | * 10 | * As we explore this important subject, you will gain familiarity with JMH, a benchmarking harness 11 | * that is commonly used on the JVM. 12 | * 13 | * The principles you will learn in this workshop apply uniformly to all JVM languages, and most of 14 | * them apply even more broadly, to other languages beyond the JVM. 15 | */ 16 | package net.degoes.algorithms; 17 | 18 | import java.util.List; 19 | import java.util.Optional; 20 | import java.util.Random; 21 | import java.util.Comparator; 22 | import java.util.stream.Collectors; 23 | import java.util.stream.IntStream; 24 | 25 | final class SocialNetwork { 26 | List people; 27 | 28 | public SocialNetwork(List people) { 29 | this.people = people; 30 | } 31 | 32 | List getFriendsOf(int id) { 33 | // Retrieve all the people that $id follows: 34 | List follows = people.get(id).follows; 35 | 36 | // Return only the people that follow $id back ("friends"): 37 | return follows.stream().filter(candidateId -> 38 | people.get(candidateId).follows.contains(id) 39 | ).collect(Collectors.toList()); 40 | } 41 | 42 | public Optional findMostPopularFriend() { 43 | List personAndFriendCount = 44 | people.stream().map(person -> 45 | // Map to array of person id and number of friends: 46 | new int[]{person.id, getFriendsOf(person.id).size()} 47 | ).collect(Collectors.toList()); 48 | 49 | Optional mostPopular = personAndFriendCount.stream().max(Comparator.comparingInt(array -> array[1])); 50 | 51 | return mostPopular.map(array -> array[0]); 52 | } 53 | 54 | // Deterministic RNG: 55 | private static Random rng = new Random(0L); 56 | 57 | public static SocialNetwork random(int people, int friendsPerPerson) { 58 | List members = IntStream.range(0, people).boxed().map(id -> { 59 | var friends = IntStream.range(0, friendsPerPerson).map(x -> rng.nextInt(people)).boxed().collect(Collectors.toList()); 60 | 61 | return new Person(id, rng.nextInt(100), "Person "+id, friends); 62 | }).collect(Collectors.toList()); 63 | 64 | return new SocialNetwork(members); 65 | } 66 | } 67 | 68 | final class Person { 69 | int id; 70 | int age; 71 | String name; 72 | List follows; 73 | 74 | public Person(int id, int age, String name, List follows) { 75 | this.id = id; 76 | this.age = age; 77 | this.name = name; 78 | this.follows = follows; 79 | } 80 | } 81 | 82 | /** 83 | * EXERCISE 1 84 | * 85 | * Make a real benchmark for the `findMostPopularFriend` method. Initially, just create a social 86 | * network inside the benchmark, and then call `findMostPopularFriend` on it. 87 | * 88 | * EXERCISE 2 89 | * 90 | * In your previous benchmark, the overhead of creating the social network is included in the 91 | * benchmark of the method. This is not ideal, because it means that the benchmark is not measuring 92 | * the performance of the method in isolation. 
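 *
 * A possible shape for exercises 1-4 above, fleshing out the Scala FindMostPopularFriendBenchmark
 * (a sketch, not the official solution; the parameter values are illustrative):
 *
 *   @State(Scope.Thread)
 *   @OutputTimeUnit(TimeUnit.MILLISECONDS)
 *   @BenchmarkMode(Array(Mode.Throughput))
 *   @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
 *   @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
 *   @Fork(1)
 *   @Threads(16)
 *   class FindMostPopularFriendBenchmark {
 *     @Param(Array("100", "1000", "10000"))
 *     var networkSize: Int = _
 *
 *     @Param(Array("10", "100"))
 *     var friendsPerPerson: Int = _
 *
 *     var network: SocialNetwork = _
 *
 *     @Setup
 *     def setup(): Unit =                      // built once per trial, outside the measured code
 *       network = SocialNetwork.random(networkSize, friendsPerPerson)
 *
 *     @Benchmark
 *     def findMostPopularFriend(blackhole: Blackhole): Unit =
 *       blackhole.consume(network.findMostPopularFriend)
 *   }
 *
 * For exercises 5-6: getFriendsOf runs a linear `contains` over each candidate's follows list,
 * and findMostPopularFriend calls it for every person, so the work grows roughly with
 * people x friends^2. One common improvement is to precompute each person's follows as a Set so
 * the mutual-follow check becomes O(1) per candidate.
 *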
Take advantage of the @Setup annotation to create the 93 | * social network outside the benchmark. 94 | * 95 | * EXERCISE 3 96 | * 97 | * When benchmarking algorithms, a single data point is not useful: it gives you no idea of how the 98 | * performance of the algorithm changes with the size of the input. Use the @Param annotation on a 99 | * new field, `networkSize`, to see how the algorithm performs with differing network sizes. 100 | * 101 | * EXERCISE 4 102 | * 103 | * In our case, the social network has two parameters: the size of the network, and the number of 104 | * friends per person. Use the @Param annotation to create a second parameter, `friendsPerPerson`, 105 | * and see how the algorithm performs with differing numbers of friends per person. 106 | * 107 | * EXERCISE 5 108 | * 109 | * At this point, you should have an idea of how the algorithm performs, both with different network 110 | * sizes, and different numbers of friends per person. Now you will need to analyze the algorithm, 111 | * paying attention to nested loops, in order to figure out why the algorithm performs the way it 112 | * does. 113 | * 114 | * EXERCISE 6 115 | * 116 | * Now that you have some idea of why the algorithm performs the way it does, it is time to 117 | * investigate alternative methods of solving the problem that have improved algorithmic 118 | * performance. Test your potential improvements using the benchmark, and do not stop iterating 119 | * until you have found a solution that scales better with both network size and friend count. 120 | */ -------------------------------------------------------------------------------- /src/main/kotlin.skip/net/degoes/03-allocation.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * ALLOCATION 3 | * 4 | * In theory, the JVM allocates by merely incrementing a pointer to the next free memory location, 5 | * making allocation extremely cheap. While mostly correct, this model of allocation is misleadingly 6 | * incomplete. 7 | * 8 | * Whatever must be allocated, must also be unallocated. In the JVM, this is the job of the garbage 9 | * collector, which must run to reclaim memory that is no longer in use. The process of garbage 10 | * collection is not free, but rather imposes significant cost on low-latency and high-performance 11 | * applications. 12 | * 13 | * In this section, you will explore the cost of allocation. 14 | */ 15 | package net.degoes.allocation 16 | 17 | import org.openjdk.jmh.annotations.* 18 | import org.openjdk.jmh.infra.Blackhole 19 | import java.util.concurrent.TimeUnit 20 | 21 | import zio.Chunk 22 | 23 | /** 24 | * EXERCISE 1 25 | * 26 | * Design a 'noAlloc' benchmark that attempts to follow the exact same process as the 'alloc' 27 | * benchmark, but without the allocation. 28 | * 29 | * HINT: Think about pre-allocation. 
30 | */ 31 | @State(Scope.Thread) 32 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 33 | @BenchmarkMode(Mode.Throughput) 34 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 35 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 36 | @Fork(1) 37 | @Threads(16) 38 | open class AllocBenchmark { 39 | @Param("100", "1000", "10000") 40 | var size: Int = 0 41 | 42 | @Setup 43 | fun setup(): Unit {} 44 | 45 | @Benchmark 46 | fun alloc(blackhole: Blackhole): Unit { 47 | var sum = 0 48 | var i = 0 49 | while (i < size) { 50 | sum = sum + (Any().hashCode()) 51 | i = i + 1 52 | } 53 | blackhole.consume(sum) 54 | } 55 | 56 | @Benchmark 57 | fun noAlloc(blackhole: Blackhole): Unit {} 58 | } 59 | 60 | /** 61 | * EXERCISE 2 62 | * 63 | * Design another 'noAlloc' benchmark that attempts to follow the exact same process as the 'alloc' 64 | * benchmark, but without the allocation. How many times faster is the no allocation benchmark? 65 | */ 66 | @State(Scope.Thread) 67 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 68 | @BenchmarkMode(Mode.Throughput) 69 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 70 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 71 | @Fork(1) 72 | @Threads(16) 73 | open class CopyAllocBenchmark { 74 | @Param("100", "1000", "10000") 75 | var size: Int = 0 76 | 77 | var people: Chunk? = null 78 | 79 | @Setup 80 | fun setup(): Unit { 81 | people = Chunk.fromArray(Array(size) { i -> Person(i) }) 82 | } 83 | 84 | @Benchmark 85 | fun alloc(): Unit { 86 | people!!.map { p -> p.copy(age = p.age + 1) } 87 | } 88 | 89 | data class Person(var age: Int) 90 | } 91 | 92 | /** 93 | * GRADUATION PROJECT 94 | * 95 | * In order to better understand the process of garbage collection, in this exercise, you will 96 | * implement a toy mark/sweep garbage collector. It is only a toy because (a) it only considers on 97 | * -heap objects, and (b) it does not try to encode any information about the object graph into the 98 | * linear raw memory, but rather, uses high-level data structures that are easy to work with. 99 | * 100 | * Implement the mark/sweep algorithm in the `markSweep` benchmark by iterating over all objects in 101 | * the heap twice. In the first iteration, mark all objects that are reachable from the root object. 102 | * In the second iteration, sweep all objects that are not marked. 103 | */ 104 | @State(Scope.Thread) 105 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 106 | @BenchmarkMode(Mode.Throughput) 107 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 108 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 109 | @Fork(1) 110 | @Threads(16) 111 | open class MarkSweepBenchmark { 112 | val rng = scala.util.Random(0L) 113 | 114 | val ObjSize = 10 115 | 116 | @Param("1000", "10000", "100000") 117 | var size: Int = 0 118 | 119 | var heap: Heap? 
= null 120 | var rootObjects: Array = emptyArray() 121 | 122 | @Setup 123 | fun setup(): Unit { 124 | val objects: Array = Array(size) { _ -> Obj(false, Array(ObjSize) { Integer(0) }) } 125 | 126 | heap = Heap(objects) 127 | 128 | var i = 0 129 | while (i < size) { 130 | val obj = heap!!.objects[i] 131 | var j = 0 132 | while (j < ObjSize) { 133 | if (rng.nextBoolean()) { 134 | val pointerObjIndex = rng.between(0, size) 135 | obj.values[j] = Pointer(heap!!.objects[pointerObjIndex]) 136 | } 137 | j = j + 1 138 | } 139 | 140 | i = i + 1 141 | } 142 | 143 | rootObjects = objects.take(10).toTypedArray() 144 | } 145 | 146 | @Benchmark 147 | fun markSweep(blackhole: Blackhole): Unit { 148 | } 149 | 150 | interface Data 151 | data class Integer(val value: Int) : Data 152 | data class Pointer(val value: Obj) : Data 153 | data class Obj(var marked: Boolean, val values: Array) 154 | data class Heap(val objects: Array) 155 | } 156 | -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/03-allocation.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * ALLOCATION 3 | * 4 | * In theory, the JVM allocates by merely incrementing a pointer to the next free memory location, 5 | * making allocation extremely cheap. While mostly correct, this model of allocation is misleadingly 6 | * incomplete. 7 | * 8 | * Whatever must be allocated, must also be unallocated. In the JVM, this is the job of the garbage 9 | * collector, which must run to reclaim memory that is no longer in use. The process of garbage 10 | * collection is not free, but rather imposes significant cost on low-latency and high-performance 11 | * applications. 12 | * 13 | * In this section, you will explore the cost of allocation. 14 | */ 15 | package net.degoes.allocation 16 | 17 | import org.openjdk.jmh.annotations._ 18 | import org.openjdk.jmh.infra.Blackhole 19 | import java.util.concurrent.TimeUnit 20 | 21 | import zio.Chunk 22 | 23 | /** 24 | * EXERCISE 1 25 | * 26 | * Design a 'noAlloc' benchmark that attempts to follow the exact same process as the 'alloc' 27 | * benchmark, but without the allocation. 28 | * 29 | * HINT: Think about pre-allocation. 30 | */ 31 | @State(Scope.Thread) 32 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 33 | @BenchmarkMode(Array(Mode.Throughput)) 34 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 35 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 36 | @Fork(1) 37 | @Threads(16) 38 | class AllocBenchmark { 39 | @Param(Array("100", "1000", "10000")) 40 | var size: Int = _ 41 | 42 | @Setup 43 | def setup(): Unit = {} 44 | 45 | @Benchmark 46 | def alloc(blackhole: Blackhole): Unit = { 47 | var sum = 0 48 | var i = 0 49 | while (i < size) { 50 | sum = sum + (new {}.hashCode()) 51 | i = i + 1 52 | } 53 | blackhole.consume(sum) 54 | } 55 | 56 | @Benchmark 57 | def noAlloc(blackhole: Blackhole): Unit = () 58 | } 59 | 60 | /** 61 | * EXERCISE 2 62 | * 63 | * Design another 'noAlloc' benchmark that attempts to follow the exact same process as the 'alloc' 64 | * benchmark, but without the allocation. How many times faster is the no allocation benchmark? 
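 *
 * A possible shape for the `noAlloc` benchmark in AllocBenchmark above (a sketch; `preallocated`
 * is an illustrative name). The pre-allocation hint can be read as: create the object once in
 * @Setup, then run the exact same loop against it, so the per-iteration allocation is the only
 * difference between the two benchmarks:
 *
 *   var preallocated: AnyRef = _
 *
 *   @Setup
 *   def setup(): Unit =
 *     preallocated = new AnyRef
 *
 *   @Benchmark
 *   def noAlloc(blackhole: Blackhole): Unit = {
 *     var sum = 0
 *     var i   = 0
 *     while (i < size) {
 *       sum = sum + preallocated.hashCode()     // same work as `alloc`, minus the allocation
 *       i = i + 1
 *     }
 *     blackhole.consume(sum)
 *   }
 *
 * For CopyAllocBenchmark, a matching `noAlloc` benchmark might update each Person's `age` field
 * in place instead of copying the case class.
 *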
65 | */ 66 | @State(Scope.Thread) 67 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 68 | @BenchmarkMode(Array(Mode.Throughput)) 69 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 70 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 71 | @Fork(1) 72 | @Threads(16) 73 | class CopyAllocBenchmark { 74 | @Param(Array("100", "1000", "10000")) 75 | var size: Int = _ 76 | 77 | var people: Chunk[Person] = _ 78 | 79 | @Setup 80 | def setup(): Unit = 81 | people = Chunk.fromIterable(0 until size).map(Person(_)) 82 | 83 | @Benchmark 84 | def alloc(): Unit = 85 | people.map(p => p.copy(age = p.age + 1)) 86 | 87 | case class Person(var age: Int) 88 | } 89 | 90 | /** 91 | * GRADUATION PROJECT 92 | * 93 | * In order to better understand the process of garbage collection, in this exercise, you will 94 | * implement a toy mark/sweep garbage collector. It is only a toy because (a) it only considers on 95 | * -heap objects, and (b) it does not try to encode any information about the object graph into the 96 | * linear raw memory, but rather, uses high-level data structures that are easy to work with. 97 | * 98 | * Implement the mark/sweep algorithm in the `markSweep` benchmark by iterating over all objects in 99 | * the heap twice. In the first iteration, mark all objects that are reachable from the root object. 100 | * In the second iteration, sweep all objects that are not marked. 101 | */ 102 | @State(Scope.Thread) 103 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 104 | @BenchmarkMode(Array(Mode.Throughput)) 105 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 106 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 107 | @Fork(1) 108 | @Threads(16) 109 | class MarkSweepBenchmark { 110 | val rng = new scala.util.Random(0L) 111 | 112 | val ObjSize = 10 113 | 114 | @Param(Array("1000", "10000", "100000")) 115 | var size: Int = _ 116 | 117 | var heap: Heap = _ 118 | var rootObjects: Array[Obj] = _ 119 | 120 | @Setup 121 | def setup(): Unit = { 122 | val objects = Array.fill(size)(Obj(false, Array.fill(ObjSize)(Data.Integer(0)))) 123 | 124 | heap = Heap(objects) 125 | 126 | var i = 0 127 | while (i < size) { 128 | val obj = heap.objects(i) 129 | var j = 0 130 | while (j < ObjSize) { 131 | if (rng.nextBoolean()) { 132 | val pointerObjIndex = rng.between(0, size) 133 | obj.data(j) = Data.Pointer(heap.objects(pointerObjIndex)) 134 | } 135 | j = j + 1 136 | } 137 | 138 | i = i + 1 139 | } 140 | 141 | rootObjects = objects.take(10) 142 | } 143 | 144 | @Benchmark 145 | def markSweep(blackhole: Blackhole): Unit = () 146 | 147 | sealed trait Data 148 | object Data { 149 | case class Integer(value: Int) extends Data 150 | case class Pointer(value: Obj) extends Data 151 | } 152 | case class Obj(var marked: Boolean, data: Array[Data]) 153 | case class Heap(objects: Array[Obj]) 154 | } 155 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/StackInterpreterBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.control.NoStackTrace; 7 | import java.util.function.BiFunction; 8 | import java.util.function.Function; 9 | import io.vavr.control.Option; 10 | import io.vavr.Tuple2; 11 | 12 | @State(Scope.Thread) 13 | @OutputTimeUnit(TimeUnit.SECONDS) 14 | 
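/*
 * A possible shape for the `markSweep` graduation project above in 03-allocation.scala (a sketch,
 * not the official solution). Mark phase: walk the object graph from the root objects with an
 * explicit worklist (recursion could overflow the stack on large heaps), setting `marked`; sweep
 * phase: scan the whole heap, treat every unmarked object as reclaimable, and clear marks so the
 * benchmark can run repeatedly:
 *
 *   @Benchmark
 *   def markSweep(blackhole: Blackhole): Unit = {
 *     // Mark: traversal order does not matter, only reachability from the roots.
 *     val worklist = scala.collection.mutable.ArrayBuffer[Obj]()
 *     rootObjects.foreach(worklist += _)
 *     while (worklist.nonEmpty) {
 *       val obj = worklist.remove(worklist.length - 1)
 *       if (!obj.marked) {
 *         obj.marked = true
 *         obj.data.foreach {
 *           case Data.Pointer(target) if !target.marked => worklist += target
 *           case _                                      => ()
 *         }
 *       }
 *     }
 *
 *     // Sweep: anything still unmarked is unreachable; count it and reset marks for the next run.
 *     var reclaimed = 0
 *     var i         = 0
 *     while (i < heap.objects.length) {
 *       val obj = heap.objects(i)
 *       if (!obj.marked) reclaimed += 1
 *       obj.marked = false
 *       i = i + 1
 *     }
 *     blackhole.consume(reclaimed)
 *   }
 */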
@BenchmarkMode({Mode.Throughput}) 15 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 17 | @Fork(value = 1, jvmArgsAppend = {"-XX:-DoEscapeAnalysis"}) 18 | @Threads(16) 19 | public class StackInterpreterBenchmark { 20 | 21 | // Parses: /users/{username}/posts/{post-id} 22 | RouteParser> parser() { 23 | return Slash 24 | .zipRight(new Literal("users")) 25 | .zipRight(Slash) 26 | .zipRight(StringVar) 27 | .zipLeft(Slash) 28 | .zipLeft(new Literal("posts")) 29 | .zipLeft(Slash) 30 | .zip(IntVar); 31 | } 32 | 33 | @Benchmark 34 | public void classic(Blackhole blackhole) { 35 | blackhole.consume(parser().parse("/users/jdegoes/posts/123")); 36 | } 37 | 38 | @Benchmark 39 | public void interpreted(Blackhole blackhole) {} 40 | 41 | class Literal extends RouteParser { 42 | String value; 43 | 44 | Literal(String value) { 45 | this.value = value; 46 | } 47 | 48 | Option> parse(String path) { 49 | if (path.startsWith(value)) return Option.of(new Tuple2(null, path.substring(value.length()))); 50 | else return Option.none(); 51 | } 52 | } 53 | 54 | RouteParser Slash = new RouteParser() { 55 | Option> parse(String path) { 56 | if (path.startsWith("/")) return Option.of(new Tuple2(null, path.substring(1))); 57 | else return Option.none(); 58 | } 59 | }; 60 | 61 | RouteParser StringVar = new RouteParser() { 62 | Option> parse(String path) { 63 | int idx = path.indexOf('/'); 64 | if (idx == -1) return Option.of(new Tuple2(path, "")); 65 | else return Option.of(new Tuple2(path.substring(0, idx), path.substring(idx))); 66 | } 67 | }; 68 | 69 | RouteParser IntVar = new RouteParser() { 70 | Option> parse(String path) { 71 | int idx = path.indexOf('/'); 72 | Option option; 73 | if (idx == -1) { 74 | try { 75 | option = Option.of(Integer.parseInt(path)); 76 | } catch (NumberFormatException ex) { 77 | option = Option.none(); 78 | } 79 | return option.map(i -> new Tuple2(i, "")); 80 | } else { 81 | String seg = path.substring(0, idx); 82 | try { 83 | option = Option.of(Integer.parseInt(seg)); 84 | } catch (NumberFormatException ex) { 85 | option = Option.none(); 86 | } 87 | return option.map(i -> new Tuple2(i, path.substring(idx))); 88 | } 89 | } 90 | }; 91 | 92 | abstract class RouteParser { 93 | abstract Option> parse(String path); 94 | 95 | RouteParser combineWith(RouteParser that, BiFunction f) { 96 | return new RouteParser.Combine(this, that, f); 97 | } 98 | 99 | RouteParser map(Function f) { 100 | return new RouteParser.Map(this, f); 101 | } 102 | 103 | RouteParser> zip(RouteParser that) { 104 | return combineWith(that, (left, right) -> new Tuple2(left, right)); 105 | } 106 | 107 | RouteParser zipLeft(RouteParser that) { 108 | return combineWith(that, (a, b) -> a); 109 | } 110 | 111 | RouteParser zipRight(RouteParser that) { 112 | return combineWith(that, (a, b) -> b); 113 | } 114 | 115 | class Map extends RouteParser { 116 | RouteParser parser; 117 | Function f; 118 | 119 | Map(RouteParser parser, Function f) { 120 | this.parser = parser; 121 | this.f = f; 122 | } 123 | 124 | Option> parse(String path) { 125 | return parser.parse(path).map(tuple -> new Tuple2(f.apply(tuple._1), tuple._2)); 126 | } 127 | } 128 | 129 | class Combine extends RouteParser { 130 | RouteParser left; 131 | RouteParser right; 132 | BiFunction f; 133 | Combine(RouteParser left, RouteParser right, BiFunction f) { 134 | this.left = left; 135 | this.right = right; 136 | this.f = f; 137 | } 138 | 139 | Option> parse(String path) { 140 | Option> 
parsed1 = left.parse(path); 141 | var flatMapped = parsed1.flatMap(tuple -> { 142 | Option> parsed2 = right.parse(tuple._2); 143 | Option> mapped = parsed2.map(tuple2 -> new Tuple2(f.apply(tuple._1, tuple2._1), tuple2._2)); 144 | return mapped; 145 | }); 146 | return flatMapped; 147 | } 148 | } 149 | } 150 | } -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/project/dataset1/Dataset.java: -------------------------------------------------------------------------------- 1 | package net.degoes.project.dataset1; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.function.BinaryOperator; 7 | import java.util.concurrent.atomic.AtomicReference; 8 | import zio.Chunk; 9 | import scala.util.Random; 10 | import io.vavr.collection.Map; 11 | import io.vavr.collection.HashMap; 12 | 13 | public class Dataset { 14 | Chunk rows; 15 | 16 | public Dataset(Chunk rows) { 17 | this.rows = rows; 18 | } 19 | 20 | public Dataset apply(Field field) { 21 | return new Dataset( 22 | rows.map(row -> { 23 | if (row.map.containsKey(field.name)) 24 | return new Row(HashMap.of(field.name, row.apply(field))); 25 | else return new Row(HashMap.empty()); 26 | }) 27 | ); 28 | } 29 | 30 | public Dataset times(Dataset that) { 31 | return binary(that, "*", (left, right) -> { 32 | if (left instanceof Value.Integer && right instanceof Value.Integer) 33 | return new Value.Integer(((Value.Integer) left).value * ((Value.Integer) right).value); 34 | 35 | if (left instanceof Value.Integer && right instanceof Value.Decimal) 36 | return new Value.Decimal(((Value.Integer) left).value * ((Value.Decimal) right).value); 37 | 38 | if (left instanceof Value.Decimal && right instanceof Value.Integer) 39 | return new Value.Decimal(((Value.Decimal) left).value * ((Value.Integer) right).value); 40 | 41 | if (left instanceof Value.Decimal && right instanceof Value.Decimal) 42 | return new Value.Decimal(((Value.Decimal) left).value * ((Value.Decimal) right).value); 43 | 44 | throw new UnsupportedOperationException(); 45 | }); 46 | } 47 | 48 | public Dataset plus(Dataset that) { 49 | return binary(that, "+", (left, right) -> { 50 | if (left instanceof Value.Integer && right instanceof Value.Integer) 51 | return new Value.Integer(((Value.Integer) left).value + ((Value.Integer) right).value); 52 | 53 | if (left instanceof Value.Integer && right instanceof Value.Decimal) 54 | return new Value.Decimal(((Value.Integer) left).value + ((Value.Decimal) right).value); 55 | 56 | if (left instanceof Value.Decimal && right instanceof Value.Integer) 57 | return new Value.Decimal(((Value.Decimal) left).value + ((Value.Integer) right).value); 58 | 59 | if (left instanceof Value.Decimal && right instanceof Value.Decimal) 60 | return new Value.Decimal(((Value.Decimal) left).value + ((Value.Decimal) right).value); 61 | 62 | throw new UnsupportedOperationException(); 63 | }); 64 | } 65 | 66 | public Dataset minus(Dataset that) { 67 | return binary(that, "-", (left, right) -> { 68 | if (left instanceof Value.Integer && right instanceof Value.Integer) 69 | return new Value.Integer(((Value.Integer) left).value - ((Value.Integer) right).value); 70 | 71 | if (left instanceof Value.Integer && right instanceof Value.Decimal) 72 | return new Value.Decimal(((Value.Integer) left).value - ((Value.Decimal) right).value); 73 | 74 | if (left instanceof Value.Decimal && right instanceof Value.Integer) 75 | return new 
Value.Decimal(((Value.Decimal) left).value - ((Value.Integer) right).value); 76 | 77 | if (left instanceof Value.Decimal && right instanceof Value.Decimal) 78 | return new Value.Decimal(((Value.Decimal) left).value - ((Value.Decimal) right).value); 79 | 80 | throw new UnsupportedOperationException(); 81 | }); 82 | } 83 | 84 | public Dataset divide(Dataset that) { 85 | return binary(that, "/", (left, right) -> { 86 | if (left instanceof Value.Integer && right instanceof Value.Integer) 87 | return new Value.Integer(((Value.Integer) left).value / ((Value.Integer) right).value); 88 | 89 | if (left instanceof Value.Integer && right instanceof Value.Decimal) 90 | return new Value.Decimal(((Value.Integer) left).value / ((Value.Decimal) right).value); 91 | 92 | if (left instanceof Value.Decimal && right instanceof Value.Integer) 93 | return new Value.Decimal(((Value.Decimal) left).value / ((Value.Integer) right).value); 94 | 95 | if (left instanceof Value.Decimal && right instanceof Value.Decimal) 96 | return new Value.Decimal(((Value.Decimal) left).value / ((Value.Decimal) right).value); 97 | 98 | throw new UnsupportedOperationException(); 99 | }); 100 | } 101 | 102 | private Dataset binary(Dataset that, String symbol, BinaryOperator f) { 103 | 104 | Chunk> zipped = (Chunk>) rows.zip(that.rows); 105 | 106 | return new Dataset(zipped.map(tuple -> { 107 | Row leftRow = tuple._1; 108 | Row rightRow = tuple._2; 109 | 110 | AtomicReference> newMap = new AtomicReference(HashMap.empty()); 111 | 112 | leftRow.map.forEach((leftName, leftValue) -> { 113 | rightRow.map.forEach((rightName, rightValue) -> { 114 | String name = "(leftName "+symbol+" rightName)"; 115 | try { 116 | newMap.getAndUpdate(map -> map.put(name, f.apply(leftValue, rightValue))); 117 | } catch (Exception ex) { 118 | newMap.getAndUpdate(map -> map.put(name, Value.NA)); 119 | } 120 | }); 121 | }); 122 | 123 | return new Row(newMap.get()); 124 | })); 125 | } 126 | } -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/09-project.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * GRADUATION PROJECT 3 | * 4 | * In this section, you will tie together everything you have learned in order to significantly 5 | * optimize the performance of JVM-based code. 
6 | */ 7 | package net.degoes.project 8 | 9 | import org.openjdk.jmh.annotations._ 10 | import org.openjdk.jmh.infra.Blackhole 11 | import java.util.concurrent.TimeUnit 12 | 13 | import zio.Chunk 14 | import scala.util.Random 15 | 16 | object dataset1 { 17 | sealed trait Value 18 | object Value { 19 | final case class Text(value: String) extends Value 20 | final case class Integer(value: Long) extends Value 21 | final case class Decimal(value: Double) extends Value 22 | case object NA extends Value 23 | } 24 | 25 | final case class Field(name: String) 26 | 27 | final case class Row(map: Map[String, Value]) { 28 | def apply(field: Field): Value = map(field.name) 29 | } 30 | 31 | final case class Dataset(rows: Chunk[Row]) { self => 32 | def apply(field: Field): Dataset = 33 | Dataset( 34 | rows.map(row => 35 | if (row.map.contains(field.name)) Row(Map(field.name -> row(field))) 36 | else Row(Map()) 37 | ) 38 | ) 39 | 40 | def *(that: Dataset): Dataset = 41 | self.binary(that, "*") { 42 | case (Value.Integer(left), Value.Integer(right)) => Value.Integer(left * right) 43 | case (Value.Integer(left), Value.Decimal(right)) => Value.Decimal(left * right) 44 | case (Value.Decimal(left), Value.Decimal(right)) => Value.Decimal(left * right) 45 | case (Value.Decimal(left), Value.Integer(right)) => Value.Decimal(left * right) 46 | } 47 | 48 | def +(that: Dataset): Dataset = 49 | self.binary(that, "+") { 50 | case (Value.Integer(left), Value.Integer(right)) => Value.Integer(left + right) 51 | case (Value.Integer(left), Value.Decimal(right)) => Value.Decimal(left + right) 52 | case (Value.Decimal(left), Value.Decimal(right)) => Value.Decimal(left + right) 53 | case (Value.Decimal(left), Value.Integer(right)) => Value.Decimal(left + right) 54 | } 55 | 56 | def -(that: Dataset): Dataset = 57 | self.binary(that, "-") { 58 | case (Value.Integer(left), Value.Integer(right)) => Value.Integer(left - right) 59 | case (Value.Integer(left), Value.Decimal(right)) => Value.Decimal(left - right) 60 | case (Value.Decimal(left), Value.Decimal(right)) => Value.Decimal(left - right) 61 | case (Value.Decimal(left), Value.Integer(right)) => Value.Decimal(left - right) 62 | } 63 | 64 | def /(that: Dataset): Dataset = 65 | self.binary(that, "/") { 66 | case (Value.Integer(left), Value.Integer(right)) => Value.Integer(left / right) 67 | case (Value.Integer(left), Value.Decimal(right)) => Value.Decimal(left / right) 68 | case (Value.Decimal(left), Value.Decimal(right)) => Value.Decimal(left / right) 69 | case (Value.Decimal(left), Value.Integer(right)) => Value.Decimal(left / right) 70 | } 71 | 72 | private def binary(that: Dataset, symbol: String)( 73 | f: PartialFunction[(Value, Value), Value] 74 | ): Dataset = 75 | Dataset(self.rows.zip(that.rows).map { tuple => 76 | val (leftRow, rightRow) = tuple 77 | 78 | val map = 79 | for { 80 | left <- leftRow.map 81 | (leftName, leftValue) = left 82 | right <- rightRow.map 83 | (rightName, rightValue) = right 84 | } yield s"(leftName ${symbol} rightName)" -> 85 | ((leftValue, rightValue) match { 86 | case (left, right) if f.isDefinedAt((left, right)) => f((left, right)) 87 | case (_, _) => Value.NA 88 | }) 89 | 90 | Row(map) 91 | }) 92 | } 93 | } 94 | 95 | /** 96 | * GRADUATION PROJECT 97 | * 98 | * Develop a version of `Dataset` that has a similar API, but which is at least 10x as fast. See how 99 | * far you can push it (can you get to 100x?). 100 | * 101 | * You may assume data is completely homogeneous and that no values are null. 
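 *
 * One possible direction (a sketch under the stated homogeneous-data assumption; `ColumnarDataset`
 * and its methods are illustrative names, not part of the workshop code): instead of a Chunk of
 * Row maps holding boxed Value objects, store each field as a primitive column and do the
 * arithmetic in tight loops over arrays:
 *
 *   final case class ColumnarDataset(columns: Map[String, Array[Long]], size: Int) {
 *     def apply(field: Field): Array[Long] = columns(field.name)
 *
 *     private def zipWith(ls: Array[Long], rs: Array[Long])(f: (Long, Long) => Long): Array[Long] = {
 *       val out = new Array[Long](size)
 *       var i   = 0
 *       while (i < size) {
 *         out(i) = f(ls(i), rs(i))
 *         i = i + 1
 *       }
 *       out
 *     }
 *
 *     def add(ls: Array[Long], rs: Array[Long]): Array[Long]    = zipWith(ls, rs)(_ + _)
 *     def divide(ls: Array[Long], rs: Array[Long]): Array[Long] = zipWith(ls, rs)(_ / _)
 *   }
 *
 * No Row, Map entry, or boxed Value is allocated per cell, so (start + end) / netPay becomes a
 * few array passes; the remaining per-operation allocation (one result column) can be reduced
 * further by writing into a preallocated output buffer.
 *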
However, if ambitious, 102 | * you may solve the same problem under the assumption of undefined values and heterogeneous data. 103 | */ 104 | @State(Scope.Thread) 105 | @OutputTimeUnit(TimeUnit.SECONDS) 106 | @BenchmarkMode(Array(Mode.Throughput)) 107 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 108 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 109 | @Fork(1) 110 | @Threads(16) 111 | class ProjectBenchmark { 112 | @Param(Array("100", "1000", "10000")) 113 | var size: Int = _ 114 | 115 | object benchmark1 { 116 | import dataset1._ 117 | 118 | var dataset: Dataset = _ 119 | 120 | val start: Field = Field("start") 121 | val end: Field = Field("end") 122 | val netPay: Field = Field("netPay") 123 | } 124 | 125 | @Setup 126 | def setupSlow(): Unit = { 127 | import benchmark1._ 128 | import dataset1._ 129 | 130 | val rng: Random = new Random(0L) 131 | 132 | dataset = Dataset(Chunk.fill(size) { 133 | val start = rng.between(0, 360) 134 | val end = rng.between(start, 360) 135 | val netPay = rng.between(20000, 60000) 136 | 137 | Row( 138 | Map( 139 | "start" -> Value.Integer(start), 140 | "end" -> Value.Integer(end), 141 | "netPay" -> Value.Integer(netPay) 142 | ) 143 | ) 144 | }) 145 | } 146 | 147 | @Benchmark 148 | def baseline(blackhole: Blackhole): Unit = { 149 | import benchmark1._ 150 | import dataset1._ 151 | 152 | val result = (dataset(start) + dataset(end)) / dataset(netPay) 153 | 154 | blackhole.consume(result) 155 | } 156 | 157 | } 158 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/MapToArrayBenchmark.java: -------------------------------------------------------------------------------- 1 | package net.degoes.tricks; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | import org.openjdk.jmh.infra.Blackhole; 5 | import java.util.concurrent.TimeUnit; 6 | import scala.util.control.NoStackTrace; 7 | import scala.util.Random; 8 | import java.util.stream.Collectors; 9 | import zio.Chunk; 10 | import io.vavr.collection.Map; 11 | import io.vavr.collection.HashMap; 12 | 13 | @State(Scope.Thread) 14 | @OutputTimeUnit(TimeUnit.SECONDS) 15 | @BenchmarkMode({Mode.Throughput}) 16 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 17 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 18 | @Fork(value = 1, jvmArgsAppend = {}) 19 | @Threads(16) 20 | public class MapToArrayBenchmark { 21 | 22 | @Param({"10000", "100000"}) 23 | int size = 0; 24 | 25 | Random rng = new Random(0L); 26 | 27 | static Component Email = new Component("email"); 28 | static Component Name = new Component("name"); 29 | static Component Phone = new Component("phone"); 30 | static Component Age = new Component("age"); 31 | static Component Zip = new Component("zip"); 32 | static Component City = new Component("city"); 33 | static Component State = new Component("state"); 34 | static Component Country = new Component("country"); 35 | 36 | Operation Identity = new Operation() {}; 37 | 38 | Chunk allData = null; 39 | Transformation transformation = new Transformation( 40 | HashMap.of( 41 | Email, Identity, 42 | Name, Identity, 43 | Phone, Identity, 44 | Age, Identity, 45 | Zip, Identity, 46 | City, Identity, 47 | State, Identity, 48 | Country, Identity 49 | ) 50 | ); 51 | 52 | @Setup 53 | public void setup() { 54 | allData = Chunk.fill(size, () -> new Data( 55 | rng.nextString(10), 56 | rng.nextString(10), 57 | rng.nextString(10), 58 | rng.nextString(10), 59 | rng.nextString(10), 60 | 
rng.nextString(10), 61 | rng.nextString(10), 62 | rng.nextString(10) 63 | )); 64 | } 65 | 66 | @Benchmark 67 | public void map(Blackhole blackhole) { 68 | var i = 0; 69 | while (i < size) { 70 | Data data = allData.apply(i); 71 | transformData(data, transformation); 72 | i = i + 1; 73 | } 74 | } 75 | 76 | void transformData(Data data, Transformation transformation) { 77 | transformation.map.forEach((component, operation) -> { 78 | if (component.equals(Email)) data.email = operation.apply(data.email); 79 | else if (component.equals(Name)) data.name = operation.apply(data.name); 80 | else if (component.equals(Phone)) data.phone = operation.apply(data.phone); 81 | else if (component.equals(Age)) data.age = operation.apply(data.age); 82 | else if (component.equals(Zip)) data.zip = operation.apply(data.zip); 83 | else if (component.equals(City)) data.city = operation.apply(data.city); 84 | else if (component.equals(State)) data.state = operation.apply(data.state); 85 | else if (component.equals(Country)) data.country = operation.apply(data.country); 86 | }); 87 | } 88 | 89 | class Data { 90 | String email; 91 | String name; 92 | String phone; 93 | String age; 94 | String zip; 95 | String city; 96 | String state; 97 | String country; 98 | 99 | Data( 100 | String email, 101 | String name, 102 | String phone, 103 | String age, 104 | String zip, 105 | String city, 106 | String state, 107 | String country 108 | ) { 109 | this.email = email; 110 | this.name = name; 111 | this.phone = phone; 112 | this.age = age; 113 | this.zip = zip; 114 | this.city = city; 115 | this.state = state; 116 | this.country = country; 117 | } 118 | } 119 | class Transformation { 120 | Map map; 121 | Transformation(Map map) { 122 | this.map = map; 123 | } 124 | } 125 | 126 | static class Component { 127 | String name; 128 | Component(String name) { 129 | this.name = name; 130 | } 131 | 132 | private static Component[] componentArray = {Email, Name, Phone, Age, Zip, City, State, Country}; 133 | 134 | static Chunk All = Chunk.fromArray(componentArray); 135 | } 136 | 137 | abstract class Operation { 138 | String apply(String value) { 139 | if (this.equals(Identity)) { 140 | return value; 141 | } else if (this instanceof Anonymize) { 142 | Anonymize anonymize = (Anonymize) this; 143 | if (anonymize.full) { 144 | return "*****"; 145 | } else { 146 | return value.substring(0, 3) + "*****"; 147 | } 148 | } else if (this instanceof Encrypt) { 149 | Encrypt encrypt = (Encrypt) this; 150 | 151 | return value.chars().map(c -> (char) (c ^ encrypt.key.hashCode())).mapToObj(String::valueOf).collect(Collectors.joining()); 152 | } else if (this.equals(Uppercase)) { 153 | return value.toUpperCase(); 154 | } else if (this instanceof Composite) { 155 | Composite composite = (Composite) this; 156 | return composite.right.apply(composite.left.apply(value)); 157 | } else return value; 158 | } 159 | } 160 | 161 | class Anonymize extends Operation { 162 | boolean full; 163 | Anonymize(boolean full) { 164 | this.full = full; 165 | } 166 | } 167 | 168 | class Encrypt extends Operation { 169 | String key; 170 | Encrypt(String key) { 171 | this.key = key; 172 | } 173 | } 174 | 175 | Operation Uppercase = new Operation() {}; 176 | 177 | class Composite extends Operation { 178 | Operation left; 179 | Operation right; 180 | 181 | Composite(Operation left, Operation right) { 182 | this.left = left; 183 | this.right = right; 184 | } 185 | } 186 | } 187 | 188 | -------------------------------------------------------------------------------- 
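/*
 * A possible companion benchmark for MapToArrayBenchmark above, illustrating the map-to-array
 * trick the class name refers to (a sketch in Scala; `opsByIndex` and `transformDataArray` are
 * illustrative names). Give each Component a fixed position, store the operations in a plain
 * array, and index positionally instead of iterating a hash map keyed by Component objects:
 *
 *   // 0=email, 1=name, 2=phone, 3=age, 4=zip, 5=city, 6=state, 7=country
 *   val opsByIndex: Array[Operation] = Array.fill(8)(Identity)
 *
 *   def transformDataArray(data: Data): Unit = {
 *     data.email   = opsByIndex(0).apply(data.email)
 *     data.name    = opsByIndex(1).apply(data.name)
 *     data.phone   = opsByIndex(2).apply(data.phone)
 *     data.age     = opsByIndex(3).apply(data.age)
 *     data.zip     = opsByIndex(4).apply(data.zip)
 *     data.city    = opsByIndex(5).apply(data.city)
 *     data.state   = opsByIndex(6).apply(data.state)
 *     data.country = opsByIndex(7).apply(data.country)
 *   }
 *
 * This removes the per-field hash lookup and the chain of equals() comparisons in transformData,
 * at the cost of fixing the component order ahead of time.
 */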
/src/main/kotlin.skip/net/degoes/02-virtual.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * VIRTUAL DISPATCH 3 | * 4 | * Surprisingly, not all methods are equal: calling some methods can be quite fast, and calling 5 | * other methods can be dangerously slow, even if their implementations are *exactly* the same. 6 | * 7 | * This surprising fact is due to the way that object-oriented languages implement polymorphism. 8 | * Polymorphism allows us to write code that is generic over a type. For example, we might have some 9 | * business logic that can work with any key/value store, whether backed by a database, an in-memory 10 | * hash map, or a cloud API. 11 | * 12 | * In object-oriented programming languages, we achieve this type of polymorphism with inheritance, 13 | * and then implementing or overriding methods in a subtype. 14 | * 15 | * In this section, you will learn more about how this works, its impact on performance, and 16 | * potential workarounds for performance sensitive code. 17 | */ 18 | package net.degoes.virtual 19 | 20 | import org.openjdk.jmh.annotations.* 21 | import org.openjdk.jmh.infra.Blackhole 22 | import java.util.concurrent.TimeUnit 23 | 24 | import zio.Chunk 25 | 26 | /** 27 | * EXERCISE 1 28 | * 29 | * Every non-final method invocation is potentially a virtual dispatch: a process that involves looking up 30 | * which concrete implementation of the method to invoke, based on the runtime class of the receiver. 31 | * 32 | * In this exercise, you will explore the cost of virtual dispatch. The current benchmark creates a 33 | * chunk of operators, each one of which is a random operator chosen from among the provided set. At 34 | * runtime, the JVM does not know which element of the chunk has which concrete type, so it must 35 | * lookup the correct method to invoke on an object-by-object basis. This results in lower 36 | * performance. 37 | * 38 | * Augment this benchmark with another benchmark, which uses another chunk, where every element of 39 | * the chunk uses the same concrete operator (e.g. Operator.DividedBy.type). In your new benchmark, 40 | * because the JVM knows the concrete type of the object, when it invokes the invoke method, it knows 41 | * exactly where the code for that function is, and does not need to perform a preliminary lookup. 42 | * This should result in faster performance. 43 | */ 44 | @State(Scope.Thread) 45 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 46 | @BenchmarkMode(Mode.Throughput) 47 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 48 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 49 | @Fork(1) 50 | @Threads(16) 51 | open class PolyBenchmark { 52 | @Param("1000", "10000", "100000") 53 | var size: Int = 0 54 | 55 | var poly_operators: Chunk<Operator>? = null 56 | // var mono_operators: Chunk<Operator>? = null 57 | 58 | @Setup 59 | fun setupPoly(): Unit { 60 | poly_operators = Operator.randomN(size) 61 | } 62 | 63 | @Benchmark 64 | fun poly(blackhole: Blackhole): Unit { 65 | var i = 0 66 | var result = 0 67 | while (i < size) { 68 | val op = poly_operators!!.apply(i) 69 | 70 | result = op(result, i + 1) 71 | 72 | i = i + 1 73 | } 74 | blackhole.consume(result) 75 | } 76 | 77 | interface Operator { 78 | operator fun invoke(l: Int, r: Int): Int 79 | 80 | companion object { 81 | // Deterministic RNG: 82 | private val rng = scala.util.Random(0L) 83 | 84 | val All: Array<Operator> = arrayOf(Plus, Times, DividedBy, Max, Min) 85 | 86 | object Plus : Operator { 87 | override operator fun invoke(l: Int, r: Int): Int = l + r 88 | } 89 | object Times : Operator { 90 | override operator fun invoke(l: Int, r: Int): Int = l + r 91 | } 92 | object DividedBy : Operator { 93 | override operator fun invoke(l: Int, r: Int): Int = l + r 94 | } 95 | object Max : Operator { 96 | override operator fun invoke(l: Int, r: Int): Int = l + r 97 | } 98 | object Min : Operator { 99 | override operator fun invoke(l: Int, r: Int): Int = l + r 100 | } 101 | 102 | fun random(): Operator = All[rng.nextInt(All.size)] 103 | 104 | fun randomN(n: Int): Chunk<Operator> = Chunk.fromArray(Array(n) { random() }) 105 | 106 | } 107 | } 108 | } 109 | 110 | /** 111 | * EXERCISE 2 112 | * 113 | * In this exercise, you will simulate the cost of a virtual dispatch by creating a benchmark that 114 | * must lookup the correct method based on the virtual method table stored together with the data 115 | * for an object. 116 | * 117 | * Create an invokeVirtual benchmark that uses `obj.meta` to find the address of the method to be 118 | * invoked. Compare the performance of this benchmark to the invokeStatic benchmark. 119 | * 120 | * Note that this benchmark is not that realistic. There is no hash map lookup with invoke dynamic. 121 | * Nonetheless, getting a feel for the extra work the JVM must do to perform a virtual dispatch is 122 | * useful.
123 | */ 124 | @State(Scope.Thread) 125 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 126 | @BenchmarkMode(Mode.Throughput) 127 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 128 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 129 | @Fork(1) 130 | @Threads(16) 131 | open class PolySimBenchmark { 132 | val obj: JVMObject = 133 | JVMObject(1, JVMClassMetadata("Dog", mapOf(JVMMethod("Dog", "bark") to Address(0)))) 134 | 135 | val invs: InvokeStatic = InvokeStatic(Address(0)) 136 | val invv: InvokeVirtual = InvokeVirtual(JVMMethod("Dog", "bark")) 137 | 138 | @Benchmark 139 | fun invokeStatic(blackhole: Blackhole): Unit = 140 | blackhole.consume(invs.address.value) 141 | 142 | data class JVMObject(val dat: Any, val meta: JVMClassMetadata) 143 | data class JVMClassMetadata(val clazz: String, val vtable: Map<JVMMethod, Address>) 144 | data class JVMMethod(val clazz: String, val name: String) 145 | data class Address(val value: Int) 146 | 147 | interface Bytecode 148 | 149 | object Mul : Bytecode 150 | data class InvokeStatic(val address: Address) : Bytecode 151 | data class InvokeVirtual(val method: JVMMethod) : Bytecode 152 | } 153 | -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/02-virtual.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * VIRTUAL DISPATCH 3 | * 4 | * Surprisingly, not all methods are equal: calling some methods can be quite fast, and calling 5 | * other methods can be dangerously slow, even if their implementations are *exactly* the same. 6 | * 7 | * This surprising fact is due to the way that object-oriented languages implement polymorphism. 8 | * Polymorphism allows us to write code that is generic over a type. For example, we might have some 9 | * business logic that can work with any key/value store, whether backed by a database, an in-memory 10 | * hash map, or a cloud API. 11 | * 12 | * In object-oriented programming languages, we achieve this type of polymorphism with inheritance, 13 | * and then implementing or overriding methods in a subtype. 14 | * 15 | * In this section, you will learn more about how this works, its impact on performance, and 16 | * potential workarounds for performance sensitive code. 17 | */ 18 | package net.degoes.virtual 19 | 20 | import org.openjdk.jmh.annotations._ 21 | import org.openjdk.jmh.infra.Blackhole 22 | import java.util.concurrent.TimeUnit 23 | 24 | import zio.Chunk 25 | 26 | /** 27 | * EXERCISE 1 28 | * 29 | * Every non-final method invocation is potentially a virtual dispatch: a process that involves looking up 30 | * which concrete implementation of the method to invoke, based on the runtime class of the receiver. 31 | * 32 | * In this exercise, you will explore the cost of virtual dispatch. The current benchmark creates a 33 | * chunk of operators, each one of which is a random operator chosen from among the provided set. At 34 | * runtime, the JVM does not know which element of the chunk has which concrete type, so it must 35 | * lookup the correct method to invoke on an object-by-object basis. This results in lower 36 | * performance. 37 | * 38 | * Augment this benchmark with another benchmark, which uses another chunk, where every element of 39 | * the chunk uses the same concrete operator (e.g. Operator.DividedBy.type).
In your new benchmark, 40 | * because the JVM knows the concrete type of the object, when it invokes the apply method, it knows 41 | * exactly where the code for that function is, and does not need to perform a preliminary lookup. 42 | * This should result in faster performance. 43 | */ 44 | @State(Scope.Thread) 45 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 46 | @BenchmarkMode(Array(Mode.Throughput)) 47 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 48 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 49 | @Fork(1) 50 | @Threads(16) 51 | class PolyBenchmark { 52 | @Param(Array("1000", "10000", "100000")) 53 | var size: Int = _ 54 | 55 | var poly_operators: Chunk[Operator] = _ 56 | // var mono_operators: Chunk[Operator.DividedBy.type] = _ 57 | 58 | @Setup 59 | def setupPoly(): Unit = 60 | poly_operators = Operator.randomN(size) 61 | 62 | @Benchmark 63 | def poly(blackhole: Blackhole): Unit = { 64 | var i = 0 65 | var result = 0 66 | while (i < size) { 67 | val operator = poly_operators(i) 68 | 69 | result = operator(result, i + 1) 70 | 71 | i = i + 1 72 | } 73 | blackhole.consume(result) 74 | } 75 | 76 | trait Operator { 77 | def apply(l: Int, r: Int): Int 78 | } 79 | object Operator { 80 | // Deterministic RNG: 81 | private val rng = new scala.util.Random(0L) 82 | 83 | val All: IndexedSeq[Operator] = 84 | Array(Plus, Times, DividedBy, Max, Min) 85 | 86 | case object Plus extends Operator { 87 | def apply(l: Int, r: Int): Int = l + r 88 | } 89 | case object Times extends Operator { 90 | def apply(l: Int, r: Int): Int = l + r 91 | } 92 | case object DividedBy extends Operator { 93 | def apply(l: Int, r: Int): Int = l + r 94 | } 95 | case object Max extends Operator { 96 | def apply(l: Int, r: Int): Int = l + r 97 | } 98 | case object Min extends Operator { 99 | def apply(l: Int, r: Int): Int = l + r 100 | } 101 | 102 | def random(): Operator = All(rng.nextInt(All.length)) 103 | 104 | def randomN(n: Int): Chunk[Operator] = Chunk.fromIterable(Iterable.fill(n)(random())) 105 | 106 | } 107 | } 108 | 109 | /** 110 | * EXERCISE 2 111 | * 112 | * In this exercise, you will simulate the cost of a virtual dispatch by creating a benchark that 113 | * must lookup the correct method based on the virtual method table stored together with the data 114 | * for an object. 115 | * 116 | * Create an invokeVirtual benchmark that uses `obj.meta` to find the address of the method to be 117 | * invoked. Compare the performance of this benchmark to the invokeStatic benchmark. 118 | * 119 | * Note that this benchmark is not that realistic. There is no hash map lookup with invoke dynamic. 120 | * Nonetheless, getting a feel for the extra work the JVM must do to perform a virtual dispatch is 121 | * useful. 
122 | */ 123 | @State(Scope.Thread) 124 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 125 | @BenchmarkMode(Array(Mode.Throughput)) 126 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 127 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 128 | @Fork(1) 129 | @Threads(16) 130 | class PolySimBenchmark { 131 | val obj: JVMObject = 132 | JVMObject(1, JVMClassMetadata("Dog", Map(JVMMethod("Dog", "bark") -> Address(0)))) 133 | val is: Bytecode.InvokeStatic = Bytecode.InvokeStatic(Address(0)) 134 | val iv: Bytecode.InvokeVirtual = Bytecode.InvokeVirtual(JVMMethod("Dog", "bark")) 135 | 136 | @Benchmark 137 | def invokeStatic(blackhole: Blackhole): Unit = 138 | blackhole.consume(is.address.value) 139 | 140 | case class JVMObject(data: Any, meta: JVMClassMetadata) 141 | case class JVMClassMetadata(clazz: String, vtable: Map[JVMMethod, Address]) 142 | case class JVMMethod(clazz: String, name: String) 143 | case class Address(value: Int) 144 | sealed trait Bytecode 145 | object Bytecode { 146 | case object Mul extends Bytecode 147 | case class InvokeStatic(address: Address) extends Bytecode 148 | case class InvokeVirtual(method: JVMMethod) extends Bytecode 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/main/kotlin.skip/net/degoes/09-project.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * GRADUATION PROJECT 3 | * 4 | * In this section, you will tie together everything you have learned in order to significantly 5 | * optimize the performance of JVM-based code. 6 | */ 7 | package net.degoes.project 8 | 9 | import org.openjdk.jmh.annotations.* 10 | import org.openjdk.jmh.infra.Blackhole 11 | import java.util.concurrent.TimeUnit 12 | import java.util.concurrent.atomic.AtomicReference 13 | 14 | import zio.Chunk 15 | import scala.util.Random 16 | 17 | object dataset1 { 18 | interface Value 19 | final data class Text(val value: String) : Value 20 | final data class Integer(val value: Long) : Value 21 | final data class Decimal(val value: Double) : Value 22 | object NA : Value 23 | 24 | final data class Field(val name: String) 25 | 26 | final data class Row(val map: Map) { 27 | operator fun invoke(field: Field): Value = map[field.name]!! 
28 | } 29 | 30 | final data class Dataset(val rows: Chunk) { 31 | operator fun invoke(field: Field): Dataset = 32 | Dataset( 33 | rows.map { row -> 34 | if (row.map.contains(field.name)) Row(mapOf(field.name to row(field))) 35 | else Row(emptyMap()) 36 | } 37 | ) 38 | 39 | operator fun times(that: Dataset): Dataset = 40 | binary(that, "*", { left, right -> 41 | when (left) { 42 | is Integer -> when (right) { 43 | is Integer -> Integer(left.value * right.value) 44 | is Decimal -> Decimal(left.value * right.value) 45 | else -> NA 46 | } 47 | is Decimal -> when (right) { 48 | is Integer -> Decimal(left.value * right.value) 49 | is Decimal -> Decimal(left.value * right.value) 50 | else -> NA 51 | } 52 | else -> NA 53 | } 54 | }) 55 | 56 | operator fun plus(that: Dataset): Dataset = 57 | binary(that, "+", { left, right -> 58 | when (left) { 59 | is Integer -> when (right) { 60 | is Integer -> Integer(left.value + right.value) 61 | is Decimal -> Decimal(left.value + right.value) 62 | else -> NA 63 | } 64 | is Decimal -> when (right) { 65 | is Integer -> Decimal(left.value + right.value) 66 | is Decimal -> Decimal(left.value + right.value) 67 | else -> NA 68 | } 69 | else -> NA 70 | } 71 | }) 72 | 73 | operator fun minus(that: Dataset): Dataset = 74 | binary(that, "-", { left, right -> 75 | when (left) { 76 | is Integer -> when (right) { 77 | is Integer -> Integer(left.value - right.value) 78 | is Decimal -> Decimal(left.value - right.value) 79 | else -> NA 80 | } 81 | is Decimal -> when (right) { 82 | is Integer -> Decimal(left.value - right.value) 83 | is Decimal -> Decimal(left.value - right.value) 84 | else -> NA 85 | } 86 | else -> NA 87 | } 88 | }) 89 | 90 | operator fun div(that: Dataset): Dataset = 91 | binary(that, "/", { left, right -> 92 | when (left) { 93 | is Integer -> when (right) { 94 | is Integer -> Integer(left.value / right.value) 95 | is Decimal -> Decimal(left.value / right.value) 96 | else -> NA 97 | } 98 | is Decimal -> when (right) { 99 | is Integer -> Decimal(left.value / right.value) 100 | is Decimal -> Decimal(left.value / right.value) 101 | else -> NA 102 | } 103 | else -> NA 104 | } 105 | }) 106 | 107 | fun binary(that: Dataset, symbol: String, f: (Value, Value) -> Value): Dataset { 108 | val chunk = (rows.zip(that.rows) as Chunk>).map { tuple -> Pair(tuple._1, tuple._2) }.map { (leftRow: Row, rightRow: Row) -> 109 | val newMap: HashMap = HashMap() 110 | leftRow.map.forEach { leftName: String, leftValue: Value -> 111 | rightRow.map.forEach { rightName: String, rightValue: Value -> 112 | val name = "(${leftName} ${symbol} ${rightName})" 113 | newMap.put(name, f(leftValue, rightValue)) 114 | } 115 | } 116 | Row(newMap) 117 | } 118 | 119 | return Dataset(chunk) 120 | } 121 | } 122 | } 123 | 124 | /** 125 | * GRADUATION PROJECT 126 | * 127 | * Develop a version of `Dataset` that has a similar API, but which is at least 10x as fast. See how 128 | * far you can push it (can you get to 100x?). 129 | * 130 | * You may assume data is completely homogeneous and that no values are null. However, if ambitious, 131 | * you may solve the same problem under the assumption of undefined values and heterogeneous data. 
132 | */ 133 | @State(Scope.Thread) 134 | @OutputTimeUnit(TimeUnit.SECONDS) 135 | @BenchmarkMode(Mode.Throughput) 136 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 137 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 138 | @Fork(1) 139 | @Threads(16) 140 | open class ProjectBenchmark { 141 | @Param("100", "1000", "10000") 142 | var size: Int = 0 143 | 144 | object benchmark1 { 145 | var dataset: dataset1.Dataset? = null 146 | 147 | val start: dataset1.Field = dataset1.Field("start") 148 | val end: dataset1.Field = dataset1.Field("end") 149 | val netPay: dataset1.Field = dataset1.Field("netPay") 150 | } 151 | 152 | @Setup 153 | fun setupSlow(): Unit { 154 | val rng: Random = Random(0L) 155 | 156 | benchmark1.dataset = dataset1.Dataset(Chunk.fill(size) { 157 | val start: Long = rng.between(0, 360).toLong() 158 | val end: Long = rng.between(start, 360).toLong() 159 | val netPay: Long = rng.between(20000, 60000).toLong() 160 | 161 | dataset1.Row( 162 | mapOf( 163 | "start" to dataset1.Integer(start), 164 | "end" to dataset1.Integer(end), 165 | "netPay" to dataset1.Integer(netPay) 166 | ) 167 | ) 168 | }) 169 | } 170 | 171 | @Benchmark 172 | fun baseline(blackhole: Blackhole): Unit { 173 | val ds: dataset1.Dataset = benchmark1.dataset!! 174 | val result = (ds(benchmark1.start) + ds(benchmark1.end)) / ds(benchmark1.netPay) 175 | 176 | blackhole.consume(result) 177 | } 178 | 179 | } 180 | -------------------------------------------------------------------------------- /src/main/java.skip/net/degoes/tricks/07-tricks.java: -------------------------------------------------------------------------------- 1 | /** 2 | * TRICKS 3 | * 4 | * Until now, you have discovered many sources of overhead in developing software for the JVM. 5 | * Although you have some idea of how to avoid these sources of overhead, there has been no 6 | * systematic treatment of different techniques that can be applied to each type of overhead. 7 | * 8 | * In this section, you will learn some of the essential "tricks of the trade". In the process, you 9 | * will become proficient at writing fast code when the occassion requires. 10 | * 11 | * EXERCISE 1 12 | * 13 | * Because the JVM supports null values, you can use the null value as an extra "sentinal" value, 14 | * rather than using a wrapper data structure to propagate the same information. This can reduce 15 | * allocation and indirection and improve performance. 16 | * 17 | * In this exercise, create a version of the benchmark that uses null values instead of the 18 | * `Optional` data type. Ensure it follows the same structure and flow of the existing benchmark in 19 | * order to make a fair comparison. 20 | * 21 | * See tricks/UseNullBenchmark.java 22 | * 23 | * EXERCISE 2 24 | * 25 | * Arrays exploit CPU caches and primitive specialization, which means they can be tremendously 26 | * faster for certain tasks. 27 | * 28 | * In this exercise, create a version of the benchmark that uses arrays instead of lists. Ensure it 29 | * follows the same structure and flow of the existing benchmark in order to make a fair comparison. 30 | * 31 | * See if you can create an Array-based version that is 10x faster than the List-based version. 32 | * 33 | * See tricks/UseArraysBenchmark.java 34 | * 35 | * EXERCISE 3 36 | * 37 | * Although the JVM can optimize away some allocations in some cases, it's safer and more reliable 38 | * to simply avoid them in performance-sensitive code. 
This means using mutable structures, and 39 | * sometimes re-using structures (using pools, pre-allocation, and other techniques). 40 | * 41 | * In this exercise, create a version of the benchmark that uses a mutable data structure instead of 42 | * an immutable data structure. Ensure it follows the same structure and flow of the existing 43 | * benchmark in order to make a fair comparison. 44 | * 45 | * BONUS: Try to solve the problem using zero allocations in the benchmark. You will have to use 46 | * pre-allocation in order to achieve this goal. 47 | * 48 | * See tricks/NoAllocationBenchmark.java 49 | * 50 | * EXERCISE 4 51 | * 52 | * The JVM implements generics with type erasure, which means that generic data types must box all 53 | * primitives. In cases where you are using generic data types for primitives, it can make sense to 54 | * manually specialize the generic data types to your specific primitives. Although this creates 55 | * much more boilerplate, it allows you to improve performance. 56 | * 57 | * In this exercise, create a version of the benchmark that uses a specialized data type instead of 58 | * a generic data type. Ensure it follows the same structure and flow of the existing benchmark in 59 | * order to make a fair comparison. 60 | * 61 | * See tricks/SpecializeBenchmark.java 62 | * 63 | * EXERCISE 6 64 | * 65 | * In some cases, you can eliminate heap allocation in function return values by packing multiple 66 | * values into a single primitive value. For example, a 64 bit long can actually hold multiple 67 | * separate channels of information, and will not require heap allocation. 68 | * 69 | * In this exercise, create a version of the benchmark that uses a packed return value instead of 70 | * the provided case class. Ensure it follows the same structure and flow of the existing benchmark 71 | * in order to make a fair comparison. 72 | * 73 | * See tricks/PrimitivizeReturnBenchmark.java 74 | * 75 | * EXERCISE 7 76 | * 77 | * If you are processing data in bulk, and the fields of your data type are all primitives, then you 78 | * can reduce heap allocation by using arrays of the individual fields, rather than arrays of the 79 | * data type. This reduces allocation and indirection and improves cache hits. 80 | * 81 | * In this exercise, create a version of the benchmark that uses arrays of primitives instead of 82 | * arrays of the provided data type. Ensure it follows the same structure and flow of the existing 83 | * benchmark in order to make a fair comparison. 84 | * 85 | * See tricks/FlattenProductsBenchmark.java 86 | * 87 | * EXERCISE 8 88 | * 89 | * Virtual dispatch imposes overhead in any case where the JVM cannot devirtualize. If the number of 90 | * subtypes sharing the same (virtual) interface is fixed, then you can manually devirtualize by 91 | * using an integer tag to indicate which subtype should be used. Then in any case where you would 92 | * call a virtual method, you instead match on the tag, and call the concrete (non-virtual) method 93 | * corresponding to that tag. 94 | * 95 | * In this exercise, create a version of the benchmark that uses manual devirtualization instead of 96 | * virtual dispatch. Ensure it follows the same structure and flow of the existing benchmark in 97 | * order to make a fair comparison. 98 | * 99 | * See tricks/DevirtualizeBenchmark.java 100 | * 101 | * EXERCISE 10 102 | * 103 | * Exceptions can impose overhead on performance-sensitive code. This overhead comes primarily from 104 | * stack traces. 
If you can avoid stack traces, then you can avoid most of the overhead of 105 | * exceptions. 106 | * 107 | * In this exercise, you will try two separate tricks to avoid poorly-performing exception- bound 108 | * code: 109 | * 110 | * 1. Use a return value instead of throwing an exception. 2. Throw a special exception type that 111 | * does not generate a stack trace. 112 | * 113 | * See tricks/NoExceptionsBenchmark.java 114 | * 115 | * EXERCISE 11 116 | * 117 | * Hash maps can offer quite high performance (O(1)), but never as high performance as array 118 | * lookups (lower constant factor). To accelerate some code, you can switch from using non-integer 119 | * sparse keys to using dense integer keys, which lets you replace the map with an array. 120 | * 121 | * In this exercise, create an equivalent implementation to the provided one that uses arrays 122 | * instead of maps and observe the effects on performance. 123 | * 124 | * See tricks/MapToArrayBenchmark.java 125 | * 126 | * GRADUATION PROJECT 127 | * 128 | * Sometimes you can transform a process that is built using interfaces and classes (a so-called 129 | * "executable encoding") into something more primitive and fundamental that can be interpreted by a 130 | * stack machine. This has potential to significantly improve performance. 131 | * 132 | * In this graduation project, you will work with the instructor to implement this optimization for 133 | * a route parser, and see what sort of speedup you can achieve without changing the expressiveness 134 | * of the process. 135 | * 136 | * See tricks/StackInterpreterBenchmark.java 137 | */ 138 | -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/10-tuning.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * TUNING 3 | * 4 | * The JVM exposes several knobs that you can use to tweak and tune performance for your 5 | * applications. 6 | * 7 | * In this section, you will explore these knobs, with a special emphasis on garbage collection. 8 | * 9 | * Garbage collection is all about tradeoffs. Broadly speaking, the main tradeoffs are as follows: 10 | * 11 | * Throughput versus latency. Throughput is the amount of work that can be done in a given amount of 12 | * time. Latency is the amount of time it takes to complete a single unit of work. Garbage 13 | * collection can be tuned to maximize throughput, at the expense of latency, or to maximize 14 | * latency, at the expense of throughput. 15 | * 16 | * Memory usage versus throughput. Garbage collection can be tuned to use less memory, at the 17 | * expense of throughput. Alternately, throughput can be maximized, at the expense of memory usage. 18 | * Running JVM applications on memory-constrained environments will require tuning for memory usage. 
19 | */ 20 | package net.degoes.tuning 21 | 22 | import org.openjdk.jmh.annotations._ 23 | import org.openjdk.jmh.infra.Blackhole 24 | import java.util.concurrent.TimeUnit 25 | 26 | @State(Scope.Thread) 27 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 28 | @BenchmarkMode(Array(Mode.Throughput)) 29 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 30 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 31 | @Fork(value = 1, jvmArgsAppend = Array()) 32 | @Threads(16) 33 | class TuningBenchmark1 { 34 | @Param(Array("10000", "1000000")) 35 | var size: Int = _ 36 | 37 | @Param(Array("100000")) 38 | var numberOfObjects: Int = _ 39 | 40 | @Benchmark 41 | def burstHeap(blackhole: Blackhole): Unit = { 42 | var iter = 0 43 | while (iter < 4) { 44 | var junk = new java.util.ArrayList[Array[Byte]](numberOfObjects) 45 | var j = 0 46 | while (j < numberOfObjects) { 47 | junk.add(new Array[Byte](size)) 48 | j = j + 1 49 | } 50 | blackhole.consume(junk) 51 | iter = iter + 1 52 | } 53 | } 54 | } 55 | 56 | @State(Scope.Thread) 57 | @OutputTimeUnit(TimeUnit.SECONDS) 58 | @BenchmarkMode(Array(Mode.Throughput)) 59 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 60 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 61 | @Fork(value = 1, jvmArgsAppend = Array()) 62 | @Threads(16) 63 | class TuningBenchmark2 { 64 | @Param(Array("8000000")) 65 | var size: Int = _ 66 | 67 | @Benchmark 68 | def constantHeap(blackhole: Blackhole): Unit = 69 | blackhole.consume(new Array[Byte](size)) 70 | } 71 | 72 | @State(Scope.Thread) 73 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 74 | @BenchmarkMode(Array(Mode.Throughput)) 75 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 76 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 77 | @Fork(value = 1, jvmArgsAppend = Array()) 78 | @Threads(16) 79 | class TuningBenchmark3 { 80 | @Param(Array("4000")) 81 | var size: Int = _ 82 | 83 | @Benchmark 84 | def heapBandwidth(blackhole: Blackhole): Unit = 85 | blackhole.consume(new Array[Byte](size)) 86 | } 87 | 88 | @State(Scope.Thread) 89 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 90 | @BenchmarkMode(Array(Mode.Throughput)) 91 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 92 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 93 | @Fork(value = 1, jvmArgsAppend = Array()) 94 | @Threads(16) 95 | class TuningBenchmark4 { 96 | @Param(Array("2", "4", "8")) 97 | var n: Int = _ 98 | 99 | @Benchmark 100 | def nqueens(blackhole: Blackhole): Unit = { 101 | def queens(n: Int): List[List[(Int, Int)]] = { 102 | def isAttacked(q1: (Int, Int), q2: (Int, Int)) = 103 | q1._1 == q2._1 || 104 | q1._2 == q2._2 || 105 | (q2._1 - q1._1).abs == (q2._2 - q1._2).abs 106 | 107 | def isSafe(queen: (Int, Int), others: List[(Int, Int)]) = 108 | others.forall(!isAttacked(queen, _)) 109 | 110 | def placeQueens(k: Int): List[List[(Int, Int)]] = 111 | if (k == 0) 112 | List(List()) 113 | else 114 | for { 115 | queens <- placeQueens(k - 1) 116 | column <- 1 to n 117 | queen = (k, column) 118 | if isSafe(queen, queens) 119 | } yield queen :: queens 120 | placeQueens(n) 121 | } 122 | 123 | queens(n) 124 | } 125 | } 126 | 127 | /* 128 | * EXERCISE 1 129 | * 130 | * Execute the benchmarks using the default garbage collector. 131 | * 132 | * EXERCISE 2 133 | * 134 | * Execute the benchmarks using the parallel garbage collector by using the JVM flag 135 | * -XX:+UseParallelGC. 
136 | * 137 | * Experiment with the following settings to see the effect on performance: 138 | * 139 | * -XX:ParallelGCThreads (default: # of CPU cores) 140 | * -XX:MaxGCPauseMillis (default: 100) 141 | * -XX:GCTimeRatio (default: 99) 142 | * -XX:YoungGenerationSizeIncrement (default: 20) 143 | * -XX:TenuredGenerationSizeIncrement (default: 20) 144 | * -XX:AdaptiveSizeDecrementScaleFactor (default: 4) 145 | * -XX:UseGCOverheadLimit (default: true) 146 | * 147 | * EXERCISE 3 148 | * 149 | * Execute the benchmarks using the concurrent mark sweep garbage collector by using the JVM flag 150 | * -XX:+UseConcMarkSweepGC. 151 | * 152 | * Experiment with the following settings to see the effect on performance: 153 | * 154 | * -XX:CMSInitiatingOccupancyFraction (default: 68) 155 | * -XX:UseCMSInitiatingOccupancyOnly (default: false) 156 | * -XX:CMSInitiatingOccupancyFraction (default: 68) 157 | * -XX:CMSScavengeBeforeRemark (default: false) 158 | * -XX:ScavengeBeforeFullGC (default: false) 159 | * -XX:CMSParallelRemarkEnabled (default: true) 160 | * -XX:UseGCOverheadLimit (default: true) 161 | * 162 | * EXERCISE 4 163 | * 164 | * Execute the benchmarks using the G1 garbage collector by using the JVM flag -XX:+UseG1GC. 165 | * 166 | * Experiment with the following settings to see the effect on performance: 167 | * 168 | * -XX:InitiatingHeapOccupancyPercent (default: 45) 169 | * -XX:G1UseAdaptiveIHOP (default: true) 170 | * -XX:G1HeapWastePercent (default: 5) 171 | * -XX:G1PeriodicGCSystemLoadThreshold (default: 120) 172 | * -XX:MinHeapFreeRatio (default: 40) 173 | * -XX:MaxHeapFreeRatio (default: 70) 174 | * -XX:G1NewSizePercent (default: 5) 175 | * -XX:G1MaxNewSizePercent (default: 60) 176 | * -XX:NewSize (default: 1/2 of the heap) 177 | * -XX:MaxNewSize (default: 1/2 of the heap) 178 | * -XX:+AlwaysPreTouch (default: false) 179 | * 180 | * EXERCISE 5 181 | * 182 | * Execute the benchmarks using the Z garbage collector by using the JVM flag -XX:+UseZGC, 183 | * and -XX:+UnlockExperimentalVMOptions depending on the JVM version you are using. 184 | * 185 | * Experiment with the following settings to see the effect on performance: 186 | * 187 | * -XX:ConcGCThreads (default: # of CPU cores) 188 | * 189 | */ 190 | -------------------------------------------------------------------------------- /src/main/kotlin.skip/net/degoes/10-tuning.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * TUNING 3 | * 4 | * The JVM exposes several knobs that you can use to tweak and tune performance for your 5 | * applications. 6 | * 7 | * In this section, you will explore these knobs, with a special emphasis on garbage collection. 8 | * 9 | * Garbage collection is all about tradeoffs. Broadly speaking, the main tradeoffs are as follows: 10 | * 11 | * Throughput versus latency. Throughput is the amount of work that can be done in a given amount of 12 | * time. Latency is the amount of time it takes to complete a single unit of work. Garbage 13 | * collection can be tuned to maximize throughput, at the expense of latency, or to maximize 14 | * latency, at the expense of throughput. 15 | * 16 | * Memory usage versus throughput. Garbage collection can be tuned to use less memory, at the 17 | * expense of throughput. Alternately, throughput can be maximized, at the expense of memory usage. 18 | * Running JVM applications on memory-constrained environments will require tuning for memory usage. 
19 | */ 20 | package net.degoes.tuning 21 | 22 | import org.openjdk.jmh.annotations.* 23 | import org.openjdk.jmh.infra.Blackhole 24 | import java.util.concurrent.TimeUnit 25 | 26 | @State(Scope.Thread) 27 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 28 | @BenchmarkMode(Mode.Throughput) 29 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 30 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 31 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 32 | @Threads(16) 33 | open class TuningBenchmark1 { 34 | @Param("10000", "1000000") 35 | var size: Int = 0 36 | 37 | @Param("100000") 38 | var numberOfObjects: Int = 0 39 | 40 | @Benchmark 41 | fun burstHeap(blackhole: Blackhole): Unit { 42 | var iter = 0 43 | while (iter < 4) { 44 | var junk = java.util.ArrayList<ByteArray>(numberOfObjects) 45 | var j = 0 46 | while (j < numberOfObjects) { 47 | junk.add(ByteArray(size)) 48 | j = j + 1 49 | } 50 | blackhole.consume(junk) 51 | iter = iter + 1 52 | } 53 | } 54 | } 55 | 56 | @State(Scope.Thread) 57 | @OutputTimeUnit(TimeUnit.SECONDS) 58 | @BenchmarkMode(Mode.Throughput) 59 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 60 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 61 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 62 | @Threads(16) 63 | open class TuningBenchmark2 { 64 | @Param("8000000") 65 | var size: Int = 0 66 | 67 | @Benchmark 68 | fun constantHeap(blackhole: Blackhole): Unit { 69 | blackhole.consume(ByteArray(size)) 70 | } 71 | } 72 | 73 | @State(Scope.Thread) 74 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 75 | @BenchmarkMode(Mode.Throughput) 76 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 77 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 78 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 79 | @Threads(16) 80 | open class TuningBenchmark3 { 81 | @Param("4000") 82 | var size: Int = 0 83 | 84 | @Benchmark 85 | fun heapBandwidth(blackhole: Blackhole): Unit { 86 | blackhole.consume(ByteArray(size)) 87 | } 88 | } 89 | 90 | @State(Scope.Thread) 91 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 92 | @BenchmarkMode(Mode.Throughput) 93 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 94 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 95 | @Fork(value = 1, jvmArgsAppend = emptyArray()) 96 | @Threads(16) 97 | open class TuningBenchmark4 { 98 | @Param("2", "4", "8") 99 | var n: Int = 0 100 | 101 | @Benchmark 102 | fun nqueens(blackhole: Blackhole): Unit { 103 | fun queens(n: Int): List<List<Pair<Int, Int>>> { 104 | fun isAttacked(q1: Pair<Int, Int>, q2: Pair<Int, Int>): Boolean { 105 | return q1.first == q2.first || 106 | q1.second == q2.second || 107 | Math.abs(q2.first - q1.first) == Math.abs(q2.second - q1.second) 108 | } 109 | 110 | fun isSafe(queen: Pair<Int, Int>, others: List<Pair<Int, Int>>) = 111 | others.all { q -> !isAttacked(queen, q) } 112 | 113 | fun placeQueens(k: Int): List<List<Pair<Int, Int>>> { 114 | return if (k == 0) listOf(emptyList<Pair<Int, Int>>()) else 115 | placeQueens(k - 1).flatMap { queens -> 116 | (1..n).toList().filter { column -> 117 | isSafe(k to column, queens) 118 | }.map { column -> listOf(k to column) + queens } 119 | } 120 | } 121 | 122 | return placeQueens(n) 123 | } 124 | 125 | queens(n) 126 | } 127 | } 128 | 129 | /* 130 | * EXERCISE 1 131 | * 132 | * Execute the benchmarks using the default garbage collector. 133 | * 134 | * EXERCISE 2 135 | * 136 | * Execute the benchmarks using the parallel garbage collector by using the JVM flag 137 | * -XX:+UseParallelGC.
138 | * 139 | * Experiment with the following settings to see the effect on performance: 140 | * 141 | * -XX:ParallelGCThreads (default: # of CPU cores) 142 | * -XX:MaxGCPauseMillis (default: 100) 143 | * -XX:GCTimeRatio (default: 99) 144 | * -XX:YoungGenerationSizeIncrement (default: 20) 145 | * -XX:TenuredGenerationSizeIncrement (default: 20) 146 | * -XX:AdaptiveSizeDecrementScaleFactor (default: 4) 147 | * -XX:UseGCOverheadLimit (default: true) 148 | * 149 | * EXERCISE 3 150 | * 151 | * Execute the benchmarks using the concurrent mark sweep garbage collector by using the JVM flag 152 | * -XX:+UseConcMarkSweepGC. 153 | * 154 | * Experiment with the following settings to see the effect on performance: 155 | * 156 | * -XX:CMSInitiatingOccupancyFraction (default: 68) 157 | * -XX:UseCMSInitiatingOccupancyOnly (default: false) 158 | * -XX:CMSInitiatingOccupancyFraction (default: 68) 159 | * -XX:CMSScavengeBeforeRemark (default: false) 160 | * -XX:ScavengeBeforeFullGC (default: false) 161 | * -XX:CMSParallelRemarkEnabled (default: true) 162 | * -XX:UseGCOverheadLimit (default: true) 163 | * 164 | * EXERCISE 4 165 | * 166 | * Execute the benchmarks using the G1 garbage collector by using the JVM flag -XX:+UseG1GC. 167 | * 168 | * Experiment with the following settings to see the effect on performance: 169 | * 170 | * -XX:InitiatingHeapOccupancyPercent (default: 45) 171 | * -XX:G1UseAdaptiveIHOP (default: true) 172 | * -XX:G1HeapWastePercent (default: 5) 173 | * -XX:G1PeriodicGCSystemLoadThreshold (default: 120) 174 | * -XX:MinHeapFreeRatio (default: 40) 175 | * -XX:MaxHeapFreeRatio (default: 70) 176 | * -XX:G1NewSizePercent (default: 5) 177 | * -XX:G1MaxNewSizePercent (default: 60) 178 | * -XX:NewSize (default: 1/2 of the heap) 179 | * -XX:MaxNewSize (default: 1/2 of the heap) 180 | * -XX:+AlwaysPreTouch (default: false) 181 | * 182 | * EXERCISE 5 183 | * 184 | * Execute the benchmarks using the Z garbage collector by using the JVM flag -XX:+UseZGC, 185 | * and -XX:+UnlockExperimentalVMOptions depending on the JVM version you are using. 186 | * 187 | * Experiment with the following settings to see the effect on performance: 188 | * 189 | * -XX:ConcGCThreads (default: # of CPU cores) 190 | * 191 | */ 192 | -------------------------------------------------------------------------------- /src/main/kotlin.skip/net/degoes/01-collections.kt: -------------------------------------------------------------------------------- 1 | /** 2 | * COLLECTIONS 3 | * 4 | * Thanks to powerful abstractions on the JVM, including java.util Collections, or standard library 5 | * collections in Scala, Kotlin, and other JVM-based languages, it is easy to write code that 6 | * processes data in bulk. 7 | * 8 | * With this ease comes a danger: it is easy to write code that is not performant. This performance 9 | * cost comes about because of several factors: 10 | * 11 | * 1. Wrong collection type. Different collection types have different overhead on different kinds 12 | * of operations. For example, doubly-linked linked lists are good at prepending and appending 13 | * single elements, but are terrible at random access. 14 | * 15 | * 2. Boxing of primitives. On the JVM, primitives are not objects, and so they must be boxed into 16 | * objects in order to be stored in collections. This boxing and unboxing can be expensive. 17 | * 18 | * 3. Cache locality. Modern CPUs are very fast, but they are also very complex. One of the ways 19 | * that CPUs achieve their speed is by caching data in memory. 
Most collection types do not store 20 | * their elements contiguously in memory, even if they are primitives, and so cannot take advantage 21 | * of the CPU cache, resulting in slower performance. 22 | * 23 | * In this section, you will use the JMH benchmarking tool in order to explore collection 24 | * performance across a range of collections, and then you will discover not only how to use the 25 | * fastest collection type but how to increase its applicability to a wider range of use cases. 26 | */ 27 | package net.degoes.collections 28 | 29 | import org.openjdk.jmh.annotations.* 30 | import org.openjdk.jmh.infra.Blackhole 31 | import java.util.concurrent.TimeUnit 32 | import io.vavr.collection.List 33 | 34 | /** 35 | * EXERCISE 1 36 | * 37 | * This benchmark is currently configured with a Vavr List, which is a singly-linked-list data type. Add two 38 | * other collection types to this benchmark, and make sure to at least try Array. 39 | * 40 | * EXERCISE 2 41 | * 42 | * Identify which collection is the fastest for prepending a single element, and explain why. 43 | */ 44 | @State(Scope.Thread) 45 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 46 | @BenchmarkMode(Mode.Throughput) 47 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 48 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 49 | @Fork(1) 50 | @Threads(16) 51 | open class ElementPrependBenchmark { 52 | val PrependsPerIteration = 100 53 | 54 | @Param("1000", "10000", "100000") 55 | var size: Int = 0 56 | 57 | var startList: List = List.of() 58 | 59 | @Setup(Level.Trial) 60 | fun setup(): Unit { 61 | startList = List.range(0, size).map { _ -> "a" } 62 | } 63 | 64 | @Benchmark 65 | fun list(blackhole: Blackhole): Unit = 66 | blackhole.consume(startList.prepend("a")) 67 | } 68 | 69 | /** 70 | * EXERCISE 3 71 | * 72 | * Create a benchmark for concatenation across lists, vectors (or another standard collection 73 | * type), and arrays. 74 | */ 75 | @State(Scope.Thread) 76 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 77 | @BenchmarkMode(Mode.Throughput) 78 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 79 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 80 | @Fork(1) 81 | @Threads(16) 82 | open class ConcatBenchmark { 83 | @Setup(Level.Trial) 84 | fun setup(): Unit { 85 | } 86 | 87 | @Benchmark 88 | fun list(blackhole: Blackhole): Unit { 89 | } 90 | } 91 | 92 | /** 93 | * EXERCISE 4 94 | * 95 | * Create a benchmark for random access across lists, vectors (or another standard collection 96 | * type), and arrays. 97 | */ 98 | @State(Scope.Thread) 99 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 100 | @BenchmarkMode(Mode.Throughput) 101 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 102 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 103 | @Fork(1) 104 | @Threads(16) 105 | open class RandomAccessBenchmark { 106 | @Setup(Level.Trial) 107 | fun setup(): Unit { 108 | } 109 | 110 | @Benchmark 111 | fun list(blackhole: Blackhole): Unit { 112 | } 113 | } 114 | 115 | /** 116 | * EXERCISE 5 117 | * 118 | * Create a benchmark for iteration, which sums all the elements in a collection, across lists, 119 | * vectors (or another standard collection type), and arrays. 120 | * 121 | * NOTE: Arrays of primitives are specialized on the JVM. Which means they do not incur overhead of 122 | * "boxing", a topic we will return to later. For now, just make sure to store java.lang.Integer 123 | * values in the Array in order to ensure the benchmark is fair. 
124 | */ 125 | @State(Scope.Thread) 126 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 127 | @BenchmarkMode(Mode.Throughput) 128 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 129 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 130 | @Fork(1) 131 | @Threads(16) 132 | open class IterationBenchmark { 133 | @Setup(Level.Trial) 134 | fun setup(): Unit { 135 | } 136 | 137 | @Benchmark 138 | fun list(blackhole: Blackhole): Unit { 139 | } 140 | } 141 | 142 | /** 143 | * EXERCISE 6 144 | * 145 | * Create a benchmark for lookup of an element by a property of the element, across lists, arrays, 146 | * and maps. 147 | */ 148 | @State(Scope.Thread) 149 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 150 | @BenchmarkMode(Mode.Throughput) 151 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 152 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 153 | @Fork(1) 154 | @Threads(16) 155 | open class LookupBenchmark { 156 | val Size = 1000 157 | val IdToLookup = Size - 1 158 | 159 | data class Person(val id: Int, val age: Int, val name: String) 160 | 161 | val peopleList: List = List.range(0, Size).map { i -> Person(i, i, "Person ${i}") } 162 | 163 | @Setup(Level.Trial) 164 | fun setup(): Unit { 165 | } 166 | 167 | @Benchmark 168 | fun list(blackhole: Blackhole): Unit { 169 | blackhole.consume(peopleList.find { it.id == IdToLookup }!!) 170 | } 171 | } 172 | 173 | /** 174 | * GRADUATION PROJECT 175 | * 176 | * Develop a new immutable collection type (`Chain`) that has O(1) for concatenation. Compare its 177 | * performance to at least two other collection types. Then augment this collection type with 178 | * iteration, so you can benchmark iteration against the other collection types. 179 | * 180 | * Think carefully about whether or not it is possible to have a single collection type that has 181 | * best-in-class performance across all operations. Why or why not? 182 | */ 183 | @State(Scope.Thread) 184 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 185 | @BenchmarkMode(Mode.Throughput) 186 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 187 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 188 | @Fork(1) 189 | @Threads(16) 190 | open class GraduationBenchmark { 191 | @Param("100", "1000", "10000") 192 | var size: Int = 0 193 | 194 | @Benchmark 195 | fun concat(blackhole: Blackhole): Unit { 196 | var i = 0 197 | var c = Chain.make(1) 198 | while (i < size) { 199 | c = c.append(c) 200 | i = i + 1 201 | } 202 | blackhole.consume(c) 203 | } 204 | 205 | class Chain() { 206 | 207 | companion object { 208 | fun make(vararg values: A): Chain = Chain.empty() // TODO 209 | fun empty(): Chain = Chain() 210 | 211 | } 212 | 213 | fun append(that: Chain): Chain = Chain.empty() // TODO 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /src/main/scala.skip/net/degoes/01-collections.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * COLLECTIONS 3 | * 4 | * Thanks to powerful abstractions on the JVM, including java.util Collections, or standard library 5 | * collections in Scala, Kotlin, and other JVM-based languages, it is easy to write code that 6 | * processes data in bulk. 7 | * 8 | * With this ease comes a danger: it is easy to write code that is not performant. This performance 9 | * cost comes about because of several factors: 10 | * 11 | * 1. Wrong collection type. 
Different collection types have different overhead on different kinds 12 | * of operations. For example, doubly-linked linked lists are good at prepending and appending 13 | * single elements, but are terrible at random access. 14 | * 15 | * 2. Boxing of primitives. On the JVM, primitives are not objects, and so they must be boxed into 16 | * objects in order to be stored in collections. This boxing and unboxing can be expensive. 17 | * 18 | * 3. Cache locality. Modern CPUs are very fast, but they are also very complex. One of the ways 19 | * that CPUs achieve their speed is by caching data in memory. Most collection types do not store 20 | * their elements contiguously in memory, even if they are primitives, and so cannot take advantage 21 | * of the CPU cache, resulting in slower performance. 22 | * 23 | * In this section, you will use the JMH benchmarking tool in order to explore collection 24 | * performance across a range of collections, and then you will discover not only how to use the 25 | * fastest collection type but how to increase its applicability to a wider range of use cases. 26 | */ 27 | package net.degoes.collections 28 | 29 | import org.openjdk.jmh.annotations._ 30 | import org.openjdk.jmh.infra.Blackhole 31 | import java.util.concurrent.TimeUnit 32 | 33 | /** 34 | * EXERCISE 1 35 | * 36 | * This benchmark is currently configured with List, which is a Scala linked-list data type. Add two 37 | * other collection types to this benchmark (in Scala, choose Vector and Array; if completing these 38 | * exercises in another programming language, be sure to at least choose Array). 39 | * 40 | * EXERCISE 2 41 | * 42 | * Identify which collection is the fastest for prepending a single element, and explain why. 43 | */ 44 | @State(Scope.Thread) 45 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 46 | @BenchmarkMode(Array(Mode.Throughput)) 47 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 48 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 49 | @Fork(1) 50 | @Threads(16) 51 | class ElementPrependBenchmark { 52 | val PrependsPerIteration = 100 53 | 54 | @Param(Array("1000", "10000", "100000")) 55 | var size: Int = _ 56 | 57 | var startList: List[String] = _ 58 | 59 | @Setup(Level.Trial) 60 | def setup(): Unit = 61 | startList = List.fill(size)("a") 62 | 63 | @Benchmark 64 | def list(blackhole: Blackhole): Unit = 65 | blackhole.consume("a" :: startList) 66 | } 67 | 68 | /** 69 | * EXERCISE 3 70 | * 71 | * Create a benchmark for concatenation across lists, vectors (or some other standard collection 72 | * type, if not solving these problems in Scala), and arrays. 73 | */ 74 | @State(Scope.Thread) 75 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 76 | @BenchmarkMode(Array(Mode.Throughput)) 77 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 78 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 79 | @Fork(1) 80 | @Threads(16) 81 | class ConcatBenchmark { 82 | @Setup(Level.Trial) 83 | def setup(): Unit = () 84 | 85 | @Benchmark 86 | def list(blackhole: Blackhole): Unit = () 87 | } 88 | 89 | /** 90 | * EXERCISE 4 91 | * 92 | * Create a benchmark for random access across lists, vectors (or some other standard collection 93 | * type, if not solving these problems in Scala), and arrays. 
94 | */ 95 | @State(Scope.Thread) 96 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 97 | @BenchmarkMode(Array(Mode.Throughput)) 98 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 99 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 100 | @Fork(1) 101 | @Threads(16) 102 | class RandomAccessBenchmark { 103 | @Setup(Level.Trial) 104 | def setup(): Unit = () 105 | 106 | @Benchmark 107 | def list(blackhole: Blackhole): Unit = () 108 | } 109 | 110 | /** 111 | * EXERCISE 5 112 | * 113 | * Create a benchmark for iteration, which sums all the elements in a collection, across lists, 114 | * vectors (or some other standard collection type, if not solving these problems in Scala), and 115 | * arrays. 116 | * 117 | * NOTE: Arrays of primitives are specialized on the JVM. Which means they do not incur overhead of 118 | * "boxing", a topic we will return to later. For now, just make sure to store java.lang.Integer 119 | * values in the Array in order to ensure the benchmark is fair. 120 | */ 121 | @State(Scope.Thread) 122 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 123 | @BenchmarkMode(Array(Mode.Throughput)) 124 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 125 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 126 | @Fork(1) 127 | @Threads(16) 128 | class IterationBenchmark { 129 | @Setup(Level.Trial) 130 | def setup(): Unit = () 131 | 132 | @Benchmark 133 | def list(blackhole: Blackhole): Unit = () 134 | } 135 | 136 | /** 137 | * EXERCISE 6 138 | * 139 | * Create a benchmark for lookup of an element by a property of the element, across lists, arrays, 140 | * and maps. 141 | */ 142 | @State(Scope.Thread) 143 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 144 | @BenchmarkMode(Array(Mode.Throughput)) 145 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 146 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 147 | @Fork(1) 148 | @Threads(16) 149 | class LookupBenchmark { 150 | val Size = 1000 151 | val IdToLookup = Size - 1 152 | 153 | case class Person(id: Int, age: Int, name: String) 154 | val peopleList: List[Person] = List.tabulate(Size)(i => Person(i, i, s"Person $i")) 155 | 156 | @Setup(Level.Trial) 157 | def setup(): Unit = () 158 | 159 | @Benchmark 160 | def list(blackhole: Blackhole): Unit = 161 | blackhole.consume(peopleList.find(_.id == IdToLookup).get) 162 | } 163 | 164 | /** 165 | * GRADUATION PROJECT 166 | * 167 | * Develop a new immutable collection type (`Chain`) that has O(1) for concatenation. Compare its 168 | * performance to at least two other collection types. Then augment this collection type with 169 | * iteration, so you can benchmark iteration against the other collection types. 170 | * 171 | * Think carefully about whether or not it is possible to have a single collection type that has 172 | * best-in-class performance across all operations. Why or why not? 
173 | */ 174 | @State(Scope.Thread) 175 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 176 | @BenchmarkMode(Array(Mode.Throughput)) 177 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 178 | @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 179 | @Fork(1) 180 | @Threads(16) 181 | class GraduationBenchmark { 182 | @Param(Array("100", "1000", "10000")) 183 | var size: Int = _ 184 | 185 | @Benchmark 186 | def concat(blackhole: Blackhole): Unit = { 187 | var i = 0 188 | var c = Chain(1) 189 | while (i < size) { 190 | c = c ++ c 191 | i = i + 1 192 | } 193 | blackhole.consume(c) 194 | } 195 | 196 | } 197 | 198 | case class Chain[+A]() { 199 | def ++[A1 >: A](that: Chain[A1]): Chain[A1] = Chain.empty // TODO 200 | } 201 | 202 | object Chain { 203 | def empty: Chain[Nothing] = Chain() 204 | 205 | def apply[A](as: A*): Chain[A] = Chain() // TODO 206 | } 207 | --------------------------------------------------------------------------------
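One way to attack the Chain graduation project above is to let `++` allocate a single node and defer all the real work to iteration. The following standalone Scala sketch (not the workshop's reference solution; the Single and Concat constructors are names invented here) shows the shape of such a structure:

sealed trait Chain[+A] {
  import Chain._

  // O(1): concatenation just allocates one Concat node, regardless of how large either side is.
  def ++[A1 >: A](that: Chain[A1]): Chain[A1] =
    if (this eq Empty) that
    else if (that eq Empty) this
    else Concat(this, that)

  // Iteration flattens the tree with an explicit stack, so deeply nested
  // concatenations do not overflow the call stack.
  def foreach(f: A => Unit): Unit = {
    var stack: List[Chain[A]] = this :: Nil
    while (stack.nonEmpty) {
      val head = stack.head
      stack = stack.tail
      head match {
        case Empty        => ()
        case Single(a)    => f(a)
        case Concat(l, r) => stack = l :: r :: stack
      }
    }
  }
}

object Chain {
  case object Empty                                    extends Chain[Nothing]
  final case class Single[A](a: A)                     extends Chain[A]
  final case class Concat[A](l: Chain[A], r: Chain[A]) extends Chain[A]

  def empty: Chain[Nothing]      = Empty
  def apply[A](as: A*): Chain[A] = as.foldLeft(empty: Chain[A])((c, a) => c ++ Single(a))
}

With this representation `++` is constant-time, but iteration has to walk the tree of concatenations, which is exactly the trade-off the exercise asks you to weigh when comparing against List, Vector, and Array.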