├── project ├── build.properties └── plugins.sbt ├── schema ├── thrift │ ├── spot.thrift │ ├── nobid.thrift │ └── README.md ├── protocol-buffers │ ├── spot.proto │ ├── nobid.proto │ └── README.md └── avro │ ├── nobid.avsc │ └── README.md ├── common └── src │ └── main │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ ├── BenchmarkSettings.scala │ ├── FileUtil.scala │ └── dataset │ └── DataSet.scala ├── .travis.yml ├── thrift-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── thrift │ │ ├── DataSet.scala │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── thrift │ └── ThriftBench.scala ├── avro-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── avro │ │ ├── DataSet.scala │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── avro │ └── AvroBench.scala ├── proto-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── protobuf │ │ ├── FileGen.scala │ │ └── DataSet.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── protobuf │ └── ProtoBench.scala ├── csv-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── csv │ │ ├── FileGen.scala │ │ └── Csv.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── csv │ └── CsvBench.scala ├── msgpack4z-bench ├── src │ ├── main │ │ └── scala │ │ │ └── com │ │ │ └── github │ │ │ └── saint1991 │ │ │ └── serialization │ │ │ └── benchmark │ │ │ └── msgpack │ │ │ └── msgpack4z │ │ │ ├── Codec.scala │ │ │ └── FileGen.scala │ └── test │ │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── msgpack │ │ └── msgpack4z │ │ └── Msgpack4zBench.scala └── README.md ├── jsoniter-scala-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── jsoniter │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── jsoniter │ └── JsonIterScalaBench.scala ├── LICENSE ├── circe-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── circe │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── circe │ └── CirceBench.scala ├── msgpack-jackson-bench └── src │ ├── main │ └── scala │ │ └── serialization │ │ └── benchmark │ │ └── msgpack │ │ └── jackson │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── msgpack │ └── jackson │ └── MsgpackJacksonBench.scala ├── .gitignore └── README.md /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.1.6 2 | -------------------------------------------------------------------------------- /schema/thrift/spot.thrift: -------------------------------------------------------------------------------- 1 | namespace java com.github.saint1991.serialization.benchmark.thrift 2 | 3 | struct Spot { 4 
| 1:required i32 id 5 | 2:required string name 6 | } 7 | -------------------------------------------------------------------------------- /schema/protocol-buffers/spot.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package protobuf; 3 | option java_package = "com.github.saint1991.serialization.benchmark.protobuf"; 4 | 5 | message Spot { 6 | int32 id = 1; 7 | string name = 2; 8 | } 9 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.3.4") 2 | addSbtPlugin("com.thesamet" % "sbt-protoc" % "0.99.18") 3 | addSbtPlugin("com.julianpeeters" % "sbt-avrohugger" % "2.0.0-RC9") 4 | addSbtPlugin("com.twitter" % "scrooge-sbt-plugin" % "18.5.0") 5 | 6 | libraryDependencies += "com.thesamet.scalapb" %% "compilerplugin" % "0.7.0" 7 | -------------------------------------------------------------------------------- /common/src/main/scala/com/github/saint1991/serialization/benchmark/BenchmarkSettings.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | object BenchmarkSettings { 6 | final val WarmUpIteration = 20 7 | final val Iteration = 20 8 | final val TUnit = TimeUnit.MILLISECONDS 9 | final val DatasetSize = 100000 10 | } 11 | -------------------------------------------------------------------------------- /common/src/main/scala/com/github/saint1991/serialization/benchmark/FileUtil.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark 2 | 3 | import better.files._ 4 | 5 | object FileUtil { 6 | 7 | final val OutDir = "out" 8 | final val NewLineBytes = "\n".getBytes 9 | 10 | def mkOutFile(name: String): File = (OutDir / name) 11 | .createIfNotExists(createParents = true) 12 | .clear() 13 | } 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | jdk: oraclejdk8 3 | cache: 4 | directories: 5 | - $HOME/.ivy2/cache 6 | - $HOME/.sbt/root 7 | before_script: 8 | - sbt jmh:compile 9 | - sbt avroBench/run circeBench/run csvBench/run jsoniterScalaBench/run msgpackJacksonBench/run msgpack4zBench/run protoBench/run thriftBench/run 10 | - sbt avroBench/jmh:run circeBench/jmh:run csvBench/jmh:run jsoniterScalaBench/jmh:run msgpackJacksonBench/jmh:run msgpack4zBench/jmh:run protoBench/jmh:run thriftBench/jmh:run 11 | -------------------------------------------------------------------------------- /schema/thrift/nobid.thrift: -------------------------------------------------------------------------------- 1 | include "spot.thrift" 2 | namespace java com.github.saint1991.serialization.benchmark.thrift 3 | 4 | struct Nobid { 5 | 1: required i32 adnw_id, 6 | 2: required string app_name, 7 | 3: required string auction_id, 8 | 4: required string host, 9 | 5: required string logged_at, 10 | 6: required i32 m_id, 11 | 7: required i32 nbr, 12 | 8: optional string page, 13 | 9: required i32 res_time, 14 | 10:required spot.Spot spot, 15 | 11:required list<string> history, 16 | 12:required map<string, string> tags 17 | } 18 | --------------------------------------------------------------------------------
/thrift-bench/src/main/scala/com/github/saint1991/serialization/benchmark/thrift/DataSet.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.thrift 2 | 3 | object DataSet { 4 | def createDataset(n: Int): Seq[Nobid] = Seq.fill(n) { 5 | Nobid( 6 | adnwId = 12345, 7 | appName = "sampleApp", 8 | auctionId = "14241c7f-7db1-4bcd-a3f7-82885e08e7ec", 9 | host = "prd-dsp03", 10 | loggedAt = "2017-06-30 09:07:37.677", 11 | mId = 234, 12 | nbr = 6260, 13 | page = Some("http://diamond.jp/articles/a/15434"), 14 | resTime = 4, 15 | spot = Spot( 16 | id = 2406, 17 | name = "Mie" 18 | ), 19 | history = Seq( 20 | "a", 21 | "b", 22 | "c" 23 | ), 24 | tags = Map( 25 | "media" -> "facebook", 26 | "ssp" -> "google" 27 | ) 28 | ) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /avro-bench/src/main/scala/com/github/saint1991/serialization/benchmark/avro/DataSet.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.avro 2 | 3 | object DataSet { 4 | 5 | def createDataset(n: Int): Seq[Nobid] = Seq.fill(n) { 6 | Nobid( 7 | adnwId = 12345, 8 | appName = "sampleApp", 9 | auctionId = "14241c7f-7db1-4bcd-a3f7-82885e08e7ec", 10 | host = "prd-dsp03", 11 | loggedAt = "2017-06-30 09:07:37.677", 12 | mId = 234, 13 | nbr = 6260, 14 | page = Some("http://diamond.jp/articles/-/15434"), 15 | resTime = 4, 16 | spot = spotRecord( 17 | id = 2406, 18 | name = "Mie" 19 | ), 20 | history = List( 21 | "a", 22 | "b", 23 | "c" 24 | ), 25 | tags = Map( 26 | "media" -> "facebook", 27 | "ssp" -> "google" 28 | ) 29 | ) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /proto-bench/src/main/scala/com/github/saint1991/serialization/benchmark/protobuf/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.protobuf 2 | 3 | import java.io.{FileOutputStream, OutputStream} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 8 | import com.github.saint1991.serialization.benchmark.FileUtil 9 | import com.github.saint1991.serialization.benchmark.protobuf.nobid.Nobid 10 | 11 | object FileGen extends App { 12 | 13 | val dataset = DataSet.createDataset(DatasetSize) 14 | 15 | final val outFile = FileUtil.mkOutFile("nobid.protobuf") 16 | val out = new FileOutputStream(outFile.toJava) 17 | 18 | // write to file 19 | allCatch andFinally { 20 | out.flush() 21 | out.close() 22 | } apply writeToFile(dataset, out) 23 | 24 | private def writeToFile(dataset: Seq[Nobid], file: OutputStream): Unit = { 25 | dataset.foreach(r => r.writeTo(file)) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /csv-bench/src/main/scala/com/github/saint1991/serialization/benchmark/csv/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.csv 2 | 3 | import java.io.{FileOutputStream, PrintWriter} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 8 | import com.github.saint1991.serialization.benchmark.FileUtil 9 | import com.github.saint1991.serialization.benchmark.dataset._ 10 | 11 | object 
FileGen extends App { 11 | import Csv._ 12 | 13 | val dataset = DataSet.createDataset(DatasetSize) 14 | 15 | // write to file 16 | final val outFile = FileUtil.mkOutFile("nobid.csv") 17 | val out = new PrintWriter(new FileOutputStream(outFile.toJava)) 18 | 19 | allCatch andFinally { 20 | out.flush() 21 | out.close() 22 | } apply writeToFile(dataset, out) 23 | 24 | private def writeToFile(dataset: Seq[Nobid], file: PrintWriter): Unit = 25 | dataset.foreach { r => file.println(toCsv(r)) } 26 | } 27 | -------------------------------------------------------------------------------- /proto-bench/src/main/scala/com/github/saint1991/serialization/benchmark/protobuf/DataSet.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.protobuf 2 | 3 | import com.github.saint1991.serialization.benchmark.protobuf.nobid.Nobid 4 | import com.github.saint1991.serialization.benchmark.protobuf.spot.Spot 5 | 6 | object DataSet { 7 | def createDataset(n: Int): Seq[Nobid] = Seq.fill(n) { 8 | Nobid( 9 | adnwId = 12345, 10 | appName = "sampleApp", 11 | auctionId = "14241c7f-7db1-4bcd-a3f7-82885e08e7ec", 12 | host = "prd-dsp03", 13 | loggedAt = "2017-06-30 09:07:37.677", 14 | mId = 234, 15 | nbr = 6260, 16 | page = "http://diamond.jp/articles/a/15434", 17 | resTime = 4, 18 | spot = Some(Spot( 19 | id = 2406, 20 | name = "Mie" 21 | )), 22 | history = Seq( 23 | "a", 24 | "b", 25 | "c" 26 | ), 27 | tags = Map( 28 | "media" -> "facebook", 29 | "ssp" -> "google" 30 | ) 31 | ) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /schema/protocol-buffers/nobid.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package protobuf; 3 | option java_package = "com.github.saint1991.serialization.benchmark.protobuf"; 4 | 5 | // SPEED (default) | CODE_SIZE | LITE_RUNTIME 6 | option optimize_for = SPEED; 7 | 8 | // possible to import definitions from other files 9 | import "spot.proto"; 10 | 11 | // message names should be CamelCase 12 | message Nobid { 13 | 14 | // field names should be snake_case 15 | // each field should have a unique index 16 | // the required directive indicates a field must exist (proto2 only; proto3 has no required fields) 17 | int32 adnw_id = 1; 18 | string app_name = 2; 19 | string auction_id = 3; 20 | string host = 4; 21 | string logged_at = 5; 22 | int32 m_id = 6; 23 | int32 nbr = 7; 24 | string page = 8; 25 | int32 res_time = 9; 26 | protobuf.Spot spot = 10; 27 | 28 | 29 | repeated string history = 11; 30 | map<string, string> tags = 12; 31 | 32 | // it's possible to reserve some indices for future use 33 | reserved 13 to 15; 34 | } 35 | 36 | message Response { 37 | int32 status_code = 1; 38 | string message = 2; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /msgpack4z-bench/src/main/scala/com/github/saint1991/serialization/benchmark/msgpack/msgpack4z/Codec.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.msgpack.msgpack4z 2 | 3 | import msgpack4z._ 4 | import msgpack4z.CodecInstances.all._ 5 | 6 | import com.github.saint1991.serialization.benchmark.dataset.{Nobid, Spot} 7 | 8 | object Codec { 9 | 10 | private [this] final val Factory: PackerUnpackerFactory = new PackerUnpackerFactory { 11 | def packer: MsgOutBuffer = MsgOutBuffer.create() 12 | def unpacker(bytes: Array[Byte]) = MsgInBuffer(bytes) 13 | } 14 | 15 | private [this] final val
Codec: CaseMapCodec[String] = CaseMapCodec.string(Factory) 16 | 17 | implicit val spotCodec: MsgpackCodec[Spot] = Codec.codec(Spot.apply _, Spot.unapply _)("id", "name") 18 | val codec: MsgpackCodec[Nobid] = Codec.codec(Nobid.apply _, Nobid.unapply _)( 19 | "adnwId", 20 | "appName", 21 | "auctionId", 22 | "host", 23 | "loggedAt", 24 | "mId", 25 | "nbr", 26 | "page", 27 | "resTime", 28 | "spot", 29 | "history", 30 | "tags" 31 | ) 32 | 33 | } 34 | -------------------------------------------------------------------------------- /jsoniter-scala-bench/src/main/scala/com/github/saint1991/serialization/benchmark/jsoniter/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.jsoniter 2 | 3 | import java.io.{BufferedOutputStream, FileOutputStream} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import com.github.plokhotnyuk.jsoniter_scala.core._ 8 | import com.github.plokhotnyuk.jsoniter_scala.macros._ 9 | 10 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 11 | import com.github.saint1991.serialization.benchmark.dataset._ 12 | import com.github.saint1991.serialization.benchmark.FileUtil 13 | 14 | object FileGen extends App { 15 | 16 | val dataset = DataSet.createDataset(DatasetSize) 17 | 18 | final val outFile = FileUtil.mkOutFile("nobid-jsoniter.json") 19 | 20 | implicit val codec: JsonValueCodec[Nobid] = JsonCodecMaker.make[Nobid](CodecMakerConfig()) 21 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 22 | 23 | allCatch andFinally { 24 | out.flush() 25 | out.close() 26 | } apply dataset.foreach(x => writeToStream(x, out)) 27 | } 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 mizuno 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /circe-bench/src/main/scala/com/github/saint1991/serialization/benchmark/circe/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.circe 2 | 3 | import java.io.{BufferedOutputStream, FileOutputStream, PrintWriter} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import io.circe.generic.auto._ 8 | import io.circe.syntax._ 9 | 10 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 11 | import com.github.saint1991.serialization.benchmark.dataset._ 12 | import com.github.saint1991.serialization.benchmark.FileUtil 13 | 14 | object FileGen extends App { 15 | 16 | val dataset = DataSet.createDataset(DatasetSize) 17 | 18 | final val outFile = FileUtil.mkOutFile("nobid-circe.json") 19 | val out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(outFile.toJava))) 20 | 21 | allCatch andFinally { 22 | out.flush() 23 | out.close() 24 | } apply writeToFile(dataset, out) 25 | 26 | private def writeToFile(dataset: Seq[Nobid], out: PrintWriter): Unit = 27 | dataset.foreach { r => 28 | val record = r.asJson.noSpaces 29 | out.println(record) 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /msgpack4z-bench/README.md: -------------------------------------------------------------------------------- 1 | 2 | # MessagePack 3 | 4 | MessagePack is a kind of serialization format. 5 | It is schema-less, like JSON. 6 | 7 | ## Type system 8 | 9 | MessagePack has seven built-in types and an extension type. 10 | 11 | ### Built-in types 12 | 13 | - Integer: represents an integer with Long precision in most languages. 14 | - Nil: represents nil. 15 | - Boolean: represents true or false. 16 | - Float: represents a floating-point number with double precision in most languages. 17 | - Raw 18 | - String: represents a UTF-8 string. 19 | - Binary: represents a byte array. 20 | - Array: represents a sequence of objects. 21 | - Map: represents key-value pairs of objects. 22 | 23 | ### Extension types 24 | 25 | The extension type is a mechanism for defining application-specific data types. 26 | It consists of magic bytes, another magic byte representing its type, and the data as an array of bytes. 27 | 28 | ## Efficiency 29 | 30 | MessagePack is an efficient and safe format. 31 | A value generally consists of magic bytes followed by the data as an array of bytes. 32 | Data is serialized into variable-length binary, so its size becomes smaller in many cases.
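As a concrete illustration, the following minimal sketch round-trips one record through the msgpack4z codecs defined in this repository (the `SizeCheck` object name is mine; it assumes the `Codec` and `Spot` definitions shown elsewhere in this project):

```scala
package com.github.saint1991.serialization.benchmark.msgpack.msgpack4z

import msgpack4z.{MsgInBuffer, MsgOutBuffer}

import com.github.saint1991.serialization.benchmark.dataset.Spot
import com.github.saint1991.serialization.benchmark.msgpack.msgpack4z.Codec.spotCodec

object SizeCheck extends App {

  val spot = Spot(id = 2406, name = "Mie")

  // pack into variable-length binary
  val bytes: Array[Byte] = spotCodec.toBytes(spot, MsgOutBuffer.create())
  println(s"encoded size: ${bytes.length} bytes")

  // unpack and verify the round trip
  val decoded = spotCodec.unpack(MsgInBuffer(bytes)).getOrElse(sys.error("error on unpacking"))
  assert(decoded == spot)
}
```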
33 | -------------------------------------------------------------------------------- /thrift-bench/src/main/scala/com/github/saint1991/serialization/benchmark/thrift/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.thrift 2 | 3 | import java.io._ 4 | import java.nio.ByteBuffer 5 | 6 | import scala.util.control.Exception._ 7 | 8 | import org.apache.thrift.protocol.{TCompactProtocol, TProtocol} 9 | import org.apache.thrift.transport.TIOStreamTransport 10 | 11 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 12 | import com.github.saint1991.serialization.benchmark.FileUtil 13 | 14 | object FileGen extends App { 15 | 16 | val dataset = DataSet.createDataset(DatasetSize) 17 | 18 | val outFile = FileUtil.mkOutFile("nobid.thrift") 19 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 20 | val outProtocol = new TCompactProtocol(new TIOStreamTransport(out)) 21 | 22 | allCatch andFinally { 23 | out.flush() 24 | out.close() 25 | } apply writeToFile(dataset, outProtocol) 26 | 27 | private def writeToFile(dataset: Seq[Nobid], outProtocol: TProtocol): Unit = 28 | dataset.foreach { r => 29 | r.write(outProtocol) 30 | outProtocol.writeBinary(ByteBuffer.wrap(FileUtil.NewLineBytes)) 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /common/src/main/scala/com/github/saint1991/serialization/benchmark/dataset/DataSet.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.dataset 2 | 3 | case class Spot( 4 | id: Int, 5 | name: String 6 | ) 7 | 8 | case class Nobid( 9 | adnwId: Int, 10 | appName: String, 11 | auctionId: String, 12 | host: String, 13 | loggedAt: String, 14 | mId: Int, 15 | nbr: Int, 16 | page: String, 17 | resTime: Int, 18 | spot: Spot, 19 | history: List[String], 20 | tags: Map[String, String] 21 | ) 22 | 23 | object DataSet { 24 | def createDataset(n: Int): Seq[Nobid] = Seq.fill(n) { 25 | Nobid( 26 | adnwId = 12345, 27 | appName = "sampleApp", 28 | auctionId = "14241c7f-7db1-4bcd-a3f7-82885e08e7ec", 29 | host = "prd-dsp03", 30 | loggedAt = "2017-06-30 09:07:37.677", 31 | mId = 234, 32 | nbr = 6260, 33 | page = "http://diamond.jp/articles/-/15434", 34 | resTime = 4, 35 | spot = Spot( 36 | id = 2406, 37 | name = "Mie" 38 | ), 39 | history = List( 40 | "a", 41 | "b", 42 | "c" 43 | ), 44 | tags = Map( 45 | "media" -> "facebook", 46 | "ssp" -> "google" 47 | ) 48 | ) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /schema/thrift/README.md: -------------------------------------------------------------------------------- 1 | # Apache Thrift 2 | 3 | ### Installation (Mac) 4 | ``` 5 | brew install thrift 6 | ``` 7 | 8 | ### Schema 9 | 10 | - Similar to proto2, all fields are uniquely indexed so that receivers can decode data based on those indices.
11 | 12 | ```nobid.thrift 13 | include "spot.thrift" 14 | namespace java com.github.saint1991.samples 15 | 16 | struct Nobid { 17 | 1: required i32 adnw_id, 18 | 2: required string app_name, 19 | 3: required string auction_id, 20 | 4: required string host, 21 | 5: required string logged_at, 22 | 6: required i32 m_id, 23 | 7: required i32 nbr, 24 | 8: optional string page, 25 | 9: required i32 res_time, 26 | 10:required spot.Spot spot, 27 | 11:optional list<string> history, 28 | 12:optional map<string, string> tags 29 | } 30 | ``` 31 | 32 | ```spot.thrift 33 | namespace java com.github.saint1991.samples 34 | 35 | enum SpotType { 36 | A, 37 | S 38 | } 39 | 40 | struct Spot { 41 | 1:required i32 id 42 | 2:required SpotType type 43 | } 44 | ``` 45 | 46 | ### Code generation 47 | ``` 48 | thrift --gen java nobid.thrift 49 | ``` -------------------------------------------------------------------------------- /csv-bench/src/main/scala/com/github/saint1991/serialization/benchmark/csv/Csv.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.csv 2 | 3 | import com.github.saint1991.serialization.benchmark.dataset._ 4 | 5 | object Csv { 6 | 7 | def toCsv(nobid: Nobid): String = { 8 | val tags = nobid.tags.map(entry => s"${entry._1}#${entry._2}") 9 | s"${nobid.adnwId},${nobid.appName},${nobid.auctionId},${nobid.host},${nobid.loggedAt},${nobid.mId},${nobid.nbr},${nobid.page},${nobid.resTime},${nobid.spot.id}_${nobid.spot.name},${nobid.history.mkString("_")},${tags.mkString("_")}" 10 | } 11 | 12 | def fromCsv(csv: String): Nobid = { 13 | val line = csv.split(",") 14 | val spot = line(9).split("_") 15 | val tags = line(11).split("_").map { i => 16 | val entry = i.split("#") 17 | entry(0) -> entry(1) 18 | }.toMap 19 | 20 | Nobid( 21 | adnwId = line(0).toInt, 22 | appName = line(1), 23 | auctionId = line(2), 24 | host = line(3), 25 | loggedAt = line(4), 26 | mId = line(5).toInt, 27 | nbr = line(6).toInt, 28 | page = line(7), 29 | resTime = line(8).toInt, 30 | spot = Spot(spot(0).toInt, spot(1)), 31 | history = line(10).split("_").toList, 32 | tags = tags 33 | ) 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /schema/avro/nobid.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "com.github.saint1991.serialization.benchmark.avro", 3 | "name":"Nobid", 4 | "type":"record", 5 | "fields":[ 6 | {"name":"adnwId","type":"int"}, 7 | {"name":"appName","type":"string"}, 8 | {"name":"auctionId","type":"string"}, 9 | {"name":"host","type":"string"}, 10 | {"name":"loggedAt","type":"string"}, 11 | {"name":"mId","type":"int"}, 12 | {"name":"nbr","type":"int"}, 13 | {"name":"page","type":["null", "string"], "default": null}, 14 | {"name":"resTime","type":"int"}, 15 | {"name":"spot","type": { 16 | "name": "spotRecord", 17 | "type": "record", 18 | "fields": [ 19 | {"name": "id", "type": "int"}, 20 | {"name": "name", "type": "string"} 21 | ] 22 | }}, 23 | {"name": "history", "type": { 24 | "name": "historyItems", 25 | "type": "array", 26 | "items": { 27 | "name": "historyItem", 28 | "type": "string" 29 | } 30 | }}, 31 | {"name": "tags", "type": { 32 | "name": "tag", 33 | "type": "map", 34 | "values": "string" 35 | }} 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /avro-bench/src/main/scala/com/github/saint1991/serialization/benchmark/avro/FileGen.scala:
-------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.avro 2 | 3 | import java.io.{BufferedOutputStream, FileOutputStream} 4 | 5 | import scala.util.control.Exception.allCatch 6 | 7 | import org.apache.avro.file.DataFileWriter 8 | import org.apache.avro.specific.SpecificDatumWriter 9 | 10 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 11 | import com.github.saint1991.serialization.benchmark.FileUtil 12 | 13 | object FileGen extends App { 14 | 15 | final val Schema = Nobid.SCHEMA$ 16 | val writer = new SpecificDatumWriter[Nobid](Schema) 17 | 18 | val dataset = DataSet.createDataset(DatasetSize) 19 | 20 | val outFile = FileUtil.mkOutFile("nobid.avro") 21 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 22 | val outFileWriter = new DataFileWriter[Nobid](writer) 23 | 24 | // write to file 25 | allCatch andFinally { 26 | outFileWriter.flush() 27 | outFileWriter.close() 28 | } apply writeToFile(dataset, outFileWriter) 29 | 30 | private def writeToFile(dataset: Seq[Nobid], writer: DataFileWriter[Nobid]): Unit = { 31 | writer.create(Schema, out) 32 | dataset.foreach { nobid => writer.append(nobid) } 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /proto-bench/src/test/scala/com/github/saint1991/serialization/benchmark/protobuf/ProtoBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.protobuf 2 | 3 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 4 | 5 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 6 | import com.github.saint1991.serialization.benchmark.protobuf.nobid.Nobid 7 | 8 | @State(Scope.Thread) 9 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 10 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 11 | @Fork(value = 1, jvmArgs = Array( 12 | "-server", 13 | "-Xms2g", 14 | "-Xmx2g", 15 | "-XX:NewSize=1g", 16 | "-XX:MaxNewSize=1g", 17 | "-XX:InitialCodeCacheSize=512m", 18 | "-XX:ReservedCodeCacheSize=512m", 19 | "-XX:+UseParallelGC", 20 | "-XX:-UseBiasedLocking", 21 | "-XX:+AlwaysPreTouch" 22 | )) 23 | @OutputTimeUnit(TUnit) 24 | class ProtoBench { 25 | 26 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 27 | 28 | val encodedDataset: Seq[Array[Byte]] = encode() 29 | decode() 30 | 31 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 32 | def encode(): Seq[Array[Byte]] = dataset.map(_.toByteArray) 33 | 34 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 35 | def decode(): Seq[Nobid] = encodedDataset.map(Nobid.parseFrom) 36 | } 37 | -------------------------------------------------------------------------------- /msgpack4z-bench/src/main/scala/com/github/saint1991/serialization/benchmark/msgpack/msgpack4z/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.msgpack.msgpack4z 2 | 3 | import java.io.{BufferedOutputStream, FileOutputStream, OutputStream} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import msgpack4z._ 8 | 9 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 10 | import com.github.saint1991.serialization.benchmark.dataset.DataSet 11 | import
com.github.saint1991.serialization.benchmark.FileUtil 12 | import com.github.saint1991.serialization.benchmark.FileUtil.NewLineBytes 13 | import com.github.saint1991.serialization.benchmark.msgpack.msgpack4z.Codec._ 14 | 15 | object FileGen extends App { 16 | 17 | val dataset = DataSet.createDataset(DatasetSize) 18 | 19 | final val outFile = FileUtil.mkOutFile("nobid-msgpack4z.msgpack") 20 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 21 | 22 | // write to file 23 | allCatch andFinally { 24 | out.flush() 25 | out.close() 26 | } apply writeToFile(dataset, codec, out) 27 | 28 | private def writeToFile[T](dataset: Seq[T], codec: MsgpackCodec[T], file: OutputStream): Unit = 29 | dataset.foreach { r => 30 | val packer = MsgOutBuffer.create() 31 | val bytes = codec.toBytes(r, packer) 32 | file.write(bytes) 33 | file.write(NewLineBytes) 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /csv-bench/src/test/scala/com/github/saint1991/serialization/benchmark/csv/CsvBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.csv 2 | 3 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 4 | 5 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 6 | import com.github.saint1991.serialization.benchmark.dataset._ 7 | 8 | @State(Scope.Thread) 9 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 10 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 11 | @Fork(value = 1, jvmArgs = Array( 12 | "-server", 13 | "-Xms2g", 14 | "-Xmx2g", 15 | "-XX:NewSize=1g", 16 | "-XX:MaxNewSize=1g", 17 | "-XX:InitialCodeCacheSize=512m", 18 | "-XX:ReservedCodeCacheSize=512m", 19 | "-XX:+UseParallelGC", 20 | "-XX:-UseBiasedLocking", 21 | "-XX:+AlwaysPreTouch" 22 | )) 23 | @OutputTimeUnit(TUnit) 24 | class CsvBench { 25 | import com.github.saint1991.serialization.benchmark.csv.Csv._ 26 | 27 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 28 | 29 | val encodedDataset: Seq[String] = encode() 30 | decode() 31 | 32 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 33 | def encode(): Seq[String] = { 34 | dataset.map(toCsv) 35 | } 36 | 37 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 38 | def decode(): Seq[Nobid] = { 39 | encodedDataset.map(fromCsv) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /msgpack-jackson-bench/src/main/scala/serialization/benchmark/msgpack/jackson/FileGen.scala: -------------------------------------------------------------------------------- 1 | package serialization.benchmark.msgpack.jackson 2 | 3 | import java.io._ 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import com.fasterxml.jackson.core.JsonGenerator 8 | import com.fasterxml.jackson.databind.ObjectMapper 9 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 10 | import org.msgpack.jackson.dataformat.MessagePackFactory 11 | 12 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 13 | import com.github.saint1991.serialization.benchmark.dataset.{DataSet, Nobid} 14 | import com.github.saint1991.serialization.benchmark.FileUtil 15 | import com.github.saint1991.serialization.benchmark.FileUtil.NewLineBytes 16 | 17 | object FileGen extends App { 18 | 19 | val dataset = DataSet.createDataset(DatasetSize) 20 | 21 | 
val mapper = new ObjectMapper(new MessagePackFactory()) 22 | mapper.configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false) 23 | mapper.registerModule(DefaultScalaModule) 24 | 25 | final val outFile = FileUtil.mkOutFile("nobid-jackson.msgpack") 26 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 27 | 28 | // write to file 29 | allCatch andFinally { 30 | out.flush() 31 | out.close() 32 | } apply writeToFile(dataset, mapper, out) 33 | 34 | private def writeToFile(dataset: Seq[Nobid], mapper: ObjectMapper, file: OutputStream): Unit = 35 | dataset.foreach { r => 36 | mapper.writeValue(file, r) 37 | file.write(NewLineBytes) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /jsoniter-scala-bench/src/test/scala/com/github/saint1991/serialization/benchmark/jsoniter/JsonIterScalaBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.jsoniter 2 | 3 | import com.github.plokhotnyuk.jsoniter_scala.core._ 4 | import com.github.plokhotnyuk.jsoniter_scala.macros._ 5 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 6 | 7 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 8 | import com.github.saint1991.serialization.benchmark.dataset._ 9 | 10 | @State(Scope.Thread) 11 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 12 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 13 | @Fork(value = 1, jvmArgs = Array( 14 | "-server", 15 | "-Xms2g", 16 | "-Xmx2g", 17 | "-XX:NewSize=1g", 18 | "-XX:MaxNewSize=1g", 19 | "-XX:InitialCodeCacheSize=512m", 20 | "-XX:ReservedCodeCacheSize=512m", 21 | "-XX:+UseParallelGC", 22 | "-XX:-UseBiasedLocking", 23 | "-XX:+AlwaysPreTouch" 24 | )) 25 | @OutputTimeUnit(TUnit) 26 | class JsonIterScalaBench { 27 | 28 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 29 | 30 | implicit val codec: JsonValueCodec[Nobid] = JsonCodecMaker.make[Nobid](CodecMakerConfig()) 31 | 32 | val encodedDataset: Seq[Array[Byte]] = encode() 33 | decode() 34 | 35 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 36 | def encode(): Seq[Array[Byte]] = dataset.map(x => writeToArray(x)) 37 | 38 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 39 | def decode(): Seq[Nobid] = encodedDataset.map(str => readFromArray(str)) 40 | } 41 | 42 | -------------------------------------------------------------------------------- /circe-bench/src/test/scala/com/github/saint1991/serialization/benchmark/circe/CirceBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.circe 2 | 3 | import java.nio.charset.StandardCharsets 4 | 5 | import io.circe.syntax._ 6 | import io.circe.generic.auto._ 7 | import io.circe.parser._ 8 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 9 | 10 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 11 | import com.github.saint1991.serialization.benchmark.dataset._ 12 | 13 | @State(Scope.Thread) 14 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 15 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 16 | @Fork(value = 1, jvmArgs = Array( 17 | "-server", 18 | "-Xms2g", 19 | "-Xmx2g", 20 | "-XX:NewSize=1g", 21 | 
"-XX:MaxNewSize=1g", 22 | "-XX:InitialCodeCacheSize=512m", 23 | "-XX:ReservedCodeCacheSize=512m", 24 | "-XX:+UseParallelGC", 25 | "-XX:-UseBiasedLocking", 26 | "-XX:+AlwaysPreTouch" 27 | )) 28 | @OutputTimeUnit(TUnit) 29 | class CirceBench { 30 | 31 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 32 | 33 | val encodedDataset: Seq[Array[Byte]] = encode() 34 | decode() 35 | 36 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 37 | def encode(): Seq[Array[Byte]] = { 38 | dataset.map(_.asJson.noSpaces.getBytes(StandardCharsets.UTF_8)) 39 | } 40 | 41 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 42 | def decode(): Seq[Nobid] = { 43 | encodedDataset.map(str => parse(new String(str, StandardCharsets.UTF_8)).right.get.as[Nobid].right.get) 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /msgpack4z-bench/src/test/scala/com/github/saint1991/serialization/benchmark/msgpack/msgpack4z/Msgpack4zBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.msgpack.msgpack4z 2 | 3 | import msgpack4z.{MsgInBuffer, MsgOutBuffer} 4 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 5 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.{DatasetSize, Iteration, TUnit, WarmUpIteration} 6 | import com.github.saint1991.serialization.benchmark.dataset.{DataSet, Nobid} 7 | import com.github.saint1991.serialization.benchmark.msgpack.msgpack4z.Codec._ 8 | 9 | @State(Scope.Thread) 10 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 11 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 12 | @Fork(value = 1, jvmArgs = Array( 13 | "-server", 14 | "-Xms2g", 15 | "-Xmx2g", 16 | "-XX:NewSize=1g", 17 | "-XX:MaxNewSize=1g", 18 | "-XX:InitialCodeCacheSize=512m", 19 | "-XX:ReservedCodeCacheSize=512m", 20 | "-XX:+UseParallelGC", 21 | "-XX:-UseBiasedLocking", 22 | "-XX:+AlwaysPreTouch" 23 | )) 24 | @OutputTimeUnit(TUnit) 25 | class Msgpack4zBench { 26 | 27 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 28 | val encodedDataset: Seq[Array[Byte]] = encode() 29 | decode() 30 | 31 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 32 | def encode(): Seq[Array[Byte]] = dataset.map { r => codec.toBytes(r, MsgOutBuffer.create()) } 33 | 34 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 35 | def decode(): Seq[Nobid] = encodedDataset.map { bytes => 36 | codec.unpack(MsgInBuffer(bytes)).getOrElse(throw new Exception("error on unpacking")) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /thrift-bench/src/test/scala/com/github/saint1991/serialization/benchmark/thrift/ThriftBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.thrift 2 | 3 | import org.apache.thrift.protocol.TCompactProtocol 4 | import org.apache.thrift.transport.{TMemoryBuffer, TMemoryInputTransport} 5 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 6 | 7 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 8 | 9 | @State(Scope.Thread) 10 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 11 | @Measurement(iterations = Iteration, time = 1, timeUnit = 
TUnit) 12 | @Fork(value = 1, jvmArgs = Array( 13 | "-server", 14 | "-Xms2g", 15 | "-Xmx2g", 16 | "-XX:NewSize=1g", 17 | "-XX:MaxNewSize=1g", 18 | "-XX:InitialCodeCacheSize=512m", 19 | "-XX:ReservedCodeCacheSize=512m", 20 | "-XX:+UseParallelGC", 21 | "-XX:-UseBiasedLocking", 22 | "-XX:+AlwaysPreTouch" 23 | )) 24 | @OutputTimeUnit(TUnit) 25 | class ThriftBench { 26 | 27 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 28 | 29 | val encodedDataset: Seq[Array[Byte]] = encode() 30 | decode() 31 | 32 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 33 | def encode(): Seq[Array[Byte]] = { 34 | dataset.map { r => 35 | val buf = new TMemoryBuffer(0) 36 | val outProtocol = new TCompactProtocol(buf) 37 | r.write(outProtocol) 38 | buf.getArray 39 | } 40 | } 41 | 42 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 43 | def decode(): Seq[Nobid] = { 44 | encodedDataset.map { r => 45 | val buf = new TMemoryInputTransport(r) 46 | val inProtocol = new TCompactProtocol(buf) 47 | Nobid.decode(inProtocol) 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /msgpack-jackson-bench/src/test/scala/com/github/saint1991/serialization/benchmark/msgpack/jackson/MsgpackJacksonBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.msgpack.jackson 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 5 | import org.msgpack.jackson.dataformat.MessagePackFactory 6 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 7 | 8 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 9 | import com.github.saint1991.serialization.benchmark.dataset.{DataSet, Nobid} 10 | 11 | @State(Scope.Thread) 12 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 13 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 14 | @Fork(value = 1, jvmArgs = Array( 15 | "-server", 16 | "-Xms2g", 17 | "-Xmx2g", 18 | "-XX:NewSize=1g", 19 | "-XX:MaxNewSize=1g", 20 | "-XX:InitialCodeCacheSize=512m", 21 | "-XX:ReservedCodeCacheSize=512m", 22 | "-XX:+UseParallelGC", 23 | "-XX:-UseBiasedLocking", 24 | "-XX:+AlwaysPreTouch" 25 | )) 26 | @OutputTimeUnit(TUnit) 27 | class MsgpackJacksonBench { 28 | 29 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 30 | 31 | val mapper = new ObjectMapper(new MessagePackFactory()) 32 | mapper.registerModule(DefaultScalaModule) 33 | 34 | val encodedDataset: Seq[Array[Byte]] = encode() 35 | decode() 36 | 37 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 38 | def encode(): Seq[Array[Byte]] = dataset.map(x => mapper.writeValueAsBytes(x)) 39 | 40 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 41 | def decode(): Seq[Nobid] = encodedDataset.map(bytes => mapper.readValue[Nobid](bytes, classOf[Nobid])) 42 | } 43 | -------------------------------------------------------------------------------- /schema/avro/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Apache Avro 3 | 4 | ## languages 5 | officially supported 6 | - C 7 | - C++ 8 | - C# 9 | - Java 10 | - Perl 11 | - Python 12 | - Ruby 13 | - PHP 14 | 15 | ## schema 16 | - No field index is included in data 17 | - decoding is always based on writer's (and reader's for schema evolution) schema. 
18 | - Dynamic schema resolution 19 | - no need to generate code in advance 20 | - encoding/decoding is conducted according only to the schema 21 | 22 | 23 | ``` nobid.avsc 24 | { 25 | "namespace": "com.github.saint1991.samples", 26 | "name":"Nobid", 27 | "type":"record", 28 | "fields":[ 29 | {"name":"adnwId","type": "int"}, 30 | {"name":"auctionId","type":"string"}, 31 | {"name":"host","type":"string"}, 32 | {"name":"loggedAt","type":"string"}, 33 | {"name":"mId","type":"int"}, 34 | {"name":"nbr","type":"int"}, 35 | {"name":"page","type":["null", "string"], "default": null}, 36 | {"name":"resTime","type":"int"}, 37 | {"name":"spot","type": { 38 | "name": "spotRecord", 39 | "type": "record", 40 | "fields": [ 41 | {"name": "id", "type": "int"}, 42 | {"name": "type", "type": { 43 | "name": "spotType", 44 | "type": "enum", 45 | "symbols": ["A", "S"] 46 | }} 47 | ] 48 | }}, 49 | {"name": "history", "type": { 50 | "name": "historyItems", 51 | "type": "array", 52 | "items": { 53 | "name": "historyItem", 54 | "type": "string" 55 | } 56 | }}, 57 | {"name": "tags", "type": { 58 | "name": "tag", 59 | "type": "map", 60 | "values": "string" 61 | }} 62 | ] 63 | } 64 | 65 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/sbt,scala,intellij 3 | 4 | ### Intellij ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff: 9 | .idea/** 10 | scala/.idea/workspace.xml 11 | .idea/**/tasks.xml 12 | .idea/dictionaries 13 | 14 | # Sensitive or high-churn files: 15 | .idea/**/dataSources/ 16 | .idea/**/dataSources.ids 17 | .idea/**/dataSources.xml 18 | .idea/**/dataSources.local.xml 19 | .idea/**/sqlDataSources.xml 20 | .idea/**/dynamic.xml 21 | .idea/**/uiDesigner.xml 22 | 23 | # Gradle: 24 | .idea/**/gradle.xml 25 | .idea/**/libraries 26 | 27 | # CMake 28 | cmake-build-debug/ 29 | 30 | # Mongo Explorer plugin: 31 | .idea/**/mongoSettings.xml 32 | 33 | ## File-based project format: 34 | *.iws 35 | 36 | ## Plugin-specific files: 37 | 38 | # IntelliJ 39 | /schema/out/ 40 | 41 | # mpeltonen/sbt-idea plugin 42 | .idea_modules/ 43 | 44 | # JIRA plugin 45 | atlassian-ide-plugin.xml 46 | 47 | # Cursive Clojure plugin 48 | .idea/replstate.xml 49 | 50 | # Crashlytics plugin (for Android Studio and IntelliJ) 51 | com_crashlytics_export_strings.xml 52 | crashlytics.properties 53 | crashlytics-build.properties 54 | fabric.properties 55 | 56 | ### Intellij Patch ### 57 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 58 | 59 | # *.iml 60 | # modules.xml 61 | # .idea/misc.xml 62 | # *.ipr 63 | 64 | # Sonarlint plugin 65 | .idea/sonarlint 66 | 67 | ### SBT ### 68 | # Simple Build Tool 69 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control 70 | 71 | dist/* 72 | target/ 73 | lib_managed/ 74 | src_managed/ 75 | project/boot/ 76 | project/plugins/project/ 77 | .history 78 | .cache 79 | .lib/ 80 | 81 | ### Scala ### 82 | *.class 83 | *.log 84 | 85 | 86 | out 87 | # End of https://www.gitignore.io/api/sbt,scala,intellij -------------------------------------------------------------------------------- /avro-bench/src/test/scala/com/github/saint1991/serialization/benchmark/avro/AvroBench.scala:
-------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.avro 2 | 3 | import java.io.{ByteArrayInputStream, ByteArrayOutputStream} 4 | 5 | import org.apache.avro.io.{DecoderFactory, EncoderFactory} 6 | import org.apache.avro.specific.{SpecificDatumReader, SpecificDatumWriter} 7 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 8 | 9 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 10 | 11 | @State(Scope.Thread) 12 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 13 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 14 | @Fork(value = 1, jvmArgs = Array( 15 | "-server", 16 | "-Xms2g", 17 | "-Xmx2g", 18 | "-XX:NewSize=1g", 19 | "-XX:MaxNewSize=1g", 20 | "-XX:InitialCodeCacheSize=512m", 21 | "-XX:ReservedCodeCacheSize=512m", 22 | "-XX:+UseParallelGC", 23 | "-XX:-UseBiasedLocking", 24 | "-XX:+AlwaysPreTouch" 25 | )) 26 | @OutputTimeUnit(TUnit) 27 | class AvroBench { 28 | 29 | final val Schema = Nobid.SCHEMA$ 30 | val writer = new SpecificDatumWriter[Nobid](Schema) 31 | val reader = new SpecificDatumReader[Nobid](Schema) 32 | 33 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 34 | val encoded: Seq[Array[Byte]] = encode() 35 | decode() 36 | 37 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 38 | def encode(): Seq[Array[Byte]] = { 39 | dataset.map { nobid => 40 | val ostream = new ByteArrayOutputStream() 41 | val encoder = EncoderFactory.get().binaryEncoder(ostream, null) 42 | writer.write(nobid, encoder) 43 | encoder.flush() 44 | ostream.toByteArray 45 | } 46 | } 47 | 48 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 49 | def decode(): Seq[Nobid] = { 50 | encoded.map { record => 51 | val istream = new ByteArrayInputStream(record) 52 | val decoder = DecoderFactory.get.binaryDecoder(istream, null) 53 | reader.read(null, decoder) 54 | } 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Benchmarks against Serialization Systems 3 | [![Build Status](https://travis-ci.org/saint1991/serialization-benchmark.svg?branch=master)](https://travis-ci.org/saint1991/serialization-benchmark) 4 | 5 | 6 | ## Characteristics 7 | 8 | | | Protocol Buffers (proto3) | Thrift (compact protocol) | Avro | CSV | JSON | MessagePack | 9 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 10 | | Schema-less? | No | No | No | No | Yes | Yes | 11 | | Require compiling schema in advance? | Yes | Yes | No | No | No | No | 12 | 13 | ## Run benchmarks 14 | ``` 15 | $ sbt $project/jmh:run 16 | ``` 17 | where $project is the name of an sbt subproject (e.g. avroBench) 18 | 19 | 20 | ## Sample data file generation 21 | ``` 22 | $ sbt $project/run 23 | ``` 24 | 25 | ## Schemas 26 | The schemas used in this benchmark are under [schema](schema) 27 | 28 | ## Results 29 | 30 | **NOTE**: This benchmark was run under the specific conditions below; results may differ under other conditions. 31 | 32 | ### Benchmark setup 33 | 34 | - OS: Ubuntu 16.04 LTS 35 | - CPU: Intel(R) Xeon(R) CPU E5-2680 v3 2.50GHz, 4 cores 36 | - Memory: 14GB 37 | - JDK 1.8.0_171, Java HotSpot(TM) 64-Bit Server VM, 25.171-b11 38 | 39 | ### Average time to encode 100,000 records in milliseconds.
40 | 41 | - 20 warm-up iterations 42 | - Average of 20 iterations 43 | 44 | | Protocol Buffers (proto3) | Thrift (compact protocol) | Avro | CSV | JSON (with jsoniter-scala) | JSON (with circe) | MessagePack (jackson-module-msgpack) | MessagePack (msgpack4z) | 45 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 46 | | 43.0 | 235.8 | 232.6 | 116.8 | 74.6 | 488.7 | 354.8 | 358.0 | 47 | 48 | ### Average time to decode 100,000 binary records in milliseconds. 49 | 50 | - 20 warm-up iterations 51 | - Average of 20 iterations 52 | 53 | | Protocol Buffers (proto3) | Thrift (compact protocol) | Avro | CSV | JSON (with jsoniter-scala) | JSON (with circe) | MessagePack (jackson-module-msgpack) | MessagePack (msgpack4z) | 54 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 55 | | 139.5 | 162.9 | 586.0 | 160.8 | 151.3 | 503.5 | 414.9 | 609.5 | 56 | 57 | ### Data size of 100,000 encoded records in MB. 58 | 59 | | Protocol Buffers (proto3) | Thrift (compact protocol) | Avro | CSV | JSON | MessagePack | 60 | | :---: | :---: | :---: | :---: | :---: | :---: | 61 | | 18.5 | 18.3 | 16.7 | 17.4 | 32.2 | 25.7 | 62 | -------------------------------------------------------------------------------- /schema/protocol-buffers/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Protocol Buffers (by Google) 3 | 4 | ## Installation (Mac) 5 | ``` 6 | $ brew install protobuf 7 | ``` 8 | 9 | ## languages 10 | officially supported 11 | 12 | - C++ 13 | - Java 14 | - Python 15 | - Objective-C 16 | - C# 17 | - JavaScript 18 | - Ruby 19 | - Go 20 | - PHP 21 | - Dart 22 | 23 | 24 | ## schema 25 | - easier to keep compatibility during schema evolution thanks to the per-field indices embedded in the data 26 | - all fields are uniquely indexed so that receivers can decode data based on those indices.
27 | - possible to reserve indices for future use, e.g. `reserved 1 to 11;` 28 | - every type is mapped to a corresponding type in each language [(reference)](https://developers.google.com/protocol-buffers/docs/proto3#scalar) 29 | 30 | ``` nobid.proto 31 | syntax = "proto3"; 32 | package samples; 33 | 34 | // SPEED (default) | CODE_SIZE | LITE_RUNTIME 35 | option optimize_for = SPEED; 36 | 37 | // possible to import definitions from other files 38 | import "./spot.proto"; 39 | 40 | // message names should be CamelCase 41 | message Nobid { 42 | 43 | // field names should be snake_case 44 | // each field should have a unique index 45 | // the required directive indicates a field must exist (proto2 only; proto3 has no required fields) 46 | int32 adnw_id = 1; 47 | string app_name = 2; 48 | string auction_id = 3; 49 | string host = 4; 50 | string logged_at = 5; 51 | int32 m_id = 6; 52 | int32 nbr = 7; 53 | string page = 8; 54 | int32 res_time = 9; 55 | Spot spot = 10; 56 | 57 | 58 | repeated string history = 11; 59 | map<string, string> tags = 12; 60 | 61 | // it's possible to reserve some indices for future use 62 | reserved 13 to 15; 63 | } 64 | 65 | // an rpc interface is defined by a service 66 | // it corresponds to an interface or a trait in Java and Scala respectively 67 | service NobidBenchmark { 68 | // define a method that takes a Nobid parameter and returns a Response message (rpc methods must use message types, so a scalar such as int32 is not allowed) 69 | rpc benchmark (Nobid) returns (Response); 70 | } 71 | ``` 72 | 73 | ``` spot.proto 74 | syntax = "proto3"; 75 | package samples; 76 | 77 | // enum should be CamelCase and each member should be UNDERSCORE_SEPARATED_CAPITALS 78 | enum SpotType { 79 | A = 0; // the first enum value must be 0 in proto3 80 | S = 1; 81 | } 82 | 83 | message Spot { 84 | int32 id = 1; 85 | SpotType type = 2; 86 | } 87 | ``` 88 | 89 | ## code generation 90 | ``` 91 | protoc --proto_path=IMPORT_PATH \ 92 | [--cpp_out=DST_DIR] \ 93 | [--java_out=DST_DIR] \ 94 | [--python_out=DST_DIR] \ 95 | [--go_out=DST_DIR] \ 96 | [--ruby_out=DST_DIR] \ 97 | [--javanano_out=DST_DIR] \ 98 | [--objc_out=DST_DIR] \ 99 | [--csharp_out=DST_DIR] \ 100 | path/to/file.proto 101 | ``` 102 | 103 | For this sample, 104 | ``` 105 | protoc --proto_path=. --java_out=. nobid.proto 106 | ``` 107 | It generates Java classes into the directory specified by `--java_out` 108 | 109 | ## Binary format 110 | 111 | 112 | Messages are represented simply as key-value pairs, where each key consists of a field index and a [wire type](https://developers.google.com/protocol-buffers/docs/encoding#structure) 113 | 114 | ### example 115 | 116 | 117 | The following bytes represent that the value of field index 1 is 150. 118 | ``` 119 | 0000 1000 1001 0110 0000 0001 120 | ``` 121 | 122 | A key is always a varint, so remove the first bit (the most significant bit, which indicates whether further bytes follow): 123 | ``` 124 | 000 1000 1001 0110 0000 0001 125 | ``` 126 | 127 | The last 3 bits in the first byte `00001000` represent the wire type. 128 | Here, it is `000`, so the wire type is `Varint` according to the [reference](https://developers.google.com/protocol-buffers/docs/encoding#structure) 129 | The remaining bits are `0001`, so the field index is 1. 130 | 131 | The following bytes `1001 0110 0000 0001` represent the varint value for index 1. 132 | The first bit of each byte is the most significant bit; here it is 1, which means another byte follows. 133 | Dropping those flag bits and ordering the 7-bit groups from least significant first, it becomes `0000001 0010110`. 134 | Interpreting this as two's complement, we find it is 150. 135 | 136 | ### Varint 137 | Varint is a variable-length encoding of integers. 138 | For each byte, the first bit is a flag indicating whether further bytes follow
(1 means further bytes follow). 139 | Excluding those flag bits, the remaining 7-bit groups form a two's complement integer, least-significant group first. 140 | 141 | ### Strings 142 | A string consists of a varint followed by UTF-8 bytes. 143 | The varint part gives the length of the UTF-8 part. --------------------------------------------------------------------------------
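To make the varint walkthrough above concrete, here is a small Scala sketch (an editorial example, not part of this repository) that decodes the bytes from the example:

```scala
object VarintExample extends App {

  // Decodes a single varint: each byte contributes its lower 7 bits,
  // least-significant group first; a set MSB (0x80) means another byte follows.
  def decodeVarint(bytes: Array[Byte]): Long = {
    var result = 0L
    var shift = 0
    var i = 0
    var more = true
    while (more) {
      val b = bytes(i) & 0xFF
      result |= (b & 0x7FL) << shift
      more = (b & 0x80) != 0
      shift += 7
      i += 1
    }
    result
  }

  // Key byte 0x08: field index = 8 >> 3 = 1, wire type = 8 & 0x7 = 0 (Varint).
  val key = decodeVarint(Array(0x08.toByte))
  assert(key >> 3 == 1 && (key & 0x7) == 0)

  // Value bytes 0x96 0x01 decode to 150, as in the walkthrough.
  assert(decodeVarint(Array(0x96.toByte, 0x01.toByte)) == 150L)
}
```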