├── project ├── build.properties └── plugins.sbt ├── schema ├── thrift │ ├── spot.thrift │ ├── nobid.thrift │ └── README.md ├── protocol-buffers │ ├── spot.proto │ ├── nobid.proto │ └── README.md └── avro │ ├── nobid.avsc │ └── README.md ├── common └── src │ └── main │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ ├── BenchmarkSettings.scala │ ├── FileUtil.scala │ └── dataset │ └── DataSet.scala ├── .travis.yml ├── thrift-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── thrift │ │ ├── DataSet.scala │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── thrift │ └── ThriftBench.scala ├── avro-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── avro │ │ ├── DataSet.scala │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── avro │ └── AvroBench.scala ├── proto-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── protobuf │ │ ├── FileGen.scala │ │ └── DataSet.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── protobuf │ └── ProtoBench.scala ├── csv-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── csv │ │ ├── FileGen.scala │ │ └── Csv.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── csv │ └── CsvBench.scala ├── msgpack4z-bench ├── src │ ├── main │ │ └── scala │ │ │ └── com │ │ │ └── github │ │ │ └── saint1991 │ │ │ └── serialization │ │ │ └── benchmark │ │ │ └── msgpack │ │ │ └── msgpack4z │ │ │ ├── Codec.scala │ │ │ └── FileGen.scala │ └── test │ │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── msgpack │ │ └── msgpack4z │ │ └── Msgpack4zBench.scala └── README.md ├── jsoniter-scala-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── jsoniter │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── jsoniter │ └── JsonIterScalaBench.scala ├── LICENSE ├── circe-bench └── src │ ├── main │ └── scala │ │ └── com │ │ └── github │ │ └── saint1991 │ │ └── serialization │ │ └── benchmark │ │ └── circe │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── circe │ └── CirceBench.scala ├── msgpack-jackson-bench └── src │ ├── main │ └── scala │ │ └── serialization │ │ └── benchmark │ │ └── msgpack │ │ └── jackson │ │ └── FileGen.scala │ └── test │ └── scala │ └── com │ └── github │ └── saint1991 │ └── serialization │ └── benchmark │ └── msgpack │ └── jackson │ └── MsgpackJacksonBench.scala ├── .gitignore └── README.md /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.1.6 2 | -------------------------------------------------------------------------------- /schema/thrift/spot.thrift: -------------------------------------------------------------------------------- 1 | namespace java com.github.saint1991.serialization.benchmark.thrift 2 | 3 | struct Spot { 4 
| 1:required i32 id 5 | 2:required string name 6 | } 7 | -------------------------------------------------------------------------------- /schema/protocol-buffers/spot.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package protobuf; 3 | option java_package = "com.github.saint1991.serialization.benchmark.protobuf"; 4 | 5 | message Spot { 6 | int32 id = 1; 7 | string name = 2; 8 | } 9 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.3.4") 2 | addSbtPlugin("com.thesamet" % "sbt-protoc" % "0.99.18") 3 | addSbtPlugin("com.julianpeeters" % "sbt-avrohugger" % "2.0.0-RC9") 4 | addSbtPlugin("com.twitter" % "scrooge-sbt-plugin" % "18.5.0") 5 | 6 | libraryDependencies += "com.thesamet.scalapb" %% "compilerplugin" % "0.7.0" 7 | -------------------------------------------------------------------------------- /common/src/main/scala/com/github/saint1991/serialization/benchmark/BenchmarkSettings.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | object BenchmarkSettings { 6 | final val WarmUpIteration = 20 7 | final val Iteration = 20 8 | final val TUnit = TimeUnit.MILLISECONDS 9 | final val DatasetSize = 100000 10 | } 11 | -------------------------------------------------------------------------------- /common/src/main/scala/com/github/saint1991/serialization/benchmark/FileUtil.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark 2 | 3 | import better.files._ 4 | 5 | object FileUtil { 6 | 7 | final val OutDir = "out" 8 | final val NewLineBytes = "\n".getBytes 9 | 10 | def mkOutFile(name: String): File = (OutDir / name) 11 | .createIfNotExists(createParents = true) 12 | .clear() 13 | } 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | jdk: oraclejdk8 3 | cache: 4 | directories: 5 | - $HOME/.ivy2/cache 6 | - $HOME/.sbt/root 7 | before_script: 8 | - sbt jmh:compile 9 | - sbt avroBench/run circeBench/run csvBench/run jsoniterScalaBench/run msgpackJacksonBench/run msgpack4zBench/run protoBench/run thriftBench/run 10 | - sbt avroBench/jmh:run circeBench/jmh:run csvBench/jmh:run jsoniterScalaBench/jmh:run msgpackJacksonBench/jmh:run msgpack4zBench/jmh:run protoBench/jmh:run thriftBench/jmh:run 11 | -------------------------------------------------------------------------------- /schema/thrift/nobid.thrift: -------------------------------------------------------------------------------- 1 | include "spot.thrift" 2 | namespace java com.github.saint1991.serialization.benchmark.thrift 3 | 4 | struct Nobid { 5 | 1: required i32 adnw_id, 6 | 2: required string app_name, 7 | 3: required string auction_id, 8 | 4: required string host, 9 | 5: required string logged_at, 10 | 6: required i32 m_id, 11 | 7: required i32 nbr, 12 | 8: optional string page, 13 | 9: required i32 res_time, 14 | 10:required spot.Spot spot, 15 | 11:required list<string> history, 16 | 12:required map<string, string> tags 17 | } 18 | --------------------------------------------------------------------------------
/thrift-bench/src/main/scala/com/github/saint1991/serialization/benchmark/thrift/DataSet.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.thrift 2 | 3 | object DataSet { 4 | def createDataset(n: Int): Seq[Nobid] = Seq.fill(n) { 5 | Nobid( 6 | adnwId = 12345, 7 | appName = "sampleApp", 8 | auctionId = "14241c7f-7db1-4bcd-a3f7-82885e08e7ec", 9 | host = "prd-dsp03", 10 | loggedAt = "2017-06-30 09:07:37.677", 11 | mId = 234, 12 | nbr = 6260, 13 | page = Some("http://diamond.jp/articles/a/15434"), 14 | resTime = 4, 15 | spot = Spot( 16 | id = 2406, 17 | name = "Mie" 18 | ), 19 | history = Seq( 20 | "a", 21 | "b", 22 | "c" 23 | ), 24 | tags = Map( 25 | "media" -> "facebook", 26 | "ssp" -> "google" 27 | ) 28 | ) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /avro-bench/src/main/scala/com/github/saint1991/serialization/benchmark/avro/DataSet.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.avro 2 | 3 | object DataSet { 4 | 5 | def createDataset(n: Int): Seq[Nobid] = Seq.fill(n) { 6 | Nobid( 7 | adnwId = 12345, 8 | appName = "sampleApp", 9 | auctionId = "14241c7f-7db1-4bcd-a3f7-82885e08e7ec", 10 | host = "prd-dsp03", 11 | loggedAt = "2017-06-30 09:07:37.677", 12 | mId = 234, 13 | nbr = 6260, 14 | page = Some("http://diamond.jp/articles/-/15434"), 15 | resTime = 4, 16 | spot = spotRecord( 17 | id = 2406, 18 | name = "Mie" 19 | ), 20 | history = List( 21 | "a", 22 | "b", 23 | "c" 24 | ), 25 | tags = Map( 26 | "media" -> "facebook", 27 | "ssp" -> "google" 28 | ) 29 | ) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /proto-bench/src/main/scala/com/github/saint1991/serialization/benchmark/protobuf/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.protobuf 2 | 3 | import java.io.{FileOutputStream, OutputStream} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 8 | import com.github.saint1991.serialization.benchmark.FileUtil 9 | import com.github.saint1991.serialization.benchmark.protobuf.nobid.Nobid 10 | 11 | object FileGen extends App { 12 | 13 | val dataset = DataSet.createDataset(DatasetSize) 14 | 15 | final val outFile = FileUtil.mkOutFile("nobid.protobuf") 16 | val out = new FileOutputStream(outFile.toJava) 17 | 18 | // write to file 19 | allCatch andFinally { 20 | out.flush() 21 | out.close() 22 | } apply writeToFile(dataset, out) 23 | 24 | private def writeToFile(dataset: Seq[Nobid], file: OutputStream): Unit = { 25 | dataset.foreach(r => r.writeTo(file)) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /csv-bench/src/main/scala/com/github/saint1991/serialization/benchmark/csv/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.csv 2 | 3 | import java.io.{FileOutputStream, PrintWriter} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 8 | import com.github.saint1991.serialization.benchmark.FileUtil 9 | import com.github.saint1991.serialization.benchmark.dataset._ 10 | 11 | object 
FileGen extends App { 11 | import Csv._ 12 | 13 | val dataset = DataSet.createDataset(DatasetSize) 14 | 15 | // write to file 16 | final val outFile = FileUtil.mkOutFile("nobid.csv") 17 | val out = new PrintWriter(new FileOutputStream(outFile.toJava)) 18 | 19 | allCatch andFinally { 20 | out.flush() 21 | out.close() 22 | } apply writeToFile(dataset, out) 23 | 24 | private def writeToFile(dataset: Seq[Nobid], file: PrintWriter): Unit = 25 | dataset.foreach { r => file.println(toCsv(r)) } 26 | } 27 | -------------------------------------------------------------------------------- /proto-bench/src/main/scala/com/github/saint1991/serialization/benchmark/protobuf/DataSet.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.protobuf 2 | 3 | import com.github.saint1991.serialization.benchmark.protobuf.nobid.Nobid 4 | import com.github.saint1991.serialization.benchmark.protobuf.spot.Spot 5 | 6 | object DataSet { 7 | def createDataset(n: Int): Seq[Nobid] = Seq.fill(n) { 8 | Nobid( 9 | adnwId = 12345, 10 | appName = "sampleApp", 11 | auctionId = "14241c7f-7db1-4bcd-a3f7-82885e08e7ec", 12 | host = "prd-dsp03", 13 | loggedAt = "2017-06-30 09:07:37.677", 14 | mId = 234, 15 | nbr = 6260, 16 | page = "http://diamond.jp/articles/a/15434", 17 | resTime = 4, 18 | spot = Some(Spot( 19 | id = 2406, 20 | name = "Mie" 21 | )), 22 | history = Seq( 23 | "a", 24 | "b", 25 | "c" 26 | ), 27 | tags = Map( 28 | "media" -> "facebook", 29 | "ssp" -> "google" 30 | ) 31 | ) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /schema/protocol-buffers/nobid.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package protobuf; 3 | option java_package = "com.github.saint1991.serialization.benchmark.protobuf"; 4 | 5 | // SPEED (default) | CODE_SIZE | LITE_RUNTIME 6 | option optimize_for = SPEED; 7 | 8 | // possible to import definitions from other files 9 | import "spot.proto"; 10 | 11 | // message names should be CamelCase 12 | message Nobid { 13 | 14 | // field names should be snake_case 15 | // each field should have a unique index 16 | // the required directive indicates a field must exist (proto2 only; proto3 has no required fields) 17 | int32 adnw_id = 1; 18 | string app_name = 2; 19 | string auction_id = 3; 20 | string host = 4; 21 | string logged_at = 5; 22 | int32 m_id = 6; 23 | int32 nbr = 7; 24 | string page = 8; 25 | int32 res_time = 9; 26 | protobuf.Spot spot = 10; 27 | 28 | 29 | repeated string history = 11; 30 | map<string, string> tags = 12; 31 | 32 | // it's possible to reserve some indices for future use 33 | reserved 13 to 15; 34 | } 35 | 36 | message Response { 37 | int32 status_code = 1; 38 | string message = 2; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /msgpack4z-bench/src/main/scala/com/github/saint1991/serialization/benchmark/msgpack/msgpack4z/Codec.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.msgpack.msgpack4z 2 | 3 | import msgpack4z._ 4 | import msgpack4z.CodecInstances.all._ 5 | 6 | import com.github.saint1991.serialization.benchmark.dataset.{Nobid, Spot} 7 | 8 | object Codec { 9 | 10 | private [this] final val Factory: PackerUnpackerFactory = new PackerUnpackerFactory { 11 | def packer: MsgOutBuffer = MsgOutBuffer.create() 12 | def unpacker(bytes: Array[Byte]) = MsgInBuffer(bytes) 13 | } 14 | 15 | private [this] final val
Codec: CaseMapCodec[String] = CaseMapCodec.string(Factory) 16 | 17 | implicit val spotCodec: MsgpackCodec[Spot] = Codec.codec(Spot.apply _, Spot.unapply _)("id", "name") 18 | val codec: MsgpackCodec[Nobid] = Codec.codec(Nobid.apply _, Nobid.unapply _)( 19 | "adnwId", 20 | "appName", 21 | "auctionId", 22 | "host", 23 | "loggedAt", 24 | "mId", 25 | "nbr", 26 | "page", 27 | "resTime", 28 | "spot", 29 | "history", 30 | "tags" 31 | ) 32 | 33 | } 34 | -------------------------------------------------------------------------------- /jsoniter-scala-bench/src/main/scala/com/github/saint1991/serialization/benchmark/jsoniter/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.jsoniter 2 | 3 | import java.io.{BufferedOutputStream, FileOutputStream} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import com.github.plokhotnyuk.jsoniter_scala.core._ 8 | import com.github.plokhotnyuk.jsoniter_scala.macros._ 9 | 10 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 11 | import com.github.saint1991.serialization.benchmark.dataset._ 12 | import com.github.saint1991.serialization.benchmark.FileUtil 13 | 14 | object FileGen extends App { 15 | 16 | val dataset = DataSet.createDataset(DatasetSize) 17 | 18 | final val outFile = FileUtil.mkOutFile("nobid-jsoniter.json") 19 | 20 | implicit val codec: JsonValueCodec[Nobid] = JsonCodecMaker.make[Nobid](CodecMakerConfig()) 21 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 22 | 23 | allCatch andFinally { 24 | out.flush() 25 | out.close() 26 | } apply dataset.foreach(x => writeToStream(x, out)) 27 | } 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 mizuno 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /circe-bench/src/main/scala/com/github/saint1991/serialization/benchmark/circe/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.circe 2 | 3 | import java.io.{BufferedOutputStream, FileOutputStream, PrintWriter} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import io.circe.generic.auto._ 8 | import io.circe.syntax._ 9 | 10 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 11 | import com.github.saint1991.serialization.benchmark.dataset._ 12 | import com.github.saint1991.serialization.benchmark.FileUtil 13 | 14 | object FileGen extends App { 15 | 16 | val dataset = DataSet.createDataset(DatasetSize) 17 | 18 | final val outFile = FileUtil.mkOutFile("nobid-circe.json") 19 | val out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(outFile.toJava))) 20 | 21 | allCatch andFinally { 22 | out.flush() 23 | out.close() 24 | } apply writeToFile(dataset, out) 25 | 26 | private def writeToFile(dataset: Seq[Nobid], out: PrintWriter): Unit = 27 | dataset.foreach { r => 28 | val record = r.asJson.noSpaces 29 | out.println(record) 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /msgpack4z-bench/README.md: -------------------------------------------------------------------------------- 1 | 2 | # MessagePack 3 | 4 | MessagePack is a kind of serialization format. 5 | It is schema-less, like JSON. 6 | 7 | ## Type system 8 | 9 | MessagePack has seven built-in types and an extension type. 10 | 11 | ### Built-in types 12 | 13 | - Integer: represents an integer with Long precision in most languages. 14 | - Nil: represents nil. 15 | - Boolean: represents true or false. 16 | - Float: represents a floating-point number with double precision in most languages. 17 | - Raw 18 | - String: represents a UTF-8 string. 19 | - Binary: represents a byte array. 20 | - Array: represents a sequence of objects. 21 | - Map: represents key-value pairs of objects. 22 | 23 | ### Extension types 24 | 25 | The extension type is a mechanism for defining application-specific data types. 26 | It consists of magic bytes, another magic byte representing its type, and the data as an array of bytes. 27 | 28 | ## Efficiency 29 | 30 | MessagePack is an efficient and safe format. 31 | A value generally consists of magic bytes followed by the data as an array of bytes. 32 | Data is serialized into variable-length binary, so its size becomes smaller in many cases.
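As a concrete illustration, the following minimal sketch round-trips one record through the msgpack4z codecs defined in this repository (the `SizeCheck` object name is mine; it assumes the `Codec` and `Spot` definitions shown elsewhere in this project):

```scala
package com.github.saint1991.serialization.benchmark.msgpack.msgpack4z

import msgpack4z.{MsgInBuffer, MsgOutBuffer}

import com.github.saint1991.serialization.benchmark.dataset.Spot
import com.github.saint1991.serialization.benchmark.msgpack.msgpack4z.Codec.spotCodec

object SizeCheck extends App {

  val spot = Spot(id = 2406, name = "Mie")

  // pack into variable-length binary
  val bytes: Array[Byte] = spotCodec.toBytes(spot, MsgOutBuffer.create())
  println(s"encoded size: ${bytes.length} bytes")

  // unpack and verify the round trip
  val decoded = spotCodec.unpack(MsgInBuffer(bytes)).getOrElse(sys.error("error on unpacking"))
  assert(decoded == spot)
}
```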
33 | -------------------------------------------------------------------------------- /thrift-bench/src/main/scala/com/github/saint1991/serialization/benchmark/thrift/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.thrift 2 | 3 | import java.io._ 4 | import java.nio.ByteBuffer 5 | 6 | import scala.util.control.Exception._ 7 | 8 | import org.apache.thrift.protocol.{TCompactProtocol, TProtocol} 9 | import org.apache.thrift.transport.TIOStreamTransport 10 | 11 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 12 | import com.github.saint1991.serialization.benchmark.FileUtil 13 | 14 | object FileGen extends App { 15 | 16 | val dataset = DataSet.createDataset(DatasetSize) 17 | 18 | val outFile = FileUtil.mkOutFile("nobid.thrift") 19 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 20 | val outProtocol = new TCompactProtocol(new TIOStreamTransport(out)) 21 | 22 | allCatch andFinally { 23 | out.flush() 24 | out.close() 25 | } apply writeToFile(dataset, outProtocol) 26 | 27 | private def writeToFile(dataset: Seq[Nobid], outProtocol: TProtocol): Unit = 28 | dataset.foreach { r => 29 | r.write(outProtocol) 30 | outProtocol.writeBinary(ByteBuffer.wrap(FileUtil.NewLineBytes)) 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /common/src/main/scala/com/github/saint1991/serialization/benchmark/dataset/DataSet.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.dataset 2 | 3 | case class Spot( 4 | id: Int, 5 | name: String 6 | ) 7 | 8 | case class Nobid( 9 | adnwId: Int, 10 | appName: String, 11 | auctionId: String, 12 | host: String, 13 | loggedAt: String, 14 | mId: Int, 15 | nbr: Int, 16 | page: String, 17 | resTime: Int, 18 | spot: Spot, 19 | history: List[String], 20 | tags: Map[String, String] 21 | ) 22 | 23 | object DataSet { 24 | def createDataset(n: Int): Seq[Nobid] = Seq.fill(n) { 25 | Nobid( 26 | adnwId = 12345, 27 | appName = "sampleApp", 28 | auctionId = "14241c7f-7db1-4bcd-a3f7-82885e08e7ec", 29 | host = "prd-dsp03", 30 | loggedAt = "2017-06-30 09:07:37.677", 31 | mId = 234, 32 | nbr = 6260, 33 | page = "http://diamond.jp/articles/-/15434", 34 | resTime = 4, 35 | spot = Spot( 36 | id = 2406, 37 | name = "Mie" 38 | ), 39 | history = List( 40 | "a", 41 | "b", 42 | "c" 43 | ), 44 | tags = Map( 45 | "media" -> "facebook", 46 | "ssp" -> "google" 47 | ) 48 | ) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /schema/thrift/README.md: -------------------------------------------------------------------------------- 1 | # Apache Thrift 2 | 3 | ### Installation (Mac) 4 | ``` 5 | brew install thrift 6 | ``` 7 | 8 | ### Schema 9 | 10 | - Similar to proto2, all fields are uniquely indexed so that receivers can decode data based on those indices.
11 | 12 | ```nobid.thrift 13 | include "spot.thrift" 14 | namespace java com.github.saint1991.samples 15 | 16 | struct Nobid { 17 | 1: required i32 adnw_id, 18 | 2: required string app_name, 19 | 3: required string auction_id, 20 | 4: required string host, 21 | 5: required string logged_at, 22 | 6: required i32 m_id, 23 | 7: required i32 nbr, 24 | 8: optional string page, 25 | 9: required i32 res_time, 26 | 10:required spot.Spot spot, 27 | 11:optional list<string> history, 28 | 12:optional map<string, string> tags 29 | } 30 | ``` 31 | 32 | ```spot.thrift 33 | namespace java com.github.saint1991.samples 34 | 35 | enum SpotType { 36 | A, 37 | S 38 | } 39 | 40 | struct Spot { 41 | 1:required i32 id 42 | 2:required SpotType type 43 | } 44 | ``` 45 | 46 | ### Code generation 47 | ``` 48 | thrift --gen java nobid.thrift 49 | ``` -------------------------------------------------------------------------------- /csv-bench/src/main/scala/com/github/saint1991/serialization/benchmark/csv/Csv.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.csv 2 | 3 | import com.github.saint1991.serialization.benchmark.dataset._ 4 | 5 | object Csv { 6 | 7 | def toCsv(nobid: Nobid): String = { 8 | val tags = nobid.tags.map(entry => s"${entry._1}#${entry._2}") 9 | s"${nobid.adnwId},${nobid.appName},${nobid.auctionId},${nobid.host},${nobid.loggedAt},${nobid.mId},${nobid.nbr},${nobid.page},${nobid.resTime},${nobid.spot.id}_${nobid.spot.name},${nobid.history.mkString("_")},${tags.mkString("_")}" 10 | } 11 | 12 | def fromCsv(csv: String): Nobid = { 13 | val line = csv.split(",") 14 | val spot = line(9).split("_") 15 | val tags = line(11).split("_").map { i => 16 | val entry = i.split("#") 17 | entry(0) -> entry(1) 18 | }.toMap 19 | 20 | Nobid( 21 | adnwId = line(0).toInt, 22 | appName = line(1), 23 | auctionId = line(2), 24 | host = line(3), 25 | loggedAt = line(4), 26 | mId = line(5).toInt, 27 | nbr = line(6).toInt, 28 | page = line(7), 29 | resTime = line(8).toInt, 30 | spot = Spot(spot(0).toInt, spot(1)), 31 | history = line(10).split("_").toList, 32 | tags = tags 33 | ) 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /schema/avro/nobid.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "com.github.saint1991.serialization.benchmark.avro", 3 | "name":"Nobid", 4 | "type":"record", 5 | "fields":[ 6 | {"name":"adnwId","type":"int"}, 7 | {"name":"appName","type":"string"}, 8 | {"name":"auctionId","type":"string"}, 9 | {"name":"host","type":"string"}, 10 | {"name":"loggedAt","type":"string"}, 11 | {"name":"mId","type":"int"}, 12 | {"name":"nbr","type":"int"}, 13 | {"name":"page","type":["null", "string"], "default": null}, 14 | {"name":"resTime","type":"int"}, 15 | {"name":"spot","type": { 16 | "name": "spotRecord", 17 | "type": "record", 18 | "fields": [ 19 | {"name": "id", "type": "int"}, 20 | {"name": "name", "type": "string"} 21 | ] 22 | }}, 23 | {"name": "history", "type": { 24 | "name": "historyItems", 25 | "type": "array", 26 | "items": { 27 | "name": "historyItem", 28 | "type": "string" 29 | } 30 | }}, 31 | {"name": "tags", "type": { 32 | "name": "tag", 33 | "type": "map", 34 | "values": "string" 35 | }} 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /avro-bench/src/main/scala/com/github/saint1991/serialization/benchmark/avro/FileGen.scala:
-------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.avro 2 | 3 | import java.io.{BufferedOutputStream, FileOutputStream} 4 | 5 | import scala.util.control.Exception.allCatch 6 | 7 | import org.apache.avro.file.DataFileWriter 8 | import org.apache.avro.specific.SpecificDatumWriter 9 | 10 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 11 | import com.github.saint1991.serialization.benchmark.FileUtil 12 | 13 | object FileGen extends App { 14 | 15 | final val Schema = Nobid.SCHEMA$ 16 | val writer = new SpecificDatumWriter[Nobid](Schema) 17 | 18 | val dataset = DataSet.createDataset(DatasetSize) 19 | 20 | val outFile = FileUtil.mkOutFile("nobid.avro") 21 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 22 | val outFileWriter = new DataFileWriter[Nobid](writer) 23 | 24 | // write to file 25 | allCatch andFinally { 26 | outFileWriter.flush() 27 | outFileWriter.close() 28 | } apply writeToFile(dataset, outFileWriter) 29 | 30 | private def writeToFile(dataset: Seq[Nobid], writer: DataFileWriter[Nobid]): Unit = { 31 | writer.create(Schema, out) 32 | dataset.foreach { nobid => writer.append(nobid) } 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /proto-bench/src/test/scala/com/github/saint1991/serialization/benchmark/protobuf/ProtoBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.protobuf 2 | 3 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 4 | 5 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 6 | import com.github.saint1991.serialization.benchmark.protobuf.nobid.Nobid 7 | 8 | @State(Scope.Thread) 9 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 10 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 11 | @Fork(value = 1, jvmArgs = Array( 12 | "-server", 13 | "-Xms2g", 14 | "-Xmx2g", 15 | "-XX:NewSize=1g", 16 | "-XX:MaxNewSize=1g", 17 | "-XX:InitialCodeCacheSize=512m", 18 | "-XX:ReservedCodeCacheSize=512m", 19 | "-XX:+UseParallelGC", 20 | "-XX:-UseBiasedLocking", 21 | "-XX:+AlwaysPreTouch" 22 | )) 23 | @OutputTimeUnit(TUnit) 24 | class ProtoBench { 25 | 26 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 27 | 28 | val encodedDataset: Seq[Array[Byte]] = encode() 29 | decode() 30 | 31 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 32 | def encode(): Seq[Array[Byte]] = dataset.map(_.toByteArray) 33 | 34 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 35 | def decode(): Seq[Nobid] = encodedDataset.map(Nobid.parseFrom) 36 | } 37 | -------------------------------------------------------------------------------- /msgpack4z-bench/src/main/scala/com/github/saint1991/serialization/benchmark/msgpack/msgpack4z/FileGen.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.msgpack.msgpack4z 2 | 3 | import java.io.{BufferedOutputStream, FileOutputStream, OutputStream} 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import msgpack4z._ 8 | 9 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 10 | import com.github.saint1991.serialization.benchmark.dataset.DataSet 11 | import
com.github.saint1991.serialization.benchmark.FileUtil 12 | import com.github.saint1991.serialization.benchmark.FileUtil.NewLineBytes 13 | import com.github.saint1991.serialization.benchmark.msgpack.msgpack4z.Codec._ 14 | 15 | object FileGen extends App { 16 | 17 | val dataset = DataSet.createDataset(DatasetSize) 18 | 19 | final val outFile = FileUtil.mkOutFile("nobid-msgpack4z.msgpack") 20 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 21 | 22 | // write to file 23 | allCatch andFinally { 24 | out.flush() 25 | out.close() 26 | } apply writeToFile(dataset, codec, out) 27 | 28 | private def writeToFile[T](dataset: Seq[T], codec: MsgpackCodec[T], file: OutputStream): Unit = 29 | dataset.foreach { r => 30 | val packer = MsgOutBuffer.create() 31 | val bytes = codec.toBytes(r, packer) 32 | file.write(bytes) 33 | file.write(NewLineBytes) 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /csv-bench/src/test/scala/com/github/saint1991/serialization/benchmark/csv/CsvBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.csv 2 | 3 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 4 | 5 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 6 | import com.github.saint1991.serialization.benchmark.dataset._ 7 | 8 | @State(Scope.Thread) 9 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 10 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 11 | @Fork(value = 1, jvmArgs = Array( 12 | "-server", 13 | "-Xms2g", 14 | "-Xmx2g", 15 | "-XX:NewSize=1g", 16 | "-XX:MaxNewSize=1g", 17 | "-XX:InitialCodeCacheSize=512m", 18 | "-XX:ReservedCodeCacheSize=512m", 19 | "-XX:+UseParallelGC", 20 | "-XX:-UseBiasedLocking", 21 | "-XX:+AlwaysPreTouch" 22 | )) 23 | @OutputTimeUnit(TUnit) 24 | class CsvBench { 25 | import com.github.saint1991.serialization.benchmark.csv.Csv._ 26 | 27 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 28 | 29 | val encodedDataset: Seq[String] = encode() 30 | decode() 31 | 32 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 33 | def encode(): Seq[String] = { 34 | dataset.map(toCsv) 35 | } 36 | 37 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 38 | def decode(): Seq[Nobid] = { 39 | encodedDataset.map(fromCsv) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /msgpack-jackson-bench/src/main/scala/serialization/benchmark/msgpack/jackson/FileGen.scala: -------------------------------------------------------------------------------- 1 | package serialization.benchmark.msgpack.jackson 2 | 3 | import java.io._ 4 | 5 | import scala.util.control.Exception._ 6 | 7 | import com.fasterxml.jackson.core.JsonGenerator 8 | import com.fasterxml.jackson.databind.ObjectMapper 9 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 10 | import org.msgpack.jackson.dataformat.MessagePackFactory 11 | 12 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.DatasetSize 13 | import com.github.saint1991.serialization.benchmark.dataset.{DataSet, Nobid} 14 | import com.github.saint1991.serialization.benchmark.FileUtil 15 | import com.github.saint1991.serialization.benchmark.FileUtil.NewLineBytes 16 | 17 | object FileGen extends App { 18 | 19 | val dataset = DataSet.createDataset(DatasetSize) 20 | 21 | 
val mapper = new ObjectMapper(new MessagePackFactory()) 22 | mapper.configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false) 23 | mapper.registerModule(DefaultScalaModule) 24 | 25 | final val outFile = FileUtil.mkOutFile("nobid-jackson.msgpack") 26 | val out = new BufferedOutputStream(new FileOutputStream(outFile.toJava)) 27 | 28 | // write to file 29 | allCatch andFinally { 30 | out.flush() 31 | out.close() 32 | } apply writeToFile(dataset, mapper, out) 33 | 34 | private def writeToFile(dataset: Seq[Nobid], mapper: ObjectMapper, file: OutputStream): Unit = 35 | dataset.foreach { r => 36 | mapper.writeValue(file, r) 37 | file.write(NewLineBytes) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /jsoniter-scala-bench/src/test/scala/com/github/saint1991/serialization/benchmark/jsoniter/JsonIterScalaBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.jsoniter 2 | 3 | import com.github.plokhotnyuk.jsoniter_scala.core._ 4 | import com.github.plokhotnyuk.jsoniter_scala.macros._ 5 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 6 | 7 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 8 | import com.github.saint1991.serialization.benchmark.dataset._ 9 | 10 | @State(Scope.Thread) 11 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 12 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 13 | @Fork(value = 1, jvmArgs = Array( 14 | "-server", 15 | "-Xms2g", 16 | "-Xmx2g", 17 | "-XX:NewSize=1g", 18 | "-XX:MaxNewSize=1g", 19 | "-XX:InitialCodeCacheSize=512m", 20 | "-XX:ReservedCodeCacheSize=512m", 21 | "-XX:+UseParallelGC", 22 | "-XX:-UseBiasedLocking", 23 | "-XX:+AlwaysPreTouch" 24 | )) 25 | @OutputTimeUnit(TUnit) 26 | class JsonIterScalaBench { 27 | 28 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 29 | 30 | implicit val codec: JsonValueCodec[Nobid] = JsonCodecMaker.make[Nobid](CodecMakerConfig()) 31 | 32 | val encodedDataset: Seq[Array[Byte]] = encode() 33 | decode() 34 | 35 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 36 | def encode(): Seq[Array[Byte]] = dataset.map(x => writeToArray(x)) 37 | 38 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 39 | def decode(): Seq[Nobid] = encodedDataset.map(str => readFromArray(str)) 40 | } 41 | 42 | -------------------------------------------------------------------------------- /circe-bench/src/test/scala/com/github/saint1991/serialization/benchmark/circe/CirceBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.circe 2 | 3 | import java.nio.charset.StandardCharsets 4 | 5 | import io.circe.syntax._ 6 | import io.circe.generic.auto._ 7 | import io.circe.parser._ 8 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 9 | 10 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 11 | import com.github.saint1991.serialization.benchmark.dataset._ 12 | 13 | @State(Scope.Thread) 14 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 15 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 16 | @Fork(value = 1, jvmArgs = Array( 17 | "-server", 18 | "-Xms2g", 19 | "-Xmx2g", 20 | "-XX:NewSize=1g", 21 | 
"-XX:MaxNewSize=1g", 22 | "-XX:InitialCodeCacheSize=512m", 23 | "-XX:ReservedCodeCacheSize=512m", 24 | "-XX:+UseParallelGC", 25 | "-XX:-UseBiasedLocking", 26 | "-XX:+AlwaysPreTouch" 27 | )) 28 | @OutputTimeUnit(TUnit) 29 | class CirceBench { 30 | 31 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 32 | 33 | val encodedDataset: Seq[Array[Byte]] = encode() 34 | decode() 35 | 36 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 37 | def encode(): Seq[Array[Byte]] = { 38 | dataset.map(_.asJson.noSpaces.getBytes(StandardCharsets.UTF_8)) 39 | } 40 | 41 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 42 | def decode(): Seq[Nobid] = { 43 | encodedDataset.map(str => parse(new String(str, StandardCharsets.UTF_8)).right.get.as[Nobid].right.get) 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /msgpack4z-bench/src/test/scala/com/github/saint1991/serialization/benchmark/msgpack/msgpack4z/Msgpack4zBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.msgpack.msgpack4z 2 | 3 | import msgpack4z.{MsgInBuffer, MsgOutBuffer} 4 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 5 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings.{DatasetSize, Iteration, TUnit, WarmUpIteration} 6 | import com.github.saint1991.serialization.benchmark.dataset.{DataSet, Nobid} 7 | import com.github.saint1991.serialization.benchmark.msgpack.msgpack4z.Codec._ 8 | 9 | @State(Scope.Thread) 10 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 11 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 12 | @Fork(value = 1, jvmArgs = Array( 13 | "-server", 14 | "-Xms2g", 15 | "-Xmx2g", 16 | "-XX:NewSize=1g", 17 | "-XX:MaxNewSize=1g", 18 | "-XX:InitialCodeCacheSize=512m", 19 | "-XX:ReservedCodeCacheSize=512m", 20 | "-XX:+UseParallelGC", 21 | "-XX:-UseBiasedLocking", 22 | "-XX:+AlwaysPreTouch" 23 | )) 24 | @OutputTimeUnit(TUnit) 25 | class Msgpack4zBench { 26 | 27 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 28 | val encodedDataset: Seq[Array[Byte]] = encode() 29 | decode() 30 | 31 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 32 | def encode(): Seq[Array[Byte]] = dataset.map { r => codec.toBytes(r, MsgOutBuffer.create()) } 33 | 34 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 35 | def decode(): Seq[Nobid] = encodedDataset.map { bytes => 36 | codec.unpack(MsgInBuffer(bytes)).getOrElse(throw new Exception("error on unpacking")) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /thrift-bench/src/test/scala/com/github/saint1991/serialization/benchmark/thrift/ThriftBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.thrift 2 | 3 | import org.apache.thrift.protocol.TCompactProtocol 4 | import org.apache.thrift.transport.{TMemoryBuffer, TMemoryInputTransport} 5 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 6 | 7 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 8 | 9 | @State(Scope.Thread) 10 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 11 | @Measurement(iterations = Iteration, time = 1, timeUnit = 
TUnit) 12 | @Fork(value = 1, jvmArgs = Array( 13 | "-server", 14 | "-Xms2g", 15 | "-Xmx2g", 16 | "-XX:NewSize=1g", 17 | "-XX:MaxNewSize=1g", 18 | "-XX:InitialCodeCacheSize=512m", 19 | "-XX:ReservedCodeCacheSize=512m", 20 | "-XX:+UseParallelGC", 21 | "-XX:-UseBiasedLocking", 22 | "-XX:+AlwaysPreTouch" 23 | )) 24 | @OutputTimeUnit(TUnit) 25 | class ThriftBench { 26 | 27 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 28 | 29 | val encodedDataset: Seq[Array[Byte]] = encode() 30 | decode() 31 | 32 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 33 | def encode(): Seq[Array[Byte]] = { 34 | dataset.map { r => 35 | val buf = new TMemoryBuffer(0) 36 | val outProtocol = new TCompactProtocol(buf) 37 | r.write(outProtocol) 38 | buf.getArray 39 | } 40 | } 41 | 42 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 43 | def decode(): Seq[Nobid] = { 44 | encodedDataset.map { r => 45 | val buf = new TMemoryInputTransport(r) 46 | val inProtocol = new TCompactProtocol(buf) 47 | Nobid.decode(inProtocol) 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /msgpack-jackson-bench/src/test/scala/com/github/saint1991/serialization/benchmark/msgpack/jackson/MsgpackJacksonBench.scala: -------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.msgpack.jackson 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 5 | import org.msgpack.jackson.dataformat.MessagePackFactory 6 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 7 | 8 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 9 | import com.github.saint1991.serialization.benchmark.dataset.{DataSet, Nobid} 10 | 11 | @State(Scope.Thread) 12 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 13 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 14 | @Fork(value = 1, jvmArgs = Array( 15 | "-server", 16 | "-Xms2g", 17 | "-Xmx2g", 18 | "-XX:NewSize=1g", 19 | "-XX:MaxNewSize=1g", 20 | "-XX:InitialCodeCacheSize=512m", 21 | "-XX:ReservedCodeCacheSize=512m", 22 | "-XX:+UseParallelGC", 23 | "-XX:-UseBiasedLocking", 24 | "-XX:+AlwaysPreTouch" 25 | )) 26 | @OutputTimeUnit(TUnit) 27 | class MsgpackJacksonBench { 28 | 29 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 30 | 31 | val mapper = new ObjectMapper(new MessagePackFactory()) 32 | mapper.registerModule(DefaultScalaModule) 33 | 34 | val encodedDataset: Seq[Array[Byte]] = encode() 35 | decode() 36 | 37 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 38 | def encode(): Seq[Array[Byte]] = dataset.map(x => mapper.writeValueAsBytes(x)) 39 | 40 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 41 | def decode(): Seq[Nobid] = encodedDataset.map(bytes => mapper.readValue[Nobid](bytes, classOf[Nobid])) 42 | } 43 | -------------------------------------------------------------------------------- /schema/avro/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Apache Avro 3 | 4 | ## languages 5 | officially supported 6 | - C 7 | - C++ 8 | - C# 9 | - Java 10 | - Perl 11 | - Python 12 | - Ruby 13 | - PHP 14 | 15 | ## schema 16 | - No field index is included in data 17 | - decoding is always based on writer's (and reader's for schema evolution) schema. 
18 | - Dynamic schema resolution 19 | - no need to generate code in advance 20 | - encoding/decoding is conducted according only to the schema 21 | 22 | 23 | ``` nobid.avsc 24 | { 25 | "namespace": "com.github.saint1991.samples", 26 | "name":"Nobid", 27 | "type":"record", 28 | "fields":[ 29 | {"name":"adnwId","type": "int"}, 30 | {"name":"auctionId","type":"string"}, 31 | {"name":"host","type":"string"}, 32 | {"name":"loggedAt","type":"string"}, 33 | {"name":"mId","type":"int"}, 34 | {"name":"nbr","type":"int"}, 35 | {"name":"page","type":["null", "string"], "default": null}, 36 | {"name":"resTime","type":"int"}, 37 | {"name":"spot","type": { 38 | "name": "spotRecord", 39 | "type": "record", 40 | "fields": [ 41 | {"name": "id", "type": "int"}, 42 | {"name": "type", "type": { 43 | "name": "spotType", 44 | "type": "enum", 45 | "symbols": ["A", "S"] 46 | }} 47 | ] 48 | }}, 49 | {"name": "history", "type": { 50 | "name": "historyItems", 51 | "type": "array", 52 | "items": { 53 | "name": "historyItem", 54 | "type": "string" 55 | } 56 | }}, 57 | {"name": "tags", "type": { 58 | "name": "tag", 59 | "type": "map", 60 | "values": "string" 61 | }} 62 | ] 63 | } 64 | 65 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/sbt,scala,intellij 3 | 4 | ### Intellij ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff: 9 | .idea/** 10 | scala/.idea/workspace.xml 11 | .idea/**/tasks.xml 12 | .idea/dictionaries 13 | 14 | # Sensitive or high-churn files: 15 | .idea/**/dataSources/ 16 | .idea/**/dataSources.ids 17 | .idea/**/dataSources.xml 18 | .idea/**/dataSources.local.xml 19 | .idea/**/sqlDataSources.xml 20 | .idea/**/dynamic.xml 21 | .idea/**/uiDesigner.xml 22 | 23 | # Gradle: 24 | .idea/**/gradle.xml 25 | .idea/**/libraries 26 | 27 | # CMake 28 | cmake-build-debug/ 29 | 30 | # Mongo Explorer plugin: 31 | .idea/**/mongoSettings.xml 32 | 33 | ## File-based project format: 34 | *.iws 35 | 36 | ## Plugin-specific files: 37 | 38 | # IntelliJ 39 | /schema/out/ 40 | 41 | # mpeltonen/sbt-idea plugin 42 | .idea_modules/ 43 | 44 | # JIRA plugin 45 | atlassian-ide-plugin.xml 46 | 47 | # Cursive Clojure plugin 48 | .idea/replstate.xml 49 | 50 | # Crashlytics plugin (for Android Studio and IntelliJ) 51 | com_crashlytics_export_strings.xml 52 | crashlytics.properties 53 | crashlytics-build.properties 54 | fabric.properties 55 | 56 | ### Intellij Patch ### 57 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 58 | 59 | # *.iml 60 | # modules.xml 61 | # .idea/misc.xml 62 | # *.ipr 63 | 64 | # Sonarlint plugin 65 | .idea/sonarlint 66 | 67 | ### SBT ### 68 | # Simple Build Tool 69 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control 70 | 71 | dist/* 72 | target/ 73 | lib_managed/ 74 | src_managed/ 75 | project/boot/ 76 | project/plugins/project/ 77 | .history 78 | .cache 79 | .lib/ 80 | 81 | ### Scala ### 82 | *.class 83 | *.log 84 | 85 | 86 | out 87 | # End of https://www.gitignore.io/api/sbt,scala,intellij -------------------------------------------------------------------------------- /avro-bench/src/test/scala/com/github/saint1991/serialization/benchmark/avro/AvroBench.scala:
-------------------------------------------------------------------------------- 1 | package com.github.saint1991.serialization.benchmark.avro 2 | 3 | import java.io.{ByteArrayInputStream, ByteArrayOutputStream} 4 | 5 | import org.apache.avro.io.{DecoderFactory, EncoderFactory} 6 | import org.apache.avro.specific.{SpecificDatumReader, SpecificDatumWriter} 7 | import org.openjdk.jmh.annotations.{BenchmarkMode, Fork, Measurement, Mode, OutputTimeUnit, Scope, State, Warmup, Benchmark => JmhBenchmark} 8 | 9 | import com.github.saint1991.serialization.benchmark.BenchmarkSettings._ 10 | 11 | @State(Scope.Thread) 12 | @Warmup(iterations = WarmUpIteration, time = 1, timeUnit = TUnit) 13 | @Measurement(iterations = Iteration, time = 1, timeUnit = TUnit) 14 | @Fork(value = 1, jvmArgs = Array( 15 | "-server", 16 | "-Xms2g", 17 | "-Xmx2g", 18 | "-XX:NewSize=1g", 19 | "-XX:MaxNewSize=1g", 20 | "-XX:InitialCodeCacheSize=512m", 21 | "-XX:ReservedCodeCacheSize=512m", 22 | "-XX:+UseParallelGC", 23 | "-XX:-UseBiasedLocking", 24 | "-XX:+AlwaysPreTouch" 25 | )) 26 | @OutputTimeUnit(TUnit) 27 | class AvroBench { 28 | 29 | final val Schema = Nobid.SCHEMA$ 30 | val writer = new SpecificDatumWriter[Nobid](Schema) 31 | val reader = new SpecificDatumReader[Nobid](Schema) 32 | 33 | val dataset: Seq[Nobid] = DataSet.createDataset(DatasetSize) 34 | val encoded: Seq[Array[Byte]] = encode() 35 | decode() 36 | 37 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 38 | def encode(): Seq[Array[Byte]] = { 39 | dataset.map { nobid => 40 | val ostream = new ByteArrayOutputStream() 41 | val encoder = EncoderFactory.get().binaryEncoder(ostream, null) 42 | writer.write(nobid, encoder) 43 | encoder.flush() 44 | ostream.toByteArray 45 | } 46 | } 47 | 48 | @JmhBenchmark @BenchmarkMode(Array(Mode.AverageTime)) 49 | def decode(): Seq[Nobid] = { 50 | encoded.map { record => 51 | val istream = new ByteArrayInputStream(record) 52 | val decoder = DecoderFactory.get.binaryDecoder(istream, null) 53 | reader.read(null, decoder) 54 | } 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Benchmarks against Serialization Systems 3 | [![Build Status](https://travis-ci.org/saint1991/serialization-benchmark.svg?branch=master)](https://travis-ci.org/saint1991/serialization-benchmark) 4 | 5 | 6 | ## Characteristics 7 | 8 | | | Protocol Buffers (proto3) | Thrift (compact protocol) | Avro | CSV | JSON | MessagePack | 9 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 10 | | Schema-less? | No | No | No | No | Yes | Yes | 11 | | Require compiling schema in advance? | Yes | Yes | No | No | No | No | 12 | 13 | ## Run benchmarks 14 | ``` 15 | $ sbt $project/jmh:run 16 | ``` 17 | where $project is the name of an sbt subproject (e.g. avroBench) 18 | 19 | 20 | ## Sample data file generation 21 | ``` 22 | $ sbt $project/run 23 | ``` 24 | 25 | ## Schemas 26 | The schemas used in this benchmark are under [schema](schema) 27 | 28 | ## Results 29 | 30 | **NOTE**: This benchmark was run under the specific conditions below; results may differ under other conditions. 31 | 32 | ### Benchmark setup 33 | 34 | - OS: Ubuntu 16.04 LTS 35 | - CPU: Intel(R) Xeon(R) CPU E5-2680 v3 2.50GHz, 4 cores 36 | - Memory: 14GB 37 | - JDK 1.8.0_171, Java HotSpot(TM) 64-Bit Server VM, 25.171-b11 38 | 39 | ### Average time to encode 100,000 records in milliseconds.
40 | 41 | - 20 warm-up iterations 42 | - Average of 20 iterations 43 | 44 | | Protocol Buffers (proto3) | Thrift (compact protocol) | Avro | CSV | JSON (with jsoniter-scala) | JSON (with circe) | MessagePack (jackson-module-msgpack) | MessagePack (msgpack4z) | 45 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 46 | | 43.0 | 235.8 | 232.6 | 116.8 | 74.6 | 488.7 | 354.8 | 358.0 | 47 | 48 | ### Average time to decode 100,000 binary records in milliseconds. 49 | 50 | - 20 warm-up iterations 51 | - Average of 20 iterations 52 | 53 | | Protocol Buffers (proto3) | Thrift (compact protocol) | Avro | CSV | JSON (with jsoniter-scala) | JSON (with circe) | MessagePack (jackson-module-msgpack) | MessagePack (msgpack4z) | 54 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 55 | | 139.5 | 162.9 | 586.0 | 160.8 | 151.3 | 503.5 | 414.9 | 609.5 | 56 | 57 | ### Data size of 100,000 encoded records in MB. 58 | 59 | | Protocol Buffers (proto3) | Thrift (compact protocol) | Avro | CSV | JSON | MessagePack | 60 | | :---: | :---: | :---: | :---: | :---: | :---: | 61 | | 18.5 | 18.3 | 16.7 | 17.4 | 32.2 | 25.7 | 62 | -------------------------------------------------------------------------------- /schema/protocol-buffers/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Protocol Buffers (by Google) 3 | 4 | ## Installation (Mac) 5 | ``` 6 | $ brew install protobuf 7 | ``` 8 | 9 | ## languages 10 | officially supported 11 | 12 | - C++ 13 | - Java 14 | - Python 15 | - Objective-C 16 | - C# 17 | - JavaScript 18 | - Ruby 19 | - Go 20 | - PHP 21 | - Dart 22 | 23 | 24 | ## schema 25 | - easier to keep compatibility during schema evolution thanks to the per-field indices embedded in the data 26 | - all fields are uniquely indexed so that receivers can decode data based on those indices.
27 | - possible to reserve indices for future use, e.g. `reserved 1 to 11;` 28 | - every type is mapped to a corresponding type in each language [(reference)](https://developers.google.com/protocol-buffers/docs/proto3#scalar) 29 | 30 | ``` nobid.proto 31 | syntax = "proto3"; 32 | package samples; 33 | 34 | // SPEED (default) | CODE_SIZE | LITE_RUNTIME 35 | option optimize_for = SPEED; 36 | 37 | // possible to import definitions from other files 38 | import "./spot.proto"; 39 | 40 | // message names should be CamelCase 41 | message Nobid { 42 | 43 | // field names should be snake_case 44 | // each field should have a unique index 45 | // the required directive indicates a field must exist (proto2 only; proto3 has no required fields) 46 | int32 adnw_id = 1; 47 | string app_name = 2; 48 | string auction_id = 3; 49 | string host = 4; 50 | string logged_at = 5; 51 | int32 m_id = 6; 52 | int32 nbr = 7; 53 | string page = 8; 54 | int32 res_time = 9; 55 | Spot spot = 10; 56 | 57 | 58 | repeated string history = 11; 59 | map<string, string> tags = 12; 60 | 61 | // it's possible to reserve some indices for future use 62 | reserved 13 to 15; 63 | } 64 | 65 | // an rpc interface is defined by a service 66 | // it corresponds to an interface or a trait in Java and Scala respectively 67 | service NobidBenchmark { 68 | // define a method that takes a Nobid parameter and returns a Response message (rpc methods must use message types, so a scalar such as int32 is not allowed) 69 | rpc benchmark (Nobid) returns (Response); 70 | } 71 | ``` 72 | 73 | ``` spot.proto 74 | syntax = "proto3"; 75 | package samples; 76 | 77 | // enum should be CamelCase and each member should be UNDERSCORE_SEPARATED_CAPITALS 78 | enum SpotType { 79 | A = 0; // the first enum value must be 0 in proto3 80 | S = 1; 81 | } 82 | 83 | message Spot { 84 | int32 id = 1; 85 | SpotType type = 2; 86 | } 87 | ``` 88 | 89 | ## code generation 90 | ``` 91 | protoc --proto_path=IMPORT_PATH \ 92 | [--cpp_out=DST_DIR] \ 93 | [--java_out=DST_DIR] \ 94 | [--python_out=DST_DIR] \ 95 | [--go_out=DST_DIR] \ 96 | [--ruby_out=DST_DIR] \ 97 | [--javanano_out=DST_DIR] \ 98 | [--objc_out=DST_DIR] \ 99 | [--csharp_out=DST_DIR] \ 100 | path/to/file.proto 101 | ``` 102 | 103 | For this sample, 104 | ``` 105 | protoc --proto_path=. --java_out=. nobid.proto 106 | ``` 107 | It generates Java classes into the directory specified by `--java_out` 108 | 109 | ## Binary format 110 | 111 | 112 | Messages are represented simply as key-value pairs, where each key consists of a field index and a [wire type](https://developers.google.com/protocol-buffers/docs/encoding#structure) 113 | 114 | ### example 115 | 116 | 117 | The following bytes represent that the value of field index 1 is 150. 118 | ``` 119 | 0000 1000 1001 0110 0000 0001 120 | ``` 121 | 122 | A key is always a varint, so remove the first bit (the most significant bit, which indicates whether further bytes follow): 123 | ``` 124 | 000 1000 1001 0110 0000 0001 125 | ``` 126 | 127 | The last 3 bits in the first byte `00001000` represent the wire type. 128 | Here, it is `000`, so the wire type is `Varint` according to the [reference](https://developers.google.com/protocol-buffers/docs/encoding#structure) 129 | The remaining bits are `0001`, so the field index is 1. 130 | 131 | The following bytes `1001 0110 0000 0001` represent the varint value for index 1. 132 | The first bit of each byte is the most significant bit; here it is 1, which means another byte follows. 133 | Dropping those flag bits and ordering the 7-bit groups from least significant first, it becomes `0000001 0010110`. 134 | Interpreting this as two's complement, we find it is 150. 135 | 136 | ### Varint 137 | Varint is a variable-length encoding of integers. 138 | For each byte, the first bit is a flag indicating whether further bytes follow
(1 means further bytes follow). 139 | Excluding those flag bits, the remaining 7-bit groups form a two's complement integer, least-significant group first. 140 | 141 | ### Strings 142 | A string consists of a varint followed by UTF-8 bytes. 143 | The varint part gives the length of the UTF-8 part. --------------------------------------------------------------------------------
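To make the varint walkthrough above concrete, here is a small Scala sketch (an editorial example, not part of this repository) that decodes the bytes from the example:

```scala
object VarintExample extends App {

  // Decodes a single varint: each byte contributes its lower 7 bits,
  // least-significant group first; a set MSB (0x80) means another byte follows.
  def decodeVarint(bytes: Array[Byte]): Long = {
    var result = 0L
    var shift = 0
    var i = 0
    var more = true
    while (more) {
      val b = bytes(i) & 0xFF
      result |= (b & 0x7FL) << shift
      more = (b & 0x80) != 0
      shift += 7
      i += 1
    }
    result
  }

  // Key byte 0x08: field index = 8 >> 3 = 1, wire type = 8 & 0x7 = 0 (Varint).
  val key = decodeVarint(Array(0x08.toByte))
  assert(key >> 3 == 1 && (key & 0x7) == 0)

  // Value bytes 0x96 0x01 decode to 150, as in the walkthrough.
  assert(decodeVarint(Array(0x96.toByte, 0x01.toByte)) == 150L)
}
```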