├── README ├── README.md ├── akkahttptest ├── akkahttptest-aws │ └── src │ │ ├── main │ │ └── scala │ │ │ └── net │ │ │ └── ndolgov │ │ │ └── akkahttptest │ │ │ └── S3ToAkkaStream.scala │ │ └── test │ │ ├── resources │ │ └── log4j.xml │ │ └── scala │ │ └── net │ │ └── ndolgov │ │ └── akkahttptest │ │ └── S3ToAkkaStreamSpec.scala ├── akkahttptest-saga │ └── src │ │ ├── main │ │ ├── java │ │ │ └── net │ │ │ │ └── ndolgov │ │ │ │ └── akkahttptest │ │ │ │ └── saga │ │ │ │ └── completable │ │ │ │ ├── CompletableFutureSaga.java │ │ │ │ └── ObjectStoreTxs.java │ │ └── scala │ │ │ └── net │ │ │ └── ndolgov │ │ │ └── akkahttptest │ │ │ └── saga │ │ │ ├── ObjectStorage.scala │ │ │ ├── TxContext.scala │ │ │ ├── futuristic │ │ │ ├── ComposedFuturesSaga.scala │ │ │ ├── FutureSeqSaga.scala │ │ │ ├── ObjectStoreSaga.scala │ │ │ └── ObjectStoreTx.scala │ │ │ ├── monadic │ │ │ ├── ObjectStoreSaga.scala │ │ │ └── ObjectStoreTx.scala │ │ │ └── tried │ │ │ ├── ObjectStoreSaga.scala │ │ │ └── ObjectStoreTx.scala │ │ └── test │ │ ├── resources │ │ └── log4j.xml │ │ └── scala │ │ └── net │ │ └── ndolgov │ │ └── akkahttptest │ │ └── saga │ │ ├── ObjectStoreSagaAsserions.scala │ │ ├── completable │ │ └── CompletableFutureSagaSpec.scala │ │ ├── futuristic │ │ └── ObjectStoreSagaSpec.scala │ │ ├── monadic │ │ └── ObjectStoreSagaSpec.scala │ │ └── tried │ │ └── ObjectStoreSagaSpec.scala ├── akkahttptest-web │ └── src │ │ ├── main │ │ └── scala │ │ │ └── net │ │ │ └── ndolgov │ │ │ └── akkahttptest │ │ │ ├── AkkaHttpServer.scala │ │ │ ├── service │ │ │ ├── HttpEndpointC.scala │ │ │ ├── TestServiceA.scala │ │ │ └── TestServiceB.scala │ │ │ └── web │ │ │ ├── HttpEndpointA.scala │ │ │ ├── HttpEndpointB.scala │ │ │ ├── HttpEndpoints.scala │ │ │ ├── JsonMarshallers.scala │ │ │ └── TestServiceC.scala │ │ └── test │ │ ├── resources │ │ └── log4j.xml │ │ └── scala │ │ └── net │ │ └── ndolgov │ │ └── akkahttptest │ │ ├── AkkaHttpClient.scala │ │ ├── AkkaHttpTest.scala │ │ └── web │ │ └── HttpEndpointATest.scala ├── build.sbt └── project │ └── build.properties ├── antlrtest ├── pom.xml └── src │ ├── main │ ├── antlr3 │ │ └── net │ │ │ └── ndolgov │ │ │ └── antlrtest │ │ │ └── TestQuery.g │ └── java │ │ └── net │ │ └── ndolgov │ │ └── antlrtest │ │ ├── EmbedmentHelper.java │ │ ├── EmbedmentHelperImpl.java │ │ ├── QueryDescriptor.java │ │ ├── QueryParser.java │ │ └── Type.java │ └── test │ └── java │ └── net │ └── ndolgov │ └── antlrtest │ └── QueryParserTest.java ├── avrotest ├── pom.xml └── src │ ├── main │ └── java │ │ └── net │ │ └── ndolgov │ │ └── avrotest │ │ ├── AvroRecordBuilder.java │ │ └── AvroSchemaBuilder.java │ └── test │ └── java │ └── net │ └── ndolgov │ └── avrotest │ ├── AvroBuilderTest.java │ └── AvroSerializationTest.java ├── disruptortest ├── pom.xml └── src │ ├── main │ └── java │ │ └── net │ │ └── ndolgov │ │ └── disruptortest │ │ ├── DataRow.java │ │ ├── DataRowEvent.java │ │ ├── DataRowEventConsumer.java │ │ ├── DataRowEventProducer.java │ │ ├── DataRowEventTranslator.java │ │ ├── DisruptorThreadFactory.java │ │ ├── ParallelProcessingContext.java │ │ ├── ParallelProcessor.java │ │ └── ParallelRequestRunner.java │ └── test │ ├── java │ └── net │ │ └── ndolgov │ │ └── disruptortest │ │ └── ParallelProcessorTest.java │ └── resources │ └── log4j.xml ├── flatbufferstest ├── pom.xml └── src │ ├── main │ └── flatbuffers │ │ └── timeseries.fbs │ └── test │ ├── java │ └── net │ │ └── ndolgov │ │ └── flatbufferstest │ │ └── FlatBufferTest.java │ └── resources │ └── log4j.xml ├── grpctest ├── pom.xml └── src │ ├── 
main │ ├── java │ │ └── net │ │ │ └── ndolgov │ │ │ └── grpctest │ │ │ └── plain │ │ │ ├── GrpcClient.java │ │ │ ├── GrpcServer.java │ │ │ ├── TestServiceAImpl.java │ │ │ └── TestServiceBImpl.java │ └── proto │ │ ├── testsvcA.proto │ │ └── testsvcB.proto │ └── test │ ├── java │ └── net │ │ └── ndolgov │ │ └── grpctest │ │ └── plain │ │ └── AsyncGrpcCallTest.java │ └── resources │ └── log4j.xml ├── lucenetest ├── pom.xml └── src │ ├── main │ └── java │ │ └── net │ │ └── ndolgov │ │ └── lucenetest │ │ ├── docvalues │ │ ├── DocValuesQuery.java │ │ ├── Processor.java │ │ ├── Provider.java │ │ └── Searcher.java │ │ ├── scoring │ │ ├── Processor.java │ │ ├── Provider.java │ │ ├── ScoringQuery.java │ │ ├── Searcher.java │ │ └── Visitor.java │ │ └── search │ │ ├── LongFieldScorer.java │ │ ├── Processor.java │ │ ├── QueryBuilder.java │ │ ├── QueryScorer.java │ │ ├── SearchQuery.java │ │ └── Searcher.java │ └── test │ └── java │ └── net │ └── ndolgov │ └── lucenetest │ └── search │ ├── LuceneFields.java │ ├── SearchQueryTest.java │ └── TestIndexCreator.java ├── parquettest ├── pom.xml └── src │ ├── main │ └── java │ │ └── net │ │ └── ndolgov │ │ └── parquettest │ │ ├── ColumnHeader.java │ │ ├── GenericParquetReader.java │ │ ├── LongColumnHeader.java │ │ ├── ParquetLoggerOverride.java │ │ ├── Record.java │ │ ├── RecordFields.java │ │ ├── RecordFileUtil.java │ │ ├── RecordParquetWriter.java │ │ ├── RecordReadSupport.java │ │ ├── RecordWriteSupport.java │ │ ├── ToParquet.java │ │ ├── WriterFactory.java │ │ └── package-info.java │ └── test │ ├── java │ └── net │ │ └── ndolgov │ │ └── parquettest │ │ ├── FilterByValue.java │ │ └── RecordParquetWriterTest.java │ └── resources │ ├── log4j.xml │ └── logging.properties ├── querydsl ├── pom.xml ├── querydsl-antlr │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── antlr4 │ │ │ ├── imports │ │ │ │ └── Common.g4 │ │ │ └── net │ │ │ │ └── ndolgov │ │ │ │ └── querydsl │ │ │ │ └── antlr │ │ │ │ ├── action │ │ │ │ └── QueryDsl.g4 │ │ │ │ └── listener │ │ │ │ └── ParquetDsl.g4 │ │ └── java │ │ │ └── net │ │ │ └── ndolgov │ │ │ └── querydsl │ │ │ └── antlr │ │ │ ├── action │ │ │ ├── AntlrActionDslParser.java │ │ │ ├── AstBuilder.java │ │ │ └── AstBuilderImpl.java │ │ │ └── listener │ │ │ ├── AntlrListenerDslParser.java │ │ │ └── AstBuildingListener.java │ │ └── test │ │ └── java │ │ └── net │ │ └── ndolgov │ │ └── querydsl │ │ └── antlr │ │ └── AntlrDslParserTest.java ├── querydsl-dsl │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── net │ │ └── ndolgov │ │ └── querydsl │ │ ├── ast │ │ ├── AstNode.java │ │ ├── DslQuery.java │ │ ├── From.java │ │ ├── Projection.java │ │ ├── Select.java │ │ ├── Where.java │ │ └── expression │ │ │ ├── AttrEqLong.java │ │ │ ├── BinaryExpr.java │ │ │ ├── NoOpExpr.java │ │ │ └── PredicateExpr.java │ │ └── parser │ │ ├── DslParser.java │ │ └── Tokens.java ├── querydsl-fastparse │ ├── build.sbt │ ├── pom.xml │ ├── project │ │ └── build.properties │ └── src │ │ ├── main │ │ └── scala │ │ │ └── net │ │ │ └── ndolgov │ │ │ └── querydsl │ │ │ └── fastparse │ │ │ ├── FastparseDslParser.scala │ │ │ └── FastparseParser.scala │ │ └── test │ │ └── scala │ │ └── net │ │ └── ndolgov │ │ └── querydsl │ │ └── fastparse │ │ └── FastparseDslParserTest.scala ├── querydsl-parboiled │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── net │ │ │ └── ndolgov │ │ │ └── querydsl │ │ │ └── parboiled │ │ │ ├── ParboiledDslParser.java │ │ │ └── ParboiledParser.java │ │ └── test │ │ ├── java │ │ └── net │ │ │ └── ndolgov │ │ │ └── querydsl │ │ │ └── parboiled 
│ │ │ └── ParboiledDslParserTest.java │ │ └── resources │ │ └── log4j.xml └── querydsl-parquet │ ├── pom.xml │ └── src │ ├── main │ └── java │ │ └── net │ │ └── ndolgov │ │ └── querydsl │ │ └── parquet │ │ ├── ParquetQueryBuilder.java │ │ ├── PredicateExprs.java │ │ └── ToParquetFilter.java │ └── test │ ├── java │ └── net │ │ └── ndolgov │ │ └── querydsl │ │ └── parquet │ │ └── ParquetFileFilterQueryTest.java │ └── resources │ └── log4j.xml ├── restgatewaytest ├── build.sbt ├── project │ ├── build.properties │ └── protoc.sbt └── restgatewaytest-web │ └── src │ ├── main │ ├── proto │ │ ├── testsvcA.proto │ │ └── testsvcB.proto │ ├── resources │ │ └── specs │ │ │ ├── TestsvcAService.yml │ │ │ └── TestsvcBService.yml │ └── scala │ │ └── net │ │ └── ndolgov │ │ └── restgatewaytest │ │ ├── GatewayServer.scala │ │ ├── GrpcServer.scala │ │ ├── TestServiceAImpl.scala │ │ └── TestServiceBImpl.scala │ └── test │ ├── resources │ └── log4j.xml │ └── scala │ └── net │ └── ndolgov │ └── restgatewaytest │ ├── GatewayClient.scala │ ├── JsonMarshaller.scala │ └── RestGatewayTest.scala ├── s3test ├── pom.xml └── src │ ├── main │ └── java │ │ └── net │ │ └── ndolgov │ │ └── s3test │ │ ├── FileDownloader.java │ │ ├── FileHandler.java │ │ ├── FileUploader.java │ │ ├── S3ChangeDetector.java │ │ ├── S3Client.java │ │ ├── S3ClientMBean.java │ │ ├── S3Destination.java │ │ ├── S3Downloader.java │ │ ├── S3FileTransferClient.java │ │ ├── S3TransferProgressListener.java │ │ ├── S3Uploader.java │ │ └── TransferCallback.java │ └── test │ ├── java │ └── net │ │ └── ndolgov │ │ └── s3test │ │ └── S3StorageTest.java │ └── resources │ └── log4j.xml ├── scalapbtest ├── build.sbt ├── project │ ├── build.properties │ └── scalapb.sbt └── scalapbtest-grpc │ └── src │ ├── main │ ├── proto │ │ ├── testsvcA.proto │ │ └── testsvcB.proto │ └── scala │ │ └── net │ │ └── ndolgov │ │ └── scalapbtest │ │ ├── GrpcClient.scala │ │ ├── GrpcServer.scala │ │ ├── TestServiceAImpl.scala │ │ └── TestServiceBImpl.scala │ └── test │ ├── resources │ └── log4j.xml │ └── scala │ └── net │ └── ndolgov │ └── scalapbtest │ └── AsyncScalaPBCallTest.scala ├── sparkdatasourcetest ├── pom.xml └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ └── scala │ │ └── net │ │ └── ndolgov │ │ └── sparkdatasourcetest │ │ ├── lucene │ │ ├── LuceneDocumentReader.scala │ │ ├── LuceneFieldFactory.scala │ │ ├── LuceneFieldReader.scala │ │ ├── LuceneFieldWriter.scala │ │ ├── LuceneIndexReader.scala │ │ ├── LuceneIndexWriter.scala │ │ ├── QueryBuilder.scala │ │ └── StoredFieldVisitorQuery.scala │ │ └── sql │ │ ├── DefaultSource.scala │ │ ├── LucenePartition.scala │ │ ├── LuceneRDD.scala │ │ ├── LuceneRelation.scala │ │ ├── LuceneSchema.scala │ │ └── package.scala │ └── test │ └── scala │ └── net │ └── ndolgov │ └── sparkdatasourcetest │ └── sql │ ├── LuceneDataSourceTestEnv.scala │ ├── LuceneDataSourceTestSuit.scala │ └── LuceneSchemaTestSuit.scala ├── sparkdatasourcev2test ├── README.md ├── pom.xml └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ └── scala │ │ └── net │ │ └── ndolgov │ │ └── sparkdatasourcetest │ │ ├── connector │ │ ├── FilePaths.scala │ │ ├── FileUtils.scala │ │ ├── LuceneDataSourceV2.scala │ │ ├── LuceneDataSourceV2Reader.scala │ │ └── LuceneDataSourceV2Writer.scala │ │ └── lucene │ │ ├── LuceneDocumentReader.scala │ │ ├── LuceneFieldFactory.scala │ │ ├── LuceneFieldReader.scala │ │ ├── 
LuceneFieldWriter.scala │ │ ├── LuceneIndexReader.scala │ │ ├── LuceneIndexWriter.scala │ │ ├── LuceneSchema.scala │ │ ├── QueryBuilder.scala │ │ └── StoredFieldVisitorQuery.scala │ └── test │ ├── resources │ └── log4j.xml │ └── scala │ └── net │ └── ndolgov │ └── sparkdatasourcetest │ └── connector │ ├── LuceneDataSourceTestEnv.scala │ ├── LuceneDataSourceTestSuit.scala │ └── LuceneSchemaTestSuit.scala ├── sqstest ├── pom.xml └── src │ ├── main │ └── java │ │ └── net │ │ └── ndolgov │ │ └── sqstest │ │ ├── AsyncSqsClient.java │ │ ├── AsyncSqsClientImpl.java │ │ ├── ConcurrentMapMessageRepository.java │ │ ├── Handler.java │ │ ├── MessageHandler.java │ │ ├── MessageRepository.java │ │ ├── SqsQueuePoller.java │ │ ├── VisibilityTimeoutTracker.java │ │ └── VisibilityTimeoutTrackerImpl.java │ └── test │ └── java │ └── net │ └── ndolgov │ └── sqstest │ └── SqsQueuePollerTest.java ├── thriftrpctest ├── pom.xml └── src │ ├── main │ └── java │ │ └── net │ │ └── ndolgov │ │ └── thriftrpctest │ │ ├── ClientFactory.java │ │ ├── HandlerFactory.java │ │ ├── MultiplexedClientFactory.java │ │ ├── Server.java │ │ ├── ServiceDefinition.java │ │ └── TMultiplexedAsyncProcessor.java │ └── test │ ├── java │ └── net │ │ └── ndolgov │ │ └── thriftrpctest │ │ ├── Handler.java │ │ ├── Handler2.java │ │ └── ServerTest.java │ ├── resources │ └── log4j.xml │ └── thrift │ ├── testsvc.thrift │ └── testsvc2.thrift └── timeseriescompressiontest ├── pom.xml └── src ├── main └── java │ └── net │ └── ndolgov │ └── timeseriescompression │ └── CompressedTimeSeries.java └── test └── java └── net └── ndolgov └── timeseriescompression └── CompressedTimeSeriesTest.java /README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndolgov/experiments/6620fd27572ced6e33dca818db085157e036c5fc/README -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code examples for my blog posts 2 | Now and then I work with interesting technologies at my day job or dig deeper into something promising at night. Occasionally I learn things that I both find useful and do not see written about much. Usually it's related to OLAP, data storage, and query engines. 3 | 4 | For historical reasons I stick to a pretty lame project naming convention though it makes clear what technology it's mostly about. If any of them attracts your attention you'd really need to start from my blog posts at http://ndolgov.blogspot.com . There I tend to indulge myself in vintage UML diagrams (shout-out to my all-time favorite https://www.websequencediagrams.com) but usually strive to be terse. 
5 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-aws/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-aws/src/test/scala/net/ndolgov/akkahttptest/S3ToAkkaStreamSpec.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest 2 | 3 | import java.nio.file.{Files, Paths} 4 | import java.util.concurrent.TimeUnit 5 | 6 | import akka.actor.ActorSystem 7 | import akka.stream.ActorMaterializer 8 | import akka.stream.scaladsl.FileIO 9 | import org.scalatest.{Assertions, FlatSpec, Matchers} 10 | import org.slf4j.LoggerFactory 11 | import software.amazon.awssdk.regions.Region 12 | 13 | import scala.concurrent.duration.Duration 14 | import scala.concurrent.{Await, ExecutionContext} 15 | 16 | // see also https://ryftcloud.zendesk.com/hc/en-us/articles/115009913207-Get-Sample-Data-from-AWS-S3-Bucket 17 | class S3ToAkkaStreamSpec extends FlatSpec with Assertions with Matchers { 18 | private val logger = LoggerFactory.getLogger(classOf[S3ToAkkaStreamSpec]) 19 | 20 | private val dest = Paths.get(s"target/scala-2.12/test-classes/testfile${System.currentTimeMillis()}.bin") 21 | 22 | private val region = Region.US_EAST_1.toString 23 | private val bucket = "ryft-public-sample-data" 24 | private val s3path = "ODBC/SampleDatabases.tar.gz" 25 | 26 | private implicit val actorSystem: ActorSystem = ActorSystem("test-actor-system") 27 | private implicit val materializer: ActorMaterializer = ActorMaterializer()(actorSystem) 28 | private implicit val executionContext: ExecutionContext = actorSystem.dispatcher 29 | 30 | it should "download a public file" in { 31 | val future = S3ToAkkaStream(Region.of(region), bucket). 32 | download(s3path). 
33 | flatMap(src => { 34 | logger.info(s"Writing to ${dest.toAbsolutePath.toString}") 35 | src.runWith(FileIO.toPath(dest)) 36 | }) 37 | 38 | Await.result(future, Duration.create(30, TimeUnit.SECONDS)) 39 | Files.size(dest) shouldEqual 759548 40 | 41 | Files.delete(dest) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-saga/src/main/scala/net/ndolgov/akkahttptest/saga/ObjectStorage.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.saga 2 | 3 | import akka.stream.scaladsl.Source 4 | import akka.util.ByteString 5 | 6 | import scala.concurrent.Future 7 | import scala.util.Try 8 | 9 | /** An object id with a revision assigned by the storage */ 10 | case class ObjectId(name: String, revision: Option[String]) 11 | 12 | /** In real life files would be in HDFS */ 13 | trait ObjectStorage { 14 | def createFile(obj: Array[Byte], path: String): Try[Unit] 15 | 16 | def createFile(obj: Source[ByteString, Any], path: String): Future[Long] 17 | 18 | def renameFile(from: String, to: String): Try[Unit] 19 | 20 | def deleteFile(path: String): Option[Try[Unit]] 21 | 22 | def tmpPath(objId: ObjectId): String 23 | 24 | def persistentPath(objId: ObjectId): String 25 | } 26 | 27 | /** In real life it would invoke a DAO to modify the DB */ 28 | trait ObjectCatalog { 29 | def createRevision(objId: ObjectId): Try[String] 30 | 31 | def forgetRevision(objId: ObjectId): Try[Unit] 32 | } 33 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-saga/src/main/scala/net/ndolgov/akkahttptest/saga/TxContext.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.saga 2 | 3 | import scala.collection.mutable 4 | 5 | /** The state shared among the transactions of a saga */ 6 | trait TxContext { 7 | def getString(name: String): String 8 | 9 | def getLong(name: String): Long 10 | 11 | def setString(name: String, value: String): Unit 12 | 13 | def setLong(name: String, value: Long): Unit 14 | } 15 | 16 | private final class TxContextImpl extends TxContext { 17 | private val kvs = mutable.Map[String, AnyVal]() 18 | 19 | override def getString(name: String): String = kvs(name).asInstanceOf[String] 20 | 21 | override def getLong(name: String): Long = kvs(name).asInstanceOf[Long] 22 | 23 | override def setString(name: String, value: String): Unit = kvs.put(name, value.asInstanceOf[AnyVal]) 24 | 25 | override def setLong(name: String, value: Long): Unit = kvs.put(name, value) 26 | } 27 | 28 | object TxContext { 29 | val TmpLocation = "TmpLocation" 30 | 31 | val PermanentLocation = "PermanentLocation" 32 | 33 | val Size = "Size" 34 | 35 | val Revision = "Revision" 36 | 37 | def apply(): TxContext = new TxContextImpl 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-saga/src/main/scala/net/ndolgov/akkahttptest/saga/futuristic/ComposedFuturesSaga.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.saga.futuristic 2 | 3 | import net.ndolgov.akkahttptest.saga.TxContext 4 | import org.slf4j.LoggerFactory 5 | 6 | import scala.concurrent.{ExecutionContext, Future} 7 | import scala.util.{Failure, Success} 8 | 9 | object ComposedFuturesSaga extends AbstractSaga { 10 | def apply(txs: Seq[ObjectStoreTx])(implicit ec: 
ExecutionContext): Saga = new ComposedFuturesSaga(txs) 11 | } 12 | 13 | /** 14 | * Recursively traverse a sequence of transactions to compose the corresponding sequence of Futures. 15 | * In each Future try to execute the head tx. In case of a failure revert the previously executed transactions. */ 16 | private[futuristic] final class ComposedFuturesSaga(txs: Seq[ObjectStoreTx]) 17 | (implicit val ec: ExecutionContext) extends Function0[Future[TxContext]] { 18 | 19 | private val logger = LoggerFactory.getLogger(classOf[ComposedFuturesSaga]) 20 | 21 | def apply(): Future[TxContext] = { 22 | executeHeadTx(txs, TxContext()) 23 | } 24 | 25 | private def executeHeadTx(txs: Seq[ObjectStoreTx], ctx: TxContext) : Future[TxContext] = { 26 | txs match { 27 | case Nil => Future.successful(ctx) 28 | 29 | case tx :: tail => 30 | tx. 31 | execute(ctx). 32 | flatMap { _ => 33 | logger.info(s" applied ${tx.toString}") 34 | executeHeadTx(tail, ctx) 35 | }. 36 | recoverWith { case thrown: Exception => 37 | val nested = thrown match { 38 | case _: NestedTxException => thrown // the actual failed tx has been logged 39 | 40 | case _ => 41 | logger.error(s"Failed to execute ${tx.toString}") 42 | new NestedTxException(thrown) 43 | } 44 | 45 | tx.rollback() match { 46 | case Success(_) => logger.info(s" reverted ${tx.toString}") 47 | 48 | case Failure(ue) => logger.error(s"Failed to roll back ${tx.toString}", ue) 49 | } 50 | 51 | Future.failed(nested) 52 | } 53 | } 54 | } 55 | 56 | /** To distinguish between the actual error happening and its propagation through the chain of Futures */ 57 | private final class NestedTxException(e: Exception) extends RuntimeException(e) 58 | } 59 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-saga/src/main/scala/net/ndolgov/akkahttptest/saga/futuristic/ObjectStoreSaga.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.saga.futuristic 2 | 3 | import akka.stream.scaladsl.Source 4 | import akka.util.ByteString 5 | import net.ndolgov.akkahttptest.saga.{ObjectStorage, ObjectCatalog, ObjectId, TxContext} 6 | 7 | import scala.concurrent.{ExecutionContext, Future} 8 | 9 | trait ObjectStoreSaga { 10 | type Saga = (() => Future[TxContext]) 11 | 12 | def apply(txs: Seq[ObjectStoreTx])(implicit ec: ExecutionContext): Saga 13 | 14 | def apply(objId: ObjectId, obj: Array[Byte], catalog: ObjectCatalog, storage: ObjectStorage) 15 | (implicit ec: ExecutionContext): Saga 16 | 17 | def apply(objId: ObjectId, obj: Source[ByteString, Any], catalog: ObjectCatalog, storage: ObjectStorage) 18 | (implicit ec: ExecutionContext): Saga 19 | } 20 | 21 | /** A means of executing a unit of work comprised of multiple atomic transactions. 22 | * A transaction failure is handled by applying compensating actions for the transactions that have been executed. 
*/ 23 | private[futuristic] abstract class AbstractSaga extends ObjectStoreSaga { 24 | override def apply(objId: ObjectId, obj: Array[Byte], catalog: ObjectCatalog, storage: ObjectStorage) 25 | (implicit ec: ExecutionContext): Saga = { 26 | val writeTx = new WriteArrayToTmpLocation(objId, obj, storage.tmpPath(objId), storage) 27 | apply(writeTx, objId, catalog, storage) 28 | } 29 | 30 | override def apply(objId: ObjectId, obj: Source[ByteString, Any], catalog: ObjectCatalog, storage: ObjectStorage) 31 | (implicit ec: ExecutionContext): Saga = { 32 | val writeTx = new WriteSourceToTmpLocation(objId, obj, storage.tmpPath(objId), storage) 33 | apply(writeTx, objId, catalog, storage) 34 | } 35 | 36 | private def apply(writeTx: ObjectStoreTx, objId: ObjectId, catalog: ObjectCatalog, storage: ObjectStorage) 37 | (implicit ec: ExecutionContext): Saga = { 38 | apply( 39 | List( 40 | writeTx, 41 | new CreateObjectRevision(objId, catalog), 42 | new MakeTmpFilePermanent( 43 | v => storage.persistentPath(objId.copy(revision = Option(v))), 44 | storage))) 45 | } 46 | } -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-saga/src/main/scala/net/ndolgov/akkahttptest/saga/monadic/ObjectStoreSaga.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.saga.monadic 2 | 3 | import akka.stream.scaladsl.Source 4 | import akka.util.ByteString 5 | import net.ndolgov.akkahttptest.saga.{ObjectStorage, ObjectCatalog, ObjectId} 6 | 7 | import scala.concurrent.{ExecutionContext, Future} 8 | 9 | /** 10 | * A means of executing a unit of work comprised of multiple atomic transactions. 11 | * A tx failure is handled by applying compensating actions for the stages that have been executed. 
12 | */ 13 | object ObjectStoreSaga { 14 | type SagaResult = (String, Long, String) 15 | 16 | def apply(objId: ObjectId, obj: Array[Byte], catalog: ObjectCatalog, storage: ObjectStorage) 17 | (implicit ec: ExecutionContext): Future[SagaResult] = { 18 | val writeTx = WriteToTmpLocation.fromArrayTx(objId, obj, storage.tmpPath(objId), storage) 19 | apply(writeTx, objId, catalog, storage) 20 | } 21 | 22 | def apply(objId: ObjectId, obj: Source[ByteString, Any], catalog: ObjectCatalog, storage: ObjectStorage) 23 | (implicit ec: ExecutionContext): Future[SagaResult] = { 24 | val writeTx = WriteToTmpLocation.fromSourceTx(objId, obj, storage.tmpPath(objId), storage) 25 | apply(writeTx, objId, catalog, storage) 26 | } 27 | 28 | private def apply(writeTx: ObjectStoreTx[(String, Long)], objId: ObjectId, catalog: ObjectCatalog, storage: ObjectStorage) 29 | (implicit ec: ExecutionContext): Future[SagaResult] = { 30 | val saga: ObjectStoreTx[SagaResult] = for { 31 | (tmpLocation, fileSize) <- writeTx 32 | revision <- CreateObjectRevision.tx(objId, catalog) 33 | permanentPath <- MakeTmpFilePermanent.tx(tmpLocation, revision, objId, storage) 34 | } yield (permanentPath, fileSize, revision) 35 | 36 | saga.toFuture 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-saga/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/main/scala/net/ndolgov/akkahttptest/service/HttpEndpointC.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.web 2 | 3 | import akka.http.scaladsl.model.{ContentType, HttpEntity, HttpResponse, MediaTypes, StatusCodes} 4 | import akka.http.scaladsl.server.Directives.{complete, onComplete, path, pathPrefix, post, _} 5 | import akka.http.scaladsl.server.Route 6 | import net.ndolgov.akkahttptest.service.TestServiceC 7 | import net.ndolgov.akkahttptest.web.HttpEndpoints._ 8 | import net.ndolgov.akkahttptest.web.ServiceCJsonMarshaller._ 9 | import org.slf4j.LoggerFactory 10 | 11 | import scala.util.{Failure, Success} 12 | 13 | /** Streaming HTTP end point */ 14 | class HttpEndpointC(service: TestServiceC, requestTimeoutMs: Int) { 15 | private val logger = LoggerFactory.getLogger(this.getClass) 16 | 17 | private val prefix = pathPrefix("akkahttp" / "test") 18 | 19 | private val exceptionHandler = unexpectedExceptionHandler(logger) 20 | 21 | /** @return all Routes supported by this HTTP end point */ 22 | def endpointRoutes(): Route = prefix { 23 | processPost ~ processGet 24 | } 25 | 26 | private def processPost: Route = 27 | handleExceptions(exceptionHandler) { 28 | post { 29 | path("testservicec") { 30 | parameters("key") { key => 31 | extractDataBytes { stream => 32 | onComplete(service.process(key, stream)) { 33 | case Success(response) => 34 | complete(response) 35 | 36 | case Failure(e) => 37 | completeWithError(e) 38 | } 39 | } 40 | } 41 | } 42 | } 43 | } 44 | 45 | private def processGet: Route = 46 | handleExceptions(exceptionHandler) { 47 | get { 48 | path("testservicec") { 49 | parameters("key") { key => 50 | onComplete(service.process(key)) { 51 | case Success(stream) => 52 | complete(HttpResponse( 53 | entity = HttpEntity(ContentType(MediaTypes.`application/octet-stream`), stream) 54 | )) 55 | 
56 | case Failure(e) => 57 | completeWithError(e) 58 | } 59 | } 60 | } 61 | } 62 | } 63 | 64 | private def completeWithError(e: Throwable) = { 65 | val message = "Unexpectedly failed to process request" 66 | logger.error(message, e) 67 | complete(httpErrorResponse(StatusCodes.InternalServerError, message)) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/main/scala/net/ndolgov/akkahttptest/service/TestServiceA.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.service 2 | 3 | import org.slf4j.LoggerFactory 4 | 5 | import scala.concurrent.{ExecutionContext, Future} 6 | 7 | case class TestRequestA(requestId: Long) 8 | 9 | case class TestResponseA(success: Boolean, requestId: Long, result: String) 10 | 11 | /** Service API analogous to the one generated from "testsvcA.proto" */ 12 | trait TestServiceA { 13 | def process(request: TestRequestA) : Future[TestResponseA] 14 | } 15 | 16 | final class TestServiceAImpl(implicit ec: ExecutionContext) extends TestServiceA { 17 | private val logger = LoggerFactory.getLogger(classOf[TestServiceAImpl]) 18 | 19 | override def process(request: TestRequestA): Future[TestResponseA] = Future { 20 | logger.info("Computing result"); // todo this is where actual time-consuming processing would be 21 | TestResponseA(success = true, request.requestId, "RESULTA") 22 | } 23 | } -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/main/scala/net/ndolgov/akkahttptest/service/TestServiceB.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.service 2 | 3 | import org.slf4j.LoggerFactory 4 | 5 | import scala.concurrent.{ExecutionContext, Future} 6 | 7 | case class TestRequestB(requestId: Long) 8 | 9 | case class TestResponseB(success: Boolean, requestId: Long, result: String) 10 | 11 | /** Service API analogous to the one generated from "testsvcB.proto" */ 12 | trait TestServiceB { 13 | def process(request: TestRequestB) : Future[TestResponseB] 14 | } 15 | 16 | final class TestServiceBImpl(implicit ec: ExecutionContext) extends TestServiceB { 17 | private val logger = LoggerFactory.getLogger(classOf[TestServiceBImpl]) 18 | 19 | override def process(request: TestRequestB): Future[TestResponseB] = Future { 20 | logger.info("Computing result"); // todo this is where actual time-consuming processing would be 21 | TestResponseB(success = true, request.requestId, "RESULT") 22 | 23 | } 24 | } -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/main/scala/net/ndolgov/akkahttptest/web/HttpEndpointA.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.web 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import akka.http.scaladsl.model.StatusCodes 6 | import akka.http.scaladsl.server.Directives._ 7 | import akka.http.scaladsl.server.Directives.{as, complete, entity, onComplete, path, pathPrefix, post, withRequestTimeout} 8 | import akka.http.scaladsl.server.Route 9 | import net.ndolgov.akkahttptest.service.{TestRequestA, TestServiceA} 10 | import org.slf4j.LoggerFactory 11 | 12 | import scala.concurrent.duration.Duration 13 | import scala.util.{Failure, Success} 14 | 15 | import HttpEndpoints._ 16 | import ServiceAJsonMarshaller._ 17 | 18 | /** ServiceA 
HTTP end point */ 19 | class HttpEndpointA(service: TestServiceA, requestTimeoutMs: Int) { 20 | private val logger = LoggerFactory.getLogger(this.getClass) 21 | 22 | private val prefix = pathPrefix("akkahttp" / "test") 23 | 24 | private val maxRequestDuration = Duration.create(requestTimeoutMs, TimeUnit.MILLISECONDS) 25 | 26 | private val exceptionHandler = unexpectedExceptionHandler(logger) 27 | 28 | private val rejectionHandler = garbledRequestHandler(logger) 29 | 30 | /** @return all Routes supported by this HTTP end point */ 31 | def endpointRoutes(): Route = prefix { 32 | process 33 | } 34 | 35 | private def process: Route = 36 | handleExceptions(exceptionHandler) { 37 | post { 38 | path("testservicea") { 39 | handleRejections(rejectionHandler) { 40 | entity(as[TestRequestA]) { request: TestRequestA => 41 | withRequestTimeout(maxRequestDuration) 42 | onComplete(service.process(request)) { 43 | case Success(response) => 44 | complete(response) 45 | 46 | case Failure(e) => 47 | val message = "Unexpectedly failed to process request" 48 | logger.error(message, e) 49 | complete(httpErrorResponse(StatusCodes.InternalServerError, message)) 50 | } 51 | } 52 | } 53 | } 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/main/scala/net/ndolgov/akkahttptest/web/HttpEndpointB.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.web 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import akka.http.scaladsl.model.StatusCodes 6 | import akka.http.scaladsl.server.Directives.{as, complete, entity, onComplete, path, pathPrefix, post, withRequestTimeout, _} 7 | import akka.http.scaladsl.server.Route 8 | import net.ndolgov.akkahttptest.service.{TestRequestB, TestServiceB} 9 | import org.slf4j.LoggerFactory 10 | 11 | import scala.concurrent.duration.Duration 12 | import scala.util.{Failure, Success} 13 | 14 | import HttpEndpoints._ 15 | import ServiceBJsonMarshaller._ 16 | 17 | /** ServiceB HTTP end point */ 18 | class HttpEndpointB(service: TestServiceB, requestTimeoutMs: Int) { 19 | private val logger = LoggerFactory.getLogger(this.getClass) 20 | 21 | private val prefix = pathPrefix("akkahttp" / "test") 22 | 23 | private val maxRequestDuration = Duration.create(requestTimeoutMs, TimeUnit.MILLISECONDS) 24 | 25 | private val exceptionHandler = unexpectedExceptionHandler(logger) 26 | 27 | private val rejectionHandler = garbledRequestHandler(logger) 28 | 29 | /** @return all Routes supported by this HTTP end point */ 30 | def endpointRoutes(): Route = prefix { 31 | process 32 | } 33 | 34 | private def process: Route = 35 | handleExceptions(exceptionHandler) { 36 | post { 37 | path("testserviceb") { 38 | handleRejections(rejectionHandler) { 39 | entity(as[TestRequestB]) { request: TestRequestB => 40 | withRequestTimeout(maxRequestDuration) 41 | onComplete(service.process(request)) { 42 | case Success(response) => 43 | complete(response) 44 | 45 | case Failure(e) => 46 | val message = "Unexpectedly failed to process request" 47 | logger.error(message, e) 48 | complete(httpErrorResponse(StatusCodes.InternalServerError, message)) 49 | } 50 | } 51 | } 52 | } 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/main/scala/net/ndolgov/akkahttptest/web/HttpEndpoints.scala: -------------------------------------------------------------------------------- 1 | package 
net.ndolgov.akkahttptest.web 2 | 3 | import akka.http.scaladsl.model.{ContentType, HttpEntity, HttpResponse, MediaType, MediaTypes, StatusCode, StatusCodes} 4 | import akka.http.scaladsl.server.Directives.complete 5 | import akka.http.scaladsl.server.{ExceptionHandler, Rejection, RejectionHandler} 6 | import org.slf4j.Logger 7 | 8 | object HttpEndpoints { 9 | val JSON: MediaType.WithFixedCharset = MediaTypes.`application/json` 10 | 11 | /** @return a handler to be used for really unexpected exceptions uncaught by other means */ 12 | def unexpectedExceptionHandler(log: Logger): ExceptionHandler = 13 | ExceptionHandler { 14 | case e: Exception => 15 | log.error("Unexpected error", e) 16 | complete(httpErrorResponse(StatusCodes.InternalServerError, "Unexpected error: " + e.getMessage)) 17 | } 18 | 19 | /** @return a handler to be used for request messages that cannot be deserialized (e.g. wrong message or garbled JSON) */ 20 | def garbledRequestHandler(log: Logger): RejectionHandler = 21 | RejectionHandler.newBuilder(). 22 | handleAll[Rejection] { rejection => 23 | log.error(s"Could not parse request because of $rejection") 24 | complete(httpErrorResponse(StatusCodes.BadRequest, "Could not parse request")) 25 | }.result() 26 | 27 | def httpErrorResponse(status : StatusCode, message: String): HttpResponse = { 28 | HttpResponse( 29 | status, 30 | entity = HttpEntity(ContentType(JSON), message) 31 | ) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/main/scala/net/ndolgov/akkahttptest/web/JsonMarshallers.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.web 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport 4 | import net.ndolgov.akkahttptest.service.{TestRequestA, TestRequestB, TestResponseA, TestResponseB, TestResponseC} 5 | import spray.json.{DefaultJsonProtocol, RootJsonFormat} 6 | 7 | // akka-http directives such as "as" and "complete" can transparently perform JSON (de)serialization as long as 8 | // an appropriate RootJsonFormat is implicitly available 9 | // for details, see https://doc.akka.io/docs/akka-http/current/common/json-support.html 10 | 11 | /** spray-json ServiceA message marshaller */ 12 | object ServiceAJsonMarshaller extends SprayJsonSupport with DefaultJsonProtocol { 13 | implicit val requestAMarshaller: RootJsonFormat[TestRequestA] = jsonFormat1(TestRequestA) 14 | implicit val responseAMarshaller: RootJsonFormat[TestResponseA] = jsonFormat3(TestResponseA) 15 | } 16 | 17 | /** spray-json ServiceB message marshaller */ 18 | object ServiceBJsonMarshaller extends SprayJsonSupport with DefaultJsonProtocol { 19 | implicit val requestBMarshaller: RootJsonFormat[TestRequestB] = jsonFormat1(TestRequestB) 20 | implicit val responseBMarshaller: RootJsonFormat[TestResponseB] = jsonFormat3(TestResponseB) 21 | } 22 | 23 | /** spray-json ServiceC message marshaller */ 24 | object ServiceCJsonMarshaller extends SprayJsonSupport with DefaultJsonProtocol { 25 | implicit val responseCMarshaller: RootJsonFormat[TestResponseC] = jsonFormat1(TestResponseC) 26 | } -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/main/scala/net/ndolgov/akkahttptest/web/TestServiceC.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.akkahttptest.service 2 | 3 | import java.io.{ByteArrayInputStream, 
ByteArrayOutputStream} 4 | 5 | import akka.stream.{IOResult, Materializer} 6 | import akka.stream.scaladsl.{Source, StreamConverters} 7 | import akka.util.ByteString 8 | import org.slf4j.LoggerFactory 9 | 10 | import scala.concurrent.{ExecutionContext, Future, Promise} 11 | import scala.util.{Failure, Success} 12 | 13 | case class TestResponseC(key: String) 14 | 15 | /** Service API illustrating Akka stream API usage */ 16 | trait TestServiceC { 17 | def process(key: String, stream: Source[ByteString, Any]) : Future[TestResponseC] 18 | 19 | def process(key: String) : Future[Source[ByteString, Future[IOResult]]] 20 | } 21 | 22 | final class TestServiceCImpl(implicit val ec: ExecutionContext, implicit val materializer: Materializer) extends TestServiceC { 23 | private val logger = LoggerFactory.getLogger(classOf[TestServiceCImpl]) 24 | 25 | override def process(key: String, stream: Source[ByteString, Any]): Future[TestResponseC] = { 26 | val promise = Promise[TestResponseC]() 27 | 28 | Future { 29 | logger.info("Computing result") 30 | stream. 31 | runWith(StreamConverters.fromOutputStream(() => new ByteArrayOutputStream())). 32 | onComplete { 33 | case Success(_) => 34 | promise.complete(Success(TestResponseC(key))) 35 | 36 | case Failure(e) => 37 | promise.failure(new RuntimeException(s"Failed to create $key from a stream", e)) 38 | } 39 | } 40 | 41 | promise.future 42 | } 43 | 44 | override def process(key: String): Future[Source[ByteString, Future[IOResult]]] = Future { 45 | StreamConverters.fromInputStream(() => new ByteArrayInputStream(new Array(0))) 46 | } 47 | } -------------------------------------------------------------------------------- /akkahttptest/akkahttptest-web/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /akkahttptest/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.0.3 2 | -------------------------------------------------------------------------------- /antlrtest/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | net.ndolgov 9 | antlrtest 10 | 0.0.1-SNAPSHOT 11 | jar 12 | ANTLR query parser test 13 | 14 | 15 | 3.4 16 | 1.0 17 | 4.8.2 18 | 19 | 20 | 21 | 22 | org.antlr 23 | antlr-runtime 24 | ${antlr.version} 25 | 26 | 27 | 28 | com.google.collections 29 | google-collections 30 | ${google.collections.version} 31 | 32 | 33 | 34 | junit 35 | junit 36 | ${junit.version} 37 | test 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | org.antlr 46 | antlr3-maven-plugin 47 | ${antlr.version} 48 | 49 | 50 | 51 | antlr 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /antlrtest/src/main/antlr3/net/ndolgov/antlrtest/TestQuery.g: -------------------------------------------------------------------------------- 1 | grammar TestQuery; 2 | 3 | tokens { 4 | SELECT = 'SELECT' ; 5 | FROM = 'FROM' ; 6 | LONG_TYPE = 'LONG'; 7 | DOUBLE_TYPE = 'DOUBLE'; 8 | } 9 | 10 | @header { 11 | package net.ndolgov.antlrtest; 12 | 13 | import org.antlr.runtime.*; 14 | import java.io.IOException; 15 | } 16 | 17 | @members { 18 | private EmbedmentHelper helper; 19 | 20 | public EH parseWithHelper(EH helper) throws RecognitionException { 21 | this.helper = helper; 22 | query(); 23 | return 
helper; 24 | } 25 | 26 | public void emitErrorMessage(String msg) { 27 | throw new IllegalArgumentException("Query parser error: " + msg); 28 | } 29 | } 30 | 31 | @lexer::header { 32 | package net.ndolgov.antlrtest; 33 | } 34 | 35 | query : SELECT variable (',' variable )* 36 | FROM ID {helper.onStorage($ID.text);}; 37 | 38 | variable : type=varType id=ID {helper.onVariable(type, $id.text);}; 39 | 40 | varType returns [Type type] 41 | : LONG_TYPE {$type = Type.LONG;} 42 | | DOUBLE_TYPE {$type = Type.DOUBLE;}; 43 | 44 | ID : ('a'..'z'|'A'..'Z'|'_'|'$')+; 45 | 46 | WS : ( ' ' 47 | | '\t' 48 | | '\r' 49 | | '\n' 50 | ) {$channel=HIDDEN;}; 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /antlrtest/src/main/java/net/ndolgov/antlrtest/EmbedmentHelper.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.antlrtest; 2 | 3 | /** 4 | * Embedment Helper object (see http://martinfowler.com/dslCatalog/embedmentHelper.html) called by ANTLR-generated query parser. 5 | * 6 | */ 7 | interface EmbedmentHelper { 8 | /** 9 | * Set storage id 10 | * @param id storage id 11 | */ 12 | void onStorage(String id); 13 | 14 | /** 15 | * Add variable definition 16 | * @param type variable type 17 | * @param name variable name 18 | */ 19 | void onVariable(Type type, String name); 20 | } 21 | -------------------------------------------------------------------------------- /antlrtest/src/main/java/net/ndolgov/antlrtest/EmbedmentHelperImpl.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.antlrtest; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | final class EmbedmentHelperImpl implements EmbedmentHelper { 7 | private String storageId; 8 | 9 | private final List<Type> varTypes; 10 | 11 | private final List<String> varNames; 12 | 13 | public EmbedmentHelperImpl() { 14 | varTypes = new ArrayList<Type>(); 15 | varNames = new ArrayList<String>(); 16 | } 17 | 18 | @Override 19 | public final void onStorage(String storageId) { 20 | this.storageId = storageId; 21 | } 22 | 23 | @Override 24 | public void onVariable(Type type, String name) { 25 | varTypes.add(type); 26 | varNames.add(name); 27 | } 28 | 29 | /** 30 | * @return parsed query descriptor 31 | */ 32 | public final QueryDescriptor queryDescriptor() { 33 | return new QueryDescriptor(storageId, varTypes, varNames); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /antlrtest/src/main/java/net/ndolgov/antlrtest/QueryDescriptor.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.antlrtest; 2 | 3 | import java.util.List; 4 | 5 | import com.google.common.base.Preconditions; 6 | 7 | /** 8 | * Execution-time representation of a parsed query 9 | */ 10 | public final class QueryDescriptor { 11 | public final String storageId; 12 | 13 | public final VarDescriptor[] variables; 14 | 15 | public QueryDescriptor(String storageId, List<Type> types, List<String> names) { 16 | this.storageId = storageId; 17 | 18 | Preconditions.checkArgument(types.size() == names.size()); 19 | variables = new VarDescriptor[types.size()]; 20 | for (int i = 0; i < variables.length; i++) { 21 | variables[i] = new VarDescriptor(types.get(i), names.get(i)); 22 | } 23 | } 24 | 25 | public static final class VarDescriptor { 26 | public final Type type; 27 | 28 | public final String name; 29 | 30 | public VarDescriptor(Type type, String name) { 31 |
this.type = type; 32 | this.name = name; 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /antlrtest/src/main/java/net/ndolgov/antlrtest/QueryParser.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.antlrtest; 2 | 3 | import org.antlr.runtime.ANTLRStringStream; 4 | import org.antlr.runtime.CommonTokenStream; 5 | import org.antlr.runtime.RecognitionException; 6 | 7 | /** 8 | * Parse a given query and return extracted execution-time representation of the parsed query 9 | */ 10 | public final class QueryParser { 11 | /** 12 | * Parse a query expression and return the extracted request configuration 13 | * @param expr query expression 14 | * @return extracted request configuration 15 | */ 16 | public static QueryDescriptor parse(String expr) { 17 | try { 18 | final TestQueryParser parser = new TestQueryParser(new CommonTokenStream(new TestQueryLexer(new ANTLRStringStream(expr)))); 19 | final EmbedmentHelperImpl helper = parser.parseWithHelper(new EmbedmentHelperImpl()); 20 | 21 | return helper.queryDescriptor(); 22 | } catch (RecognitionException e) { 23 | throw new RuntimeException("Could not parse query: " + expr, e); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /antlrtest/src/main/java/net/ndolgov/antlrtest/Type.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.antlrtest; 2 | 3 | public enum Type { 4 | LONG, DOUBLE 5 | } 6 | -------------------------------------------------------------------------------- /antlrtest/src/test/java/net/ndolgov/antlrtest/QueryParserTest.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.antlrtest; 2 | 3 | import static org.junit.Assert.*; 4 | import org.junit.Test; 5 | 6 | public class QueryParserTest { 7 | @Test 8 | public void testValidQuery() { 9 | final QueryDescriptor descriptor = QueryParser.parse("SELECT LONG varA FROM storageB"); 10 | assertEquals("storageB", descriptor.storageId); 11 | assertEquals("varA", descriptor.variables[0].name); 12 | assertEquals(Type.LONG, descriptor.variables[0].type); 13 | } 14 | 15 | @Test 16 | public void testInvalidQuery() { 17 | try { 18 | QueryParser.parse("SELECT LONGER"); 19 | } catch (Exception e) { 20 | assertTrue(e.getMessage().startsWith("Query parser error: ")); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /avrotest/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | net.ndolgov 9 | avrotest 10 | 0.0.1-SNAPSHOT 11 | jar 12 | Avro without IDL test 13 | 14 | 15 | 1.4.1 16 | 1.0 17 | 4.8.2 18 | 19 | 20 | 21 | 22 | 23 | org.apache.avro 24 | avro 25 | ${avro.version} 26 | 27 | 28 | 29 | com.google.collections 30 | google-collections 31 | ${google.collections.version} 32 | 33 | 34 | 35 | junit 36 | junit 37 | ${junit.version} 38 | test 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /avrotest/src/main/java/net/ndolgov/avrotest/AvroRecordBuilder.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.avrotest; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.avro.Schema; 7 | import org.apache.avro.generic.GenericData; 8 | import 
org.apache.avro.generic.GenericRecord; 9 | import org.apache.avro.util.Utf8; 10 | 11 | import com.google.common.base.Preconditions; 12 | 13 | /** 14 | * Build a generic record of a given schema by adding field values 15 | */ 16 | public final class AvroRecordBuilder { 17 | private final Schema schema; 18 | 19 | private final List<Object> fields; 20 | 21 | public AvroRecordBuilder(Schema schema) { 22 | Preconditions.checkNotNull(schema, "Schema is required"); 23 | 24 | this.schema = schema; 25 | fields = new ArrayList<Object>(schema.getFields().size()); 26 | } 27 | 28 | public final AvroRecordBuilder field(String str) { 29 | return field(new Utf8(str)); 30 | } 31 | 32 | public final AvroRecordBuilder field(Object value) { 33 | fields.add(value); 34 | return this; 35 | } 36 | 37 | public final GenericRecord build() { 38 | Preconditions.checkArgument(fields.size() == schema.getFields().size(), "Number of record fields does not match schema"); 39 | 40 | final GenericRecord record = new GenericData.Record(schema); 41 | for (int i = 0; i < fields.size(); i++) { 42 | record.put(i, fields.get(i)); 43 | } 44 | fields.clear(); 45 | 46 | return record; 47 | } 48 | } 49 | 50 | 51 | -------------------------------------------------------------------------------- /avrotest/src/main/java/net/ndolgov/avrotest/AvroSchemaBuilder.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.avrotest; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.avro.Schema; 7 | 8 | import com.google.common.base.Preconditions; 9 | 10 | /** 11 | * Build a schema by adding strictly typed fields 12 | */ 13 | public final class AvroSchemaBuilder { 14 | private final List<Schema.Field> fields; 15 | 16 | private final int expectedNumberOfFields; 17 | 18 | private int index; 19 | 20 | public AvroSchemaBuilder(int numberOfFields) { 21 | fields = new ArrayList<Schema.Field>(numberOfFields); 22 | expectedNumberOfFields = numberOfFields; 23 | } 24 | 25 | public final AvroSchemaBuilder string(String name) { 26 | return primitive(name, Schema.Type.STRING); 27 | } 28 | 29 | public final AvroSchemaBuilder int32(String name) { 30 | return primitive(name, Schema.Type.INT); 31 | } 32 | 33 | public final AvroSchemaBuilder int64(String name) { 34 | return primitive(name, Schema.Type.LONG); 35 | } 36 | 37 | private AvroSchemaBuilder primitive(String name, Schema.Type type) { 38 | fields.add(new Schema.Field(name, Schema.create(type), null, null)); 39 | 40 | index++; 41 | Preconditions.checkArgument(expectedNumberOfFields >= index, "Attempted to add more than expected number of schema fields"); 42 | 43 | return this; 44 | } 45 | 46 | public final Schema build() { 47 | Preconditions.checkArgument(fields.size() == index, "Attempted to add fewer than expected number of schema fields"); 48 | 49 | return Schema.createRecord(fields); 50 | } 51 | 52 | public final Schema build(String schemaName) { 53 | Preconditions.checkArgument(fields.size() == index, "Attempted to add fewer than expected number of schema fields"); 54 | 55 | final Schema schema = Schema.createRecord(schemaName, null, null, false); 56 | schema.setFields(fields); 57 | return schema; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /avrotest/src/test/java/net/ndolgov/avrotest/AvroBuilderTest.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.avrotest; 2 | 3 | import org.apache.avro.Schema; 4 | import
org.apache.avro.generic.GenericRecord; 5 | import org.apache.avro.util.Utf8; 6 | 7 | import static org.junit.Assert.*; 8 | import org.junit.Test; 9 | 10 | public class AvroBuilderTest { 11 | private static final String SCHEMA = "TESTSCHEMA"; 12 | private static final String COLUMN1 = "CITY"; 13 | private static final String COLUMN2 = "POPULATION"; 14 | private static final String CITY = "San Mateo"; 15 | private static final int POPULATION = 50000; 16 | 17 | @Test 18 | public void testAnonymousSchemaBuilder() throws Exception { 19 | assertSchema(new AvroSchemaBuilder(2).string(COLUMN1).int32(COLUMN2).build(), null); 20 | } 21 | 22 | @Test 23 | public void testNamedSchemaBuilder() throws Exception { 24 | assertSchema(new AvroSchemaBuilder(2).string(COLUMN1).int32(COLUMN2).build(SCHEMA), SCHEMA); 25 | } 26 | 27 | private void assertSchema(Schema schema, String name) { 28 | if (name == null) 29 | assertNull(schema.getName()); 30 | else 31 | assertEquals(name, schema.getName()); 32 | 33 | assertEquals(2, schema.getFields().size()); 34 | 35 | assertEquals(COLUMN1, schema.getFields().get(0).name()); 36 | assertEquals(Schema.Type.STRING, schema.getFields().get(0).schema().getType()); 37 | 38 | assertEquals(COLUMN2, schema.getFields().get(1).name()); 39 | assertEquals(Schema.Type.INT, schema.getFields().get(1).schema().getType()); 40 | } 41 | 42 | @Test 43 | public void testRecordBuilder() throws Exception { 44 | final Schema schema = new AvroSchemaBuilder(2).string(COLUMN1).int32(COLUMN2).build(); 45 | 46 | final GenericRecord record = new AvroRecordBuilder(schema).field(CITY).field(POPULATION).build(); 47 | assertEquals(new Utf8(CITY), record.get(COLUMN1)); 48 | assertEquals(POPULATION, record.get(COLUMN2)); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /disruptortest/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | net.ndolgov 9 | disruptortest 10 | 0.0.1-SNAPSHOT 11 | jar 12 | Disruptor test 13 | 14 | 15 | 3.3.4 16 | 19.0 17 | 1.6.4 18 | 6.8.8 19 | 20 | 21 | 22 | 23 | com.lmax 24 | disruptor 25 | ${disruptor.version} 26 | 27 | 28 | 29 | org.slf4j 30 | slf4j-api 31 | ${slf4j.version} 32 | 33 | 34 | 35 | org.slf4j 36 | slf4j-log4j12 37 | ${slf4j.version} 38 | runtime 39 | 40 | 41 | 42 | com.google.guava 43 | guava 44 | ${guava.version} 45 | 46 | 47 | 48 | org.testng 49 | testng 50 | ${testng.version} 51 | test 52 | 53 | 54 | 55 | 56 | 57 | 58 | org.apache.maven.plugins 59 | maven-compiler-plugin 60 | 2.3.2 61 | 62 | 1.8 63 | 1.8 64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/DataRow.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | /** 4 | * A dummy row type 5 | */ 6 | public final class DataRow { 7 | public long someField; 8 | public double anotherField; 9 | 10 | public DataRow() { 11 | } 12 | 13 | public DataRow(long someField, double anotherField) { 14 | this.someField = someField; 15 | this.anotherField = anotherField; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/DataRowEvent.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import com.lmax.disruptor.EventFactory; 4 | 5 | 
/** 6 | * Ring buffer slot type 7 | */ 8 | public final class DataRowEvent { 9 | public final DataRow row; 10 | 11 | private DataRowEvent(DataRow row) { 12 | this.row = row; 13 | } 14 | 15 | public void from(DataRow buffer) { 16 | row.someField = buffer.someField; 17 | row.anotherField = buffer.anotherField; 18 | } 19 | 20 | public static final class DataRowEventFactory implements EventFactory<DataRowEvent> { 21 | @Override 22 | public DataRowEvent newInstance() { 23 | return new DataRowEvent(new DataRow()); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/DataRowEventConsumer.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import com.lmax.disruptor.WorkHandler; 4 | import java.util.function.Consumer; 5 | 6 | /** 7 | * Parallel data row consumer 8 | */ 9 | public final class DataRowEventConsumer implements WorkHandler<DataRowEvent> { 10 | private final Consumer<DataRow> consumer; 11 | private int rowCount = 0; 12 | 13 | public DataRowEventConsumer(Consumer<DataRow> consumer) { 14 | this.consumer = consumer; 15 | } 16 | 17 | @Override 18 | public void onEvent(DataRowEvent event) throws Exception { 19 | consumer.accept(event.row); 20 | rowCount++; 21 | } 22 | 23 | public int rowCount() { 24 | return rowCount; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/DataRowEventProducer.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import com.lmax.disruptor.RingBuffer; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | 7 | import java.util.Iterator; 8 | import java.util.concurrent.CountDownLatch; 9 | 10 | /** 11 | * Pour data rows from input iterator into ring buffer 12 | */ 13 | public final class DataRowEventProducer implements Runnable { 14 | private static final Logger logger = LoggerFactory.getLogger(DataRowEventProducer.class); 15 | 16 | private final Iterator<DataRow> input; 17 | 18 | private final DataRowEventTranslator translator; 19 | 20 | private final CountDownLatch producerIsDone; 21 | 22 | public DataRowEventProducer(Iterator<DataRow> input, RingBuffer<DataRowEvent> ringBuffer) { 23 | this.input = input; 24 | this.translator = new DataRowEventTranslator(ringBuffer); 25 | this.producerIsDone = new CountDownLatch(1); 26 | } 27 | 28 | @Override 29 | public void run() { 30 | input.forEachRemaining(translator); 31 | 32 | logger.info("Producer finished after processing rows: " + translator.rowCount()); 33 | 34 | producerIsDone.countDown(); 35 | } 36 | 37 | public CountDownLatch isDoneLatch() { 38 | return producerIsDone; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/DataRowEventTranslator.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import com.lmax.disruptor.RingBuffer; 4 | 5 | import java.util.function.Consumer; 6 | 7 | /** 8 | * Copy fields from input buffer to the next vacant ring buffer slot 9 | * todo consider implementing com.lmax.disruptor.EventTranslator 10 | */ 11 | public final class DataRowEventTranslator implements Consumer<DataRow> { 12 | private final RingBuffer<DataRowEvent> ringBuffer; 13 | private int rowCount = 0; 14 | 15 | public DataRowEventTranslator(RingBuffer<DataRowEvent> ringBuffer)
{ 16 | this.ringBuffer = ringBuffer; 17 | } 18 | 19 | @Override 20 | public void accept(DataRow inputBuffer) { 21 | final long seq = ringBuffer.next(); 22 | ringBuffer.get(seq).from(inputBuffer); 23 | ringBuffer.publish(seq); 24 | 25 | rowCount++; 26 | } 27 | 28 | public int rowCount() { 29 | return rowCount; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/DisruptorThreadFactory.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import java.util.concurrent.ThreadFactory; 4 | import java.util.concurrent.atomic.AtomicInteger; 5 | 6 | /** 7 | * Name and sequentially number created threads by service 8 | */ 9 | public final class DisruptorThreadFactory implements ThreadFactory { 10 | private final AtomicInteger threadNumber = new AtomicInteger(1); 11 | private final ThreadGroup group = Thread.currentThread().getThreadGroup(); 12 | private final String namePrefix; 13 | 14 | public DisruptorThreadFactory(String serviceName) { 15 | namePrefix = serviceName + "-thread-"; 16 | } 17 | 18 | @Override 19 | public Thread newThread(Runnable runnable) { 20 | return new Thread(group, runnable, namePrefix + threadNumber.getAndIncrement()); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/ParallelProcessingContext.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import com.lmax.disruptor.dsl.Disruptor; 4 | 5 | import java.util.Arrays; 6 | import java.util.concurrent.CountDownLatch; 7 | import java.util.concurrent.TimeUnit; 8 | 9 | /** 10 | * Allow to await the end of processing and clean up allocated resources afterwards. A latch allows to wait for 11 | * the producer to completely process input. A shutdown timeout allows consumers finish processing of remaining items. 
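*
* A hypothetical usage sketch (assuming the ParallelProcessor defined further below; the 5_000 ms timeouts are arbitrary):
*   final ParallelProcessingContext ctx = processor.process(input, output, 5_000, 5_000);
*   if (ctx.await()) {    // the producer drained its input iterator before the processing timeout
*       ctx.shutDown();   // give consumers up to the shutdown timeout to finish enqueued events
*   }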
12 | */ 13 | public final class ParallelProcessingContext { 14 | private final Disruptor disruptor; 15 | 16 | private final DataRowEventConsumer[] consumers; 17 | 18 | private final CountDownLatch producerIsDone; 19 | 20 | private final long processingTimeout; // is not expected to be reached unless an unexpected error happens 21 | 22 | private final long shutdownTimeout; // allows consumers to finish processing enqueued events after processor is finished 23 | 24 | public ParallelProcessingContext(Disruptor disruptor, 25 | CountDownLatch producerIsDone, 26 | DataRowEventConsumer[] consumers, 27 | long processingTimeout, 28 | long shutdownTimeout) { 29 | this.consumers = consumers; 30 | this.processingTimeout = processingTimeout; 31 | this.shutdownTimeout = shutdownTimeout; 32 | this.producerIsDone = producerIsDone; 33 | this.disruptor = disruptor; 34 | } 35 | 36 | public boolean await() { 37 | try { 38 | return producerIsDone.await(processingTimeout, TimeUnit.MILLISECONDS); 39 | } catch (Exception e) { 40 | throw new RuntimeException("Processing took too long", e); 41 | } 42 | } 43 | 44 | public void shutDown() { 45 | try { 46 | disruptor.shutdown(shutdownTimeout, TimeUnit.MILLISECONDS); 47 | } catch (Exception e) { 48 | throw new RuntimeException("Consumers took too long to shutdown", e); 49 | } 50 | } 51 | 52 | public int totalRowCount() { 53 | return Arrays.stream(consumers).map(DataRowEventConsumer::rowCount).reduce(0, (a, b) -> a + b); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/ParallelProcessor.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import com.lmax.disruptor.BlockingWaitStrategy; 4 | import com.lmax.disruptor.RingBuffer; 5 | import com.lmax.disruptor.dsl.Disruptor; 6 | import com.lmax.disruptor.dsl.ProducerType; 7 | 8 | import java.util.Iterator; 9 | import java.util.concurrent.Executor; 10 | import java.util.function.Consumer; 11 | 12 | /** 13 | * Disruptor-based single producer/multiple consumer queue 14 | */ 15 | public final class ParallelProcessor { 16 | private final Executor producerExecutor; 17 | private final int nConsumers; 18 | private final int nRingSlots; 19 | 20 | public ParallelProcessor(Executor producerExecutor, int nConsumers, int nRingSlots) { 21 | this.producerExecutor = producerExecutor; 22 | this.nConsumers = nConsumers; 23 | this.nRingSlots = nRingSlots; 24 | } 25 | 26 | public ParallelProcessingContext process(Iterator input, Consumer output, long processingTimeout, long shutdownTimeout) { 27 | final Disruptor disruptor = disruptor(nRingSlots); 28 | final DataRowEventConsumer[] consumers = consumers(nConsumers, output); 29 | disruptor.handleEventsWithWorkerPool(consumers); 30 | 31 | final RingBuffer ringBuffer = disruptor.start(); 32 | 33 | final DataRowEventProducer producer = new DataRowEventProducer(input, ringBuffer); 34 | producerExecutor.execute(producer); 35 | 36 | return new ParallelProcessingContext(disruptor, producer.isDoneLatch(), consumers, processingTimeout, shutdownTimeout); 37 | } 38 | 39 | private static DataRowEventConsumer[] consumers(int nConsumers, Consumer output) { 40 | final DataRowEventConsumer[] consumers = new DataRowEventConsumer[nConsumers]; 41 | for (int i = 0; i < nConsumers; i++) { 42 | consumers[i] = new DataRowEventConsumer(output); 43 | } 44 | return consumers; 45 | } 46 | 47 | private static Disruptor disruptor(int 
nRingSlots) { 48 | return new Disruptor<>( 49 | new DataRowEvent.DataRowEventFactory(), 50 | nRingSlots, 51 | new DisruptorThreadFactory("consumer"), 52 | ProducerType.SINGLE, 53 | new BlockingWaitStrategy()); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /disruptortest/src/main/java/net/ndolgov/disruptortest/ParallelRequestRunner.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | 6 | import java.util.Iterator; 7 | import java.util.function.Consumer; 8 | 9 | /** 10 | * Placeholder for an actual client. The run method is supposed to be called for every request. The caller is 11 | * expected to process potential runtime exceptions. 12 | */ 13 | public final class ParallelRequestRunner { 14 | private static final Logger logger = LoggerFactory.getLogger(ParallelRequestRunner.class); 15 | 16 | private final ParallelProcessor processor; 17 | 18 | private final long processingTimeout; 19 | 20 | private final long shutdownTimeout; 21 | 22 | public ParallelRequestRunner(ParallelProcessor processor, long processingTimeout, long shutdownTimeout) { 23 | this.processor = processor; 24 | this.processingTimeout = processingTimeout; 25 | this.shutdownTimeout = shutdownTimeout; 26 | } 27 | 28 | public void run(Iterator input, Consumer output) { 29 | final ParallelProcessingContext ctx = processor.process(input, output, processingTimeout, shutdownTimeout); 30 | 31 | if (ctx.await()) { 32 | logger.info("Producer finished, waiting for consumers"); 33 | 34 | ctx.shutDown(); 35 | logger.info("Consumers finished after processing rows: " + ctx.totalRowCount()); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /disruptortest/src/test/java/net/ndolgov/disruptortest/ParallelProcessorTest.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.disruptortest; 2 | 3 | import org.testng.annotations.Test; 4 | 5 | import java.util.concurrent.ExecutorService; 6 | import java.util.concurrent.Executors; 7 | import java.util.concurrent.atomic.AtomicInteger; 8 | import java.util.function.Consumer; 9 | import java.util.stream.IntStream; 10 | 11 | import static org.testng.Assert.assertEquals; 12 | 13 | public final class ParallelProcessorTest { 14 | @Test 15 | public void testShortBurst() { 16 | final ExecutorService producerExecutor = Executors.newSingleThreadExecutor(new DisruptorThreadFactory("producer")); 17 | final int nProcessors = Runtime.getRuntime().availableProcessors(); 18 | 19 | try { 20 | final ParallelProcessor processor = new ParallelProcessor(producerExecutor, nProcessors, 1_024); 21 | final ParallelRequestRunner runner = new ParallelRequestRunner(processor, 5_000, 5_000); 22 | 23 | final int expectedCount = 100_000; 24 | final AtomicInteger count = new AtomicInteger(0); 25 | 26 | runner.run( 27 | IntStream.rangeClosed(1, expectedCount).mapToObj(i -> new DataRow(i, i)).iterator(), 28 | new Consumer() { 29 | @Override 30 | public synchronized void accept(DataRow dataRow) { 31 | count.incrementAndGet(); 32 | } 33 | }); 34 | 35 | assertEquals(count.get(), expectedCount); 36 | } finally { 37 | producerExecutor.shutdown(); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /disruptortest/src/test/resources/log4j.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /flatbufferstest/src/main/flatbuffers/timeseries.fbs: -------------------------------------------------------------------------------- 1 | // 2 | // A time series is a sequence of (t,v) pairs 3 | // 4 | 5 | namespace net.ndolgov.fbstest; 6 | 7 | table TimeSeries { 8 | dataPointCount : int; 9 | times : [long]; 10 | values : [double]; 11 | } 12 | 13 | root_type TimeSeries; -------------------------------------------------------------------------------- /flatbufferstest/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /grpctest/src/main/java/net/ndolgov/grpctest/plain/GrpcServer.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.grpctest.plain; 2 | 3 | import io.grpc.BindableService; 4 | import io.grpc.Server; 5 | import io.grpc.ServerBuilder; 6 | import io.grpc.netty.NettyServerBuilder; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.net.InetSocketAddress; 11 | import java.util.Collection; 12 | import java.util.concurrent.ExecutorService; 13 | 14 | /** 15 | * Create a GRPC server for given request handlers and bind it to provided "host:port". 16 | */ 17 | public final class GrpcServer { 18 | private static final Logger logger = LoggerFactory.getLogger(GrpcServer.class); 19 | private final ExecutorService executor; 20 | private final Server server; 21 | private final int port; 22 | 23 | public GrpcServer(String hostname, int port, Collection services, ExecutorService executor) { 24 | this.port = port; 25 | this.executor = executor; 26 | 27 | final ServerBuilder builder = NettyServerBuilder.forAddress(new InetSocketAddress(hostname, port)); 28 | services.forEach(builder::addService); 29 | this.server = builder.executor(executor).build(); 30 | } 31 | 32 | public void start() { 33 | try { 34 | server.start(); 35 | logger.info("Started " + this); 36 | } catch (Exception e) { 37 | throw new RuntimeException("Could not start server", e); 38 | } 39 | } 40 | 41 | public void stop() { 42 | try { 43 | logger.info("Stopping " + this); 44 | 45 | executor.shutdown(); 46 | 47 | server.shutdown(); 48 | 49 | logger.info("Stopped " + this); 50 | } catch (Exception e) { 51 | logger.warn("Interrupted while shutting down " + this); 52 | } 53 | } 54 | 55 | @Override 56 | public String toString() { 57 | return "{GrpcServer:port=" + port + "}"; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /grpctest/src/main/java/net/ndolgov/grpctest/plain/TestServiceAImpl.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.grpctest.plain; 2 | 3 | import io.grpc.stub.StreamObserver; 4 | import net.ndolgov.grpctest.api.TestServiceAProto.TestRequestA; 5 | import net.ndolgov.grpctest.api.TestServiceAProto.TestResponseA; 6 | import net.ndolgov.grpctest.api.TestServiceAGrpc.TestServiceAImplBase; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.util.concurrent.CompletableFuture; 11 | import java.util.concurrent.Executor; 12 | 13 | /** 14 | * Service A request 
handler 15 | */ 16 | public final class TestServiceAImpl extends TestServiceAImplBase { 17 | private static final Logger logger = LoggerFactory.getLogger(TestServiceAImpl.class); 18 | 19 | private final Executor executor; 20 | 21 | public TestServiceAImpl(Executor executor) { 22 | this.executor = executor; 23 | } 24 | 25 | @Override 26 | public void process(TestRequestA request, StreamObserver observer) { 27 | logger.info("Processing request: " + request); 28 | 29 | final CompletableFuture future = CompletableFuture.supplyAsync( 30 | () -> { 31 | logger.info("Computing result"); // todo this is where actual time-consuming processing would be 32 | return "RESULT"; 33 | }, 34 | executor); 35 | 36 | future.whenComplete((result, e) -> { 37 | if (e == null) { 38 | observer.onNext(response(request.getRequestId(), result)); 39 | observer.onCompleted(); 40 | } else { 41 | observer.onError(e); 42 | } 43 | }); 44 | } 45 | 46 | private static TestResponseA response(long requestId, String result) { 47 | return TestResponseA.newBuilder(). 48 | setRequestId(requestId). 49 | setSuccess(true). 50 | setResult(result). 51 | build(); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /grpctest/src/main/java/net/ndolgov/grpctest/plain/TestServiceBImpl.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.grpctest.plain; 2 | 3 | import io.grpc.stub.StreamObserver; 4 | import net.ndolgov.grpctest.api.TestServiceBProto.TestRequestB; 5 | import net.ndolgov.grpctest.api.TestServiceBProto.TestResponseB; 6 | import net.ndolgov.grpctest.api.TestServiceBGrpc.TestServiceBImplBase; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.util.concurrent.CompletableFuture; 11 | import java.util.concurrent.Executor; 12 | 13 | /** 14 | * Service B request handler 15 | */ 16 | public final class TestServiceBImpl extends TestServiceBImplBase { 17 | private static final Logger logger = LoggerFactory.getLogger(TestServiceBImpl.class); 18 | 19 | private final Executor executor; 20 | 21 | public TestServiceBImpl(Executor executor) { 22 | this.executor = executor; 23 | } 24 | 25 | @Override 26 | public void process(TestRequestB request, StreamObserver observer) { 27 | logger.info("Processing request: " + request); 28 | 29 | final CompletableFuture future = CompletableFuture.supplyAsync( 30 | () -> { 31 | logger.info("Computing result"); // todo this is where actual time-consuming processing would be 32 | return "RESULT"; 33 | }, 34 | executor); 35 | 36 | future.whenComplete((result, e) -> { 37 | if (e == null) { 38 | observer.onNext(response(request.getRequestId(), result)); 39 | observer.onCompleted(); 40 | } else { 41 | observer.onError(e); 42 | } 43 | }); 44 | } 45 | 46 | private static TestResponseB response(long requestId, String result) { 47 | return TestResponseB.newBuilder(). 48 | setRequestId(requestId). 49 | setSuccess(true). 50 | setResult(result). 
51 | build(); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /grpctest/src/main/proto/testsvcA.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package grpctest; 4 | 5 | option java_multiple_files = false; 6 | option java_package = "net.ndolgov.grpctest.api"; 7 | option java_outer_classname = "TestServiceAProto"; 8 | option objc_class_prefix = "TS1P"; 9 | 10 | 11 | service TestServiceA { 12 | rpc Process (TestRequestA) returns (TestResponseA) {} 13 | } 14 | 15 | message TestRequestA { 16 | int64 requestId = 1; 17 | } 18 | 19 | message TestResponseA { 20 | bool success = 1; 21 | int64 requestId = 2; 22 | string result = 3; 23 | } -------------------------------------------------------------------------------- /grpctest/src/main/proto/testsvcB.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package grpctest; 4 | 5 | option java_multiple_files = false; 6 | option java_package = "net.ndolgov.grpctest.api"; 7 | option java_outer_classname = "TestServiceBProto"; 8 | option objc_class_prefix = "TS2P"; 9 | 10 | 11 | service TestServiceB { 12 | rpc Process (TestRequestB) returns (TestResponseB) {} 13 | } 14 | 15 | message TestRequestB { 16 | int64 requestId = 1; 17 | } 18 | 19 | message TestResponseB { 20 | bool success = 1; 21 | int64 requestId = 2; 22 | string result = 3; 23 | } -------------------------------------------------------------------------------- /grpctest/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/docvalues/DocValuesQuery.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.docvalues; 2 | 3 | import org.apache.lucene.index.LeafReader; 4 | import org.apache.lucene.index.LeafReaderContext; 5 | import org.apache.lucene.index.NumericDocValues; 6 | import org.apache.lucene.queries.CustomScoreProvider; 7 | import org.apache.lucene.queries.CustomScoreQuery; 8 | import org.apache.lucene.search.Query; 9 | 10 | import java.io.IOException; 11 | 12 | /** 13 | * Find and remember NumericDocValues for the required fields 14 | */ 15 | public final class DocValuesQuery extends CustomScoreQuery { 16 | private final Processor processor; 17 | 18 | /** 19 | * @param subQuery the actual query to run 20 | * @param processor retrieved field value processor 21 | */ 22 | public DocValuesQuery(Query subQuery, Processor processor) { 23 | super(subQuery); 24 | this.processor = processor; 25 | } 26 | 27 | @Override 28 | protected CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) throws IOException { 29 | final LeafReader reader = context.reader(); 30 | 31 | final NumericDocValues longs = reader.getNumericDocValues("SomeLongFieldName"); 32 | final NumericDocValues doubles = reader.getNumericDocValues("SomeDoubleFieldName"); 33 | 34 | return new Provider(context, processor, longs, doubles); 35 | } 36 | 37 | public Processor processor() { 38 | return processor; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/docvalues/Processor.java: 
-------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.docvalues; 2 | 3 | /** 4 | * Collect field values from one Lucene document at a time. 5 | */ 6 | public interface Processor { 7 | void setSomeField(long value); 8 | 9 | void setAnotherField(double value); 10 | 11 | /** Process field values collected for the current Lucene document */ 12 | void onDocument(); 13 | } 14 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/docvalues/Provider.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.docvalues; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.index.NumericDocValues; 5 | import org.apache.lucene.queries.CustomScoreProvider; 6 | 7 | /** 8 | * Retrieve values from DocValues fields 9 | */ 10 | public final class Provider extends CustomScoreProvider { 11 | private static final float DEFAULT_SCORE = 0; 12 | 13 | private final Processor processor; 14 | private final NumericDocValues someLongs; 15 | private final NumericDocValues someDoubles; 16 | 17 | public Provider(LeafReaderContext context, Processor processor, NumericDocValues someLongs, NumericDocValues someDoubles) { 18 | super(context); 19 | this.processor = processor; 20 | this.someLongs = someLongs; 21 | this.someDoubles = someDoubles; 22 | } 23 | 24 | @Override 25 | public float customScore(int docId, float subQueryScore, float valSrcScore) { 26 | processor.setSomeField(someLongs.get(docId)); 27 | processor.setAnotherField(Double.longBitsToDouble(someDoubles.get(docId))); 28 | 29 | processor.onDocument(); 30 | 31 | return DEFAULT_SCORE; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/docvalues/Searcher.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.docvalues; 2 | 3 | import org.apache.lucene.search.IndexSearcher; 4 | 5 | /** 6 | * Execute custom query, ignore returned TopDocs 7 | */ 8 | public final class Searcher { 9 | private static final int NONE = 1; // the minimum allowed number 10 | 11 | public static Processor search(DocValuesQuery query, IndexSearcher searcher) { 12 | try { 13 | searcher.search(query, NONE); 14 | return query.processor(); 15 | } catch (Exception e) { 16 | throw new RuntimeException("Failed to process matching Lucene documents", e); 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/scoring/Processor.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.scoring; 2 | 3 | import org.apache.lucene.index.StoredFieldVisitor; 4 | 5 | /** 6 | * Extract data points from Lucene document field values. The visitor approach allows to reduce new object churn. 
7 | */ 8 | public interface Processor { 9 | /** @return Lucene field visitor to apply to all matching documents */ 10 | StoredFieldVisitor visitor(); 11 | 12 | /** Process Lucene document fields gathered by the {@link #visitor visitor} */ 13 | void onDocument(); 14 | } 15 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/scoring/Provider.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.scoring; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.index.StoredFieldVisitor; 5 | import org.apache.lucene.queries.CustomScoreProvider; 6 | 7 | import java.io.IOException; 8 | 9 | /** 10 | * Process a matching query document without the need to return it in {@link org.apache.lucene.search.IndexSearcher#search search results} 11 | */ 12 | public final class Provider extends CustomScoreProvider { 13 | private final Processor processor; 14 | 15 | private final StoredFieldVisitor visitor; 16 | 17 | public Provider(LeafReaderContext context, Processor processor) { 18 | super(context); 19 | this.processor = processor; 20 | this.visitor = processor.visitor(); 21 | } 22 | 23 | @Override 24 | public float customScore(int docId, float subQueryScore, float valSrcScore) throws IOException { 25 | context.reader().document(docId, visitor); 26 | processor.onDocument(); 27 | 28 | return super.customScore(docId, subQueryScore, valSrcScore); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/scoring/ScoringQuery.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.scoring; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.queries.CustomScoreProvider; 5 | import org.apache.lucene.queries.CustomScoreQuery; 6 | import org.apache.lucene.search.Query; 7 | 8 | /** 9 | * Inspired by http://opensourceconnections.com/blog/2014/03/12/using-customscorequery-for-custom-solrlucene-scoring/ 10 | */ 11 | public final class ScoringQuery extends CustomScoreQuery { 12 | private final Processor processor; 13 | 14 | public ScoringQuery(Query subQuery, Processor processor) { 15 | super(subQuery); 16 | this.processor = processor; 17 | } 18 | 19 | @Override 20 | protected CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) { 21 | return new Provider(context, processor); 22 | } 23 | 24 | public Processor processor() { 25 | return processor; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/scoring/Searcher.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.scoring; 2 | 3 | import org.apache.lucene.search.IndexSearcher; 4 | 5 | /** 6 | * Execute custom query, ignore returned TopDocs 7 | */ 8 | public final class Searcher { 9 | private static final int NONE = 1; // the minimum allowed number 10 | 11 | public static Processor search(ScoringQuery query, IndexSearcher searcher) { 12 | try { 13 | searcher.search(query, NONE); 14 | return query.processor(); 15 | } catch (Exception e) { 16 | throw new RuntimeException("Failed to process matching Lucene documents", e); 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- 
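Taken together, the scoring classes above can be wired as in the following hypothetical sketch (the MatchAllDocsQuery sub-query, the indexSearcher instance and the consume() callback are assumptions for illustration, not part of this module):

    final Visitor visitor = new Visitor();   // the StoredFieldVisitor defined in the next file
    final Processor processor = new Processor() {
        @Override public StoredFieldVisitor visitor() { return visitor; }
        @Override public void onDocument() { consume(visitor.value); } // placeholder per-document callback
    };
    // run the wrapped query; matching documents are pushed through the visitor, TopDocs are ignored
    Searcher.search(new ScoringQuery(new MatchAllDocsQuery(), processor), indexSearcher);
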
/lucenetest/src/main/java/net/ndolgov/lucenetest/scoring/Visitor.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.scoring; 2 | 3 | import org.apache.lucene.index.FieldInfo; 4 | import org.apache.lucene.index.StoredFieldVisitor; 5 | 6 | import java.util.HashSet; 7 | import java.util.Set; 8 | 9 | /** Extract a fixed set of values */ 10 | public final class Visitor extends StoredFieldVisitor { 11 | private static final String FIELD_1 = "FIELD1"; 12 | 13 | private final Set needed; 14 | 15 | public long value; 16 | 17 | public Visitor() { 18 | needed = new HashSet(); 19 | needed.add(FIELD_1); 20 | } 21 | 22 | @Override 23 | public void longField(FieldInfo fieldInfo, long value) { 24 | if (fieldInfo.name.equals(FIELD_1)) { 25 | this.value = value; 26 | } 27 | } 28 | 29 | @Override 30 | public Status needsField(FieldInfo fieldInfo) { 31 | return needed.contains(fieldInfo.name) ? Status.YES : Status.NO; 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/search/LongFieldScorer.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.search; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.index.PostingsEnum; 5 | import org.apache.lucene.index.TermsEnum; 6 | import org.apache.lucene.search.DocIdSetIterator; 7 | import org.apache.lucene.util.BytesRef; 8 | import org.apache.lucene.util.BytesRefBuilder; 9 | import org.apache.lucene.util.NumericUtils; 10 | 11 | import java.io.IOException; 12 | 13 | /** 14 | * {@link org.apache.lucene.search.TermQuery.TermWeight#scorer}-like scorer of numeric types of long type. 15 | * It returns an iterator of documents with the given value set for the given field. 
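*
* For example, a QueryBuilder for the SearchQuery type defined later in this package could
* (hypothetically) compile a query into: new LongFieldScorer(query.fieldName, query.value)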
16 | */ 17 | public class LongFieldScorer implements QueryScorer { 18 | private final String field; 19 | 20 | private final BytesRef fieldValueByteRef; 21 | 22 | private PostingsEnum postingsEnum; // to be reused 23 | 24 | private DocIdSetIterator docIdSetIterator; 25 | 26 | public LongFieldScorer(String field, long value) { 27 | this.field = field; 28 | this.fieldValueByteRef = asByteRef(value); 29 | } 30 | 31 | @Override 32 | public void reset(LeafReaderContext leafReaderContext) throws IOException { 33 | docIdSetIterator = null; 34 | 35 | final TermsEnum termsEnum = leafReaderContext.reader().fields().terms(field).iterator(); 36 | if (termsEnum == null) { 37 | return; 38 | } 39 | 40 | for (BytesRef term = termsEnum.term(); term != null; termsEnum.next()) { 41 | if (term.compareTo(fieldValueByteRef) == 0) { 42 | docIdSetIterator = postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); 43 | return; 44 | } 45 | } 46 | } 47 | 48 | @Override 49 | public DocIdSetIterator iterator() { 50 | return docIdSetIterator; 51 | } 52 | 53 | private static BytesRef asByteRef(long value) { 54 | final BytesRefBuilder refBuilder = new BytesRefBuilder(); 55 | refBuilder.grow(8); 56 | refBuilder.clear(); 57 | NumericUtils.longToPrefixCoded(value, 0, refBuilder); 58 | return refBuilder.get(); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/search/Processor.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.search; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | 5 | import java.io.IOException; 6 | 7 | /** 8 | * Collect actual field values from matching Lucene documents 9 | */ 10 | public interface Processor { 11 | /** 12 | * Retrieve values of required fields from the document with given id 13 | * @param docId 14 | */ 15 | void process(int docId); 16 | 17 | /** 18 | * Prepare to process another index segment 19 | * @param readerCtx 20 | * @throws IOException 21 | */ 22 | void reset(LeafReaderContext readerCtx) throws IOException; 23 | } 24 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/search/QueryBuilder.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.search; 2 | 3 | /** 4 | * DIY Lucene query compiler. 5 | * @param Query-like query type in your DSL 6 | */ 7 | public interface QueryBuilder { 8 | /** 9 | * @return scorer corresponding to a given query 10 | */ 11 | QueryScorer build(T query); 12 | } 13 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/search/QueryScorer.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.search; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.search.DocIdSetIterator; 5 | 6 | import java.io.IOException; 7 | 8 | /** 9 | * DIY Lucene scorer that, in contrast to original Lucene one, can be reused for multiple leaf context to avoid 10 | * excessive memory allocations. 
11 | */ 12 | public interface QueryScorer { 13 | /** 14 | * @return matching documents from the current index segment 15 | */ 16 | DocIdSetIterator iterator(); 17 | 18 | /** 19 | * Prepare to traverse another index segment 20 | */ 21 | void reset(LeafReaderContext leafReaderContext) throws IOException; 22 | } 23 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/search/SearchQuery.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.search; 2 | 3 | /** 4 | * The simplest possible TermQuery-like example of an abstraction playing the role of a "Lucene Query". 5 | * 6 | * In real life it would be a much richer proprietary data structure that can represent filters and similar 7 | * abstractions of your query DSL. 8 | */ 9 | public final class SearchQuery { 10 | public final String fieldName; 11 | public final long value; 12 | 13 | public SearchQuery(String fieldName, long value) { 14 | this.fieldName = fieldName; 15 | this.value = value; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /lucenetest/src/main/java/net/ndolgov/lucenetest/search/Searcher.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.search; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.search.DocIdSetIterator; 5 | import org.apache.lucene.search.IndexSearcher; 6 | 7 | /** 8 | * DIY IndexSearcher that, in contrast to the original Lucene one, can reset Scorer/Processor to reuse them for 9 | * multiple leaf contexts without excessive memory allocations. 10 | * @param Query-like query type in your DSL 11 | */ 12 | public final class Searcher { 13 | private final QueryBuilder queryBuilder; 14 | private final IndexSearcher indexSearcher; 15 | 16 | public Searcher(QueryBuilder queryBuilder, IndexSearcher indexSearcher) { 17 | this.queryBuilder = queryBuilder; 18 | this.indexSearcher = indexSearcher; 19 | } 20 | 21 | /** 22 | * For every index segment in the given index, apply the processor to the documents returns by the scorer. 23 | * @param query proprietary DSL query to execute 24 | * @param processor field value collector 25 | * @return processor 26 | */ 27 | public
<P extends Processor>
P search(T query, P processor) { 28 | final QueryScorer scorer = queryBuilder.build(query); 29 | 30 | try { 31 | for (LeafReaderContext leafCtx : indexSearcher.getIndexReader().leaves()) { 32 | scorer.reset(leafCtx); 33 | processor.reset(leafCtx); 34 | 35 | final DocIdSetIterator docIdIter = scorer.iterator(); 36 | if (docIdIter != null) { 37 | int docId = docIdIter.nextDoc(); 38 | while (docId != DocIdSetIterator.NO_MORE_DOCS) { 39 | processor.process(docId); 40 | docId = docIdIter.nextDoc(); 41 | } 42 | } 43 | } 44 | 45 | return processor; 46 | } catch (Exception e) { 47 | throw new RuntimeException("Failed to execute query: " + query, e); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /lucenetest/src/test/java/net/ndolgov/lucenetest/search/LuceneFields.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.lucenetest.search; 2 | 3 | import org.apache.lucene.document.DoubleDocValuesField; 4 | import org.apache.lucene.document.FieldType; 5 | import org.apache.lucene.document.LongField; 6 | import org.apache.lucene.document.NumericDocValuesField; 7 | import org.apache.lucene.index.IndexOptions; 8 | 9 | /** 10 | * Test schema fields 11 | */ 12 | public final class LuceneFields { 13 | public static final String INDEXED_FIELD_NAME = "TestIndexed"; 14 | public static final String LONG_FIELD_NAME = "TestLong"; 15 | public static final String DOUBLE_FIELD_NAME = "TestDouble"; 16 | 17 | /** The only indexed field to search by */ 18 | public static final LongField indexedField = new LongField(INDEXED_FIELD_NAME, -1, indexedLong()); 19 | 20 | public static final NumericDocValuesField longValueField = new NumericDocValuesField(LONG_FIELD_NAME, -1); 21 | 22 | public static final NumericDocValuesField doubleValueField = new DoubleDocValuesField(DOUBLE_FIELD_NAME, -1); 23 | 24 | 25 | public static FieldType indexedLong() { 26 | final FieldType type = new FieldType(); 27 | type.setTokenized(false); 28 | type.setOmitNorms(true); 29 | type.setStored(true); 30 | type.setIndexOptions(IndexOptions.DOCS); 31 | type.setNumericType(FieldType.NumericType.LONG); 32 | type.freeze(); 33 | return type; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/ColumnHeader.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | public interface ColumnHeader { 4 | /** @return the field name used for this column */ 5 | String name(); 6 | 7 | /** @return the type of this column */ 8 | ColumnType type(); 9 | 10 | enum ColumnType { 11 | LONG, DOUBLE; 12 | } 13 | } -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/GenericParquetReader.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | import org.apache.hadoop.fs.Path; 4 | import org.apache.log4j.Logger; 5 | import org.apache.log4j.LogManager; 6 | import org.apache.parquet.filter2.compat.FilterCompat; 7 | import org.apache.parquet.hadoop.ParquetReader; 8 | import org.apache.parquet.hadoop.api.ReadSupport; 9 | 10 | /** 11 | * 12 | */ 13 | public final class GenericParquetReader { 14 | private static final Logger logger = LogManager.getLogger(GenericParquetReader.class); 15 | private final ParquetReader reader; 16 | private final String 
path; 17 | 18 | public GenericParquetReader(ReadSupport support, String path) { 19 | this.path = path; 20 | 21 | try { 22 | reader = ParquetReader.builder(support, new Path(path)).build(); 23 | } catch (Exception e) { 24 | throw new RuntimeException("Failed to open Parquet file: " + path, e); 25 | } 26 | } 27 | 28 | public GenericParquetReader(ReadSupport support, String path, FilterCompat.Filter filter) { 29 | this.path = path; 30 | 31 | try { 32 | reader = ParquetReader.builder(support, new Path(path)).withFilter(filter).build(); 33 | } catch (Exception e) { 34 | throw new RuntimeException("Failed to open Parquet file: " + path, e); 35 | } 36 | } 37 | 38 | public T read() { 39 | try { 40 | return reader.read(); 41 | } catch (Exception e) { 42 | throw new RuntimeException("Failed to read next record from Parquet file: " + path, e); 43 | } 44 | } 45 | 46 | public void close() { 47 | try { 48 | reader.close(); 49 | } catch (Exception e) { 50 | logger.warn("Failed to close Parquet file: " + path + " because of: " + e.getMessage()); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/LongColumnHeader.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | public final class LongColumnHeader implements ColumnHeader { 4 | private final String name; 5 | 6 | public LongColumnHeader(String name) { 7 | this.name = name; 8 | } 9 | 10 | @Override 11 | public String name() { 12 | return name; 13 | } 14 | 15 | @Override 16 | public ColumnType type() { 17 | return ColumnType.LONG; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/ParquetLoggerOverride.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | import java.io.InputStream; 4 | import java.util.logging.FileHandler; 5 | import java.util.logging.Handler; 6 | import java.util.logging.Level; 7 | import java.util.logging.LogManager; 8 | import java.util.logging.Logger; 9 | 10 | /** 11 | * See https://issues.apache.org/jira/browse/SPARK-4412 and 12 | * http://stackoverflow.com/questions/805701/load-java-util-logging-config-file-for-default-initialization 13 | */ 14 | public class ParquetLoggerOverride { 15 | public static void fixParquetJUL() { 16 | try (final InputStream inputStream = ParquetLoggerOverride.class.getResourceAsStream("/logging.properties")) { 17 | LogManager.getLogManager().readConfiguration(inputStream); 18 | 19 | // trigger static initialization 20 | Class.forName(org.apache.parquet.Log.class.getName()); 21 | 22 | // make sure it will NOT write to console 23 | final Logger parquetLog = Logger.getLogger(org.apache.parquet.Log.class.getPackage().getName()); 24 | for (Handler h : parquetLog.getHandlers()) { 25 | parquetLog.removeHandler(h); 26 | } 27 | parquetLog.setUseParentHandlers(true); 28 | parquetLog.setLevel(Level.INFO); 29 | 30 | // redirect to file 31 | final FileHandler toFile = new FileHandler(); 32 | parquetLog.addHandler(toFile); 33 | } catch (final Exception e) { 34 | throw new IllegalArgumentException("Could not load default logging.properties file"); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/Record.java: 
-------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | /** 4 | * Record format: | rowId:long | metricId:long | time:long | value:long | 5 | */ 6 | public final class Record { 7 | public static final long NULL = -1; 8 | 9 | public long id; 10 | 11 | public long metric; 12 | 13 | public long time; 14 | 15 | public long value; 16 | 17 | public Record(long id, long metric, long time, long value) { 18 | this.id = id; 19 | this.metric = metric; 20 | this.time = time; 21 | this.value = value; 22 | } 23 | 24 | public long getLong(int index) { 25 | switch (index) { 26 | case 0 : return id; 27 | case 1 : return metric; 28 | case 2 : return time; 29 | case 3 : return value; 30 | default: throw new IllegalArgumentException("Unexpected column index: " + index); 31 | } 32 | } 33 | 34 | public void setLong(int index, long newValue) { 35 | switch (index) { 36 | case 0 : id = newValue; break; 37 | case 1 : metric = newValue; break; 38 | case 2 : time = newValue; break; 39 | case 3 : value = newValue; break; 40 | default: throw new IllegalArgumentException("Unexpected column index: " + index); 41 | } 42 | } 43 | 44 | public boolean isNull(int index) { 45 | return getLong(index) == Record.NULL; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/RecordFields.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | /** 4 | * Supported record fields 5 | */ 6 | public enum RecordFields { 7 | ROW_ID { 8 | @Override 9 | public String columnName() { 10 | return "ROWID"; 11 | } 12 | 13 | @Override 14 | public int index() { 15 | return 0; 16 | } 17 | }, 18 | METRIC { 19 | @Override 20 | public String columnName() { 21 | return "METRIC"; 22 | } 23 | 24 | @Override 25 | public int index() { 26 | return 1; 27 | } 28 | }, 29 | TIME { 30 | @Override 31 | public String columnName() { 32 | return "TIME"; 33 | } 34 | 35 | @Override 36 | public int index() { 37 | return 2; 38 | } 39 | }, 40 | VALUE { 41 | @Override 42 | public String columnName() { 43 | return "VALUE"; 44 | } 45 | 46 | @Override 47 | public int index() { 48 | return 3; 49 | } 50 | }; 51 | 52 | public abstract String columnName(); 53 | 54 | public abstract int index(); 55 | } 56 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/RecordFileUtil.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | import org.apache.hadoop.fs.Path; 4 | 5 | import java.io.IOException; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import static com.google.common.collect.Lists.newArrayList; 10 | 11 | /** 12 | * Parquet file operations 13 | */ 14 | public final class RecordFileUtil { 15 | public static void createParquetFile(String path, List rows, Map metadata) throws IOException { 16 | final RecordParquetWriter writer = new RecordParquetWriter( 17 | new Path(path), 18 | newArrayList( 19 | new LongColumnHeader(RecordFields.ROW_ID.columnName()), 20 | new LongColumnHeader(RecordFields.METRIC.columnName()), 21 | new LongColumnHeader(RecordFields.TIME.columnName()), 22 | new LongColumnHeader(RecordFields.VALUE.columnName())), 23 | metadata); 24 | 25 | try { 26 | rows.forEach(row -> { 27 | try { 28 | writer.write(row); 29 | } catch (IOException e) { 30 | throw new 
IllegalArgumentException(e); 31 | } 32 | }); 33 | } finally { 34 | writer.close(); 35 | } 36 | 37 | } 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/RecordParquetWriter.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | import org.apache.hadoop.fs.Path; 4 | import org.apache.parquet.hadoop.ParquetWriter; 5 | import org.apache.parquet.hadoop.metadata.CompressionCodecName; 6 | 7 | import java.io.IOException; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | /** 12 | * Write a dataset of {@link Record} instances to a Parquet file. 13 | */ 14 | public final class RecordParquetWriter extends ParquetWriter { 15 | private static final int DEFAULT_PAGE_SIZE = 512 * 1024; // 500K 16 | private static final int DEFAULT_BLOCK_SIZE = 128 * 1024 * 1024; // 128M 17 | 18 | public RecordParquetWriter(Path file, List headers, CompressionCodecName compressionCodecName, int blockSize, int pageSize, Map metadata) throws IOException { 19 | super(file, new RecordWriteSupport(headers, metadata), compressionCodecName, blockSize, pageSize); 20 | } 21 | 22 | /** 23 | * Create a new {@link Record} writer. Default compression is no compression. 24 | * 25 | * @param path the path name to write to (e.g. "file:///var/tmp/file.par") 26 | * @param headers column headers that represent the data schema 27 | * @param metadata custom metadata to attach to the newly created file 28 | * @throws IOException 29 | */ 30 | public RecordParquetWriter(Path path, List headers, Map metadata) throws IOException { 31 | this(path, headers, CompressionCodecName.UNCOMPRESSED, DEFAULT_BLOCK_SIZE, DEFAULT_PAGE_SIZE, metadata); 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/RecordWriteSupport.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.parquet.hadoop.api.WriteSupport; 5 | import org.apache.parquet.io.api.RecordConsumer; 6 | 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | public final class RecordWriteSupport extends WriteSupport { 11 | static final String ROW_COUNT = "ROW_COUNT"; 12 | 13 | private final List headers; 14 | 15 | private final Map metadata; 16 | 17 | private RecordConsumer recordConsumer; 18 | 19 | private WriterFactory.Writer writer; 20 | 21 | private long rowCount; 22 | 23 | public RecordWriteSupport(List headers, Map metadata) { 24 | this.headers = headers; 25 | this.metadata = metadata; 26 | } 27 | 28 | @Override 29 | public WriteContext init(Configuration configuration) { 30 | return new WriteContext(ToParquet.from(headers), metadata); 31 | } 32 | 33 | @Override 34 | public void prepareForWrite(RecordConsumer consumer) { 35 | this.recordConsumer = consumer; 36 | this.writer = WriterFactory.create(headers, consumer); 37 | } 38 | 39 | @Override 40 | public void write(Record record) { 41 | recordConsumer.startMessage(); 42 | 43 | try { 44 | writer.write(record); 45 | } catch (RuntimeException e) { 46 | throw new RuntimeException("Could not write record: " + record, e); 47 | } 48 | 49 | recordConsumer.endMessage(); 50 | rowCount++; 51 | } 52 | 53 | @Override 54 | public FinalizedWriteContext finalizeWrite() { 55 | metadata.put(ROW_COUNT, 
String.valueOf(rowCount)); 56 | return new FinalizedWriteContext(metadata); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/ToParquet.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | import org.apache.parquet.schema.MessageType; 4 | import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; 5 | import org.apache.parquet.schema.Type; 6 | import org.apache.parquet.schema.Types; 7 | import org.apache.parquet.schema.Types.Builder; 8 | import org.apache.parquet.schema.Types.GroupBuilder; 9 | 10 | import java.util.List; 11 | 12 | public final class ToParquet { 13 | public static final String SCHEMA_NAME = "NET.NDOLGOV.PARQUETTEST"; 14 | 15 | /** 16 | * @param headers column headers 17 | * @return create a Parquet schema from a sequence of types columns 18 | */ 19 | public static MessageType from(List headers) { 20 | return from(Types.buildMessage(), headers).named(SCHEMA_NAME); 21 | } 22 | 23 | private static GroupBuilder from(GroupBuilder groupBuilder, List headers) { 24 | GroupBuilder builder = groupBuilder; 25 | 26 | for (ColumnHeader header : headers) { 27 | builder = addField(header, builder).named(header.name()); // no ids because headers are created sequentially 28 | } 29 | 30 | return builder; 31 | } 32 | 33 | private static Builder>, GroupBuilder> addField(ColumnHeader header, GroupBuilder builder) { 34 | switch (header.type()) { 35 | case LONG: 36 | return builder.primitive(PrimitiveTypeName.INT64, Type.Repetition.REQUIRED); // fully qualified tuples only 37 | 38 | case DOUBLE: 39 | return builder.primitive(PrimitiveTypeName.DOUBLE, Type.Repetition.OPTIONAL); // not all metrics are expected in every row 40 | 41 | default: 42 | throw new IllegalArgumentException("Unexpected header type: " + header.type()); 43 | } 44 | } 45 | 46 | private ToParquet() { 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /parquettest/src/main/java/net/ndolgov/parquettest/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Example of writing a simple record type to a Parquet file without Avro/PBs
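* (for example, hypothetically: RecordFileUtil.createParquetFile("file:///tmp/records.par", rows, metadata)
* produces a file with the four long columns declared in RecordFields)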
3 | * 4 | * Heavily inspired by https://github.com/apache/parquet-mr/tree/master/parquet-protobuf 5 | */ 6 | package net.ndolgov.parquettest; -------------------------------------------------------------------------------- /parquettest/src/test/java/net/ndolgov/parquettest/FilterByValue.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.parquettest; 2 | 3 | import org.apache.parquet.filter2.predicate.UserDefinedPredicate; 4 | 5 | import java.io.Serializable; 6 | 7 | /** 8 | * Accept only rows with matching value 9 | */ 10 | final class FilterByValue extends UserDefinedPredicate implements Serializable { 11 | private final long value; 12 | 13 | public FilterByValue(long value) { 14 | this.value = value; 15 | } 16 | 17 | @Override 18 | public boolean keep(Long value) { 19 | return value == this.value; 20 | } 21 | 22 | @Override 23 | public boolean canDrop(org.apache.parquet.filter2.predicate.Statistics statistics) { 24 | return (value < statistics.getMin()) || (statistics.getMax() < value); 25 | } 26 | 27 | @Override 28 | public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics statistics) { 29 | return !canDrop(statistics); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /parquettest/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | ] > 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /parquettest/src/test/resources/logging.properties: -------------------------------------------------------------------------------- 1 | # Logging 2 | handlers = java.util.logging.FileHandler 3 | 4 | # File Logging 5 | java.util.logging.FileHandler.pattern = target/parquet.log 6 | java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter 7 | java.util.logging.FileHandler.level = FINE -------------------------------------------------------------------------------- /querydsl/pom.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 4.0.0 6 | 7 | net.ndolgov.querydsl 8 | querydsl 9 | 1.0.0-SNAPSHOT 10 | pom 11 | Query DSL parser examples 12 | 13 | 14 | querydsl-antlr 15 | querydsl-dsl 16 | querydsl-fastparse 17 | querydsl-parboiled 18 | querydsl-parquet 19 | 20 | 21 | 22 | 4.6 23 | 1.1.7 24 | 1.7.0 25 | 1.6.4 26 | 6.8.8 27 | 2.3.2 28 | UTF-8 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /querydsl/querydsl-antlr/src/main/antlr4/imports/Common.g4: -------------------------------------------------------------------------------- 1 | lexer grammar Common; 2 | 3 | FILEPATH : ('/' | LETTER) (LETTER | DIGIT | '-' | '_' | '/' | '.')+ ; 4 | 5 | //QUOTED_FILEPATH : QUOTE FILEPATH QUOTE; 6 | 7 | QUOTED_ID : QUOTE ID QUOTE; 8 | 9 | ID : LETTER (LETTER | '_' | DIGIT)*; 10 | 11 | INT : DIGIT+ ; 12 | 13 | fragment DIGIT : '0'..'9'; 14 | 15 | fragment LETTER : ('a'..'z' | 'A'..'Z') ; 16 | 17 | WS : (' ' | '\r' | '\t' | '\n')+ -> skip ; 18 | 19 | QUOTE : '\''; -------------------------------------------------------------------------------- /querydsl/querydsl-antlr/src/main/antlr4/net/ndolgov/querydsl/antlr/action/QueryDsl.g4: -------------------------------------------------------------------------------- 1 | grammar QueryDsl; 2 | 3 | import Common; 4 | 5 | @parser::header { 6 | import 
net.ndolgov.querydsl.ast.expression.PredicateExpr; 7 | } 8 | 9 | @parser::members { 10 | private AstBuilder builder; 11 | 12 | public T parseWithAstBuilder(T builder) throws RecognitionException { 13 | this.builder = builder; 14 | query(); 15 | return builder; 16 | } 17 | } 18 | 19 | query : selectExpr fromExpr whereExpr; 20 | 21 | selectExpr : 'select' ('*' | metricExpr); 22 | 23 | metricExpr : INT {builder.onMetricId($INT.text);} (',' INT)* {builder.onMetricId($INT.text);} ; 24 | 25 | fromExpr : 'from' QUOTE FILEPATH QUOTE {builder.onFromFilePath($FILEPATH.text);} ; 26 | 27 | whereExpr : 'where' pred = conditionExpr {builder.onPredicate($pred.cond);} ; 28 | 29 | conditionExpr returns [PredicateExpr cond] : 30 | pred = longEqExpr { $cond = $pred.cond; } | 31 | left = nestedCondition '&&' right = nestedCondition {$cond = builder.onAnd($left.cond, $right.cond);} | 32 | left = nestedCondition '||' right = nestedCondition {$cond = builder.onOr($left.cond, $right.cond);} ; 33 | 34 | nestedCondition returns [PredicateExpr cond] : '(' pred = conditionExpr ')' {$cond = $pred.cond;} ; 35 | 36 | longEqExpr returns [PredicateExpr cond] : QUOTED_ID '=' INT {$cond = builder.onAttrEqLong($QUOTED_ID.text, $INT.text);} ; 37 | -------------------------------------------------------------------------------- /querydsl/querydsl-antlr/src/main/antlr4/net/ndolgov/querydsl/antlr/listener/ParquetDsl.g4: -------------------------------------------------------------------------------- 1 | grammar ParquetDsl; 2 | 3 | import Common; 4 | 5 | query : selectExpr fromExpr whereExpr; 6 | 7 | selectExpr : 'select' ('*' | metricExpr); 8 | 9 | metricExpr : INT (',' INT)*; 10 | 11 | fromExpr : 'from' QUOTE FILEPATH QUOTE; 12 | 13 | whereExpr : 'where' conditionExpr; 14 | 15 | conditionExpr : 16 | longEqExpr # LongEq | 17 | nestedCondition '&&' nestedCondition # And | 18 | nestedCondition '||' nestedCondition # Or ; 19 | 20 | nestedCondition : '(' conditionExpr ')'; 21 | 22 | longEqExpr : QUOTED_ID '=' INT; -------------------------------------------------------------------------------- /querydsl/querydsl-antlr/src/main/java/net/ndolgov/querydsl/antlr/action/AntlrActionDslParser.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.antlr.action; 2 | 3 | import net.ndolgov.querydsl.ast.DslQuery; 4 | import net.ndolgov.querydsl.parser.DslParser; 5 | import org.antlr.v4.runtime.ANTLRInputStream; 6 | import org.antlr.v4.runtime.CommonTokenStream; 7 | 8 | /** 9 | * ANTLR4-based DSL parser that uses grammar actions directly (instead of being a parse tree walker listener) 10 | */ 11 | public final class AntlrActionDslParser implements DslParser { 12 | @Override 13 | public DslQuery parse(String query) { 14 | try { 15 | return parser(query).parseWithAstBuilder(new AstBuilderImpl()).buildAst(); 16 | } catch (Exception e) { 17 | throw new IllegalArgumentException("Failed to parse: " + query, e); 18 | } 19 | } 20 | 21 | private static QueryDslParser parser(String query) { 22 | return new QueryDslParser( 23 | new CommonTokenStream( 24 | new QueryDslLexer( 25 | new ANTLRInputStream(query)))); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /querydsl/querydsl-antlr/src/main/java/net/ndolgov/querydsl/antlr/action/AstBuilder.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.antlr.action; 2 | 3 | import net.ndolgov.querydsl.ast.DslQuery; 4 | import 
net.ndolgov.querydsl.ast.expression.PredicateExpr; 5 | 6 | /** 7 | * Query AST builder that is notified by grammar actions about found elements. 8 | */ 9 | interface AstBuilder { 10 | void onMetricId(String longAsStr); 11 | 12 | void onFromFilePath(String path); 13 | 14 | PredicateExpr onAttrEqLong(String quotedAttrname, String longAsStr); 15 | 16 | PredicateExpr onAnd(PredicateExpr left, PredicateExpr right); 17 | 18 | PredicateExpr onOr(PredicateExpr left, PredicateExpr right); 19 | 20 | void onPredicate(PredicateExpr predicate); 21 | 22 | /** 23 | * @return the root of the AST 24 | */ 25 | DslQuery buildAst(); 26 | } 27 | -------------------------------------------------------------------------------- /querydsl/querydsl-antlr/src/main/java/net/ndolgov/querydsl/antlr/listener/AntlrListenerDslParser.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.antlr.listener; 2 | 3 | import net.ndolgov.querydsl.parser.DslParser; 4 | import net.ndolgov.querydsl.ast.DslQuery; 5 | import org.antlr.v4.runtime.ANTLRInputStream; 6 | import org.antlr.v4.runtime.CommonTokenStream; 7 | import org.antlr.v4.runtime.tree.ParseTreeWalker; 8 | 9 | /** 10 | * ANTLR4-based DSL parser implementation that implements parse tree listener interface (and has no Java code in the grammar) 11 | */ 12 | public final class AntlrListenerDslParser implements DslParser { 13 | @Override 14 | public DslQuery parse(String query) { 15 | try { 16 | final AstBuildingListener listener = new AstBuildingListener(); 17 | new ParseTreeWalker().walk(listener, parser(query).query()); 18 | return listener.buildAst(); 19 | } catch (Exception e) { 20 | throw new IllegalArgumentException("Failed to parse: " + query, e); 21 | } 22 | } 23 | 24 | private static ParquetDslParser parser(String query) { 25 | final ParquetDslParser parser = new ParquetDslParser( 26 | new CommonTokenStream( 27 | new ParquetDslLexer( 28 | new ANTLRInputStream(query)))); 29 | 30 | parser.setBuildParseTree(true); 31 | return parser; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | 9 | net.ndolgov.querydsl 10 | querydsl 11 | 1.0.0-SNAPSHOT 12 | 13 | 14 | querydsl-dsl 15 | 1.0.0-SNAPSHOT 16 | jar 17 | Query AST and Parser API 18 | 19 | 20 | 21 | org.slf4j 22 | slf4j-api 23 | ${slf4j.version} 24 | 25 | 26 | 27 | org.slf4j 28 | slf4j-log4j12 29 | ${slf4j.version} 30 | runtime 31 | 32 | 33 | 34 | 35 | 36 | 37 | org.apache.maven.plugins 38 | maven-compiler-plugin 39 | ${maven.compiler.plugin.version} 40 | 41 | 1.8 42 | 1.8 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/AstNode.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast; 2 | 3 | /** 4 | * Heterogeneous AST node 5 | */ 6 | public interface AstNode { 7 | } 8 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/DslQuery.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast; 2 | 3 | /** 4 | * The root of a query AST 5 | */ 6 | public final class DslQuery implements AstNode { 7 | public final Select 
selectNode; 8 | public final From fromNode; 9 | public final Where whereNode; 10 | 11 | public DslQuery(Select selectNode, From fromNode, Where whereNode) { 12 | this.whereNode = whereNode; 13 | this.selectNode = selectNode; 14 | this.fromNode = fromNode; 15 | } 16 | 17 | @Override 18 | public String toString() { 19 | return selectNode + " " + fromNode + " " + whereNode; 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/From.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast; 2 | 3 | /** 4 | * "/home/tmp/file.par" 5 | */ 6 | public final class From implements AstNode { 7 | public final String path; 8 | 9 | public From(String path) { 10 | this.path = path; 11 | } 12 | 13 | @Override 14 | public String toString() { 15 | return " FROM " + path; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/Projection.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast; 2 | 3 | /** 4 | * "123456" 5 | */ 6 | public final class Projection implements AstNode { 7 | public final long metricId; 8 | 9 | public Projection(long metricId) { 10 | this.metricId = metricId; 11 | } 12 | 13 | @Override 14 | public String toString() { 15 | return String.valueOf(metricId); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/Select.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast; 2 | 3 | import java.util.List; 4 | 5 | /** 6 | * "SELECT '*' | projection*" 7 | */ 8 | public final class Select implements AstNode { 9 | public final List projections; 10 | 11 | // no projections means select all 12 | public Select(List projections) { 13 | this.projections = projections; 14 | } 15 | 16 | @Override 17 | public String toString() { 18 | return "SELECT " + (all() ? 
"*" : projections); 19 | } 20 | 21 | public boolean all() { 22 | return projections.isEmpty(); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/Where.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast; 2 | 3 | import net.ndolgov.querydsl.ast.expression.PredicateExpr; 4 | 5 | /** 6 | * "WHERE predicate" 7 | */ 8 | public final class Where implements AstNode { 9 | public final PredicateExpr predicate; 10 | 11 | public Where(PredicateExpr predicate) { 12 | this.predicate = predicate; 13 | } 14 | 15 | @Override 16 | public String toString() { 17 | return " WHERE " + predicate; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/expression/AttrEqLong.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast.expression; 2 | 3 | /** 4 | * Long constant equality operator 5 | */ 6 | public final class AttrEqLong implements PredicateExpr { 7 | public final String attrName; 8 | public final long value; 9 | 10 | public AttrEqLong(String attrName, long value) { 11 | this.attrName = attrName; 12 | this.value = value; 13 | } 14 | 15 | @Override 16 | public > E accept(V visitor) { 17 | return visitor.visitAttrEqLong(this); 18 | } 19 | 20 | @Override 21 | public String toString() { 22 | return "(" + attrName + " = " + value + ")"; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/expression/BinaryExpr.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast.expression; 2 | 3 | /** 4 | * "predicate1 && predicate2" 5 | * todo binary vs n-ary 6 | */ 7 | public final class BinaryExpr implements PredicateExpr { 8 | public final PredicateExpr left; 9 | public final PredicateExpr right; 10 | public final Op operator; 11 | 12 | public BinaryExpr(PredicateExpr left, PredicateExpr right, Op operator) { 13 | this.left = left; 14 | this.right = right; 15 | this.operator = operator; 16 | } 17 | 18 | @Override 19 | public > E accept(V visitor) { 20 | return visitor.visitBinaryExpr(this); 21 | } 22 | 23 | @Override 24 | public final String toString() { 25 | return "(" + left.toString() + " " + operator.toString() + " " + right.toString() + ")"; 26 | } 27 | 28 | public enum Op { 29 | EQ, 30 | AND, OR 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/expression/NoOpExpr.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast.expression; 2 | 3 | /** 4 | * 5 | */ 6 | public final class NoOpExpr implements PredicateExpr { 7 | public static final PredicateExpr INSTANCE = new NoOpExpr(); 8 | 9 | @Override 10 | public > E accept(V visitor) { 11 | return null; 12 | } 13 | 14 | private NoOpExpr() { 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/ast/expression/PredicateExpr.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.ast.expression; 2 | 3 | 
/** 4 | * Predicate expression type 5 | */ 6 | public interface PredicateExpr { 7 | <E, V extends ExprVisitor<E>> E accept(V visitor); 8 | 9 | interface ExprVisitor<E> { 10 | E visitAttrEqLong(AttrEqLong expr); 11 | 12 | E visitBinaryExpr(BinaryExpr expr); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/parser/DslParser.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.parser; 2 | 3 | import net.ndolgov.querydsl.ast.DslQuery; 4 | 5 | /** 6 | * Generic DSL parser API 7 | */ 8 | public interface DslParser { 9 | /** 10 | * @param query query expression 11 | * @return the AST corresponding to a given query string 12 | */ 13 | DslQuery parse(String query); 14 | } 15 | -------------------------------------------------------------------------------- /querydsl/querydsl-dsl/src/main/java/net/ndolgov/querydsl/parser/Tokens.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.parser; 2 | 3 | /** 4 | * Tokens and keywords used in the query DSL 5 | */ 6 | public final class Tokens { 7 | public static final String SELECT = "select"; 8 | public static final String FROM = "from"; 9 | public static final String WHERE = "where"; 10 | 11 | public static final String AND = "&&"; 12 | public static final String OR = "||"; 13 | public static final String EQUALS = "="; 14 | 15 | public static final char LPAREN = '('; 16 | public static final char RPAREN = ')'; 17 | 18 | public static final char LQUOTE = '\''; 19 | public static final char RQUOTE = '\''; 20 | 21 | public static final char ASTERISK = '*'; 22 | } 23 | -------------------------------------------------------------------------------- /querydsl/querydsl-fastparse/build.sbt: -------------------------------------------------------------------------------- 1 | val fastparse_version = "1.0.0" 2 | val scalatest_version = "3.0.4" 3 | val scala_version = "2.12.3" 4 | 5 | val querydsl_fastparse_id = "querydsl-fastparse" 6 | 7 | lazy val root = Project(id = querydsl_fastparse_id, base = file(".") ). 8 | settings( 9 | scalaVersion := scala_version, 10 | scalacOptions ++= Seq("-deprecation", "-Xfatal-warnings") 11 | ). 12 | settings( 13 | name := querydsl_fastparse_id, 14 | organization := "net.ndolgov.querydsl", 15 | version := "1.0.0-SNAPSHOT" 16 | ). 17 | settings( 18 | libraryDependencies ++= Seq( 19 | "org.scalatest" %% "scalatest" % scalatest_version % Test, 20 | "com.lihaoyi" %% "fastparse" % fastparse_version, 21 | "net.ndolgov.querydsl" % "querydsl-dsl" % "1.0.0-SNAPSHOT" 22 | ) 23 | ).
24 | settings( 25 | resolvers += "Local Maven" at Path.userHome.asFile.toURI.toURL + ".m2/repository" 26 | ) 27 | 28 | 29 | -------------------------------------------------------------------------------- /querydsl/querydsl-fastparse/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.15 2 | -------------------------------------------------------------------------------- /querydsl/querydsl-fastparse/src/main/scala/net/ndolgov/querydsl/fastparse/FastparseDslParser.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.fastparse 2 | 3 | import net.ndolgov.querydsl.ast.DslQuery 4 | import net.ndolgov.querydsl.parser.DslParser 5 | 6 | /** Fastparse-based DSL parser implementation */ 7 | private final class FastparseDslParser(private val parser: FastparseParser) extends DslParser { 8 | override def parse(query: String): DslQuery = parser.parse(query) match { 9 | case Right(ast) => 10 | ast 11 | 12 | case Left(e) => 13 | //print(e.extra.traced.trace.mkString) // for syntax debugging 14 | throw new RuntimeException(e.msg); 15 | } 16 | } 17 | 18 | object FastparseDslParser { 19 | def apply() : DslParser = new FastparseDslParser(new FastparseParser()) 20 | } 21 | -------------------------------------------------------------------------------- /querydsl/querydsl-parboiled/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | 9 | net.ndolgov.querydsl 10 | querydsl 11 | 1.0.0-SNAPSHOT 12 | 13 | 14 | querydsl-parboiled 15 | 1.0.0-SNAPSHOT 16 | jar 17 | Parboiled query parser 18 | 19 | 20 | 21 | net.ndolgov.querydsl 22 | querydsl-dsl 23 | ${project.version} 24 | 25 | 26 | 27 | org.parboiled 28 | parboiled-java 29 | ${parboiled.version} 30 | 31 | 32 | 33 | org.slf4j 34 | slf4j-api 35 | ${slf4j.version} 36 | 37 | 38 | 39 | org.slf4j 40 | slf4j-log4j12 41 | ${slf4j.version} 42 | runtime 43 | 44 | 45 | 46 | org.testng 47 | testng 48 | ${testng.version} 49 | test 50 | 51 | 52 | 53 | 54 | 55 | 56 | org.apache.maven.plugins 57 | maven-compiler-plugin 58 | ${maven.compiler.plugin.version} 59 | 60 | 1.8 61 | 1.8 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /querydsl/querydsl-parboiled/src/main/java/net/ndolgov/querydsl/parboiled/ParboiledDslParser.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.parboiled; 2 | 3 | import net.ndolgov.querydsl.parser.DslParser; 4 | import net.ndolgov.querydsl.ast.DslQuery; 5 | import org.parboiled.Parboiled; 6 | import org.parboiled.parserunners.RecoveringParseRunner; 7 | 8 | /** 9 | * Parboiled-based DSL parser implementation 10 | */ 11 | public final class ParboiledDslParser implements DslParser { 12 | private final RecoveringParseRunner runner; 13 | 14 | public ParboiledDslParser() { 15 | runner = new RecoveringParseRunner<>(Parboiled.createParser(ParboiledParser.class).DslQuery()); 16 | } 17 | 18 | @Override 19 | public DslQuery parse(String query) { 20 | try { 21 | return runner.run(query).resultValue; 22 | } finally { 23 | //System.out.println(runner.getLog()); // for syntax debugging, TracingParseRunner 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /querydsl/querydsl-parboiled/src/test/resources/log4j.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /querydsl/querydsl-parquet/src/main/java/net/ndolgov/querydsl/parquet/PredicateExprs.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.parquet; 2 | 3 | import net.ndolgov.parquettest.RecordFields; 4 | import net.ndolgov.querydsl.ast.expression.AttrEqLong; 5 | import net.ndolgov.querydsl.ast.expression.BinaryExpr; 6 | import net.ndolgov.querydsl.ast.expression.NoOpExpr; 7 | import net.ndolgov.querydsl.ast.expression.PredicateExpr; 8 | 9 | /** 10 | * Parquet file predicate expression helpers 11 | */ 12 | final class PredicateExprs { 13 | public static boolean isNoOp(PredicateExpr expr) { 14 | return expr == NoOpExpr.INSTANCE; 15 | } 16 | 17 | /** 18 | * @return expression that will pass through rows matching all child expressions 19 | */ 20 | public static PredicateExpr conjunction(PredicateExpr left, PredicateExpr right) { 21 | return new BinaryExpr(left, right, BinaryExpr.Op.AND); 22 | } 23 | 24 | /** 25 | * @return expression that will pass through rows matching any of the two child expressions 26 | */ 27 | public static PredicateExpr disjunction(PredicateExpr left, PredicateExpr right) { 28 | return new BinaryExpr(left, right, BinaryExpr.Op.OR); 29 | } 30 | 31 | /** 32 | * @return expression that will pass through rows with attr values equal to a given constant 33 | */ 34 | public static PredicateExpr columnEq(RecordFields attr, long attrValue) { 35 | return new AttrEqLong(attr.columnName(), attrValue); 36 | } 37 | 38 | /** 39 | * @return expression that will force reading input file with no filter (and so won't incur additional cost) 40 | */ 41 | public static PredicateExpr noOpPredicate() { 42 | return NoOpExpr.INSTANCE; 43 | } 44 | } -------------------------------------------------------------------------------- /querydsl/querydsl-parquet/src/main/java/net/ndolgov/querydsl/parquet/ToParquetFilter.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.querydsl.parquet; 2 | 3 | import net.ndolgov.querydsl.ast.expression.AttrEqLong; 4 | import net.ndolgov.querydsl.ast.expression.BinaryExpr; 5 | import net.ndolgov.querydsl.ast.expression.PredicateExpr; 6 | import org.apache.parquet.filter2.compat.FilterCompat; 7 | import org.apache.parquet.filter2.predicate.FilterPredicate; 8 | 9 | import static net.ndolgov.querydsl.parquet.PredicateExprs.isNoOp; 10 | import static org.apache.parquet.filter2.predicate.FilterApi.and; 11 | import static org.apache.parquet.filter2.predicate.FilterApi.eq; 12 | import static org.apache.parquet.filter2.predicate.FilterApi.longColumn; 13 | import static org.apache.parquet.filter2.predicate.FilterApi.or; 14 | 15 | /** 16 | * Compile predicate expression tree into a Parquet filter 17 | */ 18 | final class ToParquetFilter { 19 | /** 20 | * @param expr filter expression 21 | * @return Parquet filter corresponding to the filter expression 22 | */ 23 | public static FilterCompat.Filter transform(PredicateExpr expr) { 24 | if (isNoOp(expr)) { 25 | return FilterCompat.NOOP; 26 | } 27 | 28 | return FilterCompat.get(toPredicate(expr)); 29 | } 30 | 31 | private static FilterPredicate toPredicate(PredicateExpr expr) { 32 | return expr.accept(new PredicateExpr.ExprVisitor() { 33 | @Override 34 | public 
FilterPredicate visitAttrEqLong(AttrEqLong expr) { 35 | return eq(longColumn(expr.attrName), expr.value); 36 | } 37 | 38 | @Override 39 | public FilterPredicate visitBinaryExpr(BinaryExpr expr) { 40 | final FilterPredicate left = toPredicate(expr.left); 41 | final FilterPredicate right = toPredicate(expr.right); 42 | 43 | switch (expr.operator) { 44 | case AND: 45 | return and(left, right); 46 | 47 | case OR: 48 | return or(left, right); 49 | 50 | default: 51 | throw new IllegalArgumentException("Unexpected op: " + expr.operator); 52 | } 53 | } 54 | }); 55 | } 56 | 57 | } -------------------------------------------------------------------------------- /querydsl/querydsl-parquet/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /restgatewaytest/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.0.3 2 | -------------------------------------------------------------------------------- /restgatewaytest/project/protoc.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.thesamet" % "sbt-protoc" % "0.99.13") 2 | 3 | resolvers += Resolver.bintrayRepo("beyondthelines", "maven") 4 | 5 | val scalapb_plugin_version = "0.6.7" 6 | val scalapb_gateway_version = "0.0.8" 7 | 8 | libraryDependencies ++= Seq( 9 | "com.trueaccord.scalapb" %% "compilerplugin" % scalapb_plugin_version, 10 | "beyondthelines" %% "grpcgatewaygenerator" % scalapb_gateway_version 11 | ) -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/main/proto/testsvcA.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package net.ndolgov.restgatewaytest.api; 4 | 5 | option java_multiple_files = false; 6 | option java_package = "net.ndolgov.restgatewaytest.api"; 7 | option java_outer_classname = "TestServiceAProto"; 8 | option objc_class_prefix = "TS1P"; 9 | 10 | import "google/api/annotations.proto"; 11 | 12 | service TestServiceA { 13 | rpc Process (TestRequestA) returns (TestResponseA) { 14 | option (google.api.http) = { 15 | post: "/restgateway/test/testservicea" 16 | body: "*" 17 | }; 18 | } 19 | } 20 | 21 | message TestRequestA { 22 | int64 requestId = 1; 23 | } 24 | 25 | message TestResponseA { 26 | bool success = 1; 27 | int64 requestId = 2; 28 | string result = 3; 29 | } -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/main/proto/testsvcB.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package net.ndolgov.restgatewaytest.api; 4 | 5 | option java_multiple_files = false; 6 | option java_package = "net.ndolgov.restgatewaytest.api"; 7 | option java_outer_classname = "TestServiceBProto"; 8 | option objc_class_prefix = "TS2P"; 9 | 10 | import "google/api/annotations.proto"; 11 | 12 | service TestServiceB { 13 | rpc Process (TestRequestB) returns (TestResponseB) { 14 | option (google.api.http) = { 15 | post: "/restgateway/test/testserviceb" 16 | body: "*" 17 | }; 18 | } 19 | } 20 | 21 | message TestRequestB { 22 | int64 requestId = 1; 23 | } 24 | 25 | message TestResponseB { 26 | bool success = 1; 27 | int64 requestId = 2; 28 
| string result = 3; 29 | } -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/main/resources/specs/TestsvcAService.yml: -------------------------------------------------------------------------------- 1 | swagger: '2.0' 2 | info: 3 | version: not set 4 | title: 'TestsvcAProto' 5 | description: 'REST API generated from testsvcA.proto' 6 | schemes: 7 | - http 8 | - https 9 | consumes: 10 | - 'application/json' 11 | produces: 12 | - 'application/json' 13 | paths: 14 | /restgateway/test/testservicea: 15 | post: 16 | tags: 17 | - TestServiceA 18 | summary: 19 | 'Process' 20 | description: 21 | 'Generated from net.ndolgov.restgatewaytest.api.TestServiceA.Process' 22 | produces: 23 | ['application/json'] 24 | responses: 25 | 200: 26 | description: 'Normal response' 27 | schema: 28 | $ref: "#/definitions/TestResponseA" 29 | parameters: 30 | - in: 'body' 31 | name: body 32 | schema: 33 | $ref: "#/definitions/TestRequestA" 34 | definitions: 35 | TestRequestA: 36 | type: object 37 | properties: 38 | requestId: 39 | type: integer 40 | format: int64 41 | TestResponseA: 42 | type: object 43 | properties: 44 | success: 45 | type: boolean 46 | requestId: 47 | type: integer 48 | format: int64 49 | result: 50 | type: string -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/main/resources/specs/TestsvcBService.yml: -------------------------------------------------------------------------------- 1 | swagger: '2.0' 2 | info: 3 | version: not set 4 | title: 'TestsvcBProto' 5 | description: 'REST API generated from testsvcB.proto' 6 | schemes: 7 | - http 8 | - https 9 | consumes: 10 | - 'application/json' 11 | produces: 12 | - 'application/json' 13 | paths: 14 | /restgateway/test/testserviceb: 15 | post: 16 | tags: 17 | - TestServiceB 18 | summary: 19 | 'Process' 20 | description: 21 | 'Generated from net.ndolgov.restgatewaytest.api.TestServiceB.Process' 22 | produces: 23 | ['application/json'] 24 | responses: 25 | 200: 26 | description: 'Normal response' 27 | schema: 28 | $ref: "#/definitions/TestResponseB" 29 | parameters: 30 | - in: 'body' 31 | name: body 32 | schema: 33 | $ref: "#/definitions/TestRequestB" 34 | definitions: 35 | TestRequestB: 36 | type: object 37 | properties: 38 | requestId: 39 | type: integer 40 | format: int64 41 | TestResponseB: 42 | type: object 43 | properties: 44 | success: 45 | type: boolean 46 | requestId: 47 | type: integer 48 | format: int64 49 | result: 50 | type: string -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/main/scala/net/ndolgov/restgatewaytest/GatewayServer.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.restgatewaytest 2 | 3 | import java.util.concurrent.Executor 4 | 5 | import grpcgateway.handlers.GrpcGatewayHandler 6 | import grpcgateway.server.{GrpcGatewayServer, GrpcGatewayServerBuilder} 7 | import io.grpc.{ManagedChannel, ManagedChannelBuilder} 8 | import org.slf4j.LoggerFactory 9 | 10 | /** REST gateway for a gRPC service instance that can be started and stopped */ 11 | trait GatewayServer { 12 | def start(): Unit 13 | 14 | def stop(): Unit 15 | } 16 | 17 | private final class GatewayServerImpl(server: GrpcGatewayServer, port: Int) extends GatewayServer { 18 | private val logger = LoggerFactory.getLogger(classOf[GatewayServer]) 19 | 20 | override def start(): Unit = { 21 | try { 22 | 
server.start() 23 | logger.info("Started " + this) 24 | } catch { 25 | case e: Exception => 26 | throw new RuntimeException("Could not start server", e) 27 | } 28 | } 29 | 30 | override def stop(): Unit = { 31 | try { 32 | logger.info("Stopping " + this) 33 | server.shutdown() 34 | logger.info("Stopped " + this) 35 | } catch { 36 | case _: Exception => 37 | logger.warn("Interrupted while shutting down " + this) 38 | } 39 | } 40 | 41 | override def toString: String = "{GatewayServer:port=" + port + "}" 42 | } 43 | 44 | /** Create a Netty-backed REST Gateway for a given gRPC server with the request handlers created by a given factory 45 | * method. Bind the gateway to a given port. Perform request redirection on a given thread pool. */ 46 | object GatewayServer { 47 | def apply(serviceHost: String, servicePort: Int, 48 | gatewayPort: Int, 49 | executor: Executor, 50 | toHandlers: (ManagedChannel) => Seq[GrpcGatewayHandler]) : GatewayServer = { 51 | val channel = ManagedChannelBuilder.forAddress(serviceHost, servicePort).usePlaintext(true).executor(executor).build() 52 | 53 | var builder = GrpcGatewayServerBuilder.forPort(gatewayPort) 54 | for (handler <- toHandlers(channel)) { 55 | builder = builder.addService(handler) 56 | } 57 | 58 | new GatewayServerImpl(builder.build(), gatewayPort) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/main/scala/net/ndolgov/restgatewaytest/GrpcServer.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.restgatewaytest 2 | 3 | import java.net.InetSocketAddress 4 | import java.util.concurrent.TimeUnit 5 | 6 | import io.grpc.netty.NettyServerBuilder 7 | import io.grpc.{Server, ServerServiceDefinition} 8 | import org.slf4j.LoggerFactory 9 | 10 | import scala.concurrent.ExecutionContext 11 | 12 | /** gRPC service instance that can be started and stopped */ 13 | trait GrpcServer { 14 | def start(): Unit 15 | 16 | def stop(timeout: Long): Unit 17 | } 18 | 19 | private final class GrpcServerImpl(server: Server, port: Int) extends GrpcServer { 20 | private val logger = LoggerFactory.getLogger(classOf[GrpcServer]) 21 | 22 | override def start(): Unit = { 23 | try { 24 | logger.info("Starting " + this) 25 | server.start 26 | logger.info("Started " + this) 27 | } catch { 28 | case e: Exception => 29 | throw new RuntimeException("Could not start server", e) 30 | } 31 | } 32 | 33 | override def stop(timeout: Long): Unit = { 34 | try { 35 | logger.info("Stopping " + this) 36 | server.shutdown().awaitTermination(timeout, TimeUnit.MILLISECONDS) 37 | logger.info("Stopped " + this) 38 | } catch { 39 | case _: Exception => 40 | logger.warn("Interrupted while shutting down " + this) 41 | } 42 | } 43 | 44 | override def toString: String = "{GrpcServer:port=" + port + "}" 45 | } 46 | 47 | /** Create a Netty-backed gRPC service instance with the request handlers created by a given factory method. 48 | * Bind the service to a given "host:port" address. Process requests on a given thread pool. 
*/ 49 | object GrpcServer { 50 | def apply(hostname: String, 51 | port: Int, 52 | toServices: (ExecutionContext) => Seq[ServerServiceDefinition]) 53 | (implicit ec: ExecutionContext) : GrpcServer = { 54 | 55 | val builder: NettyServerBuilder = NettyServerBuilder.forAddress(new InetSocketAddress(hostname, port)) 56 | toServices.apply(ec).foreach(definition => builder.addService(definition)) 57 | 58 | new GrpcServerImpl(builder.build(), port) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/main/scala/net/ndolgov/restgatewaytest/TestServiceAImpl.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.restgatewaytest 2 | 3 | import net.ndolgov.restgatewaytest.api.testsvcA.{TestRequestA, TestResponseA} 4 | import net.ndolgov.restgatewaytest.api.testsvcA.TestServiceAGrpc.TestServiceA 5 | import org.slf4j.LoggerFactory 6 | 7 | import scala.concurrent.{ExecutionContext, Future} 8 | 9 | class TestServiceAImpl(implicit ec: ExecutionContext) extends TestServiceA { 10 | private val logger = LoggerFactory.getLogger(classOf[TestServiceAImpl]) 11 | 12 | override def process(request: TestRequestA): Future[TestResponseA] = { 13 | Future { 14 | logger.info(s"Computing result of $request"); // todo this is where actual time-consuming processing would be 15 | TestResponseA(success = true, request.requestId, "RESULTA") 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/main/scala/net/ndolgov/restgatewaytest/TestServiceBImpl.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.restgatewaytest 2 | 3 | import net.ndolgov.restgatewaytest.api.testsvcB.{TestRequestB, TestResponseB} 4 | import net.ndolgov.restgatewaytest.api.testsvcB.TestServiceBGrpc.TestServiceB 5 | import org.slf4j.LoggerFactory 6 | 7 | import scala.concurrent.{ExecutionContext, Future} 8 | 9 | class TestServiceBImpl(implicit ec: ExecutionContext) extends TestServiceB { 10 | private val logger = LoggerFactory.getLogger(classOf[TestServiceBImpl]) 11 | 12 | override def process(request: TestRequestB): Future[TestResponseB] = { 13 | Future { 14 | logger.info(s"Computing result of $request"); // todo this is where actual time-consuming processing would be 15 | TestResponseB(success = true, request.requestId, "RESULTB") 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /restgatewaytest/restgatewaytest-web/src/test/scala/net/ndolgov/restgatewaytest/JsonMarshaller.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.restgatewaytest 2 | 3 | import java.io.StringWriter 4 | 5 | import com.fasterxml.jackson.databind.ObjectMapper 6 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 7 | 8 | /** Scala wrapper for a popular Java JSON serialization library*/ 9 | object JsonMarshaller { 10 | private val marshaller = new ObjectMapper().registerModule(new DefaultScalaModule()) 11 | 12 | def toJson[T](value: T): String = { 13 | val writer = new 
StringWriter 14 | marshaller.writeValue(writer, value) 15 | writer.toString 16 | } 17 | 18 | def fromJson[T](json: String, clazz: Class[T]) : T = { 19 | marshaller.readValue(json, clazz) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/FileDownloader.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | import java.io.File; 4 | 5 | /** 6 | * Download a file from S3 storage 7 | */ 8 | public interface FileDownloader { 9 | /** 10 | * @param localFile local file to upload 11 | * @param remotePath relative path (counting from some assumed root) in S3 storage 12 | * @param callback observer to notify about upload status 13 | */ 14 | void download(File localFile, String remotePath, TransferCallback callback); 15 | } 16 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/FileHandler.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | /** 4 | * Downloader-side handler of newly detected S3 storage files 5 | */ 6 | public interface FileHandler { 7 | /** 8 | * Handle a file found in the remote storage at a given path 9 | * @param remotePath S3 storage path 10 | * @param callback a means of reporting request status asynchronously 11 | */ 12 | void handle(String remotePath, TransferCallback callback); 13 | } 14 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/FileUploader.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | import java.io.File; 4 | 5 | /** 6 | * Upload a file to S3 storage 7 | */ 8 | public interface FileUploader { 9 | /** 10 | * @param localFile local file to upload 11 | * @param remotePath relative path (counting from some assumed root) in S3 storage 12 | * @param callback observer to notify about upload status 13 | */ 14 | void upload(File localFile, String remotePath, TransferCallback callback); 15 | } 16 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/S3ClientMBean.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | /** 4 | * S3 client JMX end point 5 | */ 6 | public interface S3ClientMBean { 7 | /** 8 | * @return S3 bucket 9 | */ 10 | String getBucket(); 11 | 12 | /** 13 | * @return a prefix path automatically inserted between the bucket name and a relative path to a file 14 | */ 15 | String getPathPrefix(); 16 | } 17 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/S3Destination.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | /** 4 | * A means of having a common S3 parent path for multiple files 5 | */ 6 | public interface S3Destination { 7 | /** Our S3 path delimiter */ 8 | String DELIMITER = "/"; 9 | 10 | /** 11 | * @param namespace AWS namespace 12 | * @return namespace-based prefix to restrict S3 keys for this destination 13 | */ 14 | String prefix(String namespace); 15 | } 16 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/S3Downloader.java: 
-------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | import com.amazonaws.services.s3.transfer.Transfer; 4 | 5 | import java.io.File; 6 | 7 | /** 8 | * Download a file from a given "subdirectory" of S3 bucket 9 | */ 10 | public final class S3Downloader implements FileDownloader { 11 | private final S3FileTransferClient client; 12 | 13 | public S3Downloader(S3FileTransferClient client) { 14 | this.client = client; 15 | } 16 | 17 | @Override 18 | public void download(File localFile, String remotePath, TransferCallback callback) { 19 | final S3TransferProgressListener listener = new S3TransferProgressListener(remotePath, localFile.getName(), callback); 20 | final Transfer download = client.download(remotePath, localFile, listener); 21 | listener.listenTo(download); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/S3FileTransferClient.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | import com.amazonaws.event.ProgressListener; 4 | import com.amazonaws.services.s3.transfer.Download; 5 | import com.amazonaws.services.s3.transfer.Upload; 6 | 7 | import java.io.File; 8 | import java.util.function.Function; 9 | 10 | /** 11 | * Asynchronous transfer operations on files in one S3 bucket 12 | */ 13 | public interface S3FileTransferClient { 14 | /** 15 | * 16 | * @param src absolute source file path 17 | * @param key a path in the bucket 18 | * @param listener continuation to run when a final transfer state is reached 19 | * @return Upload request handle 20 | */ 21 | Upload upload(File src, String key, ProgressListener listener); 22 | 23 | /** 24 | * 25 | * @param key a path in the bucket 26 | * @param dest local file to download to 27 | * @param listener continuation to run when a final transfer state is reached 28 | * @return Download request handle 29 | */ 30 | Download download(String key, File dest, ProgressListener listener); 31 | 32 | /** 33 | * List all object located under the root 34 | * @param consumer consumer of found object keys 35 | */ 36 | void list(Function consumer); 37 | 38 | /** 39 | * List all object located directly under the root or a given subdirectory 40 | * @param consumer consumer of found object keys 41 | * @param subDir subdirectory under root (e.g. "some_path") or null to search under root 42 | */ 43 | void listOneLevel(Function consumer, String subDir); 44 | 45 | /** 46 | * @param key a path in the bucket 47 | * @return true if there is an object with the given key 48 | */ 49 | boolean exists(String key); 50 | 51 | /** 52 | * @param objectKey object key (i.e. 
absolute path in the bucket) 53 | * @return true if there the object with the given key was deleted 54 | */ 55 | boolean delete(String objectKey); 56 | } 57 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/S3Uploader.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | import com.amazonaws.services.s3.transfer.Transfer; 4 | 5 | import java.io.File; 6 | 7 | /** 8 | * Upload a file to a subdirectory of S3 bucket 9 | */ 10 | public final class S3Uploader implements FileUploader { 11 | private final S3FileTransferClient client; 12 | 13 | public S3Uploader(S3FileTransferClient client) { 14 | this.client = client; 15 | } 16 | 17 | @Override 18 | public void upload(File localFile, String remotePath, TransferCallback callback) { 19 | final S3TransferProgressListener listener = new S3TransferProgressListener(localFile.getName(), remotePath, callback); 20 | final Transfer upload = client.upload(localFile, remotePath, listener); 21 | listener.listenTo(upload); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /s3test/src/main/java/net/ndolgov/s3test/TransferCallback.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.s3test; 2 | 3 | /** 4 | * A means of tracking asynchronous file transfer completion 5 | */ 6 | public interface TransferCallback { 7 | /** 8 | * Notify about successful file transfer 9 | */ 10 | void onSuccess(); 11 | 12 | /** 13 | * Notify about file transfer failure 14 | * @param message error message 15 | */ 16 | void onFailure(String message); 17 | 18 | /** 19 | * For cases when nobody cares about request status 20 | */ 21 | TransferCallback IGNORED = new TransferCallback() { 22 | public void onSuccess() { 23 | } 24 | 25 | public void onFailure(String message) { 26 | } 27 | }; 28 | } 29 | -------------------------------------------------------------------------------- /s3test/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /scalapbtest/build.sbt: -------------------------------------------------------------------------------- 1 | import scalapb.compiler.Version.scalapbVersion 2 | 3 | val grpc_version = "1.8.0" 4 | val slf4j_version = "1.6.4" 5 | val scalatest_version = "3.0.4" 6 | val scala_version = "2.12.3" 7 | 8 | val group_id = "net.ndolgov" 9 | val project_version = "1.0.0-SNAPSHOT" 10 | 11 | val scalapbtest_grpc_id = "scalapbtest-grpc" 12 | val scalapbtest_grpc = Project(id = scalapbtest_grpc_id, base = file(scalapbtest_grpc_id)). 13 | settings( 14 | name := scalapbtest_grpc_id, 15 | organization := group_id, 16 | version := project_version 17 | ). 18 | settings( 19 | PB.protoSources in Compile := Seq(sourceDirectory.value / "main/proto"), 20 | PB.targets in Compile := Seq(scalapb.gen() -> (sourceManaged in Compile).value) 21 | ). 
22 | settings( 23 | libraryDependencies ++= Seq( 24 | "org.scalatest" %% "scalatest" % scalatest_version % Test, 25 | "com.thesamet.scalapb" %% "scalapb-runtime" % scalapbVersion % "protobuf", 26 | "com.thesamet.scalapb" %% "scalapb-runtime-grpc" % scalapbVersion, 27 | "io.grpc" % "grpc-netty" % grpc_version, 28 | "org.slf4j" % "slf4j-api" % slf4j_version, 29 | "org.slf4j" % "slf4j-log4j12" % slf4j_version, 30 | ) 31 | ) 32 | 33 | val scalapbtest_root_id = "scalapbtest-root" 34 | val root = Project(id = scalapbtest_root_id, base = file(".") ). 35 | settings( 36 | scalaVersion := scala_version, 37 | scalacOptions ++= Seq("-deprecation", "-Xfatal-warnings") 38 | ). 39 | settings( 40 | name := scalapbtest_root_id, 41 | organization := group_id, 42 | version := project_version 43 | ). 44 | settings( 45 | resolvers += "Local Maven" at Path.userHome.asFile.toURI.toURL + ".m2/repository" 46 | ). 47 | settings( 48 | packageBin := { new File("") }, 49 | packageSrc := { new File("") } 50 | ). 51 | aggregate(scalapbtest_grpc) 52 | 53 | 54 | -------------------------------------------------------------------------------- /scalapbtest/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.0.3 2 | -------------------------------------------------------------------------------- /scalapbtest/project/scalapb.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.thesamet" % "sbt-protoc" % "0.99.13") 2 | 3 | libraryDependencies += "com.thesamet.scalapb" %% "compilerplugin" % "0.7.0-rc6" -------------------------------------------------------------------------------- /scalapbtest/scalapbtest-grpc/src/main/proto/testsvcA.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package scalapbtest; 4 | 5 | option java_multiple_files = false; 6 | option java_package = "net.ndolgov.scalapbtest.api"; 7 | option java_outer_classname = "TestServiceAProto"; 8 | option objc_class_prefix = "TS1P"; 9 | 10 | 11 | service TestServiceA { 12 | rpc Process (TestRequestA) returns (TestResponseA) {} 13 | } 14 | 15 | message TestRequestA { 16 | int64 requestId = 1; 17 | } 18 | 19 | message TestResponseA { 20 | bool success = 1; 21 | int64 requestId = 2; 22 | string result = 3; 23 | } -------------------------------------------------------------------------------- /scalapbtest/scalapbtest-grpc/src/main/proto/testsvcB.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package scalapbtest; 4 | 5 | option java_multiple_files = false; 6 | option java_package = "net.ndolgov.scalapbtest.api"; 7 | option java_outer_classname = "TestServiceBProto"; 8 | option objc_class_prefix = "TS2P"; 9 | 10 | 11 | service TestServiceB { 12 | rpc Process (TestRequestB) returns (TestResponseB) {} 13 | } 14 | 15 | message TestRequestB { 16 | int64 requestId = 1; 17 | } 18 | 19 | message TestResponseB { 20 | bool success = 1; 21 | int64 requestId = 2; 22 | string result = 3; 23 | } -------------------------------------------------------------------------------- /scalapbtest/scalapbtest-grpc/src/main/scala/net/ndolgov/scalapbtest/GrpcClient.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.scalapbtest 2 | 3 | import java.util.concurrent.{Executor, TimeUnit} 4 | 5 | import io.grpc.stub.AbstractStub 6 | import io.grpc.{ManagedChannel, 
ManagedChannelBuilder} 7 | import scala.concurrent.ExecutionContext 8 | 9 | trait GrpcClient { 10 | def stop(): Unit 11 | 12 | def createClient[A <: AbstractStub[A]](f: (ManagedChannel) => A) : A 13 | 14 | def executionContext() : ExecutionContext 15 | } 16 | 17 | private final class GrpcClientImpl(channel: ManagedChannel, ec: ExecutionContext) extends GrpcClient { 18 | override def createClient[A <: AbstractStub[A]](factory: (ManagedChannel) => A) : A = { 19 | factory.apply(channel) 20 | } 21 | 22 | override def stop(): Unit = channel.shutdown().awaitTermination(5000, TimeUnit.MILLISECONDS) 23 | 24 | override def executionContext(): ExecutionContext = ec 25 | } 26 | 27 | /** Create GRPC transport to a given "host:port" destination */ 28 | object GrpcClient { 29 | def apply(hostname: String, port: Int, executor: Executor) : GrpcClient = { 30 | new GrpcClientImpl( 31 | ManagedChannelBuilder.forAddress(hostname, port).usePlaintext(true).executor(executor).build(), 32 | ExecutionContext.fromExecutor(executor)) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /scalapbtest/scalapbtest-grpc/src/main/scala/net/ndolgov/scalapbtest/GrpcServer.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.scalapbtest 2 | 3 | import java.net.InetSocketAddress 4 | import java.util.concurrent.TimeUnit 5 | 6 | import io.grpc.netty.NettyServerBuilder 7 | import io.grpc.{Server, ServerServiceDefinition} 8 | import org.slf4j.LoggerFactory 9 | 10 | import scala.concurrent.ExecutionContext 11 | 12 | trait GrpcServer { 13 | def start(): Unit 14 | 15 | def stop(): Unit 16 | } 17 | 18 | private final class GrpcServerImpl(server: Server, port: Int) extends GrpcServer { 19 | private val logger = LoggerFactory.getLogger(classOf[GrpcServer]) 20 | 21 | override def start(): Unit = { 22 | try { 23 | server.start 24 | logger.info("Started " + this) 25 | } catch { 26 | case e: Exception => 27 | throw new RuntimeException("Could not start server", e) 28 | } 29 | } 30 | 31 | override def stop(): Unit = { 32 | try { 33 | logger.info("Stopping " + this) 34 | server.shutdown().awaitTermination(5, TimeUnit.SECONDS) 35 | logger.info("Stopped " + this) 36 | } catch { 37 | case e: Exception => 38 | logger.warn("Interrupted while shutting down " + this) 39 | } 40 | } 41 | 42 | override def toString: String = "{GrpcServer:port=" + port + "}" 43 | } 44 | 45 | /** Create a GRPC server for given request handlers and bind it to provided "host:port" */ 46 | object GrpcServer { 47 | def apply(hostname: String, port: Int, toServices: (ExecutionContext) => Seq[ServerServiceDefinition]) 48 | (implicit ec: ExecutionContext) : GrpcServer = { 49 | val builder: NettyServerBuilder = NettyServerBuilder.forAddress(new InetSocketAddress(hostname, port)) 50 | toServices.apply(ec).foreach(definition => builder.addService(definition)) 51 | 52 | new GrpcServerImpl(builder.build(), port) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /scalapbtest/scalapbtest-grpc/src/main/scala/net/ndolgov/scalapbtest/TestServiceAImpl.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.scalapbtest 2 | 3 | import net.ndolgov.scalapbtest.api.testsvcA.{TestRequestA, TestResponseA} 4 | import net.ndolgov.scalapbtest.api.testsvcA.TestServiceAGrpc.TestServiceA 5 | import org.slf4j.LoggerFactory 6 | 7 | import scala.concurrent.{ExecutionContext, Future} 8 | 9 | 
class TestServiceAImpl(implicit ec: ExecutionContext) extends TestServiceA { 10 | private val logger = LoggerFactory.getLogger(classOf[TestServiceAImpl]) 11 | 12 | override def process(request: TestRequestA): Future[TestResponseA] = { 13 | Future { 14 | logger.info("Computing result"); // todo this is where actual time-consuming processing would be 15 | TestResponseA(success = true, request.requestId, "RESULT") 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /scalapbtest/scalapbtest-grpc/src/main/scala/net/ndolgov/scalapbtest/TestServiceBImpl.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.scalapbtest 2 | 3 | import net.ndolgov.scalapbtest.api.testsvcB.{TestRequestB, TestResponseB} 4 | import net.ndolgov.scalapbtest.api.testsvcB.TestServiceBGrpc.TestServiceB 5 | import org.slf4j.LoggerFactory 6 | 7 | import scala.concurrent.{ExecutionContext, Future} 8 | 9 | class TestServiceBImpl(implicit ec: ExecutionContext) extends TestServiceB { 10 | private val logger = LoggerFactory.getLogger(classOf[TestServiceBImpl]) 11 | 12 | override def process(request: TestRequestB): Future[TestResponseB] = { 13 | Future { 14 | logger.info("Computing result"); // todo this is where actual time-consuming processing would be 15 | TestResponseB(success = true, request.requestId, "RESULT") 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /scalapbtest/scalapbtest-grpc/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /sparkdatasourcetest/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | net.ndolgov.sparkdatasourcetest.sql.DefaultSource 2 | -------------------------------------------------------------------------------- /sparkdatasourcetest/src/main/scala/net/ndolgov/sparkdatasourcetest/lucene/LuceneFieldReader.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.lucene 2 | 3 | import net.ndolgov.sparkdatasourcetest.sql.LuceneSchema 4 | import org.apache.spark.sql.sources.Filter 5 | import org.apache.spark.sql.types.{DoubleType, LongType, StructField} 6 | 7 | /** 8 | * Read a field value from the current Lucene document to the current Spark Row 9 | */ 10 | trait LuceneFieldReader { 11 | def readLong(value : Long) : Unit = throw new UnsupportedOperationException 12 | 13 | def readDouble(value : Double) : Unit = throw new UnsupportedOperationException 14 | } 15 | 16 | object LuceneFieldReader { 17 | 18 | // todo retrieve fields for (filters - columns) attrs? 
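// Builds one reader per requested column: schemaIndex walks the full schema while
// outputIndex tracks the column's position in the pruned output row, so each value
// is written into the slot the pruned Spark Row layout expects.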
19 | def apply(columns: Seq[String], filters: Array[Filter], schema: LuceneSchema, row : Array[Any]) : Array[LuceneFieldReader] = { 20 | val readers : Array[LuceneFieldReader] = Array.ofDim[LuceneFieldReader](columns.length) 21 | 22 | var schemaIndex : Int = 0 23 | var outputIndex : Int = 0 24 | for (field <- schema.sparkSchema()) { 25 | if (columns.contains(field.name)) { 26 | readers(outputIndex) = apply(schema, schemaIndex, outputIndex, row) 27 | outputIndex += 1 28 | } 29 | schemaIndex += 1 30 | } 31 | 32 | readers 33 | } 34 | 35 | private def apply(schema: LuceneSchema, schemaIndex: Int, outputIndex : Int, row: Array[Any]): LuceneFieldReader = { 36 | val sparkField: StructField = schema.sparkField(schemaIndex) 37 | 38 | sparkField.dataType match { 39 | case LongType => new LongReader(outputIndex, row) 40 | case DoubleType => new DoubleReader(outputIndex, row) 41 | case _ => throw new IllegalArgumentException("Unsupported field type: " + sparkField.dataType); 42 | } 43 | } 44 | 45 | private final class LongReader(index : Int, row: Array[Any]) extends LuceneFieldReader { 46 | override def readLong(value : Long) : Unit = { 47 | row(index) = value 48 | } 49 | } 50 | 51 | private final class DoubleReader(index : Int, row: Array[Any]) extends LuceneFieldReader { 52 | override def readDouble(value : Double) : Unit = { 53 | row(index) = value 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /sparkdatasourcetest/src/main/scala/net/ndolgov/sparkdatasourcetest/lucene/LuceneFieldWriter.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.lucene 2 | 3 | import net.ndolgov.sparkdatasourcetest.sql.LuceneSchema 4 | import org.apache.lucene.document.Document 5 | import org.apache.spark.sql.Row 6 | import org.apache.spark.sql.types.{DoubleType, LongType} 7 | 8 | /** 9 | * Write a Spark Row to a Lucene index as a document 10 | */ 11 | trait LuceneFieldWriter { 12 | def write(row : Row) 13 | } 14 | 15 | object LuceneFieldWriter { 16 | /** 17 | * @param schema document schema 18 | * @param document document instance to reuse 19 | * @return a new writer 20 | */ 21 | def apply(schema : LuceneSchema, document : Document) : LuceneFieldWriter = { 22 | val writers : Array[LuceneFieldWriter] = Array.ofDim[LuceneFieldWriter](schema.size) 23 | 24 | var index : Int = 0 25 | for (field <- schema.sparkSchema()) { 26 | writers(index) = apply(schema, index, document) 27 | index += 1 28 | } 29 | 30 | new RowWriter(writers, document) 31 | } 32 | 33 | private def apply(schema : LuceneSchema, index : Int, document : Document) : LuceneFieldWriter = { 34 | val field : LuceneDocumentField = LuceneFieldFactory(schema, index) 35 | 36 | schema.sparkFieldType(index) match { 37 | case LongType => new LongFieldWriter(index, field, document) 38 | case DoubleType => new DoubleFieldWriter(index, field, document) 39 | case _ => throw new IllegalArgumentException("Unsupported field type: " + field); 40 | } 41 | } 42 | 43 | private final class RowWriter(writers : Seq[LuceneFieldWriter], document : Document) extends LuceneFieldWriter { 44 | override def write(row: Row): Unit = { 45 | writers.foreach((writer: LuceneFieldWriter) => writer.write(row)) 46 | } 47 | } 48 | 49 | private final class LongFieldWriter(index: Int, field: LuceneDocumentField, document : Document) extends LuceneFieldWriter { 50 | override def write(row: Row): Unit = { 51 | if (!row.isNullAt(index)) { 52 | field.addTo(document) 53 | 
field.setLongValue(row.getLong(index)) 54 | } 55 | } 56 | } 57 | 58 | private final class DoubleFieldWriter(index: Int, field: LuceneDocumentField, document: Document) extends LuceneFieldWriter { 59 | override def write(row: Row): Unit = { 60 | if (!row.isNullAt(index)) { 61 | field.addTo(document) 62 | field.setDoubleValue(row.getDouble(index)) 63 | } 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /sparkdatasourcetest/src/main/scala/net/ndolgov/sparkdatasourcetest/lucene/StoredFieldVisitorQuery.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.lucene 2 | 3 | import java.io.IOException 4 | 5 | import org.apache.lucene.index.{LeafReaderContext, StoredFieldVisitor} 6 | import org.apache.lucene.queries.{CustomScoreProvider, CustomScoreQuery} 7 | import org.apache.lucene.search.Query 8 | 9 | /** 10 | * Custom Lucene query using a visitor to collect values from matching Lucene documents 11 | */ 12 | final class StoredFieldVisitorQuery(val subQuery: Query, val processor : LuceneDocumentProcessor) extends CustomScoreQuery(subQuery) { 13 | @throws[IOException] 14 | override def getCustomScoreProvider(context : LeafReaderContext) : CustomScoreProvider = { 15 | new Provider(context, processor) 16 | } 17 | } 18 | 19 | trait LuceneDocumentProcessor { 20 | /** @return Lucene field visitor to apply to all matching documents */ 21 | def visitor() : StoredFieldVisitor 22 | 23 | /** Process Lucene document fields gathered by the [[visitor]] from the last seen document */ 24 | def onDocument() 25 | } 26 | 27 | @Override 28 | private final class Provider(val leafCtx : LeafReaderContext, val processor : LuceneDocumentProcessor) extends CustomScoreProvider(leafCtx) { 29 | val DEFAULT_SCORE: Int = 0 30 | val visitor : StoredFieldVisitor = processor.visitor() 31 | 32 | override def customScore(docId: Int, subQueryScore: Float, valSrcScore: Float): Float = { 33 | leafCtx.reader().document(docId, visitor) 34 | processor.onDocument() 35 | 36 | DEFAULT_SCORE 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /sparkdatasourcetest/src/main/scala/net/ndolgov/sparkdatasourcetest/sql/LuceneRelation.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.sql 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation, Filter, PrunedFilteredScan} 5 | import org.apache.spark.sql.types.StructType 6 | import org.apache.spark.sql.{Dataset, Row, SaveMode, SQLContext} 7 | 8 | /** 9 | * Searchable Lucene-based data storage of some application-specific scope (e.g. 
corresponding to a particular version of 10 | * a tenant's dataset; the client would be responsible for mapping (tenantId,version) pair to a relationDir) 11 | * 12 | * @param relationDir root location for all storage partitions 13 | * @param userSchema user-provided schema 14 | * @param sqlContext Spark context 15 | */ 16 | class LuceneRelation(relationDir: String, userSchema: LuceneSchema = null)(@transient val sqlContext: SQLContext) 17 | extends BaseRelation with PrunedFilteredScan with InsertableRelation { 18 | 19 | private val rddSchema : LuceneSchema = getSchema 20 | 21 | private val rdd : LuceneRDD = LuceneRDD(sqlContext.sparkContext, relationDir, rddSchema) 22 | 23 | private def getSchema: LuceneSchema = { 24 | if (userSchema == null) { 25 | LuceneSchema.open(LuceneRDD.schemaFilePath(relationDir).toUri.toString) 26 | } else { 27 | userSchema 28 | } 29 | } 30 | 31 | override def schema: StructType = rddSchema.sparkSchema() 32 | 33 | override def unhandledFilters(filters: Array[Filter]): Array[Filter] = rddSchema.unhandledFilters(filters) 34 | 35 | override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { 36 | rdd.pruneAndFilter(requiredColumns, filters) 37 | } 38 | 39 | override def insert(df: Dataset[Row], overwrite: Boolean): Unit = { 40 | df. 41 | write. 42 | format(LuceneDataSource.SHORT_NAME). 43 | option(LuceneDataSource.PATH, relationDir). 44 | mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append). 45 | save() 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /sparkdatasourcetest/src/main/scala/net/ndolgov/sparkdatasourcetest/sql/package.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest 2 | 3 | import org.apache.spark.sql.{Dataset, Row, SQLContext} 4 | 5 | /** 6 | * Extend Spark API with LuceneRDD support 7 | */ 8 | package object sql { 9 | 10 | /** 11 | * Extend SQLContext API 12 | */ 13 | implicit class LuceneSqlContext(sqlContext : SQLContext) { 14 | /** 15 | * Load data from the Lucene-based storage at a given location to a data frame 16 | * @param path data storage location 17 | */ 18 | def luceneTable(path: String): Unit = { 19 | sqlContext.baseRelationToDataFrame(new LuceneRelation(path)(sqlContext)) 20 | } 21 | } 22 | 23 | /** 24 | * Extend DataFrame API 25 | */ 26 | implicit class LuceneDataFrame(df: Dataset[Row]) { 27 | /** 28 | * Save a data frame to the Lucene-based storage 29 | * @param path storage location for the given dataset 30 | * @param luceneSchema data schema 31 | */ 32 | def saveAsLuceneIndex(path : String, luceneSchema : String): Unit = { 33 | LuceneRDD(df, luceneSchema).save(path) 34 | } 35 | 36 | /** 37 | * @return the number of rows in the data frame counted in a more efficient way than the default one (that requires 38 | * the entire dataset to be moved to the driver before counting) 39 | */ 40 | def countRows(): Long = { 41 | LuceneRDD(df).count() 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /sparkdatasourcetest/src/test/scala/net/ndolgov/sparkdatasourcetest/sql/LuceneSchemaTestSuit.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.sql 2 | 3 | import java.io.File 4 | 5 | import org.apache.spark.sql.types.StructType 6 | import org.scalatest.{Assertions, FlatSpec} 7 | 8 | final class LuceneSchemaTestSuit extends FlatSpec with Assertions 
{ 9 | "A schema written to a file" should "be read back" in { 10 | val sparkSchema : StructType = LuceneDataSourceTestEnv.defaultSchema 11 | val luceneSchema : Array[FieldType] = Array[FieldType](FieldType.INDEXED, FieldType.QUERYABLE, FieldType.STORED) 12 | val original = LuceneSchema(sparkSchema, FieldType.toString(luceneSchema)) 13 | 14 | val filePath: String = "target/testschema" + System.currentTimeMillis() + ".txt" 15 | LuceneSchema.save(original, filePath) 16 | 17 | val retrieved = LuceneSchema.open(filePath) 18 | 19 | assert(retrieved.size == 3) 20 | for (i <- 0 to 2) { 21 | assert(retrieved.sparkFieldType(i) == sparkSchema.fields(i).dataType) 22 | assert(retrieved.luceneFieldType(i) == luceneSchema(i)) 23 | } 24 | 25 | new File(filePath).delete() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/README.md: -------------------------------------------------------------------------------- 1 | #### Spark Data Source V2 example 2 | 3 | This example is intended to demonstrate typical interactions with the Data Source API V2. 4 | 5 | Lucene indices are used for persistent storage to make the challenge of integration realistic while still simple. 6 | 7 | The local file system is currently used to simplify the code. 8 | 9 | When writing data 10 | * store each partition as an individual Lucene index in a subdirectory of the same local directory shared by all executors. 11 | 12 | When reading data 13 | * push predicates down to the Lucene engine as a [BooleanQuery](https://lucene.apache.org/core/7_2_1/core/org/apache/lucene/search/BooleanQuery.html) 14 | * prune columns to retrieve only explicitly requested Lucene document fields 15 | 16 | ##### Running locally 17 | 18 | * ```mvn clean test``` to build the data source and execute a few test queries against it -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | net.ndolgov.sparkdatasourcetest.connector.LuceneDataSourceV2 2 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/scala/net/ndolgov/sparkdatasourcetest/connector/FilePaths.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.connector 2 | 3 | import net.ndolgov.sparkdatasourcetest.lucene.LuceneSchema 4 | import org.apache.hadoop.fs.Path 5 | 6 | object FilePaths { 7 | private val PART : String = "/part-" 8 | 9 | /** 10 | * @param rddDir RDD root path 11 | * @param index RDD partition index 12 | * @return RDD partition path 13 | */ 14 | def partitionPath(rddDir: String, index : Long) : Path = new Path(partitionDir(rddDir, index)) 15 | 16 | /** 17 | * @param rddDir RDD root path 18 | * @param index RDD partition index 19 | * @return RDD partition path 20 | */ 21 | def partitionDir(rddDir: String, index : Long) : String = rddDir + PART + "%05d".format(index) 22 | 23 | /** 24 | * @param rddDir RDD root path 25 | * @return schema file path 26 | */ 27 | def schemaFilePath(rddDir: String) : Path = new Path(rddDir + "/" + LuceneSchema.SCHEMA_FILE_NAME) 28 | } 29 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/scala/net/ndolgov/sparkdatasourcetest/connector/FileUtils.scala: 
-------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.connector 2 | 3 | import java.io.{File, FileFilter} 4 | 5 | /** For simplicity, avoid using apache commons-style dependencies */ 6 | object FileUtils { 7 | def mkDir(path: String): File = { 8 | val dir = new File(path) 9 | if (dir.mkdir()) dir else throw new RuntimeException("Could not create dir: " + path) 10 | } 11 | 12 | def deleteRecursively(file: File): Boolean = { 13 | if (file.isDirectory) { 14 | file.listFiles().forall(file => deleteRecursively(file)) 15 | } else { 16 | if (file.exists) file.delete() else true 17 | } 18 | } 19 | 20 | def listSubDirs(path: String): Array[String] = { 21 | val dir = new File(path) 22 | 23 | if (dir.isDirectory) { 24 | dir.listFiles(new FileFilter { 25 | override def accept(subDir: File): Boolean = subDir.getName.startsWith("part-") 26 | }).map((subDir: File) => subDir.getAbsolutePath) 27 | } else { 28 | throw new RuntimeException("Not a dir: " + path) 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/scala/net/ndolgov/sparkdatasourcetest/connector/LuceneDataSourceV2.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.connector 2 | 3 | import java.util.Optional 4 | 5 | import net.ndolgov.sparkdatasourcetest.lucene.LuceneSchema 6 | import org.apache.spark.sql.SaveMode 7 | import org.apache.spark.sql.sources.DataSourceRegister 8 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader 9 | import org.apache.spark.sql.sources.v2.writer.DataSourceWriter 10 | import org.apache.spark.sql.sources.v2.{DataSourceV2, DataSourceOptions, ReadSupport, WriteSupport} 11 | import org.apache.spark.sql.types.StructType 12 | 13 | final class LuceneDataSourceV2 extends DataSourceV2 with ReadSupport with WriteSupport with DataSourceRegister { 14 | import LuceneDataSourceV2._ 15 | 16 | override def createReader(options: DataSourceOptions): DataSourceReader = 17 | LuceneDataSourceV2Reader(path(options)) 18 | 19 | override def createWriter(jobId: String, schema: StructType, mode: SaveMode, options: DataSourceOptions): Optional[DataSourceWriter] = 20 | Optional.of( 21 | LuceneDataSourceV2Writer( 22 | path(options), 23 | LuceneSchema(schema, luceneSchema(options)))) 24 | 25 | override def shortName(): String = SHORT_NAME 26 | } 27 | 28 | object LuceneDataSourceV2 { 29 | /** alternative/long format name for when "META-INF.services" trick is not used */ 30 | val FORMAT : String = "net.ndolgov.sparkdatasourcev2test.sql" 31 | 32 | /** The default/short format name, usage example: "df.write.format(LuceneDataSource.SHORT_NAME)" */ 33 | val SHORT_NAME : String = "LuceneDataSourceV2" 34 | 35 | /** The root directory (presumably in a DFS) for Lucene data storage item. 
IRL would be based on tenantId/data version/etc */ 36 | val PATH : String = "path" 37 | 38 | /** The Lucene schema that describes which columns are indexed and/or stored */ 39 | val LUCENE_SCHEMA : String = "lucene.schema" 40 | 41 | private def path(options: DataSourceOptions): String = { 42 | stringArg(PATH, options) 43 | } 44 | 45 | private def luceneSchema(options: DataSourceOptions): String = { 46 | stringArg(LUCENE_SCHEMA, options) 47 | } 48 | 49 | private def stringArg(key : String, options: DataSourceOptions): String = { 50 | val mayBy = options.get(key) 51 | if (mayBy.isPresent) mayBy.get() else throw new IllegalArgumentException("Option is missing: " + key) 52 | } 53 | } 54 | 55 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/scala/net/ndolgov/sparkdatasourcetest/connector/LuceneDataSourceV2Writer.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.connector 2 | 3 | import net.ndolgov.sparkdatasourcetest.lucene.{LuceneIndexWriter, LuceneSchema} 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.sources.v2.writer.{DataSourceWriter, DataWriter, DataWriterFactory, WriterCommitMessage} 6 | 7 | /** Lucene data source write path */ 8 | private final class LuceneDataSourceV2Writer(path: String, schema: LuceneSchema) extends DataSourceWriter { 9 | override def createWriterFactory(): DataWriterFactory[InternalRow] = { 10 | FileUtils.mkDir(path) 11 | new LuceneDataWriterFactory(path, schema) 12 | } 13 | 14 | override def commit(messages: Array[WriterCommitMessage]): Unit = { 15 | // the same schema for all the partitions 16 | LuceneSchema.save(schema, FilePaths.schemaFilePath(path).toUri.toString) 17 | } 18 | 19 | override def abort(messages: Array[WriterCommitMessage]): Unit = { 20 | // todo delete partition dirs? 
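    // A possible cleanup for this todo (an illustrative sketch only, not part of the original code):
    // reuse this module's FileUtils helper to drop any partition directories written before the
    // failure, assuming this job is the only writer under `path`:
    //   FileUtils.listSubDirs(path).foreach(dir => FileUtils.deleteRecursively(new java.io.File(dir)))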
21 | } 22 | } 23 | 24 | private final class LuceneDataWriterFactory(rddDir: String, schema: LuceneSchema) extends DataWriterFactory[InternalRow] { 25 | override def createDataWriter(partitionId: Int, taskId: Long, epochId: Long): DataWriter[InternalRow] = 26 | new LuceneDataWriter(FilePaths.partitionDir(rddDir, partitionId), schema) //todo taskId in file paths 27 | } 28 | 29 | private final class LuceneDataWriter(partitionDir: String, schema: LuceneSchema) extends DataWriter[InternalRow] { 30 | private val rddDir = FileUtils.mkDir(partitionDir) 31 | 32 | private val writer = LuceneIndexWriter(partitionDir, schema) 33 | 34 | override def write(row: InternalRow): Unit = writer.write(row) 35 | 36 | override def commit(): WriterCommitMessage = { 37 | close() 38 | LuceneWriterCommitMessage() 39 | } 40 | 41 | override def abort(): Unit = { 42 | close() 43 | FileUtils.deleteRecursively(rddDir) 44 | } 45 | 46 | private def close(): Unit = writer.close() 47 | } 48 | 49 | case class LuceneWriterCommitMessage() extends WriterCommitMessage 50 | 51 | object LuceneDataSourceV2Writer { 52 | def apply(path: String, schema: LuceneSchema) : DataSourceWriter = new LuceneDataSourceV2Writer(path, schema) 53 | } 54 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/scala/net/ndolgov/sparkdatasourcetest/lucene/LuceneFieldReader.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.lucene 2 | 3 | import org.apache.spark.sql.sources.Filter 4 | import org.apache.spark.sql.types.{DoubleType, LongType, StructField} 5 | 6 | /** 7 | * Read a field value from the current Lucene document to the current Spark Row 8 | */ 9 | trait LuceneFieldReader { 10 | def readLong(value : Long) : Unit = throw new UnsupportedOperationException 11 | 12 | def readDouble(value : Double) : Unit = throw new UnsupportedOperationException 13 | } 14 | 15 | object LuceneFieldReader { 16 | 17 | // todo retrieve fields for (filters - columns) attrs? 
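  // One way to address the todo above (an illustrative sketch only): widen the requested columns
  // with attributes that appear in pushed-down filters but were not explicitly projected, relying
  // on org.apache.spark.sql.sources.Filter#references:
  //   val requested = (columns ++ filters.flatMap(_.references)).distinct
  // and size the reader array from `requested` instead of `columns`.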
18 | def apply(columns: Seq[String], filters: Array[Filter], schema: LuceneSchema, row : Array[Any]) : Array[LuceneFieldReader] = { 19 | val readers : Array[LuceneFieldReader] = Array.ofDim[LuceneFieldReader](columns.length) 20 | 21 | var schemaIndex : Int = 0 22 | var outputIndex : Int = 0 23 | for (field <- schema.sparkSchema()) { 24 | if (columns.contains(field.name)) { 25 | readers(outputIndex) = apply(field, outputIndex, row) 26 | outputIndex += 1 27 | } 28 | schemaIndex += 1 29 | } 30 | 31 | readers 32 | } 33 | 34 | private def apply(sparkField: StructField, outputIndex : Int, row: Array[Any]): LuceneFieldReader = { 35 | sparkField.dataType match { 36 | case LongType => new LongReader(outputIndex, row) 37 | case DoubleType => new DoubleReader(outputIndex, row) 38 | case _ => throw new IllegalArgumentException("Unsupported field type: " + sparkField.dataType); 39 | } 40 | } 41 | 42 | private final class LongReader(index : Int, row: Array[Any]) extends LuceneFieldReader { 43 | override def readLong(value : Long) : Unit = { 44 | row(index) = value 45 | } 46 | } 47 | 48 | private final class DoubleReader(index : Int, row: Array[Any]) extends LuceneFieldReader { 49 | override def readDouble(value : Double) : Unit = { 50 | row(index) = value 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/scala/net/ndolgov/sparkdatasourcetest/lucene/LuceneFieldWriter.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.lucene 2 | 3 | import org.apache.lucene.document.Document 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.types.{DoubleType, LongType} 6 | 7 | /** 8 | * Write a Spark Row to a Lucene index as a document 9 | */ 10 | trait LuceneFieldWriter { 11 | def write(row : InternalRow) 12 | } 13 | 14 | private final class RowWriter(writers : Seq[LuceneFieldWriter], document : Document) extends LuceneFieldWriter { 15 | override def write(row: InternalRow): Unit = { 16 | writers.foreach((writer: LuceneFieldWriter) => writer.write(row)) 17 | } 18 | } 19 | 20 | private final class LongFieldWriter(index: Int, field: LuceneDocumentField, document : Document) extends LuceneFieldWriter { 21 | override def write(row: InternalRow): Unit = { 22 | if (!row.isNullAt(index)) { 23 | field.addTo(document) 24 | field.setLongValue(row.getLong(index)) 25 | } 26 | } 27 | } 28 | 29 | private final class DoubleFieldWriter(index: Int, field: LuceneDocumentField, document: Document) extends LuceneFieldWriter { 30 | override def write(row: InternalRow): Unit = { 31 | if (!row.isNullAt(index)) { 32 | field.addTo(document) 33 | field.setDoubleValue(row.getDouble(index)) 34 | } 35 | } 36 | } 37 | 38 | object LuceneFieldWriter { 39 | /** 40 | * @param schema document schema 41 | * @param document document instance to reuse 42 | * @return a new writer 43 | */ 44 | def apply(schema : LuceneSchema, document : Document) : LuceneFieldWriter = { 45 | val writers : Array[LuceneFieldWriter] = Array.ofDim[LuceneFieldWriter](schema.size) 46 | 47 | var index : Int = 0 48 | for (_ <- schema.sparkSchema()) { 49 | writers(index) = apply(schema, index, document) 50 | index += 1 51 | } 52 | 53 | new RowWriter(writers, document) 54 | } 55 | 56 | private def apply(schema : LuceneSchema, index : Int, document : Document) : LuceneFieldWriter = { 57 | val field : LuceneDocumentField = LuceneFieldFactory(schema, index) 58 | 59 | 
schema.sparkFieldType(index) match { 60 | case LongType => new LongFieldWriter(index, field, document) 61 | case DoubleType => new DoubleFieldWriter(index, field, document) 62 | case _ => throw new IllegalArgumentException("Unsupported field type: " + field); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/scala/net/ndolgov/sparkdatasourcetest/lucene/LuceneIndexWriter.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.lucene 2 | 3 | import java.io.File 4 | 5 | import org.apache.lucene.analysis.core.KeywordAnalyzer 6 | import org.apache.lucene.document.Document 7 | import org.apache.lucene.index.{IndexWriter, IndexWriterConfig} 8 | import org.apache.lucene.store.{Directory, MMapDirectory} 9 | import org.apache.spark.sql.catalyst.InternalRow 10 | import org.slf4j.{Logger, LoggerFactory} 11 | 12 | /** 13 | * Write a sequence of Rows conforming to a provided schema into a new Lucene index at a given location 14 | */ 15 | object LuceneIndexWriter { 16 | val logger: Logger = LoggerFactory.getLogger(LuceneIndexWriter.getClass) 17 | 18 | def apply(indexDir: String, schema: LuceneSchema): LuceneIndexWriter = { 19 | logger.info("Creating Lucene index in: " + indexDir) 20 | 21 | val directory: Directory = new MMapDirectory(new File(indexDir).toPath) 22 | val indexWriter = new IndexWriter(directory, new IndexWriterConfig(new KeywordAnalyzer)) 23 | 24 | val document = new Document() 25 | val fieldWriter = LuceneFieldWriter(schema, document) 26 | 27 | new LuceneIndexWriterImpl(indexWriter, fieldWriter, directory, document) 28 | } 29 | } 30 | 31 | trait LuceneIndexWriter { 32 | def write(row: InternalRow) 33 | 34 | def close() 35 | } 36 | 37 | /** 38 | * Write a new fixed-schema Lucene document for every given row 39 | */ 40 | private final class LuceneIndexWriterImpl(indexWriter : IndexWriter, 41 | fieldWriter : LuceneFieldWriter, 42 | directory: Directory, 43 | document : Document) extends LuceneIndexWriter { 44 | import LuceneIndexWriter.logger 45 | 46 | def write(row: InternalRow) : Unit = { 47 | document.clear() 48 | 49 | fieldWriter.write(row) 50 | indexWriter.addDocument(document) 51 | } 52 | 53 | override def close(): Unit = { 54 | try { 55 | indexWriter.close() 56 | } catch { 57 | case _: Exception => logger.warn("Could not close index writer") 58 | } 59 | 60 | try { 61 | directory.close() 62 | } catch { 63 | case _: Exception => logger.warn("Could not close index directory") 64 | } 65 | 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/main/scala/net/ndolgov/sparkdatasourcetest/lucene/StoredFieldVisitorQuery.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.lucene 2 | 3 | import java.io.IOException 4 | 5 | import org.apache.lucene.index.{LeafReaderContext, StoredFieldVisitor} 6 | import org.apache.lucene.queries.{CustomScoreProvider, CustomScoreQuery} 7 | import org.apache.lucene.search.Query 8 | 9 | /** 10 | * Custom Lucene query using a visitor to collect values from matching Lucene documents 11 | */ 12 | final class StoredFieldVisitorQuery(val subQuery: Query, val processor : LuceneDocumentProcessor) extends CustomScoreQuery(subQuery) { 13 | @throws[IOException] 14 | override def getCustomScoreProvider(context : LeafReaderContext) : CustomScoreProvider = { 15 | new 
Provider(context, processor) 16 | } 17 | } 18 | 19 | trait LuceneDocumentProcessor { 20 | /** @return Lucene field visitor to apply to all matching documents */ 21 | def visitor() : StoredFieldVisitor 22 | 23 | /** Process Lucene document fields gathered by the [[visitor]] from the last seen document */ 24 | def onDocument() 25 | } 26 | 27 | @Override 28 | private final class Provider(val leafCtx : LeafReaderContext, val processor : LuceneDocumentProcessor) extends CustomScoreProvider(leafCtx) { 29 | val DEFAULT_SCORE: Int = 0 30 | val visitor : StoredFieldVisitor = processor.visitor() 31 | 32 | override def customScore(docId: Int, subQueryScore: Float, valSrcScore: Float): Float = { 33 | leafCtx.reader().document(docId, visitor) 34 | processor.onDocument() 35 | 36 | DEFAULT_SCORE 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | ] > 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /sparkdatasourcev2test/src/test/scala/net/ndolgov/sparkdatasourcetest/connector/LuceneSchemaTestSuit.scala: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sparkdatasourcetest.connector 2 | 3 | import java.io.File 4 | 5 | import net.ndolgov.sparkdatasourcetest.connector.LuceneDataSourceTestEnv.DocumentField 6 | import net.ndolgov.sparkdatasourcetest.lucene.{FieldType, LuceneField, LuceneSchema} 7 | import org.apache.spark.sql.types.StructType 8 | import org.scalatest.{Assertions, FlatSpec} 9 | 10 | final class LuceneSchemaTestSuit extends FlatSpec with Assertions { 11 | "A schema written to a file" should "be read back" in { 12 | val METRIC = DocumentField.METRIC.name 13 | val TIME = DocumentField.TIME.name 14 | val VALUE = DocumentField.VALUE.name 15 | 16 | val sparkSchema : StructType = LuceneDataSourceTestEnv.defaultSchema 17 | 18 | val luceneSchema : Array[LuceneField] = Array[LuceneField]( 19 | LuceneField(METRIC, FieldType.QUERYABLE), 20 | LuceneField(TIME, FieldType.INDEXED), 21 | LuceneField(VALUE, FieldType.STORED)) 22 | 23 | val original = LuceneSchema(sparkSchema, LuceneField.toString(luceneSchema)) 24 | 25 | val filePath: String = "target/testschema" + System.currentTimeMillis() + ".txt" 26 | LuceneSchema.save(original, filePath) 27 | 28 | val retrieved = LuceneSchema.open(filePath) 29 | 30 | assert(retrieved.size == 3) 31 | for (i <- 0 to 2) { 32 | assert(retrieved.sparkField(i) == sparkSchema.fields(i)) 33 | assert(retrieved.luceneField(i) == luceneSchema(i)) 34 | } 35 | 36 | new File(filePath).delete() 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /sqstest/src/main/java/net/ndolgov/sqstest/AsyncSqsClient.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 | 3 | import com.amazonaws.services.sqs.model.Message; 4 | 5 | import java.util.List; 6 | import java.util.function.Function; 7 | 8 | public interface AsyncSqsClient { 9 | void receive(String queueUrl, int maxMessages, int visibilityTimeout, Function, Void> handler); 10 | 11 | void delete(String queueUrl, String handle); 12 | 13 | void renew(String queueUrl, String handle, int visibilityTimeout, 
Function handler); 14 | 15 | void close(); 16 | 17 | interface AsyncSqsClientCallback { 18 | void onSuccess(String handle); 19 | 20 | void onFailure(String message); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /sqstest/src/main/java/net/ndolgov/sqstest/ConcurrentMapMessageRepository.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 | 3 | import java.util.concurrent.ConcurrentHashMap; 4 | import java.util.concurrent.ConcurrentMap; 5 | 6 | public final class ConcurrentMapMessageRepository implements MessageRepository { 7 | private final ConcurrentMap handleToQueue; 8 | 9 | public ConcurrentMapMessageRepository() { 10 | this.handleToQueue = new ConcurrentHashMap<>(64); 11 | } 12 | 13 | @Override 14 | public String get(String handle) { 15 | return handleToQueue.get(handle); 16 | } 17 | 18 | @Override 19 | public String remove(String handle) { 20 | return handleToQueue.remove(handle); 21 | } 22 | 23 | @Override 24 | public String put(String handle, String queueUrl) { 25 | handleToQueue.put(handle, queueUrl); 26 | return queueUrl; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /sqstest/src/main/java/net/ndolgov/sqstest/Handler.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 | 3 | import net.ndolgov.sqstest.AsyncSqsClient.AsyncSqsClientCallback; 4 | 5 | public interface Handler { 6 | /** 7 | * Process a message asynchronously on a thread other than the one calling this method 8 | * @param message message 9 | * @param callback message processing status listener 10 | */ 11 | void handle(String message, AsyncSqsClientCallback callback); 12 | 13 | /** 14 | * @return how many messages this handler can process in one batch 15 | */ 16 | default int getRemainingCapacity() { 17 | return 1; 18 | } 19 | 20 | /** 21 | * @return AWS SQS visibility timeout for this handler, [sec] 22 | */ 23 | default int getVisibilityTimeout() { 24 | return 300; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /sqstest/src/main/java/net/ndolgov/sqstest/MessageHandler.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 | 3 | import net.ndolgov.sqstest.AsyncSqsClient.AsyncSqsClientCallback; 4 | 5 | import java.util.concurrent.ExecutorService; 6 | import java.util.concurrent.atomic.AtomicInteger; 7 | 8 | public final class MessageHandler implements Handler { 9 | private final ExecutorService executor; 10 | 11 | private final AtomicInteger jobInProgressCounter = new AtomicInteger(); 12 | 13 | private final int visibilityTimeout; 14 | 15 | private final int totalCapacity; 16 | 17 | public MessageHandler(int visibilityTimeout, ExecutorService executor) { 18 | this.visibilityTimeout = visibilityTimeout; 19 | this.executor = executor; 20 | 21 | this.totalCapacity = Runtime.getRuntime().availableProcessors(); 22 | } 23 | 24 | @Override 25 | public void handle(String message, AsyncSqsClientCallback callback) { 26 | jobInProgressCounter.incrementAndGet(); 27 | 28 | executor.submit((Runnable) () -> { 29 | try { 30 | final long startedAt = System.currentTimeMillis(); 31 | 32 | // todo process message 33 | 34 | callback.onSuccess(""); 35 | } catch (Exception e) { 36 | callback.onFailure(e.getMessage()); 37 | } finally { 38 | jobInProgressCounter.decrementAndGet(); 39 | } 
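            // Note: the empty string passed to onSuccess above is a placeholder. For the poller's
            // callback (see SqsQueuePoller.handle) to look up and delete the right message, the
            // real SQS receipt handle would need to be threaded into this handler and passed to
            // onSuccess/onFailure instead.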
40 | }); 41 | } 42 | 43 | @Override 44 | public int getRemainingCapacity() { 45 | return Math.max(totalCapacity - jobInProgressCounter.get(), 0); 46 | } 47 | 48 | @Override 49 | public int getVisibilityTimeout() { 50 | return visibilityTimeout; 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /sqstest/src/main/java/net/ndolgov/sqstest/MessageRepository.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 | 3 | /** 4 | * Remember all the messages currently being processed 5 | */ 6 | public interface MessageRepository { 7 | String get(String handle); 8 | 9 | String remove(String handle); 10 | 11 | String put(String handle, String queueUrl); 12 | } 13 | -------------------------------------------------------------------------------- /sqstest/src/main/java/net/ndolgov/sqstest/SqsQueuePoller.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 | 3 | import com.amazonaws.services.sqs.model.Message; 4 | 5 | public final class SqsQueuePoller { 6 | private static final double SAFETY_MARGIN = 0.9; 7 | private final AsyncSqsClient sqsClient; 8 | private final MessageRepository repository; 9 | private final VisibilityTimeoutTracker tracker; 10 | 11 | public SqsQueuePoller(AsyncSqsClient sqsClient, MessageRepository repository, VisibilityTimeoutTracker tracker) { 12 | this.sqsClient = sqsClient; 13 | this.repository = repository; 14 | this.tracker = tracker; 15 | } 16 | 17 | public void poll(String queueUrl, Handler handler) { 18 | final int maxMessages = handler.getRemainingCapacity(); 19 | if (maxMessages > 0) { 20 | sqsClient.receive(queueUrl, maxMessages, handler.getVisibilityTimeout(), messages -> { 21 | for (Message message : messages) { 22 | handle(queueUrl, message, handler); 23 | } 24 | 25 | return null; 26 | }); 27 | } 28 | } 29 | 30 | private void handle(String queueUrl, Message message, Handler handler) { 31 | final String handle = message.getReceiptHandle(); 32 | 33 | repository.put(message.getReceiptHandle(), queueUrl); 34 | 35 | tracker.track(handle, (int) (handler.getVisibilityTimeout() * SAFETY_MARGIN)); 36 | 37 | handler.handle(message.getBody(), new AsyncSqsClient.AsyncSqsClientCallback() { 38 | @Override 39 | public void onSuccess(String handle) { 40 | sqsClient.delete(repository.remove(handle), handle); 41 | } 42 | 43 | @Override 44 | public void onFailure(String handle) { 45 | sqsClient.delete(repository.remove(handle), handle); 46 | } 47 | }); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /sqstest/src/main/java/net/ndolgov/sqstest/VisibilityTimeoutTracker.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 | 3 | import java.util.concurrent.Future; 4 | 5 | /** 6 | * Periodically reset visibility timeout for message still being processed 7 | */ 8 | public interface VisibilityTimeoutTracker { 9 | /** 10 | * @param handle message handle 11 | * @param timeout the next visibility timeout 12 | * @return the job to run after a given timeout 13 | */ 14 | Future track(String handle, int timeout); 15 | } 16 | -------------------------------------------------------------------------------- /sqstest/src/main/java/net/ndolgov/sqstest/VisibilityTimeoutTrackerImpl.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 
| 3 | import org.slf4j.Logger; 4 | 5 | import java.util.concurrent.Future; 6 | import java.util.concurrent.ScheduledExecutorService; 7 | import java.util.concurrent.TimeUnit; 8 | 9 | public final class VisibilityTimeoutTrackerImpl implements VisibilityTimeoutTracker { 10 | private final Logger logger; 11 | 12 | private final ScheduledExecutorService scheduler; 13 | 14 | private final MessageRepository repository; 15 | 16 | private final AsyncSqsClient sqsClient; 17 | 18 | public VisibilityTimeoutTrackerImpl(Logger logger, MessageRepository repository, AsyncSqsClient sqsClient, ScheduledExecutorService scheduler) { 19 | this.logger = logger; 20 | this.repository = repository; 21 | this.sqsClient = sqsClient; 22 | this.scheduler = scheduler; 23 | } 24 | 25 | @Override 26 | public Future track(String handle, int timeout) { 27 | return scheduler.schedule(() -> { 28 | logger.info("Extending visibility of message: " + handle + " by seconds: " + timeout); 29 | 30 | final String queueUrl = repository.get(handle); 31 | if (queueUrl == null) { 32 | logger.info("Message was already processed: " + handle); 33 | } else { 34 | sqsClient.renew(queueUrl, handle, timeout, unit -> { 35 | track(handle, timeout); 36 | return null; 37 | }); 38 | } 39 | }, 40 | timeout, 41 | TimeUnit.SECONDS); 42 | } 43 | } -------------------------------------------------------------------------------- /sqstest/src/test/java/net/ndolgov/sqstest/SqsQueuePollerTest.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.sqstest; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | import org.testng.annotations.Test; 6 | 7 | import static java.util.concurrent.Executors.newSingleThreadExecutor; 8 | import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor; 9 | 10 | /** 11 | * Only an example of wiring up, the test itself will fail 12 | */ 13 | public final class SqsQueuePollerTest { 14 | private static final Logger logger = LoggerFactory.getLogger(SqsQueuePollerTest.class); 15 | private static final int VISIBILITY_TUMEOUT = 900; 16 | private static final String QUEUE_URL = "http://sqs.us-east-1.amazonaws.com/123456789012/queue2"; 17 | 18 | @Test 19 | public void testOneQueuePollingSetup() { 20 | final AsyncSqsClientImpl sqsClient = new AsyncSqsClientImpl(logger, VISIBILITY_TUMEOUT); 21 | final ConcurrentMapMessageRepository repository = new ConcurrentMapMessageRepository(); 22 | 23 | final SqsQueuePoller poller = new SqsQueuePoller( 24 | sqsClient, 25 | repository, 26 | new VisibilityTimeoutTrackerImpl(logger, repository, sqsClient, newSingleThreadScheduledExecutor())); 27 | 28 | poller.poll( 29 | QUEUE_URL, 30 | new MessageHandler(VISIBILITY_TUMEOUT, newSingleThreadExecutor())); 31 | 32 | sqsClient.close(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /thriftrpctest/src/main/java/net/ndolgov/thriftrpctest/ClientFactory.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.thriftrpctest; 2 | 3 | /** 4 | * Asynchronous Thrift service client factory 5 | */ 6 | public interface ClientFactory { 7 | /** 8 | * @param definition service definition to create client for 9 | * @param hostname server host name (IRL obtained from some discovery service) 10 | * @param Thrift-generated service interface type 11 | * @return newly created service client 12 | */ 13 | T create(ServiceDefinition definition, String hostname); 14 | 15 | void 
close(); 16 | } 17 | -------------------------------------------------------------------------------- /thriftrpctest/src/main/java/net/ndolgov/thriftrpctest/HandlerFactory.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.thriftrpctest; 2 | 3 | /** 4 | * Asynchronous Thrift service server-side handler factory 5 | */ 6 | public interface HandlerFactory { 7 | /** 8 | * @param definition service definition to create server-side handler for 9 | * @param Thrift-generated service interface type 10 | * @return newly created service request handler 11 | */ 12 | T handler(ServiceDefinition definition); 13 | } 14 | -------------------------------------------------------------------------------- /thriftrpctest/src/main/java/net/ndolgov/thriftrpctest/MultiplexedClientFactory.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.thriftrpctest; 2 | 3 | import org.apache.thrift.async.TAsyncClientManager; 4 | import org.apache.thrift.protocol.TBinaryProtocol; 5 | import org.apache.thrift.protocol.TMultiplexedProtocol; 6 | import org.apache.thrift.protocol.TProtocol; 7 | import org.apache.thrift.protocol.TProtocolFactory; 8 | import org.apache.thrift.transport.TNonblockingSocket; 9 | import org.apache.thrift.transport.TTransport; 10 | 11 | /** 12 | * Establish a connection to a given service instance 13 | */ 14 | public final class MultiplexedClientFactory implements ClientFactory { 15 | private final TAsyncClientManager manager; 16 | private final TProtocolFactory factory; 17 | private final int port; 18 | 19 | public MultiplexedClientFactory(int port) { 20 | this.port = port; 21 | 22 | factory = new TBinaryProtocol.Factory(); 23 | 24 | try { 25 | manager = new TAsyncClientManager(); 26 | } catch (Exception e) { 27 | throw new IllegalArgumentException("Could not create client manager", e); 28 | } 29 | } 30 | 31 | @Override 32 | public T create(ServiceDefinition definition, String hostname) { 33 | try { 34 | final TProtocolFactory pfactory = new TMultiplexedProtocolFactory(factory, definition.getName()); // todo cache? 
35 | return (T) definition.clientFactory(pfactory, manager).getAsyncClient(new TNonblockingSocket(hostname, port)); 36 | } catch (Exception e) { 37 | throw new RuntimeException("Could not create client to: " + hostname + " for service: " + definition, e); 38 | } 39 | } 40 | 41 | @Override 42 | public void close() { 43 | manager.stop(); 44 | } 45 | 46 | private final static class TMultiplexedProtocolFactory implements TProtocolFactory { 47 | private final TProtocolFactory factory; 48 | private final String name; 49 | 50 | public TMultiplexedProtocolFactory(TProtocolFactory factory, String name) { 51 | this.factory = factory; 52 | this.name = name; 53 | } 54 | 55 | public TProtocol getProtocol(TTransport transport) { 56 | return new TMultiplexedProtocol(factory.getProtocol(transport), name); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /thriftrpctest/src/test/java/net/ndolgov/thriftrpctest/Handler.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.thriftrpctest; 2 | 3 | import net.ndolgov.thriftrpctest.api.TestRequest; 4 | import net.ndolgov.thriftrpctest.api.TestResponse; 5 | import net.ndolgov.thriftrpctest.api.TestService; 6 | 7 | import org.apache.thrift.async.AsyncMethodCallback; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import java.util.concurrent.CompletableFuture; 12 | import java.util.concurrent.Executor; 13 | 14 | /** 15 | * Asynchronous RPC call handler illustrating how to process requests on a dedicated thread pool and 16 | * reply asynchronously once the future is finished. 17 | */ 18 | public final class Handler implements TestService.AsyncIface { 19 | private static final Logger logger = LoggerFactory.getLogger(Handler.class); 20 | 21 | public static final String RESULT = "RESULT"; 22 | 23 | private final Executor executor; 24 | 25 | public Handler(Executor executor) { 26 | this.executor = executor; 27 | } 28 | 29 | @Override 30 | public void process(TestRequest request, AsyncMethodCallback callback) { 31 | logger.info("Processing: " + request); 32 | 33 | final CompletableFuture future = CompletableFuture.supplyAsync( 34 | () -> { 35 | return "RESULT"; // todo this is where actual time-consuming processing would be 36 | }, 37 | executor); 38 | 39 | future.whenComplete((result, e) -> { 40 | if (e == null) { 41 | callback.onComplete(new TestResponse().setSuccess(true).setResult(result).setRequestId(request.getRequestId())); 42 | } else { 43 | callback.onComplete(new TestResponse().setSuccess(false).setRequestId(request.getRequestId())); 44 | } 45 | }); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /thriftrpctest/src/test/java/net/ndolgov/thriftrpctest/Handler2.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.thriftrpctest; 2 | 3 | import net.ndolgov.thriftrpctest.api.TestRequest2; 4 | import net.ndolgov.thriftrpctest.api.TestResponse2; 5 | import net.ndolgov.thriftrpctest.api.TestService2; 6 | import org.apache.thrift.async.AsyncMethodCallback; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.util.concurrent.CompletableFuture; 11 | import java.util.concurrent.Executor; 12 | 13 | /** 14 | * Asynchronous RPC call handler illustrating how to process requests on a dedicated thread pool and 15 | * reply asynchronously once the future is finished. 
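 * (This second handler mirrors Handler; presumably having two services lets the test exercise the
 * multiplexed Thrift setup, where each service is registered under its own name.)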
16 | */ 17 | public final class Handler2 implements TestService2.AsyncIface { 18 | private static final Logger logger = LoggerFactory.getLogger(Handler2.class); 19 | 20 | public static final String RESULT = "RESULT2"; 21 | 22 | private final Executor executor; 23 | 24 | public Handler2(Executor executor) { 25 | this.executor = executor; 26 | } 27 | 28 | @Override 29 | public void process(TestRequest2 request, AsyncMethodCallback callback) { 30 | logger.info("Processing: " + request); 31 | 32 | final CompletableFuture future = CompletableFuture.supplyAsync( 33 | () -> { 34 | return "RESULT2"; // todo this is where actual time-consuming processing would be 35 | }, 36 | executor); 37 | 38 | future.whenComplete((result, e) -> { 39 | if (e == null) { 40 | callback.onComplete(new TestResponse2().setSuccess(true).setResult(result).setRequestId(request.getRequestId())); 41 | } else { 42 | callback.onComplete(new TestResponse2().setSuccess(false).setRequestId(request.getRequestId())); 43 | } 44 | }); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /thriftrpctest/src/test/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /thriftrpctest/src/test/thrift/testsvc.thrift: -------------------------------------------------------------------------------- 1 | namespace java net.ndolgov.thriftrpctest.api 2 | 3 | struct TestRequest { 4 | 1: required i64 requestId 5 | } 6 | 7 | struct TestResponse { 8 | 1: required bool success 9 | 2: required i64 requestId 10 | 3: string result 11 | } 12 | 13 | service TestService { 14 | TestResponse process(TestRequest request) 15 | } -------------------------------------------------------------------------------- /thriftrpctest/src/test/thrift/testsvc2.thrift: -------------------------------------------------------------------------------- 1 | namespace java net.ndolgov.thriftrpctest.api 2 | 3 | struct TestRequest2 { 4 | 1: required i64 requestId 5 | } 6 | 7 | struct TestResponse2 { 8 | 1: required bool success 9 | 2: required i64 requestId 10 | 3: string result 11 | } 12 | 13 | service TestService2 { 14 | TestResponse2 process(TestRequest2 request) 15 | } -------------------------------------------------------------------------------- /timeseriescompressiontest/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | net.ndolgov 9 | timeseriescompressiontest 10 | 1.0.0-SNAPSHOT 11 | jar 12 | Time Series compression test 13 | 14 | 15 | 6.8.8 16 | 17 | 18 | 19 | 20 | org.testng 21 | testng 22 | ${testng.version} 23 | test 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /timeseriescompressiontest/src/test/java/net/ndolgov/timeseriescompression/CompressedTimeSeriesTest.java: -------------------------------------------------------------------------------- 1 | package net.ndolgov.timeseriescompression; 2 | 3 | import org.testng.annotations.Test; 4 | 5 | import static org.testng.Assert.assertEquals; 6 | 7 | public final class CompressedTimeSeriesTest { 8 | @Test 9 | public void testEmpty() { 10 | read(new byte[0], 0, new long[0], new double[0]); 11 | } 12 | 13 | @Test 14 | public void testMultipleDataPoints() { 15 | final int dpCount = 6; 16 | final byte[] buffer = new byte[16 * dpCount]; 17 | 18 | final 
CompressedTimeSeries timeSeries = new CompressedTimeSeries(buffer); 19 | final long[] originalTimestamps = {0, 60, 120, 180, 240, 300}; 20 | final double[] originalValues = {5.0, 6.0, 7.0, 7.0, 8.0, 0.3333}; 21 | 22 | for (int i = 0; i < originalValues.length; i++) { 23 | append(timeSeries, originalTimestamps[i], originalValues[i]); 24 | } 25 | 26 | final long[] retrievedTimes = new long[dpCount]; 27 | final double[] retrievedValues = new double[dpCount]; 28 | read(buffer, dpCount, retrievedTimes, retrievedValues); 29 | 30 | for (int i = 0; i < dpCount; i++) { 31 | assertEquals(originalTimestamps[i], retrievedTimes[i]); 32 | assertEquals(originalValues[i], retrievedValues[i]); 33 | } 34 | } 35 | 36 | private static void append(CompressedTimeSeries stream, long time, double value) { 37 | if (!stream.append(time, value)) { 38 | throw new IllegalArgumentException("timestamp:" + time); 39 | } 40 | } 41 | 42 | private static void read(byte[] data, int count, long[] times, double[] values) { 43 | if (count == 0) { 44 | return; 45 | } 46 | 47 | final CompressedTimeSeries timeSeries = new CompressedTimeSeries(data); 48 | 49 | times[0] = timeSeries.readFirstTimeStamp(); 50 | values[0] = timeSeries.readNextValue(); 51 | int readSoFar = 1; 52 | 53 | while (readSoFar < count) { 54 | times[readSoFar] = timeSeries.readNextTimestamp(); 55 | values[readSoFar] = timeSeries.readNextValue(); 56 | readSoFar++; 57 | } 58 | } 59 | } 60 | --------------------------------------------------------------------------------