├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── checkstyles ├── marmaray-0.0.1.xml └── suppressions.xml ├── docs └── images │ ├── ForkOperator_ForkFunction.png │ ├── High_Level_Architecture.png │ ├── Marmaray-Secondary.Logo.png │ ├── Marmaray_Primary.Logo_tagline.png │ ├── Marmaray_white_Primary.Logo_tagline.png │ ├── Marmaray_white_Secondary.Logo.png │ ├── Metadata_Manager.png │ ├── avro_payload_conversion.png │ └── end_to_end_job_flow.png ├── examples └── README.md ├── marmaray-tools ├── checkstyles ├── pom.xml └── src │ └── main │ ├── cli │ └── toggleHDFSMetadataFile.py │ └── java │ └── com │ └── uber │ └── marmaray │ └── tools │ ├── HDFSMetadataPrinter.java │ └── HDFSMetadataPruner.java ├── marmaray ├── build.gradle ├── checkstyles ├── config │ └── sample.yaml ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── uber │ │ └── marmaray │ │ ├── common │ │ ├── AvroPayload.java │ │ ├── DispersalLengthType.java │ │ ├── DispersalType.java │ │ ├── FileSinkType.java │ │ ├── HoodieErrorPayload.java │ │ ├── IPayload.java │ │ ├── MetadataManagerType.java │ │ ├── PartitionType.java │ │ ├── WorkUnit.java │ │ ├── actions │ │ │ ├── IJobDagAction.java │ │ │ ├── JobDagActions.java │ │ │ └── ReporterAction.java │ │ ├── configuration │ │ │ ├── AwsConfiguration.java │ │ │ ├── CassandraMetadataManagerConfiguration.java │ │ │ ├── CassandraSinkConfiguration.java │ │ │ ├── ConfigScopeResolver.java │ │ │ ├── Configuration.java │ │ │ ├── ErrorTableConfiguration.java │ │ │ ├── FileSinkConfiguration.java │ │ │ ├── FileSourceConfiguration.java │ │ │ ├── HDFSMetadataManagerConfiguration.java │ │ │ ├── HDFSSchemaServiceConfiguration.java │ │ │ ├── HadoopConfiguration.java │ │ │ ├── HiveConfiguration.java │ │ │ ├── HiveSourceConfiguration.java │ │ │ ├── HoodieConfiguration.java │ │ │ ├── HoodieIndexConfiguration.java │ │ │ ├── KafkaConfiguration.java │ │ │ ├── KafkaSourceConfiguration.java │ │ │ ├── LockManagerConfiguration.java │ │ │ ├── MetadataManagerConfiguration.java 
│ │ │ ├── RetryStrategyConfiguration.java │ │ │ ├── SimpleRetryStrategyConfiguration.java │ │ │ ├── SparkConfiguration.java │ │ │ └── ZookeeperConfiguration.java │ │ ├── converters │ │ │ ├── converterresult │ │ │ │ └── ConverterResult.java │ │ │ ├── data │ │ │ │ ├── AbstractDataConverter.java │ │ │ │ ├── CassandraSinkCQLDataConverter.java │ │ │ │ ├── CassandraSinkDataConverter.java │ │ │ │ ├── DummyHoodieSinkDataConverter.java │ │ │ │ ├── FileSinkDataCSVConverter.java │ │ │ │ ├── FileSinkDataConverter.java │ │ │ │ ├── FileSinkDataConverterFactory.java │ │ │ │ ├── FileSinkDataJSONConverter.java │ │ │ │ ├── HoodieSinkDataConverter.java │ │ │ │ ├── KafkaSourceDataConverter.java │ │ │ │ ├── SinkDataConverter.java │ │ │ │ ├── SourceDataConverter.java │ │ │ │ ├── SparkSourceDataConverter.java │ │ │ │ └── TSBasedHoodieSinkDataConverter.java │ │ │ └── schema │ │ │ │ ├── AbstractSchemaConverter.java │ │ │ │ ├── CassandraSchemaConverter.java │ │ │ │ └── DataFrameSchemaConverter.java │ │ ├── data │ │ │ ├── BinaryRawData.java │ │ │ ├── ErrorData.java │ │ │ ├── ForkData.java │ │ │ ├── IData.java │ │ │ ├── RDDWrapper.java │ │ │ ├── RawData.java │ │ │ ├── RawDataHelper.java │ │ │ └── ValidData.java │ │ ├── dataset │ │ │ ├── ErrorRecord.java │ │ │ ├── ExceptionRecord.java │ │ │ ├── MetricRecord.java │ │ │ ├── UtilRecord.java │ │ │ └── UtilTable.java │ │ ├── exceptions │ │ │ ├── ForkOperationException.java │ │ │ ├── InvalidDataException.java │ │ │ ├── JobRuntimeException.java │ │ │ ├── MetadataException.java │ │ │ ├── MissingPropertyException.java │ │ │ └── RetryException.java │ │ ├── forkoperator │ │ │ ├── FilterFunction.java │ │ │ ├── ForkFunction.java │ │ │ └── ForkOperator.java │ │ ├── job │ │ │ ├── Dag.java │ │ │ ├── DagPayload.java │ │ │ ├── ExecutionTimeJobExecutionStrategy.java │ │ │ ├── IJobExecutionStrategy.java │ │ │ ├── Job.java │ │ │ ├── JobDag.java │ │ │ ├── JobManager.java │ │ │ ├── JobSubDag.java │ │ │ ├── SingleSinkSubDag.java │ │ │ ├── ThreadPoolService.java │ │ │ 
└── ThreadPoolServiceTier.java │ │ ├── metadata │ │ │ ├── AbstractValue.java │ │ │ ├── CassandraBasedMetadataManager.java │ │ │ ├── HDFSDatePartitionManager.java │ │ │ ├── HDFSMetadataManager.java │ │ │ ├── HDFSPartitionManager.java │ │ │ ├── HoodieBasedMetadataManager.java │ │ │ ├── IMetadataManager.java │ │ │ ├── JobManagerMetadataTracker.java │ │ │ ├── MetadataConstants.java │ │ │ ├── MultiMetadataManager.java │ │ │ ├── NoOpMetadataManager.java │ │ │ └── StringValue.java │ │ ├── metrics │ │ │ ├── CassandraMetric.java │ │ │ ├── CassandraPayloadRDDSizeEstimator.java │ │ │ ├── ChargebackMetricType.java │ │ │ ├── DataFeedMetricNames.java │ │ │ ├── DataFeedMetrics.java │ │ │ ├── DoubleMetric.java │ │ │ ├── ErrorCauseTagNames.java │ │ │ ├── IChargebackCalculator.java │ │ │ ├── IMetricable.java │ │ │ ├── JobMetricNames.java │ │ │ ├── JobMetricType.java │ │ │ ├── JobMetrics.java │ │ │ ├── LongMetric.java │ │ │ ├── Metric.java │ │ │ ├── ModuleTagNames.java │ │ │ └── TimerMetric.java │ │ ├── reporters │ │ │ ├── ConsoleReporter.java │ │ │ ├── IKafkaDataLossReporter.java │ │ │ ├── IReporter.java │ │ │ ├── Reportable.java │ │ │ └── Reporters.java │ │ ├── retry │ │ │ ├── IFunctionThrowsException.java │ │ │ ├── IRetryStrategy.java │ │ │ ├── RetryableFunction.java │ │ │ └── SimpleRetryStrategy.java │ │ ├── schema │ │ │ ├── HDFSSchemaService.java │ │ │ ├── ISchemaService.java │ │ │ ├── ISinkSchemaManager.java │ │ │ └── cassandra │ │ │ │ ├── CassandraDataField.java │ │ │ │ ├── CassandraMetadataSchemaManager.java │ │ │ │ ├── CassandraPayload.java │ │ │ │ ├── CassandraSchema.java │ │ │ │ ├── CassandraSchemaField.java │ │ │ │ ├── CassandraSchemaManager.java │ │ │ │ ├── CassandraSinkSchemaManager.java │ │ │ │ └── ClusterKey.java │ │ ├── sinks │ │ │ ├── ISink.java │ │ │ ├── SinkStatManager.java │ │ │ ├── cassandra │ │ │ │ ├── CassandraClientSink.java │ │ │ │ ├── CassandraSSTableSink.java │ │ │ │ └── CassandraSink.java │ │ │ ├── file │ │ │ │ ├── AwsFileSink.java │ │ │ │ ├── 
FileSink.java │ │ │ │ └── HdfsFileSink.java │ │ │ └── hoodie │ │ │ │ ├── HoodieErrorSink.java │ │ │ │ ├── HoodieSink.java │ │ │ │ ├── HoodieSinkOperations.java │ │ │ │ ├── HoodieWriteStatus.java │ │ │ │ └── partitioner │ │ │ │ └── DefaultHoodieDataPartitioner.java │ │ ├── sources │ │ │ ├── IRunState.java │ │ │ ├── ISource.java │ │ │ ├── IWorkUnitCalculator.java │ │ │ ├── file │ │ │ │ ├── FileRunState.java │ │ │ │ ├── FileSource.java │ │ │ │ ├── FileSourceDataConverter.java │ │ │ │ ├── FileWorkUnitCalculator.java │ │ │ │ │ └── FileWorkUnitCalculator.java │ │ │ │ └── JSONFileSourceDataConverter.java │ │ │ ├── hive │ │ │ │ ├── HiveRunState.java │ │ │ │ ├── HiveSource.java │ │ │ │ ├── ParquetWorkUnitCalculator.java │ │ │ │ └── ParquetWorkUnitCalculatorResult.java │ │ │ └── kafka │ │ │ │ ├── IKafkaOffsetSelector.java │ │ │ │ ├── KafkaBootstrapOffsetSelector.java │ │ │ │ ├── KafkaOffsetResetter.java │ │ │ │ ├── KafkaRunState.java │ │ │ │ ├── KafkaSource.java │ │ │ │ ├── KafkaWorkUnitCalculator.java │ │ │ │ └── LogBasedKafkaDataLossReporter.java │ │ ├── spark │ │ │ ├── MarmarayKryoSerializer.java │ │ │ ├── SparkArgs.java │ │ │ └── SparkFactory.java │ │ └── status │ │ │ ├── BaseStatus.java │ │ │ ├── IStatus.java │ │ │ └── JobManagerStatus.java │ │ ├── examples │ │ └── job │ │ │ └── ParquetToCassandraJob.java │ │ └── utilities │ │ ├── ByteBufferUtil.java │ │ ├── CassandraSinkUtil.java │ │ ├── CommandLineUtil.java │ │ ├── ConfigUtil.java │ │ ├── ConverterUtil.java │ │ ├── DateUtil.java │ │ ├── ErrorExtractor.java │ │ ├── ErrorTableUtil.java │ │ ├── FSUtils.java │ │ ├── GenericRecordUtil.java │ │ ├── HoodieSinkConverterErrorExtractor.java │ │ ├── HoodieSinkErrorExtractor.java │ │ ├── HoodieUtil.java │ │ ├── JobUtil.java │ │ ├── JsonSourceConverterErrorExtractor.java │ │ ├── KafkaSourceConverterErrorExtractor.java │ │ ├── KafkaUtil.java │ │ ├── LockManager.java │ │ ├── LongAccumulator.java │ │ ├── MapUtil.java │ │ ├── NumberConstants.java │ │ ├── ScalaUtil.java │ │ ├── 
SchemaUtil.java │ │ ├── SizeUnit.java │ │ ├── SparkUtil.java │ │ ├── StringTypes.java │ │ ├── StringUtil.java │ │ ├── TimeUnitUtil.java │ │ ├── TimestampInfo.java │ │ ├── cluster │ │ └── CassandraClusterInfo.java │ │ └── listener │ │ ├── SparkEventListener.java │ │ ├── SparkJobTracker.java │ │ └── TimeoutManager.java │ └── test │ ├── java │ └── com │ │ └── uber │ │ └── marmaray │ │ ├── TestSparkUtil.java │ │ ├── common │ │ ├── actions │ │ │ └── TestJobDagActions.java │ │ ├── configuration │ │ │ ├── TestAwsConfiguration.java │ │ │ ├── TestCassandraSinkConfiguration.java │ │ │ ├── TestConfigScopeResolver.java │ │ │ ├── TestConfiguration.java │ │ │ ├── TestErrorTableConfiguration.java │ │ │ ├── TestFileSinkConfiguration.java │ │ │ ├── TestHadoopConfiguration.java │ │ │ ├── TestHoodieConfiguration.java │ │ │ ├── TestHoodieIndexConfiguration.java │ │ │ ├── TestKafkaConfiguration.java │ │ │ └── TestKafkaSourceConfiguration.java │ │ ├── converters │ │ │ ├── TestAbstractDataConverter.java │ │ │ ├── TestCassandraDataFrameConverter.java │ │ │ ├── TestCassandraSchemaConverter.java │ │ │ ├── TestDataFrameDataConverter.java │ │ │ ├── TestDataFrameSchemaConverter.java │ │ │ └── data │ │ │ │ ├── TestCassandraSinkCQLDataConverter.java │ │ │ │ ├── TestCassandraSinkDataConverter.java │ │ │ │ ├── TestFileSinkDataCSVConverter.java │ │ │ │ ├── TestFileSinkDataJSONConverter.java │ │ │ │ └── TestSparkSourceDataConverter.java │ │ ├── data │ │ │ └── TestRDDWrapper.java │ │ ├── dataset │ │ │ └── TestUtilTable.java │ │ ├── forkoperator │ │ │ └── TestForkOperator.java │ │ ├── job │ │ │ ├── TestExecutionTimeJobExecutionStrategy.java │ │ │ ├── TestJobDag.java │ │ │ ├── TestJobManager.java │ │ │ ├── TestJobSubDag.java │ │ │ └── TestThreadPoolService.java │ │ ├── metadata │ │ │ ├── HDFSTestConstants.java │ │ │ ├── MemoryMetadataManager.java │ │ │ ├── TestCassandraBasedMetadataManager.java │ │ │ ├── TestHDFSDatePartitionManager.java │ │ │ ├── TestHDFSJobLevelMetadataTracker.java │ │ │ ├── 
TestHDFSMetadataManager.java │ │ │ ├── TestHDFSPartitionManager.java │ │ │ └── TestHoodieBasedMetadataManager.java │ │ ├── metrics │ │ │ ├── TestDataFeedMetrics.java │ │ │ ├── TestJobMetrics.java │ │ │ └── TestTimerMetric.java │ │ ├── retry │ │ │ └── TestRetryableFunction.java │ │ ├── schema │ │ │ ├── TestHDFSSchemaService.java │ │ │ └── cassandra │ │ │ │ ├── TestCassandraSinkSchemaManager.java │ │ │ │ └── TestClusterKey.java │ │ ├── sinks │ │ │ ├── TestSinkStatManager.java │ │ │ ├── cassandra │ │ │ │ ├── TestCassandraClientSink.java │ │ │ │ ├── TestCassandraSSTableSink.java │ │ │ │ └── TestCassandraSinkUtil.java │ │ │ ├── file │ │ │ │ ├── FileSinkTestUtil.java │ │ │ │ ├── TestAwsFileSink.java │ │ │ │ ├── TestFileSink.java │ │ │ │ └── TestHdfsFileSink.java │ │ │ └── hoodie │ │ │ │ └── TestHoodieSink.java │ │ ├── sources │ │ │ ├── file │ │ │ │ ├── TestFileWorkUnitCalculator.java │ │ │ │ └── TestJSONFileSourceDataConverter.java │ │ │ └── hive │ │ │ │ ├── TestHiveSource.java │ │ │ │ ├── TestHiveSourceConfiguration.java │ │ │ │ └── TestParquetWorkUnitCalculator.java │ │ ├── spark │ │ │ ├── TestMarmarayKryoSerializer.java │ │ │ └── TestSparkFactory.java │ │ ├── status │ │ │ ├── TestBaseStatus.java │ │ │ └── TestJobManagerStatus.java │ │ └── util │ │ │ ├── AbstractSparkTest.java │ │ │ ├── AvroPayloadUtil.java │ │ │ ├── CassandraTestConstants.java │ │ │ ├── CassandraTestUtil.java │ │ │ ├── FileHelperUtil.java │ │ │ ├── FileSinkConfigTestUtil.java │ │ │ ├── FileTestUtil.java │ │ │ ├── HiveTestUtil.java │ │ │ ├── KafkaTestHelper.java │ │ │ ├── MultiThreadTestCoordinator.java │ │ │ ├── ParquetWriterUtil.java │ │ │ ├── SchemaTestUtil.java │ │ │ ├── SparkTestUtil.java │ │ │ ├── TestConverterUtil.java │ │ │ ├── TestDateUtil.java │ │ │ ├── TestFsUtils.java │ │ │ ├── TestJobUtil.java │ │ │ ├── TestLockManager.java │ │ │ ├── TestMapUtil.java │ │ │ ├── TestParquetWriterUtil.java │ │ │ └── TestSchemaUtil.java │ │ └── utilities │ │ ├── ResourcesUtils.java │ │ ├── TestKafkaUtil.java │ 
│ ├── TestSizeUnit.java │ │ ├── TestTimeUnitUtil.java │ │ └── listener │ │ └── TestTimeoutManager.java │ └── resources │ ├── cassandra.yaml │ ├── config.yaml │ ├── configWithScopes.yaml │ ├── datacenter │ └── datacenter │ ├── expectedConfigWithBootstrapScope.yaml │ ├── expectedConfigWithIncrementalScope.yaml │ ├── log4j-surefire.properties │ ├── schemas │ ├── StringPair.avsc │ └── schemasource │ │ ├── myTestSchema.1.avsc │ │ ├── myTestSchema.2.avsc │ │ └── wrongSchema.1.avsc │ ├── setupTable.cql │ ├── teardownTable.cql │ └── testData │ ├── testPartition │ └── data.parquet │ └── testPartition1 │ └── testPartition2 │ └── data.parquet └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.a 8 | *.o 9 | *.so 10 | *.node 11 | 12 | # Node Waf Byproducts # 13 | ####################### 14 | .lock-wscript 15 | build/ 16 | autom4te.cache/ 17 | 18 | # Node Modules # 19 | ################ 20 | # Better to let npm install these from the package.json definition 21 | # rather than maintain this manually 22 | node_modules/ 23 | 24 | # Packages # 25 | ############ 26 | # it's better to unpack these files and commit the raw source 27 | # git has its own built in compression methods 28 | *.7z 29 | *.dmg 30 | *.gz 31 | *.iso 32 | *.jar 33 | *.rar 34 | *.tar 35 | *.zip 36 | 37 | # Logs and databases # 38 | ###################### 39 | *.log 40 | dump.rdb 41 | *.tap 42 | 43 | 44 | # OS generated files # 45 | ###################### 46 | .DS_Store? 47 | .DS_Store 48 | ehthumbs.db 49 | Icon? 
50 | Thumbs.db 51 | 52 | # thrift generated files # 53 | ########################## 54 | generated/ 55 | 56 | # NodeJS Core Dump 57 | core 58 | 59 | # Jenkins build scripts 60 | rt-jenkins/ 61 | 62 | # Coverage Reports 63 | coverage/ 64 | 65 | # local docs, scratchboards 66 | localdocs/ 67 | 68 | # vi temp files 69 | .*.swp 70 | 71 | # intelliJ 72 | .idea/ 73 | *.iml 74 | 75 | # Project specific items (local conf, build dir) 76 | config/local.json 77 | maps-evidence/ 78 | *.lst 79 | classes/ 80 | target/ 81 | *.dat 82 | 83 | # shaded jar pom file 84 | dependency-reduced-pom.xml 85 | 86 | # output of build plugin org.codehaus.mojo build-helper-maven-plugin 87 | test_properties.props 88 | 89 | # gradle generated logs 90 | .gradle 91 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | install: 3 | - mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -q 4 | script: mvn test -B -q 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Marmaray 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /checkstyles/suppressions.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/images/ForkOperator_ForkFunction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/ForkOperator_ForkFunction.png -------------------------------------------------------------------------------- /docs/images/High_Level_Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/High_Level_Architecture.png -------------------------------------------------------------------------------- /docs/images/Marmaray-Secondary.Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Marmaray-Secondary.Logo.png -------------------------------------------------------------------------------- /docs/images/Marmaray_Primary.Logo_tagline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Marmaray_Primary.Logo_tagline.png -------------------------------------------------------------------------------- /docs/images/Marmaray_white_Primary.Logo_tagline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Marmaray_white_Primary.Logo_tagline.png -------------------------------------------------------------------------------- /docs/images/Marmaray_white_Secondary.Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Marmaray_white_Secondary.Logo.png -------------------------------------------------------------------------------- /docs/images/Metadata_Manager.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Metadata_Manager.png -------------------------------------------------------------------------------- /docs/images/avro_payload_conversion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/avro_payload_conversion.png -------------------------------------------------------------------------------- /docs/images/end_to_end_job_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/end_to_end_job_flow.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 
| ## ParquetToCassandraJob 2 | 3 | This job demonstrates the ability to load parquet data from HDFS 4 | (either the data underlying a hive table or just raw parquet files with the same schema) to a cassandra cluster. 5 | 6 | Requirements: 7 | 1. hadoop 8 | 2. spark 2.1 9 | 3. cassandra 10 | 11 | How to run: 12 | 13 | 1. Create parquet files on HDFS. Can be done in spark shell: 14 | ``` 15 | val testDF = Seq( (10, "foo"), (8, "bar"), (19, "baz")).toDF("id", "name") 16 | testDF.coalesce(1).write.format("parquet").parquet("/path/to/testParquet") 17 | ``` 18 | 19 | 2. Replace guava in spark (guava 19.0). Can be done in spark jars directly, or use spark.yarn.archive to update the libraries used. 20 | 21 | 3. Create the following config file and put it in HDFS 22 | ``` 23 | marmaray: 24 | cassandra: 25 | cluster_name: testcluster 26 | datacenter: solo 27 | keyspace: marmaray 28 | partition_keys: id 29 | tablename: test_parquet_cassandra 30 | error_table: 31 | enabled: false 32 | hadoop: 33 | yarn_queue: default 34 | cassandra: 35 | output.thrift.address: localhost 36 | hive: 37 | data_path: /path/to/testParquet 38 | job_name: testParquetToCassandra 39 | lock_manager: 40 | is_enabled: false 41 | zk_base_path: /hoodie/no-op 42 | metadata_manager: 43 | cassandra: 44 | cluster: testcluster 45 | keyspace: marmaray 46 | table_name: marmaray_metadata_table 47 | username: 48 | password: 49 | output.thrift.address: localhost 50 | type: CASSANDRA 51 | job_name: testParquetToCassandra 52 | zookeeper: 53 | port: 2181 54 | quorum: unused 55 | ``` 56 | 57 | 4. Run the spark job 58 | ``` 59 | ./bin/spark-submit --class com.uber.marmaray.examples.job.ParquetToCassandraJob path/to/marmaray-1.0-SNAPSHOT-jar-with-dependencies.jar -c path/to/test.yaml 60 | ``` 61 | 62 | 5. On success, the data will be dispersed to cassandra. 
You can use CQL to verify 63 | ``` 64 | cqlsh> select * from marmaray.test_parquet_cassandra; 65 | 66 | id | name 67 | ----+------ 68 | 10 | foo 69 | 19 | baz 70 | 8 | bar 71 | 72 | (3 rows) 73 | ``` 74 | -------------------------------------------------------------------------------- /marmaray-tools/checkstyles: -------------------------------------------------------------------------------- 1 | ../checkstyles -------------------------------------------------------------------------------- /marmaray-tools/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | 1.8 5 | 1.8 6 | 7 | 8 | com.uber.marmaray 9 | marmaray-base 10 | 1.0-SNAPSHOT 11 | 12 | marmaray-tools 13 | 1.0-SNAPSHOT 14 | 15 | 16 | 17 | com.uber.marmaray 18 | marmaray 19 | 1.0-SNAPSHOT 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /marmaray/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'com.kageiit.jacobo' version '2.0.1' 3 | } 4 | 5 | description = 'translate jacoco to cobertura' 6 | 7 | dependencies { 8 | } 9 | 10 | task jacobo(type: com.kageiit.jacobo.JacoboTask) { 11 | jacocoReport = file("./target/site/jacoco-ut/jacoco.xml") 12 | coberturaReport = file("./target/site/cobertura/coverage.xml") 13 | srcDirs = ["./src/main/java"] 14 | } 15 | 16 | task noop { 17 | // noop task for when tests don't run 18 | } 19 | -------------------------------------------------------------------------------- /marmaray/checkstyles: -------------------------------------------------------------------------------- 1 | ../checkstyles -------------------------------------------------------------------------------- /marmaray/config/sample.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | marmaray: 3 | hadoop: 4 | anything: "???" 
5 | hive: 6 | dataPath: "" 7 | jobName: "" 8 | source: 9 | saveCheckpoint: false 10 | hoodie: 11 | tables: 12 | cell_table: 13 | table_name: "" 14 | base_path: "" 15 | schema: "" 16 | parallelism: 1024 17 | row_table: 18 | table_name: "" 19 | base_path: "" 20 | schema: "" 21 | default: 22 | combine_before_insert: true 23 | combine_before_upsert: true 24 | parallelism: 512 25 | kafka: 26 | conn: 27 | bootstrap: 28 | servers: "???" 29 | source: 30 | topicName: 31 | maxMessage: 32 | readParallelism: 33 | cassandra: 34 | output: 35 | native.port: "" 36 | thrift.address: "" 37 | keyspace: "keyspace" 38 | tablename: "tableName" 39 | cluster_name: "clusterName" 40 | column_list: "columnList" 41 | partition_keys: "partitionKeys" 42 | clustering_keys: "clusteringKeys" 43 | inputPath: "inputPath" 44 | partitionType: "partitionType" 45 | time_to_live: 0L 46 | -------------------------------------------------------------------------------- /marmaray/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 2.7.4 6 | 1.8 7 | 1.8 8 | 2.7.1 9 | 10 | 11 | 12 | 13 | 14 | org.jacoco 15 | jacoco-maven-plugin 16 | 17 | 18 | org.fortasoft 19 | gradle-maven-plugin 20 | 21 | 22 | maven-assembly-plugin 23 | 24 | 25 | 26 | 27 | 28 | 29 | com.fasterxml.jackson.dataformat 30 | jackson-dataformat-yaml 31 | ${jackson.dataformat.yaml} 32 | 33 | 34 | 35 | org.apache.curator 36 | curator-recipes 37 | ${apache.curator} 38 | 39 | 40 | 41 | org.apache.curator 42 | curator-test 43 | ${apache.curator} 44 | test 45 | 46 | 47 | 48 | com.uber.marmaray 49 | marmaray-base 50 | 1.0-SNAPSHOT 51 | 52 | marmaray 53 | 1.0-SNAPSHOT 54 | 55 | 56 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/DispersalLengthType.java: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2018 Uber Technologies, Inc. 
2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 3 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 4 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 5 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | * 7 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 8 | * of the Software. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 11 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 12 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 13 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 14 | * IN THE SOFTWARE. 15 | */ 16 | 17 | package com.uber.marmaray.common; 18 | 19 | /** 20 | * {@link DispersalLengthType} defines if dispersed data is from one single day or not 21 | * 1. SINGLE_DAY 22 | * 2. MULTIPLE_DAY 23 | */ 24 | public enum DispersalLengthType { 25 | SINGLE_DAY, 26 | MULTIPLE_DAY 27 | } 28 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/DispersalType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.common; 19 | 20 | /** 21 | * {@link DispersalType} defines two dispersal types: 22 | * version: append new file to path with version id 23 | * overwrite: delete old files and then add new file to path 24 | */ 25 | public enum DispersalType { 26 | VERSION, 27 | OVERWRITE 28 | } 29 | 30 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/FileSinkType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.common; 19 | 20 | /** 21 | * {@link FileSinkType} defines two options of file sink destinations 22 | * 1. HDFS 23 | * 2. S3: aws s3 24 | */ 25 | public enum FileSinkType { 26 | HDFS, 27 | S3 28 | } 29 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/IPayload.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common; 18 | 19 | /** 20 | * We explicitly model a generic payload here so that it gives us the flexibility to 21 | * wrap the data with additional metadata as needed 22 | * 23 | * @param <D> data type 24 | */ 25 | public interface IPayload<D> { 26 | D getData(); 27 | } 28 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/MetadataManagerType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common; 18 | 19 | /** 20 | * {@link MetadataManagerType} defines metadata manager type 21 | * HDFS: on disk 22 | * CASSANDRA: cassandra based; MULTI: presumably combines multiple metadata managers (TODO confirm against MultiMetadataManager) 23 | */ 24 | public enum MetadataManagerType { 25 | HDFS, 26 | CASSANDRA, 27 | MULTI 28 | } 29 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/PartitionType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common; 18 | 19 | /** 20 | * {@link PartitionType} defines the partition type for data. 21 | * NORMAL: partition by some defined key 22 | * DATE: partition by date 23 | * NONE: no partition 24 | */ 25 | public enum PartitionType { 26 | NORMAL, 27 | DATE, 28 | NONE 29 | } 30 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/WorkUnit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common; 18 | 19 | import lombok.AllArgsConstructor; 20 | import lombok.Getter; 21 | 22 | /** Immutable holder for a single work entity, kept as a string; the constructor and getter are generated by Lombok. */ @AllArgsConstructor 23 | public class WorkUnit { 24 | 25 | /** The work entity this unit refers to. */ @Getter 26 | private final String workEntity; 27 | } 28 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/actions/IJobDagAction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.actions; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * {@link IJobDagAction} is interface to determine a generic action to execute. 24 | * {@link JobDagActions} are completely independent and will determine if they should run based on success status. 
25 | */ 26 | public interface IJobDagAction { 27 | /** Default value returned by {@link #getTimeoutSeconds()}, in seconds. */ int DEFAULT_TIMEOUT_SECONDS = 120; 28 | /** Metric tag key under which the simple class name of the action is reported. */ String ACTION_TYPE = "action_type"; 29 | 30 | /** 31 | * Execute the action 32 | * 33 | * @param successful whether the job dag succeeded 34 | * @return true if action succeeded 35 | */ 36 | boolean execute(boolean successful); 37 | 38 | /** 39 | * Timeout to wait for the action to complete 40 | * @return number of seconds to wait for task completion; {@link #DEFAULT_TIMEOUT_SECONDS} unless overridden 41 | */ 42 | default int getTimeoutSeconds() { 43 | return DEFAULT_TIMEOUT_SECONDS; 44 | } 45 | 46 | /** 47 | * @return metric tags to be used for reporting metrics; by default a new map holding {@link #ACTION_TYPE} mapped to the simple class name of the implementing class. 48 | */ 49 | default Map<String, String> getMetricTags() { 50 | final Map<String, String> metricsTags = new HashMap<>(); 51 | metricsTags.put(ACTION_TYPE, this.getClass().getSimpleName()); 52 | return metricsTags; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/configuration/HDFSMetadataManagerConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.configuration; 18 | 19 | import lombok.Getter; 20 | import lombok.NonNull; 21 | 22 | /** 23 | * {@link HDFSMetadataManagerConfiguration} holds the configuration of the HDFS metadata manager: the base metadata path read from 24 | * {@link #BASE_METADATA_PATH}. All HDFSMetadataManagerConfiguration properties start with {@link #HDFS_METADATA_MANAGER_PREFIX}. NOTE(review): the constructor calls get() on the property with no presence check visible here; presumably the property is mandatory - confirm. 25 | */ 26 | public class HDFSMetadataManagerConfiguration extends MetadataManagerConfiguration { 27 | public static final String HDFS_METADATA_MANAGER_PREFIX = METADATA_MANAGER_PREFIX + "HDFS."; 28 | public static final String BASE_METADATA_PATH = HDFS_METADATA_MANAGER_PREFIX + "job_metadata"; 29 | 30 | /** Value of the {@link #BASE_METADATA_PATH} property, captured at construction time. */ @Getter 31 | private final String baseMetadataPath; 32 | 33 | public HDFSMetadataManagerConfiguration(@NonNull final Configuration conf) { 34 | super(conf); 35 | this.baseMetadataPath = this.getConf().getProperty(BASE_METADATA_PATH).get(); 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/configuration/HDFSSchemaServiceConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | 18 | package com.uber.marmaray.common.configuration; 19 | 20 | import com.uber.marmaray.utilities.ConfigUtil; 21 | import lombok.NonNull; 22 | import org.apache.hadoop.fs.Path; 23 | 24 | import java.io.Serializable; 25 | import java.util.Collections; 26 | import java.util.List; 27 | 28 | public class HDFSSchemaServiceConfiguration implements Serializable { 29 | 30 | public static final String HDFS_SCHEMA_SERVICE_PREFIX = Configuration.MARMARAY_PREFIX + "hdfs_schema_service"; 31 | public static final String PATH = HDFS_SCHEMA_SERVICE_PREFIX + "path"; 32 | 33 | private final Configuration conf; 34 | 35 | public HDFSSchemaServiceConfiguration(@NonNull final Configuration conf) { 36 | ConfigUtil.checkMandatoryProperties(conf, getMandatoryProperties()); 37 | this.conf = conf; 38 | } 39 | 40 | public Path getPath() { 41 | return new Path(this.conf.getProperty(PATH).get()); 42 | } 43 | public static List getMandatoryProperties() { 44 | return Collections.singletonList(PATH); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/converters/data/DummyHoodieSinkDataConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.common.converters.data; 19 | 20 | import com.uber.marmaray.common.AvroPayload; 21 | import com.uber.marmaray.common.configuration.Configuration; 22 | import com.uber.marmaray.utilities.ErrorExtractor; 23 | 24 | import lombok.NonNull; 25 | 26 | /** 27 | * {@link DummyHoodieSinkDataConverter} is necessary where you do not want payloads to be 28 | * processed by the data converter. It is constructed with a no-argument {@link Configuration} and a new {@link ErrorExtractor}, and both overridden methods always return null. 29 | */ 30 | public class DummyHoodieSinkDataConverter extends HoodieSinkDataConverter { 31 | public DummyHoodieSinkDataConverter() { 32 | super(new Configuration(), new ErrorExtractor()); 33 | } 34 | 35 | /** Always returns null; the payload is not inspected. */ @Override 36 | protected String getRecordKey(@NonNull final AvroPayload payload) throws Exception { 37 | return null; 38 | } 39 | 40 | /** Always returns null; the payload is not inspected. */ @Override 41 | protected String getPartitionPath(@NonNull final AvroPayload payload) throws Exception { 42 | return null; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/converters/data/FileSinkDataConverterFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | 18 | package com.uber.marmaray.common.converters.data; 19 | 20 | import com.uber.marmaray.common.configuration.Configuration; 21 | import com.uber.marmaray.common.configuration.FileSinkConfiguration; 22 | import com.uber.marmaray.utilities.ErrorExtractor; 23 | import lombok.NonNull; 24 | 25 | /** 26 | * {@link FileSinkDataConverterFactory} creates the {@link FileSinkDataConverter} matching the configured file type: 27 | * a {@link FileSinkDataJSONConverter} when the type equals "sequence", a {@link FileSinkDataCSVConverter} otherwise. 28 | */ 29 | public class FileSinkDataConverterFactory { 30 | protected FileSinkDataConverterFactory() { 31 | } 32 | 33 | public static FileSinkDataConverter createFileSinkDataConverter(@NonNull final Configuration conf) { 34 | final FileSinkConfiguration sinkConf = new FileSinkConfiguration(conf); 35 | final boolean isSequenceFile = sinkConf.getFileType().equals("sequence"); 36 | if (!isSequenceFile) { 37 | return new FileSinkDataCSVConverter(conf, new ErrorExtractor()); 38 | } 39 | return new FileSinkDataJSONConverter(conf, new ErrorExtractor()); 40 | } 41 | } 42 | 43 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/converters/data/SinkDataConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software.
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.converters.data; 18 | 19 | import com.uber.marmaray.common.AvroPayload; 20 | import com.uber.marmaray.common.configuration.Configuration; 21 | import com.uber.marmaray.utilities.ErrorExtractor; 22 | import lombok.NonNull; 23 | import org.apache.avro.Schema; 24 | 25 | /** 26 | * {@link SinkDataConverter} extends {@link AbstractDataConverter}. 27 | * This class converts records from ({@link Schema}, {@link AvroPayload}) to (OS, OD). 28 | * @param <OS> output schema type 29 | * @param <OD> output data type. NOTE(review): the type-parameter lists on the declaration below appear to be missing in this copy of the file; restore them from the authoritative source rather than guessing the parameter order of AbstractDataConverter. 30 | */ 31 | public abstract class SinkDataConverter extends AbstractDataConverter { 32 | private static final long serialVersionUID = 1L; 33 | 34 | public SinkDataConverter(@NonNull final Configuration conf, @NonNull final ErrorExtractor errorExtractor) { 35 | super(conf, errorExtractor); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/converters/data/SourceDataConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.converters.data; 18 | 19 | import com.uber.marmaray.common.AvroPayload; 20 | import com.uber.marmaray.common.configuration.Configuration; 21 | import com.uber.marmaray.utilities.ErrorExtractor; 22 | import lombok.NonNull; 23 | import org.apache.avro.Schema; 24 | 25 | /** 26 | * {@link SourceDataConverter} extends {@link AbstractDataConverter}. 27 | * 28 | * This class converts records from (IS, ID) to ({@link Schema}, {@link AvroPayload}).
29 | * @param <IS> input schema type 30 | * @param <ID> input data type. NOTE(review): the type-parameter lists on the declaration below appear to be missing in this copy of the file; restore them from the authoritative source rather than guessing the parameter order of AbstractDataConverter. 31 | */ 32 | public abstract class SourceDataConverter extends AbstractDataConverter { 33 | public SourceDataConverter(@NonNull final Configuration conf, @NonNull final ErrorExtractor errorExtractor) { 34 | super(conf, errorExtractor); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/converters/schema/AbstractSchemaConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | package com.uber.marmaray.common.converters.schema; 18 | 19 | import org.apache.avro.Schema; 20 | 21 | /** 22 | * {@link AbstractSchemaConverter} is responsible for 2 way conversion to convert an external schema to a common schema 23 | * extending {@link Schema} as well as vice versa. 24 | * @param <ES> external schema 25 | * @param <CS> common schema 26 | */ 27 | public abstract class AbstractSchemaConverter<ES, CS> { 28 | /** Converts a common schema into its external representation. */ public abstract ES convertToExternalSchema(CS commonSchema); 29 | /** Converts an external schema into the common representation. */ public abstract CS convertToCommonSchema(ES externalSchema); 30 | } 31 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/data/BinaryRawData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import javax.xml.bind.DatatypeConverter; 20 | 21 | /** 22 | * Convenience class for wrapping byte[]; {@link #toString()} renders the wrapped bytes as a hexadecimal string. NOTE(review): javax.xml.bind is not bundled with JDK 11 and later - confirm the target runtime provides it. 23 | */ 24 | public class BinaryRawData extends RawData<byte[]> { 25 | 26 | public BinaryRawData(final byte[] data) { 27 | super(data); 28 | } 29 | 30 | @Override 31 | public String toString() { 32 | return DatatypeConverter.printHexBinary(getData()); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/data/ErrorData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import lombok.AllArgsConstructor; 20 | import lombok.Getter; 21 | 22 | /** 23 | * It holds the input {@link RawData} record and an error message describing which error check condition failed. This 24 | * will be useful information for debugging error records. 25 | */ 26 | @AllArgsConstructor 27 | public class ErrorData implements IData { 28 | 29 | /** 30 | * Error message describing the failed check. 31 | */ 32 | @Getter 33 | private final String errMessage; 34 | 35 | /** Input record the error message refers to. */ @Getter 36 | private final RawData rawData; 37 | } 38 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/data/ForkData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import java.io.Serializable; 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import lombok.AllArgsConstructor; 23 | import lombok.Getter; 24 | import lombok.NonNull; 25 | 26 | /** 27 | * It is used to hold forked data. It wraps individual record entry with forked pipeline keys. The explicit constructor copies the given keys into an internal list instead of keeping the caller's list. NOTE(review): the element type of the keys list is not visible in this copy of the file, so the list is left raw - confirm and restore. 28 | * @param <DI> type of the wrapped record */ 29 | @AllArgsConstructor 30 | public class ForkData<DI> implements Serializable { 31 | @Getter 32 | private final List keys = new ArrayList<>(); 33 | @Getter 34 | private final DI record; 35 | 36 | public ForkData(@NonNull final List keys, @NonNull final DI record) { 37 | this.keys.addAll(keys); 38 | this.record = record; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/data/IData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import java.io.Serializable; 20 | 21 | /** Common supertype for data records; it declares no methods of its own and only requires implementations to be {@link Serializable}. */ public interface IData extends Serializable { 22 | } 23 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/data/RDDWrapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import lombok.Getter; 20 | import lombok.NonNull; 21 | import org.apache.spark.api.java.JavaRDD; 22 | import org.apache.spark.api.java.Optional; 23 | 24 | /** 25 | * Convenience class to wrap RDD of records with its count to avoid multiple calls for count operation. If we need to 26 | * use count operation for a given RDD more than once in the form of isEmpty or actual count; then this wrapper will be 27 | * useful. 28 | * @param <T> dataType of RDD records. See {@link #data} for more details. 29 | */ 30 | public class RDDWrapper<T> { 31 | 32 | @Getter 33 | @NonNull 34 | final JavaRDD<T> data; 35 | 36 | /** Cached record count; absent until supplied to the constructor or computed by {@link #getCount()}. */ Optional<Long> count; 37 | 38 | public RDDWrapper(@NonNull final JavaRDD<T> data) { 39 | this.data = data; 40 | this.count = Optional.absent(); 41 | } 42 | 43 | public RDDWrapper(@NonNull final JavaRDD<T> data, final long count) { 44 | this.data = data; 45 | this.count = Optional.of(count); 46 | } 47 | 48 | /** @return the record count; the RDD count operation is invoked only when no count is cached yet, and its result is then cached */ public long getCount() { 49 | if (!count.isPresent()) { 50 | this.count = Optional.of(this.data.count()); 51 | } 52 | return count.get(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/data/RawData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software.
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import java.io.Serializable; 20 | import lombok.AllArgsConstructor; 21 | import lombok.Getter; 22 | import lombok.ToString; 23 | 24 | @AllArgsConstructor 25 | @ToString 26 | public class RawData implements IData, Serializable { 27 | 28 | @Getter 29 | private final T data; 30 | } 31 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/data/RawDataHelper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import lombok.NonNull; 20 | 21 | /** 22 | * Helper class to return appropriate RawData subclass. 23 | */ 24 | public class RawDataHelper { 25 | 26 | public static RawData getRawData(@NonNull final T data) { 27 | if (data instanceof byte[]) { 28 | return new BinaryRawData((byte[]) data); 29 | } 30 | return new RawData(data); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/data/ValidData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import lombok.AllArgsConstructor; 20 | import lombok.Getter; 21 | 22 | @AllArgsConstructor 23 | public class ValidData implements IData { 24 | @Getter 25 | private final T data; 26 | } 27 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/dataset/ExceptionRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.dataset; 18 | 19 | import lombok.Data; 20 | import lombok.EqualsAndHashCode; 21 | import org.hibernate.validator.constraints.NotEmpty; 22 | 23 | /** 24 | * Simple Java Bean used to construct {@link UtilTable} of {@ExceptionRecord} 25 | */ 26 | @Data 27 | @EqualsAndHashCode(callSuper = true) 28 | public class ExceptionRecord extends UtilRecord { 29 | private String exception; 30 | private String exception_message; 31 | private String stacktrace; 32 | private boolean detected_on_driver; 33 | 34 | public ExceptionRecord(@NotEmpty final String applicationId, 35 | @NotEmpty final String jobName, 36 | final long jobStartTimestamp, 37 | final long timestamp, 38 | @NotEmpty final String exception, 39 | @NotEmpty final String exceptionMessage, 40 | @NotEmpty final String stacktrace, 41 | final boolean isDriver) { 42 | super(applicationId, jobName, jobStartTimestamp, timestamp); 43 | this.exception = exception; 44 | this.exception_message = exceptionMessage; 45 | this.stacktrace = stacktrace; 46 | this.detected_on_driver = isDriver; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/dataset/MetricRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.dataset; 18 | 19 | import lombok.Data; 20 | import lombok.EqualsAndHashCode; 21 | import org.hibernate.validator.constraints.NotEmpty; 22 | 23 | /** 24 | * Simple Java Bean used to construct {@link UtilTable} of {@MetricRecord} 25 | */ 26 | @Data 27 | @EqualsAndHashCode(callSuper = true) 28 | public class MetricRecord extends UtilRecord { 29 | private String metric_name; 30 | private Long metric_value; 31 | private String tags; 32 | 33 | public MetricRecord(@NotEmpty final String applicationId, 34 | @NotEmpty final String jobName, 35 | final long jobStartTimestamp, 36 | final long timestamp, 37 | @NotEmpty final String metricName, 38 | final long metricValue, 39 | @NotEmpty final String tags) { 40 | super(applicationId, jobName, jobStartTimestamp, timestamp); 41 | this.metric_name = metricName; 42 | this.metric_value = metricValue; 43 | this.tags = tags; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/dataset/UtilRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.dataset; 18 | 19 | import lombok.AllArgsConstructor; 20 | import lombok.Data; 21 | 22 | import java.io.Serializable; 23 | 24 | /** 25 | * {@link UtilRecord} is the member type of {@link UtilTable} collections. 
26 | * Subclasses of {@link UtilRecord} must conform to the requirements of a 27 | * simple Java Bean so they can be converted to {@link org.apache.spark.sql.Dataset}, 28 | * which are: 29 | * 1) Have primitive field types 30 | * 2) Have default values for instance fields 31 | * 3) Have getter and setters for all fields 32 | * 4) Have a constructor with no arguments 33 | */ 34 | @AllArgsConstructor 35 | @Data 36 | public abstract class UtilRecord implements Serializable { 37 | private String application_id; 38 | private String job_name; 39 | private long job_start_timestamp; 40 | private long timestamp; 41 | } 42 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/exceptions/ForkOperationException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.exceptions; 18 | 19 | /** 20 | * Thrown when ForkOperation fails to succeed. 21 | */ 22 | public class ForkOperationException extends JobRuntimeException { 23 | 24 | public ForkOperationException(final String message) { 25 | super(message); 26 | } 27 | 28 | public ForkOperationException(final String message, final Throwable t) { 29 | super(message, t); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/exceptions/InvalidDataException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.exceptions; 18 | 19 | import com.uber.marmaray.common.AvroPayload; 20 | import com.uber.marmaray.common.converters.data.HoodieSinkDataConverter; 21 | import lombok.NonNull; 22 | import org.hibernate.validator.constraints.NotEmpty; 23 | 24 | /** 25 | * It is a checked exception and should be thrown when there is either missing or invalid user defined field in 26 | * data. Check {@link HoodieSinkDataConverter#getRecordKey(AvroPayload)} 27 | * for an example. 28 | */ 29 | public class InvalidDataException extends Exception { 30 | 31 | public InvalidDataException(@NotEmpty final String message) { 32 | super(message); 33 | } 34 | 35 | public InvalidDataException(@NotEmpty final String message, @NonNull final Throwable t) { 36 | super(message, t); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/exceptions/JobRuntimeException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.exceptions; 18 | 19 | import lombok.NonNull; 20 | import org.hibernate.validator.constraints.NotEmpty; 21 | 22 | /** 23 | * This is the parent runtime exception thrown whenever job encounters unrecoverable exception. 
24 | */ 25 | public class JobRuntimeException extends RuntimeException { 26 | public JobRuntimeException(@NotEmpty final String message) { 27 | super(message); 28 | } 29 | 30 | public JobRuntimeException(@NonNull final Throwable t) { 31 | super(t); 32 | } 33 | 34 | public JobRuntimeException(@NotEmpty final String message, @NonNull final Throwable t) { 35 | super(message, t); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/exceptions/MetadataException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.exceptions; 18 | 19 | /** 20 | * This exception is thrown anytime we encounter an exception with reading or writing metadata 21 | */ 22 | public class MetadataException extends JobRuntimeException { 23 | public MetadataException(final String message) { 24 | super(message); 25 | } 26 | 27 | public MetadataException(final String message, final Throwable t) { 28 | super(message, t); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/exceptions/MissingPropertyException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.exceptions; 18 | 19 | /** 20 | * Thrown when any required property for a module is not specified. 21 | */ 22 | public class MissingPropertyException extends JobRuntimeException { 23 | public MissingPropertyException(final String propertyName) { 24 | super("property:" + propertyName); 25 | } 26 | 27 | public MissingPropertyException(final String message, final Throwable t) { 28 | super(message, t); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/exceptions/RetryException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.exceptions; 18 | 19 | /** 20 | * This exception is thrown when the task runs out of retries. 21 | */ 22 | public class RetryException extends JobRuntimeException { 23 | public RetryException(final String message) { 24 | super(message); 25 | } 26 | 27 | public RetryException(final String message, final Throwable t) { 28 | super(message, t); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/forkoperator/FilterFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.forkoperator; 18 | 19 | import com.uber.marmaray.common.data.ForkData; 20 | import lombok.AllArgsConstructor; 21 | import org.apache.spark.api.java.function.Function; 22 | 23 | /** 24 | * This is used for filtering result RDD. The passed in filterkey should be same as the one used 25 | * in ForkFunction. 26 | * @param 27 | */ 28 | @AllArgsConstructor 29 | public class FilterFunction implements Function, Boolean> { 30 | 31 | private final Integer filterKey; 32 | 33 | @Override 34 | public final Boolean call(final ForkData forkData) { 35 | return execute(forkData); 36 | } 37 | 38 | /** 39 | * It is used for filtering out tupleEntries. If it returns true then tupleEntry will be 40 | * filtered out. It will have same set of keys as defined by corresponding ForkFunction. 41 | * 42 | * @param forkData : forkData to be filtered out or retained. 43 | */ 44 | protected Boolean execute(final ForkData forkData) { 45 | return forkData.getKeys().contains(this.filterKey); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/job/Dag.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.common.job; 2 | 3 | import com.uber.marmaray.common.status.IStatus; 4 | import lombok.Getter; 5 | import lombok.NonNull; 6 | import lombok.Setter; 7 | import org.hibernate.validator.constraints.NotEmpty; 8 | 9 | import java.util.Map; 10 | 11 | public abstract class Dag { 12 | 13 | @NotEmpty @Getter @Setter 14 | private String jobName; 15 | 16 | @NotEmpty @Getter @Setter 17 | private String dataFeedName; 18 | 19 | @Getter @Setter 20 | private Map jobManagerMetadata; 21 | 22 | public Dag(@NonNull final String jobName, @NonNull final String dataFeedName) { 23 | this.dataFeedName = dataFeedName; 24 | this.jobName = jobName; 25 | } 26 | 27 | public abstract IStatus execute(); 28 | 29 | } 30 | 
-------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/job/DagPayload.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.job; 18 | 19 | import com.uber.marmaray.common.AvroPayload; 20 | import com.uber.marmaray.common.IPayload; 21 | import lombok.AllArgsConstructor; 22 | import lombok.Getter; 23 | import lombok.NonNull; 24 | import org.apache.spark.api.java.JavaRDD; 25 | 26 | /** 27 | * Helper class to pass Payload to child dag. 
28 | */ 29 | @AllArgsConstructor 30 | public class DagPayload implements IPayload> { 31 | 32 | @NonNull 33 | @Getter 34 | private final JavaRDD data; 35 | } 36 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/job/IJobExecutionStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.common.job; 19 | 20 | import lombok.NonNull; 21 | 22 | import java.util.List; 23 | import java.util.Queue; 24 | 25 | /** 26 | * {@link IJobExecutionStrategy} determines the order for {@link JobDag} execution rather than just relying on 27 | * submission order. 
28 | */ 29 | public interface IJobExecutionStrategy { 30 | 31 | List sort(@NonNull final Queue inputJobDags); 32 | 33 | } 34 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/job/Job.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.job; 18 | 19 | import com.uber.marmaray.common.configuration.Configuration; 20 | import java.util.concurrent.atomic.AtomicBoolean; 21 | import lombok.AllArgsConstructor; 22 | import lombok.NonNull; 23 | import lombok.extern.slf4j.Slf4j; 24 | 25 | @Slf4j 26 | @AllArgsConstructor 27 | public class Job { 28 | 29 | @NonNull 30 | private final JobDag jobDag; 31 | @NonNull 32 | private final Configuration conf; 33 | 34 | public void run() { 35 | final AtomicBoolean isSuccess = new AtomicBoolean(true); 36 | try { 37 | ThreadPoolService.init(this.conf); 38 | jobDag.execute(); 39 | } catch (final Throwable t) { 40 | isSuccess.set(false); 41 | throw t; 42 | } finally { 43 | ThreadPoolService.shutdown(!isSuccess.get()); 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/job/ThreadPoolServiceTier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Names the two thread pool tiers: one for job DAGs and one for actions.
 *
 * <p>NOTE(review): how each tier is sized and scheduled is decided by the code that consumes this
 * enum, which is not visible here.
 */
public enum ThreadPoolServiceTier {
    /** Tier for job DAG work. */
    JOB_DAG_TIER,

    /** Tier for action work. */
    ACTIONS_TIER
}
/**
 * {@link AbstractValue} wraps a generic data value type that is required to be serializable. The data represents
 * some metadata that will be stored for a job.
 *
 * @param <D> type of the wrapped value
 */
public abstract class AbstractValue<D> implements Serializable {

    /**
     * @return the wrapped value
     */
    abstract D getValue();
}
16 | */ 17 | package com.uber.marmaray.common.metadata; 18 | 19 | import com.uber.marmaray.common.configuration.Configuration; 20 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 21 | 22 | public final class MetadataConstants { 23 | public static final String TEMP_FILE_EXTENSION = ".tmp"; 24 | public static final String CHECKPOINT_KEY = "checkpoint"; 25 | public static final String JOBMANAGER_PREFIX = Configuration.MARMARAY_PREFIX + "jobmanager"; 26 | public static final String JOBMANAGER_METADATA_PREFIX = JOBMANAGER_PREFIX + ".metadata"; 27 | public static final String JOBMANAGER_METADATA_ENABLED = JOBMANAGER_METADATA_PREFIX + ".enabled"; 28 | public static final String JOBMANAGER_METADATA_HDFS_PREFIX = JOBMANAGER_METADATA_PREFIX + ".hdfs"; 29 | public static final String JOBMANAGER_METADATA_HDFS_BASEPATH = JOBMANAGER_METADATA_HDFS_PREFIX + ".basePath"; 30 | public static final String JOBMANAGER_METADATA_STORAGE = JOBMANAGER_METADATA_PREFIX + ".sourceType"; 31 | public static final String JOBMANAGER_METADATA_SOURCE_HDFS = "HDFS"; 32 | 33 | private MetadataConstants() { 34 | throw new JobRuntimeException("This class should never be instantiated"); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metadata/StringValue.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.metadata; 18 | 19 | import lombok.AllArgsConstructor; 20 | import lombok.EqualsAndHashCode; 21 | 22 | /** 23 | * {@link StringValue} extends {@link AbstractValue} and wraps a String that represents the job metadata 24 | * that will be stored in HDFS 25 | */ 26 | @AllArgsConstructor 27 | @EqualsAndHashCode(callSuper = false) 28 | public class StringValue extends AbstractValue { 29 | 30 | private final String value; 31 | 32 | @Override 33 | public String getValue() { 34 | return this.value; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metrics/CassandraMetric.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.common.metrics; 2 | 3 | import com.uber.marmaray.common.configuration.CassandraSinkConfiguration; 4 | import com.uber.marmaray.utilities.StringTypes; 5 | import lombok.NonNull; 6 | 7 | import java.util.Map; 8 | 9 | public class CassandraMetric { 10 | 11 | public static final String TABLE_NAME_TAG = "tableName"; 12 | 13 | public static Map createTableNameTags(@NonNull final CassandraSinkConfiguration cassandraConf) { 14 | return DataFeedMetrics.createAdditionalTags(TABLE_NAME_TAG, 15 | cassandraConf.getKeyspace() + StringTypes.UNDERSCORE + cassandraConf.getTableName()); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metrics/CassandraPayloadRDDSizeEstimator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.metrics; 18 | 19 | import com.uber.marmaray.common.data.RDDWrapper; 20 | import com.uber.marmaray.common.schema.cassandra.CassandraPayload; 21 | 22 | import java.util.List; 23 | 24 | public class CassandraPayloadRDDSizeEstimator { 25 | private final int NO_OF_SAMPLE_ROWS = 1000; 26 | 27 | public long estimateTotalSize(final RDDWrapper rdd) { 28 | final long totalRows = rdd.getCount(); 29 | 30 | final List sampleRows = rdd.getData().takeSample(true, NO_OF_SAMPLE_ROWS); 31 | 32 | final long byteSize = sampleRows 33 | .stream() 34 | .map(element -> element.estimateRowSize()) 35 | .reduce((size, accumulator) -> size + accumulator) 36 | .orElse(0); 37 | 38 | final long totalSize = (long) (byteSize * (((totalRows) * 1.0) / (NO_OF_SAMPLE_ROWS))); 39 | 40 | return totalSize; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metrics/ChargebackMetricType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Kinds of measurement that can be charged back.
 */
public enum ChargebackMetricType {
    /** Count the number of rows. */
    ROW_COUNT,

    /** Count the runtime of the execution. */
    RUN_TIME,

    /** Count the number of executors used. */
    EXECUTORS
}
16 | */ 17 | package com.uber.marmaray.common.metrics; 18 | 19 | import lombok.Getter; 20 | import lombok.ToString; 21 | import org.hibernate.validator.constraints.NotEmpty; 22 | 23 | /** 24 | * {@link Double} implementation of {@link Metric} 25 | */ 26 | @Getter 27 | @ToString 28 | public class DoubleMetric extends Metric { 29 | public DoubleMetric(final String metricName) { 30 | this("metric-type", 0.0); 31 | } 32 | 33 | public DoubleMetric(@NotEmpty final String metricName, final double metricValue) { 34 | super(metricName, metricValue); 35 | this.addTag("metric-type", "double"); 36 | } 37 | 38 | public void setMetricValue(final double metricValue) { 39 | this.metricValue = metricValue; 40 | } 41 | 42 | @Override 43 | public String toString() { 44 | return super.toString(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metrics/IChargebackCalculator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.metrics; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * Calculator to determine cost of a particular chargeback instance. 23 | * 24 | * Computation can be over a number of fields. 25 | */ 26 | public interface IChargebackCalculator { 27 | 28 | /** 29 | * Compute the cost based on the input fields. 30 | * @return the final cost value 31 | */ 32 | Map computeCost(); 33 | 34 | /** 35 | * Add a cost for a particular datafeed 36 | * @param datafeedName the name of the datafeed to add cost to 37 | * @param metricType the type of cost to add 38 | * @param value the cost value to add 39 | */ 40 | void addCost(String datafeedName, ChargebackMetricType metricType, Long value); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metrics/IMetricable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.metrics; 18 | 19 | import lombok.NonNull; 20 | 21 | public interface IMetricable { 22 | 23 | /* 24 | Take a DataFeedMetrics to report metrics to, if present 25 | */ 26 | void setDataFeedMetrics(@NonNull final DataFeedMetrics dataFeedMetrics); 27 | 28 | /* 29 | Take a JobMetrics to report metrics to, if present 30 | */ 31 | void setJobMetrics(@NonNull final JobMetrics jobMetrics); 32 | } 33 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metrics/JobMetricNames.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.metrics; 18 | 19 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 20 | 21 | public final class JobMetricNames { 22 | public static final String RUN_JOB_DAG_LATENCY_MS = "run_job_dag_latency_ms"; 23 | public static final String RUN_JOB_ERROR_COUNT = "run_job_error_count"; 24 | public static final String JOB_SETUP_LATENCY_MS = "job_setup_latency_ms"; 25 | 26 | // JobLockManager-related metrics 27 | public static final String JOB_MANAGER_LOCK_TIME_MS = "job_manager_lock_time_ms"; 28 | public static final String JOB_DAG_LOCK_TIME_MS = "job_dag_lock_time_ms"; 29 | 30 | private JobMetricNames() { 31 | throw new JobRuntimeException("Class should never be instantiated"); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metrics/JobMetricType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
/**
 * Known {@link JobMetrics} names, grouped as job-context, resource-usage and error-count metrics.
 */
public enum JobMetricType {
    // --- metrics in context of the job ---
    RUNTIME,
    STAGE_RUNTIME,
    SIZE,

    // --- resource usage metrics ---
    DRIVER_MEMORY,
    EXECUTOR_MEMORY,
    NUM_EXECUTORS,

    // --- error count metrics ---
    RUN_ERROR_COUNT
}
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.metrics; 18 | 19 | import lombok.Getter; 20 | import lombok.ToString; 21 | import org.hibernate.validator.constraints.NotEmpty; 22 | 23 | /** 24 | * {@link Long} implementation of {@link Metric} 25 | */ 26 | @Getter 27 | @ToString 28 | public class LongMetric extends Metric { 29 | public LongMetric(final String metricName) { 30 | super(metricName); 31 | } 32 | 33 | public LongMetric(@NotEmpty final String metricName, final long metricValue) { 34 | super(metricName, metricValue); 35 | this.addTag("metric-type", "long"); 36 | } 37 | 38 | public void setMetricValue(final long metricValue) { 39 | this.metricValue = metricValue; 40 | } 41 | 42 | @Override 43 | public String toString() { 44 | return super.toString(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/metrics/ModuleTagNames.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.metrics; 18 | 19 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 20 | 21 | public final class ModuleTagNames { 22 | public static final String SOURCE = "source"; 23 | public static final String SINK = "sink"; 24 | public static final String SCHEMA_MANAGER = "schema_manager"; 25 | public static final String SOURCE_CONVERTER = "source_converter"; 26 | public static final String SINK_CONVERTER = "sink_converter"; 27 | public static final String SUB_DAG = "sub_dag"; 28 | public static final String WORK_UNIT_CALCULATOR = "work_unit_calc"; 29 | public static final String JOB_MANAGER = "job_manager"; 30 | public static final String JOB_DAG = "job_dag"; 31 | public static final String METADATA_MANAGER = "metadata_manager"; 32 | public static final String SINK_CONFIGURATION = "sink_configuration"; 33 | public static final String CONFIGURATION = "config"; 34 | 35 | private ModuleTagNames() { 36 | throw new JobRuntimeException("Class should never be instantiated"); 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/reporters/ConsoleReporter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.reporters; 18 | 19 | import com.uber.marmaray.common.metrics.Metric; 20 | import lombok.NonNull; 21 | import lombok.extern.slf4j.Slf4j; 22 | 23 | /** 24 | * Console implementation of {@link IReporter} 25 | */ 26 | @Slf4j 27 | public class ConsoleReporter implements IReporter { 28 | public void gauge(@NonNull final Metric m) { 29 | final String metricName = m.getMetricName(); 30 | final String metricValue = m.getMetricValue().toString(); 31 | final String tags = m.getTags().toString(); 32 | 33 | log.info("{}={}, Tags: {}", metricName, metricValue, tags); 34 | } 35 | 36 | public void finish() { 37 | // do nothing 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/reporters/IKafkaDataLossReporter.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.common.reporters; 2 | 3 | import org.hibernate.validator.constraints.NotEmpty; 4 | 5 | /** 6 | * {@link IKafkaDataLossReporter} reports Kafka data loss 7 | */ 8 | public interface IKafkaDataLossReporter { 9 | void reportDataLoss(@NotEmpty final String kafkaTopicName, 10 | final long totalNumberOfMessagesLost); 11 | } 12 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/reporters/IReporter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.reporters; 18 | 19 | import com.uber.marmaray.common.metrics.Metric; 20 | import lombok.NonNull; 21 | 22 | /** 23 | * {@link IReporter} gauges {@link Metric} to a sink 24 | */ 25 | public interface IReporter { 26 | void gauge(@NonNull final T m); 27 | 28 | void finish(); 29 | } 30 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/reporters/Reportable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.reporters; 18 | 19 | import lombok.NonNull; 20 | 21 | /** 22 | * {@link Reportable} that can be gauged by {@link IReporter} 23 | */ 24 | public interface Reportable { 25 | void gaugeAll(@NonNull final IReporter reporter); 26 | } 27 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/retry/IFunctionThrowsException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.retry; 18 | 19 | import lombok.NonNull; 20 | 21 | /** 22 | * {@link IFunctionThrowsException} is a functional interface for a one-argument function that is allowed to throw a checked {@link Exception}. NOTE(review): the type variables T and R are not declared in the text shown here - confirm against the original file. 23 | */ 24 | @FunctionalInterface 25 | public interface IFunctionThrowsException { 26 | /** Applies the function to {@code t} (annotated {@code @NonNull}) and returns a value of type R; may throw any {@link Exception}. */ R apply(@NonNull final T t) throws Exception; 27 | } 28 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/retry/IRetryStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.retry; 18 | 19 | import com.uber.marmaray.common.exceptions.RetryException; 20 | 21 | /** 22 | * {@link IRetryStrategy} determines whether a function should be retried. {@link #shouldRetry()} returns the decision 23 | * and may throw {@link RetryException}; {@link #retryMessage()} returns the description of the current attempt. 24 | */ 25 | public interface IRetryStrategy { 26 | boolean shouldRetry() throws RetryException; 27 | String retryMessage(); 28 | } 29 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/schema/ISinkSchemaManager.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.schema; 18 | 19 | /** {@link ISinkSchemaManager} is an empty interface: it declares no methods or constants. NOTE(review): presumably a common supertype for sink-side schema managers - confirm against implementers. */ public interface ISinkSchemaManager { 20 | } 21 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/schema/cassandra/CassandraDataField.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.schema.cassandra; 18 | 19 | import lombok.AllArgsConstructor; 20 | import lombok.Getter; 21 | 22 | import java.io.Serializable; 23 | import java.nio.ByteBuffer; 24 | 25 | /** Holds one column key and one value, both as {@link ByteBuffer}; the constructor and getters are generated by Lombok. NOTE(review): {@link ByteBuffer} does not implement {@link Serializable}, so default Java serialization of a populated instance would fail - confirm which serializer is used for this class. */ @AllArgsConstructor 26 | public class CassandraDataField implements Serializable { 27 | 28 | @Getter 29 | private final ByteBuffer columnKey; 30 | 31 | @Getter 32 | private final ByteBuffer value; 33 | } 34 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/schema/cassandra/CassandraSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.schema.cassandra; 18 | 19 | import java.io.Serializable; 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import lombok.Getter; 23 | 24 | /** Serializable holder for a key space name, a table name and a list of fields, each exposed through a Lombok getter. */ public class CassandraSchema implements Serializable { 25 | 26 | @Getter 27 | private final String keySpace; 28 | 29 | @Getter 30 | private final String tableName; 31 | 32 | @Getter 33 | private final List fields; 34 | 35 | /** Creates a schema whose field list starts out as an empty {@link ArrayList}. */ public CassandraSchema(final String keySpace, final String tableName) { 36 | this.keySpace = keySpace; 37 | this.tableName = tableName; 38 | this.fields = new ArrayList<>(); 39 | } 40 | 41 | /** Creates a schema that uses the given list directly: no copy is made, so {@link #addField} appends to the caller's list and requires it to support {@code add}. */ public CassandraSchema(final String keySpace, final String tableName, final List fields) { 42 | this.keySpace = keySpace; 43 | this.tableName = tableName; 44 | this.fields = fields; 45 | } 46 | 47 | /** Appends {@code field} to the end of the field list. */ public void addField(final CassandraSchemaField field) { 48 | fields.add(field); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sinks/ISink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software.
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.sinks; 18 | 19 | import com.uber.marmaray.common.AvroPayload; 20 | import com.uber.marmaray.common.metrics.IMetricable; 21 | import org.apache.spark.api.java.JavaRDD; 22 | 23 | /** 24 | * {@link ISink} is an {@link IMetricable} that writes a {@link JavaRDD} to a sink; implementations should implement {@link #write(JavaRDD)}. 25 | * TODO: ISink#write() should return Stats object. 26 | */ 27 | public interface ISink extends IMetricable { 28 | 29 | /** 30 | * Writes data to the sink. If any exception occurs while writing, it throws 31 | * {@link com.uber.marmaray.common.exceptions.JobRuntimeException}. All invalid / error records will be 32 | * written to ErrorTable. NOTE(review): {@code AvroPayload} is imported but not referenced in the text shown here - presumably the RDD element type; confirm. 33 | * @param data data to write to sink 34 | */ 35 | void write(JavaRDD data); 36 | } 37 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sinks/hoodie/HoodieSinkOperations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.sinks.hoodie; 18 | 19 | import com.uber.marmaray.common.configuration.HoodieConfiguration; 20 | import lombok.NonNull; 21 | import org.hibernate.validator.constraints.NotEmpty; 22 | 23 | /** 24 | * Helper class which invokes various operations before / after certain {@link HoodieSink} actions. See individual 25 | * operations for more details. The only operation defined here is {@link #preCommitOperations}, whose body is empty. 26 | */ 27 | public class HoodieSinkOperations { 28 | 29 | /** 30 | * Gets executed before calling {@link HoodieSink}'s underlying commit action. All the parquet write operations are 31 | * guaranteed to finish before this. Only thing left is the final commit file creation. This implementation is a no-op; NOTE(review): presumably intended to be overridden by subclasses - confirm. 32 | */ 33 | public void preCommitOperations(@NonNull final HoodieConfiguration hoodieConfiguration, 34 | @NotEmpty final String commitTime) { 35 | // do nothing.
36 | } 37 | } 38 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sinks/hoodie/HoodieWriteStatus.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | package com.uber.marmaray.common.sinks.hoodie; 18 | 19 | import com.uber.hoodie.WriteStatus; 20 | import com.uber.hoodie.common.model.HoodieRecord; 21 | import java.util.Map; 22 | import java.util.Optional; 23 | 24 | /** 25 | * Helper class to change default behavior for {@link WriteStatus}: successful writes are only counted, not recorded individually. 26 | */ 27 | public class HoodieWriteStatus extends WriteStatus { 28 | 29 | /* Number of calls to markSuccess on this instance; kept locally because markSuccess does not call the superclass. */ private long totalRecords; 30 | 31 | /** 32 | * Overriding {@link #markSuccess(HoodieRecord, Optional)} to avoid caching 33 | * {@link com.uber.hoodie.common.model.HoodieKey} for successfully written hoodie records. Both arguments are ignored; the method only increments the local success counter. 34 | */ 35 | @Override 36 | public void markSuccess(final HoodieRecord record, final Optional> optionalRecordMetadata) { 37 | this.totalRecords++; 38 | } 39 | 40 | /** Returns the superclass total plus the successes counted locally by {@code markSuccess}. */ @Override 41 | public long getTotalRecords() { 42 | return super.getTotalRecords() + this.totalRecords; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sinks/hoodie/partitioner/DefaultHoodieDataPartitioner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software.
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.sinks.hoodie.partitioner; 18 | 19 | import com.uber.hoodie.common.model.HoodieRecord; 20 | import com.uber.hoodie.common.model.HoodieRecordPayload; 21 | import com.uber.hoodie.table.UserDefinedBulkInsertPartitioner; 22 | import lombok.NonNull; 23 | import org.apache.spark.api.java.JavaRDD; 24 | import scala.Serializable; 25 | 26 | /** 27 | * {@link DefaultHoodieDataPartitioner} sorts the records so that all records belonging to a single 28 | * partition are grouped together. For more information also see {@link UserDefinedBulkInsertPartitioner}. 29 | */ 30 | public class DefaultHoodieDataPartitioner implements UserDefinedBulkInsertPartitioner, 31 | Serializable { 32 | 33 | /** Sorts the RDD in ascending order of the string "partitionPath recordKey" (the two values joined by a single space) and returns it with {@code outputPartitions} partitions. */ @Override 34 | public JavaRDD> repartitionRecords( 35 | @NonNull final JavaRDD> javaRDD, final int outputPartitions) { 36 | return javaRDD.sortBy( 37 | v1 -> String.format("%s %s", v1.getPartitionPath(), v1.getRecordKey()), true, outputPartitions); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/IRunState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.sources; 18 | 19 | /** 20 | * {@link IRunState} is an abstraction to hold job run state. The interface itself declares no methods. 21 | */ 22 | public interface IRunState { 23 | 24 | } 25 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/ISource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.sources; 18 | 19 | import com.uber.marmaray.common.AvroPayload; 20 | import com.uber.marmaray.common.metrics.IMetricable; 21 | import com.uber.marmaray.common.sources.IWorkUnitCalculator.IWorkUnitCalculatorResult; 22 | import com.uber.marmaray.common.status.BaseStatus; 23 | import com.uber.marmaray.common.status.IStatus; 24 | import org.apache.spark.api.java.JavaRDD; 25 | 26 | /** 27 | * {@link ISource} is an {@link IMetricable} that reads data and reports a status. NOTE(review): the type variable K used by {@code getData} is not declared in the text shown here; it presumably relates to the result of an {@link IWorkUnitCalculator}, which calculates work units for the source - confirm against the original file. 28 | */ 29 | public interface ISource extends IMetricable { 31 | 32 | /** 33 | * It reads and returns the data for the given {@code k}.
34 | */ 35 | JavaRDD getData(K k); 36 | 37 | /** Returns a newly created {@link BaseStatus} unless an implementation overrides it. */ default IStatus getStatus() { 38 | return new BaseStatus(); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/file/FileRunState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | 18 | package com.uber.marmaray.common.sources.file; 19 | 20 | import com.uber.marmaray.common.sources.IRunState; 21 | 22 | /** {@link IRunState} implementation in the file-source package; it declares no fields or methods. */ public class FileRunState implements IRunState { 23 | } 24 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/file/FileSourceDataConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | 18 | package com.uber.marmaray.common.sources.file; 19 | 20 | import com.uber.marmaray.common.configuration.Configuration; 21 | import com.uber.marmaray.common.converters.data.SourceDataConverter; 22 | import com.uber.marmaray.utilities.ErrorExtractor; 23 | import lombok.NonNull; 24 | import org.apache.avro.Schema; 25 | 26 | /** Abstract {@link SourceDataConverter} in the file-source package; it adds no members beyond a constructor that forwards to the superclass. NOTE(review): {@code Schema} is imported but not referenced in the text shown here - presumably a type argument of the superclass; confirm. */ public abstract class FileSourceDataConverter extends SourceDataConverter { 27 | 28 | /** Passes {@code conf} and {@code errorExtractor} (both annotated {@code @NonNull}) to the {@link SourceDataConverter} constructor. */ public FileSourceDataConverter(@NonNull final Configuration conf, @NonNull final ErrorExtractor errorExtractor) { 29 | super(conf, errorExtractor); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/hive/HiveRunState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | package com.uber.marmaray.common.sources.hive; 18 | 19 | import com.google.common.base.Optional; 20 | import com.uber.marmaray.common.sources.IRunState; 21 | import lombok.AllArgsConstructor; 22 | import lombok.EqualsAndHashCode; 23 | import lombok.Getter; 24 | 25 | /** {@link IRunState} implementation holding a single optional partition value. The {@code Optional} here is Guava's {@code com.google.common.base.Optional}, not {@code java.util.Optional}; the constructor, getter and equals/hashCode are generated by Lombok. */ @EqualsAndHashCode 26 | @AllArgsConstructor 27 | public class HiveRunState implements IRunState { 28 | @Getter 29 | private Optional partition; 30 | } 31 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/hive/ParquetWorkUnitCalculatorResult.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE.
16 | */ 17 | package com.uber.marmaray.common.sources.hive; 18 | 19 | import com.uber.marmaray.common.sources.IWorkUnitCalculator; 20 | import lombok.Getter; 21 | import lombok.NonNull; 22 | 23 | import java.util.List; 24 | 25 | /** Package-private work-unit calculator result that pairs a list of work units with the {@link HiveRunState} to use for the next run. */ final class ParquetWorkUnitCalculatorResult 26 | implements IWorkUnitCalculator.IWorkUnitCalculatorResult { 27 | 28 | @Getter 29 | private final HiveRunState nextRunState; 30 | 31 | /** 32 | * A work unit in this context is the name of a Hive partition. 33 | */ 34 | @Getter 35 | private final List workUnits; 36 | 37 | /** Stores both arguments as given (the list is not copied); both parameters are annotated {@code @NonNull}. */ public ParquetWorkUnitCalculatorResult(@NonNull final List workUnits, 38 | @NonNull final HiveRunState nextRunState) { 39 | this.workUnits = workUnits; 40 | this.nextRunState = nextRunState; 41 | } 42 | 43 | /** Returns {@code true} when the work-unit list is not empty. */ @Override 44 | public boolean hasWorkUnits() { 45 | return !this.workUnits.isEmpty(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/kafka/IKafkaOffsetSelector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.common.sources.kafka; 19 | 20 | import com.uber.marmaray.common.configuration.KafkaSourceConfiguration; 21 | import lombok.NonNull; 22 | import org.apache.kafka.common.TopicPartition; 23 | 24 | import java.util.Map; 25 | import java.util.Set; 26 | 27 | /** Selects Kafka partition offsets. Its single method receives the Kafka source configuration, a set of topic partitions, and maps of earliest and latest leader offsets (all annotated {@code @NonNull}) and returns a map of partition offsets. NOTE(review): the key and value types of the maps are not visible in the text shown here - confirm against the original file. */ public interface IKafkaOffsetSelector { 28 | Map getPartitionOffsets(@NonNull final KafkaSourceConfiguration kafkaConf, 29 | @NonNull final Set topicPartitions, 30 | @NonNull final Map earliestLeaderOffsets, 31 | @NonNull final Map latestLeaderOffsets); 32 | } 33 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/kafka/KafkaOffsetResetter.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.common.sources.kafka; 2 | 3 | import com.uber.marmaray.common.reporters.IKafkaDataLossReporter; 4 | import lombok.Getter; 5 | import lombok.NonNull; 6 | import lombok.Setter; 7 | 8 | /** 9 | * {@link KafkaOffsetResetter} holds the logic and state to reset Kafka offsets when there is data loss.
10 | */ 11 | public class KafkaOffsetResetter { 12 | @NonNull 13 | @Getter 14 | private final IKafkaOffsetSelector offsetSelector; 15 | 16 | @NonNull 17 | @Getter 18 | @Setter 19 | private IKafkaDataLossReporter kafkaDataLossReporter = new LogBasedKafkaDataLossReporter(); 20 | 21 | public KafkaOffsetResetter(final IKafkaOffsetSelector offsetSelector) { 22 | this.offsetSelector = offsetSelector; 23 | } 24 | 25 | public KafkaOffsetResetter(final IKafkaOffsetSelector offsetSelector, 26 | final IKafkaDataLossReporter kafkaDataLossReporter) { 27 | this.offsetSelector = offsetSelector; 28 | this.kafkaDataLossReporter = kafkaDataLossReporter; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/kafka/KafkaRunState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.sources.kafka; 18 | 19 | import com.google.common.base.Optional; 20 | import com.uber.marmaray.common.sources.IRunState; 21 | import java.util.Map; 22 | import lombok.AllArgsConstructor; 23 | import lombok.Getter; 24 | 25 | /** 26 | * It holds the run state of the kafka run. This state needs to be persisted into checkpoint manager. 27 | */ 28 | @AllArgsConstructor 29 | public class KafkaRunState implements IRunState { 30 | @Getter 31 | private Map partitionOffsets; 32 | 33 | /** 34 | * It sets partition offset. 35 | */ 36 | public void setPartitionOffset(final int partition, final long offset) { 37 | this.partitionOffsets.put(partition, offset); 38 | } 39 | 40 | /** 41 | * Get offset for a partition. 
42 | */ 43 | public Optional getPartitionOffset(final int partition) { 44 | if (this.partitionOffsets.containsKey(partition)) { 45 | return Optional.of(this.partitionOffsets.get(partition)); 46 | } else { 47 | return Optional.absent(); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/sources/kafka/LogBasedKafkaDataLossReporter.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.common.sources.kafka; 2 | 3 | import com.uber.marmaray.common.reporters.IKafkaDataLossReporter; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.hibernate.validator.constraints.NotEmpty; 6 | 7 | @Slf4j 8 | public class LogBasedKafkaDataLossReporter implements IKafkaDataLossReporter { 9 | 10 | public void reportDataLoss(@NotEmpty final String kafkaTopicName, final long totalNumberOfMessagesLost) { 11 | log.info("Kafka topic hitting loss: {} . Num messages lost: {}.", 12 | kafkaTopicName, totalNumberOfMessagesLost); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/spark/SparkArgs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.common.spark; 19 | 20 | import com.uber.marmaray.common.configuration.Configuration; 21 | import java.util.List; 22 | import lombok.Getter; 23 | import lombok.NonNull; 24 | import lombok.RequiredArgsConstructor; 25 | import lombok.ToString; 26 | import org.apache.avro.Schema; 27 | 28 | @ToString 29 | @RequiredArgsConstructor 30 | public class SparkArgs { 31 | 32 | /** 33 | * Avro schemas to be added to spark context for serialization 34 | */ 35 | @Getter 36 | @NonNull 37 | private final List avroSchemas; 38 | /** 39 | * User serialization classes to be added for kryo serialization 40 | */ 41 | @Getter 42 | @NonNull 43 | private final List userSerializationClasses; 44 | 45 | /** 46 | * Configuration object 47 | */ 48 | @Getter 49 | @NonNull 50 | private final Configuration configuration; 51 | 52 | /** 53 | * Flag to indicate whether Hive support is needed for SparkSession. 54 | * It is set to false by default. Use {@link #enableHiveSupport()} to turn it on. 55 | */ 56 | @Getter 57 | private boolean hiveSupportEnabled = false; 58 | 59 | public void enableHiveSupport() { 60 | this.hiveSupportEnabled = true; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/status/BaseStatus.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.common.status; 19 | 20 | import lombok.Getter; 21 | import lombok.NonNull; 22 | 23 | import java.util.Collection; 24 | import java.util.LinkedList; 25 | import java.util.List; 26 | 27 | /** 28 | * Base implementation of the {@link IStatus} interface. Defaults to success, but status can be downgraded. 
29 | */ 30 | public class BaseStatus implements IStatus { 31 | 32 | @Getter 33 | private Status status = Status.SUCCESS; 34 | @Getter 35 | private final List exceptions = new LinkedList<>(); 36 | 37 | public void setStatus(@NonNull final Status inputStatus) { 38 | if (inputStatus.compareTo(this.status) > 0) { 39 | this.status = inputStatus; 40 | } 41 | } 42 | 43 | public void addException(@NonNull final Exception t) { 44 | this.exceptions.add(t); 45 | } 46 | 47 | public void addExceptions(@NonNull final Collection throwableCollection) { 48 | throwableCollection.forEach(this::addException); 49 | } 50 | 51 | public void mergeStatus(@NonNull final IStatus inputStatus) { 52 | setStatus(inputStatus.getStatus()); 53 | this.addExceptions(inputStatus.getExceptions()); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/common/status/IStatus.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Return the status of a particular component. Status will be {@link Status#SUCCESS} if step is complete,
 * {@link Status#IN_PROGRESS} if there will be more work to do, and {@link Status#FAILURE} if the component has an
 * error. Status can be downgraded, but never upgraded.
 */
public interface IStatus {

    /**
     * Possible states. The declaration order is significant: {@code BaseStatus#setStatus}
     * compares constants with {@code compareTo} and only moves towards later constants,
     * so do not reorder them.
     */
    enum Status {
        SUCCESS,
        IN_PROGRESS,
        FAILURE
    }

    /**
     * Return the status of this particular run.
     */
    Status getStatus();

    /**
     * Return the exceptions associated with this status.
     */
    List<Exception> getExceptions();

}
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.common.status; 19 | 20 | import lombok.Getter; 21 | import lombok.NonNull; 22 | import org.hibernate.validator.constraints.NotEmpty; 23 | 24 | import java.util.HashMap; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.stream.Collectors; 28 | 29 | /** 30 | * Implemenation of {@link IStatus} designed for return by {@link com.uber.marmaray.common.job.JobManager}. It maintains 31 | * a map of IStatus, one for each {@link com.uber.marmaray.common.job.JobDag}, or other collection of statuses to store. 
32 | */ 33 | public class JobManagerStatus extends BaseStatus { 34 | 35 | @Getter 36 | private final Map jobStatuses = new HashMap<>(); 37 | 38 | @Override 39 | public List getExceptions() { 40 | final List jobExceptions = this.jobStatuses.values().stream() 41 | .map(IStatus::getExceptions) 42 | .flatMap(List::stream) 43 | .collect(Collectors.toList()); 44 | jobExceptions.addAll(super.getExceptions()); 45 | return jobExceptions; 46 | } 47 | 48 | public void addJobStatus(@NotEmpty final String name, @NonNull final IStatus status) { 49 | this.jobStatuses.put(name, status); 50 | this.setStatus(status.getStatus()); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/ByteBufferUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.utilities; 18 | 19 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 20 | 21 | import java.io.UnsupportedEncodingException; 22 | import java.nio.ByteBuffer; 23 | import java.nio.charset.Charset; 24 | import java.nio.charset.StandardCharsets; 25 | 26 | /** 27 | * {@link ByteBufferUtil }Provides standard ByteBuffer functionality to convert types to and from ByteBuffers 28 | */ 29 | public final class ByteBufferUtil { 30 | private ByteBufferUtil() { 31 | throw new JobRuntimeException("This is a utility class that should not be instantiated"); 32 | } 33 | 34 | public static String convertToString(final ByteBuffer bb) { 35 | return new String(bb.array(), Charset.forName(StandardCharsets.UTF_8.toString())); 36 | } 37 | 38 | public static ByteBuffer wrap(final String value) { 39 | try { 40 | return ByteBuffer.wrap(value.getBytes(StandardCharsets.UTF_8.toString())); 41 | } catch (final UnsupportedEncodingException e) { 42 | // should never see this 43 | throw new JobRuntimeException( 44 | String.format("Unsupported encoding exception on string: %s. 
Error Message: %s", 45 | value, e.getMessage())); 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/CassandraSinkUtil.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.utilities; 2 | 3 | import com.google.common.base.Optional; 4 | import lombok.extern.slf4j.Slf4j; 5 | 6 | import java.text.ParseException; 7 | import java.text.SimpleDateFormat; 8 | import java.util.TimeZone; 9 | 10 | @Slf4j 11 | public class CassandraSinkUtil { 12 | 13 | public static final TimeZone TIME_ZONE_UTC = TimeZone.getTimeZone("UTC"); 14 | 15 | public static Optional computeTimestamp(final Optional partition) { 16 | if (partition.isPresent()) { 17 | try { 18 | final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); 19 | formatter.setTimeZone(TIME_ZONE_UTC); 20 | final Long epochTime = formatter.parse(partition.get()).getTime() * 1000; 21 | return Optional.of(epochTime); 22 | } catch (ParseException e) { 23 | log.error("Got exception in parse the date to microseconds. {}", e); 24 | } 25 | } 26 | 27 | return Optional.absent(); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/DateUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.utilities; 18 | 19 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 20 | 21 | import java.time.LocalDate; 22 | import java.time.ZoneOffset; 23 | import java.time.format.DateTimeFormatter; 24 | 25 | /** 26 | * {@link DateUtil} is a utility class providing helpful Date functions 27 | */ 28 | public final class DateUtil { 29 | 30 | public static final String DATE_PARTITION_FORMAT = "yyyy/MM/dd"; 31 | private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); 32 | 33 | private DateUtil() { 34 | throw new JobRuntimeException("DateUtil is a utility class and should never be instantiated"); 35 | } 36 | 37 | public static LocalDate convertToUTCDate(final String dateStr) { 38 | return LocalDate.parse(dateStr, formatter.withZone(ZoneOffset.UTC)); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/ErrorExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.utilities; 18 | 19 | import com.uber.marmaray.common.data.ErrorData; 20 | import com.uber.marmaray.common.data.RawData; 21 | 22 | import lombok.NonNull; 23 | 24 | import java.io.Serializable; 25 | 26 | public class ErrorExtractor implements Serializable { 27 | protected static final String DEFAULT_ROW_KEY = "ROW_KEY_NOT_FOUND"; 28 | protected static final String DEFAULT_CHANGELOG_COLUMNS = "CHANGELOG_COLUMNS_NOT_FOUND"; 29 | protected static final String DEFAULT_ERROR_SOURCE_DATA = "ERROR_SOURCE_DATA_NOT_FOUND"; 30 | 31 | public String getRowKey(@NonNull final RawData rawdata) { 32 | return DEFAULT_ROW_KEY; 33 | } 34 | 35 | public String getChangeLogColumns(@NonNull final RawData rawdata) { 36 | return DEFAULT_CHANGELOG_COLUMNS; 37 | } 38 | 39 | public String getErrorSourceData(@NonNull final ErrorData errorData) { 40 | return DEFAULT_ERROR_SOURCE_DATA; 41 | } 42 | 43 | public String getErrorException(@NonNull final ErrorData errorData) { 44 | return errorData.getErrMessage(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/JsonSourceConverterErrorExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.utilities; 19 | 20 | import com.uber.marmaray.common.data.ErrorData; 21 | import lombok.NonNull; 22 | 23 | /** 24 | * Implementation of {@link ErrorExtractor} that can get error data from JSON records 25 | * that fail to parse. 26 | */ 27 | public class JsonSourceConverterErrorExtractor extends ErrorExtractor { 28 | @Override 29 | public String getErrorSourceData(@NonNull final ErrorData errorData) { 30 | return errorData.getRawData().toString(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/KafkaSourceConverterErrorExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.utilities; 18 | 19 | import com.uber.marmaray.common.data.ErrorData; 20 | import com.uber.marmaray.common.data.RawData; 21 | 22 | import lombok.NonNull; 23 | import lombok.extern.slf4j.Slf4j; 24 | 25 | import javax.xml.bind.DatatypeConverter; 26 | 27 | @Slf4j 28 | public class KafkaSourceConverterErrorExtractor extends ErrorExtractor { 29 | 30 | @Override 31 | public String getChangeLogColumns(@NonNull final RawData rawdata) { 32 | return DEFAULT_CHANGELOG_COLUMNS; 33 | } 34 | 35 | @Override 36 | public String getErrorSourceData(@NonNull final ErrorData errorData) { 37 | try { 38 | return DatatypeConverter.printHexBinary((byte []) errorData.getRawData().getData()); 39 | } catch (Exception e) { 40 | log.debug("Not able to retrieve Error source data from ErrorData"); 41 | return DEFAULT_ERROR_SOURCE_DATA; 42 | } 43 | } 44 | 45 | @Override 46 | public String getErrorException(@NonNull final ErrorData errorData) { 47 | return errorData.getErrMessage(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/NumberConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.utilities; 18 | 19 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 20 | 21 | /** 22 | * {@link NumberConstants} defines various number constants 23 | */ 24 | public final class NumberConstants { 25 | public static final int ONE_MILLION = 1000000; 26 | 27 | private NumberConstants() { 28 | throw new JobRuntimeException("This constants class should never be instantiated"); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/SizeUnit.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.utilities; 2 | 3 | /** 4 | * Conversion class for size units (bits and bytes) using 1024 as a factor between the levels. 5 | * Similar to {@link java.util.concurrent.TimeUnit}. 6 | * 7 | * Note that conversions may overflow Long when going from extremely large coarse units to finer ones. 
/**
 * Conversion class for size units (bits and bytes) using 1024 as a factor between the levels.
 * Similar to {@link java.util.concurrent.TimeUnit}.
 *
 * Conversions to a coarser unit truncate towards zero. A conversion to a finer unit overflows
 * long only when the result itself does not fit; intermediate values never overflow.
 */
public enum SizeUnit {

    BITS(Constants.BIT_FACTOR),
    BYTES(Constants.BYTE_FACTOR),
    KILOBYTES(Constants.KILOBYTE_FACTOR),
    MEGABYTES(Constants.MEGABYTE_FACTOR),
    GIGABYTES(Constants.GIGABYTE_FACTOR);

    /** Number of bits in one of this unit. */
    private final long factor;

    SizeUnit(final long factor) {
        this.factor = factor;
    }

    /**
     * @param input amount expressed in this unit
     * @return the same amount in bits
     */
    public long toBits(final long input) {
        return convert(input, Constants.BIT_FACTOR);
    }

    /**
     * @param input amount expressed in this unit
     * @return the same amount in bytes, truncated towards zero
     */
    public long toBytes(final long input) {
        return convert(input, Constants.BYTE_FACTOR);
    }

    /**
     * @param input amount expressed in this unit
     * @return the same amount in kilobytes (1024 bytes), truncated towards zero
     */
    public long toKilobytes(final long input) {
        return convert(input, Constants.KILOBYTE_FACTOR);
    }

    /**
     * @param input amount expressed in this unit
     * @return the same amount in megabytes (1024 kilobytes), truncated towards zero
     */
    public long toMegabytes(final long input) {
        return convert(input, Constants.MEGABYTE_FACTOR);
    }

    /**
     * @param input amount expressed in this unit
     * @return the same amount in gigabytes (1024 megabytes), truncated towards zero
     */
    public long toGigabytes(final long input) {
        return convert(input, Constants.GIGABYTE_FACTOR);
    }

    /**
     * Converts {@code input} from this unit to the unit whose bit factor is {@code targetFactor}.
     *
     * All factors are powers of two, so the larger one is an exact multiple of the smaller one.
     * Reducing the two factors to a single ratio first gives the same result as
     * {@code factor * input / targetFactor} whenever that product fits in a long, and avoids
     * overflowing on the product when only the intermediate value is too large.
     */
    private long convert(final long input, final long targetFactor) {
        if (this.factor >= targetFactor) {
            return input * (this.factor / targetFactor);
        }
        return input / (targetFactor / this.factor);
    }

    private static final class Constants {
        public static final int UNIT_SEPARATOR = 1024;
        public static final long BIT_FACTOR = 1;
        public static final long BYTE_FACTOR = 8;
        public static final long KILOBYTE_FACTOR = BYTE_FACTOR * UNIT_SEPARATOR;
        public static final long MEGABYTE_FACTOR = KILOBYTE_FACTOR * UNIT_SEPARATOR;
        public static final long GIGABYTE_FACTOR = MEGABYTE_FACTOR * UNIT_SEPARATOR;
    }
}
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.utilities; 18 | 19 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 20 | 21 | /** 22 | * {@link StringTypes} defines a set of commonly used immutable strings 23 | */ 24 | public final class StringTypes { 25 | public static final String EMPTY = ""; 26 | public static final String SPACE = " "; 27 | public static final String DOT = "."; 28 | public static final String COMMA = ","; 29 | public static final String HASHTAG = "#"; 30 | public static final String COLON = ":"; 31 | public static final String SEMICOLON = ";"; 32 | public static final String NOT_DEFINED = "N/D"; 33 | public static final String UNDERSCORE = "_"; 34 | public static final String EQUAL = "="; 35 | public static final String FORWARD_SLASH = "/"; 36 | public static final String DASH = "-"; 37 | 38 | private StringTypes() { 39 | throw new JobRuntimeException("This class defines constants and should not be instantiated"); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/StringUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | 18 | package com.uber.marmaray.utilities; 19 | 20 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 21 | 22 | /** 23 | * Utility class for operating on Strings. 24 | */ 25 | public final class StringUtil { 26 | 27 | private StringUtil() { 28 | throw new JobRuntimeException("StringUTil is a static utility class and should not be instantiated"); 29 | } 30 | 31 | /** 32 | * Use {@link String#intern} to save space when storing common strings. 33 | * @param input String to input 34 | * @return null if input is null, otherwise input.intern() 35 | */ 36 | public static String internString(final String input) { 37 | if (input == null) { 38 | return null; 39 | } else { 40 | return input.intern(); 41 | } 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/TimeUnitUtil.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.utilities; 2 | 3 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 4 | 5 | public class TimeUnitUtil { 6 | 7 | private static final Long MICRO_SEC_LOWER_BOUND = (long) Math.pow(10, 15); 8 | private static final Long MICRO_SEC_HIGHER_BOUND = (long) Math.pow(10, 18); 9 | 10 | /** 11 | * ConvertToMicroSeconds 12 | * @param num can only be in these time units [sec, milliseconds, microseconds, nanoseconds] 13 | * @return microseconds 14 | */ 15 | public static 
String convertToMicroSeconds(final Long num) { 16 | int computeTimes = 4; 17 | Long val = num; 18 | while (computeTimes-- > 0) { 19 | if (val.compareTo(MICRO_SEC_HIGHER_BOUND) >= 0) { 20 | val /= 1000; 21 | } else if (val.compareTo(MICRO_SEC_LOWER_BOUND) < 0) { 22 | val *= 1000; 23 | } else { 24 | return String.valueOf(val); 25 | } 26 | } 27 | 28 | throw new JobRuntimeException("Input timestamp doesn't have expected time unit. " 29 | + "We accept only seconds/milliseconds/microseconds/nanoseconds.]"); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/TimestampInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.utilities; 18 | 19 | import com.google.common.base.Optional; 20 | import com.uber.marmaray.common.configuration.CassandraSinkConfiguration; 21 | import lombok.AllArgsConstructor; 22 | import lombok.Getter; 23 | import lombok.ToString; 24 | 25 | import java.io.Serializable; 26 | 27 | /** 28 | * {@link TimestampInfo} contains timestamp information, either as a String or a long 29 | */ 30 | @ToString 31 | @AllArgsConstructor 32 | public class TimestampInfo implements Serializable { 33 | @Getter 34 | final Optional timestamp; 35 | 36 | @Getter 37 | final boolean saveAsLongType; 38 | 39 | @Getter 40 | final String timestampFieldName; 41 | 42 | public boolean hasTimestamp() { 43 | return this.timestamp.isPresent(); 44 | } 45 | 46 | public static TimestampInfo generateEmptyTimestampInfo() { 47 | return new TimestampInfo( 48 | Optional.absent(), 49 | false, 50 | CassandraSinkConfiguration.DEFAULT_DISPERSAL_TIMESTAMP_FIELD_NAME); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /marmaray/src/main/java/com/uber/marmaray/utilities/cluster/CassandraClusterInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.utilities.cluster; 18 | 19 | import com.google.common.base.Optional; 20 | import lombok.Getter; 21 | import lombok.NonNull; 22 | import lombok.Setter; 23 | 24 | /** 25 | * POJO object that contains all the information 26 | * related to the cassandra cluster 27 | */ 28 | public class CassandraClusterInfo { 29 | 30 | @Getter 31 | @Setter 32 | private Optional sslStoragePort; 33 | 34 | @Getter 35 | @Setter 36 | private Optional rpcPort; 37 | 38 | @Getter 39 | @Setter 40 | private Optional storagePort; 41 | 42 | @Getter 43 | @Setter 44 | private Optional nativeApplicationPort; 45 | 46 | @Getter 47 | @Setter 48 | private Optional listOfNodes; 49 | 50 | public CassandraClusterInfo(@NonNull final Optional sslStoragePart, 51 | @NonNull final Optional rpcPort, 52 | @NonNull final Optional storagePort) { 53 | this.sslStoragePort = sslStoragePart; 54 | this.rpcPort = rpcPort; 55 | this.storagePort = storagePort; 56 | this.nativeApplicationPort = Optional.absent(); 57 | this.listOfNodes = Optional.absent(); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/data/TestRDDWrapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.data; 18 | 19 | import com.uber.marmaray.common.util.AbstractSparkTest; 20 | import java.util.Arrays; 21 | import java.util.List; 22 | import org.apache.spark.api.java.JavaRDD; 23 | import org.junit.Assert; 24 | import org.junit.Test; 25 | 26 | public class TestRDDWrapper extends AbstractSparkTest { 27 | 28 | @Test 29 | public void testCount() { 30 | final List list1 = Arrays.asList(1,2,3,4,5); 31 | final JavaRDD list1RDD = this.jsc.get().parallelize(list1); 32 | 33 | final RDDWrapper rddWrapper1 = new RDDWrapper(list1RDD); 34 | Assert.assertEquals(5, rddWrapper1.getCount()); 35 | Assert.assertEquals(5, rddWrapper1.getData().count()); 36 | 37 | final RDDWrapper rddWrapper2 = new RDDWrapper(list1RDD, 1); 38 | Assert.assertEquals(1, rddWrapper2.getCount()); 39 | Assert.assertEquals(5, rddWrapper2.getData().count()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/metadata/HDFSTestConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.metadata; 18 | 19 | public class HDFSTestConstants { 20 | public static final String BASE_METADATA_PATH = "baseMetadataPath"; 21 | public static final String JOBMANAGER_BASE_METADATA_BASEPATH = "jobManagerBasePath"; 22 | public static final String BASE_RAW_DATA_PATH = "baseRawDataPath"; 23 | public static final String LOCAL = "local"; 24 | 25 | } 26 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/metrics/TestTimerMetric.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.metrics; 18 | 19 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 20 | import org.junit.Before; 21 | import org.junit.Test; 22 | 23 | import static org.junit.Assert.assertTrue; 24 | 25 | /** 26 | * Test for TimerMetric 27 | */ 28 | public class TestTimerMetric { 29 | 30 | private TimerMetric timerMetric; 31 | private final int SLEEP_TIME = 1000; 32 | private final int THRESHOLD = 50; 33 | 34 | @Before 35 | public void setupTestClass() { 36 | this.timerMetric = new TimerMetric("timer-big-function"); 37 | } 38 | 39 | @Test 40 | public void testGetMetricValueSuccess() throws InterruptedException { 41 | Thread.sleep(SLEEP_TIME); 42 | this.timerMetric.stop(); 43 | long diff = this.timerMetric.getMetricValue() - SLEEP_TIME; 44 | assertTrue(diff < THRESHOLD); 45 | } 46 | 47 | @Test(expected=JobRuntimeException.class) 48 | public void testGetMetricValueFail() { 49 | timerMetric.getMetricValue(); 50 | } 51 | 52 | @Test 53 | public void testAddTag() { 54 | this.timerMetric.addTag("job", "test-job"); 55 | assertTrue(this.timerMetric.getTags().containsKey("job")); 56 | assertTrue(this.timerMetric.getTags().containsKey("metric-type")); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/schema/cassandra/TestClusterKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 
(c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.schema.cassandra; 18 | 19 | import org.junit.Assert; 20 | import org.junit.Test; 21 | 22 | public class TestClusterKey { 23 | private final static String KEY = "testKey"; 24 | @Test 25 | public void testParseStringWithNoExplicitOrdering() { 26 | final ClusterKey key = ClusterKey.parse(KEY); 27 | Assert.assertEquals(KEY, key.getName()); 28 | Assert.assertEquals(ClusterKey.Order.ASC, key.getOrder()); 29 | Assert.assertEquals("testKey ASC", key.toString()); 30 | } 31 | 32 | @Test 33 | public void testParseStringWithDescOrder() { 34 | final ClusterKey key = ClusterKey.parse(KEY + ":" + ClusterKey.Order.DESC); 35 | Assert.assertEquals(KEY, key.getName()); 36 | Assert.assertEquals(ClusterKey.Order.DESC, key.getOrder()); 37 | Assert.assertEquals("testKey DESC", key.toString()); 38 | } 39 | 40 | @Test 41 | public void testParseStringWithAscOrder() { 42 | final ClusterKey key = ClusterKey.parse(KEY + ":" + ClusterKey.Order.ASC); 43 | Assert.assertEquals(KEY, key.getName()); 44 | Assert.assertEquals(ClusterKey.Order.ASC, key.getOrder()); 45 | Assert.assertEquals("testKey ASC", key.toString()); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/spark/TestMarmarayKryoSerializer.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.common.spark; 2 | 3 | import com.uber.marmaray.common.util.AbstractSparkTest; 4 | import org.junit.Assert; 5 | import org.junit.Test; 6 | 7 | import java.util.LinkedList; 8 | import java.util.List; 9 | import java.util.stream.IntStream; 10 | 11 | public class TestMarmarayKryoSerializer extends AbstractSparkTest { 12 | 13 | @Test 14 | public void testExceptionSerialization() { 15 | final List exceptions = new LinkedList<>(); 16 | final int numberOfExceptions = 10; 17 | IntStream.range(0, numberOfExceptions) 18 | .forEach(i -> exceptions.add(new 
Exception("test-" + i))); 19 | final List exceptionList = this.jsc.get().parallelize(exceptions).map(o -> o).collect(); 20 | Assert.assertEquals(numberOfExceptions, exceptionList.size()); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/status/TestJobManagerStatus.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | 18 | package com.uber.marmaray.common.status; 19 | 20 | import org.junit.Assert; 21 | import org.junit.Test; 22 | 23 | import java.util.Collections; 24 | 25 | public class TestJobManagerStatus { 26 | 27 | @Test 28 | public void testStatuses() { 29 | // no status is successful 30 | JobManagerStatus status = new JobManagerStatus(); 31 | Assert.assertEquals(IStatus.Status.SUCCESS, status.getStatus()); 32 | Assert.assertEquals(Collections.emptyMap(), status.getJobStatuses()); 33 | 34 | status.addJobStatus("MyJob", new BaseStatus()); 35 | Assert.assertEquals(IStatus.Status.SUCCESS, status.getStatus()); 36 | Assert.assertEquals(1, status.getJobStatuses().size()); 37 | 38 | final BaseStatus failedStatus = new BaseStatus(); 39 | final Exception e1 = new NullPointerException("Foo was here!"); 40 | failedStatus.setStatus(IStatus.Status.FAILURE); 41 | failedStatus.addException(e1); 42 | status.addJobStatus("MySecondJob", failedStatus); 43 | Assert.assertEquals(Collections.singletonList(e1), status.getExceptions()); 44 | Assert.assertEquals(IStatus.Status.FAILURE, status.getStatus()); 45 | Assert.assertEquals(2, status.getJobStatuses().size()); 46 | 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/util/CassandraTestConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.util; 18 | 19 | import com.uber.marmaray.common.configuration.CassandraSinkConfiguration; 20 | import com.uber.marmaray.common.configuration.Configuration; 21 | 22 | public class CassandraTestConstants { 23 | public static final String KEY_SPACE = "marmaray"; 24 | public static final String TABLE = "crossfit_gyms"; 25 | public static final String LOCALHOST = "localhost"; 26 | public static final String INT_FIELD = "int_field"; 27 | public static final String STRING_FIELD = "string_field"; 28 | public static final String BOOLEAN_FIELD = "boolean_field"; 29 | public static final int CASSANDRA_PORT = 9142; 30 | public static final Configuration CONFIGURATION = new Configuration(); 31 | static { 32 | CONFIGURATION.setProperty(CassandraSinkConfiguration.KEYSPACE, KEY_SPACE); 33 | CONFIGURATION.setProperty(CassandraSinkConfiguration.TABLE_NAME, TABLE); 34 | CONFIGURATION.setProperty(CassandraSinkConfiguration.CLUSTER_NAME, "test-cluster"); 35 | CONFIGURATION.setProperty(CassandraSinkConfiguration.PARTITION_KEYS, "key_name1,key_name2"); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/util/FileTestUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.util; 18 | 19 | import com.google.common.io.Files; 20 | import com.uber.marmaray.common.exceptions.JobRuntimeException; 21 | import java.io.File; 22 | 23 | public class FileTestUtil { 24 | 25 | private FileTestUtil() { 26 | throw new JobRuntimeException("Utility class; don't instantiate it"); 27 | } 28 | 29 | /** 30 | * It creates temp directory. 31 | * @return 32 | */ 33 | public static String getTempFolder() { 34 | final File basePath = Files.createTempDir(); 35 | // We want temp directory to delete after all tests have completed. 
36 | basePath.deleteOnExit(); 37 | return basePath.getAbsolutePath(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/util/HiveTestUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.util; 18 | 19 | import com.uber.marmaray.common.configuration.Configuration; 20 | import com.uber.marmaray.common.configuration.HiveSourceConfiguration; 21 | 22 | public final class HiveTestUtil { 23 | private HiveTestUtil() { 24 | throw new RuntimeException("This test class should never be instantiated"); 25 | } 26 | 27 | public static HiveSourceConfiguration initializeConfig(final String jobName, 28 | final String dataPath) 29 | { 30 | final Configuration config = new Configuration(); 31 | config.setProperty(HiveSourceConfiguration.JOB_NAME, jobName); 32 | config.setProperty(HiveSourceConfiguration.HIVE_DATA_PATH, dataPath); 33 | return new HiveSourceConfiguration(config); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/util/MultiThreadTestCoordinator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.util; 18 | 19 | import java.util.concurrent.atomic.AtomicLong; 20 | 21 | public class MultiThreadTestCoordinator { 22 | private final AtomicLong c; 23 | 24 | public MultiThreadTestCoordinator() { 25 | this.c = new AtomicLong(0); 26 | } 27 | 28 | public void nextStep() { 29 | this.c.addAndGet(1); 30 | } 31 | 32 | public void waitUntilStep(long s) throws RuntimeException { 33 | while (c.get() != s) { 34 | if (c.get() > s) { 35 | throw new RuntimeException(String.format("Current Step %d has passed ExpectedStep %d", c.get(), s)); 36 | } 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/util/TestDateUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.util; 18 | 19 | import com.uber.marmaray.utilities.DateUtil; 20 | import org.junit.Assert; 21 | import org.junit.Test; 22 | 23 | import java.time.LocalDate; 24 | 25 | public class TestDateUtil { 26 | 27 | @Test 28 | public void testConvertToUTCDate() { 29 | // We should be able to handle both forward slashes and dashes as separators 30 | // since we sanitize the input 31 | final String dateStr = "2017-05-01"; 32 | LocalDate ld = DateUtil.convertToUTCDate(dateStr); 33 | Assert.assertEquals(2017, ld.getYear()); 34 | Assert.assertEquals(5, ld.getMonth().getValue()); 35 | Assert.assertEquals(1, ld.getDayOfMonth()); 36 | 37 | final String dateStr2 = "1998-06-10"; 38 | LocalDate l2 = DateUtil.convertToUTCDate(dateStr2); 39 | Assert.assertEquals(1998, l2.getYear()); 40 | Assert.assertEquals(6, l2.getMonth().getValue()); 41 | Assert.assertEquals(10, l2.getDayOfMonth()); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/util/TestJobUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 
16 | */ 17 | package com.uber.marmaray.common.util; 18 | 19 | import com.uber.marmaray.common.configuration.Configuration; 20 | import com.uber.marmaray.utilities.JobUtil; 21 | import org.apache.hadoop.fs.Path; 22 | import org.junit.Assert; 23 | import org.junit.Test; 24 | 25 | import java.io.IOException; 26 | /** Test for JobUtil.getDataCenterForJob. */ 27 | public class TestJobUtil { 28 | /** Resolves the datacenter/datacenter test resource and expects the value test_dc back. NOTE(review): the local conf is never read in this method - confirm whether it is still needed. */ 29 | @Test 30 | public void testGetDataCenter() throws IOException { 31 | final Configuration conf = new Configuration(); 32 | final String dcFile = FileHelperUtil.getResourcePath(getClass(), 33 | new Path("datacenter", "datacenter").toString()); 34 | final String dc = JobUtil.getDataCenterForJob(dcFile); 35 | Assert.assertEquals("test_dc", dc); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/common/util/TestMapUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 Uber Technologies, Inc. 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | * 8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions 9 | * of the Software. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 15 | * IN THE SOFTWARE. 16 | */ 17 | package com.uber.marmaray.common.util; 18 | 19 | import com.uber.marmaray.utilities.MapUtil; 20 | import java.util.HashMap; 21 | import java.util.Map; 22 | import org.junit.Assert; 23 | import org.junit.Test; 24 | /** Round-trip test: entries whose keys and values embed MapUtil's separator constants must survive serializeMap followed by deserializeMap. */ 25 | public class TestMapUtil { 26 | 27 | @Test 28 | public void testSerDeserMap() { 29 | final Map map = new HashMap<>(); 30 | final int mapEntries = 10; 31 | for (int i = 0; i < mapEntries; i++) { 32 | map.put("prefixKey" + i + MapUtil.KEY_VALUE_SEPARATOR 33 | + MapUtil.KEYS_SEPARATOR + "suffix", 34 | "prefixValue" + i + MapUtil.KEY_VALUE_SEPARATOR 35 | + MapUtil.KEYS_SEPARATOR + "suffix"); 36 | } 37 | Assert.assertEquals(map.entrySet(), MapUtil.deserializeMap(MapUtil.serializeMap(map)).entrySet()); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /marmaray/src/test/java/com/uber/marmaray/utilities/ResourcesUtils.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.utilities; 2 | import org.hibernate.validator.constraints.NotEmpty; 3 | import java.io.File; 4 | 5 | /** 6 | * Test helper for loading files through the class loader of this class. 7 | */ 8 | public class ResourcesUtils { 9 | /** 10 | * Reads the named resource and decodes it as UTF-8 text. 11 | * 12 | * @param fileName resource name passed to ClassLoader#getResource 13 | * @return the resource content decoded as UTF-8 14 | * @throws Exception if the resource is missing or cannot be read 15 | */ 16 | public static String getTextFromResource(@NotEmpty final String fileName) throws Exception { 17 | // Decode explicitly: new String(byte[]) alone would use the platform default charset. 18 | return new String(getBytesFromResource(fileName), java.nio.charset.StandardCharsets.UTF_8); 19 | } 20 | 21 | /** 22 | * Reads the named resource as raw bytes. 23 | * 24 | * @param fileName resource name passed to ClassLoader#getResource 25 | * @return the full content of the resource 26 | * @throws NullPointerException if no such resource exists 27 | * @throws Exception if the resource URL cannot be converted to a file or read 28 | */ 29 | public static byte[] getBytesFromResource(@NotEmpty final String fileName) throws Exception { 30 | final java.net.URL resource = ResourcesUtils.class.getClassLoader().getResource(fileName); 31 | // Fail with the resource name instead of an anonymous NullPointerException on toURI(). 32 | java.util.Objects.requireNonNull(resource, "resource not found on classpath: " + fileName); 33 | return java.nio.file.Files.readAllBytes(new File(resource.toURI()).toPath()); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- 
/marmaray/src/test/java/com/uber/marmaray/utilities/TestTimeUnitUtil.java: -------------------------------------------------------------------------------- 1 | package com.uber.marmaray.utilities; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | /** Checks the strings TimeUnitUtil.convertToMicroSeconds is expected to return for inputs of different digit counts. */ 6 | public class TestTimeUnitUtil { 7 | 8 | @Test 9 | public void testConvertToMicroSeconds() { 10 | // millisec -> microsec: 10-digit input, six zeros appended. NOTE(review): 1551742037 has the magnitude of epoch seconds, not milliseconds - confirm the labels 11 | Assert.assertEquals("1551742037000000", TimeUnitUtil.convertToMicroSeconds(1551742037L)); 12 | 13 | // sec -> microsec: 7-digit input, nine zeros appended 14 | Assert.assertEquals("1551742000000000", TimeUnitUtil.convertToMicroSeconds(1551742L)); 15 | 16 | // microsec -> microsec: a 16-digit input is expected back unchanged 17 | Assert.assertEquals("1551742037895764", TimeUnitUtil.convertToMicroSeconds(1551742037895764L)); 18 | 19 | // nanosec -> microsec: a 19-digit input is expected back without its last three digits 20 | Assert.assertEquals("1551742037895764", TimeUnitUtil.convertToMicroSeconds(1551742037895764000L)); 21 | 22 | // microsec_higher_bound -> microsec: 10^18 is expected back as 10^15 23 | Assert.assertEquals("1000000000000000", TimeUnitUtil.convertToMicroSeconds((long) Math.pow(10, 18))); 24 | 25 | // microsec_lower_bound -> microsec: 10^15 is expected back unchanged 26 | Assert.assertEquals("1000000000000000", TimeUnitUtil.convertToMicroSeconds((long) Math.pow(10, 15))); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | marmaray: 3 | scalar_configs: 4 | integer: 1 5 | long: 1234567890123 6 | double: 1.23 7 | boolean: true 8 | string_configs: 9 | string1: "string1" 10 | stringlist: 11 | " string2 ": " string2 " 12 | retry_strategy: 13 | default_strategy: "SimpleRetryStrategy" 14 | simple: 15 | num_of_retries: 3 16 | wait_time_in_ms: 1000 17 | hadoop: 18 | mapreduce.map.memory.mb: 512 19 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/configWithScopes.yaml: 
-------------------------------------------------------------------------------- 1 | scope_override_map: 2 | bootstrap: default 3 | incremental: default 4 | 5 | database: 6 | connection: 7 | type: "jdbc" 8 | port: 5000 9 | 10 | hbase: 11 | connection: 12 | host: ["hadoopzkfoo1", "hadoopzkfoo2"] 13 | port: 4000 14 | 15 | bootstrap: 16 | spark_properties: 17 | spark.executor.memory: "8g" 18 | spark.driver.memory: "8g" 19 | spark.network.timeout: "120s" 20 | marmaray: 21 | hoodie: 22 | tables: 23 | non_primitive_field_from_bootstrap: 24 | heya: 3 25 | bye: 4 26 | target_table: 27 | parallelism: 1000 28 | combine_before_insert: false 29 | 30 | incremental: 31 | spark_properties: 32 | spark.executor.memory: "4g" 33 | spark.driver.memory: "4g" 34 | spark.network.timeout: "100s" 35 | marmaray: 36 | hoodie: 37 | tables: 38 | target_table: 39 | metrics_prefix: "hoover" 40 | enable_metrics: true 41 | parallelism: 100 42 | combine_before_insert: true 43 | combine_before_upsert: true 44 | 45 | default: 46 | marmaray: 47 | hoodie: 48 | tables: 49 | non_primitive_field_from_default: 50 | hello: 1 51 | hi: 2 52 | target_table: 53 | parallelism: 10 54 | parquet_max_file_size: 2147483647 55 | 56 | # parallelism is overridden by the bootstrap and incremental scopes 57 | # parquet_max_file_size is a primitive field inherited from the default scope 58 | # non_primitive_field_from_default is a non-primitive field inherited from the default scope 59 | # combine_before_insert and combine_before_upsert are primitive fields present only in the override scopes 60 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/datacenter/datacenter: -------------------------------------------------------------------------------- 1 | test_dc 2 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/expectedConfigWithBootstrapScope.yaml: -------------------------------------------------------------------------------- 1 | database: 2 | connection: 3 | type: "jdbc" 4 | port: 
5000 5 | 6 | hbase: 7 | connection: 8 | host: ["hadoopzkfoo1", "hadoopzkfoo2"] 9 | port: 4000 10 | 11 | spark_properties: 12 | spark.executor.memory: "8g" 13 | spark.driver.memory: "8g" 14 | spark.network.timeout: "120s" 15 | 16 | marmaray: 17 | hoodie: 18 | tables: 19 | non_primitive_field_from_default: 20 | hello: 1 21 | hi: 2 22 | non_primitive_field_from_bootstrap: 23 | heya: 3 24 | bye: 4 25 | target_table: 26 | parallelism: 1000 27 | combine_before_insert: false 28 | parquet_max_file_size: 2147483647 29 | 30 | # parallelism is overridden by the bootstrap scope 31 | # parquet_max_file_size is a primitive field inherited from the default scope 32 | # non_primitive_field_from_default is a non-primitive field inherited from the default scope 33 | # combine_before_insert is a primitive field retained from the override scope 34 | # non_primitive_field_from_bootstrap is a non-primitive field retained from the override scope 35 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/expectedConfigWithIncrementalScope.yaml: -------------------------------------------------------------------------------- 1 | database: 2 | connection: 3 | type: "jdbc" 4 | port: 5000 5 | 6 | hbase: 7 | connection: 8 | host: ["hadoopzkfoo1", "hadoopzkfoo2"] 9 | port: 4000 10 | 11 | spark_properties: 12 | spark.executor.memory: "4g" 13 | spark.driver.memory: "4g" 14 | spark.network.timeout: "100s" 15 | 16 | marmaray: 17 | hoodie: 18 | tables: 19 | non_primitive_field_from_default: 20 | hello: 1 21 | hi: 2 22 | target_table: 23 | metrics_prefix: "hoover" 24 | enable_metrics: true 25 | parallelism: 100 26 | combine_before_insert: true 27 | combine_before_upsert: true 28 | parquet_max_file_size: 2147483647 29 | 30 | # parallelism is overridden by the incremental scope 31 | # parquet_max_file_size is a primitive field inherited from the default scope 32 | # non_primitive_field_from_default is a non-primitive field inherited from the default scope 33 | # combine_before_insert, combine_before_upsert are primitive fields retained in override 
scope 34 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/log4j-surefire.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=WARN, A1 2 | log4j.category.com.uber=WARN 3 | log4j.category.com.uber.hoodie.common.utils=WARN 4 | log4j.category.org.apache.parquet.hadoop=WARN 5 | 6 | # A1 is set to be a ConsoleAppender. 7 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 8 | # A1 uses PatternLayout. 9 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 10 | log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 11 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/schemas/StringPair.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "StringPair", 4 | "doc": "A pair of strings.", 5 | "fields": [ 6 | {"name": "left", "type": "string"}, 7 | {"name": "right", "type": "string"} 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/schemas/schemasource/myTestSchema.1.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "myTestSchema", 4 | "schemaVersion": 1, 5 | "fields": [ 6 | { "name": "firstName", "type": ["null", "string"], "default": null }, 7 | { "name": "lastName", "type": ["null", "string"], "default": null }, 8 | { "name": "address", "type": ["null", {"type": "record", "name": "address_items", "fields": [ 9 | { "name": "line1", "type": ["null", "string"], "default": null }, 10 | { "name": "city", "type": ["null", "string"], "default": null }, 11 | { "name": "zip", "type": ["null", "long"], "default": null} 12 | ] } ], "default": null} 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- 
/marmaray/src/test/resources/schemas/schemasource/myTestSchema.2.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "myTestSchema", 4 | "schemaVersion": 2, 5 | "fields": [ 6 | { "name": "firstName", "type": ["null", "string"], "default": null }, 7 | { "name": "lastName", "type": ["null", "string"], "default": null }, 8 | { "name": "middleName", "type": ["null", "string"], "default": null }, 9 | { "name": "address", "type": ["null", {"type": "record", "name": "address_items", "fields": [ 10 | { "name": "line1", "type": ["null", "string"], "default": null }, 11 | { "name": "city", "type": ["null", "string"], "default": null }, 12 | { "name": "zip", "type": ["null", "long"], "default": null} 13 | ] } ], "default": null} 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/schemas/schemasource/wrongSchema.1.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "wrongSchema", 4 | "schemaVersion": 1, 5 | "fields": [ 6 | { "name": "foo", "type": ["null", "string"], "default": null } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/setupTable.cql: -------------------------------------------------------------------------------- 1 | CREATE KEYSPACE IF NOT EXISTS marmaray WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }; 2 | 3 | USE marmaray; 4 | 5 | CREATE TABLE IF NOT EXISTS devtable4 ( astr text PRIMARY KEY ) ; 6 | -------------------------------------------------------------------------------- /marmaray/src/test/resources/teardownTable.cql: -------------------------------------------------------------------------------- 1 | USE marmaray; 2 | 3 | DROP TABLE IF EXISTS crossfit_gyms; 4 | DROP KEYSPACE IF EXISTS marmaray; 5 | 
-------------------------------------------------------------------------------- /marmaray/src/test/resources/testData/testPartition/data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/marmaray/src/test/resources/testData/testPartition/data.parquet -------------------------------------------------------------------------------- /marmaray/src/test/resources/testData/testPartition1/testPartition2/data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/marmaray/src/test/resources/testData/testPartition1/testPartition2/data.parquet --------------------------------------------------------------------------------