├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── checkstyles
├── marmaray-0.0.1.xml
└── suppressions.xml
├── docs
└── images
│ ├── ForkOperator_ForkFunction.png
│ ├── High_Level_Architecture.png
│ ├── Marmaray-Secondary.Logo.png
│ ├── Marmaray_Primary.Logo_tagline.png
│ ├── Marmaray_white_Primary.Logo_tagline.png
│ ├── Marmaray_white_Secondary.Logo.png
│ ├── Metadata_Manager.png
│ ├── avro_payload_conversion.png
│ └── end_to_end_job_flow.png
├── examples
└── README.md
├── marmaray-tools
├── checkstyles
├── pom.xml
└── src
│ └── main
│ ├── cli
│ └── toggleHDFSMetadataFile.py
│ └── java
│ └── com
│ └── uber
│ └── marmaray
│ └── tools
│ ├── HDFSMetadataPrinter.java
│ └── HDFSMetadataPruner.java
├── marmaray
├── build.gradle
├── checkstyles
├── config
│ └── sample.yaml
├── pom.xml
└── src
│ ├── main
│ └── java
│ │ └── com
│ │ └── uber
│ │ └── marmaray
│ │ ├── common
│ │ ├── AvroPayload.java
│ │ ├── DispersalLengthType.java
│ │ ├── DispersalType.java
│ │ ├── FileSinkType.java
│ │ ├── HoodieErrorPayload.java
│ │ ├── IPayload.java
│ │ ├── MetadataManagerType.java
│ │ ├── PartitionType.java
│ │ ├── WorkUnit.java
│ │ ├── actions
│ │ │ ├── IJobDagAction.java
│ │ │ ├── JobDagActions.java
│ │ │ └── ReporterAction.java
│ │ ├── configuration
│ │ │ ├── AwsConfiguration.java
│ │ │ ├── CassandraMetadataManagerConfiguration.java
│ │ │ ├── CassandraSinkConfiguration.java
│ │ │ ├── ConfigScopeResolver.java
│ │ │ ├── Configuration.java
│ │ │ ├── ErrorTableConfiguration.java
│ │ │ ├── FileSinkConfiguration.java
│ │ │ ├── FileSourceConfiguration.java
│ │ │ ├── HDFSMetadataManagerConfiguration.java
│ │ │ ├── HDFSSchemaServiceConfiguration.java
│ │ │ ├── HadoopConfiguration.java
│ │ │ ├── HiveConfiguration.java
│ │ │ ├── HiveSourceConfiguration.java
│ │ │ ├── HoodieConfiguration.java
│ │ │ ├── HoodieIndexConfiguration.java
│ │ │ ├── KafkaConfiguration.java
│ │ │ ├── KafkaSourceConfiguration.java
│ │ │ ├── LockManagerConfiguration.java
│ │ │ ├── MetadataManagerConfiguration.java
│ │ │ ├── RetryStrategyConfiguration.java
│ │ │ ├── SimpleRetryStrategyConfiguration.java
│ │ │ ├── SparkConfiguration.java
│ │ │ └── ZookeeperConfiguration.java
│ │ ├── converters
│ │ │ ├── converterresult
│ │ │ │ └── ConverterResult.java
│ │ │ ├── data
│ │ │ │ ├── AbstractDataConverter.java
│ │ │ │ ├── CassandraSinkCQLDataConverter.java
│ │ │ │ ├── CassandraSinkDataConverter.java
│ │ │ │ ├── DummyHoodieSinkDataConverter.java
│ │ │ │ ├── FileSinkDataCSVConverter.java
│ │ │ │ ├── FileSinkDataConverter.java
│ │ │ │ ├── FileSinkDataConverterFactory.java
│ │ │ │ ├── FileSinkDataJSONConverter.java
│ │ │ │ ├── HoodieSinkDataConverter.java
│ │ │ │ ├── KafkaSourceDataConverter.java
│ │ │ │ ├── SinkDataConverter.java
│ │ │ │ ├── SourceDataConverter.java
│ │ │ │ ├── SparkSourceDataConverter.java
│ │ │ │ └── TSBasedHoodieSinkDataConverter.java
│ │ │ └── schema
│ │ │ │ ├── AbstractSchemaConverter.java
│ │ │ │ ├── CassandraSchemaConverter.java
│ │ │ │ └── DataFrameSchemaConverter.java
│ │ ├── data
│ │ │ ├── BinaryRawData.java
│ │ │ ├── ErrorData.java
│ │ │ ├── ForkData.java
│ │ │ ├── IData.java
│ │ │ ├── RDDWrapper.java
│ │ │ ├── RawData.java
│ │ │ ├── RawDataHelper.java
│ │ │ └── ValidData.java
│ │ ├── dataset
│ │ │ ├── ErrorRecord.java
│ │ │ ├── ExceptionRecord.java
│ │ │ ├── MetricRecord.java
│ │ │ ├── UtilRecord.java
│ │ │ └── UtilTable.java
│ │ ├── exceptions
│ │ │ ├── ForkOperationException.java
│ │ │ ├── InvalidDataException.java
│ │ │ ├── JobRuntimeException.java
│ │ │ ├── MetadataException.java
│ │ │ ├── MissingPropertyException.java
│ │ │ └── RetryException.java
│ │ ├── forkoperator
│ │ │ ├── FilterFunction.java
│ │ │ ├── ForkFunction.java
│ │ │ └── ForkOperator.java
│ │ ├── job
│ │ │ ├── Dag.java
│ │ │ ├── DagPayload.java
│ │ │ ├── ExecutionTimeJobExecutionStrategy.java
│ │ │ ├── IJobExecutionStrategy.java
│ │ │ ├── Job.java
│ │ │ ├── JobDag.java
│ │ │ ├── JobManager.java
│ │ │ ├── JobSubDag.java
│ │ │ ├── SingleSinkSubDag.java
│ │ │ ├── ThreadPoolService.java
│ │ │ └── ThreadPoolServiceTier.java
│ │ ├── metadata
│ │ │ ├── AbstractValue.java
│ │ │ ├── CassandraBasedMetadataManager.java
│ │ │ ├── HDFSDatePartitionManager.java
│ │ │ ├── HDFSMetadataManager.java
│ │ │ ├── HDFSPartitionManager.java
│ │ │ ├── HoodieBasedMetadataManager.java
│ │ │ ├── IMetadataManager.java
│ │ │ ├── JobManagerMetadataTracker.java
│ │ │ ├── MetadataConstants.java
│ │ │ ├── MultiMetadataManager.java
│ │ │ ├── NoOpMetadataManager.java
│ │ │ └── StringValue.java
│ │ ├── metrics
│ │ │ ├── CassandraMetric.java
│ │ │ ├── CassandraPayloadRDDSizeEstimator.java
│ │ │ ├── ChargebackMetricType.java
│ │ │ ├── DataFeedMetricNames.java
│ │ │ ├── DataFeedMetrics.java
│ │ │ ├── DoubleMetric.java
│ │ │ ├── ErrorCauseTagNames.java
│ │ │ ├── IChargebackCalculator.java
│ │ │ ├── IMetricable.java
│ │ │ ├── JobMetricNames.java
│ │ │ ├── JobMetricType.java
│ │ │ ├── JobMetrics.java
│ │ │ ├── LongMetric.java
│ │ │ ├── Metric.java
│ │ │ ├── ModuleTagNames.java
│ │ │ └── TimerMetric.java
│ │ ├── reporters
│ │ │ ├── ConsoleReporter.java
│ │ │ ├── IKafkaDataLossReporter.java
│ │ │ ├── IReporter.java
│ │ │ ├── Reportable.java
│ │ │ └── Reporters.java
│ │ ├── retry
│ │ │ ├── IFunctionThrowsException.java
│ │ │ ├── IRetryStrategy.java
│ │ │ ├── RetryableFunction.java
│ │ │ └── SimpleRetryStrategy.java
│ │ ├── schema
│ │ │ ├── HDFSSchemaService.java
│ │ │ ├── ISchemaService.java
│ │ │ ├── ISinkSchemaManager.java
│ │ │ └── cassandra
│ │ │ │ ├── CassandraDataField.java
│ │ │ │ ├── CassandraMetadataSchemaManager.java
│ │ │ │ ├── CassandraPayload.java
│ │ │ │ ├── CassandraSchema.java
│ │ │ │ ├── CassandraSchemaField.java
│ │ │ │ ├── CassandraSchemaManager.java
│ │ │ │ ├── CassandraSinkSchemaManager.java
│ │ │ │ └── ClusterKey.java
│ │ ├── sinks
│ │ │ ├── ISink.java
│ │ │ ├── SinkStatManager.java
│ │ │ ├── cassandra
│ │ │ │ ├── CassandraClientSink.java
│ │ │ │ ├── CassandraSSTableSink.java
│ │ │ │ └── CassandraSink.java
│ │ │ ├── file
│ │ │ │ ├── AwsFileSink.java
│ │ │ │ ├── FileSink.java
│ │ │ │ └── HdfsFileSink.java
│ │ │ └── hoodie
│ │ │ │ ├── HoodieErrorSink.java
│ │ │ │ ├── HoodieSink.java
│ │ │ │ ├── HoodieSinkOperations.java
│ │ │ │ ├── HoodieWriteStatus.java
│ │ │ │ └── partitioner
│ │ │ │ └── DefaultHoodieDataPartitioner.java
│ │ ├── sources
│ │ │ ├── IRunState.java
│ │ │ ├── ISource.java
│ │ │ ├── IWorkUnitCalculator.java
│ │ │ ├── file
│ │ │ │ ├── FileRunState.java
│ │ │ │ ├── FileSource.java
│ │ │ │ ├── FileSourceDataConverter.java
│ │ │ │ ├── FileWorkUnitCalculator.java
│ │ │ │ │ └── FileWorkUnitCalculator.java
│ │ │ │ └── JSONFileSourceDataConverter.java
│ │ │ ├── hive
│ │ │ │ ├── HiveRunState.java
│ │ │ │ ├── HiveSource.java
│ │ │ │ ├── ParquetWorkUnitCalculator.java
│ │ │ │ └── ParquetWorkUnitCalculatorResult.java
│ │ │ └── kafka
│ │ │ │ ├── IKafkaOffsetSelector.java
│ │ │ │ ├── KafkaBootstrapOffsetSelector.java
│ │ │ │ ├── KafkaOffsetResetter.java
│ │ │ │ ├── KafkaRunState.java
│ │ │ │ ├── KafkaSource.java
│ │ │ │ ├── KafkaWorkUnitCalculator.java
│ │ │ │ └── LogBasedKafkaDataLossReporter.java
│ │ ├── spark
│ │ │ ├── MarmarayKryoSerializer.java
│ │ │ ├── SparkArgs.java
│ │ │ └── SparkFactory.java
│ │ └── status
│ │ │ ├── BaseStatus.java
│ │ │ ├── IStatus.java
│ │ │ └── JobManagerStatus.java
│ │ ├── examples
│ │ └── job
│ │ │ └── ParquetToCassandraJob.java
│ │ └── utilities
│ │ ├── ByteBufferUtil.java
│ │ ├── CassandraSinkUtil.java
│ │ ├── CommandLineUtil.java
│ │ ├── ConfigUtil.java
│ │ ├── ConverterUtil.java
│ │ ├── DateUtil.java
│ │ ├── ErrorExtractor.java
│ │ ├── ErrorTableUtil.java
│ │ ├── FSUtils.java
│ │ ├── GenericRecordUtil.java
│ │ ├── HoodieSinkConverterErrorExtractor.java
│ │ ├── HoodieSinkErrorExtractor.java
│ │ ├── HoodieUtil.java
│ │ ├── JobUtil.java
│ │ ├── JsonSourceConverterErrorExtractor.java
│ │ ├── KafkaSourceConverterErrorExtractor.java
│ │ ├── KafkaUtil.java
│ │ ├── LockManager.java
│ │ ├── LongAccumulator.java
│ │ ├── MapUtil.java
│ │ ├── NumberConstants.java
│ │ ├── ScalaUtil.java
│ │ ├── SchemaUtil.java
│ │ ├── SizeUnit.java
│ │ ├── SparkUtil.java
│ │ ├── StringTypes.java
│ │ ├── StringUtil.java
│ │ ├── TimeUnitUtil.java
│ │ ├── TimestampInfo.java
│ │ ├── cluster
│ │ └── CassandraClusterInfo.java
│ │ └── listener
│ │ ├── SparkEventListener.java
│ │ ├── SparkJobTracker.java
│ │ └── TimeoutManager.java
│ └── test
│ ├── java
│ └── com
│ │ └── uber
│ │ └── marmaray
│ │ ├── TestSparkUtil.java
│ │ ├── common
│ │ ├── actions
│ │ │ └── TestJobDagActions.java
│ │ ├── configuration
│ │ │ ├── TestAwsConfiguration.java
│ │ │ ├── TestCassandraSinkConfiguration.java
│ │ │ ├── TestConfigScopeResolver.java
│ │ │ ├── TestConfiguration.java
│ │ │ ├── TestErrorTableConfiguration.java
│ │ │ ├── TestFileSinkConfiguration.java
│ │ │ ├── TestHadoopConfiguration.java
│ │ │ ├── TestHoodieConfiguration.java
│ │ │ ├── TestHoodieIndexConfiguration.java
│ │ │ ├── TestKafkaConfiguration.java
│ │ │ └── TestKafkaSourceConfiguration.java
│ │ ├── converters
│ │ │ ├── TestAbstractDataConverter.java
│ │ │ ├── TestCassandraDataFrameConverter.java
│ │ │ ├── TestCassandraSchemaConverter.java
│ │ │ ├── TestDataFrameDataConverter.java
│ │ │ ├── TestDataFrameSchemaConverter.java
│ │ │ └── data
│ │ │ │ ├── TestCassandraSinkCQLDataConverter.java
│ │ │ │ ├── TestCassandraSinkDataConverter.java
│ │ │ │ ├── TestFileSinkDataCSVConverter.java
│ │ │ │ ├── TestFileSinkDataJSONConverter.java
│ │ │ │ └── TestSparkSourceDataConverter.java
│ │ ├── data
│ │ │ └── TestRDDWrapper.java
│ │ ├── dataset
│ │ │ └── TestUtilTable.java
│ │ ├── forkoperator
│ │ │ └── TestForkOperator.java
│ │ ├── job
│ │ │ ├── TestExecutionTimeJobExecutionStrategy.java
│ │ │ ├── TestJobDag.java
│ │ │ ├── TestJobManager.java
│ │ │ ├── TestJobSubDag.java
│ │ │ └── TestThreadPoolService.java
│ │ ├── metadata
│ │ │ ├── HDFSTestConstants.java
│ │ │ ├── MemoryMetadataManager.java
│ │ │ ├── TestCassandraBasedMetadataManager.java
│ │ │ ├── TestHDFSDatePartitionManager.java
│ │ │ ├── TestHDFSJobLevelMetadataTracker.java
│ │ │ ├── TestHDFSMetadataManager.java
│ │ │ ├── TestHDFSPartitionManager.java
│ │ │ └── TestHoodieBasedMetadataManager.java
│ │ ├── metrics
│ │ │ ├── TestDataFeedMetrics.java
│ │ │ ├── TestJobMetrics.java
│ │ │ └── TestTimerMetric.java
│ │ ├── retry
│ │ │ └── TestRetryableFunction.java
│ │ ├── schema
│ │ │ ├── TestHDFSSchemaService.java
│ │ │ └── cassandra
│ │ │ │ ├── TestCassandraSinkSchemaManager.java
│ │ │ │ └── TestClusterKey.java
│ │ ├── sinks
│ │ │ ├── TestSinkStatManager.java
│ │ │ ├── cassandra
│ │ │ │ ├── TestCassandraClientSink.java
│ │ │ │ ├── TestCassandraSSTableSink.java
│ │ │ │ └── TestCassandraSinkUtil.java
│ │ │ ├── file
│ │ │ │ ├── FileSinkTestUtil.java
│ │ │ │ ├── TestAwsFileSink.java
│ │ │ │ ├── TestFileSink.java
│ │ │ │ └── TestHdfsFileSink.java
│ │ │ └── hoodie
│ │ │ │ └── TestHoodieSink.java
│ │ ├── sources
│ │ │ ├── file
│ │ │ │ ├── TestFileWorkUnitCalculator.java
│ │ │ │ └── TestJSONFileSourceDataConverter.java
│ │ │ └── hive
│ │ │ │ ├── TestHiveSource.java
│ │ │ │ ├── TestHiveSourceConfiguration.java
│ │ │ │ └── TestParquetWorkUnitCalculator.java
│ │ ├── spark
│ │ │ ├── TestMarmarayKryoSerializer.java
│ │ │ └── TestSparkFactory.java
│ │ ├── status
│ │ │ ├── TestBaseStatus.java
│ │ │ └── TestJobManagerStatus.java
│ │ └── util
│ │ │ ├── AbstractSparkTest.java
│ │ │ ├── AvroPayloadUtil.java
│ │ │ ├── CassandraTestConstants.java
│ │ │ ├── CassandraTestUtil.java
│ │ │ ├── FileHelperUtil.java
│ │ │ ├── FileSinkConfigTestUtil.java
│ │ │ ├── FileTestUtil.java
│ │ │ ├── HiveTestUtil.java
│ │ │ ├── KafkaTestHelper.java
│ │ │ ├── MultiThreadTestCoordinator.java
│ │ │ ├── ParquetWriterUtil.java
│ │ │ ├── SchemaTestUtil.java
│ │ │ ├── SparkTestUtil.java
│ │ │ ├── TestConverterUtil.java
│ │ │ ├── TestDateUtil.java
│ │ │ ├── TestFsUtils.java
│ │ │ ├── TestJobUtil.java
│ │ │ ├── TestLockManager.java
│ │ │ ├── TestMapUtil.java
│ │ │ ├── TestParquetWriterUtil.java
│ │ │ └── TestSchemaUtil.java
│ │ └── utilities
│ │ ├── ResourcesUtils.java
│ │ ├── TestKafkaUtil.java
│ │ ├── TestSizeUnit.java
│ │ ├── TestTimeUnitUtil.java
│ │ └── listener
│ │ └── TestTimeoutManager.java
│ └── resources
│ ├── cassandra.yaml
│ ├── config.yaml
│ ├── configWithScopes.yaml
│ ├── datacenter
│ └── datacenter
│ ├── expectedConfigWithBootstrapScope.yaml
│ ├── expectedConfigWithIncrementalScope.yaml
│ ├── log4j-surefire.properties
│ ├── schemas
│ ├── StringPair.avsc
│ └── schemasource
│ │ ├── myTestSchema.1.avsc
│ │ ├── myTestSchema.2.avsc
│ │ └── wrongSchema.1.avsc
│ ├── setupTable.cql
│ ├── teardownTable.cql
│ └── testData
│ ├── testPartition
│ └── data.parquet
│ └── testPartition1
│ └── testPartition2
│ └── data.parquet
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled source #
2 | ###################
3 | *.com
4 | *.class
5 | *.dll
6 | *.exe
7 | *.a
8 | *.o
9 | *.so
10 | *.node
11 |
12 | # Node Waf Byproducts #
13 | #######################
14 | .lock-wscript
15 | build/
16 | autom4te.cache/
17 |
18 | # Node Modules #
19 | ################
20 | # Better to let npm install these from the package.json definition
21 | # rather than maintain this manually
22 | node_modules/
23 |
24 | # Packages #
25 | ############
26 | # it's better to unpack these files and commit the raw source
27 | # git has its own built in compression methods
28 | *.7z
29 | *.dmg
30 | *.gz
31 | *.iso
32 | *.jar
33 | *.rar
34 | *.tar
35 | *.zip
36 |
37 | # Logs and databases #
38 | ######################
39 | *.log
40 | dump.rdb
41 | *.tap
42 |
43 |
44 | # OS generated files #
45 | ######################
46 | .DS_Store?
47 | .DS_Store
48 | ehthumbs.db
49 | Icon?
50 | Thumbs.db
51 |
52 | # thrift generated files #
53 | ##########################
54 | generated/
55 |
56 | # NodeJS Core Dump
57 | core
58 |
59 | # Jenkins build scripts
60 | rt-jenkins/
61 |
62 | # Coverage Reports
63 | coverage/
64 |
65 | # local docs, scratchboards
66 | localdocs/
67 |
68 | # vi temp files
69 | .*.swp
70 |
71 | # intelliJ
72 | .idea/
73 | *.iml
74 |
75 | # Project specific items (local conf, build dir)
76 | config/local.json
77 | maps-evidence/
78 | *.lst
79 | classes/
80 | target/
81 | *.dat
82 |
83 | # shaded jar pom file
84 | dependency-reduced-pom.xml
85 |
86 | # output of build plugin org.codehaus.mojo build-helper-maven-plugin
87 | test_properties.props
88 |
89 | # gradle generated logs
90 | .gradle
91 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | install:
3 | - mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -q
4 | script: mvn test -B -q
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Marmaray
2 | Copyright (c) 2018 Uber Technologies, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/checkstyles/suppressions.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/docs/images/ForkOperator_ForkFunction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/ForkOperator_ForkFunction.png
--------------------------------------------------------------------------------
/docs/images/High_Level_Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/High_Level_Architecture.png
--------------------------------------------------------------------------------
/docs/images/Marmaray-Secondary.Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Marmaray-Secondary.Logo.png
--------------------------------------------------------------------------------
/docs/images/Marmaray_Primary.Logo_tagline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Marmaray_Primary.Logo_tagline.png
--------------------------------------------------------------------------------
/docs/images/Marmaray_white_Primary.Logo_tagline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Marmaray_white_Primary.Logo_tagline.png
--------------------------------------------------------------------------------
/docs/images/Marmaray_white_Secondary.Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Marmaray_white_Secondary.Logo.png
--------------------------------------------------------------------------------
/docs/images/Metadata_Manager.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/Metadata_Manager.png
--------------------------------------------------------------------------------
/docs/images/avro_payload_conversion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/avro_payload_conversion.png
--------------------------------------------------------------------------------
/docs/images/end_to_end_job_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/b826222cc4dba8f5b83ec6a3c8d667ebdd44d3b8/docs/images/end_to_end_job_flow.png
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | ## ParquetToCassandraJob
2 |
3 | This job demonstrates the ability to load parquet data from HDFS
4 | (either the files underlying a Hive table or raw parquet files that share the same schema) into a Cassandra cluster.
5 |
6 | Requirements:
7 | 1. hadoop
8 | 2. spark 2.1
9 | 3. cassandra
10 |
11 | How to run:
12 |
13 | 1. Create parquet files on HDFS. Can be done in spark shell:
14 | ```
15 | val testDF = Seq( (10, "foo"), (8, "bar"), (19, "baz")).toDF("id", "name")
16 | testDF.coalesce(1).write.format("parquet").parquet("/path/to/testParquet")
17 | ```
18 |
19 | 2. Replace guava in Spark (use guava 19.0). This can be done in the Spark jars directly, or by using spark.yarn.archive to update the libraries used.
20 |
21 | 3. Create the following config file and put it in HDFS
22 | ```
23 | marmaray:
24 | cassandra:
25 | cluster_name: testcluster
26 | datacenter: solo
27 | keyspace: marmaray
28 | partition_keys: id
29 | tablename: test_parquet_cassandra
30 | error_table:
31 | enabled: false
32 | hadoop:
33 | yarn_queue: default
34 | cassandra:
35 | output.thrift.address: localhost
36 | hive:
37 | data_path: /path/to/testParquet
38 | job_name: testParquetToCassandra
39 | lock_manager:
40 | is_enabled: false
41 | zk_base_path: /hoodie/no-op
42 | metadata_manager:
43 | cassandra:
44 | cluster: testcluster
45 | keyspace: marmaray
46 | table_name: marmaray_metadata_table
47 | username:
48 | password:
49 | output.thrift.address: localhost
50 | type: CASSANDRA
51 | job_name: testParquetToCassandra
52 | zookeeper:
53 | port: 2181
54 | quorum: unused
55 | ```
56 |
57 | 4. Run the spark job
58 | ```
59 | ./bin/spark-submit --class com.uber.marmaray.examples.job.ParquetToCassandraJob path/to/marmaray-1.0-SNAPSHOT-jar-with-dependencies.jar -c path/to/test.yaml
60 | ```
61 |
62 | 5. On success, the data will be dispersed to cassandra. You can use CQL to verify
63 | ```
64 | cqlsh> select * from marmaray.test_parquet_cassandra;
65 |
66 | id | name
67 | ----+------
68 | 10 | foo
69 | 19 | baz
70 | 8 | bar
71 |
72 | (3 rows)
73 | ```
74 |
--------------------------------------------------------------------------------
/marmaray-tools/checkstyles:
--------------------------------------------------------------------------------
1 | ../checkstyles
--------------------------------------------------------------------------------
/marmaray-tools/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | 1.8
5 | 1.8
6 |
7 |
8 | com.uber.marmaray
9 | marmaray-base
10 | 1.0-SNAPSHOT
11 |
12 | marmaray-tools
13 | 1.0-SNAPSHOT
14 |
15 |
16 |
17 | com.uber.marmaray
18 | marmaray
19 | 1.0-SNAPSHOT
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/marmaray/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'com.kageiit.jacobo' version '2.0.1' // presumably the plugin that supplies the JacoboTask type used below
3 | }
4 |
5 | description = 'translate jacoco to cobertura'
6 |
7 | dependencies {
8 | }
9 |
10 | task jacobo(type: com.kageiit.jacobo.JacoboTask) { // per the description above: jacoco -> cobertura translation
11 | jacocoReport = file("./target/site/jacoco-ut/jacoco.xml") // jacoco XML report location
12 | coberturaReport = file("./target/site/cobertura/coverage.xml") // cobertura XML report location
13 | srcDirs = ["./src/main/java"] // source directories passed to the task
14 | }
15 |
16 | task noop {
17 | // noop task for when tests don't run
18 | }
19 |
--------------------------------------------------------------------------------
/marmaray/checkstyles:
--------------------------------------------------------------------------------
1 | ../checkstyles
--------------------------------------------------------------------------------
/marmaray/config/sample.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | marmaray:
3 | hadoop:
4 | anything: "???"
5 | hive:
6 | dataPath: ""
7 | jobName: ""
8 | source:
9 | saveCheckpoint: false
10 | hoodie:
11 | tables:
12 | cell_table:
13 | table_name: ""
14 | base_path: ""
15 | schema: ""
16 | parallelism: 1024
17 | row_table:
18 | table_name: ""
19 | base_path: ""
20 | schema: ""
21 | default:
22 | combine_before_insert: true
23 | combine_before_upsert: true
24 | parallelism: 512
25 | kafka:
26 | conn:
27 | bootstrap:
28 | servers: "???"
29 | source:
30 | topicName:
31 | maxMessage:
32 | readParallelism:
33 | cassandra:
34 | output:
35 | native.port: ""
36 | thrift.address: ""
37 | keyspace: "keyspace"
38 | tablename: "tableName"
39 | cluster_name: "clusterName"
40 | column_list: "columnList"
41 | partition_keys: "partitionKeys"
42 | clustering_keys: "clusteringKeys"
43 | inputPath: "inputPath"
44 | partitionType: "partitionType"
45 | time_to_live: 0L
46 |
--------------------------------------------------------------------------------
/marmaray/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | 2.7.4
6 | 1.8
7 | 1.8
8 | 2.7.1
9 |
10 |
11 |
12 |
13 |
14 | org.jacoco
15 | jacoco-maven-plugin
16 |
17 |
18 | org.fortasoft
19 | gradle-maven-plugin
20 |
21 |
22 | maven-assembly-plugin
23 |
24 |
25 |
26 |
27 |
28 |
29 | com.fasterxml.jackson.dataformat
30 | jackson-dataformat-yaml
31 | ${jackson.dataformat.yaml}
32 |
33 |
34 |
35 | org.apache.curator
36 | curator-recipes
37 | ${apache.curator}
38 |
39 |
40 |
41 | org.apache.curator
42 | curator-test
43 | ${apache.curator}
44 | test
45 |
46 |
47 |
48 | com.uber.marmaray
49 | marmaray-base
50 | 1.0-SNAPSHOT
51 |
52 | marmaray
53 | 1.0-SNAPSHOT
54 |
55 |
56 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/DispersalLengthType.java:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2018 Uber Technologies, Inc.
2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
3 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
4 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
5 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 | *
7 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
8 | * of the Software.
9 | *
10 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
11 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
12 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
13 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
14 | * IN THE SOFTWARE.
15 | */
16 |
17 | package com.uber.marmaray.common;
18 |
19 | /**
20 | * {@link DispersalLengthType} indicates whether the dispersed data is from one single day or not:
21 | * 1. SINGLE_DAY: the dispersed data is from one single day
22 | * 2. MULTIPLE_DAY: the dispersed data is not from one single day
23 | */
24 | public enum DispersalLengthType {
25 | SINGLE_DAY,
26 | MULTIPLE_DAY
27 | }
28 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/DispersalType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 |
18 | package com.uber.marmaray.common;
19 |
20 | /**
21 | * {@link DispersalType} defines the two ways a new file can be dispersed to a path:
22 | * VERSION: append the new file to the path with a version id
23 | * OVERWRITE: delete the old files and then add the new file to the path
24 | */
25 | public enum DispersalType {
26 | VERSION,
27 | OVERWRITE
28 | }
29 |
30 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/FileSinkType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 |
18 | package com.uber.marmaray.common;
19 |
20 | /**
21 | * {@link FileSinkType} defines the two options for the file sink destination:
22 | * 1. HDFS
23 | * 2. S3: AWS S3
24 | */
25 | public enum FileSinkType {
26 | HDFS,
27 | S3
28 | }
29 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/IPayload.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common;
18 |
19 | /**
20 | * We explicitly model a generic payload here so that it gives us the flexibility to
21 | * wrap the data with additional metadata as needed.
22 | * NOTE(review): the type parameter looks stripped from this listing; presumably {@code IPayload<D>} - confirm.
23 | * @param <D> data type
24 | */
25 | public interface IPayload {
26 | D getData(); // returns the data of type D carried by this payload
27 | }
28 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/MetadataManagerType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common;
18 |
19 | /**
20 |  * {@link MetadataManagerType} defines the metadata manager type.
21 |  * The earlier notes described an on-disk flavour and a Cassandra-based flavour; how they map to the
22 |  * constants below is inferred from the names, and MULTI was never described - confirm with its users.
23 |  */
24 | public enum MetadataManagerType {
25 |     /** Presumably the on-disk (HDFS-backed) metadata manager. */
26 |     HDFS,
27 |     /** Cassandra-based metadata manager. */
28 |     CASSANDRA,
29 |     /** NOTE(review): semantics are not documented in this file - verify against callers. */
30 |     MULTI
31 | }
32 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/PartitionType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common;
18 |
19 | /**
20 |  * {@link PartitionType} defines how data is partitioned.
21 |  */
22 | public enum PartitionType {
23 |     /** Partition by some defined key. */
24 |     NORMAL,
25 |     /** Partition by date. */
26 |     DATE,
27 |     /** No partition. */
28 |     NONE
29 | }
30 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/WorkUnit.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common;
18 |
19 | import lombok.AllArgsConstructor;
20 | import lombok.Getter;
21 |
22 | /**
23 |  * Holds a single {@code workEntity} string, set at construction time and exposed through a generated getter.
24 |  */
25 | @Getter
26 | @AllArgsConstructor
27 | public class WorkUnit {
28 |
29 |     private final String workEntity;
30 | }
31 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/actions/IJobDagAction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.actions;
18 |
19 | import java.util.HashMap;
20 | import java.util.Map;
21 |
22 | /**
23 |  * {@link IJobDagAction} is the interface for a generic action to execute.
24 |  * {@link JobDagActions} are completely independent and will determine if they should run based on success status.
25 |  */
26 | public interface IJobDagAction {
27 |     /** Default value returned by {@link #getTimeoutSeconds()}. */
28 |     int DEFAULT_TIMEOUT_SECONDS = 120;
29 |     /** Metric tag key under which the implementing class's simple name is reported. */
30 |     String ACTION_TYPE = "action_type";
31 |
32 |     /**
33 |      * Execute the action.
34 |      *
35 |      * @param successful whether the job dag succeeded
36 |      * @return true if action succeeded
37 |      */
38 |     boolean execute(boolean successful);
39 |
40 |     /**
41 |      * Timeout to wait for the action to complete.
42 |      *
43 |      * @return number of seconds to wait for task completion; {@link #DEFAULT_TIMEOUT_SECONDS} unless overridden
44 |      */
45 |     default int getTimeoutSeconds() {
46 |         return DEFAULT_TIMEOUT_SECONDS;
47 |     }
48 |
49 |     /**
50 |      * @return a new mutable map of metric tags to be used for reporting metrics; by default it holds only
51 |      *         {@link #ACTION_TYPE} mapped to the simple name of the implementing class
52 |      */
53 |     default Map<String, String> getMetricTags() {
54 |         final Map<String, String> metricsTags = new HashMap<>();
55 |         metricsTags.put(ACTION_TYPE, this.getClass().getSimpleName());
56 |         return metricsTags;
57 |     }
58 | }
59 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/configuration/HDFSMetadataManagerConfiguration.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.configuration;
18 |
19 | import lombok.Getter;
20 | import lombok.NonNull;
21 |
22 | /**
23 |  * {@link HDFSMetadataManagerConfiguration} holds the HDFS metadata manager settings read from a
24 |  * {@link Configuration}. All HDFSMetadataManagerConfiguration properties start with
25 |  * {@link #HDFS_METADATA_MANAGER_PREFIX}.
26 |  */
27 | @Getter
28 | public class HDFSMetadataManagerConfiguration extends MetadataManagerConfiguration {
29 |     public static final String HDFS_METADATA_MANAGER_PREFIX = METADATA_MANAGER_PREFIX + "HDFS.";
30 |     public static final String BASE_METADATA_PATH = HDFS_METADATA_MANAGER_PREFIX + "job_metadata";
31 |
32 |     /** Value of the {@link #BASE_METADATA_PATH} property, captured at construction time. */
33 |     private final String baseMetadataPath;
34 |
35 |     /**
36 |      * @param conf configuration expected to define {@link #BASE_METADATA_PATH}; the value is unwrapped with an
37 |      *             unconditional {@code get()}, so a missing property surfaces as whatever that call throws
38 |      */
39 |     public HDFSMetadataManagerConfiguration(@NonNull final Configuration conf) {
40 |         super(conf);
41 |         final String metadataPath = getConf().getProperty(BASE_METADATA_PATH).get();
42 |         this.baseMetadataPath = metadataPath;
43 |     }
44 | }
45 |
46 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/configuration/HDFSSchemaServiceConfiguration.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 |
18 | package com.uber.marmaray.common.configuration;
19 |
20 | import com.uber.marmaray.utilities.ConfigUtil;
21 | import lombok.NonNull;
22 | import org.apache.hadoop.fs.Path;
23 |
24 | import java.io.Serializable;
25 | import java.util.Collections;
26 | import java.util.List;
27 |
28 | /**
29 |  * Configuration wrapper for the HDFS schema service: checks the mandatory properties at construction time and
30 |  * exposes the value of {@link #PATH} as a Hadoop {@link Path}.
31 |  */
32 | public class HDFSSchemaServiceConfiguration implements Serializable {
33 |
34 |     // NOTE(review): this prefix has no trailing '.' separator, so PATH resolves to "...hdfs_schema_servicepath".
35 |     // Left untouched because existing job configurations may depend on the exact key - confirm before changing.
36 |     public static final String HDFS_SCHEMA_SERVICE_PREFIX = Configuration.MARMARAY_PREFIX + "hdfs_schema_service";
37 |     public static final String PATH = HDFS_SCHEMA_SERVICE_PREFIX + "path";
38 |
39 |     private final Configuration conf;
40 |
41 |     /**
42 |      * @param conf job configuration; it is passed to {@link ConfigUtil#checkMandatoryProperties} together with
43 |      *             {@link #getMandatoryProperties()} before being stored
44 |      */
45 |     public HDFSSchemaServiceConfiguration(@NonNull final Configuration conf) {
46 |         ConfigUtil.checkMandatoryProperties(conf, getMandatoryProperties());
47 |         this.conf = conf;
48 |     }
49 |
50 |     /**
51 |      * @return the value of the {@link #PATH} property wrapped in a {@link Path}
52 |      */
53 |     public Path getPath() {
54 |         return new Path(this.conf.getProperty(PATH).get());
55 |     }
56 |
57 |     /**
58 |      * @return the property keys checked by the constructor, currently only {@link #PATH}
59 |      */
60 |     public static List<String> getMandatoryProperties() {
61 |         return Collections.singletonList(PATH);
62 |     }
63 | }
64 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/converters/data/DummyHoodieSinkDataConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 |
18 | package com.uber.marmaray.common.converters.data;
19 |
20 | import com.uber.marmaray.common.AvroPayload;
21 | import com.uber.marmaray.common.configuration.Configuration;
22 | import com.uber.marmaray.utilities.ErrorExtractor;
23 |
24 | import lombok.NonNull;
25 |
26 | /**
27 |  * {@link DummyHoodieSinkDataConverter} is necessary where you do not want payloads to be
28 |  * processed by the data converter. Both key lookups below deliberately return {@code null}.
29 |  */
30 | public class DummyHoodieSinkDataConverter extends HoodieSinkDataConverter {
31 |
32 |     /** Builds the converter from a newly constructed {@link Configuration} and {@link ErrorExtractor}. */
33 |     public DummyHoodieSinkDataConverter() {
34 |         super(new Configuration(), new ErrorExtractor());
35 |     }
36 |
37 |     /**
38 |      * @param payload not inspected beyond the {@code @NonNull} check
39 |      * @return always {@code null}
40 |      */
41 |     @Override
42 |     protected String getPartitionPath(@NonNull final AvroPayload payload) throws Exception {
43 |         return null;
44 |     }
45 |
46 |     /**
47 |      * @param payload not inspected beyond the {@code @NonNull} check
48 |      * @return always {@code null}
49 |      */
50 |     @Override
51 |     protected String getRecordKey(@NonNull final AvroPayload payload) throws Exception {
52 |         return null;
53 |     }
54 | }
55 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/converters/data/FileSinkDataConverterFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 |
18 | package com.uber.marmaray.common.converters.data;
19 |
20 | import com.uber.marmaray.common.configuration.Configuration;
21 | import com.uber.marmaray.common.configuration.FileSinkConfiguration;
22 | import com.uber.marmaray.utilities.ErrorExtractor;
23 | import lombok.NonNull;
24 |
25 | /**
26 |  * {@link FileSinkDataConverterFactory} picks the {@link FileSinkDataConverter} implementation that matches the
27 |  * file type read from the {@link FileSinkConfiguration}.
28 |  */
29 | public class FileSinkDataConverterFactory {
30 |     private static final String SEQUENCE_FILE_TYPE = "sequence";
31 |
32 |     protected FileSinkDataConverterFactory() {
33 |     }
34 |
35 |     /**
36 |      * Creates the converter for the file type found in {@code conf}.
37 |      *
38 |      * @param conf job configuration, wrapped in a {@link FileSinkConfiguration} to read the file type
39 |      * @return a {@link FileSinkDataJSONConverter} when the file type is {@code "sequence"}, otherwise (including
40 |      *         when the file type is {@code null}) a {@link FileSinkDataCSVConverter}
41 |      */
42 |     public static FileSinkDataConverter createFileSinkDataConverter(@NonNull final Configuration conf) {
43 |         final FileSinkConfiguration fileConfig = new FileSinkConfiguration(conf);
44 |         // Constant-first comparison: a null file type falls through to the CSV default instead of throwing.
45 |         if (SEQUENCE_FILE_TYPE.equals(fileConfig.getFileType())) {
46 |             return new FileSinkDataJSONConverter(conf, new ErrorExtractor());
47 |         }
48 |         return new FileSinkDataCSVConverter(conf, new ErrorExtractor());
49 |     }
50 | }
51 |
52 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/converters/data/SinkDataConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.converters.data;
18 |
19 | import com.uber.marmaray.common.AvroPayload;
20 | import com.uber.marmaray.common.configuration.Configuration;
21 | import com.uber.marmaray.utilities.ErrorExtractor;
22 | import lombok.NonNull;
23 | import org.apache.avro.Schema;
24 |
25 | /**
26 | * {@link SinkDataConverter} extends {@link AbstractDataConverter}; its constructor only forwards to the parent.
27 | * This class converts records from ({@link Schema}, {@link AvroPayload}) to (OS, OD).
28 | * @param <OS> output schema type
29 | * @param <OD> output data type
30 | */
31 | public abstract class SinkDataConverter extends AbstractDataConverter {
32 | private static final long serialVersionUID = 1L;
33 |
34 | public SinkDataConverter(@NonNull final Configuration conf, @NonNull final ErrorExtractor errorExtractor) {
35 | super(conf, errorExtractor);
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/converters/data/SourceDataConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.converters.data;
18 |
19 | import com.uber.marmaray.common.AvroPayload;
20 | import com.uber.marmaray.common.configuration.Configuration;
21 | import com.uber.marmaray.utilities.ErrorExtractor;
22 | import lombok.NonNull;
23 | import org.apache.avro.Schema;
24 |
25 | /**
26 | * {@link SourceDataConverter} extends {@link AbstractDataConverter}; its constructor only forwards to the parent.
27 | *
28 | * This class converts records from (IS, ID) to ({@link Schema}, {@link AvroPayload}).
29 | * @param <IS> input schema type
30 | * @param <ID> input data type
31 | */
32 | public abstract class SourceDataConverter extends AbstractDataConverter {
33 | public SourceDataConverter(@NonNull final Configuration conf, @NonNull final ErrorExtractor errorExtractor) {
34 | super(conf, errorExtractor);
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/converters/schema/AbstractSchemaConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.converters.schema;
18 |
19 | import org.apache.avro.Schema;
20 |
21 | /**
22 |  * {@link AbstractSchemaConverter} is responsible for 2 way conversion: it converts an external schema to a common
23 |  * schema extending {@link Schema}, as well as vice versa.
24 |  *
25 |  * @param <ES> external schema
26 |  * @param <CS> common schema
27 |  */
28 | public abstract class AbstractSchemaConverter<ES, CS> {
29 |     /**
30 |      * @param commonSchema schema in the common representation
31 |      * @return the corresponding external schema
32 |      */
33 |     public abstract ES convertToExternalSchema(CS commonSchema);
34 |
35 |     /**
36 |      * @param externalSchema schema in the external representation
37 |      * @return the corresponding common schema
38 |      */
39 |     public abstract CS convertToCommonSchema(ES externalSchema);
40 | }
41 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/data/BinaryRawData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.data;
18 |
19 | import javax.xml.bind.DatatypeConverter;
20 |
21 | /**
22 | * Convenience class for wrapping byte[]; {@link #toString()} renders {@code getData()} as a hexadecimal string.
23 | */
24 | public class BinaryRawData extends RawData {
25 |
26 | public BinaryRawData(final byte[] data) {
27 | super(data);
28 | }
29 |
30 | @Override
31 | public String toString() {
32 | return DatatypeConverter.printHexBinary(getData());
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/data/ErrorData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.data;
18 |
19 | import lombok.AllArgsConstructor;
20 | import lombok.Getter;
21 |
22 | /**
23 |  * Pairs an input {@link RawData} record with an error message describing which error check condition failed,
24 |  * which is useful information when debugging error records.
25 |  */
26 | @Getter
27 | @AllArgsConstructor
28 | public class ErrorData implements IData {
29 |
30 |     /** Error message. */
31 |     private final String errMessage;
32 |
33 |     /** The input record the error message refers to. */
34 |     private final RawData rawData;
35 | }
36 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/data/ForkData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.data;
18 |
19 | import java.io.Serializable;
20 | import java.util.ArrayList;
21 | import java.util.List;
22 | import lombok.AllArgsConstructor;
23 | import lombok.Getter;
24 | import lombok.NonNull;
25 |
26 | /**
27 | * Holds forked data: wraps an individual record entry with its forked pipeline keys (copied into an internal list).
28 | */
29 | @AllArgsConstructor
30 | public class ForkData implements Serializable {
31 | @Getter
32 | private final List keys = new ArrayList<>();
33 | @Getter
34 | private final DI record;
35 |
36 | public ForkData(@NonNull final List keys, @NonNull final DI record) {
37 | this.keys.addAll(keys);
38 | this.record = record;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/data/IData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.data;
18 |
19 | import java.io.Serializable;
20 |
21 | /**
22 |  * Marker interface: declares no methods of its own and only adds {@link Serializable} to implementors.
23 |  */
24 | public interface IData extends Serializable {
25 | }
26 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/data/RDDWrapper.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.data;
18 |
19 | import lombok.Getter;
20 | import lombok.NonNull;
21 | import org.apache.spark.api.java.JavaRDD;
22 | import org.apache.spark.api.java.Optional;
23 |
24 | /**
25 | * Convenience class that wraps an RDD of records together with its record count so that the count operation is
26 | * not run repeatedly. It is useful when the count of a given RDD is needed more than once, whether in the form of
27 | * an isEmpty check or as the actual count.
28 | * T is the data type of the RDD records; see {@link #data} for more details.
29 | */
30 | public class RDDWrapper {
31 |
32 | @Getter
33 | @NonNull
34 | final JavaRDD data;
35 |
36 | Optional count; // cached record count; absent until supplied to a constructor or computed by getCount()
37 |
38 | public RDDWrapper(@NonNull final JavaRDD data) { // count not known up front; getCount() computes it on demand
39 | this.data = data;
40 | this.count = Optional.absent();
41 | }
42 |
43 | public RDDWrapper(@NonNull final JavaRDD data, final long count) { // count already known by the caller
44 | this.data = data;
45 | this.count = Optional.of(count);
46 | }
47 |
48 | public long getCount() { // returns the cached count, computing it from the RDD only if it is still absent
49 | if (!count.isPresent()) {
50 | this.count = Optional.of(this.data.count()); // store the result so later calls skip data.count()
51 | }
52 | return count.get();
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/data/RawData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.data;
18 |
19 | import java.io.Serializable;
20 | import lombok.AllArgsConstructor;
21 | import lombok.Getter;
22 | import lombok.ToString;
23 |
24 | @AllArgsConstructor
25 | @ToString
26 | public class RawData implements IData, Serializable { // holder for a single payload value exposed via getData()
27 |
28 | @Getter
29 | private final T data; // wrapped value; final, so it is assigned once through the Lombok all-args constructor
30 | }
31 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/data/RawDataHelper.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.data;
18 |
19 | import lombok.NonNull;
20 |
21 | /**
22 | * Helper class that returns the appropriate RawData type for a payload: BinaryRawData for byte[], else RawData.
23 | */
24 | public class RawDataHelper {
25 |
26 | public static RawData getRawData(@NonNull final T data) {
27 | if (data instanceof byte[]) { // byte arrays get the dedicated binary wrapper
28 | return new BinaryRawData((byte[]) data);
29 | }
30 | return new RawData(data); // every other payload type is wrapped in a plain RawData
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/data/ValidData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.data;
18 |
19 | import lombok.AllArgsConstructor;
20 | import lombok.Getter;
21 |
22 | @AllArgsConstructor
23 | public class ValidData implements IData { // holder for a single payload value exposed via getData()
24 | @Getter
25 | private final T data; // wrapped value; final, so it is assigned once through the Lombok all-args constructor
26 | }
27 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/dataset/ExceptionRecord.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.dataset;
18 |
19 | import lombok.Data;
20 | import lombok.EqualsAndHashCode;
21 | import org.hibernate.validator.constraints.NotEmpty;
22 |
23 | /**
24 | * Simple Java Bean used to construct {@link UtilTable} of {@link ExceptionRecord}
25 | */
26 | @Data
27 | @EqualsAndHashCode(callSuper = true)
28 | public class ExceptionRecord extends UtilRecord {
29 | private String exception;
30 | private String exception_message; // NOTE(review): snake_case presumably mirrors table column names — confirm
31 | private String stacktrace;
32 | private boolean detected_on_driver; // populated from the isDriver constructor argument
33 |
34 | public ExceptionRecord(@NotEmpty final String applicationId,
35 | @NotEmpty final String jobName,
36 | final long jobStartTimestamp,
37 | final long timestamp,
38 | @NotEmpty final String exception,
39 | @NotEmpty final String exceptionMessage,
40 | @NotEmpty final String stacktrace,
41 | final boolean isDriver) {
42 | super(applicationId, jobName, jobStartTimestamp, timestamp); // common fields are stored by UtilRecord
43 | this.exception = exception;
44 | this.exception_message = exceptionMessage;
45 | this.stacktrace = stacktrace;
46 | this.detected_on_driver = isDriver;
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/dataset/MetricRecord.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.dataset;
18 |
19 | import lombok.Data;
20 | import lombok.EqualsAndHashCode;
21 | import org.hibernate.validator.constraints.NotEmpty;
22 |
23 | /**
24 | * Simple Java Bean used to construct {@link UtilTable} of {@link MetricRecord}
25 | */
26 | @Data
27 | @EqualsAndHashCode(callSuper = true)
28 | public class MetricRecord extends UtilRecord {
29 | private String metric_name; // NOTE(review): snake_case presumably mirrors table column names — confirm
30 | private Long metric_value; // boxed Long; assigned from the primitive metricValue constructor argument
31 | private String tags;
32 |
33 | public MetricRecord(@NotEmpty final String applicationId,
34 | @NotEmpty final String jobName,
35 | final long jobStartTimestamp,
36 | final long timestamp,
37 | @NotEmpty final String metricName,
38 | final long metricValue,
39 | @NotEmpty final String tags) {
40 | super(applicationId, jobName, jobStartTimestamp, timestamp); // common fields are stored by UtilRecord
41 | this.metric_name = metricName;
42 | this.metric_value = metricValue;
43 | this.tags = tags;
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/dataset/UtilRecord.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.dataset;
18 |
19 | import lombok.AllArgsConstructor;
20 | import lombok.Data;
21 |
22 | import java.io.Serializable;
23 |
24 | /**
25 | * {@link UtilRecord} is the member type of {@link UtilTable} collections.
26 | * Subclasses of {@link UtilRecord} must conform to the requirements of a
27 | * simple Java Bean so they can be converted to {@link org.apache.spark.sql.Dataset},
28 | * which are:
29 | * 1) Have primitive field types
30 | * 2) Have default values for instance fields
31 | * 3) Have getter and setters for all fields
32 | * 4) Have a constructor with no arguments (NOTE(review): this class declares only an all-args one — confirm)
33 | */
34 | @AllArgsConstructor
35 | @Data
36 | public abstract class UtilRecord implements Serializable {
37 | private String application_id;
38 | private String job_name;
39 | private long job_start_timestamp;
40 | private long timestamp;
41 | }
42 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/exceptions/ForkOperationException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.exceptions;
18 |
19 | /**
20 | * Thrown when a ForkOperation fails.
21 | */
22 | public class ForkOperationException extends JobRuntimeException {
23 |
24 | public ForkOperationException(final String message) { // failure described by a message only
25 | super(message);
26 | }
27 |
28 | public ForkOperationException(final String message, final Throwable t) { // message plus the underlying cause
29 | super(message, t);
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/exceptions/InvalidDataException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.exceptions;
18 |
19 | import com.uber.marmaray.common.AvroPayload;
20 | import com.uber.marmaray.common.converters.data.HoodieSinkDataConverter;
21 | import lombok.NonNull;
22 | import org.hibernate.validator.constraints.NotEmpty;
23 |
24 | /**
25 | * Checked exception that should be thrown when a user defined field in the data is either missing or invalid.
26 | * See {@link HoodieSinkDataConverter#getRecordKey(AvroPayload)}
27 | * for an example.
28 | */
29 | public class InvalidDataException extends Exception {
30 |
31 | public InvalidDataException(@NotEmpty final String message) {
32 | super(message);
33 | }
34 |
35 | public InvalidDataException(@NotEmpty final String message, @NonNull final Throwable t) {
36 | super(message, t); // keeps the underlying cause attached to the exception
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/exceptions/JobRuntimeException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.exceptions;
18 |
19 | import lombok.NonNull;
20 | import org.hibernate.validator.constraints.NotEmpty;
21 |
22 | /**
23 | * Parent runtime (unchecked) exception thrown whenever a job encounters an unrecoverable exception.
24 | */
25 | public class JobRuntimeException extends RuntimeException {
26 | public JobRuntimeException(@NotEmpty final String message) { // failure described by a message only
27 | super(message);
28 | }
29 |
30 | public JobRuntimeException(@NonNull final Throwable t) { // wraps an underlying cause without an extra message
31 | super(t);
32 | }
33 |
34 | public JobRuntimeException(@NotEmpty final String message, @NonNull final Throwable t) { // message plus cause
35 | super(message, t);
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/exceptions/MetadataException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.exceptions;
18 |
19 | /**
20 | * This exception is thrown whenever we encounter a failure while reading or writing metadata.
21 | */
22 | public class MetadataException extends JobRuntimeException {
23 | public MetadataException(final String message) { // failure described by a message only
24 | super(message);
25 | }
26 |
27 | public MetadataException(final String message, final Throwable t) { // message plus the underlying cause
28 | super(message, t);
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/exceptions/MissingPropertyException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.exceptions;
18 |
19 | /**
20 | * Thrown when any required property for a module is not specified.
21 | */
22 | public class MissingPropertyException extends JobRuntimeException {
23 | public MissingPropertyException(final String propertyName) {
24 | super("property:" + propertyName); // the exception message is the property name prefixed with "property:"
25 | }
26 |
27 | public MissingPropertyException(final String message, final Throwable t) { // message is passed through as-is
28 | super(message, t);
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/exceptions/RetryException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.exceptions;
18 |
19 | /**
20 | * This exception is thrown when a task runs out of retries.
21 | */
22 | public class RetryException extends JobRuntimeException {
23 | public RetryException(final String message) { // failure described by a message only
24 | super(message);
25 | }
26 |
27 | public RetryException(final String message, final Throwable t) { // message plus the underlying cause
28 | super(message, t);
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/forkoperator/FilterFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.forkoperator;
18 |
19 | import com.uber.marmaray.common.data.ForkData;
20 | import lombok.AllArgsConstructor;
21 | import org.apache.spark.api.java.function.Function;
22 |
23 | /**
24 | * This is used for filtering the result RDD. The passed in filterKey should be the same as the one used
25 | * in ForkFunction.
26 | * @param
27 | */
28 | @AllArgsConstructor
29 | public class FilterFunction implements Function, Boolean> {
30 |
31 | private final Integer filterKey; // key that a record's fork keys are checked against in execute()
32 |
33 | @Override
34 | public final Boolean call(final ForkData forkData) { // final entry point; delegates to the overridable execute()
35 | return execute(forkData);
36 | }
37 |
38 | /**
39 | * Returns true when the keys of the given forkData contain this function's filterKey, and false otherwise.
40 | * NOTE(review): if this is passed to Spark's filter, a true result retains the entry (not drops it) — confirm.
41 | *
42 | * @param forkData : forkData whose keys are checked; presumably the keys were set by the matching ForkFunction.
43 | */
44 | protected Boolean execute(final ForkData forkData) {
45 | return forkData.getKeys().contains(this.filterKey);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/job/Dag.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.common.job;
2 |
3 | import com.uber.marmaray.common.status.IStatus;
4 | import lombok.Getter;
5 | import lombok.NonNull;
6 | import lombok.Setter;
7 | import org.hibernate.validator.constraints.NotEmpty;
8 |
9 | import java.util.Map;
10 |
11 | public abstract class Dag { // abstract base holding a job name, a data feed name and job manager metadata
12 |
13 | @NotEmpty @Getter @Setter
14 | private String jobName;
15 |
16 | @NotEmpty @Getter @Setter
17 | private String dataFeedName;
18 |
19 | @Getter @Setter
20 | private Map jobManagerMetadata; // not set by the constructor; only assignable through the generated setter
21 |
22 | public Dag(@NonNull final String jobName, @NonNull final String dataFeedName) {
23 | this.dataFeedName = dataFeedName;
24 | this.jobName = jobName;
25 | }
26 |
27 | public abstract IStatus execute(); // implemented by subclasses; returns an IStatus for the run
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/job/DagPayload.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.job;
18 |
19 | import com.uber.marmaray.common.AvroPayload;
20 | import com.uber.marmaray.common.IPayload;
21 | import lombok.AllArgsConstructor;
22 | import lombok.Getter;
23 | import lombok.NonNull;
24 | import org.apache.spark.api.java.JavaRDD;
25 |
26 | /**
27 | * Helper class used to pass a payload (held as a JavaRDD) to a child dag.
28 | */
29 | @AllArgsConstructor
30 | public class DagPayload implements IPayload> {
31 |
32 | @NonNull
33 | @Getter
34 | private final JavaRDD data; // wrapped RDD; final, assigned once through the Lombok all-args constructor
35 | }
36 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/job/IJobExecutionStrategy.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 |
18 | package com.uber.marmaray.common.job;
19 |
20 | import lombok.NonNull;
21 |
22 | import java.util.List;
23 | import java.util.Queue;
24 |
25 | /**
26 | * {@link IJobExecutionStrategy} determines the order for {@link JobDag} execution rather than just relying on
27 | * the submission order.
28 | */
29 | public interface IJobExecutionStrategy {
30 |
31 | List sort(@NonNull final Queue inputJobDags); // takes the queued dags and returns them as an ordered list
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/job/Job.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.job;
18 |
19 | import com.uber.marmaray.common.configuration.Configuration;
20 | import java.util.concurrent.atomic.AtomicBoolean;
21 | import lombok.AllArgsConstructor;
22 | import lombok.NonNull;
23 | import lombok.extern.slf4j.Slf4j;
24 |
25 | @Slf4j
26 | @AllArgsConstructor
27 | public class Job {
28 |
29 | @NonNull
30 | private final JobDag jobDag;
31 | @NonNull
32 | private final Configuration conf;
33 |
34 | public void run() {
35 | final AtomicBoolean isSuccess = new AtomicBoolean(true);
36 | try {
37 | ThreadPoolService.init(this.conf);
38 | jobDag.execute();
39 | } catch (final Throwable t) {
40 | isSuccess.set(false);
41 | throw t;
42 | } finally {
43 | ThreadPoolService.shutdown(!isSuccess.get());
44 | }
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/job/ThreadPoolServiceTier.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.job;
18 |
/**
 * Tiers known to the thread pool service.
 */
public enum ThreadPoolServiceTier {
    /** Tier named for job dags. */
    JOB_DAG_TIER,

    /** Tier named for actions. */
    ACTIONS_TIER
}
23 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metadata/AbstractValue.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metadata;
18 |
19 | import java.io.Serializable;
20 |
/**
 * {@link AbstractValue} wraps a generic data value type that is required to be serializable. The data represents
 * some metadata that will be stored for a job.
 *
 * @param <D> type of the wrapped data value
 */
public abstract class AbstractValue<D> implements Serializable {

    /**
     * @return the wrapped data value
     */
    abstract D getValue();
}
29 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metadata/MetadataConstants.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metadata;
18 |
19 | import com.uber.marmaray.common.configuration.Configuration;
20 | import com.uber.marmaray.common.exceptions.JobRuntimeException;
21 |
22 | public final class MetadataConstants {
23 | public static final String TEMP_FILE_EXTENSION = ".tmp";
24 | public static final String CHECKPOINT_KEY = "checkpoint";
25 | public static final String JOBMANAGER_PREFIX = Configuration.MARMARAY_PREFIX + "jobmanager";
26 | public static final String JOBMANAGER_METADATA_PREFIX = JOBMANAGER_PREFIX + ".metadata";
27 | public static final String JOBMANAGER_METADATA_ENABLED = JOBMANAGER_METADATA_PREFIX + ".enabled";
28 | public static final String JOBMANAGER_METADATA_HDFS_PREFIX = JOBMANAGER_METADATA_PREFIX + ".hdfs";
29 | public static final String JOBMANAGER_METADATA_HDFS_BASEPATH = JOBMANAGER_METADATA_HDFS_PREFIX + ".basePath";
30 | public static final String JOBMANAGER_METADATA_STORAGE = JOBMANAGER_METADATA_PREFIX + ".sourceType";
31 | public static final String JOBMANAGER_METADATA_SOURCE_HDFS = "HDFS";
32 |
33 | private MetadataConstants() {
34 | throw new JobRuntimeException("This class should never be instantiated");
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metadata/StringValue.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metadata;
18 |
19 | import lombok.AllArgsConstructor;
20 | import lombok.EqualsAndHashCode;
21 |
22 | /**
23 | * {@link StringValue} extends {@link AbstractValue} and wraps a String that represents the job metadata
24 | * that will be stored in HDFS
25 | */
26 | @AllArgsConstructor
27 | @EqualsAndHashCode(callSuper = false)
28 | public class StringValue extends AbstractValue {
29 |
30 | private final String value;
31 |
32 | @Override
33 | public String getValue() {
34 | return this.value;
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/CassandraMetric.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.common.metrics;
2 |
3 | import com.uber.marmaray.common.configuration.CassandraSinkConfiguration;
4 | import com.uber.marmaray.utilities.StringTypes;
5 | import lombok.NonNull;
6 |
7 | import java.util.Map;
8 |
9 | public class CassandraMetric {
10 |
11 | public static final String TABLE_NAME_TAG = "tableName";
12 |
13 | public static Map createTableNameTags(@NonNull final CassandraSinkConfiguration cassandraConf) {
14 | return DataFeedMetrics.createAdditionalTags(TABLE_NAME_TAG,
15 | cassandraConf.getKeyspace() + StringTypes.UNDERSCORE + cassandraConf.getTableName());
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/CassandraPayloadRDDSizeEstimator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
19 | import com.uber.marmaray.common.data.RDDWrapper;
20 | import com.uber.marmaray.common.schema.cassandra.CassandraPayload;
21 |
22 | import java.util.List;
23 |
24 | public class CassandraPayloadRDDSizeEstimator {
25 | private final int NO_OF_SAMPLE_ROWS = 1000;
26 |
27 | public long estimateTotalSize(final RDDWrapper rdd) {
28 | final long totalRows = rdd.getCount();
29 |
30 | final List sampleRows = rdd.getData().takeSample(true, NO_OF_SAMPLE_ROWS);
31 |
32 | final long byteSize = sampleRows
33 | .stream()
34 | .map(element -> element.estimateRowSize())
35 | .reduce((size, accumulator) -> size + accumulator)
36 | .orElse(0);
37 |
38 | final long totalSize = (long) (byteSize * (((totalRows) * 1.0) / (NO_OF_SAMPLE_ROWS)));
39 |
40 | return totalSize;
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/ChargebackMetricType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
/**
 * Kinds of cost that can be recorded for chargeback.
 */
public enum ChargebackMetricType {
    /** Count the number of rows. */
    ROW_COUNT,

    /** Count the runtime of the execution. */
    RUN_TIME,

    /** Count the number of executors used. */
    EXECUTORS
}
27 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/DoubleMetric.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
19 | import lombok.Getter;
20 | import lombok.ToString;
21 | import org.hibernate.validator.constraints.NotEmpty;
22 |
23 | /**
24 | * {@link Double} implementation of {@link Metric}
25 | */
26 | @Getter
27 | @ToString
28 | public class DoubleMetric extends Metric {
29 | public DoubleMetric(final String metricName) {
30 | this("metric-type", 0.0);
31 | }
32 |
33 | public DoubleMetric(@NotEmpty final String metricName, final double metricValue) {
34 | super(metricName, metricValue);
35 | this.addTag("metric-type", "double");
36 | }
37 |
38 | public void setMetricValue(final double metricValue) {
39 | this.metricValue = metricValue;
40 | }
41 |
42 | @Override
43 | public String toString() {
44 | return super.toString();
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/IChargebackCalculator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
19 | import java.util.Map;
20 |
21 | /**
22 | * Calculator to determine cost of a particular chargeback instance.
23 | *
24 | * Computation can be over a number of fields.
25 | */
26 | public interface IChargebackCalculator {
27 |
28 | /**
29 | * Compute the cost based on the input fields.
30 | * @return the final cost value
31 | */
32 | Map computeCost();
33 |
34 | /**
35 | * Add a cost for a particular datafeed
36 | * @param datafeedName the name of the datafeed to add cost to
37 | * @param metricType the type of cost to add
38 | * @param value the cost value to add
39 | */
40 | void addCost(String datafeedName, ChargebackMetricType metricType, Long value);
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/IMetricable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
19 | import lombok.NonNull;
20 |
21 | public interface IMetricable {
22 |
23 | /*
24 | Take a DataFeedMetrics to report metrics to, if present
25 | */
26 | void setDataFeedMetrics(@NonNull final DataFeedMetrics dataFeedMetrics);
27 |
28 | /*
29 | Take a JobMetrics to report metrics to, if present
30 | */
31 | void setJobMetrics(@NonNull final JobMetrics jobMetrics);
32 | }
33 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/JobMetricNames.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
19 | import com.uber.marmaray.common.exceptions.JobRuntimeException;
20 |
21 | public final class JobMetricNames {
22 | public static final String RUN_JOB_DAG_LATENCY_MS = "run_job_dag_latency_ms";
23 | public static final String RUN_JOB_ERROR_COUNT = "run_job_error_count";
24 | public static final String JOB_SETUP_LATENCY_MS = "job_setup_latency_ms";
25 |
26 | // JobLockManager-related metrics
27 | public static final String JOB_MANAGER_LOCK_TIME_MS = "job_manager_lock_time_ms";
28 | public static final String JOB_DAG_LOCK_TIME_MS = "job_dag_lock_time_ms";
29 |
30 | private JobMetricNames() {
31 | throw new JobRuntimeException("Class should never be instantiated");
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/JobMetricType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
/**
 * Known {@link JobMetrics} names, grouped by what they describe.
 */
public enum JobMetricType {
    /* Metrics in context of the job. */
    RUNTIME,
    STAGE_RUNTIME,
    SIZE,

    /* Resource usage metrics. */
    DRIVER_MEMORY,
    EXECUTOR_MEMORY,
    NUM_EXECUTORS,

    /* Error count metrics. */
    RUN_ERROR_COUNT
}
36 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/LongMetric.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
19 | import lombok.Getter;
20 | import lombok.ToString;
21 | import org.hibernate.validator.constraints.NotEmpty;
22 |
23 | /**
24 | * {@link Long} implementation of {@link Metric}
25 | */
26 | @Getter
27 | @ToString
28 | public class LongMetric extends Metric {
29 | public LongMetric(final String metricName) {
30 | super(metricName);
31 | }
32 |
33 | public LongMetric(@NotEmpty final String metricName, final long metricValue) {
34 | super(metricName, metricValue);
35 | this.addTag("metric-type", "long");
36 | }
37 |
38 | public void setMetricValue(final long metricValue) {
39 | this.metricValue = metricValue;
40 | }
41 |
42 | @Override
43 | public String toString() {
44 | return super.toString();
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/ModuleTagNames.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.metrics;
18 |
19 | import com.uber.marmaray.common.exceptions.JobRuntimeException;
20 |
21 | public final class ModuleTagNames {
22 | public static final String SOURCE = "source";
23 | public static final String SINK = "sink";
24 | public static final String SCHEMA_MANAGER = "schema_manager";
25 | public static final String SOURCE_CONVERTER = "source_converter";
26 | public static final String SINK_CONVERTER = "sink_converter";
27 | public static final String SUB_DAG = "sub_dag";
28 | public static final String WORK_UNIT_CALCULATOR = "work_unit_calc";
29 | public static final String JOB_MANAGER = "job_manager";
30 | public static final String JOB_DAG = "job_dag";
31 | public static final String METADATA_MANAGER = "metadata_manager";
32 | public static final String SINK_CONFIGURATION = "sink_configuration";
33 | public static final String CONFIGURATION = "config";
34 |
35 | private ModuleTagNames() {
36 | throw new JobRuntimeException("Class should never be instantiated");
37 | }
38 | }
39 |
40 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/reporters/ConsoleReporter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.reporters;
18 |
19 | import com.uber.marmaray.common.metrics.Metric;
20 | import lombok.NonNull;
21 | import lombok.extern.slf4j.Slf4j;
22 |
23 | /**
24 | * Console implementation of {@link IReporter}
25 | */
26 | @Slf4j
27 | public class ConsoleReporter implements IReporter {
28 | public void gauge(@NonNull final Metric m) {
29 | final String metricName = m.getMetricName();
30 | final String metricValue = m.getMetricValue().toString();
31 | final String tags = m.getTags().toString();
32 |
33 | log.info("{}={}, Tags: {}", metricName, metricValue, tags);
34 | }
35 |
36 | public void finish() {
37 | // do nothing
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/reporters/IKafkaDataLossReporter.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.common.reporters;
2 |
3 | import org.hibernate.validator.constraints.NotEmpty;
4 |
5 | /**
6 | * {@link IKafkaDataLossReporter} reports Kafka data loss
7 | */
8 | public interface IKafkaDataLossReporter {
9 | void reportDataLoss(@NotEmpty final String kafkaTopicName,
10 | final long totalNumberOfMessagesLost);
11 | }
12 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/reporters/IReporter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.reporters;
18 |
19 | import com.uber.marmaray.common.metrics.Metric;
20 | import lombok.NonNull;
21 |
22 | /**
23 | * {@link IReporter} gauges {@link Metric} to a sink
24 | */
25 | public interface IReporter {
26 | void gauge(@NonNull final T m);
27 |
28 | void finish();
29 | }
30 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/reporters/Reportable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.reporters;
18 |
19 | import lombok.NonNull;
20 |
21 | /**
22 | * {@link Reportable} that can be gauged by {@link IReporter}
23 | */
24 | public interface Reportable {
25 | void gaugeAll(@NonNull final IReporter reporter);
26 | }
27 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/retry/IFunctionThrowsException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.retry;
18 |
19 | import lombok.NonNull;
20 |
21 | /**
22 | * {@link IFunctionThrowsException} is the interface for an function throwing exceptions.
23 | */
24 | @FunctionalInterface
25 | public interface IFunctionThrowsException {
26 | R apply(@NonNull final T t) throws Exception;
27 | }
28 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/retry/IRetryStrategy.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.retry;
18 |
19 | import com.uber.marmaray.common.exceptions.RetryException;
20 |
21 | /**
22 | * {@link IRetryStrategy} determines if a function should be retried or not. retryMessage returns
23 | * the description of the current attempt.
24 | */
25 | public interface IRetryStrategy {
26 | boolean shouldRetry() throws RetryException;
27 | String retryMessage();
28 | }
29 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/schema/ISinkSchemaManager.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.schema;
18 |
/**
 * {@link ISinkSchemaManager} is an interface that declares no methods.
 * NOTE(review): presumably a common type for sink-side schema managers; confirm against implementers.
 */
public interface ISinkSchemaManager {
}
21 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/schema/cassandra/CassandraDataField.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.schema.cassandra;
18 |
19 | import lombok.AllArgsConstructor;
20 | import lombok.Getter;
21 |
22 | import java.io.Serializable;
23 | import java.nio.ByteBuffer;
24 |
25 | @AllArgsConstructor
26 | public class CassandraDataField implements Serializable {
27 |
28 | @Getter
29 | private final ByteBuffer columnKey;
30 |
31 | @Getter
32 | private final ByteBuffer value;
33 | }
34 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/schema/cassandra/CassandraSchema.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.schema.cassandra;
18 |
19 | import java.io.Serializable;
20 | import java.util.ArrayList;
21 | import java.util.List;
22 | import lombok.Getter;
23 |
24 | public class CassandraSchema implements Serializable {
25 |
26 | @Getter
27 | private final String keySpace;
28 |
29 | @Getter
30 | private final String tableName;
31 |
32 | @Getter
33 | private final List fields;
34 |
35 | public CassandraSchema(final String keySpace, final String tableName) {
36 | this.keySpace = keySpace;
37 | this.tableName = tableName;
38 | this.fields = new ArrayList<>();
39 | }
40 |
41 | public CassandraSchema(final String keySpace, final String tableName, final List fields) {
42 | this.keySpace = keySpace;
43 | this.tableName = tableName;
44 | this.fields = fields;
45 | }
46 |
47 | public void addField(final CassandraSchemaField field) {
48 | fields.add(field);
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/sinks/ISink.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.sinks;
18 |
19 | import com.uber.marmaray.common.AvroPayload;
20 | import com.uber.marmaray.common.metrics.IMetricable;
21 | import org.apache.spark.api.java.JavaRDD;
22 |
23 | /**
24 | * Implementations of {@link ISink} should implement {@link #write(JavaRDD)}.
25 | * TODO: ISink#write() should return Stats object.
26 | */
27 | public interface ISink extends IMetricable {
28 |
29 | /**
30 | * It writes data to sink. If there is any exception while writing then it throws
31 | * {@link com.uber.marmaray.common.exceptions.JobRuntimeException}. All invalid / error records will be
32 | * written to ErrorTable.
33 | * @param data data to write to sink
34 | */
35 | void write(JavaRDD data);
36 | }
37 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/sinks/hoodie/HoodieSinkOperations.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.sinks.hoodie;
18 |
19 | import com.uber.marmaray.common.configuration.HoodieConfiguration;
20 | import lombok.NonNull;
21 | import org.hibernate.validator.constraints.NotEmpty;
22 |
23 | /**
24 | * Helper class which invokes various operations before / after certain {@link HoodieSink} actions. See individual
25 | * operations for more details.
26 | */
27 | public class HoodieSinkOperations {
28 |
29 | /**
30 | * Gets executed before calling {@link HoodieSink}'s underlying commit action. All the parquet write operations are
31 | * guaranteed to finish before this. Only thing left is the final commit file creation.
32 | */
33 | public void preCommitOperations(@NonNull final HoodieConfiguration hoodieConfiguration,
34 | @NotEmpty final String commitTime) {
35 | // do nothing.
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/sinks/hoodie/HoodieWriteStatus.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Uber Technologies, Inc.
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4 | * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
6 | * permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 | *
8 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions
9 | * of the Software.
10 | *
11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
12 | * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
14 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15 | * IN THE SOFTWARE.
16 | */
17 | package com.uber.marmaray.common.sinks.hoodie;
18 |
19 | import com.uber.hoodie.WriteStatus;
20 | import com.uber.hoodie.common.model.HoodieRecord;
21 | import java.util.Map;
22 | import java.util.Optional;
23 |
24 | /**
25 | * Helper class to change default behavior for {@link WriteStatus}
26 | */
27 | public class HoodieWriteStatus extends WriteStatus {
28 |
29 | private long totalRecords;
30 |
31 | /**
32 | * Overriding {@link #markSuccess(HoodieRecord, Optional)} to avoid caching
33 | * {@link com.uber.hoodie.common.model.HoodieKey} for successfully written hoodie records.
34 | */
35 | @Override
36 | public void markSuccess(final HoodieRecord record, final Optional