├── marmaray
├── checkstyles
├── src
│ ├── test
│ │ ├── resources
│ │ │ ├── datacenter
│ │ │ │ └── datacenter
│ │ │ ├── teardownTable.cql
│ │ │ ├── testData
│ │ │ │ ├── testPartition
│ │ │ │ │ └── data.parquet
│ │ │ │ └── testPartition1
│ │ │ │ │ └── testPartition2
│ │ │ │ │ └── data.parquet
│ │ │ ├── setupTable.cql
│ │ │ ├── schemas
│ │ │ │ ├── schemasource
│ │ │ │ │ ├── wrongSchema.1.avsc
│ │ │ │ │ ├── myTestSchema.1.avsc
│ │ │ │ │ └── myTestSchema.2.avsc
│ │ │ │ └── StringPair.avsc
│ │ │ ├── log4j-surefire.properties
│ │ │ ├── config.yaml
│ │ │ ├── expectedConfigWithIncrementalScope.yaml
│ │ │ ├── expectedConfigWithBootstrapScope.yaml
│ │ │ └── configWithScopes.yaml
│ │ └── java
│ │ │ └── com
│ │ │ └── uber
│ │ │ └── marmaray
│ │ │ ├── utilities
│ │ │ ├── ResourcesUtils.java
│ │ │ └── TestTimeUnitUtil.java
│ │ │ └── common
│ │ │ ├── spark
│ │ │ └── TestMarmarayKryoSerializer.java
│ │ │ ├── metadata
│ │ │ └── HDFSTestConstants.java
│ │ │ ├── util
│ │ │ ├── MultiThreadTestCoordinator.java
│ │ │ ├── FileTestUtil.java
│ │ │ ├── TestJobUtil.java
│ │ │ ├── HiveTestUtil.java
│ │ │ ├── TestMapUtil.java
│ │ │ ├── TestDateUtil.java
│ │ │ └── CassandraTestConstants.java
│ │ │ ├── data
│ │ │ └── TestRDDWrapper.java
│ │ │ ├── schema
│ │ │ └── cassandra
│ │ │ │ └── TestClusterKey.java
│ │ │ ├── status
│ │ │ └── TestJobManagerStatus.java
│ │ │ └── metrics
│ │ │ └── TestTimerMetric.java
│ └── main
│ │ └── java
│ │ └── com
│ │ └── uber
│ │ └── marmaray
│ │ ├── common
│ │ ├── reporters
│ │ │ ├── IKafkaDataLossReporter.java
│ │ │ ├── Reportable.java
│ │ │ ├── IReporter.java
│ │ │ └── ConsoleReporter.java
│ │ ├── sources
│ │ │ ├── kafka
│ │ │ │ ├── LogBasedKafkaDataLossReporter.java
│ │ │ │ ├── KafkaOffsetResetter.java
│ │ │ │ ├── IKafkaOffsetSelector.java
│ │ │ │ └── KafkaRunState.java
│ │ │ ├── IRunState.java
│ │ │ ├── file
│ │ │ │ ├── FileRunState.java
│ │ │ │ └── FileSourceDataConverter.java
│ │ │ ├── hive
│ │ │ │ ├── HiveRunState.java
│ │ │ │ └── ParquetWorkUnitCalculatorResult.java
│ │ │ └── ISource.java
│ │ ├── metrics
│ │ │ ├── CassandraMetric.java
│ │ │ ├── ChargebackMetricType.java
│ │ │ ├── JobMetricType.java
│ │ │ ├── IMetricable.java
│ │ │ ├── JobMetricNames.java
│ │ │ ├── IChargebackCalculator.java
│ │ │ ├── LongMetric.java
│ │ │ ├── DoubleMetric.java
│ │ │ ├── CassandraPayloadRDDSizeEstimator.java
│ │ │ └── ModuleTagNames.java
│ │ ├── job
│ │ │ ├── Dag.java
│ │ │ ├── ThreadPoolServiceTier.java
│ │ │ ├── IJobExecutionStrategy.java
│ │ │ ├── DagPayload.java
│ │ │ └── Job.java
│ │ ├── schema
│ │ │ ├── ISinkSchemaManager.java
│ │ │ └── cassandra
│ │ │ │ ├── CassandraDataField.java
│ │ │ │ └── CassandraSchema.java
│ │ ├── data
│ │ │ ├── IData.java
│ │ │ ├── ValidData.java
│ │ │ ├── RawData.java
│ │ │ ├── RawDataHelper.java
│ │ │ ├── BinaryRawData.java
│ │ │ ├── ErrorData.java
│ │ │ ├── ForkData.java
│ │ │ └── RDDWrapper.java
│ │ ├── WorkUnit.java
│ │ ├── FileSinkType.java
│ │ ├── MetadataManagerType.java
│ │ ├── DispersalLengthType.java
│ │ ├── IPayload.java
│ │ ├── PartitionType.java
│ │ ├── DispersalType.java
│ │ ├── retry
│ │ │ ├── IFunctionThrowsException.java
│ │ │ └── IRetryStrategy.java
│ │ ├── metadata
│ │ │ ├── AbstractValue.java
│ │ │ ├── StringValue.java
│ │ │ └── MetadataConstants.java
│ │ ├── exceptions
│ │ │ ├── RetryException.java
│ │ │ ├── ForkOperationException.java
│ │ │ ├── MetadataException.java
│ │ │ ├── MissingPropertyException.java
│ │ │ ├── JobRuntimeException.java
│ │ │ └── InvalidDataException.java
│ │ ├── converters
│ │ │ ├── schema
│ │ │ │ └── AbstractSchemaConverter.java
│ │ │ └── data
│ │ │ │ ├── SourceDataConverter.java
│ │ │ │ ├── SinkDataConverter.java
│ │ │ │ ├── DummyHoodieSinkDataConverter.java
│ │ │ │ └── FileSinkDataConverterFactory.java
│ │ ├── status
│ │ │ ├── IStatus.java
│ │ │ ├── BaseStatus.java
│ │ │ └── JobManagerStatus.java
│ │ ├── sinks
│ │ │ ├── ISink.java
│ │ │ └── hoodie
│ │ │ │ ├── HoodieSinkOperations.java
│ │ │ │ ├── HoodieWriteStatus.java
│ │ │ │ └── partitioner
│ │ │ │ └── DefaultHoodieDataPartitioner.java
│ │ ├── dataset
│ │ │ ├── UtilRecord.java
│ │ │ ├── MetricRecord.java
│ │ │ └── ExceptionRecord.java
│ │ ├── configuration
│ │ │ ├── HDFSMetadataManagerConfiguration.java
│ │ │ └── HDFSSchemaServiceConfiguration.java
│ │ ├── forkoperator
│ │ │ └── FilterFunction.java
│ │ ├── actions
│ │ │ └── IJobDagAction.java
│ │ └── spark
│ │ │ └── SparkArgs.java
│ │ └── utilities
│ │ ├── CassandraSinkUtil.java
│ │ ├── TimeUnitUtil.java
│ │ ├── NumberConstants.java
│ │ ├── JsonSourceConverterErrorExtractor.java
│ │ ├── SizeUnit.java
│ │ ├── StringUtil.java
│ │ ├── DateUtil.java
│ │ ├── StringTypes.java
│ │ ├── TimestampInfo.java
│ │ ├── ErrorExtractor.java
│ │ ├── KafkaSourceConverterErrorExtractor.java
│ │ ├── cluster
│ │ └── CassandraClusterInfo.java
│ │ └── ByteBufferUtil.java
├── build.gradle
├── config
│ └── sample.yaml
└── pom.xml
├── marmaray-tools
├── checkstyles
└── pom.xml
├── docs
└── images
│ ├── Metadata_Manager.png
│ ├── end_to_end_job_flow.png
│ ├── High_Level_Architecture.png
│ ├── Marmaray-Secondary.Logo.png
│ ├── avro_payload_conversion.png
│ ├── ForkOperator_ForkFunction.png
│ ├── Marmaray_Primary.Logo_tagline.png
│ ├── Marmaray_white_Secondary.Logo.png
│ └── Marmaray_white_Primary.Logo_tagline.png
├── .travis.yml
├── checkstyles
└── suppressions.xml
├── LICENSE
├── .gitignore
└── examples
└── README.md
/marmaray/checkstyles:
--------------------------------------------------------------------------------
1 | ../checkstyles
--------------------------------------------------------------------------------
/marmaray-tools/checkstyles:
--------------------------------------------------------------------------------
1 | ../checkstyles
--------------------------------------------------------------------------------
/marmaray/src/test/resources/datacenter/datacenter:
--------------------------------------------------------------------------------
1 | test_dc
2 |
--------------------------------------------------------------------------------
/docs/images/Metadata_Manager.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/Metadata_Manager.png
--------------------------------------------------------------------------------
/docs/images/end_to_end_job_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/end_to_end_job_flow.png
--------------------------------------------------------------------------------
/docs/images/High_Level_Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/High_Level_Architecture.png
--------------------------------------------------------------------------------
/docs/images/Marmaray-Secondary.Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/Marmaray-Secondary.Logo.png
--------------------------------------------------------------------------------
/docs/images/avro_payload_conversion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/avro_payload_conversion.png
--------------------------------------------------------------------------------
/docs/images/ForkOperator_ForkFunction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/ForkOperator_ForkFunction.png
--------------------------------------------------------------------------------
/docs/images/Marmaray_Primary.Logo_tagline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/Marmaray_Primary.Logo_tagline.png
--------------------------------------------------------------------------------
/docs/images/Marmaray_white_Secondary.Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/Marmaray_white_Secondary.Logo.png
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | install:
3 | - mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -q
4 | script: mvn test -B -q
5 |
6 |
--------------------------------------------------------------------------------
/marmaray/src/test/resources/teardownTable.cql:
--------------------------------------------------------------------------------
1 | USE marmaray;
2 |
3 | DROP TABLE IF EXISTS crossfit_gyms;
4 | DROP KEYSPACE IF EXISTS marmaray;
5 |
--------------------------------------------------------------------------------
/docs/images/Marmaray_white_Primary.Logo_tagline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/docs/images/Marmaray_white_Primary.Logo_tagline.png
--------------------------------------------------------------------------------
/marmaray/src/test/resources/testData/testPartition/data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/marmaray/src/test/resources/testData/testPartition/data.parquet
--------------------------------------------------------------------------------
/marmaray/src/test/resources/testData/testPartition1/testPartition2/data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/marmaray/HEAD/marmaray/src/test/resources/testData/testPartition1/testPartition2/data.parquet
--------------------------------------------------------------------------------
/marmaray/src/test/resources/setupTable.cql:
--------------------------------------------------------------------------------
1 | CREATE KEYSPACE IF NOT EXISTS marmaray WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };
2 |
3 | USE marmaray;
4 |
5 | CREATE TABLE IF NOT EXISTS devtable4 ( astr text PRIMARY KEY ) ;
6 |
--------------------------------------------------------------------------------
/marmaray/src/test/resources/schemas/schemasource/wrongSchema.1.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "wrongSchema",
4 | "schemaVersion": 1,
5 | "fields": [
6 | { "name": "foo", "type": ["null", "string"], "default": null }
7 | ]
8 | }
9 |
--------------------------------------------------------------------------------
/marmaray/src/test/resources/schemas/StringPair.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "StringPair",
4 | "doc": "A pair of strings.",
5 | "fields": [
6 | {"name": "left", "type": "string"},
7 | {"name": "right", "type": "string"}
8 | ]
9 | }
10 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/reporters/IKafkaDataLossReporter.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.common.reporters;
2 |
3 | import org.hibernate.validator.constraints.NotEmpty;
4 |
5 | /**
6 | * {@link IKafkaDataLossReporter} reports Kafka data loss. Implementations are told which topic lost messages and how many were lost.
7 | */
8 | public interface IKafkaDataLossReporter {
9 | void reportDataLoss(@NotEmpty final String kafkaTopicName, // name of the Kafka topic that lost messages
10 | final long totalNumberOfMessagesLost); // total number of messages lost for that topic
11 | }
12 |
--------------------------------------------------------------------------------
/marmaray/src/test/resources/log4j-surefire.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=WARN, A1
2 | log4j.category.com.uber=WARN
3 | log4j.category.com.uber.hoodie.common.utils=WARN
4 | log4j.category.org.apache.parquet.hadoop=WARN
5 |
6 | # A1 is set to be a ConsoleAppender.
7 | log4j.appender.A1=org.apache.log4j.ConsoleAppender
8 | # A1 uses PatternLayout.
9 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout
10 | log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
11 |
--------------------------------------------------------------------------------
/marmaray/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'com.kageiit.jacobo' version '2.0.1'
3 | }
4 |
5 | description = 'translate jacoco to cobertura'
6 |
7 | dependencies {
8 | }
9 |
10 | task jacobo(type: com.kageiit.jacobo.JacoboTask) { // per the project description above: translates the JaCoCo report to Cobertura
11 | jacocoReport = file("./target/site/jacoco-ut/jacoco.xml") // JaCoCo XML report; presumably the task's input -- confirm against the plugin docs
12 | coberturaReport = file("./target/site/cobertura/coverage.xml") // Cobertura XML report; presumably the task's output
13 | srcDirs = ["./src/main/java"] // main Java source root handed to the task
14 | }
15 |
16 | task noop {
17 | // noop task for when tests don't run
18 | }
19 |
--------------------------------------------------------------------------------
/checkstyles/suppressions.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/marmaray/src/test/resources/config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | marmaray:
3 | scalar_configs:
4 | integer: 1
5 | long: 1234567890123
6 | double: 1.23
7 | boolean: true
8 | string_configs:
9 | string1: "string1"
10 | stringlist:
11 | " string2 ": " string2 "
12 | retry_strategy:
13 | default_strategy: "SimpleRetryStrategy"
14 | simple:
15 | num_of_retries: 3
16 | wait_time_in_ms: 1000
17 | hadoop:
18 | mapreduce.map.memory.mb: 512
19 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/sources/kafka/LogBasedKafkaDataLossReporter.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.common.sources.kafka;
2 |
3 | import com.uber.marmaray.common.reporters.IKafkaDataLossReporter;
4 | import lombok.extern.slf4j.Slf4j;
5 | import org.hibernate.validator.constraints.NotEmpty;
6 |
7 | /**
8 |  * {@link IKafkaDataLossReporter} implementation that reports Kafka data loss by writing an
9 |  * INFO-level log line through the logger provided by {@code @Slf4j}.
10 |  */
11 | @Slf4j
12 | public class LogBasedKafkaDataLossReporter implements IKafkaDataLossReporter {
13 |
14 |     /**
15 |      * Logs the topic name and the number of messages lost.
16 |      *
17 |      * @param kafkaTopicName name of the Kafka topic that lost messages
18 |      * @param totalNumberOfMessagesLost total number of messages lost for that topic
19 |      */
20 |     @Override
21 |     public void reportDataLoss(@NotEmpty final String kafkaTopicName, final long totalNumberOfMessagesLost) {
22 |         log.info("Kafka topic hitting loss: {} . Num messages lost: {}.",
23 |             kafkaTopicName, totalNumberOfMessagesLost);
24 |     }
25 | }
26 |
--------------------------------------------------------------------------------
/marmaray/src/test/java/com/uber/marmaray/utilities/ResourcesUtils.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.utilities;
2 | import org.hibernate.validator.constraints.NotEmpty;
3 | import java.io.File;
4 |
5 | /**
6 |  * Test helpers for loading files from the classpath.
7 |  */
8 | public class ResourcesUtils {
9 |     /**
10 |      * Reads a classpath resource and decodes it as UTF-8 text.
11 |      *
12 |      * @param fileName resource path, resolved through this class's class loader
13 |      * @return the resource content as a string
14 |      * @throws Exception if the resource is missing or cannot be read
15 |      */
16 |     public static String getTextFromResource(@NotEmpty final String fileName) throws Exception {
17 |         // Decode explicitly as UTF-8 instead of relying on the platform default charset.
18 |         return new String(getBytesFromResource(fileName), java.nio.charset.StandardCharsets.UTF_8);
19 |     }
20 |
21 |     /**
22 |      * Reads a classpath resource into a byte array.
23 |      *
24 |      * @param fileName resource path, resolved through this class's class loader
25 |      * @return the raw bytes of the resource
26 |      * @throws NullPointerException if no resource with that name is found
27 |      * @throws Exception if the resource URL cannot be converted to a file or the file cannot be read
28 |      */
29 |     public static byte[] getBytesFromResource(@NotEmpty final String fileName) throws Exception {
30 |         final java.net.URL resource = ResourcesUtils.class.getClassLoader().getResource(fileName);
31 |         // getResource returns null for a missing resource; fail with the name instead of a bare NPE.
32 |         java.util.Objects.requireNonNull(resource, "Resource not found on classpath: " + fileName);
33 |         final File file = new File(resource.toURI());
34 |         return java.nio.file.Files.readAllBytes(file.toPath());
35 |     }
36 | }
37 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/metrics/CassandraMetric.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.common.metrics;
2 |
3 | import com.uber.marmaray.common.configuration.CassandraSinkConfiguration;
4 | import com.uber.marmaray.utilities.StringTypes;
5 | import lombok.NonNull;
6 |
7 | import java.util.Map;
8 |
9 | /**
10 |  * Metric tag helpers for the Cassandra sink.
11 |  */
12 | public class CassandraMetric {
13 |
14 |     public static final String TABLE_NAME_TAG = "tableName";
15 |
16 |     /**
17 |      * Builds the additional metric tags that identify a Cassandra table.
18 |      *
19 |      * @param cassandraConf sink configuration supplying the keyspace and table name
20 |      * @return the result of {@code DataFeedMetrics.createAdditionalTags} for {@link #TABLE_NAME_TAG}
21 |      *         and the keyspace and table name joined by {@code StringTypes.UNDERSCORE}
22 |      */
23 |     public static Map createTableNameTags(@NonNull final CassandraSinkConfiguration cassandraConf) {
24 |         final String qualifiedTableName =
25 |             cassandraConf.getKeyspace() + StringTypes.UNDERSCORE + cassandraConf.getTableName();
26 |         return DataFeedMetrics.createAdditionalTags(TABLE_NAME_TAG, qualifiedTableName);
27 |     }
28 | }
29 |
--------------------------------------------------------------------------------
/marmaray/src/test/resources/schemas/schemasource/myTestSchema.1.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "myTestSchema",
4 | "schemaVersion": 1,
5 | "fields": [
6 | { "name": "firstName", "type": ["null", "string"], "default": null },
7 | { "name": "lastName", "type": ["null", "string"], "default": null },
8 | { "name": "address", "type": ["null", {"type": "record", "name": "address_items", "fields": [
9 | { "name": "line1", "type": ["null", "string"], "default": null },
10 | { "name": "city", "type": ["null", "string"], "default": null },
11 | { "name": "zip", "type": ["null", "long"], "default": null}
12 | ] } ], "default": null}
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/marmaray/src/main/java/com/uber/marmaray/common/job/Dag.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.common.job;
2 |
3 | import com.uber.marmaray.common.status.IStatus;
4 | import lombok.Getter;
5 | import lombok.NonNull;
6 | import lombok.Setter;
7 | import org.hibernate.validator.constraints.NotEmpty;
8 |
9 | import java.util.Map;
10 |
11 | /**
12 |  * Base class for an executable unit identified by a job name and a data feed name.
13 |  * Subclasses supply the work in {@link #execute()}.
14 |  */
15 | public abstract class Dag {
16 |
17 |     /** Name of the job; set by the constructor. */
18 |     @NotEmpty
19 |     @Getter
20 |     @Setter
21 |     private String jobName;
22 |
23 |     /** Name of the data feed; set by the constructor. */
24 |     @NotEmpty
25 |     @Getter
26 |     @Setter
27 |     private String dataFeedName;
28 |
29 |     /** Job manager metadata; not set by the constructor, only through the setter. */
30 |     @Getter
31 |     @Setter
32 |     private Map jobManagerMetadata;
33 |
34 |     /**
35 |      * @param jobName name of the job
36 |      * @param dataFeedName name of the data feed
37 |      */
38 |     public Dag(@NonNull final String jobName, @NonNull final String dataFeedName) {
39 |         this.jobName = jobName;
40 |         this.dataFeedName = dataFeedName;
41 |     }
42 |
43 |     /**
44 |      * Runs this unit.
45 |      *
46 |      * @return the resulting {@link IStatus}
47 |      */
48 |     public abstract IStatus execute();
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/marmaray/src/test/resources/schemas/schemasource/myTestSchema.2.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "myTestSchema",
4 | "schemaVersion": 2,
5 | "fields": [
6 | { "name": "firstName", "type": ["null", "string"], "default": null },
7 | { "name": "lastName", "type": ["null", "string"], "default": null },
8 | { "name": "middleName", "type": ["null", "string"], "default": null },
9 | { "name": "address", "type": ["null", {"type": "record", "name": "address_items", "fields": [
10 | { "name": "line1", "type": ["null", "string"], "default": null },
11 | { "name": "city", "type": ["null", "string"], "default": null },
12 | { "name": "zip", "type": ["null", "long"], "default": null}
13 | ] } ], "default": null}
14 | ]
15 | }
16 |
--------------------------------------------------------------------------------
/marmaray/src/test/java/com/uber/marmaray/common/spark/TestMarmarayKryoSerializer.java:
--------------------------------------------------------------------------------
1 | package com.uber.marmaray.common.spark;
2 |
3 | import com.uber.marmaray.common.util.AbstractSparkTest;
4 | import org.junit.Assert;
5 | import org.junit.Test;
6 |
7 | import java.util.LinkedList;
8 | import java.util.List;
9 | import java.util.stream.IntStream;
10 |
11 | public class TestMarmarayKryoSerializer extends AbstractSparkTest {
12 |
13 | @Test
14 | public void testExceptionSerialization() {
15 | final List