├── .gitignore ├── LICENSE ├── README.md ├── console └── custom │ ├── local.json │ ├── settings.json │ └── themes │ ├── dark │ ├── config.json │ ├── dark.css │ ├── dark.html │ └── dark_logo.svg │ └── purple │ ├── config.json │ ├── css │ └── purple.css │ └── images │ └── purple_logo.svg ├── dt-demo ├── apex_checks.xml ├── dimensions │ ├── .gitignore │ ├── READEME.md │ ├── XmlJavadocCommentsExtractor.xsl │ ├── apex_checks.xml │ ├── pom.xml │ └── src │ │ ├── assemble │ │ └── appPackage.xml │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── datatorrent │ │ │ │ └── demos │ │ │ │ ├── dimensions │ │ │ │ ├── InputGenerator.java │ │ │ │ ├── ads │ │ │ │ │ ├── AdInfo.java │ │ │ │ │ ├── InputItemGenerator.java │ │ │ │ │ ├── benchmark │ │ │ │ │ │ ├── AdsDimensionsGenericBenchmark.java │ │ │ │ │ │ └── AdsDimensionsStatsBenchmark.java │ │ │ │ │ ├── generic │ │ │ │ │ │ ├── AdsDimensionsDemo.java │ │ │ │ │ │ ├── AdsDimensionsDemoDifferentTimeBuckets.java │ │ │ │ │ │ ├── AdsDimensionsDemoManualEntry.java │ │ │ │ │ │ └── AdsDimensionsDemoNoTime.java │ │ │ │ │ ├── package-info.java │ │ │ │ │ └── stats │ │ │ │ │ │ ├── AdsConverter.java │ │ │ │ │ │ └── AdsDimensionsDemoPerformant.java │ │ │ │ ├── sales │ │ │ │ │ └── generic │ │ │ │ │ │ ├── EnrichmentOperator.java │ │ │ │ │ │ ├── JsonSalesGenerator.java │ │ │ │ │ │ ├── JsonToMapConverter.java │ │ │ │ │ │ ├── RandomWeightedMovableGenerator.java │ │ │ │ │ │ ├── SalesDemo.java │ │ │ │ │ │ └── SalesEvent.java │ │ │ │ └── telecom │ │ │ │ │ ├── app │ │ │ │ │ ├── CDRDemoV2.java │ │ │ │ │ ├── CallDetailRecordGenerateApp.java │ │ │ │ │ ├── CustomerEnrichedInfoGenerateApp.java │ │ │ │ │ ├── CustomerServiceDemoV2.java │ │ │ │ │ ├── EnrichCDRApp.java │ │ │ │ │ ├── TelecomDemoV2.java │ │ │ │ │ └── TelecomDimensionsDemo.java │ │ │ │ │ ├── conf │ │ │ │ │ ├── ConfigUtil.java │ │ │ │ │ ├── CustomerEnrichedInfoCassandraConfig.java │ │ │ │ │ ├── CustomerEnrichedInfoHBaseConfig.java │ │ │ │ │ ├── CustomerEnrichedInfoHiveConfig.java │ │ │ │ │ ├── 
CustomerServiceCassandraConf.java │ │ │ │ │ ├── CustomerServiceHBaseConf.java │ │ │ │ │ ├── DataWarehouseConfig.java │ │ │ │ │ ├── EnrichedCDRCassandraConfig.java │ │ │ │ │ ├── EnrichedCDRHBaseConfig.java │ │ │ │ │ ├── EnrichedCDRHiveConfig.java │ │ │ │ │ ├── EnrichedCustomerServiceCassandraConf.java │ │ │ │ │ ├── EnrichedCustomerServiceHBaseConf.java │ │ │ │ │ ├── EnrichedCustomerServiceHiveConfig.java │ │ │ │ │ └── TelecomDemoConf.java │ │ │ │ │ ├── generate │ │ │ │ │ ├── AbstractStringRandomGenerator.java │ │ │ │ │ ├── CDRHBaseFieldInfo.java │ │ │ │ │ ├── CallDetailRecordCustomerInfoGenerator.java │ │ │ │ │ ├── CallDetailRecordRandomGenerator.java │ │ │ │ │ ├── CharRandomGenerator.java │ │ │ │ │ ├── CharRange.java │ │ │ │ │ ├── CustomerEnrichedInfoCassandraRepo.java │ │ │ │ │ ├── CustomerEnrichedInfoEmbededRepo.java │ │ │ │ │ ├── CustomerEnrichedInfoHbaseRepo.java │ │ │ │ │ ├── CustomerEnrichedInfoHiveRepo.java │ │ │ │ │ ├── CustomerEnrichedInfoProvider.java │ │ │ │ │ ├── CustomerInfoRandomGenerator.java │ │ │ │ │ ├── CustomerServiceDefaultGenerator.java │ │ │ │ │ ├── CustomerServiceRandomGenerator.java │ │ │ │ │ ├── EnumStringRandomGenerator.java │ │ │ │ │ ├── FixLengthStringRandomGenerator.java │ │ │ │ │ ├── Generator.java │ │ │ │ │ ├── GeneratorUtil.java │ │ │ │ │ ├── ImeiGenerator.java │ │ │ │ │ ├── ImsiGenerator.java │ │ │ │ │ ├── LocationRepo.java │ │ │ │ │ ├── MNCRepo.java │ │ │ │ │ ├── MsisdnGenerator.java │ │ │ │ │ ├── Range.java │ │ │ │ │ ├── StringComposeGenerator.java │ │ │ │ │ └── TACRepo.java │ │ │ │ │ ├── hive │ │ │ │ │ ├── HiveUtil.java │ │ │ │ │ ├── TelecomHiveExecuteOperator.java │ │ │ │ │ └── TelecomHiveOutputOperator.java │ │ │ │ │ ├── model │ │ │ │ │ ├── BytesSupport.java │ │ │ │ │ ├── CallDetailRecord.java │ │ │ │ │ ├── CallType.java │ │ │ │ │ ├── CustomerEnrichedInfo.java │ │ │ │ │ ├── CustomerInfo.java │ │ │ │ │ ├── CustomerService.java │ │ │ │ │ ├── DisconnectReason.java │ │ │ │ │ ├── EnrichedCDR.java │ │ │ │ │ ├── 
EnrichedCustomerService.java │ │ │ │ │ ├── MNCInfo.java │ │ │ │ │ ├── TACInfo.java │ │ │ │ │ └── ZipCodeHelper.java │ │ │ │ │ └── operator │ │ │ │ │ ├── AppDataConfigurableSnapshotServer.java │ │ │ │ │ ├── AppDataSimpleConfigurableSnapshotServer.java │ │ │ │ │ ├── AppDataSingleSchemaDimensionStoreHDHTUpdateWithList.java │ │ │ │ │ ├── AppDataSnapshotServerAggregate.java │ │ │ │ │ ├── CDREnrichOperator.java │ │ │ │ │ ├── CDRHdfsInputOperator.java │ │ │ │ │ ├── CDRHdfsOutputOperator.java │ │ │ │ │ ├── CDRStore.java │ │ │ │ │ ├── CallDetailRecordGenerateOperator.java │ │ │ │ │ ├── CustomerEnrichedInfoCassandraOutputOperator.java │ │ │ │ │ ├── CustomerEnrichedInfoGenerateOperator.java │ │ │ │ │ ├── CustomerEnrichedInfoHbaseOutputOperator.java │ │ │ │ │ ├── CustomerEnrichedInfoHiveOutputOperator.java │ │ │ │ │ ├── CustomerServiceCassandraOutputOperator.java │ │ │ │ │ ├── CustomerServiceEnrichOperator.java │ │ │ │ │ ├── CustomerServiceGenerateOperator.java │ │ │ │ │ ├── CustomerServiceHbaseOutputOperator.java │ │ │ │ │ ├── CustomerServiceStore.java │ │ │ │ │ ├── EnrichedCDRCassandraOutputOperator.java │ │ │ │ │ ├── EnrichedCDRHbaseInputOperator.java │ │ │ │ │ ├── EnrichedCDRHbaseOutputOperator.java │ │ │ │ │ ├── EnrichedCustomerServiceCassandraOutputOperator.java │ │ │ │ │ ├── EnrichedCustomerServiceHbaseOutputOperator.java │ │ │ │ │ ├── GeoDimensionStore.java │ │ │ │ │ ├── RegionZipCombinationFilter.java │ │ │ │ │ ├── RegionZipCombinationValidator.java │ │ │ │ │ ├── TelecomDemoCassandraOutputOperator.java │ │ │ │ │ └── TelecomDemoHBaseOutputOperator.java │ │ │ │ └── hdht │ │ │ │ └── benchmark │ │ │ │ ├── Generator.java │ │ │ │ ├── HDHTBenchmarkApplication.java │ │ │ │ └── HDSOperator.java │ │ └── resources │ │ │ ├── META-INF │ │ │ ├── properties-GenericDimensionsWithCsvMapParser.xml │ │ │ └── properties.xml │ │ │ ├── TelecomDemoV2-setup │ │ │ ├── adsBenchmarkSchema.json │ │ │ ├── adsGenericDataSchema.json │ │ │ ├── adsGenericEventSchema.json │ │ │ ├── 
adsGenericEventSchemaNoEnums.json │ │ │ ├── adsGenericEventSchemaNoTime.json │ │ │ ├── adsGenericEventSchemaTimeBuckets.json │ │ │ ├── averageWaittimeSnapshotSchema.json │ │ │ ├── cdrDemoV2EventSchema.json │ │ │ ├── cdrDemoV2SnapshotSchema.json │ │ │ ├── cdrGeoSchema.json │ │ │ ├── csGeoSchema.json │ │ │ ├── customerServiceDemoV2EventSchema.json │ │ │ ├── customerenrichedinfo.csv │ │ │ ├── products.txt │ │ │ ├── salesGenericDataSchema.json │ │ │ ├── salesGenericEventSchema.json │ │ │ ├── satisfactionRatingSnapshotSchema.json │ │ │ ├── serviceCallSnapshotSchema.json │ │ │ ├── telecomDimensionsEventSchema.json │ │ │ └── usLocationToZips.csv │ │ ├── site │ │ └── conf │ │ │ └── my-app-conf1.xml │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── datatorrent │ │ │ └── demos │ │ │ └── dimensions │ │ │ ├── ads │ │ │ ├── custom │ │ │ │ └── AdsConverterTest.java │ │ │ └── generic │ │ │ │ ├── AdsDimensionsDemoTest.java │ │ │ │ └── MockGenerator.java │ │ │ ├── sales │ │ │ └── generic │ │ │ │ ├── MockGenerator.java │ │ │ │ └── SalesDemoTest.java │ │ │ └── telecom │ │ │ ├── CDRDemoV2Tester.java │ │ │ ├── CDREnrichTester.java │ │ │ ├── CallDetailRecordGenerateAppTester.java │ │ │ ├── CustomerEnrichedInfoEmbededRepoTester.java │ │ │ ├── CustomerEnrichedInfoGenerateAppTester.java │ │ │ ├── CustomerServiceDemoV2Tester.java │ │ │ ├── CustomerServiceHbaseOutputOperatorTester.java │ │ │ ├── DataWrapper.java │ │ │ ├── EnrichCDRAppTester.java │ │ │ ├── EnrichedCDRHbaseInputOperatorTester.java │ │ │ ├── PointZipCodeRepoTester.java │ │ │ ├── RegionZipDimensionalExpanderTester.java │ │ │ ├── TelecomDimensionsDemoTester.java │ │ │ ├── TelecomHiveOutputOperatorTester.java │ │ │ └── TupleCacheOperator.java │ │ └── resources │ │ ├── adsquery.json │ │ ├── log4j.properties │ │ ├── salesquery.json │ │ └── satisfactionRatingSnapshotSchema_test.json ├── machinedata │ ├── XmlJavadocCommentsExtractor.xsl │ ├── apex_checks.xml │ ├── pom.xml │ └── src │ │ ├── assemble │ │ └── appPackage.xml │ │ ├── main │ 
│ ├── java │ │ │ └── com │ │ │ │ └── datatorrent │ │ │ │ └── demos │ │ │ │ └── machinedata │ │ │ │ ├── Application.java │ │ │ │ ├── ApplicationHardcoded.java │ │ │ │ ├── ApplicationPerformant.java │ │ │ │ ├── InputReceiver.java │ │ │ │ └── data │ │ │ │ ├── AbstractMachineAggregatorHardcoded.java │ │ │ │ ├── MachineAggregate.java │ │ │ │ ├── MachineAggregatorAverage.java │ │ │ │ ├── MachineAggregatorCount.java │ │ │ │ ├── MachineAggregatorHardCodedCount.java │ │ │ │ ├── MachineAggregatorHardCodedSum.java │ │ │ │ ├── MachineAggregatorSum.java │ │ │ │ ├── MachineHardCodedAggregate.java │ │ │ │ ├── MachineHardCodedAggregateConverter.java │ │ │ │ ├── MachineInfo.java │ │ │ │ └── MachineKey.java │ │ └── resources │ │ │ ├── META-INF │ │ │ └── properties.xml │ │ │ └── machinedataschema.json │ │ └── test │ │ └── resources │ │ └── log4j.properties ├── pom.xml └── starter-app │ ├── apex_checks.xml │ ├── pom.xml │ └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ └── resources │ │ └── META-INF │ │ └── properties.xml │ └── test │ └── resources │ └── log4j.properties ├── tools ├── README.md └── monitor.py ├── training ├── metrics-app │ ├── README.md │ ├── XmlJavadocCommentsExtractor.xsl │ ├── pom.xml │ └── src │ │ ├── assemble │ │ └── appPackage.xml │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── datatorrent │ │ │ │ └── apps │ │ │ │ ├── AppMetricsService.java │ │ │ │ ├── ApplicationCCP.java │ │ │ │ ├── ApplicationCPPAppMetrics.java │ │ │ │ ├── POJOGenerator.java │ │ │ │ ├── PojoEvent.java │ │ │ │ ├── TopNAccounts.java │ │ │ │ └── TopNAggregator.java │ │ └── resources │ │ │ └── META-INF │ │ │ ├── properties-test.xml │ │ │ └── properties.xml │ │ └── test │ │ └── resources │ │ └── log4j.properties └── random-to-hdfs │ ├── README.md │ ├── XmlJavadocCommentsExtractor.xsl │ ├── pom.xml │ └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── datatorrent │ │ │ └── apps │ │ │ ├── Application.java │ │ │ └── POJOGenerator.java │ └── resources │ 
│ └── META-INF │ │ ├── properties-test.xml │ │ └── properties.xml │ └── test │ └── resources │ └── log4j.properties └── tutorials ├── README.md ├── cassandraInput ├── README.md ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── datatorrent │ │ │ └── cassandra │ │ │ ├── Application.java │ │ │ └── TestUser.java │ └── resources │ │ └── META-INF │ │ ├── example.cql │ │ └── properties-CassandraInputApplication.xml │ └── test │ └── java │ └── com │ └── datatorrent │ └── cassandra │ └── CassandraApplicationTest.java ├── cassandraOutput ├── README.md ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ └── java │ │ └── com │ │ └── datatorrent │ │ └── cassandra │ │ ├── Application.java │ │ └── TestUser.java │ ├── site │ └── conf │ │ └── properties-CassandraOutputTestApp.xml │ └── test │ ├── java │ └── com │ │ └── datatorrent │ │ └── cassandra │ │ └── CassandraApplicationTest.java │ └── resources │ ├── example.cql │ └── properties-CassandraOutputTestApp.xml ├── exactly-once ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── myapexapp │ │ │ ├── Application.java │ │ │ └── AtomicFileOutputApp.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── myapexapp │ │ ├── ApplicationTest.java │ │ └── AtomicFileOutputAppTest.java │ └── resources │ └── log4j.properties ├── fileIO-multiDir ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── fileIO │ │ │ ├── Application.java │ │ │ ├── FileReader.java │ │ │ ├── FileReaderMultiDir.java │ │ │ └── FileWriter.java │ └── resources │ │ ├── META-INF │ │ └── properties.xml │ │ └── unused-log4j.properties │ ├── site │ └── conf │ │ └── 
my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── fileIO │ │ └── ApplicationTest.java │ └── resources │ └── log4j.properties ├── fileIO-simple ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── myapexapp │ │ │ ├── Application.java │ │ │ └── FileOutputOperator.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── myapexapp │ │ └── ApplicationTest.java │ └── resources │ └── log4j.properties ├── fileIO ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── fileIO │ │ │ ├── Application.java │ │ │ ├── BytesFileWriter.java │ │ │ ├── FileReader.java │ │ │ ├── FileWriter.java │ │ │ ├── ThroughputBasedApplication.java │ │ │ └── ThroughputBasedReader.java │ └── resources │ │ ├── META-INF │ │ ├── properties-FileIO.xml │ │ └── properties-ThroughputBasedFileIO.xml │ │ └── unused-log4j.properties │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── fileIO │ │ ├── ApplicationTest.java │ │ └── ThroughputBasedApplicationTest.java │ └── resources │ └── log4j.properties ├── fileOutput ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── fileOutput │ │ │ ├── Application.java │ │ │ ├── FileWriter.java │ │ │ └── SequenceGenerator.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── fileOutput │ │ └── ApplicationTest.java │ └── resources │ └── log4j.properties ├── fileToJdbc ├── .gitignore ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ 
├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── FileToJdbcApp │ │ │ ├── CustomParser.java │ │ │ ├── FileReader.java │ │ │ ├── FileToJdbcCsvParser.java │ │ │ ├── FileToJdbcCustomParser.java │ │ │ └── PojoEvent.java │ └── resources │ │ ├── META-INF │ │ └── properties.xml │ │ └── schema.json │ ├── site │ └── conf │ │ ├── exampleCsvParser.xml │ │ └── exampleCustomParser.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── FileToJdbcApp │ │ └── ApplicationTest.java │ └── resources │ ├── example.sql │ ├── log4j.properties │ ├── test-input │ └── sample.txt │ └── test.xml ├── hdfs-sync ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── datatorrent │ │ │ └── apps │ │ │ └── copy │ │ │ └── HDFSFileCopyApp.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ ├── cluster-memory-conf.xml │ │ └── sandbox-memory-conf.xml │ └── test │ └── resources │ └── log4j.properties ├── hdfs2kafka ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── myapexapp │ │ │ └── Application.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── myapexapp │ │ └── ApplicationTest.java │ └── resources │ └── log4j.properties ├── hdht ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ ├── Application.java │ │ │ └── RandomNumberGenerator.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ ├── ApplicationTest.java │ │ ├── HDHTAppTest.java │ │ └── HDHTTestOperator.java │ └── resources │ └── 
log4j.properties ├── jdbcIngest ├── .gitignore ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── mydtapp │ │ │ ├── FileLineOutputOperator.java │ │ │ ├── JdbcHDFSApp.java │ │ │ ├── JdbcPollerApplication.java │ │ │ └── PojoEvent.java │ └── resources │ │ └── META-INF │ │ ├── properties-PollJdbcToHDFSApp.xml │ │ └── properties-SimpleJdbcToHDFSApp.xml │ ├── site │ └── conf │ │ └── example.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── mydtapp │ │ ├── ApplicationTest.java │ │ ├── JdbcInputAppTest.java │ │ └── JdbcPollerApplicationTest.java │ └── resources │ ├── example.sql │ └── log4j.properties ├── jdbcToJdbc ├── .gitignore ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── mydtapp │ │ │ ├── JdbcToJdbcApp.java │ │ │ └── PojoEvent.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── example.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── mydtapp │ │ ├── ApplicationTest.java │ │ └── JdbcOperatorTest.java │ └── resources │ ├── example.sql │ └── log4j.properties ├── jms-output-exactlyonce ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ ├── BatchSequenceGenerator.java │ │ │ ├── JmsOutputApplication.java │ │ │ ├── PassthroughFailOperator.java │ │ │ ├── StringMessageJMSSinglePortOutputOperator.java │ │ │ ├── ValidationApplication.java │ │ │ └── ValidationToFile.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── JmsApplicationTest.java │ └── resources │ └── log4j.properties ├── jmsActiveMQ ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ 
├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── jmsActiveMQ │ │ │ ├── ActiveMQApplication.java │ │ │ └── LineOutputOperator.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── jmsActiveMQ │ │ └── ApplicationTest.java │ └── resources │ └── log4j.properties ├── jmsSqs ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── jmsSqs │ │ │ ├── LineOutputOperator.java │ │ │ └── SqsApplication.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── jmsSqs │ │ └── ApplicationTest.java │ └── resources │ └── log4j.properties ├── kafka ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── myapexapp │ │ │ ├── KafkaApp.java │ │ │ └── LineOutputOperator.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ └── my-app-conf1.xml │ └── test │ ├── java │ └── com │ │ └── example │ │ └── myapexapp │ │ └── ApplicationTest.java │ └── resources │ └── log4j.properties ├── kinesisInput ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── example │ │ │ └── kinesisInput │ │ │ └── Application.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ └── test │ └── resources │ └── log4j.properties ├── maprapp ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── datatorrent │ │ │ └── maprapp │ │ │ ├── Application.java │ │ │ └── Data.java │ └── resources │ │ └── META-INF │ │ └── 
properties.xml │ └── test │ ├── java │ └── com │ │ └── datatorrent │ │ └── maprapp │ │ └── ApplicationTest.java │ └── resources │ └── log4j.properties ├── operatorTutorial ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── datatorrent │ │ └── tutorials │ │ └── operatorTutorial │ │ └── WordCountOperator.java │ └── test │ ├── java │ └── com │ │ └── datatorrent │ │ └── tutorials │ │ └── operatorTutorial │ │ └── WordCountOperatorTest.java │ └── resources │ ├── log4j.properties │ └── stop-words ├── parser ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── datatorrent │ │ │ └── tutorial │ │ │ ├── csvparser │ │ │ ├── AdDataGenerator.java │ │ │ ├── Campaign.java │ │ │ ├── FileOutputOperator.java │ │ │ └── csvParserApplication.java │ │ │ ├── fixedwidthparser │ │ │ ├── Ad.java │ │ │ ├── AdDataGenerator.java │ │ │ └── fixedWidthParserApplication.java │ │ │ └── xmlparser │ │ │ ├── EmployeeBean.java │ │ │ ├── EmployeeDataGenerator.java │ │ │ ├── JavaSerializationStreamCodec.java │ │ │ ├── XmlDocumentFormatter.java │ │ │ └── xmlParserApplication.java │ └── resources │ │ └── META-INF │ │ ├── properties-csvParseApplication.xml │ │ ├── properties-fixedWidthParserApplication.xml │ │ └── properties-xmlParseApplication.xml │ └── test │ ├── java │ └── com │ │ └── datatorrent │ │ └── tutorial │ │ ├── csvparser │ │ └── csvParserApplicationTest.java │ │ ├── fixedwidthparser │ │ └── fixedWidthParserApplicationTest.java │ │ └── xmlparser │ │ └── xmlParserApplicationTest.java │ └── resources │ └── log4j.properties ├── s3-to-hdfs-sync ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── datatorrent │ │ │ └── tutorial │ │ │ └── s3input │ │ │ └── S3ToHDFSSyncApplication.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ ├── site │ └── conf │ │ ├── 
cluster-memory-conf.xml │ │ └── sandbox-memory-conf.xml │ └── test │ └── resources │ └── log4j.properties ├── s3-tuple-output ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ ├── main │ ├── java │ │ └── com │ │ │ └── datatorrent │ │ │ └── tutorials │ │ │ └── s3output │ │ │ └── Application.java │ └── resources │ │ └── META-INF │ │ └── properties.xml │ └── test │ └── resources │ └── log4j.properties ├── s3output ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src │ ├── assemble │ └── appPackage.xml │ └── main │ ├── java │ └── com │ │ └── example │ │ └── s3output │ │ └── Application.java │ └── resources │ └── META-INF │ └── properties.xml ├── topnwords ├── app │ ├── XmlJavadocCommentsExtractor.xsl │ ├── pom.xml │ └── src │ │ ├── assemble │ │ └── appPackage.xml │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── example │ │ │ │ └── topNwordcount │ │ │ │ ├── ApplicationWithQuerySupport.java │ │ │ │ ├── FileWordCount.java │ │ │ │ ├── LineReader.java │ │ │ │ ├── WCPair.java │ │ │ │ ├── WindowWordCount.java │ │ │ │ ├── WordCountWriter.java │ │ │ │ └── WordReader.java │ │ └── resources │ │ │ ├── META-INF │ │ │ └── properties.xml │ │ │ └── WordDataSchema.json │ │ ├── site │ │ └── conf │ │ │ ├── low-mem.xml │ │ │ └── my-app-conf1.xml │ │ └── test │ │ └── resources │ │ └── log4j.properties ├── scripts │ ├── aliases │ ├── build-apex │ ├── build-apex.cmd │ ├── check-services │ ├── newapp │ └── newapp.cmd └── webinar │ ├── ApplicationWordCount.java │ ├── FileWordCount.java │ ├── LineReader.java │ ├── WCPair.java │ ├── WindowWordCount.java │ ├── WordCountWriter.java │ ├── WordReader.java │ └── properties-SortedWordCount.xml └── unifiers ├── README.md ├── XmlJavadocCommentsExtractor.xsl ├── pom.xml └── src ├── assemble └── appPackage.xml ├── main ├── java │ └── com │ │ └── example │ │ └── myapexapp │ │ ├── Application.java │ │ ├── RandomInteger.java │ │ ├── RangeFinder.java │ │ └── ToConsole.java └── 
resources │ └── META-INF │ └── properties.xml ├── site └── conf │ └── use-unifier.xml └── test ├── java └── com │ └── example │ └── myapexapp │ └── ApplicationTest.java └── resources └── log4j.properties /.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .project 3 | .settings/ 4 | .metadata/ 5 | .idea/ 6 | target/ 7 | 8 | # Mobile Tools for Java (J2ME) 9 | .mtj.tmp/ 10 | 11 | # Package Files # 12 | *.jar 13 | *.war 14 | *.ear 15 | 16 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 17 | hs_err_pid* 18 | 19 | .DS_Store 20 | .classpath 21 | .project 22 | .settings/ 23 | .metadata/ 24 | .idea/ 25 | target/ 26 | /front/dist_tmp 27 | *.iml 28 | npm-debug.log 29 | nb-configuration.xml 30 | hadoop.log 31 | .checkstyle 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataTorrent Application Examples 2 | 3 | DataTorrent RTS, powered by Apache Apex, provides a high-performing, fault-tolerant, scalable, easy to use data processing platform for both batch and streaming data. It includes advanced management, monitoring, development, visualization, data ingestion and distribution features. For more information on Apache Apex, please go to https://apex.apache.org/ 4 | 5 | This repository contains examples for new users to start developing data pipelines as an application. The complete documentation along with prerequisites, setup guide and tutorial is available on http://docs.datatorrent.com/ 6 | 7 | ##Contact 8 | 9 | DataTorrent is at https://www.datatorrent.com/ 10 | 11 | Apache Apex is at http://apex.apache.org and [subscribe](http://apex.apache.org/community.html) to the mailing lists. 12 | 13 | There is an active user community at http://stackoverflow.com/. Please tag the question with "apache-apex". 
14 | 15 | -------------------------------------------------------------------------------- /console/custom/local.json: -------------------------------------------------------------------------------- 1 | { 2 | "User Profile": "User Settings", 3 | "Roles": "Super Custom Roles" 4 | } 5 | -------------------------------------------------------------------------------- /console/custom/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "disableStockThemes": false, 3 | "customDefaultTheme": "", 4 | "customThemes": [ 5 | "dark", 6 | "purple" 7 | ], 8 | "customLocalization": "local.json", 9 | "defaultHomePage": "/ops" 10 | } -------------------------------------------------------------------------------- /console/custom/themes/dark/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dark", 3 | "logo": "dark_logo.svg", 4 | "css": "dark.css", 5 | "html": "dark.html" 6 | } -------------------------------------------------------------------------------- /console/custom/themes/dark/dark.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #333; 3 | color: white; 4 | } -------------------------------------------------------------------------------- /console/custom/themes/dark/dark.html: -------------------------------------------------------------------------------- 1 | 4 | 5 |
6 |
7 |
8 |
9 |

Dark Theme Debugger

10 |
11 |

Console Settings

12 |
{{ settings | json }}
13 |
14 |
15 |
16 | 17 | -------------------------------------------------------------------------------- /console/custom/themes/purple/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Purple", 3 | "logo": "images/purple_logo.svg", 4 | "css": "css/purple.css" 5 | } -------------------------------------------------------------------------------- /console/custom/themes/purple/css/purple.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #d2bdde; 3 | } -------------------------------------------------------------------------------- /dt-demo/dimensions/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /dt-demo/dimensions/READEME.md: -------------------------------------------------------------------------------- 1 | ## Dimensions Example 2 | 3 | This application demonstrates Apex platform capabilities such as 4 | - Massive multi dimensional computation with very low latency 5 | - Fault Tolerance without data loss 6 | - High scalability and throughput 7 | 8 | -------------------------------------------------------------------------------- /dt-demo/dimensions/XmlJavadocCommentsExtractor.xsl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/assemble/appPackage.xml: -------------------------------------------------------------------------------- 1 | 4 | appPackage 5 | 6 | jar 7 | 8 | false 9 | 10 | 11 | ${basedir}/target/ 12 | /app 13 | 14 | ${project.artifactId}-${project.version}.jar 15 | 16 | 17 | 18 | ${basedir}/target/deps 19 | /lib 20 | 21 | 22 | ${basedir}/src/site/conf 23 | /conf 24 | 25 
| *.xml 26 | 27 | 28 | 29 | ${basedir}/src/main/resources/META-INF 30 | /META-INF 31 | 32 | 33 | ${basedir}/src/main/resources/app 34 | /app 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/InputGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions; 6 | 7 | import com.datatorrent.api.InputOperator; 8 | 9 | /** 10 | * @since 3.1.0 11 | */ 12 | 13 | public interface InputGenerator extends InputOperator 14 | { 15 | public OutputPort getOutputPort(); 16 | } 17 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/ads/generic/AdsDimensionsDemoDifferentTimeBuckets.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.ads.generic; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | 9 | import com.datatorrent.api.DAG; 10 | import com.datatorrent.api.annotation.ApplicationAnnotation; 11 | 12 | @ApplicationAnnotation(name = AdsDimensionsDemoDifferentTimeBuckets.APP_NAME) 13 | public class AdsDimensionsDemoDifferentTimeBuckets extends AdsDimensionsDemo 14 | { 15 | public static final String APP_NAME = "AdsDimensionsDemoDifferentTimeBuckets"; 16 | public static final String EVENT_SCHEMA_LOCATION = "adsGenericEventSchemaTimeBuckets.json"; 17 | 18 | @Override 19 | public void populateDAG(DAG dag, Configuration conf) 20 | { 21 | this.appName = APP_NAME; 22 | this.eventSchemaLocation = EVENT_SCHEMA_LOCATION; 23 | super.populateDAG(dag, conf); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/ads/generic/AdsDimensionsDemoManualEntry.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.ads.generic; 6 | 7 | import java.util.List; 8 | 9 | import org.apache.hadoop.conf.Configuration; 10 | 11 | import com.google.common.collect.Lists; 12 | 13 | import com.datatorrent.api.DAG; 14 | import com.datatorrent.api.annotation.ApplicationAnnotation; 15 | 16 | /** 17 | * @since 3.1.0 18 | */ 19 | @ApplicationAnnotation(name = AdsDimensionsDemoManualEntry.APP_NAME) 20 | public class AdsDimensionsDemoManualEntry extends AdsDimensionsDemo 21 | { 22 | public static final String APP_NAME = "AdsDimensionsDemoGenericManualEntry"; 23 | public static final String EVENT_SCHEMA_LOCATION = "adsGenericEventSchemaNoEnums.json"; 24 | 25 | @Override 26 | public void populateDAG(DAG dag, Configuration conf) 27 | { 28 | this.appName = APP_NAME; 29 | this.eventSchemaLocation = EVENT_SCHEMA_LOCATION; 30 | this.advertisers = (List)Lists.newArrayList("starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"); 31 | super.populateDAG(dag, conf); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/ads/generic/AdsDimensionsDemoNoTime.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
/**
 * Ads-dimensions demo variant that runs the parent {@link AdsDimensionsDemo}
 * with the adsGenericEventSchemaNoTime.json schema (presumably a schema with
 * no time dimension -- confirm against the JSON resource).
 */
@ApplicationAnnotation(name = AdsDimensionsDemoNoTime.APP_NAME)
public class AdsDimensionsDemoNoTime extends AdsDimensionsDemo
{
  public static final String APP_NAME = "AdsDimensionsDemoNoTime";
  public static final String EVENT_SCHEMA_LOCATION = "adsGenericEventSchemaNoTime.json";

  /**
   * Overrides the parent's app name and schema location, then delegates the
   * actual DAG construction to {@link AdsDimensionsDemo#populateDAG}.
   */
  @Override
  public void populateDAG(DAG dag, Configuration conf)
  {
    this.appName = APP_NAME;
    this.eventSchemaLocation = EVENT_SCHEMA_LOCATION;
    super.populateDAG(dag, conf);
  }
}
/**
 * A single sales event: dimension keys identifying the sale plus the numeric
 * metrics to be aggregated.
 */
class SalesEvent
{

  /* dimension keys */
  public long time;       // event timestamp; presumably epoch millis -- confirm against the generator
  public int productId;
  public String customer;
  public String channel;
  public String region;
  /* metrics */
  public double sales;    // sale amount
  public double discount;
  public double tax;
}
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.app; 6 | 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import org.apache.hadoop.conf.Configuration; 11 | 12 | import com.datatorrent.api.DAG; 13 | import com.datatorrent.api.StreamingApplication; 14 | import com.datatorrent.api.annotation.ApplicationAnnotation; 15 | 16 | @ApplicationAnnotation(name = TelecomDemoV2.APP_NAME) 17 | public class TelecomDemoV2 implements StreamingApplication 18 | { 19 | private static final transient Logger logger = LoggerFactory.getLogger(TelecomDemoV2.class); 20 | 21 | public static final String APP_NAME = "TelecomDemoV2"; 22 | 23 | public static final int outputMask_HBase = 0x01; 24 | public static final int outputMask_Cassandra = 0x100; 25 | 26 | protected int outputMask = outputMask_Cassandra; 27 | 28 | @Override 29 | public void populateDAG(DAG dag, Configuration conf) 30 | { 31 | { 32 | CDRDemoV2 cdr = new CDRDemoV2(APP_NAME); 33 | cdr.setOutputMask(outputMask); 34 | cdr.populateDAG(dag, conf); 35 | } 36 | 37 | { 38 | CustomerServiceDemoV2 cs = new CustomerServiceDemoV2(APP_NAME); 39 | cs.setOutputMask(outputMask); 40 | cs.populateDAG(dag, conf); 41 | } 42 | } 43 | 44 | public int getOutputMask() 45 | { 46 | return outputMask; 47 | } 48 | 49 | public void setOutputMask(int outputMask) 50 | { 51 | this.outputMask = outputMask; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/ConfigUtil.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | import java.net.URI; 8 | 9 | import org.apache.hadoop.conf.Configuration; 10 | 11 | import com.datatorrent.api.Context.DAGContext; 12 | import com.datatorrent.api.DAG; 13 | 14 | public class ConfigUtil 15 | { 16 | 17 | public static final String PROP_GATEWAY_ADDRESS = "dt.gateway.listenAddress"; 18 | 19 | public static URI getAppDataQueryPubSubURI(DAG dag, Configuration conf) 20 | { 21 | URI uri = URI.create("ws://" + getGatewayAddress(dag, conf) + "/pubsub"); 22 | return uri; 23 | } 24 | 25 | public static String getGatewayAddress(DAG dag, Configuration conf) 26 | { 27 | String gatewayAddress = dag.getValue(DAGContext.GATEWAY_CONNECT_ADDRESS); 28 | if (gatewayAddress == null) { 29 | gatewayAddress = conf.get(PROP_GATEWAY_ADDRESS); 30 | } 31 | return gatewayAddress; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/CustomerEnrichedInfoCassandraConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class CustomerEnrichedInfoCassandraConfig extends DataWarehouseConfig 8 | { 9 | private static CustomerEnrichedInfoCassandraConfig instance; 10 | 11 | public static CustomerEnrichedInfoCassandraConfig instance() 12 | { 13 | if (instance == null) { 14 | synchronized (CustomerEnrichedInfoCassandraConfig.class) { 15 | if (instance == null) { 16 | instance = new CustomerEnrichedInfoCassandraConfig(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected CustomerEnrichedInfoCassandraConfig() 24 | { 25 | host = TelecomDemoConf.instance.getCassandraHost(); 26 | port = TelecomDemoConf.instance.getCassandraPort(); 27 | userName = TelecomDemoConf.instance.getCassandraUserName(); 28 | password = TelecomDemoConf.instance.getCassandraPassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getCustomerEnrichedInfoTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/CustomerEnrichedInfoHBaseConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class CustomerEnrichedInfoHBaseConfig extends DataWarehouseConfig 8 | { 9 | private static CustomerEnrichedInfoHBaseConfig instance; 10 | 11 | public static CustomerEnrichedInfoHBaseConfig instance() 12 | { 13 | if (instance == null) { 14 | synchronized (CustomerEnrichedInfoHBaseConfig.class) { 15 | if (instance == null) { 16 | instance = new CustomerEnrichedInfoHBaseConfig(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected CustomerEnrichedInfoHBaseConfig() 24 | { 25 | host = TelecomDemoConf.instance.getHbaseHost(); 26 | port = TelecomDemoConf.instance.getHbasePort(); 27 | userName = TelecomDemoConf.instance.getHbaseUserName(); 28 | password = TelecomDemoConf.instance.getHbasePassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getCustomerEnrichedInfoTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/CustomerEnrichedInfoHiveConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class CustomerEnrichedInfoHiveConfig extends DataWarehouseConfig 8 | { 9 | private static CustomerEnrichedInfoHiveConfig instance; 10 | 11 | public static CustomerEnrichedInfoHiveConfig instance() 12 | { 13 | if (instance == null) { 14 | synchronized (CustomerEnrichedInfoHiveConfig.class) { 15 | if (instance == null) { 16 | instance = new CustomerEnrichedInfoHiveConfig(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected CustomerEnrichedInfoHiveConfig() 24 | { 25 | host = TelecomDemoConf.instance.getHiveHost(); 26 | port = TelecomDemoConf.instance.getHivePort(); 27 | userName = TelecomDemoConf.instance.getHiveUserName(); 28 | password = TelecomDemoConf.instance.getHivePassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getCustomerEnrichedInfoTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/CustomerServiceCassandraConf.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class CustomerServiceCassandraConf extends DataWarehouseConfig 8 | { 9 | private static CustomerServiceCassandraConf instance; 10 | 11 | public static CustomerServiceCassandraConf instance() 12 | { 13 | if (instance == null) { 14 | synchronized (CustomerServiceCassandraConf.class) { 15 | if (instance == null) { 16 | instance = new CustomerServiceCassandraConf(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected CustomerServiceCassandraConf() 24 | { 25 | host = TelecomDemoConf.instance.getCassandraHost(); 26 | port = TelecomDemoConf.instance.getHbasePort(); 27 | userName = TelecomDemoConf.instance.getCassandraUserName(); 28 | password = TelecomDemoConf.instance.getCassandraPassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getCustomerServiceTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/CustomerServiceHBaseConf.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class CustomerServiceHBaseConf extends DataWarehouseConfig 8 | { 9 | private static CustomerServiceHBaseConf instance; 10 | 11 | public static CustomerServiceHBaseConf instance() 12 | { 13 | if (instance == null) { 14 | synchronized (CustomerServiceHBaseConf.class) { 15 | if (instance == null) { 16 | instance = new CustomerServiceHBaseConf(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected CustomerServiceHBaseConf() 24 | { 25 | host = TelecomDemoConf.instance.getHbaseHost(); 26 | port = TelecomDemoConf.instance.getHbasePort(); 27 | userName = TelecomDemoConf.instance.getHbaseUserName(); 28 | password = TelecomDemoConf.instance.getHbasePassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getCustomerServiceTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/EnrichedCDRCassandraConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class EnrichedCDRCassandraConfig extends DataWarehouseConfig 8 | { 9 | private static EnrichedCDRCassandraConfig instance; 10 | 11 | public static EnrichedCDRCassandraConfig instance() 12 | { 13 | if (instance == null) { 14 | synchronized (EnrichedCDRCassandraConfig.class) { 15 | if (instance == null) { 16 | instance = new EnrichedCDRCassandraConfig(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected EnrichedCDRCassandraConfig() 24 | { 25 | host = TelecomDemoConf.instance.getCassandraHost(); 26 | port = TelecomDemoConf.instance.getCassandraPort(); 27 | userName = TelecomDemoConf.instance.getCassandraUserName(); 28 | password = TelecomDemoConf.instance.getCassandraPassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getCdrEnrichedRecordTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/EnrichedCDRHBaseConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class EnrichedCDRHBaseConfig extends DataWarehouseConfig 8 | { 9 | 10 | private static EnrichedCDRHBaseConfig instance; 11 | 12 | public static EnrichedCDRHBaseConfig instance() 13 | { 14 | if (instance == null) { 15 | synchronized (EnrichedCDRHBaseConfig.class) { 16 | if (instance == null) { 17 | instance = new EnrichedCDRHBaseConfig(); 18 | } 19 | } 20 | } 21 | return instance; 22 | } 23 | 24 | protected EnrichedCDRHBaseConfig() 25 | { 26 | host = TelecomDemoConf.instance.getHbaseHost(); 27 | port = TelecomDemoConf.instance.getHbasePort(); 28 | userName = TelecomDemoConf.instance.getHbaseUserName(); 29 | password = TelecomDemoConf.instance.getHbasePassword(); 30 | database = TelecomDemoConf.instance.getDatabase(); 31 | tableName = TelecomDemoConf.instance.getCdrEnrichedRecordTableName(); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/EnrichedCDRHiveConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class EnrichedCDRHiveConfig extends DataWarehouseConfig 8 | { 9 | private static EnrichedCDRHiveConfig instance; 10 | 11 | public static EnrichedCDRHiveConfig instance() 12 | { 13 | if (instance == null) { 14 | synchronized (EnrichedCDRHiveConfig.class) { 15 | if (instance == null) { 16 | instance = new EnrichedCDRHiveConfig(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected EnrichedCDRHiveConfig() 24 | { 25 | host = TelecomDemoConf.instance.getHiveHost(); 26 | port = TelecomDemoConf.instance.getHivePort(); 27 | userName = TelecomDemoConf.instance.getHiveUserName(); 28 | password = TelecomDemoConf.instance.getHivePassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getCdrEnrichedRecordTableName(); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/EnrichedCustomerServiceCassandraConf.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class EnrichedCustomerServiceCassandraConf extends DataWarehouseConfig 8 | { 9 | private static EnrichedCustomerServiceCassandraConf instance; 10 | 11 | public static EnrichedCustomerServiceCassandraConf instance() 12 | { 13 | if (instance == null) { 14 | synchronized (EnrichedCustomerServiceCassandraConf.class) { 15 | if (instance == null) { 16 | instance = new EnrichedCustomerServiceCassandraConf(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected EnrichedCustomerServiceCassandraConf() 24 | { 25 | host = TelecomDemoConf.instance.getCassandraHost(); 26 | port = TelecomDemoConf.instance.getHbasePort(); 27 | userName = TelecomDemoConf.instance.getCassandraUserName(); 28 | password = TelecomDemoConf.instance.getCassandraPassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getEnrichedCustomerServiceTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/EnrichedCustomerServiceHBaseConf.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class EnrichedCustomerServiceHBaseConf extends DataWarehouseConfig 8 | { 9 | private static EnrichedCustomerServiceHBaseConf instance; 10 | 11 | public static EnrichedCustomerServiceHBaseConf instance() 12 | { 13 | if (instance == null) { 14 | synchronized (EnrichedCustomerServiceHBaseConf.class) { 15 | if (instance == null) { 16 | instance = new EnrichedCustomerServiceHBaseConf(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected EnrichedCustomerServiceHBaseConf() 24 | { 25 | host = TelecomDemoConf.instance.getHbaseHost(); 26 | port = TelecomDemoConf.instance.getHbasePort(); 27 | userName = TelecomDemoConf.instance.getHbaseUserName(); 28 | password = TelecomDemoConf.instance.getHbasePassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getEnrichedCustomerServiceTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/conf/EnrichedCustomerServiceHiveConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.conf; 6 | 7 | public class EnrichedCustomerServiceHiveConfig extends DataWarehouseConfig 8 | { 9 | private static EnrichedCustomerServiceHiveConfig instance; 10 | 11 | public static EnrichedCustomerServiceHiveConfig instance() 12 | { 13 | if (instance == null) { 14 | synchronized (EnrichedCustomerServiceHiveConfig.class) { 15 | if (instance == null) { 16 | instance = new EnrichedCustomerServiceHiveConfig(); 17 | } 18 | } 19 | } 20 | return instance; 21 | } 22 | 23 | protected EnrichedCustomerServiceHiveConfig() 24 | { 25 | host = TelecomDemoConf.instance.getHiveHost(); 26 | port = TelecomDemoConf.instance.getHivePort(); 27 | userName = TelecomDemoConf.instance.getHiveUserName(); 28 | password = TelecomDemoConf.instance.getHivePassword(); 29 | database = TelecomDemoConf.instance.getDatabase(); 30 | tableName = TelecomDemoConf.instance.getEnrichedCustomerServiceTableName(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/generate/AbstractStringRandomGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.generate; 6 | 7 | public abstract class AbstractStringRandomGenerator implements Generator 8 | { 9 | protected CharRandomGenerator charGenerator; 10 | 11 | @Override 12 | public String next() 13 | { 14 | if (charGenerator == null) { 15 | throw new RuntimeException("Please set the char generator first."); 16 | } 17 | final int stringLen = getStringLength(); 18 | if (stringLen < 0) { 19 | throw new RuntimeException("The string lenght expect not less than zero."); 20 | } 21 | if (stringLen == 0) { 22 | return ""; 23 | } 24 | char[] chars = new char[stringLen]; 25 | for (int index = 0; index < stringLen; ++index) { 26 | chars[index] = charGenerator.next(); 27 | } 28 | return new String(chars); 29 | } 30 | 31 | protected abstract int getStringLength(); 32 | } 33 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/generate/CDRHBaseFieldInfo.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.generate; 6 | 7 | import com.datatorrent.contrib.hbase.HBaseFieldInfo; 8 | 9 | public class CDRHBaseFieldInfo extends HBaseFieldInfo 10 | { 11 | public CDRHBaseFieldInfo(String columnName, String columnExpression, SupportType type, String familyName) 12 | { 13 | super(columnName, columnExpression, type, familyName); 14 | } 15 | 16 | /** 17 | * get rid of the null point exception. 
/**
 * HBase field descriptor for CDR columns; see toBytes for the null-handling
 * workaround this subclass exists for.
 */
public class CDRHBaseFieldInfo extends HBaseFieldInfo
{
  public CDRHBaseFieldInfo(String columnName, String columnExpression, SupportType type, String familyName)
  {
    super(columnName, columnExpression, type, familyName);
  }

  /**
   * Works around a NullPointerException thrown by the base implementation for
   * null values by returning null instead of delegating.
   * TODO: this should really be fixed upstream, in HBasePOJOPutOperator or
   * HBaseFieldInfo.
   */
  @Override
  public byte[] toBytes(Object value)
  {
    if (value == null) {
      return null;
    }
    return super.toBytes(value);
  }
}
/**
 * Source of pre-enriched customer records for the telecom demo generators.
 */
public interface CustomerEnrichedInfoProvider
{
  /**
   * @return a randomly selected enriched customer record
   */
  public SingleRecord getRandomCustomerEnrichedInfo();
}
25 | int deviceNumIndex = Generator.random.nextInt(deviceNumArray.length); 26 | int deviceNum = deviceNumArray[deviceNumIndex]; 27 | List imeis = Lists.newArrayList(); 28 | for (int i = 0; i < deviceNum; ++i) { 29 | imeis.add(imeiGenerator.next()); 30 | } 31 | return new CustomerInfo(imsiGenerator.next(), msidnGenerator.next(), imeis); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/generate/CustomerServiceRandomGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.generate; 6 | 7 | import com.datatorrent.demos.dimensions.telecom.model.CustomerService; 8 | import com.datatorrent.demos.dimensions.telecom.model.CustomerService.IssueType; 9 | 10 | public class CustomerServiceRandomGenerator implements Generator 11 | { 12 | public static final int MAX_DURATION = 100; 13 | private ImsiGenerator imsiGenerator = new ImsiGenerator(); 14 | private MsisdnGenerator msisdnGenerator = new MsisdnGenerator(); 15 | private ImeiGenerator imeiGenerator = new ImeiGenerator(); 16 | 17 | @Override 18 | public CustomerService next() 19 | { 20 | String imsi = imsiGenerator.next(); 21 | String isdn = msisdnGenerator.next(); 22 | String imei = imeiGenerator.next(); 23 | 24 | int totalDuration = Generator.random.nextInt(MAX_DURATION); 25 | int wait = (int)(totalDuration * Math.random()); 26 | String zipCode = LocationRepo.instance().getRandomZipCode(); 27 | IssueType issueType = IssueType.values()[Generator.random.nextInt(IssueType.values().length)]; 28 | boolean satisfied = (Generator.random.nextInt(1) == 0); 29 | return new CustomerService(imsi, isdn, imei, totalDuration, wait, zipCode, issueType, satisfied); 30 | } 31 | } 32 | 
-------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/generate/EnumStringRandomGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.generate; 6 | 7 | import java.util.Random; 8 | 9 | public class EnumStringRandomGenerator implements Generator 10 | { 11 | protected static final Random random = new Random(); 12 | protected String[] candidates; 13 | 14 | public EnumStringRandomGenerator() 15 | { 16 | } 17 | 18 | public EnumStringRandomGenerator(String[] candidates) 19 | { 20 | if (candidates == null || candidates.length == 0) { 21 | throw new IllegalArgumentException("candidates can't null or empty."); 22 | } 23 | this.candidates = candidates; 24 | } 25 | 26 | public String next() 27 | { 28 | if (candidates.length == 1) { 29 | return candidates[0]; 30 | } 31 | return candidates[random.nextInt(candidates.length)]; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/generate/FixLengthStringRandomGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.generate; 6 | 7 | public class FixLengthStringRandomGenerator extends AbstractStringRandomGenerator 8 | { 9 | protected int length; 10 | 11 | public FixLengthStringRandomGenerator() 12 | { 13 | } 14 | 15 | public FixLengthStringRandomGenerator(CharRandomGenerator charGenerator, int length) 16 | { 17 | if (length <= 0) { 18 | throw new IllegalArgumentException("The length should large than zero."); 19 | } 20 | this.length = length; 21 | this.charGenerator = charGenerator; 22 | } 23 | 24 | @Override 25 | protected int getStringLength() 26 | { 27 | return length; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/generate/Generator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.generate; 6 | 7 | import java.util.Random; 8 | 9 | public interface Generator 10 | { 11 | public static final Random random = new Random(); 12 | 13 | public T next(); 14 | } 15 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/generate/GeneratorUtil.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 
/**
 * Utilities for the telecom data generators.
 */
public class GeneratorUtil
{
  // Epoch used to shorten generated record ids: midnight Jan 1 2010, default
  // time zone.
  public static long TIME_2010 = getTime2010();

  private static long getTime2010()
  {
    Calendar calendar2010 = Calendar.getInstance();
    // clear() zeroes all fields (notably MILLISECOND); set(...) alone left the
    // millisecond at "now", making TIME_2010 non-deterministic across runs.
    calendar2010.clear();
    // BUG FIX: Calendar months are zero-based, so set(2010, 1, 1, ...) was
    // February 1st; Calendar.JANUARY gives the intended January 1st.
    calendar2010.set(2010, Calendar.JANUARY, 1, 0, 0, 0);
    return calendar2010.getTimeInMillis();
  }

  /**
   * @return an id derived from milliseconds elapsed since {@link #TIME_2010},
   *         scaled by 1000. Note: calls within the same millisecond collide.
   */
  public static long getRecordId()
  {
    return (Calendar.getInstance().getTimeInMillis() - TIME_2010) * 1000;
  }
}
/**
 * Generates random IMSI strings: IMSI = MCC + MNC + MSIN, where the MCC+MNC
 * pair comes from {@link MNCRepo} and the MSIN is 9 random decimal digits.
 *
 * @author bright
 *
 */
public class ImsiGenerator implements Generator
{
  // digit-only character source backing the MSIN generator
  private CharRandomGenerator digitCharGenerator = new CharRandomGenerator(CharRange.digits);
  // MSIN component: 9 random decimal digits
  private FixLengthStringRandomGenerator msinGenerator = new FixLengthStringRandomGenerator(digitCharGenerator, 9);

  public ImsiGenerator()
  {
  }

  @Override
  public String next()
  {
    return MNCRepo.instance().getRandomMncInfo().getMccMnc() + msinGenerator.next();
  }
}
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.generate; 6 | 7 | public class Range 8 | { 9 | public final T from; 10 | public final T to; 11 | 12 | public Range(T from, T to) 13 | { 14 | this.from = from; 15 | this.to = to; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/generate/StringComposeGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.generate; 6 | 7 | import java.util.List; 8 | 9 | import com.google.common.collect.Lists; 10 | 11 | public class StringComposeGenerator implements Generator 12 | { 13 | private List> generators; 14 | 15 | public StringComposeGenerator() 16 | { 17 | } 18 | 19 | @SafeVarargs 20 | public StringComposeGenerator(Generator... generators) 21 | { 22 | if (generators == null || generators.length == 0) { 23 | return; 24 | } 25 | this.generators = Lists.newArrayList(generators); 26 | } 27 | 28 | @Override 29 | public String next() 30 | { 31 | StringBuilder sb = new StringBuilder(); 32 | for (Generator generator : generators) { 33 | sb.append(generator.next()); 34 | } 35 | return sb.toString(); 36 | } 37 | 38 | public List> getGenerators() 39 | { 40 | return generators; 41 | } 42 | 43 | public void setGenerators(List> generators) 44 | { 45 | this.generators = generators; 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/hive/HiveUtil.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
/**
 * Helpers for building HiveServer2 JDBC connection URLs and verifying that
 * the Hive JDBC driver is on the classpath.
 */
public class HiveUtil
{
  public static final String driverName = "org.apache.hive.jdbc.HiveDriver";
  public static final String urlPrefix = "jdbc:hive2://";

  /**
   * Build a URL of the form jdbc:hive2://host:port/database.
   */
  public static String getUrl(String host, int port, String database)
  {
    StringBuilder url = new StringBuilder(urlPrefix);
    url.append(host).append(':').append(port).append('/').append(database);
    return url.toString();
  }

  /**
   * Fail fast with ClassNotFoundException when the driver class is absent.
   */
  public static void verifyDriver() throws ClassNotFoundException
  {
    Class.forName(driverName);
  }
}
/**
 * The type of a call detail record; label() yields the external spelling
 * (hyphenated for the SMS variants), name() the enum identifier.
 */
public enum CallType
{
  MOC, MTC, SMS_MO("SMS-MO"), SMS_MT("SMS-MT"), DATA;

  // External labels, in declaration order of the constants above.
  private static final String[] labels = {"MOC", "MTC", "SMS-MO", "SMS-MT", "DATA"};

  private final String label;

  CallType()
  {
    label = name();
  }

  CallType(String label)
  {
    this.label = label;
  }

  /** @return the external label of this constant */
  public String label()
  {
    return label;
  }

  /** @return all external labels */
  public static String[] labels()
  {
    return labels;
  }

  /**
   * Look up a constant by its external label.
   *
   * @throws IllegalArgumentException when no constant carries the label
   */
  public static CallType labelOf(String label)
  {
    for (CallType candidate : values()) {
      if (candidate.label.equals(label)) {
        return candidate;
      }
    }
    throw new IllegalArgumentException("Invalid CallType label: " + label);
  }
}
14 | * 15 | * @author bright 16 | * 17 | */ 18 | public class CustomerInfo 19 | { 20 | public final String imsi; 21 | public final String msisdn; 22 | public final Collection imeis; //one imsi can map to multiple device 23 | 24 | //used only by reflection 25 | protected CustomerInfo() 26 | { 27 | imsi = ""; 28 | msisdn = ""; 29 | imeis = Collections.emptyList(); 30 | } 31 | 32 | public CustomerInfo(String imsi, String msisdn, Collection imeis) 33 | { 34 | this.imsi = imsi; 35 | this.msisdn = msisdn; 36 | this.imeis = Collections.unmodifiableCollection(imeis); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/model/DisconnectReason.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.model; 6 | 7 | import java.util.Random; 8 | 9 | public enum DisconnectReason 10 | { 11 | NoResponse(9, "No Response"), CallComplete(10, "Call Complete"), CallDropped(11, "Call Dropped"); 12 | 13 | private int code; 14 | private String label; 15 | 16 | private DisconnectReason() 17 | { 18 | } 19 | 20 | private DisconnectReason(int code, String label) 21 | { 22 | this.code = code; 23 | this.label = label; 24 | } 25 | 26 | public final int getCode() 27 | { 28 | return code; 29 | } 30 | 31 | public String getLabel() 32 | { 33 | return label; 34 | } 35 | 36 | private static Random random = new Random(); 37 | 38 | public static DisconnectReason randomDisconnectReason() 39 | { 40 | final int size = DisconnectReason.values().length; 41 | return DisconnectReason.values()[random.nextInt(size)]; 42 | } 43 | 44 | public static DisconnectReason fromCode(int code) 45 | { 46 | for (DisconnectReason dr : DisconnectReason.values()) { 47 | if (dr.code == code) { 48 | return dr; 49 | } 50 | } 51 | throw new 
/**
 * Immutable mobile-network descriptor: MCC + MNC plus the owning carrier.
 */
public class MNCInfo
{
  /** The carriers this demo generates data for. */
  public enum Carrier
  {
    ATT("AT&T"), VZN("Verizon"), TMO("T-Mobile"), SPR("Sprint");

    public final String operatorCode;
    public final String operatorName;

    Carrier(String operatorName)
    {
      this.operatorCode = name();
      this.operatorName = operatorName;
    }

    Carrier(String operatorCode, String operatorName)
    {
      this.operatorCode = operatorCode;
      this.operatorName = operatorName;
    }
  }

  public final int mcc;
  public final int mnc;
  public final Carrier carrier;

  public MNCInfo(int mcc, int mnc, Carrier carrier)
  {
    this.mcc = mcc;
    this.mnc = mnc;
    this.carrier = carrier;
  }

  /**
   * @return MCC and MNC concatenated into a 6-digit string, the MNC
   *         zero-padded to 3 digits (e.g. mcc=310, mnc=26 -> "310026")
   */
  public String getMccMnc()
  {
    final int combined = mcc * 1000 + mnc;
    return String.format("%06d", combined);
  }
}
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.model; 6 | 7 | public class TACInfo 8 | { 9 | //tac is 8 digits, can be treat as int 10 | public final int tac; 11 | public final String manufacturer; 12 | public final String model; 13 | 14 | public TACInfo(int tac, String manufacturer, String model) 15 | { 16 | this.tac = tac; 17 | this.manufacturer = manufacturer; 18 | this.model = model; 19 | } 20 | 21 | public String getTacAsString() 22 | { 23 | return String.format("%08d", tac); 24 | } 25 | 26 | @Override 27 | public boolean equals(Object obj) 28 | { 29 | if (obj == null) { 30 | return false; 31 | } 32 | if (getClass() != obj.getClass()) { 33 | return false; 34 | } 35 | final TACInfo other = (TACInfo)obj; 36 | 37 | return this.tac == other.tac; 38 | } 39 | 40 | @Override 41 | public int hashCode() 42 | { 43 | return tac; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/model/ZipCodeHelper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
/**
 * Converts between integer and fixed-length string representations of zip
 * codes, and validates zip strings.
 */
public class ZipCodeHelper
{
  /** Shared helper configured for 5-digit US zip codes. */
  public static ZipCodeHelper usZipCodeHelper = new ZipCodeHelper(5);

  protected static final String ZEROS = "0000000000";

  private final int zipCodeLength;

  protected ZipCodeHelper(int zipCodeLength)
  {
    this.zipCodeLength = zipCodeLength;
  }

  /** Parse a zip string into its integer value (leading zeros dropped). */
  public int toInt(String zip)
  {
    return Integer.parseInt(zip);
  }

  /**
   * Format an integer as a zero-padded zip string of the configured length.
   *
   * @throws IllegalArgumentException when zipCode is negative or has more
   *         digits than the configured length
   */
  public String toString(int zipCode)
  {
    // The original silently produced garbage like "0-123" for negatives.
    if (zipCode < 0) {
      throw new IllegalArgumentException("zip code can not be negative: " + zipCode);
    }
    String zip = String.valueOf(zipCode);
    if (zip.length() == zipCodeLength) {
      return zip;
    }
    if (zip.length() < zipCodeLength) {
      return ZEROS.substring(0, zipCodeLength - zip.length()) + zip;
    }
    throw new IllegalArgumentException(
        "The length of zip (" + zipCode + ") is larger than expected length (" + zipCodeLength + ")");
  }

  /**
   * A valid zip is exactly zipCodeLength characters, all decimal digits.
   * The per-character check rejects signed strings such as "-1234", which
   * Integer.parseInt (used by the original) silently accepted.
   */
  public boolean isZip(String str)
  {
    if (str == null || str.length() != zipCodeLength) {
      return false;
    }
    for (int i = 0; i < str.length(); ++i) {
      char ch = str.charAt(i);
      if (ch < '0' || ch > '9') {
        return false;
      }
    }
    return true;
  }
}
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.operator; 6 | 7 | import java.util.Map; 8 | 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import com.datatorrent.lib.appdata.gpo.GPOMutable; 13 | 14 | /** 15 | * The tuple is a List of MutablePair 16 | * 17 | * @author bright 18 | * 19 | */ 20 | public class AppDataSimpleConfigurableSnapshotServer extends AppDataConfigurableSnapshotServer> 21 | { 22 | private static final transient Logger logger = LoggerFactory.getLogger(AppDataSimpleConfigurableSnapshotServer.class); 23 | 24 | @Override 25 | protected void convertTo(Map row, GPOMutable gpo) 26 | { 27 | for (Map.Entry entry : row.entrySet()) { 28 | gpo.setField(entry.getKey(), entry.getValue()); 29 | logger.info("field: {}; value: {}", entry.getKey(), entry.getValue()); 30 | } 31 | 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/operator/CDRHdfsInputOperator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.operator; 6 | 7 | import java.io.BufferedReader; 8 | import java.io.IOException; 9 | import java.io.InputStream; 10 | import java.io.InputStreamReader; 11 | 12 | import org.apache.hadoop.fs.Path; 13 | 14 | import com.datatorrent.api.DefaultOutputPort; 15 | import com.datatorrent.lib.io.fs.AbstractFileInputOperator; 16 | 17 | public class CDRHdfsInputOperator extends AbstractFileInputOperator 18 | { 19 | public final transient DefaultOutputPort output = new DefaultOutputPort(); 20 | private transient BufferedReader br = null; 21 | 22 | @Override 23 | protected InputStream openFile(Path path) throws IOException 24 | { 25 | InputStream is = super.openFile(path); 26 | br = new BufferedReader(new InputStreamReader(is)); 27 | return is; 28 | } 29 | 30 | @Override 31 | protected void closeFile(InputStream is) throws IOException 32 | { 33 | super.closeFile(is); 34 | br.close(); 35 | br = null; 36 | } 37 | 38 | @Override 39 | protected InputStream retryFailedFile(FailedFile ff) throws IOException 40 | { 41 | return super.retryFailedFile(ff); 42 | } 43 | 44 | @Override 45 | protected String readEntity() throws IOException 46 | { 47 | return br.readLine(); 48 | } 49 | 50 | @Override 51 | protected void emit(String tuple) 52 | { 53 | output.emit(tuple); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/operator/CDRHdfsOutputOperator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.operator; 6 | 7 | import com.datatorrent.api.Context.OperatorContext; 8 | import com.datatorrent.lib.io.fs.AbstractSingleFileOutputOperator; 9 | 10 | public class CDRHdfsOutputOperator extends AbstractSingleFileOutputOperator //AbstractFileOutputOperator 11 | { 12 | public CDRHdfsOutputOperator() 13 | { 14 | setMaxLength(64 * 1024 * 1024); 15 | setOutputFileName("cdr"); 16 | } 17 | 18 | @Override 19 | public void setup(OperatorContext context) 20 | { 21 | super.setup(context); 22 | } 23 | 24 | @Override 25 | public byte[] getBytesForTuple(byte[] t) 26 | { 27 | return t; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/operator/CDRStore.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.operator; 6 | 7 | import java.util.List; 8 | 9 | import org.apache.apex.malhar.lib.dimensions.DimensionsEvent.Aggregate; 10 | 11 | import com.datatorrent.api.DefaultOutputPort; 12 | import com.datatorrent.api.annotation.OutputPortFieldAnnotation; 13 | 14 | public class CDRStore extends AppDataSingleSchemaDimensionStoreHDHTUpdateWithList 15 | { 16 | private static final long serialVersionUID = 2348875268413944860L; 17 | 18 | @OutputPortFieldAnnotation(optional = true) 19 | public final transient DefaultOutputPort> updateWithList = new DefaultOutputPort<>(); 20 | 21 | @Override 22 | protected DefaultOutputPort> getOutputPort(int index, int aggregatorID, int dimensionDescriptorID) 23 | { 24 | return updateWithList; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- 
/dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/operator/CustomerServiceHbaseOutputOperator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.operator; 6 | 7 | import org.apache.hadoop.hbase.client.Put; 8 | import org.apache.hadoop.hbase.util.Bytes; 9 | 10 | import com.datatorrent.demos.dimensions.telecom.conf.CustomerServiceHBaseConf; 11 | import com.datatorrent.demos.dimensions.telecom.model.CustomerService; 12 | 13 | public class CustomerServiceHbaseOutputOperator extends TelecomDemoHBaseOutputOperator 14 | { 15 | private static byte[] familyName = Bytes.toBytes("f1"); 16 | 17 | public CustomerServiceHbaseOutputOperator() 18 | { 19 | setHbaseConfig(CustomerServiceHBaseConf.instance()); 20 | } 21 | 22 | @Override 23 | public Put operationPut(CustomerService cs) 24 | { 25 | Put put = new Put(Bytes.toBytes(cs.imsi)); 26 | put.add(familyName, Bytes.toBytes("totalDuration"), Bytes.toBytes(cs.totalDuration)); 27 | put.add(familyName, Bytes.toBytes("wait"), Bytes.toBytes(cs.wait)); 28 | put.add(familyName, Bytes.toBytes("zipCode"), Bytes.toBytes(cs.zipCode)); 29 | put.add(familyName, Bytes.toBytes("issueType"), Bytes.toBytes(cs.issueType.name())); 30 | put.add(familyName, Bytes.toBytes("satisfied"), Bytes.toBytes(cs.satisfied)); 31 | return put; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/java/com/datatorrent/demos/dimensions/telecom/operator/GeoDimensionStore.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom.operator; 6 | 7 | import java.util.Map; 8 | 9 | import com.datatorrent.contrib.dimensions.AppDataSingleSchemaDimensionStoreHDHT; 10 | import com.datatorrent.contrib.dimensions.CombinationDimensionalExpander; 11 | import com.datatorrent.contrib.dimensions.CombinationValidator; 12 | import com.datatorrent.contrib.dimensions.DimensionsQueueManager; 13 | 14 | /** 15 | * 16 | * @author bright 17 | * 18 | */ 19 | public class GeoDimensionStore extends AppDataSingleSchemaDimensionStoreHDHT 20 | { 21 | private static final long serialVersionUID = 3839563720592204620L; 22 | 23 | protected RegionZipCombinationFilter filter = new RegionZipCombinationFilter(); 24 | protected RegionZipCombinationValidator validator = new RegionZipCombinationValidator(); 25 | 26 | @Override 27 | @SuppressWarnings({ "unchecked", "rawtypes" }) 28 | protected DimensionsQueueManager getDimensionsQueueManager() 29 | { 30 | return new DimensionsQueueManager(this, schemaRegistry, new CombinationDimensionalExpander((Map)seenEnumValues).withCombinationFilter(filter).withCombinationValidator((CombinationValidator)validator)); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/META-INF/properties-GenericDimensionsWithCsvMapParser.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | dt.application.GenericDimensionsWithCsvMapParser.operator.Parser.fieldmappingFile 7 | 8 | 9 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/TelecomDemoV2-setup: -------------------------------------------------------------------------------- 1 | 2 | - setup for Hive 3 | - create hdfs directory for cdr and customerservice. should be read/write permission. 
default is /user/cdrtmp and /user/cstmp ( must use absolute path ) 4 | - create tables for cdr and customerservice 5 | - cdr: 6 | CREATE TABLE IF NOT EXISTS telecomdemo.CDREnrichedRecord 7 | ( isdn string, imsi string, imei string, plan string, callType string, correspType string, 8 | correspIsdn string, duration string, bytes string, dr string, lat string, lon string, 9 | drLable string, operatorCode string, deviceBrand string, deviceModel string, zipCode string ) 10 | PARTITIONED BY( createdtime bigint ) 11 | ROW FORMAT DELIMITED FIELDS TERMINATED BY ","; 12 | - customer service: 13 | CREATE TABLE IF NOT EXISTS telecomdemo.EnrichedCustomerService 14 | ( imsi string, isdn string, imei string, totalDuration string, wait string, zipCode string, 15 | issueType string, satisfied string, operatorCode string, deviceBrand string, deviceModel string ) 16 | PARTITIONED BY( createdtime bigint ) 17 | ROW FORMAT DELIMITED FIELDS TERMINATED BY ","; -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/adsGenericDataSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "time": { 3 | "from":1123455556656, 4 | "to": 3823908593845 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/adsGenericEventSchema.json: -------------------------------------------------------------------------------- 1 | {"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, 2 | {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, 3 | {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], 4 | "timeBuckets":["1m","1h","1d"], 5 | "values": 6 
| [{"name":"impressions","type":"long","aggregators":["SUM","COUNT","AVG"]}, 7 | {"name":"clicks","type":"long","aggregators":["SUM","COUNT","AVG"]}, 8 | {"name":"cost","type":"double","aggregators":["SUM","COUNT","AVG"]}, 9 | {"name":"revenue","type":"double","aggregators":["SUM","COUNT","AVG"]}], 10 | "dimensions": 11 | [{"combination":[]}, 12 | {"combination":["location"]}, 13 | {"combination":["advertiser"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, 14 | {"combination":["publisher"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, 15 | {"combination":["advertiser","location"]}, 16 | {"combination":["publisher","location"]}, 17 | {"combination":["publisher","advertiser"]}, 18 | {"combination":["publisher","advertiser","location"]}] 19 | } 20 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/adsGenericEventSchemaNoEnums.json: -------------------------------------------------------------------------------- 1 | {"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, 2 | {"name":"advertiser","type":"string"}, 3 | {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], 4 | "timeBuckets":["1m","1h","1d"], 5 | "values": 6 | [{"name":"impressions","type":"long","aggregators":["SUM","COUNT","AVG"]}, 7 | {"name":"clicks","type":"long","aggregators":["SUM","COUNT","AVG"]}, 8 | {"name":"cost","type":"double","aggregators":["SUM","COUNT","AVG"]}, 9 | {"name":"revenue","type":"double","aggregators":["SUM","COUNT","AVG"]}], 10 | "dimensions": 11 | [{"combination":[]}, 12 | {"combination":["location"]}, 13 | {"combination":["advertiser"], "additionalValues":["impressions:MIN", 
"clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, 14 | {"combination":["publisher"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, 15 | {"combination":["advertiser","location"]}, 16 | {"combination":["publisher","location"]}, 17 | {"combination":["publisher","advertiser"]}, 18 | {"combination":["publisher","advertiser","location"]}] 19 | } 20 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/adsGenericEventSchemaNoTime.json: -------------------------------------------------------------------------------- 1 | {"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, 2 | {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, 3 | {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], 4 | "values": 5 | [{"name":"impressions","type":"long","aggregators":["SUM","COUNT","AVG"]}, 6 | {"name":"clicks","type":"long","aggregators":["SUM","COUNT","AVG"]}, 7 | {"name":"cost","type":"double","aggregators":["SUM","COUNT","AVG"]}, 8 | {"name":"revenue","type":"double","aggregators":["SUM","COUNT","AVG"]}], 9 | "dimensions": 10 | [{"combination":[]}, 11 | {"combination":["location"]}, 12 | {"combination":["advertiser"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, 13 | {"combination":["publisher"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, 14 | {"combination":["advertiser","location"]}, 15 | 
{"combination":["publisher","location"]}, 16 | {"combination":["publisher","advertiser"]}, 17 | {"combination":["publisher","advertiser","location"]}] 18 | } 19 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/adsGenericEventSchemaTimeBuckets.json: -------------------------------------------------------------------------------- 1 | {"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, 2 | {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, 3 | {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], 4 | "timeBuckets":["1m","1h"], 5 | "values": 6 | [{"name":"impressions","type":"long","aggregators":["SUM","COUNT","AVG"]}, 7 | {"name":"clicks","type":"long","aggregators":["SUM","COUNT","AVG"]}, 8 | {"name":"cost","type":"double","aggregators":["SUM","COUNT","AVG"]}, 9 | {"name":"revenue","type":"double","aggregators":["SUM","COUNT","AVG"]}], 10 | "dimensions": 11 | [{"combination":[]}, 12 | {"combination":["location"]}, 13 | {"combination":["advertiser"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, 14 | {"combination":["publisher"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, 15 | {"combination":["advertiser","location"], "timeBuckets":["30s", "2h", "1d"]}, 16 | {"combination":["publisher","location"]}, 17 | {"combination":["publisher","advertiser"]}, 18 | {"combination":["publisher","advertiser","location"]}] 19 | } 20 | -------------------------------------------------------------------------------- 
/dt-demo/dimensions/src/main/resources/averageWaittimeSnapshotSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "values": [ 3 | {"name": "current", "type": "long", "tags": ["current"]}, 4 | {"name": "min", "type": "long", "tags": ["min"]}, 5 | {"name": "max", "type": "long", "tags": ["max"]}, 6 | {"name": "threshold", "type": "long", "tags": ["threshold"]} 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/cdrDemoV2EventSchema.json: -------------------------------------------------------------------------------- 1 | {"keys": 2 | [ 3 | {"name":"region","type":"string"}, 4 | {"name":"deviceModel","type":"string"} 5 | ], 6 | "timeBuckets":["1m","1h"], 7 | "values": 8 | [ 9 | {"name":"disconnectCount","type":"long","aggregators":["SUM"]}, 10 | {"name":"downloadBytes","type":"long","aggregators":["SUM"]}, 11 | {"name":"called","type":"long","aggregators":["SUM","COUNT","AVG"]} 12 | ], 13 | "dimensions": 14 | [ 15 | {"combination":[]}, 16 | {"combination":["deviceModel"]}, 17 | {"combination":["region"]}, 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/cdrDemoV2SnapshotSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "values": [{"name": "downloadBytes", "type": "long"}, {"name": "deviceModel", "type": "string"}] 3 | } 4 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/cdrGeoSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "tags":["geo"], 3 | "keys": 4 | [ 5 | {"name":"zipcode","type":"string"}, 6 | {"name":"region","type":"string"}, 7 | ], 8 | "timeBuckets":["1m","1h"], 9 | "values": 10 | [ 11 | {"name":"disconnectCount","type":"long","aggregators":["SUM"]}, 12 | 
{"name":"downloadBytes","type":"long","aggregators":["SUM"]}, 13 | {"name":"lat","type":"float","aggregators":["FIRST"],"tags":["geo"]}, 14 | {"name":"lon","type":"float","aggregators":["FIRST"],"tags":["geo"]}, 15 | ], 16 | "dimensions": 17 | [ 18 | {"combination":["zipcode","region"]}, 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/csGeoSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "tags":["geo"], 3 | "keys": 4 | [ 5 | {"name":"zipcode","type":"string"}, 6 | {"name":"region","type":"string"}, 7 | ], 8 | "timeBuckets":["1m","1h"], 9 | "values": 10 | [ 11 | {"name":"wait","type":"long","aggregators":["AVG"]}, 12 | {"name":"lat","type":"float","aggregators":["FIRST"],"tags":["geo"]}, 13 | {"name":"lon","type":"float","aggregators":["FIRST"],"tags":["geo"]}, 14 | ], 15 | "dimensions": 16 | [ 17 | {"combination":["zipcode","region"]}, 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/customerServiceDemoV2EventSchema.json: -------------------------------------------------------------------------------- 1 | {"keys": 2 | [ 3 | {"name":"zipCode","type":"string"}, 4 | {"name":"issueType","type":"string"} 5 | ], 6 | "timeBuckets":["1m", "1h"], 7 | "values": 8 | [ 9 | {"name":"serviceCall","type":"long","aggregators":["COUNT","SUM","AVG"]}, 10 | {"name":"wait","type":"long","aggregators":["SUM","COUNT","AVG"]}, 11 | {"name":"satisfaction","type":"long","aggregators":["AVG"]}, 12 | ], 13 | "dimensions": 14 | [ 15 | {"combination":[]}, 16 | {"combination":["zipCode"]}, 17 | {"combination":["issueType"]}, 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/salesGenericDataSchema.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "time": { 3 | "from":1123455556656, 4 | "to": 3823908593845 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/salesGenericEventSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "keys":[{"name":"channel","type":"string","enumValues":["Mobile","Online","Store"]}, 3 | {"name":"region","type":"string","enumValues":["Atlanta","Boston","Chicago","Cleveland","Dallas","Minneapolis","New York","Philadelphia","San Francisco","St. Louis"]}, 4 | {"name":"product","type":"string","enumValues":["Laptops","Printers","Routers","Smart Phones","Tablets"]}], 5 | "timeBuckets":["1m", "1h", "1d", "5m"], 6 | "values": 7 | [{"name":"sales","type":"double","aggregators":["SUM"]}, 8 | {"name":"discount","type":"double","aggregators":["SUM"]}, 9 | {"name":"tax","type":"double","aggregators":["SUM"]}], 10 | "dimensions": 11 | [{"combination":[]}, 12 | {"combination":["channel"]}, 13 | {"combination":["region"]}, 14 | {"combination":["product"]}, 15 | {"combination":["channel","region"]}, 16 | {"combination":["channel","product"]}, 17 | {"combination":["region","product"]}, 18 | {"combination":["channel","region","product"]}] 19 | } 20 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/satisfactionRatingSnapshotSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "values": [ 3 | {"name": "current", "type": "long", "tags": ["current"]}, 4 | {"name": "min", "type": "long", "tags": ["min"]}, 5 | {"name": "max", "type": "long", "tags": ["max"]}, 6 | {"name": "threshold", "type": "long", "tags": ["threshold"]} 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- 
/dt-demo/dimensions/src/main/resources/serviceCallSnapshotSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "values": [{"name": "serviceCall", "type": "long"}, {"name": "issueType", "type": "string"}] 3 | } 4 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/main/resources/telecomDimensionsEventSchema.json: -------------------------------------------------------------------------------- 1 | {"keys":[{"name":"imsi","type":"string"}, 2 | {"name":"carrier","type":"string","enumValues":["ATT","VZN","TMO","SPR"]}, 3 | {"name":"imei","type":"string"}], 4 | "timeBuckets":["1m","1h","1d"], 5 | "values": 6 | [{"name":"duration","type":"long","aggregators":["SUM","COUNT","AVG"]}, 7 | {"name":"terminatedAbnomally","type":"long","aggregators":["SUM","COUNT","AVG"]}, 8 | {"name":"terminatedNomally","type":"long","aggregators":["SUM","COUNT","AVG"]}, 9 | {"name":"called","type":"long","aggregators":["SUM","COUNT","AVG"]}], 10 | "dimensions": 11 | [{"combination":[]}, 12 | {"combination":["imsi"]}, 13 | {"combination":["imsi","imei"]}, 14 | {"combination":["carrier"]}, 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/test/java/com/datatorrent/demos/dimensions/telecom/CDREnrichTester.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom; 6 | 7 | import org.junit.Assert; 8 | import org.junit.Test; 9 | 10 | import com.datatorrent.demos.dimensions.telecom.generate.CallDetailRecordCustomerInfoGenerator; 11 | import com.datatorrent.demos.dimensions.telecom.model.CallDetailRecord; 12 | import com.datatorrent.demos.dimensions.telecom.model.EnrichedCDR; 13 | 14 | public class CDREnrichTester 15 | { 16 | @Test 17 | public void test() throws Exception 18 | { 19 | CallDetailRecordCustomerInfoGenerator generator = new CallDetailRecordCustomerInfoGenerator(); 20 | for (int i = 0; i < 100; ++i) { 21 | CallDetailRecord cdr = generator.next(); 22 | EnrichedCDR enriched = EnrichedCDR.fromCallDetailRecord(cdr.toLine()); 23 | String line = enriched.toLine(); 24 | Assert.assertTrue(line != null && !line.isEmpty()); 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/test/java/com/datatorrent/demos/dimensions/telecom/CustomerEnrichedInfoEmbededRepoTester.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom; 6 | 7 | import org.junit.Test; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import com.datatorrent.demos.dimensions.telecom.generate.CustomerEnrichedInfoEmbededRepo; 12 | import com.datatorrent.demos.dimensions.telecom.model.CustomerEnrichedInfo.SingleRecord; 13 | 14 | public class CustomerEnrichedInfoEmbededRepoTester 15 | { 16 | private static final Logger logger = LoggerFactory.getLogger(CustomerEnrichedInfoEmbededRepoTester.class); 17 | 18 | @Test 19 | public void test() 20 | { 21 | CustomerEnrichedInfoEmbededRepo repo = CustomerEnrichedInfoEmbededRepo.instance(); 22 | for (int i = 0; i < 100; ++i) { 23 | SingleRecord record = repo.getRandomCustomerEnrichedInfo(); 24 | logger.info("{}", record); 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/test/java/com/datatorrent/demos/dimensions/telecom/EnrichedCDRHbaseInputOperatorTester.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.dimensions.telecom; 6 | 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | import com.datatorrent.demos.dimensions.telecom.conf.EnrichedCDRHBaseConfig; 11 | import com.datatorrent.demos.dimensions.telecom.operator.EnrichedCDRHbaseInputOperator; 12 | 13 | public class EnrichedCDRHbaseInputOperatorTester 14 | { 15 | 16 | @Before 17 | public void setUp() 18 | { 19 | EnrichedCDRHBaseConfig.instance().setHost("localhost"); 20 | } 21 | 22 | @Test 23 | public void testInternal() 24 | { 25 | EnrichedCDRHbaseInputOperator operator = new EnrichedCDRHbaseInputOperator(); 26 | operator.setup(null); 27 | operator.emitTuples(); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/test/resources/adsquery.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 1, 3 | "type": "dataQuery", 4 | "data": { 5 | "time": { 6 | "from":0, 7 | "to":60000, 8 | "bucket": "1m" 9 | }, 10 | "incompleteResultOK": true, 11 | "keys": {}, 12 | "fields": ["time", "publisher", "advertiser", "location", "impressions:COUNT", "impressions:SUM", "impressions:AVG", "clicks:COUNT", "clicks:SUM", "clicks:AVG", "cost:COUNT", "cost:SUM", "cost:AVG", "revenue:COUNT", "revenue:SUM", "revenue:AVG"] 13 | }, 14 | "countdown":1, 15 | "incompleteResultOK": true 16 | } 17 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | 
log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.kafka=info 20 | log4j.logger.org=info 21 | #log4j.logger.org.apache.commons.beanutils=warn 22 | log4j.logger.com.datatorrent=debug 23 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/test/resources/salesquery.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 1, 3 | "type": "dataQuery", 4 | "data": { 5 | "time": { 6 | "latestNumBuckets": 10, 7 | "bucket": "1m" 8 | }, 9 | "incompleteResultOK": true, 10 | "keys": {}, 11 | "fields": ["time", "channel", "region", "product", "tax:SUM", "sales:SUM", "discount:SUM"] 12 | }, 13 | "countdown": 1, 14 | "incompleteResultOK": true 15 | } 16 | -------------------------------------------------------------------------------- /dt-demo/dimensions/src/test/resources/satisfactionRatingSnapshotSchema_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "values": [ 3 | {"name": "current", "type": "long", "tags": ["current"]}, 4 | {"name": "min", "type": "long", "tags": ["min"]}, 5 | {"name": "max", "type": "long", "tags": ["max"]}, 6 | {"name": "threshold", "type": "long", "tags": ["threshold"]} 7 | ] 8 | } -------------------------------------------------------------------------------- /dt-demo/machinedata/src/main/java/com/datatorrent/demos/machinedata/data/MachineAggregatorAverage.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 4 | */ 5 | package com.datatorrent.demos.machinedata.data; 6 | 7 | import java.util.List; 8 | 9 | import org.apache.apex.malhar.lib.dimensions.aggregator.AggregatorAverage; 10 | import org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator; 11 | 12 | import com.google.common.collect.ImmutableList; 13 | 14 | import com.datatorrent.api.annotation.Name; 15 | 16 | @Name("AVG") 17 | /** 18 | * @since 3.2.0 19 | */ 20 | public class MachineAggregatorAverage extends AggregatorAverage 21 | { 22 | private static final long serialVersionUID = 201510130110L; 23 | 24 | /** 25 | * The singleton instance of this class. 26 | */ 27 | public static final MachineAggregatorAverage INSTANCE = new MachineAggregatorAverage(); 28 | 29 | public static final List> MACHINE_CHILD_AGGREGATORS = ImmutableList.of( 30 | (Class)MachineAggregatorSum.class, 31 | (Class)MachineAggregatorCount.class); 32 | 33 | protected MachineAggregatorAverage() 34 | { 35 | } 36 | 37 | @Override 38 | public List> getChildAggregators() 39 | { 40 | return MACHINE_CHILD_AGGREGATORS; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /dt-demo/machinedata/src/main/java/com/datatorrent/demos/machinedata/data/MachineAggregatorHardCodedCount.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.machinedata.data; 6 | 7 | import org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor; 8 | 9 | /** 10 | * @since 3.2.0 11 | */ 12 | public class MachineAggregatorHardCodedCount extends AbstractMachineAggregatorHardcoded 13 | { 14 | private static final long serialVersionUID = 102220150244L; 15 | 16 | private MachineAggregatorHardCodedCount() 17 | { 18 | //for kryo 19 | } 20 | 21 | public MachineAggregatorHardCodedCount(int ddID, DimensionsDescriptor dimensionsDescriptor) 22 | { 23 | super(ddID, dimensionsDescriptor); 24 | } 25 | 26 | @Override 27 | public void aggregate(MachineHardCodedAggregate dest, MachineInfo src) 28 | { 29 | dest.count++; 30 | } 31 | 32 | @Override 33 | public void aggregate(MachineHardCodedAggregate dest, MachineHardCodedAggregate src) 34 | { 35 | dest.count += src.count; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /dt-demo/machinedata/src/main/java/com/datatorrent/demos/machinedata/data/MachineAggregatorHardCodedSum.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 DataTorrent, Inc. 3 | * All rights reserved. 
4 | */ 5 | package com.datatorrent.demos.machinedata.data; 6 | 7 | import org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor; 8 | 9 | /** 10 | * @since 3.2.0 11 | */ 12 | public class MachineAggregatorHardCodedSum extends AbstractMachineAggregatorHardcoded 13 | { 14 | private static final long serialVersionUID = 201510230657L; 15 | 16 | private MachineAggregatorHardCodedSum() 17 | { 18 | //for kryo 19 | } 20 | 21 | public MachineAggregatorHardCodedSum(int ddID, DimensionsDescriptor dimensionsDescriptor) 22 | { 23 | super(ddID, dimensionsDescriptor); 24 | } 25 | 26 | @Override 27 | public MachineHardCodedAggregate getGroup(MachineInfo src, int aggregatorIndex) 28 | { 29 | MachineHardCodedAggregate aggregate = super.getGroup(src, aggregatorIndex); 30 | aggregate.sum = true; 31 | 32 | return aggregate; 33 | } 34 | 35 | @Override 36 | public void aggregate(MachineHardCodedAggregate dest, MachineInfo src) 37 | { 38 | dest.cpuUsage += src.getCpu(); 39 | dest.hddUsage += src.getHdd(); 40 | dest.ramUsage += src.getRam(); 41 | } 42 | 43 | @Override 44 | public void aggregate(MachineHardCodedAggregate dest, MachineHardCodedAggregate src) 45 | { 46 | dest.cpuUsage += src.cpuUsage; 47 | dest.hddUsage += src.hddUsage; 48 | dest.ramUsage += src.ramUsage; 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /dt-demo/machinedata/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | 
log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /dt-demo/starter-app/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | com.datatorrent 6 | dt-demos 7 | 3.4.0-SNAPSHOT 8 | 9 | 10 | starter-app 11 | jar 12 | 13 | DataTorrent Starter Application Package 14 | Empty Application Package that includes the Malhar and Megh operator libraries for basis of DT AppBuilder 15 | 16 | 17 | 3.4.0 18 | 19 | 20 | 21 | 22 | 23 | org.apache.apex 24 | malhar-library 25 | ${malhar.version} 26 | 27 | 28 | com.datatorrent 29 | dt-library 30 | ${megh.version} 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /dt-demo/starter-app/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /dt-demo/starter-app/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | 
log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # DataTorrent tools 2 | 3 | This directory contains some useful tools. 4 | 5 | + `monitor.py` -- A python script that uses the Gateway REST API to monitor an Apex 6 | application 7 | -------------------------------------------------------------------------------- /training/metrics-app/README.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | Training example to demonstrate metrics platform capabilities. 
4 | -------------------------------------------------------------------------------- /training/metrics-app/XmlJavadocCommentsExtractor.xsl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /training/metrics-app/src/main/java/com/datatorrent/apps/PojoEvent.java: -------------------------------------------------------------------------------- 1 | package com.datatorrent.apps; 2 | 3 | public class PojoEvent 4 | { 5 | @Override 6 | public String toString() 7 | { 8 | return "PojoEvent [accountNumber=" + accountNumber + ", name=" + name + ", amount=" + amount + "]"; 9 | } 10 | 11 | private int accountNumber; 12 | private String name; 13 | private int amount; 14 | 15 | public int getAccountNumber() 16 | { 17 | return accountNumber; 18 | } 19 | 20 | public void setAccountNumber(int accountNumber) 21 | { 22 | this.accountNumber = accountNumber; 23 | } 24 | 25 | public String getName() 26 | { 27 | return name; 28 | } 29 | 30 | public void setName(String name) 31 | { 32 | this.name = name; 33 | } 34 | 35 | public int getAmount() 36 | { 37 | return amount; 38 | } 39 | 40 | public void setAmount(int amount) 41 | { 42 | this.amount = amount; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /training/metrics-app/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | 
log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /training/random-to-hdfs/README.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | Training example to demonstrate minimalist application random to hdfs. 4 | -------------------------------------------------------------------------------- /training/random-to-hdfs/XmlJavadocCommentsExtractor.xsl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /training/random-to-hdfs/src/main/java/com/datatorrent/apps/Application.java: -------------------------------------------------------------------------------- 1 | package com.datatorrent.apps; 2 | 3 | import org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.BytesFileOutputOperator; 4 | import org.apache.hadoop.conf.Configuration; 5 | 6 | import com.datatorrent.api.DAG; 7 | import com.datatorrent.api.StreamingApplication; 8 | import com.datatorrent.api.annotation.ApplicationAnnotation; 9 | 10 | @ApplicationAnnotation(name = "Random-to-HDFS") 11 | public class Application implements StreamingApplication 12 | { 13 | public void 
populateDAG(DAG dag, Configuration conf) 14 | { 15 | POJOGenerator generator = dag.addOperator("POJOGenerator", POJOGenerator.class); 16 | BytesFileOutputOperator fileOutput = dag.addOperator("fileOutput", BytesFileOutputOperator.class); 17 | 18 | dag.addStream("data", generator.out, fileOutput.input); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /training/random-to-hdfs/src/main/resources/META-INF/properties-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.operator.fileOutput.prop.filePath 5 | /tmp 6 | 7 | 8 | dt.operator.fileOutput.prop.outputFileName 9 | output.txt 10 | 11 | 12 | dt.attr.CHECKPOINT_WINDOW_COUNT 13 | 10 14 | 15 | 16 | dt.operator.fileOutput.prop.maxIdleWindows 17 | 10 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /training/random-to-hdfs/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.operator.fileOutput.prop.filePath 5 | /tmp 6 | 7 | 8 | dt.operator.fileOutput.prop.outputFileName 9 | output.txt 10 | 11 | 12 | dt.attr.MASTER_MEMORY_MB 13 | 1024 14 | 15 | 16 | dt.loggers.level 17 | com.datatorrent.*:DEBUG,org.apache.*:INFO 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /training/random-to-hdfs/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | 
log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /tutorials/cassandraInput/src/main/resources/META-INF/example.cql: -------------------------------------------------------------------------------- 1 | CREATE KEYSPACE IF NOT EXISTS testapp WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 3 }; 2 | 3 | USE testapp; 4 | 5 | CREATE TABLE IF NOT EXISTS testapp.dt_meta(dt_app_id TEXT, dt_operator_id INT, dt_window BIGINT, PRIMARY KEY (dt_app_id, dt_operator_id)); 6 | 7 | CREATE TABLE IF NOT EXISTS testapp.TestUser(id uuid PRIMARY KEY, fname text, lname text, city text); 8 | 9 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'pune', 'Neha', 'Singh'); 10 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'Lindon', 'Tina', 'Laura'); 11 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'Sunnyvale', 'Tom', 'Zing'); 12 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'Banglore', 'Priya', 'Gupta'); 13 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'Hydrabad', 'Seeta', 'Reddy'); 14 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'San Jose', 'Sam', 'Honk'); 15 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'pune', 'Rita', 'Sony'); 16 | INSERT INTO 
testapp.testuser(id,city,fname,lname) VALUES(now(), 'Sunnyvale', 'Megh', 'Gill'); 17 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'San Jose', 'Tina', 'Mann'); 18 | INSERT INTO testapp.testuser(id,city,fname,lname) VALUES(now(), 'San Jose', 'Sarah', 'Tom'); 19 | -------------------------------------------------------------------------------- /tutorials/cassandraInput/src/main/resources/META-INF/properties-CassandraInputApplication.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | dt.operator.CassandraReader.prop.tablename 4 | TestUser 5 | 6 | 7 | dt.operator.CassandraReader.prop.store.node 8 | localhost 9 | 10 | 11 | dt.operator.CassandraReader.prop.store.keyspace 12 | testapp 13 | 14 | 15 | dt.operator.CassandraReader.port.outputPort.attr.TUPLE_CLASS 16 | com.datatorrent.cassandra.TestUser 17 | 18 | 19 | dt.operator.CassandraReader.prop.primaryKeyColumn 20 | id 21 | 22 | 23 | dt.operator.CassandraReader.prop.query 24 | select * from testapp.%t 25 | 26 | 27 | dt.operator.fileWriter.prop.filePath 28 | /tmp/examples/input 29 | 30 | 31 | dt.operator.fileWriter.prop.outputFileName 32 | cassandraData 33 | 34 | 35 | dt.loggers.level 36 | com.datatorrent.contrib.*:DEBUG 37 | 38 | 39 | -------------------------------------------------------------------------------- /tutorials/cassandraOutput/src/site/conf/properties-CassandraOutputTestApp.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | dt.application.CassandraOutputApplication.operator.*.attr.MEMORY_MB 4 | 5 | 6 | 7 | dt.application.CassandraOutputApplication.attr.MASTER_MEMORY_MB 8 | 9 | 10 | 11 | dt.application.CassandraOutputApplication.prop.rowsCount 12 | 13 | 14 | 15 | dt.operator.CassandraDataWriter.prop.tablename 16 | 17 | 18 | 19 | dt.operator.CassandraDataWriter.prop.store.node 20 | 21 | 22 | 23 | dt.operator.CassandraDataWriter.prop.store.keyspace 24 | 25 | 26 | 27 | 
dt.operator.CassandraDataWriter.port.input.attr.TUPLE_CLASS 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /tutorials/cassandraOutput/src/test/resources/example.cql: -------------------------------------------------------------------------------- 1 | CREATE KEYSPACE IF NOT EXISTS testapp WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 3 }; 2 | 3 | USE testapp; 4 | 5 | CREATE TABLE IF NOT EXISTS testapp.dt_meta(dt_app_id TEXT, dt_operator_id INT, dt_window BIGINT, PRIMARY KEY (dt_app_id, dt_operator_id)); 6 | 7 | CREATE TABLE IF NOT EXISTS testapp.TestUser(id uuid PRIMARY KEY, fname text, lname text, city text); -------------------------------------------------------------------------------- /tutorials/cassandraOutput/src/test/resources/properties-CassandraOutputTestApp.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | dt.operator.CassandraDataWriter.prop.tablename 4 | TestUser 5 | 6 | 7 | dt.operator.CassandraDataWriter.prop.store.node 8 | localhost 9 | 10 | 11 | dt.operator.CassandraDataWriter.prop.store.keyspace 12 | testapp 13 | 14 | 15 | dt.operator.CassandraDataWriter.port.input.attr.TUPLE_CLASS 16 | com.datatorrent.cassandra.TestUser 17 | 18 | 19 | -------------------------------------------------------------------------------- /tutorials/exactly-once/README.md: -------------------------------------------------------------------------------- 1 | #Examples for Exactly-Once with Apache Apex 2 | 3 | [How does it work?](https://www.datatorrent.com/blog/end-to-end-exactly-once-with-apache-apex/) 4 | 5 | ## Read from Kafka, write to JDBC 6 | 7 | This application shows exactly-once output to JDBC through transactions: 8 | 9 | [Application](src/main/java/com/example/myapexapp/Application.java) 10 | 11 | [Test](src/test/java/com/example/myapexapp/ApplicationTest.java) 12 | 13 | ## Read from Kafka, write to Files 14 | 15 | This application shows 
exactly-once output to HDFS through atomic file operation: 16 | 17 | [Application](src/main/java/com/example/myapexapp/AtomicFileOutputApp.java) 18 | 19 | [Test](src/test/java/com/example/myapexapp/AtomicFileOutputAppTest.java) 20 | 21 | -------------------------------------------------------------------------------- /tutorials/exactly-once/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | dt.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 9 | 1000 10 | 11 | 12 | -------------------------------------------------------------------------------- /tutorials/exactly-once/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | log4j.logger.kafka.server=info 21 | log4j.logger.kafka.request.logger=info 22 | #log4j.logger.org.apache.commons.beanutils=warn 23 | log4j.logger.com.datatorrent=info 24 | -------------------------------------------------------------------------------- 
/tutorials/fileIO-multiDir/README.md: -------------------------------------------------------------------------------- 1 | This example is very similar to the fileIO example with one difference: it shows how 2 | create a set of partitions separated into slices where each slice monitors a different 3 | input directory. A custom partitioner and directory scanner are used. 4 | -------------------------------------------------------------------------------- /tutorials/fileIO-multiDir/src/main/java/com/example/fileIO/Application.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Put your copyright and license info here. 3 | */ 4 | package com.example.fileIO; 5 | 6 | import org.apache.hadoop.conf.Configuration; 7 | 8 | import com.datatorrent.api.annotation.ApplicationAnnotation; 9 | import com.datatorrent.api.StreamingApplication; 10 | import com.datatorrent.api.DAG; 11 | 12 | //import com.datatorrent.lib.io.fs.FileReaderMultiDir; 13 | import static com.datatorrent.api.Context.PortContext.*; 14 | 15 | @ApplicationAnnotation(name="FileIO") 16 | public class Application implements StreamingApplication 17 | { 18 | 19 | @Override 20 | public void populateDAG(DAG dag, Configuration conf) 21 | { 22 | // create operators 23 | FileReader reader = dag.addOperator("read", FileReader.class); 24 | FileWriter writer = dag.addOperator("write", FileWriter.class); 25 | 26 | reader.setScanner(new FileReaderMultiDir.SlicedDirectoryScanner()); 27 | 28 | // using parallel partitioning ensures that lines from a single file are handled 29 | // by the same writer 30 | // 31 | dag.setInputPortAttribute(writer.input, PARTITION_PARALLEL, true); 32 | dag.setInputPortAttribute(writer.control, PARTITION_PARALLEL, true); 33 | 34 | dag.addStream("data", reader.output, writer.input); 35 | dag.addStream("ctrl", reader.control, writer.control); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- 
/tutorials/fileIO-multiDir/src/main/resources/unused-log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.logger.org=info 8 | #log4j.logger.org.apache.commons.beanutils=warn 9 | log4j.logger.com.datatorrent=info 10 | -------------------------------------------------------------------------------- /tutorials/fileIO-multiDir/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | dt.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 9 | 1000 10 | 11 | 12 | -------------------------------------------------------------------------------- /tutorials/fileIO-multiDir/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 
18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/fileIO-simple/README.md: -------------------------------------------------------------------------------- 1 | Sample application to show how to use the file input and output operators. 2 | 3 | During a typical run on a Hadoop cluster, when input files are dropped into the 4 | configured input directory (e.g. `/tmp/SimpleFileIO/input-dir`), the application 5 | will create temporary files like this at the configured output location in 6 | HDFS (e.g. `/tmp/SimpleFileIO/output-dir`) and copy all input file data to it: 7 | 8 | /tmp/SimpleFileIO/output-dir/myfile_p2.0.1465929407447.tmp 9 | 10 | When the file size exceeds the configured limit of 100000 bytes, a new file with 11 | a name like `myfile_p2.1.1465929407447.tmp` will be opened and, a minute or two 12 | later, the old file will be renamed to `myfile_p2.0`. 
13 | -------------------------------------------------------------------------------- /tutorials/fileIO-simple/src/main/java/com/example/myapexapp/Application.java: -------------------------------------------------------------------------------- 1 | package com.example.myapexapp; 2 | 3 | import org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator; 4 | import org.apache.hadoop.conf.Configuration; 5 | 6 | import com.datatorrent.api.annotation.ApplicationAnnotation; 7 | import com.datatorrent.api.StreamingApplication; 8 | import com.datatorrent.api.DAG; 9 | 10 | /** 11 | * Simple application illustrating file input-output 12 | */ 13 | @ApplicationAnnotation(name="SimpleFileIO") 14 | public class Application implements StreamingApplication 15 | { 16 | 17 | @Override 18 | public void populateDAG(DAG dag, Configuration conf) 19 | { 20 | // create operators 21 | LineByLineFileInputOperator in = dag.addOperator("input", 22 | new LineByLineFileInputOperator()); 23 | FileOutputOperator out = dag.addOperator("output", 24 | new FileOutputOperator()); 25 | // configure operators 26 | in.setDirectory("/tmp/SimpleFileIO/input-dir"); 27 | out.setFilePath("/tmp/SimpleFileIO/output-dir"); 28 | out.setMaxLength(1_000_000); // file rotation size 29 | 30 | // create streams 31 | dag.addStream("data", in.output, out.input); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /tutorials/fileIO-simple/src/main/java/com/example/myapexapp/FileOutputOperator.java: -------------------------------------------------------------------------------- 1 | package com.example.myapexapp; 2 | 3 | import javax.validation.constraints.NotNull; 4 | 5 | import com.datatorrent.api.Context; 6 | import com.datatorrent.lib.io.fs.AbstractFileOutputOperator; 7 | 8 | /** 9 | * Write incoming lines to output file 10 | */ 11 | public class FileOutputOperator extends AbstractFileOutputOperator 12 | { 13 | private static final String CHARSET_NAME = "UTF-8"; 14 | 
private static final String NL = System.lineSeparator(); 15 | 16 | @NotNull 17 | private String fileName; 18 | 19 | private transient String fName; // per partition file name 20 | 21 | @Override 22 | public void setup(Context.OperatorContext context) 23 | { 24 | // create file name for this partition by appending the operator id to 25 | // the base name 26 | // 27 | long id = context.getId(); 28 | fName = fileName + "_p" + id; 29 | super.setup(context); 30 | } 31 | 32 | @Override 33 | protected String getFileName(String tuple) 34 | { 35 | return fName; 36 | } 37 | 38 | @Override 39 | protected byte[] getBytesForTuple(String line) 40 | { 41 | byte result[] = null; 42 | try { 43 | result = (line + NL).getBytes(CHARSET_NAME); 44 | } catch (Exception e) { 45 | throw new RuntimeException(e); 46 | } 47 | return result; 48 | } 49 | 50 | // getters and setters 51 | public String getFileName() { return fileName; } 52 | public void setFileName(String v) { fileName = v; } 53 | } 54 | -------------------------------------------------------------------------------- /tutorials/fileIO-simple/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 15 | 16 | 17 | 18 | dt.application.SimpleFileIO.operator.input.prop.directory 19 | /tmp/SimpleFileIO/input-dir 20 | 21 | 22 | dt.application.SimpleFileIO.operator.output.prop.filePath 23 | /tmp/SimpleFileIO/output-dir 24 | 25 | 26 | dt.application.SimpleFileIO.operator.output.prop.fileName 27 | myfile 28 | 29 | 30 | dt.application.SimpleFileIO.operator.output.prop.maxLength 31 | 1000000 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /tutorials/fileIO-simple/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | dt.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 
9 | 1000 10 | 11 | 12 | -------------------------------------------------------------------------------- /tutorials/fileIO-simple/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/fileIO/src/main/java/com/example/fileIO/Application.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Put your copyright and license info here. 
 */
package com.example.fileIO;

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.annotation.ApplicationAnnotation;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.api.DAG;

import static com.datatorrent.api.Context.PortContext.*;

/**
 * File copy application: a partitioned FileReader streams lines (plus a
 * control stream) to FileWriter partitions that are deployed in parallel
 * with the readers.
 */
@ApplicationAnnotation(name="FileIO")
public class Application implements StreamingApplication
{

  @Override
  public void populateDAG(DAG dag, Configuration conf)
  {
    // create operators
    FileReader reader = dag.addOperator("read", FileReader.class);
    FileWriter writer = dag.addOperator("write", FileWriter.class);

    // using parallel partitioning ensures that lines from a single file are handled
    // by the same writer
    //
    dag.setInputPortAttribute(writer.input, PARTITION_PARALLEL, true);
    dag.setInputPortAttribute(writer.control, PARTITION_PARALLEL, true);

    // separate streams for data lines and the reader's control port
    dag.addStream("data", reader.output, writer.input);
    dag.addStream("ctrl", reader.control, writer.control);
  }
}
BytesFileWriter writer = dag.addOperator("write", BytesFileWriter.class); 20 | 21 | dag.setInputPortAttribute(writer.input, PARTITION_PARALLEL, true); 22 | dag.setInputPortAttribute(writer.control, PARTITION_PARALLEL, true); 23 | 24 | dag.addStream("data", reader.output, writer.input); 25 | dag.addStream("ctrl", reader.control, writer.control); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /tutorials/fileIO/src/main/resources/META-INF/properties-FileIO.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 15 | 16 | dt.application.FileIO.operator.read.prop.directory 17 | /tmp/fileIO/input-dir 18 | 19 | 20 | dt.application.FileIO.operator.write.prop.filePath 21 | /tmp/fileIO/output-dir 22 | 23 | 24 | dt.application.FileIO.operator.read.prop.partitionCount 25 | 5 26 | 27 | 28 | dt.loggers.level 29 | com.datatorrent.*:INFO,org.apache.*:INFO 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /tutorials/fileIO/src/main/resources/unused-log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.logger.org=info 8 | #log4j.logger.org.apache.commons.beanutils=warn 9 | log4j.logger.com.datatorrent=info 10 | -------------------------------------------------------------------------------- /tutorials/fileIO/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | dt.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 9 | 1000 10 | 11 | 12 | 
-------------------------------------------------------------------------------- /tutorials/fileIO/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/fileOutput/README.md: -------------------------------------------------------------------------------- 1 | Sample application to show how to use the file output operator along with 2 | partitioning and rolling file output. 3 | 4 | A typical run on a Hadoop cluster will create files like this at the configured 5 | output location in HDFS (e.g. 
`/tmp/fileOutput`) where the numeric extension is 6 | the sequnce number of rolling output files and the number following 'p' is the 7 | operator id of the partition that generated the file: 8 | 9 | /tmp/fileOutput/sequence_p3.0 10 | /tmp/fileOutput/sequence_p3.1 11 | /tmp/fileOutput/sequence_p4.0 12 | /tmp/fileOutput/sequence_p4.1 13 | 14 | Each file should contain lines like this where the second value is the number 15 | produced by the generator operator and the first is the corresponding operator id: 16 | 17 | [1, 1075] 18 | [1, 1095] 19 | [2, 1110] 20 | [2, 1120] 21 | 22 | Please note that there are no guarantees about the way operator ids are assigned 23 | to operators by the platform. 24 | -------------------------------------------------------------------------------- /tutorials/fileOutput/src/main/java/com/example/fileOutput/Application.java: -------------------------------------------------------------------------------- 1 | package com.example.fileOutput; 2 | 3 | import com.datatorrent.api.DAG; 4 | import com.datatorrent.api.StreamingApplication; 5 | import com.datatorrent.api.annotation.ApplicationAnnotation; 6 | import org.apache.hadoop.conf.Configuration; 7 | 8 | @ApplicationAnnotation(name="fileOutput") 9 | public class Application implements StreamingApplication 10 | { 11 | 12 | @Override 13 | public void populateDAG(DAG dag, Configuration conf) 14 | { 15 | 16 | SequenceGenerator generator = dag.addOperator("generator", SequenceGenerator.class); 17 | 18 | FileWriter writer = dag.addOperator("writer", FileWriter.class); 19 | 20 | // properties can be set here or from properties file 21 | //writer.setMaxLength(1 << 10); 22 | 23 | dag.addStream("data", generator.out, writer.input); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /tutorials/fileOutput/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.attr.MASTER_MEMORY_MB 
5 | 1024 6 | 7 | 8 | dt.application.MyFirstApplication.operator.randomGenerator.prop.maxTuples 9 | 1000 10 | 11 | 12 | -------------------------------------------------------------------------------- /tutorials/fileOutput/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.idea/ 3 | /target/ 4 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/main/java/com/example/FileToJdbcApp/FileReader.java: -------------------------------------------------------------------------------- 1 | package com.example.FileToJdbcApp; 2 | 3 | import com.datatorrent.api.DefaultOutputPort; 4 | import org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator; 5 | 6 | public class 
FileReader extends LineByLineFileInputOperator{ 7 | 8 | /** 9 | * output in bytes to match CsvParser input type 10 | */ 11 | public final transient DefaultOutputPort byteOutput = new DefaultOutputPort<>(); 12 | 13 | @Override 14 | protected void emit(String tuple) 15 | { 16 | output.emit(tuple); 17 | byteOutput.emit(tuple.getBytes()); 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/main/java/com/example/FileToJdbcApp/PojoEvent.java: -------------------------------------------------------------------------------- 1 | package com.example.FileToJdbcApp; 2 | 3 | public class PojoEvent 4 | { 5 | @Override 6 | public String toString() 7 | { 8 | return "PojoEvent [accountNumber=" + accountNumber + ", name=" + name + ", amount=" + amount + "]"; 9 | } 10 | 11 | private int accountNumber; 12 | private String name; 13 | private int amount; 14 | 15 | public int getAccountNumber() 16 | { 17 | return accountNumber; 18 | } 19 | 20 | public void setAccountNumber(int accountNumber) 21 | { 22 | this.accountNumber = accountNumber; 23 | } 24 | 25 | public String getName() 26 | { 27 | return name; 28 | } 29 | 30 | public void setName(String name) 31 | { 32 | this.name = name; 33 | } 34 | 35 | public int getAmount() 36 | { 37 | return amount; 38 | } 39 | 40 | public void setAmount(int amount) 41 | { 42 | this.amount = amount; 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.operator.JdbcOutput.prop.store.databaseDriver 5 | com.mysql.jdbc.Driver 6 | 7 | 8 | 9 | dt.operator.JdbcOutput.prop.store.databaseUrl 10 | jdbc:mysql://hostName:portNumber/dbName 11 | 12 | 13 | 14 | dt.operator.JdbcOutput.prop.store.userName 15 | root 16 | 17 | 18 | 19 | dt.operator.JdbcOutput.prop.store.password 20 | 
password 21 | 22 | 23 | 24 | dt.operator.JdbcOutput.prop.batchSize 25 | 5 26 | 27 | 28 | 29 | dt.operator.JdbcOutput.prop.tablename 30 | table_name 31 | 32 | 33 | 34 | dt.operator.JdbcOutput.port.input.attr.TUPLE_CLASS 35 | com.example.FileToJdbcApp.PojoEvent 36 | 37 | 38 | 39 | dt.operator.FileReader.prop.directory 40 | input_directory 41 | 42 | 43 | 44 | dt.loggers.level 45 | com.datatorrent.*:INFO,org.apache.*:INFO 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/main/resources/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "separator": ",", 3 | "quoteChar":"\"", 4 | "fields": [ 5 | { 6 | "name": "AccountNumber", 7 | "type": "INTEGER" 8 | }, 9 | { 10 | "name": "Name", 11 | "type": "String" 12 | }, 13 | { 14 | "name": "Amount", 15 | "type": "INTEGER" 16 | } 17 | ] 18 | } 19 | 20 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/site/conf/exampleCsvParser.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.application.FileToJdbcCsvParser.operator.CsvParser.port.out.attr.TUPLE_CLASS 5 | com.example.FileToJdbcApp.PojoEvent 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/site/conf/exampleCustomParser.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.application.FileToJdbcCustomParser.operator.CustomParser.prop.regexStr 5 | , 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/test/resources/example.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS testJdbc; 2 | 3 | USE testJdbc; 4 | 5 | CREATE TABLE IF NOT EXISTS `test_jdbc_table` ( 6 | `ACCOUNT_NO` int(11) 
NOT NULL, 7 | `NAME` varchar(255), 8 | `AMOUNT` int(11)); 9 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/fileToJdbc/src/test/resources/test-input/sample.txt: -------------------------------------------------------------------------------- 1 | 1,User1,1000 2 | 2,User2,2000 3 | 3,User3,3000 4 | 4,User4,4000 5 | 5,User5,5000 6 | 6,User6,6000 7 | 7,User7,7000 8 | 8,User8,8000 9 | 9,User9,9000 10 | 10,User10,10000 11 | -------------------------------------------------------------------------------- /tutorials/hdfs-sync/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 22 | 23 | 24 | 25 | dt.operator.HDFSInputModule.prop.files 26 | 
hdfs://source-namenode-service/user/dtuser/path-to-input-directory 27 | 28 | 29 | dt.operator.HDFSFileCopyModule.prop.outputDirectoryPath 30 | hdfs://destination-namenode-service/user/dtuser/path-to-input-directory 31 | 32 | 33 | -------------------------------------------------------------------------------- /tutorials/hdfs-sync/src/site/conf/cluster-memory-conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 22 | 23 | 24 | 25 | dt.operator.HDFSInputModule.prop.maxReaders 26 | 16 27 | 28 | 29 | dt.operator.HDFSInputModule.prop.blocksThreshold 30 | 16 31 | 32 | 33 | -------------------------------------------------------------------------------- /tutorials/hdfs-sync/src/site/conf/sandbox-memory-conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 22 | 23 | 24 | 25 | dt.operator.HDFSInputModule.prop.maxReaders 26 | 1 27 | 28 | 29 | dt.operator.HDFSInputModule.prop.blocksThreshold 30 | 1 31 | 32 | 33 | dt.operator.*.attr.MEMORY_MB 34 | 768 35 | 36 | 37 | dt.application.HDFS-Sync-App.attr.MASTER_MEMORY_MB 38 | 768 39 | 40 | 41 | -------------------------------------------------------------------------------- /tutorials/hdfs2kafka/README.md: -------------------------------------------------------------------------------- 1 | This sample application shows how to read lines from files in HDFS and write 2 | them out to a Kafka topic. Each line of the input file is considered a separate 3 | message. The topic name, the name of the directory that is monitored for input 4 | files, and other parameters are configurable in `META_INF/properties.xml`. 
package com.example.myapexapp;

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.annotation.ApplicationAnnotation;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.api.DAG;
import com.datatorrent.api.DAG.Locality;
import com.datatorrent.contrib.kafka.KafkaSinglePortOutputOperator;
import org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator;

/**
 * Reads lines from files in a monitored HDFS directory and writes each
 * line as a separate message to a Kafka topic. The directory, topic and
 * producer properties are configured in META-INF/properties.xml.
 */
@ApplicationAnnotation(name="Hdfs2Kafka")
public class Application implements StreamingApplication
{

  @Override
  public void populateDAG(DAG dag, Configuration conf)
  {
    // file source: emits one String tuple per input line
    LineByLineFileInputOperator in = dag.addOperator("lines",
        LineByLineFileInputOperator.class);

    // Kafka sink; topic and producerProperties are injected from configuration
    KafkaSinglePortOutputOperator out = dag.addOperator("kafkaOutput", new KafkaSinglePortOutputOperator());

    // CONTAINER_LOCAL requests that both operators be deployed in the same
    // container for this simple two-operator pipeline
    dag.addStream("data", in.output, out.inputPort).setLocality(Locality.CONTAINER_LOCAL);
  }
}
dt.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | dt.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 9 | 1000 10 | 11 | 12 | -------------------------------------------------------------------------------- /tutorials/hdfs2kafka/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /tutorials/hdht/src/main/java/com/example/Application.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Put your copyright and license info here. 
3 | */ 4 | package com.example; 5 | 6 | import org.apache.hadoop.conf.Configuration; 7 | 8 | import com.datatorrent.api.annotation.ApplicationAnnotation; 9 | import com.datatorrent.api.StreamingApplication; 10 | import com.datatorrent.api.DAG; 11 | import com.datatorrent.api.DAG.Locality; 12 | import com.datatorrent.lib.io.ConsoleOutputOperator; 13 | 14 | @ApplicationAnnotation(name="MyFirstApplication") 15 | public class Application implements StreamingApplication 16 | { 17 | 18 | @Override 19 | public void populateDAG(DAG dag, Configuration conf) 20 | { 21 | // Sample DAG with 2 operators 22 | // Replace this code with the DAG you want to build 23 | 24 | RandomNumberGenerator randomGenerator = dag.addOperator("randomGenerator", RandomNumberGenerator.class); 25 | randomGenerator.setNumTuples(500); 26 | 27 | ConsoleOutputOperator cons = dag.addOperator("console", new ConsoleOutputOperator()); 28 | 29 | dag.addStream("randomData", randomGenerator.out, cons.input).setLocality(Locality.CONTAINER_LOCAL); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /tutorials/hdht/src/main/java/com/example/RandomNumberGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Put your copyright and license info here. 3 | */ 4 | package com.example; 5 | 6 | import com.datatorrent.api.DefaultOutputPort; 7 | import com.datatorrent.api.InputOperator; 8 | import com.datatorrent.common.util.BaseOperator; 9 | 10 | /** 11 | * This is a simple operator that emits random number. 
12 | */ 13 | public class RandomNumberGenerator extends BaseOperator implements InputOperator 14 | { 15 | private int numTuples = 100; 16 | private transient int count = 0; 17 | 18 | public final transient DefaultOutputPort out = new DefaultOutputPort(); 19 | 20 | @Override 21 | public void beginWindow(long windowId) 22 | { 23 | count = 0; 24 | } 25 | 26 | @Override 27 | public void emitTuples() 28 | { 29 | if (count++ < numTuples) { 30 | out.emit(Math.random()); 31 | } 32 | } 33 | 34 | public int getNumTuples() 35 | { 36 | return numTuples; 37 | } 38 | 39 | /** 40 | * Sets the number of tuples to be emitted every window. 41 | * @param numTuples number of tuples 42 | */ 43 | public void setNumTuples(int numTuples) 44 | { 45 | this.numTuples = numTuples; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /tutorials/hdht/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 15 | 16 | dt.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 17 | 1000 18 | 19 | 20 | dt.application.MyFirstApplication.operator.console.prop.stringFormat 21 | hello world: %s 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /tutorials/hdht/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | dt.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 9 | 1000 10 | 11 | 12 | -------------------------------------------------------------------------------- /tutorials/hdht/src/test/java/com/example/ApplicationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Put your copyright and license info here. 
3 | */ 4 | package com.example; 5 | 6 | import java.io.IOException; 7 | 8 | import javax.validation.ConstraintViolationException; 9 | 10 | import org.junit.Assert; 11 | 12 | import org.apache.hadoop.conf.Configuration; 13 | import org.junit.Test; 14 | 15 | import com.datatorrent.api.LocalMode; 16 | import com.example.Application; 17 | 18 | /** 19 | * Test the DAG declaration in local mode. 20 | */ 21 | public class ApplicationTest { 22 | 23 | @Test 24 | public void testApplication() throws IOException, Exception { 25 | try { 26 | LocalMode lma = LocalMode.newInstance(); 27 | Configuration conf = new Configuration(false); 28 | conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml")); 29 | lma.prepareDAG(new Application(), conf); 30 | LocalMode.Controller lc = lma.getController(); 31 | lc.run(10000); // runs for 10 seconds and quits 32 | } catch (ConstraintViolationException e) { 33 | Assert.fail("constraint violations: " + e.getConstraintViolations()); 34 | } 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /tutorials/hdht/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | 
log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=info 22 | log4j.logger.com.datatorrent.contrib.hdht=debug -------------------------------------------------------------------------------- /tutorials/jdbcIngest/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /tutorials/jdbcIngest/src/main/java/com/example/mydtapp/FileLineOutputOperator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
/**
 * Plain data holder for one account record (account number, holder name,
 * amount). Serves as the tuple type in the JDBC ingest demo.
 */
public class PojoEvent
{
  private int accountNumber;
  private String name;
  private int amount;

  /** @return the account number of this event */
  public int getAccountNumber()
  {
    return accountNumber;
  }

  /** @param accountNumber the account number to set */
  public void setAccountNumber(int accountNumber)
  {
    this.accountNumber = accountNumber;
  }

  /** @return the account holder's name */
  public String getName()
  {
    return name;
  }

  /** @param name the account holder's name to set */
  public void setName(String name)
  {
    this.name = name;
  }

  /** @return the amount associated with this event */
  public int getAmount()
  {
    return amount;
  }

  /** @param amount the amount to set */
  public void setAmount(int amount)
  {
    this.amount = amount;
  }

  /** Human-readable rendering of all three fields. */
  @Override
  public String toString()
  {
    StringBuilder sb = new StringBuilder("PojoEvent [accountNumber=");
    sb.append(accountNumber);
    sb.append(", name=").append(name);
    sb.append(", amount=").append(amount);
    sb.append(']');
    return sb.toString();
  }
}
11 | primary key(`ACCOUNT_NO`) 12 | ) ENGINE=MyISAM DEFAULT CHARSET=latin1; 13 | 14 | INSERT INTO `test_event_table` (`ACCOUNT_NO`, `NAME`, `AMOUNT`) VALUES 15 | (1, 'User1', 1000), 16 | (2, 'User2', 2000), 17 | (3, 'User3', 3000), 18 | (4, 'User4', 4000), 19 | (5, 'User5', 5000), 20 | (6, 'User6', 6000), 21 | (7, 'User7', 7000), 22 | (8, 'User8', 8000), 23 | (9, 'User9', 9000), 24 | (10, 'User10', 1000); 25 | -------------------------------------------------------------------------------- /tutorials/jdbcIngest/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/jdbcToJdbc/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /tutorials/jdbcToJdbc/src/main/java/com/example/mydtapp/PojoEvent.java: 
/**
 * Plain data holder for one account record (account number, holder name,
 * amount). Serves as the tuple type in the JDBC-to-JDBC demo.
 */
public class PojoEvent
{
  /** Human-readable rendering of all three fields. */
  @Override
  public String toString()
  {
    // Prefix matches the class name; it previously said "TestPOJOEvent",
    // a copy-paste leftover that made log/console output misleading.
    return "PojoEvent [accountNumber=" + accountNumber + ", name=" + name + ", amount=" + amount + "]";
  }

  private int accountNumber;
  private String name;
  private int amount;

  /** @return the account number of this event */
  public int getAccountNumber()
  {
    return accountNumber;
  }

  /** @param accountNumber the account number to set */
  public void setAccountNumber(int accountNumber)
  {
    this.accountNumber = accountNumber;
  }

  /** @return the account holder's name */
  public String getName()
  {
    return name;
  }

  /** @param name the account holder's name to set */
  public void setName(String name)
  {
    this.name = name;
  }

  /** @return the amount associated with this event */
  public int getAmount()
  {
    return amount;
  }

  /** @param amount the amount to set */
  public void setAmount(int amount)
  {
    this.amount = amount;
  }
}
/**
 * Test the DAG declaration in local mode.
 * The assumption to run this test case is that test_event_table,meta-table and
 * test_output_event_table are created already
 */
public class ApplicationTest
{

  @Test
  @Ignore
  public void testApplication() throws IOException, Exception
  {
    try {
      LocalMode lma = LocalMode.newInstance();
      Configuration conf = new Configuration(false);
      // load the application's default operator properties
      conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
      lma.prepareDAG(new JdbcToJdbcApp(), conf);
      LocalMode.Controller lc = lma.getController();
      lc.run(50000); // runs for 50 seconds and quits (argument is milliseconds; old comment said 10 seconds)
    } catch (ConstraintViolationException e) {
      // surface bean-validation problems as a readable test failure
      Assert.fail("constraint violations: " + e.getConstraintViolations());
    }
  }

}
BIGINT NOT NULL, 35 | UNIQUE (`dt_app_id`, `dt_operator_id`, `dt_window`) 36 | ) ENGINE=MyISAM DEFAULT CHARSET=latin1; 37 | -------------------------------------------------------------------------------- /tutorials/jdbcToJdbc/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/jms-output-exactlyonce/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | apex.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | apex.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 9 | 1000 10 | 11 | 12 | -------------------------------------------------------------------------------- /tutorials/jms-output-exactlyonce/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 
| 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /tutorials/jmsActiveMQ/src/main/java/com/example/jmsActiveMQ/ActiveMQApplication.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Put your copyright and license info here. 
3 | */ 4 | package com.example.jmsActiveMQ; 5 | 6 | import org.apache.hadoop.conf.Configuration; 7 | 8 | import com.datatorrent.api.annotation.ApplicationAnnotation; 9 | import com.datatorrent.api.StreamingApplication; 10 | import com.datatorrent.api.DAG; 11 | import com.datatorrent.lib.io.jms.JMSStringInputOperator; 12 | 13 | @ApplicationAnnotation(name="Amq2HDFS") 14 | public class ActiveMQApplication implements StreamingApplication 15 | { 16 | 17 | @Override 18 | public void populateDAG(DAG dag, Configuration conf) 19 | { 20 | JMSStringInputOperator amqInput = dag.addOperator("amqIn", 21 | new JMSStringInputOperator()); 22 | 23 | LineOutputOperator out = dag.addOperator("fileOut", new LineOutputOperator()); 24 | 25 | dag.addStream("data", amqInput.output, out.input); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /tutorials/jmsActiveMQ/src/main/java/com/example/jmsActiveMQ/LineOutputOperator.java: -------------------------------------------------------------------------------- 1 | package com.example.jmsActiveMQ; 2 | 3 | import java.nio.charset.Charset; 4 | import java.nio.charset.StandardCharsets; 5 | 6 | import javax.validation.constraints.NotNull; 7 | 8 | import com.datatorrent.lib.io.fs.AbstractFileOutputOperator; 9 | 10 | /** 11 | * Converts each tuple to a string and writes it as a new line to the output file 12 | */ 13 | public class LineOutputOperator extends AbstractFileOutputOperator 14 | { 15 | private static final String NL = System.lineSeparator(); 16 | private static final Charset CS = StandardCharsets.UTF_8; 17 | 18 | @NotNull 19 | private String baseName; 20 | 21 | public String getBaseName() 22 | { 23 | return baseName; 24 | } 25 | 26 | public void setBaseName(String v) 27 | { 28 | baseName = v; 29 | } 30 | 31 | @Override 32 | protected String getFileName(String tuple) 33 | { 34 | return baseName; 35 | } 36 | 37 | @Override 38 | protected byte[] getBytesForTuple(String tuple) 39 | { 40 | 
String result = tuple + NL; 41 | return result.getBytes(CS); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /tutorials/jmsActiveMQ/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 15 | 16 | dt.operator.fileOut.prop.filePath 17 | /tmp 18 | 19 | 20 | dt.operator.fileOut.prop.baseName 21 | test 22 | 23 | 24 | dt.operator.fileOut.prop.maxLength 25 | 45 26 | 27 | 28 | dt.operator.fileOut.prop.rotationWindows 29 | 1 30 | 31 | 32 | dt.operator.amqIn.prop.connectionFactoryProperties.brokerURL 33 | vm://localhost 34 | 35 | 36 | dt.operator.amqIn.prop.subject 37 | jms4Amq 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /tutorials/jmsActiveMQ/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.operator.fileOut.prop.filePath 5 | target/ 6 | 7 | 8 | dt.operator.fileOut.prop.baseName 9 | jmsTestClstr 10 | 11 | 12 | dt.operator.fileOut.prop.maxLength 13 | 45 14 | 15 | 16 | dt.operator.fileOut.prop.rotationWindows 17 | 1 18 | 19 | 20 | dt.operator.amqIn.prop.connectionFactoryProperties.brokerURL 21 | 22 | tcp://192.168.128.142:61616 23 | 24 | 25 | dt.operator.amqIn.prop.subject 26 | test1 27 | 28 | 29 | -------------------------------------------------------------------------------- /tutorials/jmsActiveMQ/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | 
log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /tutorials/jmsSqs/src/main/java/com/example/jmsSqs/LineOutputOperator.java: -------------------------------------------------------------------------------- 1 | package com.example.jmsSqs; 2 | 3 | import java.nio.charset.Charset; 4 | import java.nio.charset.StandardCharsets; 5 | 6 | import javax.validation.constraints.NotNull; 7 | 8 | import com.datatorrent.lib.io.fs.AbstractFileOutputOperator; 9 | 10 | /** 11 | * Converts each tuple to a string and writes it as a new line to the output file 12 | */ 13 | public class LineOutputOperator extends AbstractFileOutputOperator 14 | { 15 | private static final String NL = System.lineSeparator(); 16 | private static final Charset CS = StandardCharsets.UTF_8; 17 | 18 | @NotNull 19 | private String baseName; 20 | 21 | public String getBaseName() 22 | { 23 | return baseName; 24 | } 25 | 26 | public void setBaseName(String v) 27 | { 28 | baseName = v; 29 | } 30 | 31 | @Override 32 | protected String getFileName(String tuple) 33 | { 34 | return baseName; 35 | } 36 | 37 | @Override 38 | protected byte[] getBytesForTuple(String tuple) 39 | { 40 | String result = tuple + NL; 41 | return result.getBytes(CS); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- 
/tutorials/jmsSqs/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.operator.fileOut.prop.filePath 5 | terget/ 6 | 7 | 8 | dt.operator.fileOut.prop.baseName 9 | test 10 | 11 | 12 | dt.operator.fileOut.prop.maxLength 13 | 50 14 | 15 | 16 | dt.operator.fileOut.prop.rotationWindows 17 | 10 18 | 19 | 20 | dt.operator.sqsIn.prop.ackMode 21 | AUTO_ACKNOWLEDGE 22 | 23 | 24 | dt.operator.sqsIn.prop.transacted 25 | false 26 | 27 | 28 | dt.operator.sqsIn.prop.subject 29 | DtQueue 30 | 31 | 32 | dt.operator.sqsIn.prop.aws.key.id 33 | awsKeyId 34 | 35 | 36 | dt.operator.sqsIn.prop.aws.key.secret 37 | awsKeySecret 38 | 39 | 40 | dt.operator.sqsIn.prop.aws.region 41 | us-east-1 42 | 43 | 44 | -------------------------------------------------------------------------------- /tutorials/jmsSqs/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | 
log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /tutorials/kafka/README.md: -------------------------------------------------------------------------------- 1 | This sample application show how to read lines from a Kafka topic using the new (0.9) 2 | Kafka input operator and write them out to HDFS using rolling files with a bounded size. 3 | 4 | The output files start out with a `.tmp` extension and get renamed when they reach the 5 | size bound. Additional operators to perform parsing, aggregation or filtering can be 6 | inserted into this pipeline as needed. 7 | -------------------------------------------------------------------------------- /tutorials/kafka/src/main/java/com/example/myapexapp/KafkaApp.java: -------------------------------------------------------------------------------- 1 | package com.example.myapexapp; 2 | 3 | import org.apache.apex.malhar.kafka.AbstractKafkaInputOperator; 4 | import org.apache.apex.malhar.kafka.KafkaSinglePortInputOperator; 5 | import org.apache.hadoop.conf.Configuration; 6 | 7 | import com.datatorrent.api.DAG; 8 | import com.datatorrent.api.StreamingApplication; 9 | import com.datatorrent.api.annotation.ApplicationAnnotation; 10 | 11 | @ApplicationAnnotation(name="Kafka2HDFS") 12 | public class KafkaApp implements StreamingApplication 13 | { 14 | 15 | @Override 16 | public void populateDAG(DAG dag, Configuration conf) 17 | { 18 | KafkaSinglePortInputOperator in 19 | = dag.addOperator("kafkaIn", new KafkaSinglePortInputOperator()); 20 | 21 | in.setInitialOffset(AbstractKafkaInputOperator.InitialOffset.EARLIEST.name()); 22 | LineOutputOperator out = dag.addOperator("fileOut", new LineOutputOperator()); 23 | 24 | dag.addStream("data", in.outputPort, out.input); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /tutorials/kafka/src/main/java/com/example/myapexapp/LineOutputOperator.java: 
-------------------------------------------------------------------------------- 1 | package com.example.myapexapp; 2 | 3 | import java.nio.charset.Charset; 4 | import java.nio.charset.StandardCharsets; 5 | 6 | import javax.validation.constraints.NotNull; 7 | 8 | import com.datatorrent.lib.io.fs.AbstractFileOutputOperator; 9 | 10 | /** 11 | * Converts each tuple to a string and writes it as a new line to the output file 12 | */ 13 | public class LineOutputOperator extends AbstractFileOutputOperator 14 | { 15 | private static final String NL = System.lineSeparator(); 16 | private static final Charset CS = StandardCharsets.UTF_8; 17 | 18 | @NotNull 19 | private String baseName; 20 | 21 | @Override 22 | public byte[] getBytesForTuple(byte[] t) { 23 | String result = new String(t, CS) + NL; 24 | return result.getBytes(CS); 25 | } 26 | 27 | @Override 28 | protected String getFileName(byte[] tuple) { 29 | return baseName; 30 | } 31 | 32 | public String getBaseName() { return baseName; } 33 | public void setBaseName(String v) { baseName = v; } 34 | } 35 | -------------------------------------------------------------------------------- /tutorials/kafka/src/site/conf/my-app-conf1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.attr.MASTER_MEMORY_MB 5 | 1024 6 | 7 | 8 | dt.application.MyFirstApplication.operator.randomGenerator.prop.numTuples 9 | 1000 10 | 11 | 12 | -------------------------------------------------------------------------------- /tutorials/kafka/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | 
log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/kinesisInput/README.md: -------------------------------------------------------------------------------- 1 | ## Sample KinesisInput Application 2 | 3 | This application reads record(s) from Kinesis Streams and writes them to user specified file in HDFS in form of string. 
4 | 5 | Follow these steps to run this application: 6 | 7 | **Step 1**: Update these properties in the file `src/properties.xml`: 8 | 9 | | Property Name | Description | 10 | | ------------- | ----------- | 11 | | dt.operator.kinesisInput.prop.streamName | AWS Kinesis Stream name | 12 | | dt.operator.kinesisInput.prop.accessKey | AWS Credentials AccessKeyId | 13 | | dt.operator.kinesisInput.prop.secretKey | AWS Credentials SecretAccessKey | 14 | | dt.operator.kinesisInput.prop.endPoint | AWS EndPoint | 15 | | dt.operator.kinesisInput.prop.initialOffset| Offset of shard for stream e.g: latest | 16 | | dt.operator.fileOutput.prop.outputFileName | Output file name e.g: output.txt | 17 | | dt.operator.fileOutput.prop.filePath | HDFS output file path | 18 | 19 | **Step 2**: Build the code: 20 | 21 | shell> mvn clean package -DskipTests 22 | 23 | Upload the `target/kinesisInput-1.0-SNAPSHOT.apa` to the UI console if available or launch it from 24 | the commandline using `apex`. 25 | 26 | **Step 3**: After running application verify the output directory has the expected output: 27 | 28 | shell> hadoop fs -cat / 29 | 30 | Sample Output: 31 | 32 | hadoop fs -cat /output.txt 33 | record 1 34 | record 2 35 | -------------------------------------------------------------------------------- /tutorials/kinesisInput/src/main/java/com/example/kinesisInput/Application.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Put your copyright and license info here. 
3 | */ 4 | package com.example.kinesisInput; 5 | 6 | import org.apache.apex.malhar.lib.fs.GenericFileOutputOperator; 7 | import org.apache.hadoop.conf.Configuration; 8 | 9 | import com.datatorrent.api.DAG; 10 | import com.datatorrent.api.StreamingApplication; 11 | import com.datatorrent.api.annotation.ApplicationAnnotation; 12 | import com.datatorrent.contrib.kinesis.KinesisStringInputOperator; 13 | 14 | @ApplicationAnnotation(name="Kinesis-to-HDFS") 15 | public class Application implements StreamingApplication 16 | { 17 | 18 | @Override 19 | public void populateDAG(DAG dag, Configuration conf) 20 | { 21 | KinesisStringInputOperator inputOperator = dag.addOperator("kinesisInput", new KinesisStringInputOperator()); 22 | GenericFileOutputOperator.StringFileOutputOperator fileOutputOperator = dag.addOperator("fileOutput", new GenericFileOutputOperator.StringFileOutputOperator()); 23 | dag.addStream("kinesis-to-hdfs", inputOperator.outputPort, fileOutputOperator.input); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /tutorials/kinesisInput/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.operator.kinesisInput.prop.initialOffset 5 | earliest 6 | 7 | 8 | dt.operator.kinesisInput.prop.streamName 9 | kinesisToS3 10 | 11 | 12 | dt.operator.kinesisInput.prop.accessKey 13 | ACCESSKEY 14 | 15 | 16 | dt.operator.kinesisInput.prop.secretKey 17 | SECRETKEY 18 | 19 | 20 | dt.operator.kinesisInput.prop.endPoint 21 | kinesis.us-east-1.amazonaws.com 22 | 23 | 24 | dt.operator.fileOutput.prop.filePath 25 | /tmp 26 | 27 | 28 | dt.operator.fileOutput.prop.outputFileName 29 | kinesis.txt 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /tutorials/kinesisInput/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | 
log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /tutorials/maprapp/README.md: -------------------------------------------------------------------------------- 1 | This sample application show how to read log data from a MapR Streams 2 | using Kafka (0.9) input operator and write them out to MapR DB using 3 | HBase output operator. 4 | 5 | An operator to parse JSON formatted data into POJO has been inserted 6 | into this pipeline. Other processing operators can be introduced 7 | depending upon the requirements. 8 | 9 | ###### MapR Streams Properties 10 | 11 | Specifying topic in MapR Streams. Please note the name of topic starts 12 | with Stream file path, followed by ":" and then Topic name. 
/**
 * Data: POJO for data — carries an id, a name and a message.
 */
public class Data
{
  private int id;       // record identifier
  private String name;  // record name
  private String message;  // free-form message payload

  public int getId() { return id; }

  public void setId(int id) { this.id = id; }

  public String getName() { return name; }

  public void setName(String name) { this.name = name; }

  public String getMessage() { return message; }

  public void setMessage(String message) { this.message = message; }
}
com.datatorrent.maprapp.Data 25 | 26 | 27 | 28 | 29 | dt.application.MaprStreamsToMaprDB.operator.Db.port.input.attr.TUPLE_CLASS 30 | com.datatorrent.maprapp.Data 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /tutorials/maprapp/src/test/java/com/datatorrent/maprapp/ApplicationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Put your copyright and license info here. 3 | */ 4 | package com.datatorrent.maprapp; 5 | 6 | import java.io.IOException; 7 | 8 | import javax.validation.ConstraintViolationException; 9 | 10 | import org.junit.Assert; 11 | 12 | import org.apache.hadoop.conf.Configuration; 13 | import org.junit.Test; 14 | 15 | import com.datatorrent.api.LocalMode; 16 | import com.datatorrent.maprapp.Application; 17 | 18 | /** 19 | * Test the DAG declaration in local mode. 20 | */ 21 | public class ApplicationTest { 22 | boolean check() { 23 | return true; 24 | } 25 | 26 | @Test 27 | public void testApplication() throws IOException, Exception { 28 | try { 29 | LocalMode lma = LocalMode.newInstance(); 30 | Configuration conf = new Configuration(false); 31 | conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml")); 32 | lma.prepareDAG(new Application(), conf); 33 | LocalMode.Controller lc = lma.getController(); 34 | lc.runAsync(); 35 | while( !check() ) { 36 | System.out.println("Sleeping..."); 37 | Thread.sleep(1000); 38 | } 39 | } catch (ConstraintViolationException e) { 40 | Assert.fail("constraint violations: " + e.getConstraintViolations()); 41 | } 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /tutorials/maprapp/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | 
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /tutorials/operatorTutorial/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | /bin/ 3 | /.settings/ 4 | .project 5 | .classpath 6 | -------------------------------------------------------------------------------- /tutorials/operatorTutorial/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | 
log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/parser/README.md: -------------------------------------------------------------------------------- 1 | This project contains applications showcasing different **Parsers** and **Formatters** present in the Malhar Library. For all the apps, parameters are configurable in META_INF/properties.xml. 2 | 3 | * **Xml Parser App** 4 | 5 | This application showcases how to use [XmlParser](https://datatorrent.com/docs/apidocs/com/datatorrent/lib/parser/XmlParser.html) 6 | from [Apex Malhar](https://github.com/apache/apex-malhar) library. The XmlParser Operator converts XML string to POJO. 7 | The parser emits dom based Document on *parsedOutput* port. It emits POJO on *out* and error records on *err* port. 8 | Follow these steps to run this application: 9 | 10 | **Step 1**: Build the code: 11 | 12 | shell> mvn clean install 13 | 14 | **Step 2**: Upload the `target/parser-1.0-SNAPSHOT.apa` to the UI console if available or launch it from 15 | the commandline using `apex` cli script. 
/**
 * File output operator that writes every incoming tuple, one per line, to a
 * single file whose name is configured via {@link #setOutputFileName(String)}.
 *
 * NOTE(review): the source this was recovered from appears to have stripped
 * angle-bracketed text; AbstractFileOutputOperator is likely parameterized
 * (e.g. AbstractFileOutputOperator&lt;Object&gt;) in the original — confirm.
 */
public class FileOutputOperator extends AbstractFileOutputOperator
{
  // Name of the single output file; must be set before launch (@NotNull).
  @NotNull
  private String outputFileName;

  @Override
  protected String getFileName(Object tuple)
  {
    // All tuples are routed to the same configured file.
    return outputFileName;
  }

  @Override
  protected byte[] getBytesForTuple(Object tuple)
  {
    // Serialize as the tuple's string form plus a trailing newline.
    // NOTE(review): uses the platform default charset — confirm intended.
    return (tuple.toString() + "\n").getBytes();
  }

  public String getOutputFileName()
  {
    return outputFileName;
  }

  public void setOutputFileName(String outputFileName)
  {
    this.outputFileName = outputFileName;
  }
}
implements StreamingApplication 13 | { 14 | 15 | @Override 16 | public void populateDAG(DAG dag, Configuration conf) 17 | { 18 | AdDataGenerator dataGenerator = dag.addOperator("dataGenerator", new AdDataGenerator()); 19 | CsvParser parserOperator = dag.addOperator("csvParser", new CsvParser()); 20 | FileOutputOperator dataOutput = dag.addOperator("dataOutput", new FileOutputOperator()); 21 | FileOutputOperator errorOutput = dag.addOperator("errorOutput", new FileOutputOperator()); 22 | ConsoleOutputOperator consoleOutput = dag.addOperator("consoleOutput", new ConsoleOutputOperator()); 23 | 24 | dag.addStream("inputData", dataGenerator.out, parserOperator.in); 25 | dag.addStream("parsedData", parserOperator.parsedOutput, dataOutput.input); 26 | dag.addStream("errorData", parserOperator.err, errorOutput.input); 27 | dag.addStream("pojoData", parserOperator.out, consoleOutput.input); 28 | 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /tutorials/parser/src/main/java/com/datatorrent/tutorial/xmlparser/EmployeeDataGenerator.java: -------------------------------------------------------------------------------- 1 | package com.datatorrent.tutorial.xmlparser; 2 | 3 | import com.datatorrent.api.Context; 4 | import com.datatorrent.api.DefaultOutputPort; 5 | import com.datatorrent.api.InputOperator; 6 | import com.datatorrent.common.util.BaseOperator; 7 | 8 | public class EmployeeDataGenerator extends BaseOperator implements InputOperator 9 | { 10 | public final transient DefaultOutputPort output = new DefaultOutputPort(); 11 | public int tupleCount; 12 | public int totalTupleCount = 10; 13 | 14 | @Override 15 | public void emitTuples() 16 | { 17 | if (tupleCount < totalTupleCount) { 18 | StringBuilder xmlSample = new StringBuilder(); 19 | xmlSample.append(""); 20 | xmlSample.append(""); 21 | xmlSample.append("employee" + tupleCount + ""); 22 | xmlSample.append("department"+ tupleCount + ""); 23 | xmlSample.append("" + 
tupleCount +""); 24 | xmlSample.append("2015-01-01"); 25 | xmlSample.append("
"+ "new york"); 26 | xmlSample.append("US" +"
"); 27 | xmlSample.append("
"); 28 | output.emit(xmlSample.toString()); 29 | tupleCount++; 30 | } 31 | } 32 | @Override 33 | public void setup(Context.OperatorContext context) 34 | { 35 | tupleCount = 0; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /tutorials/parser/src/main/java/com/datatorrent/tutorial/xmlparser/JavaSerializationStreamCodec.java: -------------------------------------------------------------------------------- 1 | package com.datatorrent.tutorial.xmlparser; 2 | 3 | import com.esotericsoftware.kryo.serializers.JavaSerializer; 4 | 5 | import com.datatorrent.netlet.util.Slice; 6 | import com.datatorrent.stram.plan.logical.DefaultKryoStreamCodec; 7 | 8 | public class JavaSerializationStreamCodec extends DefaultKryoStreamCodec 9 | { 10 | 11 | private static final long serialVersionUID = -183071548840076388L; 12 | 13 | public JavaSerializationStreamCodec() { 14 | super(); 15 | this.kryo.setDefaultSerializer(JavaSerializer.class); 16 | } 17 | 18 | @Override 19 | public Slice toByteArray(T info) { 20 | return super.toByteArray(info); 21 | } 22 | 23 | @Override 24 | public Object fromByteArray(Slice fragment) { 25 | return super.fromByteArray(fragment); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /tutorials/parser/src/main/resources/META-INF/properties-xmlParseApplication.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.application.xmlParserApplication.operator.xmlParser.port.out.attr.TUPLE_CLASS 5 | com.datatorrent.tutorial.xmlparser.EmployeeBean 6 | 7 | 8 | dt.application.xmlParserApplication.operator.dataOutput.prop.outputFileName 9 | parsedData 10 | 11 | 12 | dt.application.xmlParserApplication.operator.pojoOutput.prop.outputFileName 13 | pojoData 14 | 15 | 16 | dt.application.xmlParserApplication.operator.errorOutput.prop.outputFileName 17 | errorData 18 | 19 | 20 | 
dt.application.xmlParserApplication.operator.*.prop.filePath 21 | /tmp/application/parser 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /tutorials/parser/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | #log4j.logger.org.apache.commons.beanutils=warn 20 | log4j.logger.com.datatorrent=debug 21 | log4j.logger.org.apache.apex=debug 22 | log4j.logger.org=info 23 | -------------------------------------------------------------------------------- /tutorials/s3-to-hdfs-sync/README.md: -------------------------------------------------------------------------------- 1 | # Amazon S3 to HDFS sync application 2 | Ingest and backup Amazon S3 data to Hadoop HDFS for data download from Amazon to hadoop. 3 | This application transfers files from the configured S3 location to the destination path in HDFS. 
4 | The source code is available at: https://github.com/DataTorrent/examples/tree/master/tutorials/s3-to-hdfs-sync 5 | Send feedback or feature requests to feedback@datatorrent.com -------------------------------------------------------------------------------- /tutorials/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /tutorials/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java: -------------------------------------------------------------------------------- 1 | package com.datatorrent.tutorial.s3input; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | 5 | import com.datatorrent.api.DAG; 6 | import com.datatorrent.api.DAG.Locality; 7 | import com.datatorrent.api.StreamingApplication; 8 | import com.datatorrent.api.annotation.ApplicationAnnotation; 9 | import com.datatorrent.lib.io.fs.HDFSFileCopyModule; 10 | import com.datatorrent.lib.io.fs.S3InputModule; 11 | 12 | /** 13 | * Simple application illustrating file copy from S3 14 | */ 15 | @ApplicationAnnotation(name="S3-to-HDFS-Sync") 16 | public class S3ToHDFSSyncApplication implements StreamingApplication 17 | { 18 | 19 | @Override 20 | public void populateDAG(DAG dag, Configuration conf) 21 | { 22 | 23 | S3InputModule inputModule = dag.addModule("S3InputModule", new S3InputModule()); 24 | HDFSFileCopyModule outputModule = dag.addModule("HDFSFileCopyModule", new HDFSFileCopyModule()); 25 | 26 | dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput); 27 | dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput) 28 | .setLocality(Locality.THREAD_LOCAL); 29 | dag.addStream("BlocksData", inputModule.messages, 
outputModule.blockData).setLocality(Locality.THREAD_LOCAL); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /tutorials/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.operator.S3InputModule.prop.files 5 | s3n://ACCESS_KEY_ID:SECRET_KEY@BUCKET_NAME/DIRECTORY 6 | 7 | 8 | dt.operator.HDFSFileCopyModule.prop.outputDirectoryPath 9 | hdfs://destination-namenode-service:port/path-to-output-directory 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /tutorials/s3-to-hdfs-sync/src/site/conf/cluster-memory-conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | dt.operator.S3InputModule.prop.maxReaders 6 | 16 7 | 8 | 9 | dt.operator.S3InputModule.prop.blocksThreshold 10 | 16 11 | 12 | 13 | -------------------------------------------------------------------------------- /tutorials/s3-to-hdfs-sync/src/site/conf/sandbox-memory-conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | dt.operator.S3InputModule.prop.maxReaders 6 | 1 7 | 8 | 9 | dt.operator.S3InputModule.prop.blocksThreshold 10 | 1 11 | 12 | 13 | dt.operator.*.attr.MEMORY_MB 14 | 768 15 | 16 | 17 | dt.application.HDFSFileCopyApp.attr.MASTER_MEMORY_MB 18 | 768 19 | 20 | 21 | -------------------------------------------------------------------------------- /tutorials/s3-to-hdfs-sync/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | 
log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/s3-tuple-output/README.md: -------------------------------------------------------------------------------- 1 | # S3 tuple output example 2 | 3 | Sample application to show how to use the S3 tuple output module. 4 | 5 | The application reads records from HDFS using `FSRecordReaderModule`. 6 | These records are then written to Amazon S3 using `S3BytesOutputModule`. 7 | 8 | ### How to configure 9 | The properties file META-INF/properties.xml shows how to configure the respective operators. 10 | 11 | ### How to compile 12 | `shell> mvn clean package` 13 | 14 | This will generate application package s3-tuple-output-1.0-SNAPSHOT.apa inside target directory. 15 | 16 | ### How to run 17 | Use the application package generated above to launch the application from UI console(if available) or apex command line interface. 
18 | 19 | `apex> launch target/s3-tuple-output-1.0-SNAPSHOT.apa` 20 | -------------------------------------------------------------------------------- /tutorials/s3-tuple-output/XmlJavadocCommentsExtractor.xsl: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /tutorials/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java: -------------------------------------------------------------------------------- 1 | package com.datatorrent.tutorials.s3output; 2 | 3 | import org.apache.apex.malhar.lib.fs.FSRecordReaderModule; 4 | import org.apache.apex.malhar.lib.fs.s3.S3TupleOutputModule.S3BytesOutputModule; 5 | import org.apache.hadoop.conf.Configuration; 6 | 7 | import com.datatorrent.api.Context.PortContext; 8 | import com.datatorrent.api.DAG; 9 | import com.datatorrent.api.StreamingApplication; 10 | import com.datatorrent.api.annotation.ApplicationAnnotation; 11 | 12 | /** 13 | * Simple application illustrating file copy from S3 14 | */ 15 | @ApplicationAnnotation(name="s3-output-line") 16 | public class Application implements StreamingApplication 17 | { 18 | 19 | public void populateDAG(DAG dag, Configuration conf) 20 | { 21 | FSRecordReaderModule recordReader = dag.addModule("lineInput", FSRecordReaderModule.class); 22 | S3BytesOutputModule s3StringOutputModule = dag.addModule("s3output", S3BytesOutputModule.class); 23 | dag.addStream("data", recordReader.records, s3StringOutputModule.input); 24 | 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /tutorials/s3-tuple-output/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 
4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/s3output/src/main/java/com/example/s3output/Application.java: -------------------------------------------------------------------------------- 1 | package com.example.s3output; 2 | 3 | import org.apache.apex.malhar.lib.fs.s3.S3OutputModule; 4 | import org.apache.hadoop.conf.Configuration; 5 | 6 | import com.datatorrent.api.DAG; 7 | import com.datatorrent.api.StreamingApplication; 8 | import com.datatorrent.api.annotation.ApplicationAnnotation; 9 | import com.datatorrent.lib.io.fs.FSInputModule; 10 | 11 | /** 12 | * Application illustrating copy files from HDFS to S3 bucket. 
13 | */ 14 | @ApplicationAnnotation(name="HDFSToS3App") 15 | public class Application implements StreamingApplication 16 | { 17 | @Override 18 | public void populateDAG(DAG dag, Configuration conf) 19 | { 20 | FSInputModule inputModule = dag.addModule("HDFSInputModule", new FSInputModule()); 21 | S3OutputModule outputModule = dag.addModule("S3OutputModule", new S3OutputModule()); 22 | 23 | dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput); 24 | dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput) 25 | .setLocality(DAG.Locality.CONTAINER_LOCAL); 26 | dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(DAG.Locality.CONTAINER_LOCAL); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tutorials/s3output/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.operator.HDFSInputModule.prop.files 5 | hdfs://source-namenode-service/user/dtuser/path-to-input-directory 6 | 7 | 8 | dt.operator.HDFSInputModule.prop.maxReaders 9 | 6 10 | 11 | 12 | dt.operator.HDFSInputModule.prop.minReaders 13 | 6 14 | 15 | 16 | dt.operator.HDFSInputModule.prop.blocksThreshold 17 | 2 18 | 19 | 20 | dt.operator.S3OutputModule.prop.accessKey 21 | 22 | 23 | 24 | dt.operator.S3OutputModule.prop.secretAccessKey 25 | 26 | 27 | 28 | dt.operator.S3OutputModule.prop.bucketName 29 | 30 | 31 | 32 | dt.operator.S3OutputModule.prop.outputDirectoryPath 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /tutorials/topnwords/app/src/main/java/com/example/topNwordcount/WCPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
// a single (word, frequency) pair
public class WCPair {
  public String word;  // the word itself
  public int freq;     // its occurrence count

  // default constructor
  public WCPair() {
  }

  public WCPair(String w, int f) {
    this.word = w;
    this.freq = f;
  }

  @Override
  public String toString() {
    return String.format("(%s, %d)", word, freq);
  }
}
-------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | -------------------------------------------------------------------------------- /tutorials/topnwords/scripts/aliases: -------------------------------------------------------------------------------- 1 | # bash aliases and functions useful for working on input and out directories 2 | # 3 | 4 | # input and output directories 5 | in=/tmp/test/input-dir out=/tmp/test/output-dir 6 | 7 | # list files in input directory 8 | alias ls-input="hdfs dfs -ls $in" 9 | 10 | # list files in output directory 11 | alias ls-output="hdfs dfs -ls $out" 12 | 13 | # clean input directory 14 | alias clean-input="hdfs dfs -rm $in/*" 15 | 16 | # clean output directory 17 | alias clean-output="hdfs dfs -rm $out/*" 18 | 19 | # convenient alias to run dtcli from code repository 20 | alias dtcli3="$HOME/src/incubator-apex-core/engine/src/main/scripts/dtcli" 21 | 22 | # copy local file (argument) to input directory 23 | function put-file ( ) 
{ 24 | hdfs dfs -put "$1" "$in" 25 | } 26 | 27 | # make local copy of output file (argument) from output directory 28 | function get-file ( ) { 29 | hdfs dfs -get "$out/$1" "$1".out 30 | } 31 | -------------------------------------------------------------------------------- /tutorials/topnwords/scripts/check-services: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # script to check status of various Hadoop services 4 | 5 | # NOTE: If machine was shutdown improperly, or if HDFS continues to show red 6 | # not-ready status, some of these may not be running; just start them if needed with: 7 | # sudo service start 8 | # where is the name of the service you want to start 9 | # 10 | services="hadoop-hdfs-namenode hadoop-hdfs-datanode hadoop-yarn-resourcemanager \ 11 | hadoop-yarn-nodemanager dtdemos" 12 | for s in $services; do 13 | sudo service $s status 14 | done 15 | 16 | # gateway service 17 | sudo service dtgateway status 18 | 19 | -------------------------------------------------------------------------------- /tutorials/topnwords/scripts/newapp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # script to create a new project 3 | 4 | # change project name and archetype version as needed 5 | name=myapexapp 6 | version=3.3.0-incubating 7 | 8 | mvn -B archetype:generate \ 9 | -DarchetypeGroupId=org.apache.apex \ 10 | -DarchetypeArtifactId=apex-app-archetype \ 11 | -DarchetypeVersion=$version \ 12 | -DgroupId=com.example \ 13 | -Dpackage=com.example.$name \ 14 | -DartifactId=$name \ 15 | -Dversion=1.0-SNAPSHOT 16 | -------------------------------------------------------------------------------- /tutorials/topnwords/scripts/newapp.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | @rem Script for creating a new application 3 | 4 | setlocal 5 | 6 | mvn -B archetype:generate ^ 7 | 
-DarchetypeGroupId=org.apache.apex ^ 8 | -DarchetypeArtifactId=apex-app-archetype ^ 9 | -DarchetypeVersion=3.3.0-incubating ^ 10 | -DgroupId=com.example ^ 11 | -Dpackage=com.example.myapexapp ^ 12 | -DartifactId=myapexapp ^ 13 | -Dversion=1.0-SNAPSHOT 14 | 15 | endlocal 16 | -------------------------------------------------------------------------------- /tutorials/topnwords/webinar/WCPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package com.example.myapexapp; 20 | 21 | // a single (word, frequency) pair 22 | public class WCPair { 23 | public String word; 24 | public int freq; 25 | 26 | public WCPair() {} 27 | 28 | public WCPair(String w, int f) { 29 | word = w; 30 | freq = f; 31 | } 32 | 33 | @Override 34 | public String toString() { 35 | return String.format("(%s, %d)", word, freq); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /tutorials/unifiers/src/main/java/com/example/myapexapp/Application.java: -------------------------------------------------------------------------------- 1 | package com.example.myapexapp; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | 5 | import com.datatorrent.api.annotation.ApplicationAnnotation; 6 | import com.datatorrent.api.StreamingApplication; 7 | import com.datatorrent.api.DAG; 8 | import com.datatorrent.api.DAG.Locality; 9 | import com.datatorrent.lib.io.ConsoleOutputOperator; 10 | 11 | @ApplicationAnnotation(name="MyFirstApplication") 12 | public class Application implements StreamingApplication 13 | { 14 | 15 | @Override 16 | public void populateDAG(DAG dag, Configuration conf) 17 | { 18 | // create operators 19 | RandomInteger random = dag.addOperator("random", new RandomInteger()); 20 | RangeFinder rf = dag.addOperator("range", new RangeFinder()); 21 | ToConsole cons = dag.addOperator("console", new ToConsole()); 22 | 23 | // create streams 24 | dag.addStream("randomData", random.out, rf.in); 25 | dag.addStream("rangeData", rf.out, cons.in); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /tutorials/unifiers/src/main/java/com/example/myapexapp/RandomInteger.java: -------------------------------------------------------------------------------- 1 | package com.example.myapexapp; 2 | 3 | import java.util.Random; 4 | 5 | import com.datatorrent.api.DefaultOutputPort; 6 | import com.datatorrent.api.InputOperator; 7 | import 
com.datatorrent.common.util.BaseOperator; 8 | 9 | /** 10 | * Simple operator that emits random numbers in a fixed range. 11 | */ 12 | public class RandomInteger extends BaseOperator implements InputOperator 13 | { 14 | private transient Random rand = new Random(); 15 | 16 | private int numTuples = 1000; 17 | private transient int count = 0; 18 | private int rangeMax = 1 << 11; 19 | 20 | public final transient DefaultOutputPort out = new DefaultOutputPort<>(); 21 | 22 | @Override 23 | public void beginWindow(long windowId) 24 | { 25 | count = 0; 26 | } 27 | 28 | @Override 29 | public void emitTuples() 30 | { 31 | if (count < numTuples) { 32 | ++count; 33 | out.emit(rand.nextInt() % rangeMax); 34 | } 35 | } 36 | 37 | public int getNumTuples() 38 | { 39 | return numTuples; 40 | } 41 | 42 | /** 43 | * Sets the number of tuples to be emitted every window. 44 | * @param numTuples number of tuples 45 | */ 46 | public void setNumTuples(int numTuples) 47 | { 48 | this.numTuples = numTuples; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /tutorials/unifiers/src/main/resources/META-INF/properties.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 15 | 16 | dt.application.MyFirstApplication.operator.*.attr.MEMORY_MB 17 | 200 18 | 19 | 20 | dt.application.MyFirstApplication.operator.random.prop.numTuples 21 | 1000 22 | 23 | 24 | dt.application.MyFirstApplication.operator.range.attr.PARTITIONER 25 | com.datatorrent.common.partitioner.StatelessPartitioner:3 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /tutorials/unifiers/src/site/conf/use-unifier.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dt.application.MyFirstApplication.operator.range.prop.useUnifier 5 | true 6 | 7 | 8 | -------------------------------------------------------------------------------- 
/tutorials/unifiers/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG,CONSOLE 2 | 3 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 4 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 6 | 7 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 8 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n 10 | log4j.appender.RFA.File=/tmp/app.log 11 | 12 | # to enable, add SYSLOG to rootLogger 13 | log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender 14 | log4j.appender.SYSLOG.syslogHost=127.0.0.1 15 | log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n 17 | log4j.appender.SYSLOG.Facility=LOCAL1 18 | 19 | log4j.logger.org=info 20 | #log4j.logger.org.apache.commons.beanutils=warn 21 | log4j.logger.com.datatorrent=debug 22 | --------------------------------------------------------------------------------