├── .gitignore
├── LICENSE.txt
├── NOTICE.txt
├── README.md
├── configure-flume.sh
├── dataset-compatibility
│   ├── README.md
│   ├── pom.xml
│   ├── src/main/java/org/kitesdk/examples/data
│   │   ├── DescribeDatasets.java
│   │   └── ReadMovies.java
│   └── src/main/resources
│       ├── core-site.xml
│       └── log4j.properties
├── dataset-hbase
│   ├── README.md
│   ├── pom.xml
│   ├── src/main/avro
│   │   ├── user.avsc
│   │   ├── user.avsc.invalid-migration
│   │   └── user.avsc.valid-migration
│   ├── src/main/java/org/kitesdk/examples/data
│   │   ├── ReadUserDataset.java
│   │   └── WriteUserDataset.java
│   └── src/main/resources
│       └── log4j.properties
├── dataset-staging
│   ├── README.md
│   ├── pom.xml
│   ├── src/main/java/org/kitesdk/examples/staging
│   │   ├── CreateStagedDataset.java
│   │   ├── GenerateSimpleLogs.java
│   │   └── StagingToPersistent.java
│   └── src/main/resources
│       ├── core-site.xml
│       ├── log4j.properties
│       └── simple-log.avsc
├── dataset
│   ├── README.md
│   ├── pom.xml
│   ├── src/main/java/org/kitesdk/examples/data
│   │   ├── CreateHiveUserDatasetGeneric.java
│   │   ├── CreateProductDatasetPojo.java
│   │   ├── CreateUserDatasetGeneric.java
│   │   ├── CreateUserDatasetGenericParquet.java
│   │   ├── CreateUserDatasetGenericPartitioned.java
│   │   ├── DeleteHiveUserDataset.java
│   │   ├── DeleteProductDataset.java
│   │   ├── DeleteUserDataset.java
│   │   ├── HelloKite.java
│   │   ├── Product.java
│   │   ├── ReadHiveUserDatasetGeneric.java
│   │   ├── ReadProductDatasetPojo.java
│   │   ├── ReadUserDatasetGeneric.java
│   │   └── ReadUserDatasetGenericOnePartition.java
│   ├── src/main/resources
│   │   ├── core-site.xml
│   │   ├── hive-site.xml
│   │   ├── log4j.properties
│   │   └── user.avsc
│   ├── src/main/scala
│   │   ├── creategeneric.scala
│   │   └── createpojo.scala
│   ├── src/test/java/org/kitesdk/examples/data
│   │   └── ITDataset.java
│   └── src/test/resources
│       ├── core-site.xml
│       ├── hive-site.xml
│       └── log4j.properties
├── demo
│   ├── README.md
│   ├── bin
│   │   └── simulate-activity.sh
│   ├── demo-core
│   │   ├── pom.xml
│   │   └── src/main/avro
│   │       ├── session.avsc
│   │       └── standard_event.avsc
│   ├── demo-crunch
│   │   ├── pom.xml
│   │   ├── src/main/java/org/kitesdk/examples/demo
│   │   │   └── CreateSessions.java
│   │   └── src/main/resources
│   │       ├── core-site.xml
│   │       ├── hive-site.xml
│   │       ├── mapred-site.xml
│   │       └── yarn-site.xml
│   ├── demo-logging-webapp
│   │   ├── pom.xml
│   │   ├── src/main/java/org/kitesdk/examples/demo
│   │   │   └── LoggingServlet.java
│   │   ├── src/main/resources
│   │   │   └── log4j.properties
│   │   └── src/main/webapp
│   │       ├── WEB-INF
│   │       │   └── web.xml
│   │       └── index.jsp
│   ├── demo-reports-webapp
│   │   ├── pom.xml
│   │   └── src/main/webapp
│   │       ├── WEB-INF
│   │       │   ├── context.xml
│   │       │   └── web.xml
│   │       ├── index.jsp
│   │       └── reports
│   │           ├── all_sessions.jsp
│   │           ├── avg_session_duration.jsp
│   │           ├── display_query_results.jspf
│   │           └── report.css
│   ├── flume.properties
│   ├── pom.xml
│   └── src/main/webapp
│       └── index.jsp
├── json
│   ├── README.md
│   ├── flume.properties
│   ├── morphline.conf
│   ├── pom.xml
│   └── user.avsc
├── kite-examples-morphlines
│   ├── README.md
│   ├── pom.xml
│   ├── src/main/java/org/kitesdk/examples/morphlines
│   │   └── MyToLowerCaseBuilder.java
│   ├── src/test/java/org/kitesdk/examples/morphlines
│   │   └── ExampleMorphlineTest.java
│   └── src/test/resources
│       ├── custom-mimetypes.xml
│       ├── grok-dictionaries
│       │   ├── firewalls
│       │   ├── grok-patterns
│       │   ├── java
│       │   ├── linux-syslog
│       │   ├── mcollective
│       │   ├── mcollective-patterns
│       │   ├── nagios
│       │   ├── postgresql
│       │   ├── redis
│       │   └── ruby
│       ├── log4j.properties
│       ├── solr
│       │   ├── collection1/conf
│       │   │   ├── lang
│       │   │   │   └── stopwords_en.txt
│       │   │   ├── protwords.txt
│       │   │   ├── schema.xml
│       │   │   ├── solrconfig.xml
│       │   │   ├── stopwords.txt
│       │   │   └── synonyms.txt
│       │   └── solr.xml
│       ├── test-documents
│       │   ├── NullHeader.docx
│       │   ├── arrays.json
│       │   ├── boilerplate.html
│       │   ├── cars.csv
│       │   ├── cars.csv.gz
│       │   ├── cars.ssv
│       │   ├── cars.tar.gz
│       │   ├── cars.tsv
│       │   ├── cars2.csv
│       │   ├── cars3.csv
│       │   ├── complex.json
│       │   ├── complex.mbox
│       │   ├── email.eml
│       │   ├── email.txt
│       │   ├── emails.txt
│       │   ├── mime.eml
│       │   ├── multiline-sessions.log
│       │   ├── multiline-stacktrace-expected-long-event.log
│       │   ├── multiline-stacktrace.log
│       │   ├── non-length-delimited-20130430-234145-tweets.json.gz
│       │   ├── rsstest.rss
│       │   ├── sample-statuses-20120906-141433
│       │   ├── sample-statuses-20120906-141433-medium.avro
│       │   ├── sample-statuses-20120906-141433-subschema.avsc
│       │   ├── sample-statuses-20120906-141433.avro
│       │   ├── sample-statuses-20120906-141433.avsc
│       │   ├── sample-statuses-20120906-141433.bz2
│       │   ├── sample-statuses-20120906-141433.gz
│       │   ├── sample-statuses-20120906-141433.json
│       │   ├── simpleCSV.txt
│       │   ├── stream.json
│       │   ├── test-documents.7z
│       │   ├── test-documents.cpio
│       │   ├── test-documents.tar
│       │   ├── test-documents.tbz2
│       │   ├── test-documents.tgz
│       │   ├── test-documents.zip
│       │   ├── test-outlook.msg
│       │   ├── test-zip-of-zip.zip
│       │   ├── testAIFF.aif
│       │   ├── testBMP.bmp
│       │   ├── testBMPfp.txt
│       │   ├── testDITA.dita
│       │   ├── testEMLX.emlx
│       │   ├── testEXCEL.xls
│       │   ├── testEXCEL.xlsx
│       │   ├── testFLAC.flac
│       │   ├── testFLAC.oga
│       │   ├── testFLV.flv
│       │   ├── testGIF.gif
│       │   ├── testJAR.jar
│       │   ├── testJPEG_EXIF.jpg
│       │   ├── testJPEG_EXIF.jpg.gz
│       │   ├── testJPEG_EXIF.jpg.tar.gz
│       │   ├── testKML.kml
│       │   ├── testMP3i18n.mp3
│       │   ├── testMP4.m4a
│       │   ├── testPDF.pdf
│       │   ├── testPNG.png
│       │   ├── testPPM.ppm
│       │   ├── testPPT_various.ppt
│       │   ├── testPPT_various.pptx
│       │   ├── testPSD.psd
│       │   ├── testPages.pages
│       │   ├── testRDF.rdf
│       │   ├── testRFC822
│       │   ├── testRTFVarious.rtf
│       │   ├── testSVG.svg
│       │   ├── testTIFF.tif
│       │   ├── testVISIO.vsd
│       │   ├── testVORBIS.ogg
│       │   ├── testWAR.war
│       │   ├── testWAV.wav
│       │   ├── testWINMAIL.dat
│       │   ├── testWMA.wma
│       │   ├── testWMF.wmf
│       │   ├── testWMV.wmv
│       │   ├── testWORD_various.doc
│       │   ├── testWindows-x86-32.exe
│       │   ├── testXML.xml
│       │   └── testXML2.xml
│       └── test-morphlines
│           ├── addCurrentTime.conf
│           ├── addLocalHost.conf
│           ├── addLocalHostWithName.conf
│           ├── addValues.conf
│           ├── addValuesIfAbsent.conf
│           ├── addValuesIfAbsentWithLargeN.conf
│           ├── characterEscaping.conf
│           ├── compileWithMissingMorphline.conf
│           ├── contains.conf
│           ├── convertTimestamp.conf
│           ├── convertTimestampWithBadTimezone.conf
│           ├── convertTimestampWithDefaults.conf
│           ├── convertTimestampWithInputFormatUnixTimeInMillis.conf
│           ├── convertTimestampWithInputFormatUnixTimeInSeconds.conf
│           ├── convertTimestampWithOutputFormatUnixTimeInMillis.conf
│           ├── convertTimestampWithOutputFormatUnixTimeInSeconds.conf
│           ├── decodeBase64.conf
│           ├── detectMimeTypesWithDefaultMimeTypes.conf
│           ├── detectMimeTypesWithDefaultMimeTypesAndFile.conf
│           ├── detectMimeTypesWithFile.conf
│           ├── detectMimeTypesWithString.conf
│           ├── equalsFailure.conf
│           ├── equalsSuccess.conf
│           ├── extractAvroPaths.conf
│           ├── extractAvroPathsFlattened.conf
│           ├── extractAvroTree.conf
│           ├── extractJsonPaths.conf
│           ├── extractJsonPathsFlattened.conf
│           ├── extractURIComponent.conf
│           ├── extractURIComponents.conf
│           ├── extractURIQueryParameters.conf
│           ├── extractURIQueryParametersWithMaxParameters.conf
│           ├── findReplace.conf
│           ├── findReplaceWithGrok.conf
│           ├── findReplaceWithRegex.conf
│           ├── generateUUID.conf
│           ├── generateUUIDSecure.conf
│           ├── grokEmail.conf
│           ├── grokFindSubstrings.conf
│           ├── grokFindSubstringsInplace.conf
│           ├── grokFindSubstringsInplaceTwoExpressions.conf
│           ├── grokIfNotMatchDropRecord.conf
│           ├── grokSyslogMatch.conf
│           ├── grokSyslogMatchInplace.conf
│           ├── grokSyslogMatchInplaceTwoExpressions.conf
│           ├── grokSyslogNgCisco.conf
│           ├── grokWithEscaping.conf
│           ├── ifDetectMimeType.conf
│           ├── ifThenElseWithElse.conf
│           ├── ifThenElseWithElseEmpty.conf
│           ├── ifThenElseWithThen.conf
│           ├── ifThenElseWithThenEmpty.conf
│           ├── isTrue.conf
│           ├── javaCompilationException.conf
│           ├── javaHelloWorld.conf
│           ├── javaRuntimeException.conf
│           ├── loadSolrBasic.conf
│           ├── log.conf
│           ├── myToLowerCase.conf
│           ├── noOperation.conf
│           ├── notWithFalse.conf
│           ├── notWithTrue.conf
│           ├── parseComplexConfig.conf
│           ├── parseVariables.conf
│           ├── pipeWithTwoBasicCommands.conf
│           ├── readAvroContainer.conf
│           ├── readAvroJsonTweetsWithExternalSchema.conf
│           ├── readAvroJsonTweetsWithExternalSubSchema.conf
│           ├── readAvroTweetsContainer.conf
│           ├── readAvroTweetsContainerWithExternalSchema.conf
│           ├── readAvroTweetsContainerWithExternalSubSchema.conf
│           ├── readAvroTweetsWithExternalSchema.conf
│           ├── readAvroTweetsWithExternalSubSchema.conf
│           ├── readAvroWithExternalSchema.conf
│           ├── readAvroWithMissingExternalSchema.conf
│           ├── readCSV.conf
│           ├── readCSVDetails.conf
│           ├── readCSVWithoutQuoting.conf
│           ├── readClob.conf
│           ├── readJson.conf
│           ├── readJsonTweets.conf
│           ├── readJsonTweetsLengthDelimited.conf
│           ├── readJsonWithMap.conf
│           ├── readLine.conf
│           ├── readLineWithMimeType.conf
│           ├── readLineWithMimeTypeMismatch.conf
│           ├── readLineWithMimeTypeWildcard.conf
│           ├── readMultiLine.conf
│           ├── readMultiLineWithWhatNext.conf
│           ├── separateAttachments.conf
│           ├── setValues.conf
│           ├── simpleCSV.conf
│           ├── solrCellDocumentTypes.conf
│           ├── solrCellJPGCompressed.conf
│           ├── solrCellXML.conf
│           ├── solrLocator.conf
│           ├── split.conf
│           ├── splitKeyValue.conf
│           ├── splitKeyValueWithIPTables.conf
│           ├── splitKeyValueWithLiteralSeparatorOfLength3.conf
│           ├── splitKeyValueWithRegex.conf
│           ├── splitWithEdgeCases.conf
│           ├── splitWithGrok.conf
│           ├── splitWithMultipleChars.conf
│           ├── splitWithOutputFields.conf
│           ├── startReportingMetricsToCSV.conf
│           ├── startReportingMetricsToJMX.conf
│           ├── startReportingMetricsToSLF4J.conf
│           ├── toByteArray.conf
│           ├── toString.conf
│           ├── toStringWithTrim.conf
│           ├── tokenizeText.conf
│           ├── translate.conf
│           ├── translateFailure.conf
│           ├── tryRulesCatchException.conf
│           ├── tryRulesFail.conf
│           ├── tryRulesFailTwice.conf
│           ├── tryRulesPass.conf
│           ├── tutorialReadAvroContainer.conf
│           └── tutorialReadJsonTestTweets.conf
├── logging-webapp
│   ├── README.md
│   ├── pom.xml
│   ├── src/main/java/org/kitesdk/examples/logging
│   │   ├── CreateDataset.java
│   │   ├── DeleteDataset.java
│   │   ├── ReadDataset.java
│   │   └── webapp
│   │       └── LoggingServlet.java
│   ├── src/main/resources
│   │   ├── event.avsc
│   │   ├── hive-site.xml
│   │   └── log4j.properties
│   ├── src/main/webapp
│   │   ├── WEB-INF
│   │   │   └── web.xml
│   │   └── index.jsp
│   ├── src/test/java/org/kitesdk/examples/logging/webapp
│   │   └── ITLoggingWebapp.java
│   └── src/test/resources
│       ├── core-site.xml
│       ├── flume.properties
│       ├── hive-site.xml
│       └── log4j.properties
├── logging
│   ├── README.md
│   ├── flume.properties
│   ├── pom.xml
│   ├── src/main/java/org/kitesdk/examples/logging
│   │   ├── App.java
│   │   ├── CreateDataset.java
│   │   ├── DeleteDataset.java
│   │   └── ReadDataset.java
│   ├── src/main/resources
│   │   ├── core-site.xml
│   │   ├── event.avsc
│   │   ├── hive-site.xml
│   │   └── log4j.properties
│   ├── src/test/java/org/kitesdk/examples/logging
│   │   └── ITLogging.java
│   └── src/test/resources
│       ├── core-site.xml
│       ├── flume.properties
│       ├── hive-site.xml
│       └── log4j.properties
└── spark
    ├── README.md
    ├── pom.xml
    ├── src/main/avro
    │   ├── correlated_events.avsc
    │   └── standard_event.avsc
    ├── src/main/java/org/kitesdk/examples/spark
    │   ├── BaseEventsTool.java
    │   ├── CorrelateEvents.java
    │   ├── CorrelateEventsTask.java
    │   └── CreateEvents.java
    ├── src/main/resources
    │   ├── core-site.xml
    │   ├── hive-site.xml
    │   └── log4j.properties
    └── src/main/scala/org/kitesdk/examples/spark
        └── AvroKyroRegistrator.scala

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.classpath
.settings
.project
target
build
test-output
.surefire-*
.DS_Store
.idea
*.iml

--------------------------------------------------------------------------------
/NOTICE.txt:
--------------------------------------------------------------------------------
This product includes software developed by Cloudera, Inc.
(http://www.cloudera.com/).

--------------------------------------------------------------------------------
/dataset-compatibility/src/main/java/org/kitesdk/examples/data/ReadMovies.java:
--------------------------------------------------------------------------------
package org.kitesdk.examples.data;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.Datasets;

import static org.apache.avro.generic.GenericData.Record;

public class ReadMovies extends Configured implements Tool {

  @Override
  public int run(String[] args) throws Exception {
    Dataset<Record> movies = Datasets.load(
        "dataset:hdfs:/tmp/data/movies", Record.class);

    DatasetReader<Record> reader = null;
    try {
      reader = movies.newReader();
      for (Record rec : reader) {
        System.err.println("Movie: " + rec);
      }

    } finally {
      if (reader != null) {
        reader.close();
      }
    }

    return 0;
  }

  public static void main(String... args) throws Exception {
    int rc = ToolRunner.run(new ReadMovies(), args);
    System.exit(rc);
  }
}
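
ReadMovies closes its reader in an explicit finally block. On Java 7 and later the same loop is usually written with try-with-resources, since Kite's DatasetReader is Closeable; as a minimal sketch (not part of the repository), the body of run() above is equivalent to:

    Dataset<Record> movies = Datasets.load(
        "dataset:hdfs:/tmp/data/movies", Record.class);
    // The reader is closed automatically, even if the loop throws.
    try (DatasetReader<Record> reader = movies.newReader()) {
      for (Record rec : reader) {
        System.err.println("Movie: " + rec);
      }
    }
    return 0;
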

--------------------------------------------------------------------------------
/dataset-compatibility/src/main/resources/core-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://quickstart.cloudera/</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/dataset-compatibility/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
# by default, log at INFO to the console
log4j.rootLogger = INFO, console

# Set the appender named console to be a ConsoleAppender
log4j.appender.console=org.apache.log4j.ConsoleAppender

# Define the layout for console appender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p :: %m%n

# silence native code warnings
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

# allow DEBUG messages from Kite examples
log4j.logger.org.kitesdk.examples=DEBUG
log4j.logger.org.kitesdk.data.filesystem=DEBUG
log4j.logger.org.kitesdk.data.hcatalog=DEBUG
log4j.logger.org.kitesdk.data.hbase=DEBUG

--------------------------------------------------------------------------------
/dataset-hbase/src/main/avro/user.avsc:
--------------------------------------------------------------------------------
{
  "type": "record",
  "name": "User",
  "namespace": "org.kitesdk.examples.data",
  "doc": "A user record",
  "fields": [
    {
      "name": "username",
      "type": "string",
      "mapping": { "type": "key", "value": "0" }
    },
    {
      "name": "creationDate",
      "type": "long",
      "mapping": { "type": "column", "value": "meta:creationDate" }
    },
    {
      "name": "favoriteColor",
      "type": "string",
      "mapping": { "type": "column", "value": "meta:favoriteColor" }
    }
  ]
}

--------------------------------------------------------------------------------
/dataset-hbase/src/main/avro/user.avsc.invalid-migration:
--------------------------------------------------------------------------------
{
  "type": "record",
  "name": "User",
  "namespace": "org.kitesdk.examples.data",
  "doc": "A user record",
  "fields": [
    {
      "name": "username",
      "type": "string",
      "mapping": { "type": "key", "value": "0" }
    },
    {
      "name": "creationDate",
      "type": "long",
      "mapping": { "type": "column", "value": "meta:creationDate" }
    },
    {
      "name": "favoriteColor",
      "type": "string",
      "mapping": { "type": "column", "value": "meta:favoriteColor" }
    },
    {
      "name": "age",
      "type": "int",
      "mapping": { "type": "column", "value": "meta:age" }
    }
  ]
}

--------------------------------------------------------------------------------
/dataset-hbase/src/main/avro/user.avsc.valid-migration:
--------------------------------------------------------------------------------
{
  "type": "record",
  "name": "User",
  "namespace": "org.kitesdk.examples.data",
  "doc": "A user record",
  "fields": [
    {
      "name": "username",
      "type": "string",
      "mapping": { "type": "key", "value": "0" }
    },
    {
      "name": "creationDate",
      "type": "long",
      "mapping": { "type": "column", "value": "meta:creationDate" }
    },
    {
      "name": "favoriteColor",
      "type": "string",
      "mapping": { "type": "column", "value": "meta:favoriteColor" }
    },
    {
      "name": "age",
      "type": "int",
      "default": 0,
      "mapping": { "type": "column", "value": "meta:age" }
    }
  ]
}

--------------------------------------------------------------------------------
/dataset-hbase/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
# by default, log at INFO to the console
log4j.rootLogger = INFO, console

# Set the appender named console to be a ConsoleAppender
log4j.appender.console=org.apache.log4j.ConsoleAppender

# Define the layout for console appender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p :: %m%n

# silence native code warnings
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

# allow DEBUG messages from Kite examples
log4j.logger.org.kitesdk.examples=DEBUG
log4j.logger.org.kitesdk.data.filesystem=DEBUG
log4j.logger.org.kitesdk.data.hcatalog=DEBUG
log4j.logger.org.kitesdk.data.hbase=DEBUG

--------------------------------------------------------------------------------
/dataset-staging/src/main/resources/core-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://quickstart.cloudera/</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/dataset-staging/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
# by default, log at INFO to the console
log4j.rootLogger = INFO, console

# Set the appender named console to be a ConsoleAppender
log4j.appender.console=org.apache.log4j.ConsoleAppender

# Define the layout for console appender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p :: %m%n

# silence native code warnings
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

# allow DEBUG messages from Kite examples
log4j.logger.org.kitesdk.examples=DEBUG
log4j.logger.org.kitesdk.data.filesystem=DEBUG
log4j.logger.org.kitesdk.data.hcatalog=DEBUG
log4j.logger.org.kitesdk.data.hbase=DEBUG

--------------------------------------------------------------------------------
/dataset-staging/src/main/resources/simple-log.avsc:
--------------------------------------------------------------------------------
{
  "type": "record",
  "name": "simple.log",
  "namespace": "org.kitesdk.examples.staging",
  "doc": "A simple log message with basic metadata",
  "fields": [
    { "name": "timestamp", "type": "long" },
    { "name": "level", "type": "string", "default": "INFO" },
    { "name": "component", "type": "string", "default": "" },
    { "name": "message", "type": "string", "default": "" }
  ]
}

--------------------------------------------------------------------------------
/dataset/src/main/java/org/kitesdk/examples/data/DeleteHiveUserDataset.java:
--------------------------------------------------------------------------------
/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.examples.data;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.kitesdk.data.Datasets;

/**
 * Delete the users dataset and HCatalog metadata.
 */
public class DeleteHiveUserDataset extends Configured implements Tool {

  @Override
  public int run(String[] args) throws Exception {
    // Delete the users dataset
    boolean success = Datasets.delete("dataset:hive?dataset=users");

    return success ? 0 : 1;
  }

  public static void main(String... args) throws Exception {
    int rc = ToolRunner.run(new DeleteHiveUserDataset(), args);
    System.exit(rc);
  }
}

--------------------------------------------------------------------------------
/dataset/src/main/java/org/kitesdk/examples/data/DeleteProductDataset.java:
--------------------------------------------------------------------------------
/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.examples.data;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.kitesdk.data.Datasets;

/**
 * Delete the products dataset.
 */
public class DeleteProductDataset extends Configured implements Tool {

  @Override
  public int run(String[] args) throws Exception {
    // Delete the products dataset
    boolean success = Datasets.delete("dataset:hdfs:/tmp/data/products");

    return success ? 0 : 1;
  }

  public static void main(String... args) throws Exception {
    int rc = ToolRunner.run(new DeleteProductDataset(), args);
    System.exit(rc);
  }
}

--------------------------------------------------------------------------------
/dataset/src/main/java/org/kitesdk/examples/data/DeleteUserDataset.java:
--------------------------------------------------------------------------------
/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.examples.data;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.kitesdk.data.Datasets;

/**
 * Delete the users dataset.
 */
public class DeleteUserDataset extends Configured implements Tool {

  @Override
  public int run(String[] args) throws Exception {
    // Delete the users dataset
    boolean success = Datasets.delete("dataset:hdfs:/tmp/data/users");

    return success ? 0 : 1;
  }

  public static void main(String... args) throws Exception {
    int rc = ToolRunner.run(new DeleteUserDataset(), args);
    System.exit(rc);
  }
}
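
The matching create steps live in CreateUserDatasetGeneric.java and its siblings, which are listed in the tree but not included in this extract. Purely as an illustration of the pattern these delete tools undo, and not the actual example code (the descriptor options and record values below are assumptions), creating and populating the users dataset looks roughly like this:

    import org.apache.avro.generic.GenericRecord;
    import org.apache.avro.generic.GenericRecordBuilder;
    import org.kitesdk.data.Dataset;
    import org.kitesdk.data.DatasetDescriptor;
    import org.kitesdk.data.DatasetWriter;
    import org.kitesdk.data.Datasets;

    public class CreateUsersSketch {
      public static void main(String[] args) throws Exception {
        // Describe the dataset using the user.avsc schema shown later in this extract.
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schemaUri("resource:user.avsc")
            .build();
        Dataset<GenericRecord> users = Datasets.create(
            "dataset:hdfs:/tmp/data/users", descriptor, GenericRecord.class);

        // Build one generic record against the dataset's schema and write it.
        GenericRecord user = new GenericRecordBuilder(descriptor.getSchema())
            .set("username", "user-1")
            .set("creationDate", System.currentTimeMillis())
            .set("favoriteColor", "green")
            .build();
        DatasetWriter<GenericRecord> writer = users.newWriter();
        try {
          writer.write(user);
        } finally {
          writer.close();
        }
      }
    }
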

--------------------------------------------------------------------------------
/dataset/src/main/java/org/kitesdk/examples/data/Product.java:
--------------------------------------------------------------------------------
/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.examples.data;

import com.google.common.base.Objects;

/**
 * A POJO representing a product.
 */
public class Product {
  private String name;
  private long id;

  public String getName() {
    return name;
  }

  public void setName(String name) {
    this.name = name;
  }

  public long getId() {
    return id;
  }

  public void setId(long id) {
    this.id = id;
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
        .add("name", name)
        .add("id", id)
        .toString();
  }
}
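
Product carries no Avro schema of its own; the POJO examples (CreateProductDatasetPojo.java and ReadProductDatasetPojo.java, listed in the tree but not part of this extract) let Kite derive the schema from the class by reflection. A rough, illustrative sketch of that usage, not the repository's actual code:

    import org.kitesdk.data.Dataset;
    import org.kitesdk.data.DatasetDescriptor;
    import org.kitesdk.data.DatasetWriter;
    import org.kitesdk.data.Datasets;

    public class WriteProductSketch {
      public static void main(String[] args) throws Exception {
        // Derive the Avro schema from the Product POJO via reflection.
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(Product.class)
            .build();
        Dataset<Product> products = Datasets.create(
            "dataset:hdfs:/tmp/data/products", descriptor, Product.class);

        Product cheese = new Product();
        cheese.setName("cheddar");
        cheese.setId(1L);

        DatasetWriter<Product> writer = products.newWriter();
        try {
          writer.write(cheese);
        } finally {
          writer.close();
        }
      }
    }

This is the dataset that DeleteProductDataset above removes again with Datasets.delete("dataset:hdfs:/tmp/data/products").
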

--------------------------------------------------------------------------------
/dataset/src/main/resources/core-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://quickstart.cloudera/</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/dataset/src/main/resources/hive-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/dataset/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
# by default, log at INFO to the console
log4j.rootLogger = INFO, console

# Set the appender named console to be a ConsoleAppender
log4j.appender.console=org.apache.log4j.ConsoleAppender

# Define the layout for console appender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p :: %m%n

# silence native code warnings
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

# allow DEBUG messages from Kite examples
log4j.logger.org.kitesdk.examples=DEBUG
log4j.logger.org.kitesdk.data.spi.filesystem.FileSystemDatasetReader=DEBUG
log4j.logger.org.kitesdk.data.hcatalog=INFO
log4j.logger.org.kitesdk.data.hbase=INFO

--------------------------------------------------------------------------------
/dataset/src/main/resources/user.avsc:
--------------------------------------------------------------------------------
{
  "type": "record",
  "name": "User",
  "namespace": "org.kitesdk.examples.data",
  "doc": "A user record",
  "fields": [
    {
      "name": "username",
      "type": "string"
    },
    {
      "name": "creationDate",
      "type": "long"
    },
    {
      "name": "favoriteColor",
      "type": "string"
    }
  ]
}

--------------------------------------------------------------------------------
/dataset/src/test/resources/core-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost/</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/dataset/src/test/resources/hive-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/dataset/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
#
# Copyright 2013 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# this file is needed so that we replace log4j.properties from the logging example jar
# and configure the Flume Log4jAppender programmatically

log4j.rootCategory = INFO, out

log4j.appender.out = org.apache.log4j.ConsoleAppender
log4j.appender.out.layout = org.apache.log4j.PatternLayout
log4j.appender.out.layout.ConversionPattern = %d (%t) [%p - %l] %m%n

--------------------------------------------------------------------------------
/demo/bin/simulate-activity.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Copyright 2013 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

USER_ID=$1
REPEATS=${2:-100000}
COOKIE_FILE=/tmp/user_${USER_ID}_cookies.txt
rm -f $COOKIE_FILE

for (( i=1; i<=$REPEATS; i++ ))
do
  curl -sL -w "%{http_code} %{url_effective}\\n" \
    -o /dev/null \
    -b $COOKIE_FILE \
    -c $COOKIE_FILE \
    "http://quickstart.cloudera:8034/demo-logging-webapp/send?user_id=$USER_ID&message=message$i"
  sleep $(printf ".%02ds" $(( $RANDOM % 1000 )))
done

--------------------------------------------------------------------------------
/demo/demo-core/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <groupId>org.kitesdk.examples</groupId>
  <artifactId>demo-core</artifactId>
  <version>1.1.0</version>

  <parent>
    <groupId>org.kitesdk.examples</groupId>
    <artifactId>demo</artifactId>
    <version>1.1.0</version>
  </parent>

  <packaging>jar</packaging>

  <name>demo-core</name>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro-maven-plugin</artifactId>
      </plugin>
    </plugins>
  </build>

</project>

--------------------------------------------------------------------------------
/demo/demo-core/src/main/avro/session.avsc:
--------------------------------------------------------------------------------
{
  "name": "Session",
  "namespace": "org.kitesdk.examples.demo.event",
  "type": "record",
  "doc": "A session derived from a standard event log.",
  "fields": [
    {
      "name": "user_id",
      "type": "long",
      "doc": "A unique identifier for the user. Required."
    },
    {
      "name": "session_id",
      "type": "string",
      "doc": "A unique identifier for the session. Required."
    },
    {
      "name": "ip",
      "type": "string",
      "doc": "The IP address of the host where the event originated. Required."
    },
    {
      "name": "start_timestamp",
      "type": "long",
      "doc": "The point in time when the session started, represented as the number of milliseconds since January 1, 1970, 00:00:00 GMT. Required."
    },
    {
      "name": "duration",
      "type": "long",
      "doc": "The duration of the session in milliseconds. Required."
    },
    {
      "name": "session_event_count",
      "type": "int",
      "doc": "The number of events that occurred in the session. Required."
    }
  ]
}

--------------------------------------------------------------------------------
/demo/demo-core/src/main/avro/standard_event.avsc:
--------------------------------------------------------------------------------
{
  "name": "StandardEvent",
  "namespace": "org.kitesdk.data.event",
  "type": "record",
  "doc": "A standard event type for logging, based on the paper 'The Unified Logging Infrastructure for Data Analytics at Twitter' by Lee et al, http://vldb.org/pvldb/vol5/p1771_georgelee_vldb2012.pdf",
  "fields": [
    {
      "name": "event_initiator",
      "type": "string",
      "doc": "Where the event was triggered from in the format {client,server}_{user,app}, e.g. 'client_user'. Required."
    },
    {
      "name": "event_name",
      "type": "string",
      "doc": "A hierarchical name for the event, with parts separated by ':'. Required."
    },
    {
      "name": "user_id",
      "type": "long",
      "doc": "A unique identifier for the user. Required."
    },
    {
      "name": "session_id",
      "type": "string",
      "doc": "A unique identifier for the session. Required."
    },
    {
      "name": "ip",
      "type": "string",
      "doc": "The IP address of the host where the event originated. Required."
    },
    {
      "name": "timestamp",
      "type": "long",
      "doc": "The point in time when the event occurred, represented as the number of milliseconds since January 1, 1970, 00:00:00 GMT. Required."
    }
  ]
}
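
Session is an aggregate view of StandardEvent: the demo's Crunch job (CreateSessions.java under demo-crunch, not reproduced in this extract) derives one Session record per session_id from the raw event log. Purely as an illustration of that per-group arithmetic, and not the demo's actual code, the reduction looks roughly like the sketch below; it uses the classes that avro-maven-plugin generates from these two schemas, and the accessor and builder names assume the usual camel-cased methods generated for the snake_cased fields:

    import java.util.List;
    import org.kitesdk.data.event.StandardEvent;
    import org.kitesdk.examples.demo.event.Session;

    public class SessionMath {
      /** Collapse one session's events into a single Session record (illustrative only). */
      static Session toSession(List<StandardEvent> events) {
        long start = Long.MAX_VALUE;
        long end = Long.MIN_VALUE;
        for (StandardEvent e : events) {
          start = Math.min(start, e.getTimestamp());
          end = Math.max(end, e.getTimestamp());
        }
        StandardEvent first = events.get(0);
        return Session.newBuilder()
            .setUserId(first.getUserId())
            .setSessionId(first.getSessionId())
            .setIp(first.getIp())
            .setStartTimestamp(start)
            .setDuration(end - start)               // duration in milliseconds
            .setSessionEventCount(events.size())
            .build();
      }
    }
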

--------------------------------------------------------------------------------
/demo/demo-crunch/src/main/resources/core-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://quickstart.cloudera/</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/demo/demo-crunch/src/main/resources/hive-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://quickstart.cloudera:9083</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/demo/demo-crunch/src/main/resources/mapred-site.xml:
--------------------------------------------------------------------------------
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

--------------------------------------------------------------------------------
/demo/demo-logging-webapp/src/main/webapp/WEB-INF/web.xml:
--------------------------------------------------------------------------------
<web-app>

  <display-name>Kite Example</display-name>

  <servlet>
    <servlet-name>logging-servlet</servlet-name>
    <servlet-class>org.kitesdk.examples.demo.LoggingServlet</servlet-class>
  </servlet>

  <servlet-mapping>
    <servlet-name>logging-servlet</servlet-name>
    <url-pattern>/send/*</url-pattern>
  </servlet-mapping>

  <welcome-file-list>
    <welcome-file>index.jsp</welcome-file>
  </welcome-file-list>

</web-app>
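
LoggingServlet.java itself is listed in the tree but not included in this extract. The servlet mapped to /send/* is the piece that turns the form parameters (the same user_id and message that simulate-activity.sh sends) into log events, which the demo presumably routes on to Flume through its log4j configuration. A purely illustrative sketch of such a servlet, not the demo's actual class:

    import java.io.IOException;
    import javax.servlet.ServletException;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    import org.apache.log4j.Logger;

    public class LoggingServletSketch extends HttpServlet {
      private static final Logger logger = Logger.getLogger(LoggingServletSketch.class);

      @Override
      protected void doGet(HttpServletRequest request, HttpServletResponse response)
          throws ServletException, IOException {
        String userId = request.getParameter("user_id");
        String message = request.getParameter("message");
        if (userId != null && message != null) {
          // One log line per request; a log4j Flume appender can ship it to the cluster.
          logger.info("user_id:" + userId + " message:" + message);
        }
        response.getWriter().println("OK");
      }
    }
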

--------------------------------------------------------------------------------
/demo/demo-logging-webapp/src/main/webapp/index.jsp:
--------------------------------------------------------------------------------
<%--
  Copyright 2013 Cloudera Inc.

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
--%>
<html>
<head>
  <title>Kite Example</title>
</head>
<body>

<h2>Kite Example</h2>

<form action="send">
  User ID: <input type="text" name="user_id"/>
  Message: <input type="text" name="message"/>
  <input type="submit" value="Send"/>
</form>

</body>
</html>

--------------------------------------------------------------------------------
/demo/demo-reports-webapp/src/main/webapp/WEB-INF/context.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/demo/demo-reports-webapp/src/main/webapp/WEB-INF/web.xml:
--------------------------------------------------------------------------------
<web-app>

  <resource-ref>
    <description>Impala JDBC</description>
    <res-ref-name>jdbc/impala</res-ref-name>
    <res-type>javax.sql.DataSource</res-type>
    <res-auth>Container</res-auth>
  </resource-ref>

  <welcome-file-list>
    <welcome-file>index.jsp</welcome-file>
  </welcome-file-list>

</web-app>
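
The report pages under reports/ (all_sessions.jsp, avg_session_duration.jsp, display_query_results.jspf) are not part of this extract; they run their queries against Impala through the container-managed jdbc/impala DataSource declared above, and index.jsp below imports the JSTL core and sql taglibs for that purpose. The same lookup in plain Java, only as an illustration of what the container resolves (the query text is an assumption, and the code is only meaningful inside the webapp where java:comp/env is bound):

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.Statement;
    import javax.naming.InitialContext;
    import javax.sql.DataSource;

    public class ReportQuerySketch {
      public static void main(String[] args) throws Exception {
        // Resolve the container-managed Impala connection pool declared in web.xml.
        DataSource impala = (DataSource)
            new InitialContext().lookup("java:comp/env/jdbc/impala");
        try (Connection conn = impala.getConnection();
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM sessions")) {
          while (rs.next()) {
            System.out.println("sessions: " + rs.getLong(1));
          }
        }
      }
    }
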

--------------------------------------------------------------------------------
/demo/demo-reports-webapp/src/main/webapp/index.jsp:
--------------------------------------------------------------------------------
<%--
  Copyright 2013 Cloudera Inc.

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
--%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/sql" prefix="sql" %>
<html>
<head>
  <title>Kite Demo</title>
</head>
<body>

<h2>Kite Demo</h2>

Available reports: