├── visualizations ├── TotalTweets.json ├── Sentiments.json ├── PIELanguage.json ├── TrendingHashTags.json ├── TwitterSentimentAnalysis_dashboard.json ├── TextBasedSentiment.json └── Quarterhourly-Analysis.json ├── .gitignore ├── src ├── test │ └── java │ │ └── com │ │ └── stdatalabs │ │ └── SparkES │ │ └── AppTest.java └── main │ └── scala │ └── com │ └── stdatalabs │ └── SparkES │ ├── SentimentUtils.scala │ └── TwitterSentimentAnalysis.scala ├── README.md ├── pom.xml └── dependency-reduced-pom.xml /visualizations/TotalTweets.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "Total-Tweets", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "Total Tweets", 7 | "visState": "{\"type\":\"metric\",\"params\":{\"fontSize\":60},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}],\"listeners\":{}}", 8 | "description": "", 9 | "version": 1, 10 | "kibanaSavedObjectMeta": { 11 | "searchSourceJSON": "{\"index\":\"twitter_020717\",\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"filter\":[]}" 12 | } 13 | } 14 | } 15 | ] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .*/ 3 | target/ 4 | .classpath 5 | .cache-main 6 | .cache-tests 7 | .settings 8 | .project 9 | tweets.txt 10 | checkpoint/ 11 | 12 | # Compiled source # 13 | ################### 14 | *.com 15 | *.class 16 | *.dll 17 | *.exe 18 | *.o 19 | *.so 20 | checkpoint 21 | 22 | # Packages # 23 | ############ 24 | # it's better to unpack these files and commit the raw source 25 | # git has its own built in compression methods 26 | *.7z 27 | *.dmg 28 | *.gz 29 | *.iso 30 | *.jar 31 | *.rar 32 | *.tar 33 | *.zip 34 | 35 | # Logs and databases # 36 | ###################### 37 | *.log 38 | *.sql 39 | *.sqlite 40 | 41 | # OS generated files # 42 | 
###################### 43 | .DS_Store 44 | .DS_Store? 45 | ._* 46 | .Spotlight-V100 47 | .Trashes 48 | ehthumbs.db 49 | Thumbs.db 50 | -------------------------------------------------------------------------------- /visualizations/Sentiments.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "Sentiments", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "Sentiments", 7 | "visState": "{\"type\":\"table\",\"params\":{\"perPage\":5,\"showMeticsAtAllLevels\":false,\"showPartialRows\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"sentiment\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", 8 | "description": "", 9 | "version": 1, 10 | "kibanaSavedObjectMeta": { 11 | "searchSourceJSON": "{\"index\":\"twitter_020717\",\"query\":{\"query_string\":{\"analyze_wildcard\":true,\"query\":\"*\"}},\"filter\":[]}" 12 | } 13 | } 14 | } 15 | ] -------------------------------------------------------------------------------- /src/test/java/com/stdatalabs/SparkES/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.stdatalabs.SparkES; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /visualizations/PIELanguage.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "PIE-Sentiment-Language", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "PIE Sentiment Language", 7 | "visState": "{\"aggs\":[{\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"id\":\"3\",\"params\":{\"field\":\"language\",\"order\":\"desc\",\"orderBy\":\"1\",\"size\":5},\"schema\":\"segment\",\"type\":\"terms\"},{\"id\":\"4\",\"params\":{\"field\":\"sentiment\",\"order\":\"desc\",\"orderBy\":\"1\",\"size\":5},\"schema\":\"segment\",\"type\":\"terms\"}],\"listeners\":{},\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"shareYAxis\":true},\"type\":\"pie\"}", 8 | "description": "", 9 | "version": 1, 10 | "kibanaSavedObjectMeta": { 11 | "searchSourceJSON": "{\"index\":\"twitter_020717\",\"query\":{\"query_string\":{\"analyze_wildcard\":true,\"query\":\"*\"}},\"filter\":[]}" 12 | } 13 | } 14 | } 15 | ] -------------------------------------------------------------------------------- /visualizations/TrendingHashTags.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "Trending-HashTags", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "Trending HashTags", 7 | "visState": 
"{\"type\":\"pie\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"isDonut\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"hashtags\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"custom\",\"orderAgg\":{\"id\":\"2-orderAgg\",\"type\":\"count\",\"schema\":\"orderAgg\",\"params\":{}}}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", 8 | "description": "", 9 | "version": 1, 10 | "kibanaSavedObjectMeta": { 11 | "searchSourceJSON": "{\"index\":\"twitter_020717\",\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"filter\":[]}" 12 | } 13 | } 14 | } 15 | ] -------------------------------------------------------------------------------- /visualizations/TwitterSentimentAnalysis_dashboard.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "Twitter-Sentiment-DashBoard-020717", 4 | "_type": "dashboard", 5 | "_source": { 6 | "title": "Twitter Sentiment DashBoard - 020717", 7 | "hits": 0, 8 | "description": "", 9 | "panelsJSON": "[{\"col\":1,\"id\":\"PIE-Sentiment-Language\",\"row\":9,\"size_x\":5,\"size_y\":4,\"type\":\"visualization\"},{\"col\":6,\"id\":\"Trending-HashTags\",\"row\":9,\"size_x\":7,\"size_y\":4,\"type\":\"visualization\"},{\"col\":5,\"id\":\"QuarterHourly-Analysis\",\"row\":1,\"size_x\":8,\"size_y\":4,\"type\":\"visualization\"},{\"col\":1,\"id\":\"Total-Tweets\",\"row\":5,\"size_x\":4,\"size_y\":4,\"type\":\"visualization\"},{\"col\":1,\"id\":\"Sentiments\",\"row\":1,\"size_x\":4,\"size_y\":4,\"type\":\"visualization\"},{\"id\":\"Text-Based-Sentiment\",\"type\":\"visualization\",\"size_x\":8,\"size_y\":4,\"col\":5,\"row\":5}]", 10 | "version": 1, 11 | "timeRestore": false, 12 | "kibanaSavedObjectMeta": { 13 | 
"searchSourceJSON": "{\"filter\":[{\"query\":{\"query_string\":{\"analyze_wildcard\":true,\"query\":\"*\"}}}]}" 14 | } 15 | } 16 | } 17 | ] -------------------------------------------------------------------------------- /visualizations/TextBasedSentiment.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "Text-Based-Sentiment", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "Text Based Sentiment", 7 | "visState": "{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"text\",\"include\":{\"flags\":[\"CASE_INSENSITIVE\"],\"pattern\":\"Transport||Health||Energy||Education\"},\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", 8 | "description": "", 9 | "version": 1, 10 | "kibanaSavedObjectMeta": { 11 | "searchSourceJSON": "{\"index\":\"twitter_020717\",\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"filter\":[]}" 12 | } 13 | } 14 | } 15 | ] -------------------------------------------------------------------------------- /visualizations/Quarterhourly-Analysis.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "QuarterHourly-Analysis", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "QuarterHourly Analysis", 7 | "visState": 
"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"showCircles\":true,\"smoothLines\":false,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":\"55\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"created_at\",\"interval\":\"custom\",\"customInterval\":\"15m\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"sentiment\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", 8 | "description": "", 9 | "version": 1, 10 | "kibanaSavedObjectMeta": { 11 | "searchSourceJSON": "{\"index\":\"twitter_020717\",\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"filter\":[]}" 12 | } 13 | } 14 | } 15 | ] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SparkTwitterPopularHashTags 2 | 3 | A project on Spark Streaming to analyze Popular hashtags from live twitter data streams. Data is ingested from different input sources like Twitter source, Flume and Kafka and processed downstream using Spark Streaming. 4 | 5 | ## Requirements 6 | - IDE 7 | - Apache Maven 3.x 8 | - JVM 6 or 7 9 | 10 | ## General Info 11 | The source folder is organized into 2 packages i.e. Kafka and Streaming. Each class in the Streaming package explores different approach to consume data from Twitter source. 
Below is the list of classes: 12 | * com/stdatalabs/Kafka 13 | * KafkaTwitterProducer.java -- A Kafka Producer that publishes Twitter data to a Kafka broker 14 | * com/stdatalabs/Streaming 15 | * SparkPopularHashTags.scala -- Receives data from the Twitter data source 16 | * FlumeSparkPopularHashTags.scala -- Receives data from Flume Twitter producer 17 | * KafkaSparkPopularHashTags.scala -- Receives data from Kafka Producer 18 | * RecoverableKafkaPopularHashTags.scala -- Spark-Kafka receiver-based approach. Ensures at-least-once semantics 19 | * KafkaDirectPopularHashTags.scala -- Spark-Kafka Direct approach. Ensures exactly-once semantics 20 | * TwitterAvroSource.conf 21 | -- Flume conf for running Twitter Avro source 22 | 23 | ## Description 24 | * ##### A Spark Streaming application that receives tweets on certain keywords from the Twitter data source and finds the popular hashtags. 25 | Discussed in blog -- 26 | [Spark Streaming part 1: Real time twitter sentiment analysis](http://stdatalabs.blogspot.in/2016/09/spark-streaming-part-1-real-time.html) 27 | 28 | * ##### A Spark Streaming - Flume integration to find popular hashtags from Twitter. It receives events from a Flume source that connects to Twitter and pushes tweets as Avro events to a sink. 
29 | Discussed in blog -- 30 | [Spark streaming part 2: Real time twitter sentiment analysis using Flume](http://stdatalabs.blogspot.in/2016/09/spark-streaming-part-2-real-time_10.html) 31 | 32 | * ##### A Spark Streaming - Kafka integration to receive twitter data from kafka producer and find the popular hashtags 33 | Discussed in blog -- 34 | [Spark streaming part 3: Real time twitter sentiment analysis using kafka](http://stdatalabs.blogspot.in/2016/09/spark-streaming-part-3-real-time.html) 35 | 36 | * ##### A Spark Streaming - Kafka integration to ensure at-least once semantics 37 | Discussed in blog -- 38 | [Data guarantees in Spark Streaming with kafka integration](http://stdatalabs.blogspot.in/2016/10/data-guarantees-in-spark-streaming-with.html) 39 | 40 | * ##### A Spark Streaming - Kafka integration to ensure exactly once semantics 41 | Discussed in blog -- 42 | [Data guarantees in Spark Streaming with kafka integration](http://stdatalabs.blogspot.in/2016/10/data-guarantees-in-spark-streaming-with.html) 43 | 44 | 45 | 46 | ### More articles on hadoop technology stack at [stdatalabs](stdatalabs.blogspot.com) 47 | 48 | -------------------------------------------------------------------------------- /src/main/scala/com/stdatalabs/SparkES/SentimentUtils.scala: --------------------------------------------------------------------------------
1 | package com.stdatalabs.SparkES
2 |
3 | import java.util.Properties
4 |
5 | import edu.stanford.nlp.ling.CoreAnnotations
6 | import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations
7 | import edu.stanford.nlp.pipeline.StanfordCoreNLP
8 | import edu.stanford.nlp.sentiment.SentimentCoreAnnotations
9 |
10 | import scala.collection.JavaConversions._
11 | import scala.collection.mutable.ListBuffer
12 |
13 | /**
14 |  * Sentiment scoring helpers built on the Stanford CoreNLP sentiment annotator.
15 |  */
16 | object SentimentUtils {
17 |
18 |   /**
19 |    * CoreNLP properties listing the annotators to run (overview: http://corenlp.run/):
20 |    *  - tokenize  : tokenize the text
21 |    *  - ssplit    : split the text into sentences (full stop, exclamation mark, ...)
22 |    *  - pos       : assign a part of speech (noun, verb, adjective, ...) to each token
23 |    *  - lemma     : group the inflected forms of a word so they are analysed as one item
24 |    *  - parse     : syntactic analysis, see http://nlp.stanford.edu:8080/parser/index.jsp
25 |    *  - sentiment : the sentiment analysis model
26 |    */
27 |   val nlpPropts = {
28 |     val propts = new Properties()
29 |     propts.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment")
30 |     propts
31 |   }
32 |
33 |   /**
34 |    * Pipeline shared by every call to `detectSentiment`; built lazily on first use
35 |    * instead of once per message.
36 |    * NOTE(review): assumes one StanfordCoreNLP pipeline may be used by several threads
37 |    * at once (e.g. concurrent Spark tasks in the same JVM) -- confirm for the CoreNLP version in use.
38 |    */
39 |   private lazy val pipeline = new StanfordCoreNLP(nlpPropts)
40 |
41 |   /**
42 |    * Classifies `message` as "NEGATIVE", "NEUTRAL", "POSITIVE" or "NOT_UNDERSTOOD".
43 |    *
44 |    * The pipeline predicts a sentiment class for every sentence; the classes are averaged,
45 |    * weighted by sentence length in characters, and the average is bucketed:
46 |    * `<= 0.0` NOT_UNDERSTOOD, `< 1.6` NEGATIVE, `<= 2.0` NEUTRAL, `< 5.0` POSITIVE,
47 |    * anything else NOT_UNDERSTOOD.
48 |    *
49 |    * @param message text to classify
50 |    * @return the sentiment label; "NOT_UNDERSTOOD" when the message is null or blank, or
51 |    *         when no sentence could be scored
52 |    */
53 |   def detectSentiment(message: String): String = {
54 |     if (message == null || message.trim.isEmpty) {
55 |       "NOT_UNDERSTOOD"
56 |     } else {
57 |       // Run the message through the shared pipeline.
58 |       val annotation = pipeline.process(message)
59 |       val sentences = annotation.get(classOf[CoreAnnotations.SentencesAnnotation])
60 |
61 |       var weightedSum = 0.0 // sum of (predicted class * sentence length)
62 |       var totalLength = 0   // sum of sentence lengths
63 |
64 |       if (sentences != null) {
65 |         for (sentence <- sentences) {
66 |           // Sentiment-annotated tree of this sentence and the class predicted for it.
67 |           val tree = sentence.get(classOf[SentimentCoreAnnotations.AnnotatedTree])
68 |           val predicted = RNNCoreAnnotations.getPredictedClass(tree)
69 |           val length = sentence.toString.length
70 |
71 |           weightedSum += predicted.toDouble * length
72 |           totalLength += length
73 |         }
74 |       }
75 |
76 |       if (totalLength == 0) {
77 |         // Nothing to average: report it explicitly instead of relying on 0.0 / 0 (NaN).
78 |         "NOT_UNDERSTOOD"
79 |       } else {
80 |         val weightedSentiment = weightedSum / totalLength
81 |
82 |         if (weightedSentiment <= 0.0)
83 |           "NOT_UNDERSTOOD"
84 |         else if (weightedSentiment < 1.6)
85 |           "NEGATIVE"
86 |         else if (weightedSentiment <= 2.0)
87 |           "NEUTRAL"
88 |         else if (weightedSentiment < 5.0)
89 |           "POSITIVE"
90 |         else "NOT_UNDERSTOOD"
91 |       }
92 |     }
93 |   }
94 | }
-------------------------------------------------------------------------------- /src/main/scala/com/stdatalabs/SparkES/TwitterSentimentAnalysis.scala: -------------------------------------------------------------------------------- 1 | package com.stdatalabs.SparkES 2 | 3 | import com.stdatalabs.SparkES.SentimentUtils._ 4 | import org.apache.spark.SparkConf 5 | import org.apache.spark.streaming.twitter._ 6 | import org.apache.spark.streaming.{Seconds, StreamingContext} 7 | import org.elasticsearch.spark._ 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.SparkContext._ 10 | 11 | import java.util.Date 12 | import java.text.SimpleDateFormat 13 | import java.util.Locale 14 | 15 | import scala.util.Try 16 | 17 | /** 18 | * Twitter Sentiment analysis using Stanford core-nlp library 19 | * and storing results in elastic search 20 | * 21 | * Arguments: ... 
22 |  *  - Twitter consumer key
23 |  *  - Twitter consumer secret
24 |  *  - Twitter access token
25 |  *  - Twitter access token secret
26 |  *  - Any number of keywords used to filter tweets (optional)
27 |  *
28 |  * @author Sachin Thirumala
29 |  */
30 |
31 | object TwitterSentimentAnalysis {
32 |
33 |   /**
34 |    * Reads the Twitter OAuth credentials and optional filter keywords from `args`,
35 |    * starts a streaming context with a 5 second batch interval and, for every batch,
36 |    * saves one document per tweet (including its detected sentiment) to Elasticsearch.
37 |    *
38 |    * @param args consumer key, consumer secret, access token, access token secret,
39 |    *             followed by zero or more filter keywords
40 |    */
41 |   def main(args: Array[String]): Unit = {
42 |
43 |     if (args.length < 4) {
44 |       System.err.println("Usage: TwitterSentimentAnalysis <consumer key> <consumer secret> " +
45 |         "<access token> <access token secret> [<filters>]")
46 |       System.exit(1)
47 |     }
48 |
49 |     val Array(consumerKey, consumerSecret, accessToken, accessTokenSecret) = args.take(4)
50 |     // Everything after the four credentials is a filter keyword.
51 |     val filters = args.drop(4)
52 |
53 |     // Set the Twitter OAuth keys as twitter4j system properties.
54 |     System.setProperty("twitter4j.oauth.consumerKey", consumerKey)
55 |     System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret)
56 |     System.setProperty("twitter4j.oauth.accessToken", accessToken)
57 |     System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret)
58 |
59 |     val conf = new SparkConf().setAppName("TwitterSentimentAnalysis")
60 |
61 |     // Streaming context with a 5 second batch interval.
62 |     val ssc = new StreamingContext(conf, Seconds(5))
63 |
64 |     // Stream of tweets, restricted by the filter keywords.
65 |     val tweets = TwitterUtils.createStream(ssc, None, filters)
66 |
67 |     tweets.print()
68 |
69 |     // Elasticsearch resource ("index/type") that receives the documents.
70 |     val esResource = "twitter_020717/tweet"
71 |
72 |     /* An RDD can be saved into Elasticsearch as long as its content can be translated to a
73 |      * document, so every tweet is turned into a Map of the required fields plus its sentiment.
74 |      */
75 |     tweets.foreachRDD { rdd =>
76 |       val documents = rdd.map { status =>
77 |         Map(
78 |           "user" -> status.getUser.getScreenName,
79 |           "created_at" -> status.getCreatedAt.getTime.toString,
80 |           "location" -> Option(status.getGeoLocation).map(geo => s"${geo.getLatitude},${geo.getLongitude}"),
81 |           "text" -> status.getText,
82 |           "hashtags" -> status.getHashtagEntities.map(_.getText),
83 |           "retweet" -> status.getRetweetCount,
84 |           // Wrapped like "location" so a tweet without a language does not fail the batch.
85 |           "language" -> Option(status.getLang),
86 |           "sentiment" -> detectSentiment(status.getText)
87 |         )
88 |       }
89 |       documents.saveToEs(esResource)
90 |     }
91 |
92 |     ssc.start()
93 |     ssc.awaitTermination()
94 |   }
95 | }
-------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.stdatalabs 6 | SparkES 7 | 0.0.1-SNAPSHOT 8 | jar 9 | 10 | SparkES 11 | http://maven.apache.org 12 | 13 | 14 | UTF-8 15 | 16 | 17 | 18 | 19 | cloudera 20 | https://repository.cloudera.com/artifactory/cloudera-repos/ 21 | 22 | 23 | 24 | 25 | 26 | junit 27 | junit 28 | 3.8.1 29 | test 30 | 31 | 32 | 33 | org.apache.spark 34 | spark-core_2.10 35 | 1.5.0 36 | provided 37 | 38 | 39 | 40 | org.apache.spark 41 | spark-streaming_2.10 42 | 1.5.0 43 | provided 44 | 45 | 46 | org.apache.spark 47 | spark-mllib_2.10 48 | 1.5.0 49 | provided 50 | 51 | 52 | org.apache.spark 53 | spark-sql_2.10 54 | 1.5.0 55 | provided 56 | 57 | 58 | org.apache.spark 59 | spark-hive_2.10 60 | 1.5.0 61 | provided 62 | 63 | 64 | org.twitter4j 65 | twitter4j-core 66 | 3.0.6 67 | 68 | 69 | 70 | org.twitter4j 71 | twitter4j-stream 72 | 3.0.6 73 | 74 | 75 | 76 | org.twitter4j 77 | twitter4j-async 78 | 3.0.6 79 | 80 | 81 | org.apache.spark 82 | spark-streaming-twitter_2.10 83 | 1.5.0-cdh5.5.0 84 | compile 85 | 86 | 87 | edu.stanford.nlp 88 | stanford-corenlp 89 | 3.4.1 90 | 91 | 92 | edu.stanford.nlp 93 | stanford-corenlp 94 | 3.4.1 95 | models 96 | 97 | 98 | 99 | org.elasticsearch 100 | 
elasticsearch-spark-13_2.10 101 | 5.4.0 102 | 103 | 104 | 105 | 106 | 107 | 108 | org.apache.maven.plugins 109 | maven-shade-plugin 110 | 2.4.3 111 | 112 | 113 | package 114 | 115 | shade 116 | 117 | 118 | 119 | 121 | com.stdatalabs.SparkES.TwitterSentimentAnalysis 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | com.stdatalabs 5 | SparkES 6 | SparkES 7 | 0.0.1-SNAPSHOT 8 | http://maven.apache.org 9 | 10 | 11 | 12 | maven-shade-plugin 13 | 2.4.3 14 | 15 | 16 | package 17 | 18 | shade 19 | 20 | 21 | 22 | 23 | com.stdatalabs.SparkES.TwitterSentimentAnalysis 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | cloudera 35 | https://repository.cloudera.com/artifactory/cloudera-repos/ 36 | 37 | 38 | 39 | 40 | junit 41 | junit 42 | 3.8.1 43 | test 44 | 45 | 46 | org.apache.spark 47 | spark-core_2.10 48 | 1.5.0 49 | provided 50 | 51 | 52 | avro-mapred 53 | org.apache.avro 54 | 55 | 56 | chill_2.10 57 | com.twitter 58 | 59 | 60 | chill-java 61 | com.twitter 62 | 63 | 64 | hadoop-client 65 | org.apache.hadoop 66 | 67 | 68 | spark-launcher_2.10 69 | org.apache.spark 70 | 71 | 72 | spark-network-common_2.10 73 | org.apache.spark 74 | 75 | 76 | spark-network-shuffle_2.10 77 | org.apache.spark 78 | 79 | 80 | spark-unsafe_2.10 81 | org.apache.spark 82 | 83 | 84 | jets3t 85 | net.java.dev.jets3t 86 | 87 | 88 | curator-recipes 89 | org.apache.curator 90 | 91 | 92 | javax.servlet 93 | org.eclipse.jetty.orbit 94 | 95 | 96 | commons-lang3 97 | org.apache.commons 98 | 99 | 100 | commons-math3 101 | org.apache.commons 102 | 103 | 104 | jsr305 105 | com.google.code.findbugs 106 | 107 | 108 | slf4j-api 109 | org.slf4j 110 | 111 | 112 | jul-to-slf4j 113 | org.slf4j 114 | 115 | 116 | jcl-over-slf4j 117 | org.slf4j 118 | 119 | 120 | log4j 121 | log4j 122 | 123 | 124 | 
slf4j-log4j12 125 | org.slf4j 126 | 127 | 128 | compress-lzf 129 | com.ning 130 | 131 | 132 | snappy-java 133 | org.xerial.snappy 134 | 135 | 136 | lz4 137 | net.jpountz.lz4 138 | 139 | 140 | RoaringBitmap 141 | org.roaringbitmap 142 | 143 | 144 | commons-net 145 | commons-net 146 | 147 | 148 | akka-remote_2.10 149 | com.typesafe.akka 150 | 151 | 152 | akka-slf4j_2.10 153 | com.typesafe.akka 154 | 155 | 156 | json4s-jackson_2.10 157 | org.json4s 158 | 159 | 160 | jersey-server 161 | com.sun.jersey 162 | 163 | 164 | jersey-core 165 | com.sun.jersey 166 | 167 | 168 | mesos 169 | org.apache.mesos 170 | 171 | 172 | netty-all 173 | io.netty 174 | 175 | 176 | stream 177 | com.clearspring.analytics 178 | 179 | 180 | metrics-core 181 | io.dropwizard.metrics 182 | 183 | 184 | metrics-jvm 185 | io.dropwizard.metrics 186 | 187 | 188 | metrics-json 189 | io.dropwizard.metrics 190 | 191 | 192 | metrics-graphite 193 | io.dropwizard.metrics 194 | 195 | 196 | jackson-databind 197 | com.fasterxml.jackson.core 198 | 199 | 200 | jackson-module-scala_2.10 201 | com.fasterxml.jackson.module 202 | 203 | 204 | ivy 205 | org.apache.ivy 206 | 207 | 208 | oro 209 | oro 210 | 211 | 212 | tachyon-client 213 | org.tachyonproject 214 | 215 | 216 | pyrolite 217 | net.razorvine 218 | 219 | 220 | py4j 221 | net.sf.py4j 222 | 223 | 224 | 225 | 226 | org.apache.spark 227 | spark-streaming_2.10 228 | 1.5.0 229 | provided 230 | 231 | 232 | org.apache.spark 233 | spark-mllib_2.10 234 | 1.5.0 235 | provided 236 | 237 | 238 | spark-graphx_2.10 239 | org.apache.spark 240 | 241 | 242 | breeze_2.10 243 | org.scalanlp 244 | 245 | 246 | pmml-model 247 | org.jpmml 248 | 249 | 250 | commons-math3 251 | org.apache.commons 252 | 253 | 254 | 255 | 256 | org.apache.spark 257 | spark-sql_2.10 258 | 1.5.0 259 | provided 260 | 261 | 262 | spark-catalyst_2.10 263 | org.apache.spark 264 | 265 | 266 | parquet-column 267 | org.apache.parquet 268 | 269 | 270 | parquet-hadoop 271 | org.apache.parquet 272 | 273 | 274 | 
jackson-databind 275 | com.fasterxml.jackson.core 276 | 277 | 278 | 279 | 280 | org.apache.spark 281 | spark-hive_2.10 282 | 1.5.0 283 | provided 284 | 285 | 286 | parquet-hadoop-bundle 287 | com.twitter 288 | 289 | 290 | hive-exec 291 | org.spark-project.hive 292 | 293 | 294 | hive-metastore 295 | org.spark-project.hive 296 | 297 | 298 | avro 299 | org.apache.avro 300 | 301 | 302 | commons-httpclient 303 | commons-httpclient 304 | 305 | 306 | calcite-avatica 307 | org.apache.calcite 308 | 309 | 310 | calcite-core 311 | org.apache.calcite 312 | 313 | 314 | httpclient 315 | org.apache.httpcomponents 316 | 317 | 318 | jackson-mapper-asl 319 | org.codehaus.jackson 320 | 321 | 322 | commons-codec 323 | commons-codec 324 | 325 | 326 | jodd-core 327 | org.jodd 328 | 329 | 330 | datanucleus-core 331 | org.datanucleus 332 | 333 | 334 | libthrift 335 | org.apache.thrift 336 | 337 | 338 | libfb303 339 | org.apache.thrift 340 | 341 | 342 | avro-mapred 343 | org.apache.avro 344 | 345 | 346 | jsr305 347 | com.google.code.findbugs 348 | 349 | 350 | 351 | 352 | 353 | UTF-8 354 | 355 | 356 | 357 | --------------------------------------------------------------------------------