├── .gitignore ├── LICENSE ├── README.md ├── TwitterStreaming └── src │ ├── app │ ├── sparkml_train_model.py │ ├── tweet_stream_consumer.py │ ├── tweet_stream_producer.py │ └── twitter_credentials.py │ └── models │ ├── metadata │ ├── .part-00000.crc │ ├── _SUCCESS │ └── part-00000 │ └── stages │ ├── 0_StopWordsRemover_8c03fb6588e2 │ └── metadata │ │ ├── .part-00000.crc │ │ ├── _SUCCESS │ │ └── part-00000 │ ├── 1_CountVectorizer_7b5edde58198 │ ├── data │ │ ├── .part-00000-303a974b-0804-4c09-bf69-ab4d75d7c22a-c000.snappy.parquet.crc │ │ ├── _SUCCESS │ │ └── part-00000-303a974b-0804-4c09-bf69-ab4d75d7c22a-c000.snappy.parquet │ └── metadata │ │ ├── .part-00000.crc │ │ ├── _SUCCESS │ │ └── part-00000 │ ├── 2_IDF_ea5017f27b57 │ ├── data │ │ ├── .part-00000-475278fe-9e19-48d2-8669-7e9328854a90-c000.snappy.parquet.crc │ │ ├── _SUCCESS │ │ └── part-00000-475278fe-9e19-48d2-8669-7e9328854a90-c000.snappy.parquet │ └── metadata │ │ ├── .part-00000.crc │ │ ├── _SUCCESS │ │ └── part-00000 │ ├── 3_StringIndexer_45535689b707 │ ├── data │ │ ├── .part-00000-0e3c9fa5-51b9-48e8-af15-899663b0f27c-c000.snappy.parquet.crc │ │ ├── _SUCCESS │ │ └── part-00000-0e3c9fa5-51b9-48e8-af15-899663b0f27c-c000.snappy.parquet │ └── metadata │ │ ├── .part-00000.crc │ │ ├── _SUCCESS │ │ └── part-00000 │ └── 4_LogisticRegression_ed84056a763e │ ├── data │ ├── .part-00000-800e9de8-576c-459d-ad5c-a34bc8d5c915-c000.snappy.parquet.crc │ ├── _SUCCESS │ └── part-00000-800e9de8-576c-459d-ad5c-a34bc8d5c915-c000.snappy.parquet │ └── metadata │ ├── .part-00000.crc │ ├── _SUCCESS │ └── part-00000 └── images ├── Architecture.png └── MLPipeline.png /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/README.md -------------------------------------------------------------------------------- /TwitterStreaming/src/app/sparkml_train_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/app/sparkml_train_model.py -------------------------------------------------------------------------------- /TwitterStreaming/src/app/tweet_stream_consumer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/app/tweet_stream_consumer.py -------------------------------------------------------------------------------- /TwitterStreaming/src/app/tweet_stream_producer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/app/tweet_stream_producer.py -------------------------------------------------------------------------------- /TwitterStreaming/src/app/twitter_credentials.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/app/twitter_credentials.py -------------------------------------------------------------------------------- /TwitterStreaming/src/models/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/metadata/.part-00000.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/metadata/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/metadata/part-00000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/metadata/part-00000 -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/0_StopWordsRemover_8c03fb6588e2/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/0_StopWordsRemover_8c03fb6588e2/metadata/.part-00000.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/0_StopWordsRemover_8c03fb6588e2/metadata/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/0_StopWordsRemover_8c03fb6588e2/metadata/part-00000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/0_StopWordsRemover_8c03fb6588e2/metadata/part-00000 -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/data/.part-00000-303a974b-0804-4c09-bf69-ab4d75d7c22a-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/data/.part-00000-303a974b-0804-4c09-bf69-ab4d75d7c22a-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/data/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/data/part-00000-303a974b-0804-4c09-bf69-ab4d75d7c22a-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/data/part-00000-303a974b-0804-4c09-bf69-ab4d75d7c22a-c000.snappy.parquet -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/metadata/.part-00000.crc: -------------------------------------------------------------------------------- 1 | crc#nl{ -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/metadata/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/metadata/part-00000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/1_CountVectorizer_7b5edde58198/metadata/part-00000 -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/data/.part-00000-475278fe-9e19-48d2-8669-7e9328854a90-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/data/.part-00000-475278fe-9e19-48d2-8669-7e9328854a90-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/data/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/data/part-00000-475278fe-9e19-48d2-8669-7e9328854a90-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/data/part-00000-475278fe-9e19-48d2-8669-7e9328854a90-c000.snappy.parquet -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/metadata/.part-00000.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/metadata/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/metadata/part-00000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/2_IDF_ea5017f27b57/metadata/part-00000 -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/data/.part-00000-0e3c9fa5-51b9-48e8-af15-899663b0f27c-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/data/.part-00000-0e3c9fa5-51b9-48e8-af15-899663b0f27c-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/data/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/data/part-00000-0e3c9fa5-51b9-48e8-af15-899663b0f27c-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/data/part-00000-0e3c9fa5-51b9-48e8-af15-899663b0f27c-c000.snappy.parquet -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/metadata/.part-00000.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/metadata/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/metadata/part-00000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/3_StringIndexer_45535689b707/metadata/part-00000 -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/data/.part-00000-800e9de8-576c-459d-ad5c-a34bc8d5c915-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/data/.part-00000-800e9de8-576c-459d-ad5c-a34bc8d5c915-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/data/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/data/part-00000-800e9de8-576c-459d-ad5c-a34bc8d5c915-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/data/part-00000-800e9de8-576c-459d-ad5c-a34bc8d5c915-c000.snappy.parquet -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/metadata/.part-00000.crc -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/metadata/_SUCCESS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/metadata/part-00000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/TwitterStreaming/src/models/stages/4_LogisticRegression_ed84056a763e/metadata/part-00000 -------------------------------------------------------------------------------- /images/Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/images/Architecture.png -------------------------------------------------------------------------------- /images/MLPipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pran4ajith/spark-twitter-streaming/HEAD/images/MLPipeline.png --------------------------------------------------------------------------------