├── .empty ├── Procfile ├── system.properties ├── .travis.yml ├── src ├── test │ ├── scala │ │ └── com │ │ │ └── eneco │ │ │ └── trading │ │ │ └── kafka │ │ │ └── connect │ │ │ └── twitter │ │ │ ├── TestDomain.scala │ │ │ ├── TestTwitterStatusReader.scala │ │ │ ├── TestTwitterSourceTask.scala │ │ │ ├── TestSinkTask.scala │ │ │ ├── TestTwitterSinkConfig.scala │ │ │ ├── TestTwitterBase.scala │ │ │ ├── TestTwitterSourceConfig.scala │ │ │ ├── TestTwitterSourceConnector.scala │ │ │ └── TestSimpleTwitterWriter.scala │ └── resources │ │ └── log4j.properties └── main │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── eneco │ └── trading │ └── kafka │ └── connect │ └── twitter │ ├── Logging.scala │ ├── Extensions.scala │ ├── TwitterSourceTask.scala │ ├── TwitterSinkConfig.scala │ ├── TwitterWriter.scala │ ├── TwitterSinkTask.scala │ ├── TwitterSinkConnector.scala │ ├── TwitterSourceConnector.scala │ ├── TwitterReader.scala │ ├── TwitterStatusReader.scala │ ├── domain │ └── TwitterStatus.scala │ └── TwitterSourceConfig.scala ├── .gitignore ├── twitter-source.properties.example ├── setup-confluent.sh ├── setup_certs ├── connect-simple-source-standalone.properties ├── connect-sink-standalone.properties ├── connect-source-standalone.properties ├── properties-generate ├── start ├── README.md ├── README.original.md ├── pom.xml └── LICENSE /.empty: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: ./start 2 | -------------------------------------------------------------------------------- /system.properties: -------------------------------------------------------------------------------- 1 | java.runtime.version=1.8 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 2.11.7 3 | jdk: oraclejdk8 4 | before_script: ulimit -u 30000 5 | install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -Dsource.skip=true -DjavaVersion=1.8 6 | script: mvn install -------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestDomain.scala: -------------------------------------------------------------------------------- 1 | package com.eneco.trading.kafka.connect.twitter 2 | 3 | /** 4 | * Created by andrew@datamountaineer.com on 29/02/16. 5 | * kafka-connect-twitter 6 | */ 7 | class TestDomain extends TestTwitterBase { 8 | 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /twitter-source.properties.example 2 | /connect-standalone.properties 3 | /kafka-connect-twitter.iml 4 | /.idea 5 | # sbt specific 6 | target/ 7 | # Scala-IDE specific 8 | /twitter-source.properties 9 | /.settings 10 | /.project 11 | /.classpath 12 | /.cache-main 13 | /.cache-tests 14 | -------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestTwitterStatusReader.scala: -------------------------------------------------------------------------------- 1 | package com.eneco.trading.kafka.connect.twitter 2 | 3 | /** 4 | * Created by andrew@datamountaineer.com on 29/02/16. 
5 | * kafka-connect-twitter
6 | */
7 | class TestTwitterStatusReader extends TestTwitterBase {
8 |
9 | }
10 |
-------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestTwitterSourceTask.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | /**
4 | * Created by andrew@datamountaineer.com on 29/02/16.
5 | * kafka-connect-twitter
6 | */
7 | class TestTwitterSourceTask extends TestTwitterBase {
8 |
9 | }
10 |
-------------------------------------------------------------------------------- /src/main/resources/log4j.properties: --------------------------------------------------------------------------------
1 | # suppress inspection "UnusedProperty" for whole file
2 | log4j.rootLogger=INFO,stdout
3 |
4 | #stdout
5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
6 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
7 | log4j.appender.stdout.layout.conversionPattern=%d{ISO8601} %-5p [%t] [%c] [%M:%L] %m%n
-------------------------------------------------------------------------------- /src/test/resources/log4j.properties: --------------------------------------------------------------------------------
1 | # suppress inspection "UnusedProperty" for whole file
2 | log4j.rootLogger=INFO,stdout
3 |
4 | #stdout
5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
6 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
7 | log4j.appender.stdout.layout.conversionPattern=%d{ISO8601} %-5p [%t] [%c] [%M:%L] %m%n
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/Logging.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | /**
4 | * Created by andrew@datamountaineer.com on 24/02/16.
5 | * kafka-connect-twitter
6 | */
7 |
8 | import org.slf4j.LoggerFactory
9 |
10 | trait Logging {
11 | val loggerName = this.getClass.getName
12 | @transient lazy val log = LoggerFactory.getLogger(loggerName)
13 | }
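The `Logging` trait above gives any class that mixes it in a lazily created slf4j logger named after the concrete runtime class. A minimal usage sketch (the class `QueueMonitor` is invented for illustration):

```scala
// Hypothetical user of the Logging trait: loggerName resolves to the runtime
// class, so log lines are attributed to QueueMonitor rather than to Logging.
class QueueMonitor extends Logging {
  def check(depth: Int): Unit =
    if (depth > 9000) log.warn(s"queue depth is $depth")
    else log.debug(s"queue depth is $depth")
}
```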
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/Extensions.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import java.util.concurrent.{LinkedBlockingQueue, TimeUnit}
4 | import java.util
5 | import com.google.common.collect.Queues
6 |
7 | /**
8 | * Created by r on 3/1/16.
9 | */
10 | object Extensions {
11 |
12 | implicit class LinkedBlockingQueueExtension[T](val lbq: LinkedBlockingQueue[T]) extends AnyVal {
13 | def drainWithTimeoutTo(collection: util.Collection[_ >: T], maxElements: Int, timeout: Long, unit: TimeUnit): Int = {
14 | Queues.drain[T](lbq, collection, maxElements, timeout, unit)
15 | }
16 | }
17 | }
18 |
-------------------------------------------------------------------------------- /twitter-source.properties.example: --------------------------------------------------------------------------------
1 | name=twitter-source
2 | connector.class=com.eneco.trading.kafka.connect.twitter.TwitterSourceConnector
3 | tasks.max=1
4 | topic=test
5 | twitter.consumerkey=
6 | twitter.consumersecret=
7 | twitter.token=
8 | twitter.secret=
9 |
10 | # set output.format to string to output string key/values; it defaults to structured
11 | #output.format=string
12 |
13 | # language=en,ru,de
14 | # stream.type=sample
15 | # stream.type=filter
16 | # track.terms=news,music,hadoop,clojure,scala,fp,golang,python,fsharp,cpp,java
17 | # San Francisco OR New York City
18 | #track.locations=-122.75,36.8,-121.75,37.8,-74,40,-73,41
19 | # bbcbreaking,bbcnews,justinbieber
20 | # track.follow=5402612,612473,27260086
21 |
-------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestSinkTask.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import org.apache.kafka.connect.sink.SinkRecord
4 | import scala.collection.JavaConverters._
5 | import scala.util.{Success, Try}
6 |
7 | class TestSinkTask extends TestTwitterBase {
8 | test("Strings put to the Task are tweeted") {
9 | val sinkTask = new TwitterSinkTask()
10 | val myTestTweet = "I tweet, ergo sum."
11 | sinkTask.writer = Some(new SimpleTwitterWriter {
12 | //TODO: use DI?
13 | def updateStatus(s: String): Try[Long] = {
14 | s shouldEqual myTestTweet
15 | Success(5)
16 | }
17 | })
18 | val sr = new SinkRecord("topic", 5, null, null, null, myTestTweet, 123)
19 | sinkTask.put(Seq(sr).asJava)
20 | }
21 |
22 | }
23 |
-------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestTwitterSinkConfig.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import scala.collection.JavaConverters._
4 |
5 | class TestTwitterSinkConfig extends TestTwitterBase {
6 | test("A TestTwitterSinkConfig should be correctly configured") {
7 | val config = getSinkConfig
8 | val taskConfig = new TwitterSinkConfig(config.asJava)
9 | taskConfig.getString(TwitterSinkConfig.CONSUMER_KEY_CONFIG) shouldBe "test"
10 | taskConfig.getPassword(TwitterSinkConfig.CONSUMER_SECRET_CONFIG).value shouldBe "c-secret"
11 | taskConfig.getPassword(TwitterSinkConfig.SECRET_CONFIG).value shouldBe "secret"
12 | taskConfig.getString(TwitterSinkConfig.TOKEN_CONFIG) shouldBe "token"
13 | taskConfig.getList(TwitterSinkConfig.TOPICS) shouldBe Seq("just-a-sink-topic").asJava
14 | }
15 | }
16 |
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterSourceTask.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import java.util
4 | import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
5 |
6 | /**
7 | * Created by andrew@datamountaineer.com on 24/02/16.
8 | * kafka-connect-twitter
9 | */
10 | class TwitterSourceTask extends SourceTask with Logging {
11 | private var reader : Option[TwitterStatusReader] = None
12 |
13 | override def poll(): util.List[SourceRecord] = {
14 | require(reader.isDefined, "Twitter client not initialized!")
15 | reader.get.poll()
16 | }
17 |
18 | override def start(props: util.Map[String, String]): Unit = {
19 | val sourceConfig = new TwitterSourceConfig(props)
20 | reader = Some(TwitterReader(config = sourceConfig, context = context))
21 | }
22 |
23 | override def stop() = {
24 | reader.foreach(r=>r.stop())
25 | }
26 | override def version(): String = ""
27 | }
28 |
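For orientation, this is roughly the lifecycle the Connect runtime drives on `TwitterSourceTask`; the runtime also manages offsets, retries and shutdown, so this is only an illustrative sketch with placeholder credentials:

```scala
// Illustrative only: Kafka Connect itself performs this start/poll/stop cycle.
import scala.collection.JavaConverters._

val task = new TwitterSourceTask()
task.start(Map(
  "twitter.consumerkey"    -> "<key>",
  "twitter.consumersecret" -> "<consumer-secret>",
  "twitter.token"          -> "<token>",
  "twitter.secret"         -> "<token-secret>",
  "track.terms"            -> "scala"
).asJava)
val records = task.poll() // drains up to batch.size tweets, waiting at most batch.timeout seconds
task.stop()
```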
-------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestTwitterBase.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import org.scalatest.{FunSuite, Matchers, BeforeAndAfter}
4 |
5 | /**
6 | * Created by andrew@datamountaineer.com on 29/02/16.
7 | * kafka-connect-twitter
8 | */
9 | trait TestTwitterBase extends FunSuite with Matchers with BeforeAndAfter {
10 | def getConfig = {
11 | Map(TwitterSourceConfig.CONSUMER_KEY_CONFIG->"test",
12 | TwitterSourceConfig.CONSUMER_SECRET_CONFIG->"c-secret",
13 | TwitterSourceConfig.SECRET_CONFIG->"secret",
14 | TwitterSourceConfig.TOKEN_CONFIG->"token",
15 | TwitterSourceConfig.TRACK_TERMS->"term1",
16 | TwitterSourceConfig.TWITTER_APP_NAME->"myApp",
17 | TwitterSourceConfig.BATCH_SIZE->"1337",
18 | TwitterSourceConfig.TOPIC->"just-a-topic"
19 | )
20 | }
21 | def getSinkConfig = {
22 | Map(TwitterSinkConfig.CONSUMER_KEY_CONFIG->"test",
23 | TwitterSinkConfig.CONSUMER_SECRET_CONFIG->"c-secret",
24 | TwitterSinkConfig.SECRET_CONFIG->"secret",
25 | TwitterSinkConfig.TOKEN_CONFIG->"token",
26 | TwitterSinkConfig.TOPICS->"just-a-sink-topic"
27 | )
28 | }
29 | }
30 |
-------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestTwitterSourceConfig.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 |
4 | import scala.collection.JavaConverters._
5 | /**
6 | * Created by andrew@datamountaineer.com on 29/02/16.
7 | * kafka-connect-twitter
8 | */
9 | class TestTwitterSourceConfig extends TestTwitterBase {
10 | test("A TwitterSourceConfig should be correctly configured") {
11 | val config = getConfig
12 | val taskConfig = new TwitterSourceConfig(config.asJava)
13 | taskConfig.getString(TwitterSourceConfig.CONSUMER_KEY_CONFIG) shouldBe "test"
14 | taskConfig.getPassword(TwitterSourceConfig.SECRET_CONFIG).value shouldBe "secret"
15 | taskConfig.getPassword(TwitterSourceConfig.CONSUMER_SECRET_CONFIG).value shouldBe "c-secret"
16 | taskConfig.getString(TwitterSourceConfig.TOKEN_CONFIG) shouldBe "token"
17 | taskConfig.getList(TwitterSourceConfig.TRACK_TERMS).asScala.head shouldBe "term1"
18 | taskConfig.getString(TwitterSourceConfig.TWITTER_APP_NAME) shouldBe "myApp"
19 | taskConfig.getInt(TwitterSourceConfig.BATCH_SIZE) shouldBe 1337
20 | taskConfig.getString(TwitterSourceConfig.TOPIC) shouldBe "just-a-topic"
21 | }
22 | }
23 |
-------------------------------------------------------------------------------- /setup-confluent.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | BUILD_DIR=$1
4 |
5 | indent() {
6 | # Handle GNU vs BSD sed
7 | case $(sed --help 2>&1) in
8 | *GNU*) sed_u () { sed -u "$@"; };;
9 | *) sed_u () { sed "$@"; };;
10 | esac
11 |
12 | sed_u 's/^/ /'
13 | }
14 |
15 | # download and "install" confluent
16 | ARCHIVE_URL="http://packages.confluent.io/archive/3.0/confluent-3.0.0-2.11.tar.gz"
17 |
18 | echo "Downloading Confluent..." | indent
19 |
20 | wget -qO - $ARCHIVE_URL | tar -zxf -
21 | if [ $? -ne 0
]; then
22 | echo "FAILED to obtain confluent distribution" | indent
23 | exit 1
24 | fi
25 |
26 | cp -a confluent-3.0.0/* $BUILD_DIR/
27 |
28 | # fix broken symlink
29 | cd $BUILD_DIR/share/java/kafka
30 | ln -sf kafka_2.11-0.10.0.0-cp1.jar kafka.jar
31 | cd $BUILD_DIR
32 |
33 | echo "Copied Confluent successfully" | indent
34 |
35 | rm -rf $BUILD_DIR/share/java/kafka-connect-hdfs
36 | rm -rf $BUILD_DIR/share/java/schema-registry
37 | rm -rf $BUILD_DIR/share/java/confluent-control-center
38 | rm -rf $BUILD_DIR/confluent-3.0.0
39 |
40 | echo "Deleted extra share directories to reduce slug size" | indent
41 |
42 | export CLASSPATH="$CLASSPATH:/app/target/kafka-connect-twitter-0.1-jar-with-dependencies.jar"
43 |
-------------------------------------------------------------------------------- /setup_certs: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -e
4 |
5 | addon="$1"
6 |
7 | [ -z $addon ] && {
8 | echo "addon is missing" >&2
9 | exit 1
10 | }
11 |
12 | client_key="$(echo $addon)_CLIENT_CERT_KEY"
13 | client_cert="$(echo $addon)_CLIENT_CERT"
14 | trusted_cert="$(echo $addon)_TRUSTED_CERT"
15 |
16 | [ -z $TRUSTSTORE_PASSWORD ] && {
17 | echo "TRUSTSTORE_PASSWORD is missing" >&2
18 | exit 1
19 | }
20 |
21 | [ -z $KEYSTORE_PASSWORD ] && {
22 | echo "KEYSTORE_PASSWORD is missing" >&2
23 | exit 1
24 | }
25 |
26 | rm -f .{keystore,truststore}.{pem,pkcs12,jks}
27 | rm -f .cacerts
28 |
29 | #cp /etc/ssl/certs/java/cacerts ./.cacerts
30 |
31 | echo -n "${!client_key}" >> .keystore.pem
32 | echo -n "${!client_cert}" >> .keystore.pem
33 | echo -n "${!trusted_cert}" > .truststore.pem
34 |
35 | keytool -importcert -file .truststore.pem -keystore .truststore.jks -deststorepass $TRUSTSTORE_PASSWORD -noprompt
36 |
37 | openssl pkcs12 -export -in .keystore.pem -out .keystore.pkcs12 -password pass:$KEYSTORE_PASSWORD
38 | keytool -importkeystore -srcstoretype PKCS12 \
39 | -destkeystore .keystore.jks -deststorepass $KEYSTORE_PASSWORD \
40 | -srckeystore .keystore.pkcs12 -srcstorepass $KEYSTORE_PASSWORD
41 |
42 | rm -f .{keystore,truststore}.{pem,pkcs12}
43 |
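`setup_certs` assembles a JKS truststore and keystore from PEM material held in environment variables, going PEM → PKCS12 → JKS because `keytool` cannot import a PEM private key directly. The `${addon}_*` naming suggests a Heroku-style addon prefix; assuming a prefix of `KAFKA` (an assumption, not documented here), an invocation would look like:

```bash
# Hypothetical invocation: KAFKA_CLIENT_CERT_KEY, KAFKA_CLIENT_CERT and
# KAFKA_TRUSTED_CERT must already hold PEM data in the environment.
export TRUSTSTORE_PASSWORD=changeit
export KEYSTORE_PASSWORD=changeit
./setup_certs KAFKA   # produces .truststore.jks and .keystore.jks
```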
-------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestTwitterSourceConnector.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import org.apache.kafka.connect.errors.ConnectException
4 |
5 | import scala.collection.JavaConverters._
6 |
7 | /**
8 | * Created by andrew@datamountaineer.com on 29/02/16.
9 | * kafka-connect-twitter
10 | */
11 | class TestTwitterSourceConnector extends TestTwitterBase {
12 | val goodProps = getConfig
13 | val badProps = goodProps + (TwitterSourceConfig.BATCH_SIZE -> "this is no integer")
14 |
15 | test("A TwitterSourceConnector should start with valid properties") {
16 | val t = new TwitterSourceConnector()
17 | t.start(goodProps.asJava)
18 | }
19 |
20 | test("A TwitterSourceConnector shouldn't start with invalid properties") {
21 | val t = new TwitterSourceConnector()
22 | a[ConnectException] should be thrownBy {
23 | t.start(badProps.asJava)
24 | }
25 | }
26 |
27 | test("A TwitterSourceConnector should provide the correct taskClass") {
28 | val t = new TwitterSourceConnector()
29 | t.taskClass() should be (classOf[TwitterSourceTask])
30 | }
31 |
32 | test("A TwitterSourceConnector should return a taskConfig for each task") {
33 | val t = new TwitterSourceConnector()
34 | t.taskConfigs(42).size() should be (42)
35 | }
36 | }
37 |
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterSinkConfig.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import java.util
4 |
5 | import org.apache.kafka.common.config.{AbstractConfig, ConfigDef}
6 | import org.apache.kafka.common.config.ConfigDef.{Importance, Type}
7 | import org.apache.kafka.connect.sink.SinkTask
8 |
9 | object TwitterSinkConfig {
10 | val CONSUMER_KEY_CONFIG = "twitter.consumerkey"
11 | val CONSUMER_KEY_CONFIG_DOC = "Twitter account consumer key."
12 | val CONSUMER_SECRET_CONFIG = "twitter.consumersecret"
13 | val CONSUMER_SECRET_CONFIG_DOC = "Twitter account consumer secret."
14 | val TOKEN_CONFIG = "twitter.token"
15 | val TOKEN_CONFIG_DOC = "Twitter account token."
16 | val SECRET_CONFIG = "twitter.secret"
17 | val SECRET_CONFIG_DOC = "Twitter account secret."
18 | val TOPICS = SinkTask.TOPICS_CONFIG
19 | val TOPICS_DOC = "The Kafka topics to read from."
20 | 21 | val config: ConfigDef = new ConfigDef() 22 | .define(CONSUMER_KEY_CONFIG, Type.STRING, Importance.HIGH, CONSUMER_KEY_CONFIG_DOC) 23 | .define(CONSUMER_SECRET_CONFIG, Type.PASSWORD, Importance.HIGH, CONSUMER_SECRET_CONFIG_DOC) 24 | .define(TOKEN_CONFIG, Type.STRING, Importance.HIGH, TOKEN_CONFIG_DOC) 25 | .define(SECRET_CONFIG, Type.PASSWORD, Importance.HIGH, SECRET_CONFIG_DOC) 26 | .define(TOPICS, Type.LIST, Importance.HIGH, TOPICS_DOC) 27 | } 28 | 29 | class TwitterSinkConfig(props: util.Map[String, String]) 30 | extends AbstractConfig(TwitterSinkConfig.config, props) { 31 | } 32 | -------------------------------------------------------------------------------- /src/test/scala/com/eneco/trading/kafka/connect/twitter/TestSimpleTwitterWriter.scala: -------------------------------------------------------------------------------- 1 | package com.eneco.trading.kafka.connect.twitter 2 | 3 | import org.scalamock.scalatest.proxy.MockFactory 4 | import twitter4j.{Status, Twitter} 5 | import twitter4j.auth.AccessToken 6 | 7 | import scala.util.Success 8 | 9 | class TestSimpleTwitterWriter extends TestTwitterBase with MockFactory { 10 | val cons = "cons" 11 | val consSecret = "consSecret" 12 | val access = "access" 13 | val accessSecret = "accessSecret" 14 | val myStatus = "I tweet, ergo sum" 15 | val myStatusId = 1337L 16 | 17 | test("TwitterWriter.ctor properly initializes the Twitter4j client") { 18 | val twitterMock = mock[Twitter] 19 | twitterMock.expects('setOAuthConsumer)(cons, consSecret) 20 | twitterMock.expects('setOAuthAccessToken)(new AccessToken(access, accessSecret)) 21 | 22 | new TwitterWriter(cons,consSecret,access,accessSecret,twitterMock) 23 | } 24 | 25 | test("TwitterWriter.updateStatus properly dispatches the status to the Twitter4j client") { 26 | val statusMock = mock[Status] 27 | statusMock.expects('getId)().returning(myStatusId) 28 | 29 | val twitterMock = mock[Twitter] 30 | twitterMock.expects('setOAuthConsumer)(cons, consSecret) 31 | twitterMock.expects('setOAuthAccessToken)(new AccessToken(access, accessSecret)) 32 | twitterMock.expects('updateStatus)(myStatus).returning(statusMock) 33 | 34 | val r = new TwitterWriter(cons,consSecret,access,accessSecret,twitterMock).updateStatus(myStatus) 35 | r shouldEqual Success(myStatusId) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterWriter.scala: -------------------------------------------------------------------------------- 1 | package com.eneco.trading.kafka.connect.twitter 2 | 3 | import twitter4j.auth.AccessToken 4 | import twitter4j.{TwitterFactory, Twitter} 5 | 6 | import scala.util.Try 7 | 8 | trait SimpleTwitterWriter { 9 | /** 10 | * Performs a status update 11 | * @param s the status 12 | * @return A Try id of the new status as assigned by the Twitter api 13 | */ 14 | def updateStatus(s: String): Try[Long] 15 | } 16 | 17 | /** 18 | * Allows one to update statuses 19 | * @param consumer apps.twitter.com | Keys and Access Tokens: Consumer Key (API Key) 20 | * @param consumerSecret apps.twitter.com | Keys and Access Tokens: Consumer Secret (API Secret) 21 | * @param access apps.twitter.com | Keys and Access Tokens: Access Token 22 | * @param accessSecret apps.twitter.com | Keys and Access Tokens: Access Token Secret 23 | * @param twitterClient poor man's DI: something that implements the Twitter4j Twitter interface 24 | */ 25 | class TwitterWriter(consumer: String, consumerSecret: String, access: 
String, accessSecret: String, twitterClient: Twitter = new TwitterFactory().getInstance()) extends SimpleTwitterWriter {
26 | twitterClient.setOAuthConsumer(consumer, consumerSecret)
27 | twitterClient.setOAuthAccessToken(new AccessToken(access, accessSecret))
28 |
29 | /**
30 | * Performs a status update
31 | * @param s the status
32 | * @return A Try id of the new status as assigned by the Twitter api
33 | */
34 | def updateStatus(s: String): Try[Long] = {
35 | Try(twitterClient.updateStatus(s).getId)
36 | }
37 | }
38 |
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterSinkTask.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import java.util
4 | import org.apache.kafka.clients.consumer.OffsetAndMetadata
5 | import org.apache.kafka.common.TopicPartition
6 | import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}
7 | import scala.collection.JavaConverters._
8 | import scala.util.{Success, Failure}
9 |
10 | class TwitterSinkTask extends SinkTask with Logging {
11 | var writer: Option[SimpleTwitterWriter] = None
12 |
13 | override def start(props: util.Map[String, String]): Unit = {
14 | val sinkConfig = new TwitterSinkConfig(props)
15 | writer = Some(new TwitterWriter(
16 | sinkConfig.getString(TwitterSinkConfig.CONSUMER_KEY_CONFIG),
17 | sinkConfig.getPassword(TwitterSinkConfig.CONSUMER_SECRET_CONFIG).value,
18 | sinkConfig.getString(TwitterSinkConfig.TOKEN_CONFIG),
19 | sinkConfig.getPassword(TwitterSinkConfig.SECRET_CONFIG).value))
20 | }
21 |
22 | override def put(records: util.Collection[SinkRecord]): Unit =
23 | records.asScala
24 | .map(_.value.toString)
25 | .map(text => (text, writer match {
26 | case Some(writer) => writer.updateStatus(text)
27 | case None => Failure(new IllegalStateException("twitter writer is not set"))
28 | }))
29 | .foreach {
30 | case (text, result) => result match {
31 | case Success(id) => log.info(s"successfully tweeted `${text}`; got assigned id ${id}")
32 | case Failure(err) => log.warn(s"tweeting `${text}` failed: ${err.getMessage}")
33 | }
34 | }
35 |
36 | override def stop(): Unit = {
37 | }
38 |
39 | override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]) = {
40 | }
41 | override def version(): String = ""
42 | }
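Note that `put` logs a failed tweet at warn level and moves on, so the record still counts as delivered. A hedged sketch of an alternative this project does not use: throwing the framework's `RetriableException` makes Connect redeliver the batch, at the price of possible duplicate tweets after a transient error.

```scala
// Sketch only, not the project's behaviour: surface failures to the runtime.
import org.apache.kafka.connect.errors.RetriableException
import scala.util.{Failure, Success, Try}

def handle(text: String, result: Try[Long]): Unit = result match {
  case Success(id)  => println(s"tweeted `$text` as status $id")
  case Failure(err) => throw new RetriableException(s"tweeting `$text` failed", err)
}
```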
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterSinkConnector.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import java.util
4 |
5 | import org.apache.kafka.connect.connector.Task
6 | import org.apache.kafka.connect.errors.ConnectException
7 | import org.apache.kafka.connect.sink.SinkConnector
8 |
9 | import scala.collection.JavaConverters._
10 | import scala.util.{Failure, Try}
11 |
12 | class TwitterSinkConnector extends SinkConnector with Logging {
13 | private var configProps : util.Map[String, String] = null
14 |
15 | /**
16 | * Return config definition for sink connector
17 | */
18 | override def config() = TwitterSinkConfig.config
19 |
20 | /**
21 | * States which SinkTask class to use
22 | * */
23 | override def taskClass(): Class[_ <: Task] = classOf[TwitterSinkTask]
24 |
25 | /**
26 | * Set the configuration for each worker and determine the split
27 | *
28 | * @param maxTasks The max number of task workers we can spawn
29 | * @return a List of configuration properties per worker
30 | * */
31 | override def taskConfigs(maxTasks: Int): util.List[util.Map[String, String]] = {
32 | log.info(s"Setting task configurations for $maxTasks workers.")
33 | (1 to maxTasks).map(c => configProps).toList.asJava
34 | }
35 |
36 | /**
37 | * Start the sink and set its configuration
38 | *
39 | * @param props A map of properties for the connector and worker
40 | * */
41 | override def start(props: util.Map[String, String]): Unit = {
42 | log.info(s"Starting Twitter sink task with ${props.toString}.")
43 | configProps = props
44 | Try(new TwitterSinkConfig(props)) match {
45 | case Failure(f) => throw new ConnectException("Couldn't start TwitterSinkConnector due to configuration error.", f)
46 | case _ =>
47 | }
48 | }
49 |
50 | override def stop(): Unit = {}
51 | override def version(): String = ""
52 | }
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterSourceConnector.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import java.util
4 | import org.apache.kafka.connect.connector.{Task, Connector}
5 | import org.apache.kafka.connect.errors.ConnectException
6 | import scala.collection.JavaConverters._
7 | import scala.util.{Failure, Try}
8 |
9 | /**
10 | * Created by andrew@datamountaineer.com on 24/02/16.
11 | * kafka-connect-twitter
12 | */
13 | class TwitterSourceConnector extends Connector with Logging {
14 | private var configProps : util.Map[String, String] = null
15 |
16 | /**
17 | * Return config definition for source connector
18 | */
19 | override def config() = TwitterSourceConfig.config
20 |
21 | /**
22 | * States which SourceTask class to use
23 | * */
24 | override def taskClass(): Class[_ <: Task] = classOf[TwitterSourceTask]
25 |
26 | /**
27 | * Set the configuration for each worker and determine the split
28 | *
29 | * @param maxTasks The max number of task workers we can spawn
30 | * @return a List of configuration properties per worker
31 | * */
32 | override def taskConfigs(maxTasks: Int): util.List[util.Map[String, String]] = {
33 | log.info(s"Setting task configurations for $maxTasks workers.")
34 | (1 to maxTasks).map(c => configProps).toList.asJava
35 | }
36 |
37 | /**
38 | * Start the source and set its configuration
39 | *
40 | * @param props A map of properties for the connector and worker
41 | * */
42 | override def start(props: util.Map[String, String]): Unit = {
43 | log.info(s"Starting Twitter source task with ${props.toString}.")
44 | configProps = props
45 | Try(new TwitterSourceConfig(props)) match {
46 | case Failure(f) => throw new ConnectException("Couldn't start Twitter source due to configuration error: "
47 | + f.getMessage, f)
48 | case _ =>
49 | }
50 | }
51 |
52 | override def stop() = {}
53 | override def version(): String = ""
54 | }
55 |
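Both connectors hand every task an identical copy of the connector properties, so `tasks.max > 1` would open duplicate Twitter streams. The README's todo list raises splitting `track.terms` across workers; a hedged sketch of what such a `taskConfigs` could look like (not implemented anywhere in this repository):

```scala
// Sketch of the "split the track terms up" todo item; assumes track.terms is set.
import java.util
import scala.collection.JavaConverters._

def splitTaskConfigs(maxTasks: Int, configProps: Map[String, String]): util.List[util.Map[String, String]] = {
  val terms = configProps.getOrElse("track.terms", "").split(",").filter(_.nonEmpty).toList
  val perTask = math.max(1, math.ceil(terms.size.toDouble / maxTasks).toInt)
  terms.grouped(perTask)                // one group of terms per task
    .map(group => (configProps + ("track.terms" -> group.mkString(","))).asJava)
    .toList.asJava
}
```

Whether Twitter's limits on concurrent streaming connections allow this is exactly the open question the todo notes.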
-------------------------------------------------------------------------------- /connect-simple-source-standalone.properties: --------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # These are defaults. This file just demonstrates how to override some settings.
17 | bootstrap.servers=localhost:9092
18 |
19 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will
20 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka
21 | #key.converter=org.apache.kafka.connect.json.JsonConverter
22 | #value.converter=org.apache.kafka.connect.json.JsonConverter
23 | key.converter=org.apache.kafka.connect.storage.StringConverter
24 | value.converter=org.apache.kafka.connect.storage.StringConverter
25 |
26 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply
27 | # it to
28 | key.converter.schemas.enable=true
29 | value.converter.schemas.enable=true
30 |
31 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will
32 | # always want to use the built-in default. Offset and config data is never visible outside of Kafka Connect in this format.
33 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter
34 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter
35 | internal.key.converter.schemas.enable=false
36 | internal.value.converter.schemas.enable=false
37 |
38 | offset.storage.file.filename=/tmp/connect.offsets
39 | # Flush much faster than normal, which is useful for testing/debugging
40 | offset.flush.interval.ms=10000
41 |
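The three standalone worker configs differ mainly in their converters: this file and `connect-sink-standalone.properties` use `StringConverter`, while `connect-source-standalone.properties` uses the Avro converters. Per the README, the worker's converters must match the connector's `output.format`; a minimal pairing for string mode, for reference:

```properties
# Worker side (connect-simple-source-standalone.properties)
key.converter=org.apache.kafka.connect.storage.StringConverter
value.converter=org.apache.kafka.connect.storage.StringConverter

# Connector side (twitter-source.properties)
output.format=string
```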
-------------------------------------------------------------------------------- /connect-sink-standalone.properties: --------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # These are defaults. This file just demonstrates how to override some settings.
17 | bootstrap.servers=localhost:9092
18 |
19 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will
20 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka
21 | #key.converter=org.apache.kafka.connect.json.JsonConverter
22 | #value.converter=org.apache.kafka.connect.json.JsonConverter
23 | key.converter=org.apache.kafka.connect.storage.StringConverter
24 | value.converter=org.apache.kafka.connect.storage.StringConverter
25 |
26 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply
27 | # it to
28 | #key.converter.schemas.enable=true
29 | #value.converter.schemas.enable=true
30 |
31 |
32 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will
33 | # always want to use the built-in default. Offset and config data is never visible outside of Kafka Connect in this format.
34 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter
35 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter
36 | internal.key.converter.schemas.enable=false
37 | internal.value.converter.schemas.enable=false
38 |
39 | offset.storage.file.filename=/tmp/connect.offsets
40 | # Flush much faster than normal, which is useful for testing/debugging
41 | offset.flush.interval.ms=10000
42 |
-------------------------------------------------------------------------------- /connect-source-standalone.properties: --------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # These are defaults. This file just demonstrates how to override some settings.
17 | bootstrap.servers=localhost:9092
18 |
19 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will
20 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka
21 | #key.converter=org.apache.kafka.connect.json.JsonConverter
22 | #value.converter=org.apache.kafka.connect.json.JsonConverter
23 | key.converter=io.confluent.connect.avro.AvroConverter
24 | key.converter.schema.registry.url=http://localhost:8081
25 | value.converter=io.confluent.connect.avro.AvroConverter
26 | value.converter.schema.registry.url=http://localhost:8081
27 |
28 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply
29 | # it to
30 | key.converter.schemas.enable=true
31 | value.converter.schemas.enable=true
32 |
33 |
34 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will
35 | # always want to use the built-in default.
Offset and config data is never visible outside of Kafka Connect in this format.
36 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter
37 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter
38 | internal.key.converter.schemas.enable=false
39 | internal.value.converter.schemas.enable=false
40 |
41 | offset.storage.file.filename=/tmp/connect.offsets
42 | # Flush much faster than normal, which is useful for testing/debugging
43 | offset.flush.interval.ms=10000
44 |
-------------------------------------------------------------------------------- /properties-generate: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ueo pipefail
3 |
4 | # Generate Twitter properties file
5 | cat > twitter-source.properties <<EOF
[heredoc bodies, the rest of this script, the /start script and /README.md are missing from this listing; the surviving fragments reference `cat > $TWITTER_PROPERTIES_FILE <<EOF` and `cat > $CONFLUENT_PROPERTIES_FILE <<EOF`]
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterReader.scala: --------------------------------------------------------------------------------
[lines 1–34 missing from this listing]
35 | ... .map({ x => Double.box(x.toDouble)}).grouped(4).toList
36 | .map({ l => new Location(new Location.Coordinate(l(0), l(1)), new Location.Coordinate(l(2), l(3)))})
37 | .asJava
38 | trackEndpoint.locations(locations)
39 | }
40 | val follow = config.getList(TwitterSourceConfig.TRACK_FOLLOW)
41 | if (!follow.isEmpty) {
42 | val users = follow.toList.map({ x => Long.box(x.trim.toLong)}).asJava
43 | trackEndpoint.followings(users)
44 | }
45 | trackEndpoint
46 | }
47 | endpoint.stallWarnings(false)
48 | val language = config.getList(TwitterSourceConfig.LANGUAGE)
49 | if (!language.isEmpty) {
50 | // endpoint.languages(language) doesn't work as intended!
51 | endpoint.addQueryParameter(TwitterSourceConfig.LANGUAGE, language.toList.mkString(","))
52 | }
53 |
54 | //twitter auth stuff
55 | val auth = new OAuth1(config.getString(TwitterSourceConfig.CONSUMER_KEY_CONFIG),
56 | config.getPassword(TwitterSourceConfig.CONSUMER_SECRET_CONFIG).value,
57 | config.getString(TwitterSourceConfig.TOKEN_CONFIG),
58 | config.getPassword(TwitterSourceConfig.SECRET_CONFIG).value)
59 |
60 | //batch size to take from the queue
61 | val batchSize = config.getInt(TwitterSourceConfig.BATCH_SIZE)
62 | val batchTimeout = config.getDouble(TwitterSourceConfig.BATCH_TIMEOUT)
63 |
64 | //The Kafka topic to append to
65 | val topic = config.getString(TwitterSourceConfig.TOPIC)
66 |
67 | //queue for client to buffer to
68 | val queue = new LinkedBlockingQueue[String](10000)
69 |
70 | //how the output is formatted
71 | val statusConverter = config.getString(TwitterSourceConfig.OUTPUT_FORMAT) match {
72 | case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRING => StatusToStringKeyValue
73 | case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRUCTURED => StatusToTwitterStatusStructure
74 | }
75 |
76 | //build basic client
77 | val client = new ClientBuilder()
78 | .name(config.getString(TwitterSourceConfig.TWITTER_APP_NAME))
79 | .hosts(Constants.STREAM_HOST)
80 | .endpoint(endpoint)
81 | .authentication(auth)
82 | .processor(new StringDelimitedProcessor(queue))
83 | .build()
84 |
85 | new TwitterStatusReader(client = client, rawQueue = queue, batchSize = batchSize,
86 | batchTimeout = batchTimeout, topic = topic, statusConverter = statusConverter)
87 | }
88 | }
89 |
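The `grouped(4)` step in `TwitterReader` above turns the flat `track.locations` list into south-west/north-east bounding-box pairs. A worked illustration using the values from `twitter-source.properties.example`:

```scala
// Worked example of the bounding-box parsing (illustration only):
// "-122.75,36.8,-121.75,37.8,-74,40,-73,41" yields two boxes:
//   sw=(-122.75, 36.8), ne=(-121.75, 37.8)  -> San Francisco
//   sw=(-74.0, 40.0),   ne=(-73.0, 41.0)    -> New York City
val raw = List("-122.75", "36.8", "-121.75", "37.8", "-74", "40", "-73", "41")
val boxes = raw.map(_.trim.toDouble).grouped(4).toList
  .map { case List(swLon, swLat, neLon, neLat) => ((swLon, swLat), (neLon, neLat)) }
```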
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterStatusReader.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter
2 |
3 | import java.util
4 | import java.util.concurrent.{TimeUnit, LinkedBlockingQueue, Executors}
5 | import com.eneco.trading.kafka.connect.twitter.domain.TwitterStatus
6 | import com.twitter.hbc.httpclient.BasicClient
7 | import com.twitter.hbc.twitter4j.Twitter4jStatusClient
8 | import org.apache.kafka.connect.data.Schema
9 | import org.apache.kafka.connect.source.SourceRecord
10 | import twitter4j._
11 | import scala.collection.JavaConverters._
12 | import Extensions._
13 |
14 | class StatusEnqueuer(queue: LinkedBlockingQueue[Status]) extends StatusListener with Logging {
15 | override def onStallWarning(stallWarning: StallWarning) = log.warn("onStallWarning")
16 | override def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) = log.info("onDeletionNotice")
17 |
18 | override def onScrubGeo(l: Long, l1: Long) = {
19 | log.debug(s"onScrubGeo $l $l1")
20 | }
21 |
22 | override def onStatus(status: Status) = {
23 | log.debug("onStatus")
24 | queue.put(status)
25 | }
26 |
27 | override def onTrackLimitationNotice(i: Int) = log.info(s"onTrackLimitationNotice $i")
28 | override def onException(e: Exception)= log.warn("onException " + e.toString)
29 | }
30 |
31 | trait StatusToSourceRecord {
32 | def convert(status: Status, topic: String): SourceRecord
33 | }
34 |
35 | object StatusToStringKeyValue extends StatusToSourceRecord {
36 | def convert (status: Status, topic: String): SourceRecord = {
37 | new SourceRecord(
38 | Map("tweetSource" -> status.getSource).asJava, //source partitions?
39 | Map("tweetId" -> status.getId).asJava, //source offsets?
40 | topic,
41 | null,
42 | Schema.STRING_SCHEMA,
43 | status.getUser.getScreenName,
44 | Schema.STRING_SCHEMA,
45 | status.getText)
46 | }
47 | }
48 |
49 | object StatusToTwitterStatusStructure extends StatusToSourceRecord {
50 | def convert(status: Status, topic: String): SourceRecord = {
51 | //val ts = TwitterStatus.struct(TwitterStatus(status))
52 | new SourceRecord(
53 | Map("tweetSource" -> status.getSource).asJava, //source partitions?
54 | Map("tweetId" -> status.getId).asJava, //source offsets?
55 | topic,
56 | TwitterStatus.schema,
57 | TwitterStatus.struct(status))
58 | }
59 | }
60 |
61 | /**
62 | * Created by andrew@datamountaineer.com on 24/02/16.
63 | * kafka-connect-twitter
64 | */
65 | class TwitterStatusReader(client: BasicClient,
66 | rawQueue: LinkedBlockingQueue[String],
67 | batchSize : Int,
68 | batchTimeout: Double,
69 | topic: String,
70 | statusConverter: StatusToSourceRecord = StatusToTwitterStatusStructure
71 | ) extends Logging {
72 | log.info("Initialising Twitter Stream Reader")
73 | val statusQueue = new LinkedBlockingQueue[Status](10000)
74 |
75 | //Construct the status client
76 | val t4jClient = new Twitter4jStatusClient(
77 | client,
78 | rawQueue,
79 | List[StatusListener](new StatusEnqueuer(statusQueue)).asJava,
80 | Executors.newFixedThreadPool(1))
81 |
82 | //connect and subscribe
83 | t4jClient.connect()
84 | t4jClient.process()
85 |
86 | /**
87 | * Drain the queue
88 | *
89 | * @return A List of SourceRecords
90 | * */
91 | def poll() : util.List[SourceRecord] = {
92 | if (client.isDone) log.warn(s"Client connection closed unexpectedly: ${client.getExitEvent.getMessage}") //TODO: what next?
93 |
94 | val l = new util.ArrayList[Status]()
95 | statusQueue.drainWithTimeoutTo(l, batchSize, (batchTimeout * 1E9).toLong, TimeUnit.NANOSECONDS)
96 | l.asScala.map(statusConverter.convert(_, topic)).asJava
97 | }
98 |
99 | /**
100 | * Stop the HBC client
101 | * */
102 | def stop() = {
103 | log.info("Stop Twitter client")
104 | client.stop()
105 | }
106 |
107 |
108 | }
109 |
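`poll` blocks inside `drainWithTimeoutTo` (the Guava `Queues.drain` wrapper from `Extensions.scala`) until either `batchSize` statuses have arrived or `batchTimeout` seconds have elapsed, so a quiet stream still yields a (possibly empty) batch promptly. A self-contained sketch of that drain behaviour outside the connector:

```scala
// Illustration of the batching semantics used by poll():
import java.util
import java.util.concurrent.{LinkedBlockingQueue, TimeUnit}
import com.google.common.collect.Queues

val queue = new LinkedBlockingQueue[String]()
queue.put("tweet-1") // only one element arrives in time

val batch = new util.ArrayList[String]()
// asks for up to 100 elements but gives up after 0.1 s: drains just the one
val n = Queues.drain(queue, batch, 100, (0.1 * 1e9).toLong, TimeUnit.NANOSECONDS)
println(s"drained $n element(s): $batch")
```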
-------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/domain/TwitterStatus.scala: --------------------------------------------------------------------------------
1 | package com.eneco.trading.kafka.connect.twitter.domain
2 |
3 | import java.text.SimpleDateFormat
4 | import java.util.{Date, TimeZone}
5 |
6 | import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
7 | import scala.collection.JavaConverters._
8 | import scala.collection.JavaConversions._
9 |
10 | object TwitterUser {
11 | def struct(u: twitter4j.User) =
12 | new Struct(schema)
13 | .put("id", u.getId)
14 | .put("name", u.getName)
15 | .put("screen_name", u.getScreenName)
16 | .put("location", u.getLocation)
17 | .put("verified", u.isVerified)
18 | .put("friends_count", u.getFriendsCount)
19 | .put("followers_count", u.getFollowersCount)
20 | .put("statuses_count", u.getStatusesCount)
21 |
22 | val schema = SchemaBuilder.struct().name("com.eneco.trading.kafka.connect.twitter.User")
23 | .field("id", Schema.INT64_SCHEMA)
24 | .field("name", Schema.STRING_SCHEMA)
25 | .field("screen_name", Schema.STRING_SCHEMA)
26 | .field("location", SchemaBuilder.string.optional.build())
27 | .field("verified", Schema.BOOLEAN_SCHEMA)
28 | .field("friends_count", Schema.INT32_SCHEMA)
29 | .field("followers_count", Schema.INT32_SCHEMA)
30 | .field("statuses_count", Schema.INT32_SCHEMA)
31 | .build()
32 | }
33 |
34 | object Entities {
35 | def struct(s: twitter4j.EntitySupport) =
36 | new Struct(schema)
37 | .put("hashtags", s.getHashtagEntities.toSeq.map(h =>
38 | new Struct(hschema)
39 | .put("text", h.getText)).asJava)
40 | .put("media", s.getMediaEntities.toSeq.map(m =>
41 | new Struct(mschema)
42 | .put("display_url", m.getDisplayURL)
43 | .put("expanded_url", m.getExpandedURL)
44 | .put("id", m.getId)
45 | .put("type", m.getType)
46 | .put("url", m.getURL)).asJava)
47 | .put("urls", s.getURLEntities.toSeq.map(u =>
48 | new Struct(uschema)
49 | .put("display_url", u.getDisplayURL)
50 | .put("expanded_url", u.getExpandedURL)
51 | .put("url", u.getURL)).asJava)
52 | .put("user_mentions", s.getUserMentionEntities.toSeq.map(um =>
53 | new Struct(umschema)
54 | .put("id", um.getId)
55 | .put("name", um.getName)
56 | .put("screen_name", um.getScreenName)).asJava)
57 |
58 | val hschema = SchemaBuilder.struct().name("com.eneco.trading.kafka.connect.twitter.Hashtag")
59 | .field("text", Schema.STRING_SCHEMA)
60 | .build()
61 | val mschema = SchemaBuilder.struct().name("com.eneco.trading.kafka.connect.twitter.Medium")
62 | .field("display_url", Schema.STRING_SCHEMA)
63 | .field("expanded_url", Schema.STRING_SCHEMA)
64 | .field("id", Schema.INT64_SCHEMA)
65 | .field("type", Schema.STRING_SCHEMA)
66 | .field("url", Schema.STRING_SCHEMA)
67 | .build()
68 | val uschema = SchemaBuilder.struct().name("com.eneco.trading.kafka.connect.twitter.Url")
69 | .field("display_url", Schema.STRING_SCHEMA)
70 | .field("expanded_url", Schema.STRING_SCHEMA)
71 | .field("url", Schema.STRING_SCHEMA)
72 | .build()
73 | val umschema = 
SchemaBuilder.struct().name("com.eneco.trading.kafka.connect.twitter.UserMention") 74 | .field("id", Schema.INT64_SCHEMA) 75 | .field("name", Schema.OPTIONAL_STRING_SCHEMA) 76 | .field("screen_name", Schema.OPTIONAL_STRING_SCHEMA) 77 | .build() 78 | 79 | val schema = SchemaBuilder.struct().name("com.eneco.trading.kafka.connect.twitter.Entities") 80 | .field("hashtags", SchemaBuilder.array(hschema).optional.build()) 81 | .field("media", SchemaBuilder.array(mschema).optional.build()) 82 | .field("urls", SchemaBuilder.array(uschema).optional.build()) 83 | .field("user_mentions", SchemaBuilder.array(umschema).optional.build()) 84 | .build() 85 | } 86 | 87 | object TwitterStatus { 88 | def asIso8601String(d:Date) = { 89 | val tz = TimeZone.getTimeZone("UTC") 90 | val df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ") 91 | df.setTimeZone(tz) 92 | df.format(if (d == null) { new Date() } else { d }) 93 | } 94 | 95 | def struct(s: twitter4j.Status) = 96 | new Struct(schema) 97 | .put("id", s.getId) 98 | .put("created_at", asIso8601String(s.getCreatedAt)) 99 | .put("user", TwitterUser.struct(s.getUser)) 100 | .put("text", s.getText) 101 | .put("lang", s.getLang) 102 | .put("is_retweet", s.isRetweet) 103 | .put("entities", Entities.struct(s)) 104 | 105 | val schema = SchemaBuilder.struct().name("com.eneco.trading.kafka.connect.twitter.Tweet") 106 | .field("id", Schema.INT64_SCHEMA) 107 | .field("created_at", Schema.STRING_SCHEMA) 108 | .field("user", TwitterUser.schema) 109 | .field("text", Schema.STRING_SCHEMA) 110 | .field("lang", Schema.STRING_SCHEMA) 111 | .field("is_retweet", Schema.BOOLEAN_SCHEMA) 112 | .field("entities", Entities.schema) 113 | .build() 114 | } 115 | -------------------------------------------------------------------------------- /src/main/scala/com/eneco/trading/kafka/connect/twitter/TwitterSourceConfig.scala: -------------------------------------------------------------------------------- 1 | package com.eneco.trading.kafka.connect.twitter 2 | 3 | import java.util 4 | 5 | import org.apache.kafka.common.config.{AbstractConfig, ConfigDef} 6 | import org.apache.kafka.common.config.ConfigDef.{Type, Importance} 7 | import scala.collection.JavaConversions._ 8 | import scala.util.{Failure, Try} 9 | 10 | /** 11 | * Created by andrew@datamountaineer.com on 24/02/16. 12 | * kafka-connect-twitter 13 | */ 14 | object TwitterSourceConfig { 15 | val CONSUMER_KEY_CONFIG = "twitter.consumerkey" 16 | val CONSUMER_KEY_CONFIG_DOC = "Twitter account consumer key." 17 | val CONSUMER_SECRET_CONFIG = "twitter.consumersecret" 18 | val CONSUMER_SECRET_CONFIG_DOC = "Twitter account consumer secret." 19 | val TOKEN_CONFIG = "twitter.token" 20 | val TOKEN_CONFIG_DOC = "Twitter account token." 21 | val SECRET_CONFIG = "twitter.secret" 22 | val SECRET_CONFIG_DOC = "Twitter account secret." 23 | val STREAM_TYPE = "stream.type" 24 | val STREAM_TYPE_DOC = "Twitter stream type (filter or sample)." 25 | val STREAM_TYPE_FILTER = "filter" 26 | val STREAM_TYPE_SAMPLE = "sample" 27 | val STREAM_TYPE_DEFAULT = STREAM_TYPE_FILTER 28 | val TRACK_TERMS = "track.terms" 29 | val TRACK_TERMS_DOC = "Twitter terms to track." 30 | val TRACK_LOCATIONS = "track.locations" 31 | val TRACK_LOCATIONS_DOC = "Geo locations to track." 32 | val TRACK_FOLLOW = "track.follow" 33 | val TRACK_FOLLOW_DOC = "User IDs to track." 
34 | val TWITTER_APP_NAME = "twitter.app.name" 35 | val TWITTER_APP_NAME_DOC = "Twitter app name" 36 | val TWITTER_APP_NAME_DEFAULT = "KafkaConnectTwitterSource" 37 | val BATCH_SIZE = "batch.size" 38 | val BATCH_SIZE_DOC = "Batch size to write to Kafka (Drains the queue supplied to the hbc client)." 39 | val BATCH_SIZE_DEFAULT = 100 40 | val BATCH_TIMEOUT = "batch.timeout" 41 | val BATCH_TIMEOUT_DOC = "Batch timeout in seconds to write to Kafka (Drains the queue supplied to the hbc client)." 42 | val BATCH_TIMEOUT_DEFAULT = 0.1 43 | val TOPIC = "topic" 44 | val TOPIC_DOC = "The Kafka topic to append to" 45 | val TOPIC_DEFAULT = "tweets" 46 | val LANGUAGE = "language" 47 | val LANGUAGE_DOC = "List of languages to filter" 48 | val OUTPUT_FORMAT = "output.format" 49 | val OUTPUT_FORMAT_ENUM_STRUCTURED = "structured" 50 | val OUTPUT_FORMAT_ENUM_STRING = "string" 51 | val OUTPUT_FORMAT_DOC = s"How the output is formatted, can be either ${OUTPUT_FORMAT_ENUM_STRING} for (key=username:string, value=text:string), or ${OUTPUT_FORMAT_ENUM_STRUCTURED} for value=structure:TwitterStatus." 52 | val OUTPUT_FORMAT_DEFAULT = "structured" 53 | val EMPTY_VALUE = "" 54 | 55 | val config: ConfigDef = new ConfigDef() 56 | .define(CONSUMER_KEY_CONFIG, Type.STRING, Importance.HIGH, CONSUMER_KEY_CONFIG_DOC) 57 | .define(CONSUMER_SECRET_CONFIG, Type.PASSWORD, Importance.HIGH, CONSUMER_SECRET_CONFIG_DOC) 58 | .define(TOKEN_CONFIG, Type.STRING, Importance.HIGH, TOKEN_CONFIG_DOC) 59 | .define(SECRET_CONFIG, Type.PASSWORD, Importance.HIGH, SECRET_CONFIG_DOC) 60 | .define(STREAM_TYPE, Type.STRING, STREAM_TYPE_DEFAULT, Importance.HIGH, STREAM_TYPE_DOC) 61 | .define(TRACK_TERMS, Type.LIST, EMPTY_VALUE, Importance.MEDIUM, TRACK_TERMS_DOC) 62 | .define(TRACK_FOLLOW, Type.LIST, EMPTY_VALUE, Importance.MEDIUM, TRACK_FOLLOW_DOC) 63 | .define(TRACK_LOCATIONS, Type.LIST, EMPTY_VALUE, Importance.MEDIUM, TRACK_LOCATIONS_DOC) 64 | .define(TWITTER_APP_NAME, Type.STRING, TWITTER_APP_NAME_DEFAULT, Importance.HIGH, TWITTER_APP_NAME_DOC) 65 | .define(BATCH_SIZE, Type.INT, BATCH_SIZE_DEFAULT, Importance.MEDIUM, BATCH_SIZE_DOC) 66 | .define(BATCH_TIMEOUT, Type.DOUBLE, BATCH_TIMEOUT_DEFAULT, Importance.MEDIUM, BATCH_TIMEOUT_DOC) 67 | .define(TOPIC, Type.STRING, TOPIC_DEFAULT, Importance.HIGH, TOPIC_DOC) 68 | .define(LANGUAGE, Type.LIST, EMPTY_VALUE, Importance.MEDIUM, LANGUAGE_DOC) 69 | .define(OUTPUT_FORMAT, Type.STRING, OUTPUT_FORMAT_DEFAULT, Importance.MEDIUM, OUTPUT_FORMAT_DOC) 70 | } 71 | 72 | class TwitterSourceConfig(props: util.Map[String, String]) 73 | extends AbstractConfig(TwitterSourceConfig.config, props) { 74 | getString(TwitterSourceConfig.STREAM_TYPE) match { 75 | case TwitterSourceConfig.STREAM_TYPE_SAMPLE => {} 76 | case TwitterSourceConfig.STREAM_TYPE_FILTER => { 77 | val terms = getList(TwitterSourceConfig.TRACK_TERMS) 78 | val locations = getList(TwitterSourceConfig.TRACK_LOCATIONS) 79 | val users = getList(TwitterSourceConfig.TRACK_FOLLOW) 80 | val language = getList(TwitterSourceConfig.LANGUAGE) 81 | if (terms.isEmpty && locations.isEmpty && users.isEmpty) { 82 | throw new RuntimeException("At least one of the parameters " 83 | + TwitterSourceConfig.TRACK_TERMS + ", " + TwitterSourceConfig.TRACK_LOCATIONS 84 | + ", " + TwitterSourceConfig.TRACK_FOLLOW + " should be specified!") 85 | } 86 | if (!locations.isEmpty) { 87 | if ((locations.size % 4) != 0) { 88 | throw new RuntimeException(TwitterSourceConfig.TRACK_LOCATIONS 89 | + " should have number of elements divisible by 4!") 90 | } 91 | try { 92 | 
locations.toList.map { x => x.trim.toDouble}
93 | } catch {
94 | case e: NumberFormatException => throw new RuntimeException("You should use double numbers in "
95 | + TwitterSourceConfig.TRACK_LOCATIONS)
96 | }
97 | }
98 | try {
99 | users.toList.map { x => x.trim.toLong}
100 | } catch {
101 | case e: NumberFormatException => throw new RuntimeException("You should use numeric user IDs in "
102 | + TwitterSourceConfig.TRACK_FOLLOW)
103 | }
104 | }
105 | case _ => throw new RuntimeException("Unknown value for "
106 | + TwitterSourceConfig.STREAM_TYPE + " parameter")
107 | }
108 |
109 | }
110 |
-------------------------------------------------------------------------------- /README.original.md: --------------------------------------------------------------------------------
1 | [![Build Status](https://travis-ci.org/Eneco/kafka-connect-twitter.svg?branch=master)](https://travis-ci.org/Eneco/kafka-connect-twitter)
2 |
3 | Kafka Connect Twitter
4 | =====================
5 |
6 | A Kafka Connect connector for Twitter. Both a source (from Twitter to Kafka) and sink (from Kafka to Twitter) are provided:
7 |
8 | - The *sink* receives plain strings from Kafka, which are tweeted using [Twitter4j](http://twitter4j.org/);
9 | - The *source* receives tweets from the [Twitter Streaming API](https://dev.twitter.com/streaming/overview) using [Hosebird](https://github.com/twitter/hbc), which are fed into Kafka either as a `TwitterStatus` structure (default) or as plain strings.
10 |
11 | Data types
12 | ==========
13 |
14 | - The *sink* connector expects plain strings (UTF-8 by default) from Kafka (`org.apache.kafka.connect.storage.StringConverter`), i.e. `kafka-console-producer` will do;
15 | - The *source* connector either outputs `TwitterStatus` structures (default) or plain strings. The Kafka Connect framework is serialization format agnostic. An intermediate representation is used inside the framework; when an actual Kafka record is to be created, the `key.converter` and `value.converter` properties are used. Chances are that you use Avro (`io.confluent.connect.avro.AvroConverter`) or JSON (`org.apache.kafka.connect.json.JsonConverter`). When `output.format=string`, both the key and value are strings, with the key the user name and the value the tweet text. Here the `org.apache.kafka.connect.storage.StringConverter` converter must be used.
16 |
17 | An actual `TwitterStatus` after JSON conversion, freshly grabbed from Kafka, looks like:
18 |
19 | ``` json
20 | {
21 | "id": 723416534626881536,
22 | "createdAt": "Fri Apr 22 09:41:56 CEST 2016",
23 | "favoriteCount": 0,
24 | "text": "Pipio ergo sum",
25 | "user": {
26 | "id": 4877511249,
27 | "name": "rollulus",
28 | "screenName": "rollulus"
29 | }
30 | }
31 | ```
32 |
33 | (indeed, having the `favoriteCount` field in there was a totally arbitrary choice)
34 |
35 | Setup
36 | =====
37 |
38 | Properties
39 | ----------
40 |
41 | In addition to the default configuration for Kafka connectors (e.g. `name`, `connector.class`, etc.) the following options are needed for both the source and sink:
42 |
43 | | name | data type | required | default | description |
44 | |:-------------------------|:----------|:---------|:--------|:------------------------|
45 | | `twitter.consumerkey` | string | yes | | Twitter consumer key |
46 | | `twitter.consumersecret` | string | yes | | Twitter consumer secret |
47 | | `twitter.token` | string | yes | | Twitter token |
48 | | `twitter.secret` | string | yes | | Twitter secret |
49 |
50 | This is all for the sink.
The *source* has the following additional properties:
51 |
52 | | name | data type | required | default | description |
53 | |:------------------|:----------|:---------|:-------------|:-------------------------------------------|
54 | | `stream.type` | string | no | filter | Type of stream ¹ |
55 | | `track.terms` | string | maybe ² | | A Twitter `track` parameter ³ |
56 | | `track.locations` | string | maybe ² | | A Twitter `locations` parameter ⁴ |
57 | | `track.follow` | string | maybe ² | | A Twitter `follow` parameter ⁵ |
58 | | `batch.size` | int | no | 100 | Flush after this many tweets ⁶ |
59 | | `batch.timeout` | double | no | 0.1 | Flush after this many seconds ⁶ |
60 | | `language` | string | no | | List of languages to fetch ⁷ |
61 | | `output.format` | string | no | `structured` | The output format: `[structured|string]` ⁸ |
62 |
63 | ¹ Type of stream: [filter](https://dev.twitter.com/streaming/reference/post/statuses/filter), or [sample](https://dev.twitter.com/streaming/reference/get/statuses/sample).
64 |
65 | ² When the `filter` type is used, at least one of the parameters `track.terms`, `track.locations`, or `track.follow` should be specified. If multiple parameters are specified, they are combined as an OR operation.
66 |
67 | ³ Please refer to [here](https://dev.twitter.com/streaming/overview/request-parameters#track) for the format of the `track` parameter.
68 |
69 | ⁴ Please refer to [here](https://dev.twitter.com/streaming/overview/request-parameters#locations) for the format of the `locations` parameter.
70 |
71 | ⁵ Please refer to [here](https://dev.twitter.com/streaming/overview/request-parameters#follow) for the format of the `follow` parameter.
72 |
73 | ⁶ Tweets are accumulated and flushed as a batch into Kafka; when the batch is larger than `batch.size` or when the oldest tweet in it is older than `batch.timeout` [s], it is flushed.
74 |
75 | ⁷ List of languages for which tweets will be returned. Can be used with any stream type. See [here](https://dev.twitter.com/streaming/overview/request-parameters#language) for the format of the `language` parameter.
76 |
77 | ⁸ The source can output in two ways: *structured*, where `TwitterStatus` structures are output as values, or *string*, where both the key and value are strings, with the key the user name and the value the tweet text. Remember to update `key.converter` and `value.converter` appropriately: `io.confluent.connect.avro.AvroConverter` or `org.apache.kafka.connect.json.JsonConverter` for *structured*; `org.apache.kafka.connect.storage.StringConverter` for *string*.
78 |
79 | An example `twitter-source.properties`:
80 |
81 | ``` properties
82 | name=twitter-source
83 | connector.class=com.eneco.trading.kafka.connect.twitter.TwitterSourceConnector
84 | tasks.max=1
85 | topic=twitter
86 | twitter.consumerkey=(secret)
87 | twitter.consumersecret=(secret)
88 | twitter.token=(secret)
89 | twitter.secret=(secret)
90 | track.terms=test
91 | ```
92 |
93 | And an example `twitter-sink.properties`:
94 |
95 | ``` properties
96 | name=twitter-sink
97 | connector.class=com.eneco.trading.kafka.connect.twitter.TwitterSinkConnector
98 | tasks.max=1
99 | topics=texts-to-tweet
100 | twitter.consumerkey=(secret)
101 | twitter.consumersecret=(secret)
102 | twitter.token=(secret)
103 | twitter.secret=(secret)
104 | ```
105 |
106 | Creating a Twitter application
107 | ------------------------------
108 |
109 | To obtain the required keys, visit https://apps.twitter.com/ and `Create a New App`.
Fill in an application name, description, and web site, and accept the developer agreement. Click on `Create my access token` and, starting from the example file, populate `twitter-source.properties` with the consumer key & secret and the access token & token secret. 110 | 111 | Setting up the Confluent Platform 112 | --------------------------------- 113 | 114 | Follow the instructions at [Confluent](http://docs.confluent.io) to install and run the `schema-registry` service and the appropriate `zookeeper` & `kafka` brokers. Once the platform is up & running, populate the file `connect-sink-standalone.properties` and/or `connect-source-standalone.properties` with the appropriate hostnames and ports. 115 | 116 | Assuming that `$CONFLUENT_HOME` refers to the root of your Confluent Platform installation: 117 | 118 | Start Zookeeper: 119 | 120 | $CONFLUENT_HOME/bin/zookeeper-server-start $CONFLUENT_HOME/etc/kafka/zookeeper.properties 121 | 122 | Start Kafka: 123 | 124 | $CONFLUENT_HOME/bin/kafka-server-start $CONFLUENT_HOME/etc/kafka/server.properties 125 | 126 | Start the Schema Registry: 127 | 128 | $CONFLUENT_HOME/bin/schema-registry-start $CONFLUENT_HOME/etc/schema-registry/schema-registry.properties 129 | 130 | Running 131 | ======= 132 | 133 | Starting kafka-connect-twitter 134 | ------------------------------ 135 | 136 | Having cloned this repository, build the latest source code with: 137 | 138 | mvn clean package 139 | 140 | Put the JAR file location into your `CLASSPATH`: 141 | 142 | export CLASSPATH=`pwd`/target/kafka-connect-twitter-0.1-jar-with-dependencies.jar 143 | 144 | ### Source, structured output mode 145 | 146 | To start a Kafka Connect source instance: 147 | 148 | $CONFLUENT_HOME/bin/connect-standalone connect-source-standalone.properties twitter-source.properties 149 | 150 | And watch Avro `TwitterStatus` tweets come in represented as JSON: 151 | 152 | $CONFLUENT_HOME/bin/kafka-avro-console-consumer --topic twitter --zookeeper localhost:2181 153 | 154 | ### Source, simple (plain strings) output mode 155 | 156 | To start a Kafka Connect source instance: 157 | 158 | $CONFLUENT_HOME/bin/connect-standalone connect-simple-source-standalone.properties twitter-simple-source.properties 159 | 160 | And watch tweets come in, with the key the user and the value the tweet text: 161 | 162 | $CONFLUENT_HOME/bin/kafka-console-consumer --zookeeper localhost:2181 \ 163 | --topic twitter \ 164 | --formatter kafka.tools.DefaultMessageFormatter \ 165 | --property print.key=true \ 166 | --property key.deserializer=org.apache.kafka.common.serialization.StringDeserializer \ 167 | --property value.deserializer=org.apache.kafka.common.serialization.StringDeserializer 168 | 169 | ### Sink 170 | 171 | To start a Kafka Connect sink instance: 172 | 173 | $CONFLUENT_HOME/bin/connect-standalone connect-sink-standalone.properties twitter-sink.properties 174 | 175 | Fire up the console producer to feed text from the console into your topic: 176 | 177 | $CONFLUENT_HOME/bin/kafka-console-producer --broker-list localhost:9092 --topic texts-to-tweet 178 | Pipio ergo sum 179 | 180 | Todo: 181 | ----- 182 | 183 | - Add hosebird client mode to take the full-fat response rather than the Twitter4j subset. Needs a JSON-to-Avro converter. Avro4s? 184 | - Split the track terms up and assign to workers? Limits on connections to twitter?
185 | - [ ] Extend 186 | - [ ] Test 187 | - [ ] Document 188 | 189 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.eneco.trading 8 | kafka-connect-twitter 9 | jar 10 | 0.1 11 | kafka-connect-twitter 12 | 13 | Eneco 14 | http://energytradeuk.eneco.nl 15 | 16 | http://energytradeuk.eneco.nl/ 17 | 18 | A Kafka Connect Twitter source connector for copying data between Twitter and Kafka. 19 | 20 | 21 | 22 | scm:git:git@github.com/Eneco/kafka-connect-twitter.git 23 | https://github.com/Eneco/kafka-connect-twitter 24 | scm:git:git@github.com/Eneco/kafka-connect-twitter.git 25 | 26 | 27 | 28 | 29 | 30 | rollulus 31 | Roel Reijerse 32 | Eneco 33 | 34 | 35 | andrewstevenson 36 | Andrew Stevenson 37 | Datamountaineer 38 | 39 | 40 | GodlyLudu 41 | Henry Cheung 42 | Eneco 43 | 44 | 45 | dudebowski 46 | Adriaan Mutter 47 | Eneco 48 | 49 | 50 | Chrizje 51 | Christian De Jong 52 | Eneco 53 | 54 | 55 | 56 | 57 | 1.8 58 | 2.11.7 59 | 3.0.0 60 | 0.10.0.0 61 | 2.2.0 62 | 1.7.7 63 | 1.7.13 64 | 1.3.9 65 | 1.10.19 66 | 3.0.0-M1 67 | 19.0 68 | UTF-8 69 | http://packages.confluent.io/maven/ 70 | https://repository.cloudera.com/artifactory/cloudera-repos 71 | 72 | 73 | 74 | 75 | confluent 76 | Confluent 77 | ${confluent.maven.repo} 78 | 79 | 80 | cdh.repo 81 | Cloudera Repositories 82 | ${cloudera.maven.repo} 83 | 84 | false 85 | 86 | 87 | 88 | 89 | 90 | 91 | org.scala-lang 92 | scala-library 93 | ${scala.version} 94 | 95 | 96 | org.apache.kafka 97 | connect-api 98 | ${kafka.version} 99 | provided 100 | 101 | 102 | org.scalamock 103 | scalamock-scalatest-support_2.11 104 | 3.2.2 105 | 106 | 107 | org.mockito 108 | mockito-all 109 | ${mockito.version} 110 | test 111 | 112 | 113 | com.google.code.findbugs 114 | jsr305 115 | ${jsr305.version} 116 | 117 | 118 | org.scalatest 119 | scalatest_2.11 120 | ${scalatest.version} 121 | test 122 | 123 | 124 | 125 | org.scala-lang 126 | scala-library 127 | 128 | 129 | 130 | 131 | com.twitter 132 | hbc-twitter4j 133 | ${hosebird.version} 134 | 135 | 136 | org.apache.avro 137 | avro 138 | ${avro.version} 139 | 140 | 141 | org.slf4j 142 | slf4j-simple 143 | ${slf4j.version} 144 | 145 | 146 | com.google.guava 147 | guava 148 | ${guava.version} 149 | 150 | 151 | 152 | 153 | 154 | 155 | src/main/resources 156 | true 157 | 158 | 159 | src/test/resources 160 | 161 | 162 | 163 | 164 | 165 | org.apache.maven.plugins 166 | maven-resources-plugin 167 | 2.7 168 | 169 | ${project.build.sourceEncoding} 170 | 171 | 172 | 173 | 174 | copy-resources 175 | testResources 176 | 177 | 178 | 179 | 180 | 181 | org.scalatest 182 | scalatest-maven-plugin 183 | 1.0 184 | 185 | 186 | test 187 | 188 | test 189 | 190 | 191 | 192 | 193 | 194 | net.alchim31.maven 195 | scala-maven-plugin 196 | 3.2.0 197 | 198 | incremental 199 | 200 | 201 | 202 | 203 | compile 204 | add-source 205 | testCompile 206 | 207 | 208 | 209 | 210 | 211 | maven-assembly-plugin 212 | 2.4 213 | 214 | 215 | jar-with-dependencies 216 | 217 | 218 | 219 | 220 | make-assembly 221 | package 222 | 223 | single 224 | 225 | 226 | 227 | 228 | 229 | exec-maven-plugin 230 | org.codehaus.mojo 231 | 1.5.0 232 | 233 | 234 | setup-confluent 235 | install 236 | 237 | exec 238 | 239 | 240 | ${basedir}/setup-confluent.sh 241 | 242 | ${user.home} 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------