├── streams-aggregator ├── README.md ├── src │ └── main │ │ ├── resources │ │ └── log4j2.xml │ │ └── java │ │ └── io │ │ └── jeffchao │ │ └── streams │ │ └── aggregator │ │ ├── Aggregator.java │ │ ├── sinks │ │ └── PostgresSink.java │ │ └── AggregatorConfig.java └── build.gradle ├── streams-text-processor ├── README.md ├── src │ ├── test │ │ └── java │ │ │ └── io │ │ │ └── jeffchao │ │ │ └── streams │ │ │ └── textprocessor │ │ │ └── TextProcessorTest.java │ └── main │ │ ├── resources │ │ └── log4j2.xml │ │ └── java │ │ └── io │ │ └── jeffchao │ │ └── streams │ │ └── textprocessor │ │ ├── TextProcessor.java │ │ └── TextProcessorConfig.java └── build.gradle ├── streams-anomaly-detector ├── README.md ├── src │ └── main │ │ ├── resources │ │ └── log4j2.xml │ │ └── java │ │ └── io │ │ └── jeffchao │ │ └── streams │ │ └── anomalydetector │ │ ├── sinks │ │ ├── AlertSink.java │ │ └── EmailSink.java │ │ ├── AnomalyDetector.java │ │ └── AnomalyDetectorConfig.java └── build.gradle ├── Gemfile ├── gradle ├── heroku │ ├── clean.gradle │ └── stage.gradle ├── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties └── check.gradle ├── settings.gradle ├── Procfile ├── Gemfile.lock ├── data-generators ├── text-generator │ ├── stream-lines-to-kafka.rb │ └── alice-in-wonderland.info ├── stream-to-kafka.rb └── log-generator │ └── stream-logs-to-kafka.rb ├── LICENSE ├── .gitignore ├── setup ├── gradlew.bat ├── README.md ├── gradlew └── codequality └── checkstyle.xml /streams-aggregator/README.md: -------------------------------------------------------------------------------- 1 | # streams-aggregator -------------------------------------------------------------------------------- /streams-text-processor/README.md: -------------------------------------------------------------------------------- 1 | # streams-text-processor -------------------------------------------------------------------------------- /streams-anomaly-detector/README.md: 
-------------------------------------------------------------------------------- 1 | # streams-anomaly-detector -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem 'ruby-kafka' 4 | gem 'faker' 5 | -------------------------------------------------------------------------------- /gradle/heroku/clean.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'base' 2 | 3 | clean.doLast { 4 | delete rootProject.buildDir 5 | } -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyric/kafka-streams-on-heroku/master/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'kafka-streams-on-heroku' 2 | 3 | include 'streams-text-processor' 4 | include 'streams-anomaly-detector' 5 | include 'streams-aggregator' 6 | -------------------------------------------------------------------------------- /streams-text-processor/src/test/java/io/jeffchao/streams/textprocessor/TextProcessorTest.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.textprocessor; 2 | 3 | 4 | public class TextProcessorTest { 5 | 6 | } -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | text_processor_worker: java -jar build/libs/streams-text-processor-all.jar 2 | anomaly_detector_worker: java -jar build/libs/streams-anomaly-detector-all.jar 3 | 
aggregator_worker: java -jar build/libs/streams-aggregator-all.jar 4 | -------------------------------------------------------------------------------- /gradle/heroku/stage.gradle: -------------------------------------------------------------------------------- 1 | task stage(dependsOn: ['clean', 'shadowJar']) 2 | 3 | task copyToLib(type: Copy) { 4 | from "$buildDir/libs" 5 | into "$rootProject.buildDir/libs" 6 | } 7 | copyToLib.dependsOn(shadowJar) 8 | stage.dependsOn(copyToLib) -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Wed Nov 01 15:30:19 PDT 2017 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.3-all.zip 7 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | concurrent-ruby (1.0.5) 5 | faker (1.8.5) 6 | i18n (~> 0.9.1) 7 | i18n (0.9.1) 8 | concurrent-ruby (~> 1.0) 9 | ruby-kafka (0.5.1) 10 | 11 | PLATFORMS 12 | ruby 13 | 14 | DEPENDENCIES 15 | faker 16 | ruby-kafka 17 | 18 | BUNDLED WITH 19 | 1.16.0 20 | -------------------------------------------------------------------------------- /gradle/check.gradle: -------------------------------------------------------------------------------- 1 | subprojects { 2 | apply plugin: 'checkstyle' 3 | checkstyle { 4 | ignoreFailures = true 5 | configFile = rootProject.file('codequality/checkstyle.xml') 6 | toolVersion = '8.4' 7 | } 8 | 9 | apply plugin: 'findbugs' 10 | findbugs { 11 | ignoreFailures = true 12 | } 13 | 14 | apply plugin: 'pmd' 15 | } 16 | 
-------------------------------------------------------------------------------- /streams-text-processor/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'application' 2 | 3 | mainClassName = 'io.jeffchao.streams.textprocessor.TextProcessor' 4 | 5 | dependencies { 6 | compile 'org.apache.kafka:kafka-streams:1.0.0' 7 | compile 'com.heroku.sdk:env-keystore:1.0.0' 8 | } 9 | 10 | run.doFirst { 11 | environment "ADDON_SUFFIX", "" 12 | environment "HEROKU_KAFKA_URL", "kafka://localhost:9092" 13 | } -------------------------------------------------------------------------------- /data-generators/text-generator/stream-lines-to-kafka.rb: -------------------------------------------------------------------------------- 1 | require_relative '../stream-to-kafka' 2 | 3 | initialize_kafka 4 | 5 | puts "Reading text file." 6 | count = 0 7 | ARGF.each_line do |line| 8 | line.strip! 9 | unless line.empty? 10 | produce(line, to: 'textlines') 11 | count += 1 12 | end 13 | print '.' if (ARGF.lineno % 100) == 0 14 | end 15 | 16 | puts "\nRead #{count} (non-blank) lines." 
17 | -------------------------------------------------------------------------------- /streams-aggregator/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /streams-anomaly-detector/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /streams-text-processor/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /streams-anomaly-detector/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'application' 2 | 3 | mainClassName = 'io.jeffchao.streams.anomalydetector.AnomalyDetector' 4 | 5 | dependencies { 6 | compile 'org.apache.kafka:kafka-streams:1.0.0' 7 | compile 'com.heroku.sdk:env-keystore:1.0.0' 8 | compile 'com.sendgrid:sendgrid-java:3.0.9' 9 | } 10 | 11 | run.doFirst { 12 | environment "ADDON_SUFFIX", "" 13 | environment "HEROKU_KAFKA_URL", "kafka://localhost:9092" 14 | } 15 | -------------------------------------------------------------------------------- /streams-aggregator/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'application' 2 | 3 | mainClassName = 'io.jeffchao.streams.aggregator.Aggregator' 4 | 5 | dependencies { 6 | compile 'org.apache.kafka:kafka-streams:1.0.0' 7 | compile 'com.heroku.sdk:env-keystore:1.0.0' 8 | compile 'org.postgresql:postgresql:42.1.4.jre7' 9 | } 10 | 11 | run.doFirst { 12 | environment "ADDON_SUFFIX", "" 13 | environment 
"HEROKU_KAFKA_URL", "kafka://localhost:9092" 14 | environment "HEROKU_POSTGRESQL_URL", "postgres://localhost:5432/kafka_streams_dev" 15 | } -------------------------------------------------------------------------------- /streams-anomaly-detector/src/main/java/io/jeffchao/streams/anomalydetector/sinks/AlertSink.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.anomalydetector.sinks; 2 | 3 | import org.apache.kafka.streams.kstream.Windowed; 4 | import org.apache.kafka.streams.processor.Processor; 5 | import org.apache.kafka.streams.processor.ProcessorContext; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | 10 | public class AlertSink implements Processor, Long> { 11 | 12 | private static final Logger log = LoggerFactory.getLogger(AlertSink.class); 13 | 14 | @Override 15 | public void init(ProcessorContext context) { 16 | } 17 | 18 | @Override 19 | public void process(Windowed key, Long value) { 20 | log.info("Too many login failures for {}, count: {}. 
Alerting to PagerDuty.", 21 | key.key(), value); 22 | } 23 | 24 | @Override 25 | public void punctuate(long timestamp) { 26 | } 27 | 28 | @Override 29 | public void close() { 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jeff Chao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /data-generators/stream-to-kafka.rb: -------------------------------------------------------------------------------- 1 | require 'kafka' 2 | require 'tempfile' 3 | 4 | 5 | def initialize_kafka 6 | tmp_ca_file = Tempfile.new('ca_certs') 7 | tmp_ca_file.write(ENV.fetch('HEROKU_KAFKA_TRUSTED_CERT')) 8 | tmp_ca_file.close 9 | 10 | producer_kafka = Kafka.new( 11 | seed_brokers: ENV.fetch('HEROKU_KAFKA_URL'), 12 | ssl_ca_cert_file_path: tmp_ca_file.path, 13 | ssl_client_cert: ENV.fetch('HEROKU_KAFKA_CLIENT_CERT'), 14 | ssl_client_cert_key: ENV.fetch('HEROKU_KAFKA_CLIENT_CERT_KEY') 15 | ) 16 | $producer = producer_kafka.async_producer( 17 | delivery_interval: 1, 18 | max_buffer_size: 10_000, 19 | max_buffer_bytesize: 100_000_000, 20 | required_acks: :all 21 | ) 22 | puts "Producer connected to Kafka." 23 | 24 | at_exit do 25 | $producer.shutdown 26 | tmp_ca_file.unlink 27 | puts "Producer shutdown." 28 | end 29 | end 30 | 31 | def kafka_topic(topic_name) 32 | kafka_topic = ENV.fetch('HEROKU_KAFKA_TOPIC', topic_name) 33 | if ENV['HEROKU_KAFKA_PREFIX'] 34 | kafka_topic = ENV['HEROKU_KAFKA_PREFIX'] + kafka_topic 35 | end 36 | 37 | kafka_topic 38 | end 39 | 40 | def produce(message, to: 'textlines') 41 | $producer.produce(message, topic: kafka_topic(to)) 42 | end 43 | -------------------------------------------------------------------------------- /data-generators/log-generator/stream-logs-to-kafka.rb: -------------------------------------------------------------------------------- 1 | require_relative '../stream-to-kafka' 2 | require 'faker' 3 | 4 | # spec: user_id | timestamp | ip_address | action | message 5 | def log_line(action: 'login succeeded') 6 | user_id = Faker::Internet.user_name 7 | timestamp = Time.now 8 | ip_address = Faker::Internet.ip_v4_address 9 | message = Faker::Lorem.words(3,0).join(' ') 10 | 11 | "user_id: #{user_id} | timestamp: #{timestamp} | "\ 12 | "ip_address: 
#{ip_address} | action: #{action} | message: #{message}" 13 | end 14 | 15 | def log_line_with_anomaly 16 | log_line(action: 'login failed') 17 | end 18 | 19 | initialize_kafka 20 | 21 | messages_per_second = ARGV[0] 22 | probability_of_anomality = ARGV[1].to_f * 100 23 | 24 | puts "Generating #{messages_per_second} log lines per second "\ 25 | "with #{probability_of_anomality}% chance of anomaly. "\ 26 | "CTRL-C to stop." 27 | 28 | count = 0 29 | 30 | trap "SIGINT" do 31 | puts "\nGenerated #{count} log lines. Stopping." 32 | exit 130 33 | end 34 | 35 | while true 36 | if rand(100) <= probability_of_anomality 37 | produce(log_line_with_anomaly, to: 'loglines') 38 | else 39 | produce(log_line, to: 'loglines') 40 | end 41 | count += 1 42 | print "#{count}..." if (count % 10) == 0 43 | sleep (1 / messages_per_second.to_f) 44 | end 45 | -------------------------------------------------------------------------------- /streams-aggregator/src/main/java/io/jeffchao/streams/aggregator/Aggregator.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.aggregator; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.security.KeyStoreException; 6 | import java.security.NoSuchAlgorithmException; 7 | import java.security.cert.CertificateException; 8 | import java.util.Optional; 9 | import java.util.Properties; 10 | import java.util.concurrent.TimeUnit; 11 | 12 | import io.jeffchao.streams.aggregator.sinks.PostgresSink; 13 | import org.apache.kafka.streams.KafkaStreams; 14 | import org.apache.kafka.streams.StreamsBuilder; 15 | import org.apache.kafka.streams.kstream.KStream; 16 | import org.apache.kafka.streams.kstream.Materialized; 17 | import org.apache.kafka.streams.kstream.TimeWindows; 18 | import org.apache.kafka.streams.kstream.Windowed; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | 23 | public class Aggregator { 24 | 25 | private static final 
Logger log = LoggerFactory.getLogger(Aggregator.class); 26 | 27 | private static final String ADDON_SUFFIX = Optional.ofNullable( 28 | System.getenv("ADDON_SUFFIX")).orElse(""); 29 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX); 30 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable( 31 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse(""); 32 | 33 | public static void main(String[] args) throws CertificateException, NoSuchAlgorithmException, 34 | KeyStoreException, IOException, URISyntaxException { 35 | Properties streamsConfig = new AggregatorConfig().getProperties(); 36 | 37 | final StreamsBuilder builder = new StreamsBuilder(); 38 | 39 | final KStream, String> words = 40 | builder.stream(String.format("%swords", HEROKU_KAFKA_PREFIX)); 41 | 42 | words 43 | .groupBy((key, word) -> word) 44 | .windowedBy(TimeWindows.of(TimeUnit.SECONDS.toMillis(10))) 45 | .count(Materialized.as("windowed-counts")) 46 | .toStream() 47 | .process(PostgresSink::new); 48 | 49 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfig); 50 | 51 | streams.cleanUp(); 52 | streams.start(); 53 | 54 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); 55 | } 56 | 57 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Intellij+iml ### 2 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 3 | 4 | # User-specific stuff: 5 | .idea/**/workspace.xml 6 | .idea/**/tasks.xml 7 | .idea/dictionaries 8 | 9 | # Sensitive or high-churn files: 10 | .idea/**/dataSources/ 11 | .idea/**/dataSources.ids 12 | .idea/**/dataSources.xml 13 | .idea/**/dataSources.local.xml 14 | .idea/**/sqlDataSources.xml 15 | .idea/**/dynamic.xml 16 | .idea/**/uiDesigner.xml 17 | 18 | # Gradle: 19 | .idea/**/gradle.xml 20 | .idea/**/libraries 21 | 22 | ## 
File-based project format: 23 | *.iws 24 | 25 | ## Plugin-specific files: 26 | 27 | # IntelliJ 28 | /out/ 29 | 30 | # mpeltonen/sbt-idea plugin 31 | .idea_modules/ 32 | 33 | # Crashlytics plugin (for Android Studio and IntelliJ) 34 | com_crashlytics_export_strings.xml 35 | crashlytics.properties 36 | crashlytics-build.properties 37 | fabric.properties 38 | 39 | ### Intellij+iml Patch ### 40 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 41 | 42 | .idea/ 43 | 44 | *.iml 45 | modules.xml 46 | .idea/misc.xml 47 | *.ipr 48 | 49 | ### Java ### 50 | # Compiled class file 51 | *.class 52 | 53 | # Log file 54 | *.log 55 | 56 | # Package Files # 57 | *.jar 58 | *.war 59 | *.ear 60 | *.zip 61 | *.tar.gz 62 | *.rar 63 | 64 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 65 | hs_err_pid* 66 | 67 | ### macOS ### 68 | *.DS_Store 69 | .AppleDouble 70 | .LSOverride 71 | 72 | # Icon must end with two \r 73 | Icon 74 | 75 | # Thumbnails 76 | ._* 77 | 78 | # Files that might appear in the root of a volume 79 | .DocumentRevisions-V100 80 | .fseventsd 81 | .Spotlight-V100 82 | .TemporaryItems 83 | .Trashes 84 | .VolumeIcon.icns 85 | .com.apple.timemachine.donotpresent 86 | 87 | # Directories potentially created on remote AFP share 88 | .AppleDB 89 | .AppleDesktop 90 | Network Trash Folder 91 | Temporary Items 92 | .apdisk 93 | 94 | ### Gradle ### 95 | .gradle 96 | **/build/ 97 | 98 | # Ignore Gradle GUI config 99 | gradle-app.setting 100 | 101 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 102 | !gradle-wrapper.jar 103 | 104 | # Cache of project 105 | .gradletasknamecache 106 | 107 | ### Ruby ### 108 | .bundle 109 | vendor 110 | -------------------------------------------------------------------------------- /streams-text-processor/src/main/java/io/jeffchao/streams/textprocessor/TextProcessor.java: 
-------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.textprocessor; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.security.KeyStoreException; 6 | import java.security.NoSuchAlgorithmException; 7 | import java.security.cert.CertificateException; 8 | import java.util.Arrays; 9 | import java.util.Optional; 10 | import java.util.Properties; 11 | import java.util.regex.Pattern; 12 | 13 | import org.apache.kafka.common.serialization.Serde; 14 | import org.apache.kafka.common.serialization.Serdes; 15 | import org.apache.kafka.streams.KafkaStreams; 16 | import org.apache.kafka.streams.StreamsBuilder; 17 | import org.apache.kafka.streams.kstream.KStream; 18 | import org.apache.kafka.streams.kstream.Produced; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | 23 | public class TextProcessor { 24 | 25 | private static final Logger log = LoggerFactory.getLogger(TextProcessor.class); 26 | 27 | private static final String ADDON_SUFFIX = Optional.ofNullable( 28 | System.getenv("ADDON_SUFFIX")).orElse(""); 29 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX); 30 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable( 31 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse(""); 32 | 33 | public static void main(String[] args) throws CertificateException, NoSuchAlgorithmException, 34 | KeyStoreException, IOException, URISyntaxException { 35 | Properties streamsConfig = new TextProcessorConfig().getProperties(); 36 | 37 | final Serde stringSerde = Serdes.String(); 38 | 39 | final StreamsBuilder builder = new StreamsBuilder(); 40 | 41 | final KStream textLines = 42 | builder.stream(String.format("%stextlines", HEROKU_KAFKA_PREFIX)); 43 | 44 | final Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); 45 | 46 | textLines 47 | .flatMapValues(value -> 
Arrays.asList(pattern.split(value.toLowerCase()))) 48 | .to(String.format("%swords", HEROKU_KAFKA_PREFIX), Produced.with(stringSerde, stringSerde)); 49 | 50 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfig); 51 | 52 | streams.cleanUp(); 53 | streams.start(); 54 | 55 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); 56 | } 57 | 58 | } -------------------------------------------------------------------------------- /setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | setup_dedicated () { 6 | heroku addons:create heroku-postgresql:hobby-dev --as HEROKU_POSTGRESQL -a ${APP_NAME} && \ 7 | heroku addons:create heroku-kafka:${PLAN} --as HEROKU_KAFKA -a ${APP_NAME} && \ 8 | heroku kafka:wait -a ${APP_NAME} && \ 9 | heroku kafka:topics:create textlines --partitions 5 -a ${APP_NAME} && \ 10 | heroku kafka:topics:create words --partitions 5 -a ${APP_NAME} && \ 11 | heroku kafka:topics:create loglines --partitions 5 -a ${APP_NAME} 12 | } 13 | 14 | setup_multi_tenant () { 15 | heroku addons:create heroku-postgresql:hobby-dev --as HEROKU_POSTGRESQL -a ${APP_NAME} && \ 16 | heroku addons:create heroku-kafka:${PLAN} --as HEROKU_KAFKA -a ${APP_NAME} && \ 17 | heroku kafka:wait -a ${APP_NAME} && \ 18 | heroku kafka:topics:create textlines --partitions 5 -a ${APP_NAME} && \ 19 | heroku kafka:topics:create words --partitions 5 -a ${APP_NAME} && \ 20 | heroku kafka:topics:create loglines --partitions 5 -a ${APP_NAME} && \ 21 | heroku kafka:topics:create aggregator-app-windowed-counts-changelog --partitions 5 -a ${APP_NAME} && \ 22 | heroku kafka:topics:create aggregator-app-windowed-counts-repartition --partitions 5 -a ${APP_NAME} && \ 23 | heroku kafka:topics:create anomaly-detector-app-windowed-counts-changelog --partitions 5 -a ${APP_NAME} && \ 24 | heroku kafka:topics:create anomaly-detector-app-windowed-counts-repartition --partitions 5 -a ${APP_NAME} 
&& \ 25 | heroku kafka:consumer-groups:create anomaly-detector-app -a ${APP_NAME} && \ 26 | heroku kafka:consumer-groups:create text-processor-app -a ${APP_NAME} && \ 27 | heroku kafka:consumer-groups:create aggregator-app -a ${APP_NAME} 28 | } 29 | 30 | if [[ -z $1 ]]; then 31 | echo "usage: $0 APP_NAME PLAN" >&2 32 | exit 1 33 | fi 34 | 35 | APP_NAME=$1 36 | 37 | if [[ $2 =~ (standard|basic|extended|private-standard|private-extended)-[012] ]]; then 38 | PLAN=$2 39 | else 40 | PLAN="basic-0" 41 | fi 42 | 43 | if [[ $PLAN =~ basic-[012] ]]; then 44 | setup_multi_tenant 45 | else 46 | setup_dedicated 47 | fi 48 | 49 | heroku pg:psql -c 'create table windowed_counts(id serial primary key not null, time_window bigint not null, word text, count bigint not null);' HEROKU_POSTGRESQL_URL -a ${APP_NAME} 50 | 51 | heroku ps:scale text_processor_worker=1 -a ${APP_NAME} 52 | heroku ps:scale aggregator_worker=1 -a ${APP_NAME} 53 | # heroku ps:scale anomaly_detector_worker=1 -a ${APP_NAME} # Can't run more than 2 free dynos. 
54 | -------------------------------------------------------------------------------- /streams-aggregator/src/main/java/io/jeffchao/streams/aggregator/sinks/PostgresSink.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.aggregator.sinks; 2 | 3 | import java.net.URI; 4 | import java.net.URISyntaxException; 5 | import java.sql.Connection; 6 | import java.sql.DriverManager; 7 | import java.sql.PreparedStatement; 8 | import java.sql.SQLException; 9 | import java.util.Optional; 10 | 11 | import org.apache.kafka.streams.kstream.Windowed; 12 | import org.apache.kafka.streams.processor.Processor; 13 | import org.apache.kafka.streams.processor.ProcessorContext; 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | 17 | 18 | public class PostgresSink implements Processor, Long> { 19 | 20 | private static final Logger log = LoggerFactory.getLogger(PostgresSink.class); 21 | 22 | private Connection connection; 23 | 24 | private static Connection getConnection() throws URISyntaxException, SQLException { 25 | URI dbUri = new URI(System.getenv("HEROKU_POSTGRESQL_URL")); 26 | 27 | String[] userInfo = Optional.ofNullable(dbUri.getUserInfo()).orElse(":").split(":"); 28 | String username = userInfo.length == 0 ? null : userInfo[0]; 29 | String password = userInfo.length == 0 ? 
null : userInfo[1]; 30 | String dbUrl = "jdbc:postgresql://" + dbUri.getHost() + ':' + dbUri.getPort() + dbUri.getPath(); 31 | 32 | return DriverManager.getConnection(dbUrl, username, password); 33 | } 34 | 35 | @Override 36 | public void init(ProcessorContext context) { 37 | try { 38 | connection = getConnection(); 39 | } catch (URISyntaxException | SQLException e) { 40 | e.printStackTrace(); 41 | } 42 | } 43 | 44 | @Override 45 | public void process(Windowed key, Long value) { 46 | log.info("writing to pg: window: {}, key: {}, value: {}", key.window(), key.key(), value); 47 | try { 48 | PreparedStatement statement = connection.prepareStatement( 49 | "INSERT INTO windowed_counts (time_window, word, count) VALUES (?, ?, ?)"); 50 | statement.setLong(1, key.window().start()); 51 | statement.setString(2, key.key()); 52 | statement.setLong(3, value); 53 | 54 | statement.execute(); 55 | } catch (SQLException e) { 56 | log.error(e.getMessage(), e); 57 | } 58 | 59 | } 60 | 61 | @Override 62 | public void punctuate(long timestamp) { 63 | } 64 | 65 | @Override 66 | public void close() { 67 | try { 68 | connection.close(); 69 | } catch (SQLException e) { 70 | e.printStackTrace(); 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /streams-anomaly-detector/src/main/java/io/jeffchao/streams/anomalydetector/sinks/EmailSink.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.anomalydetector.sinks; 2 | 3 | import java.io.IOException; 4 | 5 | import com.google.common.base.Strings; 6 | import com.sendgrid.Content; 7 | import com.sendgrid.Email; 8 | import com.sendgrid.Mail; 9 | import com.sendgrid.Method; 10 | import com.sendgrid.Request; 11 | import com.sendgrid.SendGrid; 12 | import org.apache.kafka.streams.kstream.Windowed; 13 | import org.apache.kafka.streams.processor.Processor; 14 | import org.apache.kafka.streams.processor.ProcessorContext; 15 | import 
org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | 19 | public class EmailSink implements Processor, Long> { 20 | 21 | private static final Logger log = LoggerFactory.getLogger(EmailSink.class); 22 | 23 | @Override 24 | public void init(ProcessorContext context) { 25 | } 26 | 27 | private Content generateContent(Windowed key, Long value) { 28 | return new Content("text/plain", "Hello, our realtime anomaly detector " 29 | + "has detected an issue for " + key.key() + " with " 30 | + value + " failed login attempts"); 31 | } 32 | 33 | private void sendEmail(Windowed key, Long value) throws IOException { 34 | Email from = new Email("example@example.com"); 35 | String subject = String.format("Anomaly detected for %s", value); 36 | Email to = new Email(System.getenv("TESTING_EMAIL")); 37 | Content content = generateContent(key, value); 38 | Mail mail = new Mail(from, subject, to, content); 39 | 40 | SendGrid sendGrid = new SendGrid(System.getenv("SENDGRID_API_KEY")); 41 | Request request = new Request(); 42 | request.method = Method.POST; 43 | request.endpoint = "mail/send"; 44 | request.body = mail.build(); 45 | sendGrid.api(request); 46 | } 47 | 48 | @Override 49 | public void process(Windowed key, Long value) { 50 | if (Strings.isNullOrEmpty(System.getenv("SENDGRID_API_KEY"))) { 51 | log.info(generateContent(key, value).getValue()); 52 | } else { 53 | try { 54 | log.info("Sending email to {} with content {}", 55 | System.getenv("TESTING_EMAIL"), 56 | generateContent(key, value).toString()); 57 | sendEmail(key, value); 58 | } catch (IOException e) { 59 | log.error(e.getMessage(), e); 60 | } 61 | } 62 | } 63 | 64 | @Override 65 | public void punctuate(long timestamp) { 66 | } 67 | 68 | @Override 69 | public void close() { 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem 
########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 
55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /streams-anomaly-detector/src/main/java/io/jeffchao/streams/anomalydetector/AnomalyDetector.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.anomalydetector; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.security.KeyStoreException; 6 | import java.security.NoSuchAlgorithmException; 7 | import java.security.cert.CertificateException; 8 | import java.util.Optional; 9 | import java.util.Properties; 10 | import java.util.concurrent.TimeUnit; 11 | 12 | import io.jeffchao.streams.anomalydetector.sinks.AlertSink; 13 | import io.jeffchao.streams.anomalydetector.sinks.EmailSink; 14 | import org.apache.kafka.streams.KafkaStreams; 15 | import org.apache.kafka.streams.StreamsBuilder; 16 | import org.apache.kafka.streams.kstream.KStream; 17 | import org.apache.kafka.streams.kstream.Materialized; 18 | import org.apache.kafka.streams.kstream.TimeWindows; 19 | import org.apache.kafka.streams.kstream.Windowed; 20 | import 
org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | 24 | public class AnomalyDetector { 25 | 26 | private static final Logger log = LoggerFactory.getLogger(AnomalyDetector.class); 27 | 28 | private static final String ADDON_SUFFIX = Optional.ofNullable( 29 | System.getenv("ADDON_SUFFIX")).orElse(""); 30 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX); 31 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable( 32 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse(""); 33 | 34 | public static void main(String[] args) throws CertificateException, NoSuchAlgorithmException, 35 | KeyStoreException, IOException, URISyntaxException { 36 | Properties streamsConfig = new AnomalyDetectorConfig().getProperties(); 37 | 38 | final StreamsBuilder builder = new StreamsBuilder(); 39 | 40 | final KStream loglines = 41 | builder.stream( String.format("%sloglines", HEROKU_KAFKA_PREFIX)); 42 | 43 | KStream, Long> anomalies = loglines 44 | .filter((key, value) -> value.contains("login failed")) 45 | .selectKey((key, value) -> value.split("\\|")[0]) 46 | .groupByKey() 47 | .windowedBy(TimeWindows.of(TimeUnit.SECONDS.toMillis(10))) 48 | .count(Materialized.as("windowed-counts")) 49 | .toStream(); 50 | 51 | @SuppressWarnings("unchecked") 52 | KStream, Long>[] branches = anomalies 53 | .branch( 54 | (key, value) -> value > 1, 55 | (key, value) -> value > 0 56 | ); 57 | 58 | branches[0].process(AlertSink::new); 59 | branches[1].process(EmailSink::new); 60 | 61 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfig); 62 | 63 | streams.cleanUp(); 64 | streams.start(); 65 | 66 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); 67 | } 68 | 69 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kafka-streams-on-heroku 2 | 3 | Kafka Streams 
example on Heroku with a multi-project gradle build 4 | 5 | ## Dependencies 6 | 7 | 1. Postgres 8 | 2. Kafka (+ Zookeeper) 0.10+ (this uses 0.11 brokers against 1.0 client) 9 | 3. Java 8 10 | 4. Gradle 4.3 (use sdkman) 11 | 12 | ## Local Development 13 | 14 | ### Building 15 | 16 | ``` 17 | $ ./gradlew clean build 18 | ``` 19 | 20 | ### Testing 21 | 22 | ``` 23 | $ ./gradlew clean test 24 | ``` 25 | 26 | ### Building FatJar Artifacts 27 | 28 | ``` 29 | $ ./gradlew clean stage 30 | ``` 31 | 32 | ### Running Locally 33 | 34 | Topologies are organized as subprojects. You can run any or all of them 35 | 36 | ``` 37 | (start postgres - optional, zookeeper - required, kafka - required) 38 | $ ./gradlew streams-text-processor:run 39 | $ ./gradlew streams-aggregator:run 40 | $ ./gradlew streams-anomaly-detector:run 41 | 42 | ``` 43 | 44 | ## Deployment: Heroku 45 | 46 | ### Dependencies 47 | 48 | 1. Postgres 49 | 2. Kafka 50 | 3. Heroku CLI 51 | 4. Heroku Kafka CLI Plugin 52 | 53 | ### Config Vars 54 | 55 | 1. `SENDGRID_API_KEY` (optional via SendGrid addon) 56 | 2.
`TESTING_EMAIL` (optional for sinking to a test email using SendGrid addon) 57 | 58 | 59 | ### Setup 60 | 61 | Install the Heroku CLI: https://devcenter.heroku.com/articles/heroku-cli 62 | 63 | 64 | Install the Heroku Kafka CLI Plugin: 65 | 66 | ``` 67 | heroku plugins:install heroku-kafka 68 | ``` 69 | 70 | Clone the application: 71 | 72 | ``` 73 | $ git clone git@github.com:kissaten/kafka-streams-on-heroku.git 74 | ``` 75 | 76 | Create the application: 77 | 78 | ``` 79 | $ cd kafka-streams-on-heroku 80 | $ heroku apps:create 81 | $ heroku buildpacks:add heroku/ruby 82 | $ heroku buildpacks:add heroku/gradle 83 | ``` 84 | 85 | Deploy the application: 86 | 87 | ``` 88 | $ git push heroku master 89 | ``` 90 | 91 | Run the setup script: 92 | 93 | ``` 94 | $ ./setup 95 | ``` 96 | 97 | ### Smoke Testing 98 | 99 | ``` 100 | $ heroku kafka:topics:write [prefix]textlines "hello world" -a 101 | $ heroku pg:psql -c 'select * from windowed_counts' HEROKU_POSTGRESQL_URL -a 102 | ``` 103 | 104 | ### Example Use Cases 105 | 106 | Now let's use Kafka Streams with some example use cases. The `data-generators` directory contains some simple Ruby scripts to generate streams of data. Instructions on how to use them are below. 107 | 108 | #### Word Count 109 | 110 | First we'll do word count over a large stream of text. This will produce into Kafka lines from _Alice's Adventures in Wonderland_. 
111 | 112 | ```bash 113 | $ heroku run ruby data-generators/text-generator/stream-lines-to-kafka.rb data-generators/text-generator/alice-in-wonderland.txt --app sushi 114 | ``` 115 | 116 | Alternatively, if you have Ruby and Bundler installed locally, you can run the data generator locally 117 | 118 | ```bash 119 | $ bundle install --path=vendor/gems 120 | $ cd data-generators/text-generator 121 | $ HEROKU_KAFKA_URL=$(heroku config:get HEROKU_KAFKA_URL) \ 122 | HEROKU_KAFKA_CLIENT_CERT=$(heroku config:get HEROKU_KAFKA_CLIENT_CERT) \ 123 | HEROKU_KAFKA_CLIENT_CERT_KEY=$(heroku config:get HEROKU_KAFKA_CLIENT_CERT_KEY) \ 124 | HEROKU_KAFKA_TRUSTED_CERT=$(heroku config:get HEROKU_KAFKA_TRUSTED_CERT) \ 125 | HEROKU_KAFKA_PREFIX=$(heroku config:get HEROKU_KAFKA_PREFIX) \ 126 | bundle exec ruby stream-lines-to-kafka.rb alice-in-wonderland.txt 127 | ``` 128 | 129 | Now we can see the word count for specific time windows: 130 | 131 | ```bash 132 | $ heroku pg:psql -c 'select * from windowed_counts order by time_window desc' HEROKU_POSTGRESQL_URL 133 | ``` 134 | 135 | #### Anomaly Detection 136 | 137 | Let's look at a more interesting use case -- not only because it is more realistic but also because it better showcases continuously updating calculations based on a stream of data. You'll need two separate terminal windows for this. 138 | 139 | In the first one, tail the Heroku application logs 140 | ``` 141 | $ heroku logs --tail --app sushi 142 | ``` 143 | 144 | In the second one, we'll generate some data. This will produce into Kafka fake log data at a rate of 10 messages per second with a 20% chance of anomaly.
145 | 146 | ```bash 147 | $ heroku run ruby data-generators/log-generator/stream-logs-to-kafka.rb 10 .2 --app sushi 148 | ``` 149 | 150 | Alternatively, if you have Ruby and Bundler installed locally, you can run the data generator locally 151 | 152 | ```bash 153 | $ bundle install --path=vendor/gems 154 | $ cd data-generators/log-generator 155 | $ HEROKU_KAFKA_URL=$(heroku config:get HEROKU_KAFKA_URL) \ 156 | HEROKU_KAFKA_CLIENT_CERT=$(heroku config:get HEROKU_KAFKA_CLIENT_CERT) \ 157 | HEROKU_KAFKA_CLIENT_CERT_KEY=$(heroku config:get HEROKU_KAFKA_CLIENT_CERT_KEY) \ 158 | HEROKU_KAFKA_TRUSTED_CERT=$(heroku config:get HEROKU_KAFKA_TRUSTED_CERT) \ 159 | HEROKU_KAFKA_PREFIX=$(heroku config:get HEROKU_KAFKA_PREFIX) \ 160 | bundle exec ruby stream-logs-to-kafka.rb 10 .2 161 | ``` 162 | 163 | Looking at the Heroku applications logs, you will see STDOUT output showing an anomaly has been detected. 164 | -------------------------------------------------------------------------------- /streams-aggregator/src/main/java/io/jeffchao/streams/aggregator/AggregatorConfig.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.aggregator; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.net.URI; 6 | import java.net.URISyntaxException; 7 | import java.security.KeyStoreException; 8 | import java.security.NoSuchAlgorithmException; 9 | import java.security.cert.CertificateException; 10 | import java.util.List; 11 | import java.util.Optional; 12 | import java.util.Properties; 13 | 14 | import com.google.common.base.Joiner; 15 | import com.google.common.base.Preconditions; 16 | import com.google.common.base.Splitter; 17 | import com.google.common.collect.Lists; 18 | import com.heroku.sdk.EnvKeyStore; 19 | import org.apache.kafka.clients.CommonClientConfigs; 20 | import org.apache.kafka.common.config.SslConfigs; 21 | import org.apache.kafka.common.serialization.Serdes; 22 | import 
org.apache.kafka.streams.StreamsConfig; 23 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | 28 | class AggregatorConfig extends Properties { 29 | 30 | private static final Logger log = LoggerFactory.getLogger(AggregatorConfig.class); 31 | 32 | private static final String ADDON_SUFFIX = Optional.ofNullable( 33 | System.getenv("ADDON_SUFFIX")).orElse(""); 34 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX); 35 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable( 36 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse(""); 37 | private static final String HEROKU_KAFKA_URL = String.format("%s_URL", HEROKU_KAFKA); 38 | private static final String HEROKU_KAFKA_TRUSTED_CERT = 39 | String.format("%s_TRUSTED_CERT", HEROKU_KAFKA); 40 | private static final String HEROKU_KAFKA_CLIENT_CERT_KEY = 41 | String.format("%s_CLIENT_CERT_KEY", HEROKU_KAFKA); 42 | private static final String HEROKU_KAFKA_CLIENT_CERT = 43 | String.format("%s_CLIENT_CERT", HEROKU_KAFKA); 44 | 45 | private String bootstrapServers; 46 | 47 | Properties getProperties() throws URISyntaxException, CertificateException, 48 | NoSuchAlgorithmException, KeyStoreException, IOException { 49 | return buildDefaults(); 50 | } 51 | 52 | private Properties buildDefaults() throws CertificateException, NoSuchAlgorithmException, 53 | KeyStoreException, IOException, URISyntaxException { 54 | Properties defaultProperties = new Properties(); 55 | Properties herokuKafkaConfigVarProperties = buildHerokuKafkaConfigVars(); 56 | Properties kafkaStreamsProperties = buildKafkaStreamsDefaults(); 57 | 58 | defaultProperties.putAll(herokuKafkaConfigVarProperties); 59 | defaultProperties.putAll(kafkaStreamsProperties); 60 | 61 | 62 | return defaultProperties; 63 | } 64 | 65 | private Properties buildHerokuKafkaConfigVars() throws URISyntaxException, 
CertificateException, 66 | NoSuchAlgorithmException, KeyStoreException, IOException { 67 | Properties properties = new Properties(); 68 | List bootstrapServerList = Lists.newArrayList(); 69 | 70 | Iterable kafkaUrl = Splitter.on(",") 71 | .split(Preconditions.checkNotNull(System.getenv(HEROKU_KAFKA_URL))); 72 | 73 | for (String url : kafkaUrl) { 74 | URI uri = new URI(url); 75 | bootstrapServerList.add(String.format("%s:%d", uri.getHost(), uri.getPort())); 76 | 77 | switch (uri.getScheme()) { 78 | case "kafka": 79 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "PLAINTEXT"); 80 | break; 81 | case "kafka+ssl": 82 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL"); 83 | EnvKeyStore envTrustStore = EnvKeyStore.createWithRandomPassword( 84 | HEROKU_KAFKA_TRUSTED_CERT); 85 | EnvKeyStore envKeyStore = EnvKeyStore.createWithRandomPassword( 86 | HEROKU_KAFKA_CLIENT_CERT_KEY, HEROKU_KAFKA_CLIENT_CERT); 87 | 88 | File trustStoreFile = envTrustStore.storeTemp(); 89 | File keyStoreFile = envKeyStore.storeTemp(); 90 | 91 | properties.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, envTrustStore.type()); 92 | properties.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, 93 | trustStoreFile.getAbsolutePath()); 94 | properties.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, envTrustStore.password()); 95 | properties.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, envKeyStore.type()); 96 | properties.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, keyStoreFile.getAbsolutePath()); 97 | properties.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, envKeyStore.password()); 98 | break; 99 | default: 100 | throw new URISyntaxException(uri.getScheme(), "Unknown URI scheme"); 101 | } 102 | } 103 | 104 | bootstrapServers = Joiner.on(",").join(bootstrapServerList); 105 | 106 | return properties; 107 | } 108 | 109 | private Properties buildKafkaStreamsDefaults() { 110 | Properties properties = new Properties(); 111 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG, 112 | 
String.format("%saggregator-app", HEROKU_KAFKA_PREFIX)); 113 | properties.put(StreamsConfig.CLIENT_ID_CONFIG, 114 | String.format("%saggregator-client", "")); 115 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 116 | properties.put( 117 | StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, 118 | Serdes.String().getClass().getName()); 119 | properties.put( 120 | StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, 121 | Serdes.String().getClass().getName()); 122 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000); 123 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, 124 | WallclockTimestampExtractor.class); 125 | 126 | return properties; 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /streams-text-processor/src/main/java/io/jeffchao/streams/textprocessor/TextProcessorConfig.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.textprocessor; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.net.URI; 6 | import java.net.URISyntaxException; 7 | import java.security.KeyStoreException; 8 | import java.security.NoSuchAlgorithmException; 9 | import java.security.cert.CertificateException; 10 | import java.util.List; 11 | import java.util.Optional; 12 | import java.util.Properties; 13 | 14 | import com.google.common.base.Joiner; 15 | import com.google.common.base.Preconditions; 16 | import com.google.common.base.Splitter; 17 | import com.google.common.collect.Lists; 18 | import com.heroku.sdk.EnvKeyStore; 19 | import org.apache.kafka.clients.CommonClientConfigs; 20 | import org.apache.kafka.common.config.SslConfigs; 21 | import org.apache.kafka.common.serialization.Serdes; 22 | import org.apache.kafka.streams.StreamsConfig; 23 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | 
28 | class TextProcessorConfig extends Properties { 29 | 30 | private static final Logger log = LoggerFactory.getLogger(TextProcessorConfig.class); 31 | 32 | private static final String ADDON_SUFFIX = Optional.ofNullable( 33 | System.getenv("ADDON_SUFFIX")).orElse(""); 34 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX); 35 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable( 36 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse(""); 37 | private static final String HEROKU_KAFKA_URL = String.format("%s_URL", HEROKU_KAFKA); 38 | private static final String HEROKU_KAFKA_TRUSTED_CERT = 39 | String.format("%s_TRUSTED_CERT", HEROKU_KAFKA); 40 | private static final String HEROKU_KAFKA_CLIENT_CERT_KEY = 41 | String.format("%s_CLIENT_CERT_KEY", HEROKU_KAFKA); 42 | private static final String HEROKU_KAFKA_CLIENT_CERT = 43 | String.format("%s_CLIENT_CERT", HEROKU_KAFKA); 44 | 45 | private String bootstrapServers; 46 | 47 | Properties getProperties() throws URISyntaxException, CertificateException, 48 | NoSuchAlgorithmException, KeyStoreException, IOException { 49 | return buildDefaults(); 50 | } 51 | 52 | private Properties buildDefaults() throws CertificateException, NoSuchAlgorithmException, 53 | KeyStoreException, IOException, URISyntaxException { 54 | Properties defaultProperties = new Properties(); 55 | Properties herokuKafkaConfigVarProperties = buildHerokuKafkaConfigVars(); 56 | Properties kafkaStreamsProperties = buildKafkaStreamsDefaults(); 57 | 58 | defaultProperties.putAll(herokuKafkaConfigVarProperties); 59 | defaultProperties.putAll(kafkaStreamsProperties); 60 | 61 | 62 | return defaultProperties; 63 | } 64 | 65 | private Properties buildHerokuKafkaConfigVars() throws URISyntaxException, CertificateException, 66 | NoSuchAlgorithmException, KeyStoreException, IOException { 67 | Properties properties = new Properties(); 68 | List bootstrapServerList = Lists.newArrayList(); 69 | 70 | 
Iterable kafkaUrl = Splitter.on(",") 71 | .split(Preconditions.checkNotNull(System.getenv(HEROKU_KAFKA_URL))); 72 | 73 | for (String url : kafkaUrl) { 74 | URI uri = new URI(url); 75 | bootstrapServerList.add(String.format("%s:%d", uri.getHost(), uri.getPort())); 76 | 77 | switch (uri.getScheme()) { 78 | case "kafka": 79 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "PLAINTEXT"); 80 | break; 81 | case "kafka+ssl": 82 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL"); 83 | EnvKeyStore envTrustStore = EnvKeyStore.createWithRandomPassword( 84 | HEROKU_KAFKA_TRUSTED_CERT); 85 | EnvKeyStore envKeyStore = EnvKeyStore.createWithRandomPassword( 86 | HEROKU_KAFKA_CLIENT_CERT_KEY, HEROKU_KAFKA_CLIENT_CERT); 87 | 88 | File trustStoreFile = envTrustStore.storeTemp(); 89 | File keyStoreFile = envKeyStore.storeTemp(); 90 | 91 | properties.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, envTrustStore.type()); 92 | properties.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, 93 | trustStoreFile.getAbsolutePath()); 94 | properties.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, envTrustStore.password()); 95 | properties.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, envKeyStore.type()); 96 | properties.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, keyStoreFile.getAbsolutePath()); 97 | properties.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, envKeyStore.password()); 98 | break; 99 | default: 100 | throw new URISyntaxException(uri.getScheme(), "Unknown URI scheme"); 101 | } 102 | } 103 | 104 | bootstrapServers = Joiner.on(",").join(bootstrapServerList); 105 | 106 | return properties; 107 | } 108 | 109 | private Properties buildKafkaStreamsDefaults() { 110 | Properties properties = new Properties(); 111 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG, 112 | String.format("%stext-processor-app", HEROKU_KAFKA_PREFIX)); 113 | properties.put(StreamsConfig.CLIENT_ID_CONFIG, 114 | String.format("%stext-processor-client", HEROKU_KAFKA_PREFIX)); 115 | 
properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 116 | properties.put( 117 | StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, 118 | Serdes.String().getClass().getName()); 119 | properties.put( 120 | StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, 121 | Serdes.String().getClass().getName()); 122 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000); 123 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, 124 | WallclockTimestampExtractor.class); 125 | 126 | return properties; 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /streams-anomaly-detector/src/main/java/io/jeffchao/streams/anomalydetector/AnomalyDetectorConfig.java: -------------------------------------------------------------------------------- 1 | package io.jeffchao.streams.anomalydetector; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.net.URI; 6 | import java.net.URISyntaxException; 7 | import java.security.KeyStoreException; 8 | import java.security.NoSuchAlgorithmException; 9 | import java.security.cert.CertificateException; 10 | import java.util.List; 11 | import java.util.Optional; 12 | import java.util.Properties; 13 | 14 | import com.google.common.base.Joiner; 15 | import com.google.common.base.Preconditions; 16 | import com.google.common.base.Splitter; 17 | import com.google.common.collect.Lists; 18 | import com.heroku.sdk.EnvKeyStore; 19 | import org.apache.kafka.clients.CommonClientConfigs; 20 | import org.apache.kafka.common.config.SslConfigs; 21 | import org.apache.kafka.common.serialization.Serdes; 22 | import org.apache.kafka.streams.StreamsConfig; 23 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | 28 | class AnomalyDetectorConfig extends Properties { 29 | 30 | private static final Logger log = LoggerFactory.getLogger(AnomalyDetectorConfig.class); 31 | 
32 | private static final String ADDON_SUFFIX = Optional.ofNullable( 33 | System.getenv("ADDON_SUFFIX")).orElse(""); 34 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX); 35 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable( 36 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse(""); 37 | private static final String HEROKU_KAFKA_URL = String.format("%s_URL", HEROKU_KAFKA); 38 | private static final String HEROKU_KAFKA_TRUSTED_CERT = 39 | String.format("%s_TRUSTED_CERT", HEROKU_KAFKA); 40 | private static final String HEROKU_KAFKA_CLIENT_CERT_KEY = 41 | String.format("%s_CLIENT_CERT_KEY", HEROKU_KAFKA); 42 | private static final String HEROKU_KAFKA_CLIENT_CERT = 43 | String.format("%s_CLIENT_CERT", HEROKU_KAFKA); 44 | 45 | private String bootstrapServers; 46 | 47 | Properties getProperties() throws URISyntaxException, CertificateException, 48 | NoSuchAlgorithmException, KeyStoreException, IOException { 49 | return buildDefaults(); 50 | } 51 | 52 | private Properties buildDefaults() throws CertificateException, NoSuchAlgorithmException, 53 | KeyStoreException, IOException, URISyntaxException { 54 | Properties defaultProperties = new Properties(); 55 | Properties herokuKafkaConfigVarProperties = buildHerokuKafkaConfigVars(); 56 | Properties kafkaStreamsProperties = buildKafkaStreamsDefaults(); 57 | 58 | defaultProperties.putAll(herokuKafkaConfigVarProperties); 59 | defaultProperties.putAll(kafkaStreamsProperties); 60 | 61 | 62 | return defaultProperties; 63 | } 64 | 65 | private Properties buildHerokuKafkaConfigVars() throws URISyntaxException, CertificateException, 66 | NoSuchAlgorithmException, KeyStoreException, IOException { 67 | Properties properties = new Properties(); 68 | List bootstrapServerList = Lists.newArrayList(); 69 | 70 | Iterable kafkaUrl = Splitter.on(",") 71 | .split(Preconditions.checkNotNull(System.getenv(HEROKU_KAFKA_URL))); 72 | 73 | for (String url : kafkaUrl) { 74 | 
URI uri = new URI(url); 75 | bootstrapServerList.add(String.format("%s:%d", uri.getHost(), uri.getPort())); 76 | 77 | switch (uri.getScheme()) { 78 | case "kafka": 79 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "PLAINTEXT"); 80 | break; 81 | case "kafka+ssl": 82 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL"); 83 | EnvKeyStore envTrustStore = EnvKeyStore.createWithRandomPassword( 84 | HEROKU_KAFKA_TRUSTED_CERT); 85 | EnvKeyStore envKeyStore = EnvKeyStore.createWithRandomPassword( 86 | HEROKU_KAFKA_CLIENT_CERT_KEY, HEROKU_KAFKA_CLIENT_CERT); 87 | 88 | File trustStoreFile = envTrustStore.storeTemp(); 89 | File keyStoreFile = envKeyStore.storeTemp(); 90 | 91 | properties.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, envTrustStore.type()); 92 | properties.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, 93 | trustStoreFile.getAbsolutePath()); 94 | properties.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, envTrustStore.password()); 95 | properties.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, envKeyStore.type()); 96 | properties.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, keyStoreFile.getAbsolutePath()); 97 | properties.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, envKeyStore.password()); 98 | break; 99 | default: 100 | throw new URISyntaxException(uri.getScheme(), "Unknown URI scheme"); 101 | } 102 | } 103 | 104 | bootstrapServers = Joiner.on(",").join(bootstrapServerList); 105 | 106 | return properties; 107 | } 108 | 109 | private Properties buildKafkaStreamsDefaults() { 110 | Properties properties = new Properties(); 111 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG, 112 | String.format("%sanomaly-detector-app", HEROKU_KAFKA_PREFIX)); 113 | properties.put(StreamsConfig.CLIENT_ID_CONFIG, 114 | String.format("%sanomaly-detector-client", HEROKU_KAFKA_PREFIX)); 115 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 116 | properties.put( 117 | StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, 118 | 
Serdes.String().getClass().getName()); 119 | properties.put( 120 | StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, 121 | Serdes.String().getClass().getName()); 122 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000); 123 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, 124 | WallclockTimestampExtractor.class); 125 | 126 | return properties; 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 
48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? 
-ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /codequality/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 71 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 115 | 116 | 117 | 119 | 120 | 121 | 122 | 124 | 125 | 126 | 127 | 129 | 130 | 131 | 132 | 134 | 135 | 136 | 137 | 138 | 140 | 141 | 142 | 143 | 145 | 146 | 147 | 148 | 150 | 151 | 152 | 153 | 155 | 156 | 157 | 158 | 160 | 162 | 164 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 
181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | -------------------------------------------------------------------------------- /data-generators/text-generator/alice-in-wonderland.info: -------------------------------------------------------------------------------- 1 | Project Gutenberg’s Alice’s Adventures in Wonderland, by Lewis Carroll 2 | 3 | This eBook is for the use of anyone anywhere at no cost and with 4 | almost no restrictions whatsoever. You may copy it, give it away or 5 | re-use it under the terms of the Project Gutenberg License included 6 | with this eBook or online at www.gutenberg.org 7 | 8 | 9 | Title: Alice’s Adventures in Wonderland 10 | 11 | Author: Lewis Carroll 12 | 13 | Posting Date: June 25, 2008 [EBook #11] 14 | Release Date: March, 1994 15 | Last Updated: October 6, 2016 16 | 17 | Language: English 18 | 19 | Character set encoding: UTF-8 20 | 21 | *** START OF THIS PROJECT GUTENBERG EBOOK ALICE’S ADVENTURES IN WONDERLAND *** 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | ALICE’S ADVENTURES IN WONDERLAND 33 | 34 | Lewis Carroll 35 | 36 | THE MILLENNIUM FULCRUM EDITION 3.0 37 | 38 | 39 | 40 | 41 | 42 | (see alice-in-wonderland.txt) 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | End of Project Gutenberg’s Alice’s Adventures in Wonderland, by Lewis Carroll 51 | 52 | *** END OF THIS PROJECT GUTENBERG EBOOK ALICE’S ADVENTURES IN WONDERLAND *** 53 | 54 | ***** This file should be named 11-0.txt or 11-0.zip ***** 55 | This and all associated files of various formats will be found in: 56 | http://www.gutenberg.org/1/11/ 57 | 58 | 59 | 60 | Updated editions will replace the previous one--the old editions 61 | will be renamed. 
62 | 63 | Creating the works from public domain print editions means that no 64 | one owns a United States copyright in these works, so the Foundation 65 | (and you!) can copy and distribute it in the United States without 66 | permission and without paying copyright royalties. Special rules, 67 | set forth in the General Terms of Use part of this license, apply to 68 | copying and distributing Project Gutenberg-tm electronic works to 69 | protect the PROJECT GUTENBERG-tm concept and trademark. Project 70 | Gutenberg is a registered trademark, and may not be used if you 71 | charge for the eBooks, unless you receive specific permission. If you 72 | do not charge anything for copies of this eBook, complying with the 73 | rules is very easy. You may use this eBook for nearly any purpose 74 | such as creation of derivative works, reports, performances and 75 | research. They may be modified and printed and given away--you may do 76 | practically ANYTHING with public domain eBooks. Redistribution is 77 | subject to the trademark license, especially commercial 78 | redistribution. 79 | 80 | 81 | 82 | *** START: FULL LICENSE *** 83 | 84 | THE FULL PROJECT GUTENBERG LICENSE 85 | PLEASE READ THIS BEFORE YOU DISTRIBUTE OR USE THIS WORK 86 | 87 | To protect the Project Gutenberg-tm mission of promoting the free 88 | distribution of electronic works, by using or distributing this work 89 | (or any other work associated in any way with the phrase “Project 90 | Gutenberg”), you agree to comply with all the terms of the Full Project 91 | Gutenberg-tm License (available with this file or online at 92 | http://gutenberg.org/license). 93 | 94 | 95 | Section 1. General Terms of Use and Redistributing Project Gutenberg-tm 96 | electronic works 97 | 98 | 1.A. 
By reading or using any part of this Project Gutenberg-tm 99 | electronic work, you indicate that you have read, understand, agree to 100 | and accept all the terms of this license and intellectual property 101 | (trademark/copyright) agreement. If you do not agree to abide by all 102 | the terms of this agreement, you must cease using and return or destroy 103 | all copies of Project Gutenberg-tm electronic works in your possession. 104 | If you paid a fee for obtaining a copy of or access to a Project 105 | Gutenberg-tm electronic work and you do not agree to be bound by the 106 | terms of this agreement, you may obtain a refund from the person or 107 | entity to whom you paid the fee as set forth in paragraph 1.E.8. 108 | 109 | 1.B. “Project Gutenberg” is a registered trademark. It may only be 110 | used on or associated in any way with an electronic work by people who 111 | agree to be bound by the terms of this agreement. There are a few 112 | things that you can do with most Project Gutenberg-tm electronic works 113 | even without complying with the full terms of this agreement. See 114 | paragraph 1.C below. There are a lot of things you can do with Project 115 | Gutenberg-tm electronic works if you follow the terms of this agreement 116 | and help preserve free future access to Project Gutenberg-tm electronic 117 | works. See paragraph 1.E below. 118 | 119 | 1.C. The Project Gutenberg Literary Archive Foundation (“the Foundation” 120 | or PGLAF), owns a compilation copyright in the collection of Project 121 | Gutenberg-tm electronic works. Nearly all the individual works in the 122 | collection are in the public domain in the United States. 
If an 123 | individual work is in the public domain in the United States and you are 124 | located in the United States, we do not claim a right to prevent you from 125 | copying, distributing, performing, displaying or creating derivative 126 | works based on the work as long as all references to Project Gutenberg 127 | are removed. Of course, we hope that you will support the Project 128 | Gutenberg-tm mission of promoting free access to electronic works by 129 | freely sharing Project Gutenberg-tm works in compliance with the terms of 130 | this agreement for keeping the Project Gutenberg-tm name associated with 131 | the work. You can easily comply with the terms of this agreement by 132 | keeping this work in the same format with its attached full Project 133 | Gutenberg-tm License when you share it without charge with others. 134 | 135 | 1.D. The copyright laws of the place where you are located also govern 136 | what you can do with this work. Copyright laws in most countries are in 137 | a constant state of change. If you are outside the United States, check 138 | the laws of your country in addition to the terms of this agreement 139 | before downloading, copying, displaying, performing, distributing or 140 | creating derivative works based on this work or any other Project 141 | Gutenberg-tm work. The Foundation makes no representations concerning 142 | the copyright status of any work in any country outside the United 143 | States. 144 | 145 | 1.E. Unless you have removed all references to Project Gutenberg: 146 | 147 | 1.E.1. 
The following sentence, with active links to, or other immediate 148 | access to, the full Project Gutenberg-tm License must appear prominently 149 | whenever any copy of a Project Gutenberg-tm work (any work on which the 150 | phrase “Project Gutenberg” appears, or with which the phrase “Project 151 | Gutenberg” is associated) is accessed, displayed, performed, viewed, 152 | copied or distributed: 153 | 154 | This eBook is for the use of anyone anywhere at no cost and with 155 | almost no restrictions whatsoever. You may copy it, give it away or 156 | re-use it under the terms of the Project Gutenberg License included 157 | with this eBook or online at www.gutenberg.org 158 | 159 | 1.E.2. If an individual Project Gutenberg-tm electronic work is derived 160 | from the public domain (does not contain a notice indicating that it is 161 | posted with permission of the copyright holder), the work can be copied 162 | and distributed to anyone in the United States without paying any fees 163 | or charges. If you are redistributing or providing access to a work 164 | with the phrase “Project Gutenberg” associated with or appearing on the 165 | work, you must comply either with the requirements of paragraphs 1.E.1 166 | through 1.E.7 or obtain permission for the use of the work and the 167 | Project Gutenberg-tm trademark as set forth in paragraphs 1.E.8 or 168 | 1.E.9. 169 | 170 | 1.E.3. If an individual Project Gutenberg-tm electronic work is posted 171 | with the permission of the copyright holder, your use and distribution 172 | must comply with both paragraphs 1.E.1 through 1.E.7 and any additional 173 | terms imposed by the copyright holder. Additional terms will be linked 174 | to the Project Gutenberg-tm License for all works posted with the 175 | permission of the copyright holder found at the beginning of this work. 176 | 177 | 1.E.4. 
Do not unlink or detach or remove the full Project Gutenberg-tm 178 | License terms from this work, or any files containing a part of this 179 | work or any other work associated with Project Gutenberg-tm. 180 | 181 | 1.E.5. Do not copy, display, perform, distribute or redistribute this 182 | electronic work, or any part of this electronic work, without 183 | prominently displaying the sentence set forth in paragraph 1.E.1 with 184 | active links or immediate access to the full terms of the Project 185 | Gutenberg-tm License. 186 | 187 | 1.E.6. You may convert to and distribute this work in any binary, 188 | compressed, marked up, nonproprietary or proprietary form, including any 189 | word processing or hypertext form. However, if you provide access to or 190 | distribute copies of a Project Gutenberg-tm work in a format other than 191 | “Plain Vanilla ASCII” or other format used in the official version 192 | posted on the official Project Gutenberg-tm web site (www.gutenberg.org), 193 | you must, at no additional cost, fee or expense to the user, provide a 194 | copy, a means of exporting a copy, or a means of obtaining a copy upon 195 | request, of the work in its original “Plain Vanilla ASCII” or other 196 | form. Any alternate format must include the full Project Gutenberg-tm 197 | License as specified in paragraph 1.E.1. 198 | 199 | 1.E.7. Do not charge a fee for access to, viewing, displaying, 200 | performing, copying or distributing any Project Gutenberg-tm works 201 | unless you comply with paragraph 1.E.8 or 1.E.9. 202 | 203 | 1.E.8. You may charge a reasonable fee for copies of or providing 204 | access to or distributing Project Gutenberg-tm electronic works provided 205 | that 206 | 207 | - You pay a royalty fee of 20% of the gross profits you derive from 208 | the use of Project Gutenberg-tm works calculated using the method 209 | you already use to calculate your applicable taxes. 
The fee is 210 | owed to the owner of the Project Gutenberg-tm trademark, but he 211 | has agreed to donate royalties under this paragraph to the 212 | Project Gutenberg Literary Archive Foundation. Royalty payments 213 | must be paid within 60 days following each date on which you 214 | prepare (or are legally required to prepare) your periodic tax 215 | returns. Royalty payments should be clearly marked as such and 216 | sent to the Project Gutenberg Literary Archive Foundation at the 217 | address specified in Section 4, “Information about donations to 218 | the Project Gutenberg Literary Archive Foundation.” 219 | 220 | - You provide a full refund of any money paid by a user who notifies 221 | you in writing (or by e-mail) within 30 days of receipt that s/he 222 | does not agree to the terms of the full Project Gutenberg-tm 223 | License. You must require such a user to return or 224 | destroy all copies of the works possessed in a physical medium 225 | and discontinue all use of and all access to other copies of 226 | Project Gutenberg-tm works. 227 | 228 | - You provide, in accordance with paragraph 1.F.3, a full refund of any 229 | money paid for a work or a replacement copy, if a defect in the 230 | electronic work is discovered and reported to you within 90 days 231 | of receipt of the work. 232 | 233 | - You comply with all other terms of this agreement for free 234 | distribution of Project Gutenberg-tm works. 235 | 236 | 1.E.9. If you wish to charge a fee or distribute a Project Gutenberg-tm 237 | electronic work or group of works on different terms than are set 238 | forth in this agreement, you must obtain permission in writing from 239 | both the Project Gutenberg Literary Archive Foundation and Michael 240 | Hart, the owner of the Project Gutenberg-tm trademark. Contact the 241 | Foundation as set forth in Section 3 below. 242 | 243 | 1.F. 244 | 245 | 1.F.1. 
Project Gutenberg volunteers and employees expend considerable 246 | effort to identify, do copyright research on, transcribe and proofread 247 | public domain works in creating the Project Gutenberg-tm 248 | collection. Despite these efforts, Project Gutenberg-tm electronic 249 | works, and the medium on which they may be stored, may contain 250 | “Defects,” such as, but not limited to, incomplete, inaccurate or 251 | corrupt data, transcription errors, a copyright or other intellectual 252 | property infringement, a defective or damaged disk or other medium, a 253 | computer virus, or computer codes that damage or cannot be read by 254 | your equipment. 255 | 256 | 1.F.2. LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the “Right 257 | of Replacement or Refund” described in paragraph 1.F.3, the Project 258 | Gutenberg Literary Archive Foundation, the owner of the Project 259 | Gutenberg-tm trademark, and any other party distributing a Project 260 | Gutenberg-tm electronic work under this agreement, disclaim all 261 | liability to you for damages, costs and expenses, including legal 262 | fees. YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT 263 | LIABILITY, BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE 264 | PROVIDED IN PARAGRAPH F3. YOU AGREE THAT THE FOUNDATION, THE 265 | TRADEMARK OWNER, AND ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE 266 | LIABLE TO YOU FOR ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR 267 | INCIDENTAL DAMAGES EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH 268 | DAMAGE. 269 | 270 | 1.F.3. LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a 271 | defect in this electronic work within 90 days of receiving it, you can 272 | receive a refund of the money (if any) you paid for it by sending a 273 | written explanation to the person you received the work from. If you 274 | received the work on a physical medium, you must return the medium with 275 | your written explanation. 
The person or entity that provided you with 276 | the defective work may elect to provide a replacement copy in lieu of a 277 | refund. If you received the work electronically, the person or entity 278 | providing it to you may choose to give you a second opportunity to 279 | receive the work electronically in lieu of a refund. If the second copy 280 | is also defective, you may demand a refund in writing without further 281 | opportunities to fix the problem. 282 | 283 | 1.F.4. Except for the limited right of replacement or refund set forth 284 | in paragraph 1.F.3, this work is provided to you ‘AS-IS’ WITH NO OTHER 285 | WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 286 | WARRANTIES OF MERCHANTIBILITY OR FITNESS FOR ANY PURPOSE. 287 | 288 | 1.F.5. Some states do not allow disclaimers of certain implied 289 | warranties or the exclusion or limitation of certain types of damages. 290 | If any disclaimer or limitation set forth in this agreement violates the 291 | law of the state applicable to this agreement, the agreement shall be 292 | interpreted to make the maximum disclaimer or limitation permitted by 293 | the applicable state law. The invalidity or unenforceability of any 294 | provision of this agreement shall not void the remaining provisions. 295 | 296 | 1.F.6. 
INDEMNITY - You agree to indemnify and hold the Foundation, the 297 | trademark owner, any agent or employee of the Foundation, anyone 298 | providing copies of Project Gutenberg-tm electronic works in accordance 299 | with this agreement, and any volunteers associated with the production, 300 | promotion and distribution of Project Gutenberg-tm electronic works, 301 | harmless from all liability, costs and expenses, including legal fees, 302 | that arise directly or indirectly from any of the following which you do 303 | or cause to occur: (a) distribution of this or any Project Gutenberg-tm 304 | work, (b) alteration, modification, or additions or deletions to any 305 | Project Gutenberg-tm work, and (c) any Defect you cause. 306 | 307 | 308 | Section 2. Information about the Mission of Project Gutenberg-tm 309 | 310 | Project Gutenberg-tm is synonymous with the free distribution of 311 | electronic works in formats readable by the widest variety of computers 312 | including obsolete, old, middle-aged and new computers. It exists 313 | because of the efforts of hundreds of volunteers and donations from 314 | people in all walks of life. 315 | 316 | Volunteers and financial support to provide volunteers with the 317 | assistance they need, is critical to reaching Project Gutenberg-tm’s 318 | goals and ensuring that the Project Gutenberg-tm collection will 319 | remain freely available for generations to come. In 2001, the Project 320 | Gutenberg Literary Archive Foundation was created to provide a secure 321 | and permanent future for Project Gutenberg-tm and future generations. 322 | To learn more about the Project Gutenberg Literary Archive Foundation 323 | and how your efforts and donations can help, see Sections 3 and 4 324 | and the Foundation web page at http://www.pglaf.org. 325 | 326 | 327 | Section 3. 
Information about the Project Gutenberg Literary Archive 328 | Foundation 329 | 330 | The Project Gutenberg Literary Archive Foundation is a non profit 331 | 501(c)(3) educational corporation organized under the laws of the 332 | state of Mississippi and granted tax exempt status by the Internal 333 | Revenue Service. The Foundation’s EIN or federal tax identification 334 | number is 64-6221541. Its 501(c)(3) letter is posted at 335 | http://pglaf.org/fundraising. Contributions to the Project Gutenberg 336 | Literary Archive Foundation are tax deductible to the full extent 337 | permitted by U.S. federal laws and your state’s laws. 338 | 339 | The Foundation’s principal office is located at 4557 Melan Dr. S. 340 | Fairbanks, AK, 99712., but its volunteers and employees are scattered 341 | throughout numerous locations. Its business office is located at 342 | 809 North 1500 West, Salt Lake City, UT 84116, (801) 596-1887, email 343 | business@pglaf.org. Email contact links and up to date contact 344 | information can be found at the Foundation’s web site and official 345 | page at http://pglaf.org 346 | 347 | For additional contact information: 348 | Dr. Gregory B. Newby 349 | Chief Executive and Director 350 | gbnewby@pglaf.org 351 | 352 | 353 | Section 4. Information about Donations to the Project Gutenberg 354 | Literary Archive Foundation 355 | 356 | Project Gutenberg-tm depends upon and cannot survive without wide 357 | spread public support and donations to carry out its mission of 358 | increasing the number of public domain and licensed works that can be 359 | freely distributed in machine readable form accessible by the widest 360 | array of equipment including outdated equipment. Many small donations 361 | ($1 to $5,000) are particularly important to maintaining tax exempt 362 | status with the IRS. 
363 | 364 | The Foundation is committed to complying with the laws regulating 365 | charities and charitable donations in all 50 states of the United 366 | States. Compliance requirements are not uniform and it takes a 367 | considerable effort, much paperwork and many fees to meet and keep up 368 | with these requirements. We do not solicit donations in locations 369 | where we have not received written confirmation of compliance. To 370 | SEND DONATIONS or determine the status of compliance for any 371 | particular state visit http://pglaf.org 372 | 373 | While we cannot and do not solicit contributions from states where we 374 | have not met the solicitation requirements, we know of no prohibition 375 | against accepting unsolicited donations from donors in such states who 376 | approach us with offers to donate. 377 | 378 | International donations are gratefully accepted, but we cannot make 379 | any statements concerning tax treatment of donations received from 380 | outside the United States. U.S. laws alone swamp our small staff. 381 | 382 | Please check the Project Gutenberg Web pages for current donation 383 | methods and addresses. Donations are accepted in a number of other 384 | ways including checks, online payments and credit card donations. 385 | To donate, please visit: http://pglaf.org/donate 386 | 387 | 388 | Section 5. General Information About Project Gutenberg-tm electronic 389 | works. 390 | 391 | Professor Michael S. Hart is the originator of the Project Gutenberg-tm 392 | concept of a library of electronic works that could be freely shared 393 | with anyone. For thirty years, he produced and distributed Project 394 | Gutenberg-tm eBooks with only a loose network of volunteer support. 395 | 396 | 397 | Project Gutenberg-tm eBooks are often created from several printed 398 | editions, all of which are confirmed as Public Domain in the U.S. 399 | unless a copyright notice is included. 
Thus, we do not necessarily 400 | keep eBooks in compliance with any particular paper edition. 401 | 402 | 403 | Most people start at our Web site which has the main PG search facility: 404 | 405 | http://www.gutenberg.org 406 | 407 | This Web site includes information about Project Gutenberg-tm, 408 | including how to make donations to the Project Gutenberg Literary 409 | Archive Foundation, how to help produce our new eBooks, and how to 410 | subscribe to our email newsletter to hear about new eBooks. 411 | --------------------------------------------------------------------------------