├── streams-aggregator
├── README.md
├── src
│ └── main
│ │ ├── resources
│ │ └── log4j2.xml
│ │ └── java
│ │ └── io
│ │ └── jeffchao
│ │ └── streams
│ │ └── aggregator
│ │ ├── Aggregator.java
│ │ ├── sinks
│ │ └── PostgresSink.java
│ │ └── AggregatorConfig.java
└── build.gradle
├── streams-text-processor
├── README.md
├── src
│ ├── test
│ │ └── java
│ │ │ └── io
│ │ │ └── jeffchao
│ │ │ └── streams
│ │ │ └── textprocessor
│ │ │ └── TextProcessorTest.java
│ └── main
│ │ ├── resources
│ │ └── log4j2.xml
│ │ └── java
│ │ └── io
│ │ └── jeffchao
│ │ └── streams
│ │ └── textprocessor
│ │ ├── TextProcessor.java
│ │ └── TextProcessorConfig.java
└── build.gradle
├── streams-anomaly-detector
├── README.md
├── src
│ └── main
│ │ ├── resources
│ │ └── log4j2.xml
│ │ └── java
│ │ └── io
│ │ └── jeffchao
│ │ └── streams
│ │ └── anomalydetector
│ │ ├── sinks
│ │ ├── AlertSink.java
│ │ └── EmailSink.java
│ │ ├── AnomalyDetector.java
│ │ └── AnomalyDetectorConfig.java
└── build.gradle
├── Gemfile
├── gradle
├── heroku
│ ├── clean.gradle
│ └── stage.gradle
├── wrapper
│ ├── gradle-wrapper.jar
│ └── gradle-wrapper.properties
└── check.gradle
├── settings.gradle
├── Procfile
├── Gemfile.lock
├── data-generators
├── text-generator
│ ├── stream-lines-to-kafka.rb
│ └── alice-in-wonderland.info
├── stream-to-kafka.rb
└── log-generator
│ └── stream-logs-to-kafka.rb
├── LICENSE
├── .gitignore
├── setup
├── gradlew.bat
├── README.md
├── gradlew
└── codequality
└── checkstyle.xml
/streams-aggregator/README.md:
--------------------------------------------------------------------------------
1 | # streams-aggregator
--------------------------------------------------------------------------------
/streams-text-processor/README.md:
--------------------------------------------------------------------------------
1 | # streams-text-processor
--------------------------------------------------------------------------------
/streams-anomaly-detector/README.md:
--------------------------------------------------------------------------------
1 | # streams-anomaly-detector
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 |
3 | gem 'ruby-kafka'
4 | gem 'faker'
5 |
--------------------------------------------------------------------------------
/gradle/heroku/clean.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'base'
2 |
3 | clean.doLast {
4 | delete rootProject.buildDir
5 | }
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lyric/kafka-streams-on-heroku/master/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | rootProject.name = 'kafka-streams-on-heroku'
2 |
3 | include 'streams-text-processor'
4 | include 'streams-anomaly-detector'
5 | include 'streams-aggregator'
6 |
--------------------------------------------------------------------------------
/streams-text-processor/src/test/java/io/jeffchao/streams/textprocessor/TextProcessorTest.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.textprocessor;
2 |
3 |
4 | public class TextProcessorTest {
5 |
6 | }
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | text_processor_worker: java -jar build/libs/streams-text-processor-all.jar
2 | anomaly_detector_worker: java -jar build/libs/streams-anomaly-detector-all.jar
3 | aggregator_worker: java -jar build/libs/streams-aggregator-all.jar
4 |
--------------------------------------------------------------------------------
/gradle/heroku/stage.gradle:
--------------------------------------------------------------------------------
1 | task stage(dependsOn: ['clean', 'shadowJar'])
2 |
3 | task copyToLib(type: Copy) {
4 | from "$buildDir/libs"
5 | into "$rootProject.buildDir/libs"
6 | }
7 | copyToLib.dependsOn(shadowJar)
8 | stage.dependsOn(copyToLib)
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Wed Nov 01 15:30:19 PDT 2017
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.3-all.zip
7 |
--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
1 | GEM
2 | remote: https://rubygems.org/
3 | specs:
4 | concurrent-ruby (1.0.5)
5 | faker (1.8.5)
6 | i18n (~> 0.9.1)
7 | i18n (0.9.1)
8 | concurrent-ruby (~> 1.0)
9 | ruby-kafka (0.5.1)
10 |
11 | PLATFORMS
12 | ruby
13 |
14 | DEPENDENCIES
15 | faker
16 | ruby-kafka
17 |
18 | BUNDLED WITH
19 | 1.16.0
20 |
--------------------------------------------------------------------------------
/gradle/check.gradle:
--------------------------------------------------------------------------------
1 | subprojects {
2 | apply plugin: 'checkstyle'
3 | checkstyle {
4 | ignoreFailures = true
5 | configFile = rootProject.file('codequality/checkstyle.xml')
6 | toolVersion = '8.4'
7 | }
8 |
9 | apply plugin: 'findbugs'
10 | findbugs {
11 | ignoreFailures = true
12 | }
13 |
14 | apply plugin: 'pmd'
15 | }
16 |
--------------------------------------------------------------------------------
/streams-text-processor/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'application'
2 |
3 | mainClassName = 'io.jeffchao.streams.textprocessor.TextProcessor'
4 |
5 | dependencies {
6 | compile 'org.apache.kafka:kafka-streams:1.0.0'
7 | compile 'com.heroku.sdk:env-keystore:1.0.0'
8 | }
9 |
10 | run.doFirst {
11 | environment "ADDON_SUFFIX", ""
12 | environment "HEROKU_KAFKA_URL", "kafka://localhost:9092"
13 | }
--------------------------------------------------------------------------------
/data-generators/text-generator/stream-lines-to-kafka.rb:
--------------------------------------------------------------------------------
1 | require_relative '../stream-to-kafka'
2 |
3 | initialize_kafka
4 |
5 | puts "Reading text file."
6 | count = 0
7 | ARGF.each_line do |line|
8 | line.strip!
9 | unless line.empty?
10 | produce(line, to: 'textlines')
11 | count += 1
12 | end
13 | print '.' if (ARGF.lineno % 100) == 0
14 | end
15 |
16 | puts "\nRead #{count} (non-blank) lines."
17 |
--------------------------------------------------------------------------------
/streams-aggregator/src/main/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Configuration status="WARN">
3 |   <Appenders>
4 |     <Console name="Console" target="SYSTEM_OUT">
5 |       <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
6 |     </Console>
7 |   </Appenders>
8 |   <Loggers>
9 |     <Root level="info">
10 |       <AppenderRef ref="Console"/>
11 |     </Root>
12 |   </Loggers>
13 | </Configuration>
--------------------------------------------------------------------------------
/streams-anomaly-detector/src/main/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Configuration status="WARN">
3 |   <Appenders>
4 |     <Console name="Console" target="SYSTEM_OUT">
5 |       <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
6 |     </Console>
7 |   </Appenders>
8 |   <Loggers>
9 |     <Root level="info">
10 |       <AppenderRef ref="Console"/>
11 |     </Root>
12 |   </Loggers>
13 | </Configuration>
--------------------------------------------------------------------------------
/streams-text-processor/src/main/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Configuration status="WARN">
3 |   <Appenders>
4 |     <Console name="Console" target="SYSTEM_OUT">
5 |       <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
6 |     </Console>
7 |   </Appenders>
8 |   <Loggers>
9 |     <Root level="info">
10 |       <AppenderRef ref="Console"/>
11 |     </Root>
12 |   </Loggers>
13 | </Configuration>
--------------------------------------------------------------------------------
/streams-anomaly-detector/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'application'
2 |
3 | mainClassName = 'io.jeffchao.streams.anomalydetector.AnomalyDetector'
4 |
5 | dependencies {
6 | compile 'org.apache.kafka:kafka-streams:1.0.0'
7 | compile 'com.heroku.sdk:env-keystore:1.0.0'
8 | compile 'com.sendgrid:sendgrid-java:3.0.9'
9 | }
10 |
11 | run.doFirst {
12 | environment "ADDON_SUFFIX", ""
13 | environment "HEROKU_KAFKA_URL", "kafka://localhost:9092"
14 | }
15 |
--------------------------------------------------------------------------------
/streams-aggregator/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'application'
2 |
3 | mainClassName = 'io.jeffchao.streams.aggregator.Aggregator'
4 |
5 | dependencies {
6 | compile 'org.apache.kafka:kafka-streams:1.0.0'
7 | compile 'com.heroku.sdk:env-keystore:1.0.0'
8 | compile 'org.postgresql:postgresql:42.1.4.jre7'
9 | }
10 |
11 | run.doFirst {
12 | environment "ADDON_SUFFIX", ""
13 | environment "HEROKU_KAFKA_URL", "kafka://localhost:9092"
14 | environment "HEROKU_POSTGRESQL_URL", "postgres://localhost:5432/kafka_streams_dev"
15 | }
--------------------------------------------------------------------------------
/streams-anomaly-detector/src/main/java/io/jeffchao/streams/anomalydetector/sinks/AlertSink.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.anomalydetector.sinks;
2 |
3 | import org.apache.kafka.streams.kstream.Windowed;
4 | import org.apache.kafka.streams.processor.Processor;
5 | import org.apache.kafka.streams.processor.ProcessorContext;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 |
10 | public class AlertSink implements Processor<Windowed<String>, Long> {
11 |
12 | private static final Logger log = LoggerFactory.getLogger(AlertSink.class);
13 |
14 | @Override
15 | public void init(ProcessorContext context) {
16 | }
17 |
18 | @Override
19 | public void process(Windowed<String> key, Long value) {
20 | log.info("Too many login failures for {}, count: {}. Alerting to PagerDuty.",
21 | key.key(), value);
22 | }
23 |
24 | @Override
25 | public void punctuate(long timestamp) {
26 | }
27 |
28 | @Override
29 | public void close() {
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Jeff Chao
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/data-generators/stream-to-kafka.rb:
--------------------------------------------------------------------------------
1 | require 'kafka'
2 | require 'tempfile'
3 |
4 |
5 | def initialize_kafka
6 | tmp_ca_file = Tempfile.new('ca_certs')
7 | tmp_ca_file.write(ENV.fetch('HEROKU_KAFKA_TRUSTED_CERT'))
8 | tmp_ca_file.close
9 |
10 | producer_kafka = Kafka.new(
11 | seed_brokers: ENV.fetch('HEROKU_KAFKA_URL'),
12 | ssl_ca_cert_file_path: tmp_ca_file.path,
13 | ssl_client_cert: ENV.fetch('HEROKU_KAFKA_CLIENT_CERT'),
14 | ssl_client_cert_key: ENV.fetch('HEROKU_KAFKA_CLIENT_CERT_KEY')
15 | )
16 | $producer = producer_kafka.async_producer(
17 | delivery_interval: 1,
18 | max_buffer_size: 10_000,
19 | max_buffer_bytesize: 100_000_000,
20 | required_acks: :all
21 | )
22 | puts "Producer connected to Kafka."
23 |
24 | at_exit do
25 | $producer.shutdown
26 | tmp_ca_file.unlink
27 | puts "Producer shutdown."
28 | end
29 | end
30 |
31 | def kafka_topic(topic_name)
32 | kafka_topic = ENV.fetch('HEROKU_KAFKA_TOPIC', topic_name)
33 | if ENV['HEROKU_KAFKA_PREFIX']
34 | kafka_topic = ENV['HEROKU_KAFKA_PREFIX'] + kafka_topic
35 | end
36 |
37 | kafka_topic
38 | end
39 |
40 | def produce(message, to: 'textlines')
41 | $producer.produce(message, topic: kafka_topic(to))
42 | end
43 |
--------------------------------------------------------------------------------
/data-generators/log-generator/stream-logs-to-kafka.rb:
--------------------------------------------------------------------------------
1 | require_relative '../stream-to-kafka'
2 | require 'faker'
3 |
4 | # spec: user_id | timestamp | ip_address | action | message
5 | def log_line(action: 'login succeeded')
6 | user_id = Faker::Internet.user_name
7 | timestamp = Time.now
8 | ip_address = Faker::Internet.ip_v4_address
9 | message = Faker::Lorem.words(3,0).join(' ')
10 |
11 | "user_id: #{user_id} | timestamp: #{timestamp} | "\
12 | "ip_address: #{ip_address} | action: #{action} | message: #{message}"
13 | end
14 |
15 | def log_line_with_anomaly
16 | log_line(action: 'login failed')
17 | end
18 |
19 | initialize_kafka
20 |
21 | messages_per_second = ARGV[0]
22 | probability_of_anomality = ARGV[1].to_f * 100
23 |
24 | puts "Generating #{messages_per_second} log lines per second "\
25 | "with #{probability_of_anomality}% chance of anomaly. "\
26 | "CTRL-C to stop."
27 |
28 | count = 0
29 |
30 | trap "SIGINT" do
31 | puts "\nGenerated #{count} log lines. Stopping."
32 | exit 130
33 | end
34 |
35 | while true
36 | if rand(100) <= probability_of_anomality
37 | produce(log_line_with_anomaly, to: 'loglines')
38 | else
39 | produce(log_line, to: 'loglines')
40 | end
41 | count += 1
42 | print "#{count}..." if (count % 10) == 0
43 | sleep (1 / messages_per_second.to_f)
44 | end
45 |
--------------------------------------------------------------------------------
/streams-aggregator/src/main/java/io/jeffchao/streams/aggregator/Aggregator.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.aggregator;
2 |
3 | import java.io.IOException;
4 | import java.net.URISyntaxException;
5 | import java.security.KeyStoreException;
6 | import java.security.NoSuchAlgorithmException;
7 | import java.security.cert.CertificateException;
8 | import java.util.Optional;
9 | import java.util.Properties;
10 | import java.util.concurrent.TimeUnit;
11 |
12 | import io.jeffchao.streams.aggregator.sinks.PostgresSink;
13 | import org.apache.kafka.streams.KafkaStreams;
14 | import org.apache.kafka.streams.StreamsBuilder;
15 | import org.apache.kafka.streams.kstream.KStream;
16 | import org.apache.kafka.streams.kstream.Materialized;
17 | import org.apache.kafka.streams.kstream.TimeWindows;
18 | import org.apache.kafka.streams.kstream.Windowed;
19 | import org.slf4j.Logger;
20 | import org.slf4j.LoggerFactory;
21 |
22 |
23 | public class Aggregator {
24 |
25 | private static final Logger log = LoggerFactory.getLogger(Aggregator.class);
26 |
27 | private static final String ADDON_SUFFIX = Optional.ofNullable(
28 | System.getenv("ADDON_SUFFIX")).orElse("");
29 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX);
30 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable(
31 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse("");
32 |
33 | public static void main(String[] args) throws CertificateException, NoSuchAlgorithmException,
34 | KeyStoreException, IOException, URISyntaxException {
35 | Properties streamsConfig = new AggregatorConfig().getProperties();
36 |
37 | final StreamsBuilder builder = new StreamsBuilder();
38 |
39 | final KStream<Windowed<String>, String> words =
40 | builder.stream(String.format("%swords", HEROKU_KAFKA_PREFIX));
41 |
42 | words
43 | .groupBy((key, word) -> word)
44 | .windowedBy(TimeWindows.of(TimeUnit.SECONDS.toMillis(10)))
45 | .count(Materialized.as("windowed-counts"))
46 | .toStream()
47 | .process(PostgresSink::new);
48 |
49 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfig);
50 |
51 | streams.cleanUp();
52 | streams.start();
53 |
54 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
55 | }
56 |
57 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Intellij+iml ###
2 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
3 |
4 | # User-specific stuff:
5 | .idea/**/workspace.xml
6 | .idea/**/tasks.xml
7 | .idea/dictionaries
8 |
9 | # Sensitive or high-churn files:
10 | .idea/**/dataSources/
11 | .idea/**/dataSources.ids
12 | .idea/**/dataSources.xml
13 | .idea/**/dataSources.local.xml
14 | .idea/**/sqlDataSources.xml
15 | .idea/**/dynamic.xml
16 | .idea/**/uiDesigner.xml
17 |
18 | # Gradle:
19 | .idea/**/gradle.xml
20 | .idea/**/libraries
21 |
22 | ## File-based project format:
23 | *.iws
24 |
25 | ## Plugin-specific files:
26 |
27 | # IntelliJ
28 | /out/
29 |
30 | # mpeltonen/sbt-idea plugin
31 | .idea_modules/
32 |
33 | # Crashlytics plugin (for Android Studio and IntelliJ)
34 | com_crashlytics_export_strings.xml
35 | crashlytics.properties
36 | crashlytics-build.properties
37 | fabric.properties
38 |
39 | ### Intellij+iml Patch ###
40 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
41 |
42 | .idea/
43 |
44 | *.iml
45 | modules.xml
46 | .idea/misc.xml
47 | *.ipr
48 |
49 | ### Java ###
50 | # Compiled class file
51 | *.class
52 |
53 | # Log file
54 | *.log
55 |
56 | # Package Files #
57 | *.jar
58 | *.war
59 | *.ear
60 | *.zip
61 | *.tar.gz
62 | *.rar
63 |
64 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
65 | hs_err_pid*
66 |
67 | ### macOS ###
68 | *.DS_Store
69 | .AppleDouble
70 | .LSOverride
71 |
72 | # Icon must end with two \r
73 | Icon
74 |
75 | # Thumbnails
76 | ._*
77 |
78 | # Files that might appear in the root of a volume
79 | .DocumentRevisions-V100
80 | .fseventsd
81 | .Spotlight-V100
82 | .TemporaryItems
83 | .Trashes
84 | .VolumeIcon.icns
85 | .com.apple.timemachine.donotpresent
86 |
87 | # Directories potentially created on remote AFP share
88 | .AppleDB
89 | .AppleDesktop
90 | Network Trash Folder
91 | Temporary Items
92 | .apdisk
93 |
94 | ### Gradle ###
95 | .gradle
96 | **/build/
97 |
98 | # Ignore Gradle GUI config
99 | gradle-app.setting
100 |
101 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
102 | !gradle-wrapper.jar
103 |
104 | # Cache of project
105 | .gradletasknamecache
106 |
107 | ### Ruby ###
108 | .bundle
109 | vendor
110 |
--------------------------------------------------------------------------------
/streams-text-processor/src/main/java/io/jeffchao/streams/textprocessor/TextProcessor.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.textprocessor;
2 |
3 | import java.io.IOException;
4 | import java.net.URISyntaxException;
5 | import java.security.KeyStoreException;
6 | import java.security.NoSuchAlgorithmException;
7 | import java.security.cert.CertificateException;
8 | import java.util.Arrays;
9 | import java.util.Optional;
10 | import java.util.Properties;
11 | import java.util.regex.Pattern;
12 |
13 | import org.apache.kafka.common.serialization.Serde;
14 | import org.apache.kafka.common.serialization.Serdes;
15 | import org.apache.kafka.streams.KafkaStreams;
16 | import org.apache.kafka.streams.StreamsBuilder;
17 | import org.apache.kafka.streams.kstream.KStream;
18 | import org.apache.kafka.streams.kstream.Produced;
19 | import org.slf4j.Logger;
20 | import org.slf4j.LoggerFactory;
21 |
22 |
23 | public class TextProcessor {
24 |
25 | private static final Logger log = LoggerFactory.getLogger(TextProcessor.class);
26 |
27 | private static final String ADDON_SUFFIX = Optional.ofNullable(
28 | System.getenv("ADDON_SUFFIX")).orElse("");
29 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX);
30 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable(
31 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse("");
32 |
33 | public static void main(String[] args) throws CertificateException, NoSuchAlgorithmException,
34 | KeyStoreException, IOException, URISyntaxException {
35 | Properties streamsConfig = new TextProcessorConfig().getProperties();
36 |
37 | final Serde<String> stringSerde = Serdes.String();
38 |
39 | final StreamsBuilder builder = new StreamsBuilder();
40 |
41 | final KStream<String, String> textLines =
42 | builder.stream(String.format("%stextlines", HEROKU_KAFKA_PREFIX));
43 |
44 | final Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
45 |
46 | textLines
47 | .flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase())))
48 | .to(String.format("%swords", HEROKU_KAFKA_PREFIX), Produced.with(stringSerde, stringSerde));
49 |
50 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfig);
51 |
52 | streams.cleanUp();
53 | streams.start();
54 |
55 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
56 | }
57 |
58 | }
--------------------------------------------------------------------------------
/setup:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -e
4 |
5 | setup_dedicated () {
6 | heroku addons:create heroku-postgresql:hobby-dev --as HEROKU_POSTGRESQL -a ${APP_NAME} && \
7 | heroku addons:create heroku-kafka:${PLAN} --as HEROKU_KAFKA -a ${APP_NAME} && \
8 | heroku kafka:wait -a ${APP_NAME} && \
9 | heroku kafka:topics:create textlines --partitions 5 -a ${APP_NAME} && \
10 | heroku kafka:topics:create words --partitions 5 -a ${APP_NAME} && \
11 | heroku kafka:topics:create loglines --partitions 5 -a ${APP_NAME}
12 | }
13 |
14 | setup_multi_tenant () {
15 | heroku addons:create heroku-postgresql:hobby-dev --as HEROKU_POSTGRESQL -a ${APP_NAME} && \
16 | heroku addons:create heroku-kafka:${PLAN} --as HEROKU_KAFKA -a ${APP_NAME} && \
17 | heroku kafka:wait -a ${APP_NAME} && \
18 | heroku kafka:topics:create textlines --partitions 5 -a ${APP_NAME} && \
19 | heroku kafka:topics:create words --partitions 5 -a ${APP_NAME} && \
20 | heroku kafka:topics:create loglines --partitions 5 -a ${APP_NAME} && \
21 | heroku kafka:topics:create aggregator-app-windowed-counts-changelog --partitions 5 -a ${APP_NAME} && \
22 | heroku kafka:topics:create aggregator-app-windowed-counts-repartition --partitions 5 -a ${APP_NAME} && \
23 | heroku kafka:topics:create anomaly-detector-app-windowed-counts-changelog --partitions 5 -a ${APP_NAME} && \
24 | heroku kafka:topics:create anomaly-detector-app-windowed-counts-repartition --partitions 5 -a ${APP_NAME} && \
25 | heroku kafka:consumer-groups:create anomaly-detector-app -a ${APP_NAME} && \
26 | heroku kafka:consumer-groups:create text-processor-app -a ${APP_NAME} && \
27 | heroku kafka:consumer-groups:create aggregator-app -a ${APP_NAME}
28 | }
29 |
30 | if [[ -z $1 ]]; then
31 | echo "usage: $0 APP_NAME PLAN" >&2
32 | exit 1
33 | fi
34 |
35 | APP_NAME=$1
36 |
37 | if [[ $2 =~ (standard|basic|extended|private-standard|private-extended)-[012] ]]; then
38 | PLAN=$2
39 | else
40 | PLAN="basic-0"
41 | fi
42 |
43 | if [[ $PLAN =~ basic-[012] ]]; then
44 | setup_multi_tenant
45 | else
46 | setup_dedicated
47 | fi
48 |
49 | heroku pg:psql -c 'create table windowed_counts(id serial primary key not null, time_window bigint not null, word text, count bigint not null);' HEROKU_POSTGRESQL_URL -a ${APP_NAME}
50 |
51 | heroku ps:scale text_processor_worker=1 -a ${APP_NAME}
52 | heroku ps:scale aggregator_worker=1 -a ${APP_NAME}
53 | # heroku ps:scale anomaly_detector_worker=1 -a ${APP_NAME} # Can't run more than 2 free dynos.
54 |
--------------------------------------------------------------------------------
/streams-aggregator/src/main/java/io/jeffchao/streams/aggregator/sinks/PostgresSink.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.aggregator.sinks;
2 |
3 | import java.net.URI;
4 | import java.net.URISyntaxException;
5 | import java.sql.Connection;
6 | import java.sql.DriverManager;
7 | import java.sql.PreparedStatement;
8 | import java.sql.SQLException;
9 | import java.util.Optional;
10 |
11 | import org.apache.kafka.streams.kstream.Windowed;
12 | import org.apache.kafka.streams.processor.Processor;
13 | import org.apache.kafka.streams.processor.ProcessorContext;
14 | import org.slf4j.Logger;
15 | import org.slf4j.LoggerFactory;
16 |
17 |
18 | public class PostgresSink implements Processor<Windowed<String>, Long> {
19 |
20 | private static final Logger log = LoggerFactory.getLogger(PostgresSink.class);
21 |
22 | private Connection connection;
23 |
24 | private static Connection getConnection() throws URISyntaxException, SQLException {
25 | URI dbUri = new URI(System.getenv("HEROKU_POSTGRESQL_URL"));
26 |
27 | String[] userInfo = Optional.ofNullable(dbUri.getUserInfo()).orElse(":").split(":");
28 | String username = userInfo.length == 0 ? null : userInfo[0];
29 | String password = userInfo.length == 0 ? null : userInfo[1];
30 | String dbUrl = "jdbc:postgresql://" + dbUri.getHost() + ':' + dbUri.getPort() + dbUri.getPath();
31 |
32 | return DriverManager.getConnection(dbUrl, username, password);
33 | }
34 |
35 | @Override
36 | public void init(ProcessorContext context) {
37 | try {
38 | connection = getConnection();
39 | } catch (URISyntaxException | SQLException e) {
40 | e.printStackTrace();
41 | }
42 | }
43 |
44 | @Override
45 | public void process(Windowed<String> key, Long value) {
46 | log.info("writing to pg: window: {}, key: {}, value: {}", key.window(), key.key(), value);
47 | try {
48 | PreparedStatement statement = connection.prepareStatement(
49 | "INSERT INTO windowed_counts (time_window, word, count) VALUES (?, ?, ?)");
50 | statement.setLong(1, key.window().start());
51 | statement.setString(2, key.key());
52 | statement.setLong(3, value);
53 |
54 | statement.execute();
55 | } catch (SQLException e) {
56 | log.error(e.getMessage(), e);
57 | }
58 |
59 | }
60 |
61 | @Override
62 | public void punctuate(long timestamp) {
63 | }
64 |
65 | @Override
66 | public void close() {
67 | try {
68 | connection.close();
69 | } catch (SQLException e) {
70 | e.printStackTrace();
71 | }
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/streams-anomaly-detector/src/main/java/io/jeffchao/streams/anomalydetector/sinks/EmailSink.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.anomalydetector.sinks;
2 |
3 | import java.io.IOException;
4 |
5 | import com.google.common.base.Strings;
6 | import com.sendgrid.Content;
7 | import com.sendgrid.Email;
8 | import com.sendgrid.Mail;
9 | import com.sendgrid.Method;
10 | import com.sendgrid.Request;
11 | import com.sendgrid.SendGrid;
12 | import org.apache.kafka.streams.kstream.Windowed;
13 | import org.apache.kafka.streams.processor.Processor;
14 | import org.apache.kafka.streams.processor.ProcessorContext;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 |
18 |
19 | public class EmailSink implements Processor<Windowed<String>, Long> {
20 |
21 | private static final Logger log = LoggerFactory.getLogger(EmailSink.class);
22 |
23 | @Override
24 | public void init(ProcessorContext context) {
25 | }
26 |
27 | private Content generateContent(Windowed<String> key, Long value) {
28 | return new Content("text/plain", "Hello, our realtime anomaly detector "
29 | + "has detected an issue for " + key.key() + " with "
30 | + value + " failed login attempts");
31 | }
32 |
33 | private void sendEmail(Windowed<String> key, Long value) throws IOException {
34 | Email from = new Email("example@example.com");
35 | String subject = String.format("Anomaly detected for %s", value);
36 | Email to = new Email(System.getenv("TESTING_EMAIL"));
37 | Content content = generateContent(key, value);
38 | Mail mail = new Mail(from, subject, to, content);
39 |
40 | SendGrid sendGrid = new SendGrid(System.getenv("SENDGRID_API_KEY"));
41 | Request request = new Request();
42 | request.method = Method.POST;
43 | request.endpoint = "mail/send";
44 | request.body = mail.build();
45 | sendGrid.api(request);
46 | }
47 |
48 | @Override
49 | public void process(Windowed<String> key, Long value) {
50 | if (Strings.isNullOrEmpty(System.getenv("SENDGRID_API_KEY"))) {
51 | log.info(generateContent(key, value).getValue());
52 | } else {
53 | try {
54 | log.info("Sending email to {} with content {}",
55 | System.getenv("TESTING_EMAIL"),
56 | generateContent(key, value).toString());
57 | sendEmail(key, value);
58 | } catch (IOException e) {
59 | log.error(e.getMessage(), e);
60 | }
61 | }
62 | }
63 |
64 | @Override
65 | public void punctuate(long timestamp) {
66 | }
67 |
68 | @Override
69 | public void close() {
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
1 | @if "%DEBUG%" == "" @echo off
2 | @rem ##########################################################################
3 | @rem
4 | @rem Gradle startup script for Windows
5 | @rem
6 | @rem ##########################################################################
7 |
8 | @rem Set local scope for the variables with windows NT shell
9 | if "%OS%"=="Windows_NT" setlocal
10 |
11 | set DIRNAME=%~dp0
12 | if "%DIRNAME%" == "" set DIRNAME=.
13 | set APP_BASE_NAME=%~n0
14 | set APP_HOME=%DIRNAME%
15 |
16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17 | set DEFAULT_JVM_OPTS=
18 |
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 |
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 |
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 |
32 | goto fail
33 |
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 |
38 | if exist "%JAVA_EXE%" goto init
39 |
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 |
46 | goto fail
47 |
48 | :init
49 | @rem Get command-line arguments, handling Windows variants
50 |
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 |
53 | :win9xME_args
54 | @rem Slurp the command line arguments.
55 | set CMD_LINE_ARGS=
56 | set _SKIP=2
57 |
58 | :win9xME_args_slurp
59 | if "x%~1" == "x" goto execute
60 |
61 | set CMD_LINE_ARGS=%*
62 |
63 | :execute
64 | @rem Setup the command line
65 |
66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67 |
68 | @rem Execute Gradle
69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70 |
71 | :end
72 | @rem End local scope for the variables with windows NT shell
73 | if "%ERRORLEVEL%"=="0" goto mainEnd
74 |
75 | :fail
76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77 | rem the _cmd.exe /c_ return code!
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79 | exit /b 1
80 |
81 | :mainEnd
82 | if "%OS%"=="Windows_NT" endlocal
83 |
84 | :omega
85 |
--------------------------------------------------------------------------------
/streams-anomaly-detector/src/main/java/io/jeffchao/streams/anomalydetector/AnomalyDetector.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.anomalydetector;
2 |
3 | import java.io.IOException;
4 | import java.net.URISyntaxException;
5 | import java.security.KeyStoreException;
6 | import java.security.NoSuchAlgorithmException;
7 | import java.security.cert.CertificateException;
8 | import java.util.Optional;
9 | import java.util.Properties;
10 | import java.util.concurrent.TimeUnit;
11 |
12 | import io.jeffchao.streams.anomalydetector.sinks.AlertSink;
13 | import io.jeffchao.streams.anomalydetector.sinks.EmailSink;
14 | import org.apache.kafka.streams.KafkaStreams;
15 | import org.apache.kafka.streams.StreamsBuilder;
16 | import org.apache.kafka.streams.kstream.KStream;
17 | import org.apache.kafka.streams.kstream.Materialized;
18 | import org.apache.kafka.streams.kstream.TimeWindows;
19 | import org.apache.kafka.streams.kstream.Windowed;
20 | import org.slf4j.Logger;
21 | import org.slf4j.LoggerFactory;
22 |
23 |
24 | public class AnomalyDetector {
25 |
26 | private static final Logger log = LoggerFactory.getLogger(AnomalyDetector.class);
27 |
28 | private static final String ADDON_SUFFIX = Optional.ofNullable(
29 | System.getenv("ADDON_SUFFIX")).orElse("");
30 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX);
31 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable(
32 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse("");
33 |
34 | public static void main(String[] args) throws CertificateException, NoSuchAlgorithmException,
35 | KeyStoreException, IOException, URISyntaxException {
36 | Properties streamsConfig = new AnomalyDetectorConfig().getProperties();
37 |
38 | final StreamsBuilder builder = new StreamsBuilder();
39 |
40 | final KStream loglines =
41 | builder.stream( String.format("%sloglines", HEROKU_KAFKA_PREFIX));
42 |
43 | KStream, Long> anomalies = loglines
44 | .filter((key, value) -> value.contains("login failed"))
45 | .selectKey((key, value) -> value.split("\\|")[0])
46 | .groupByKey()
47 | .windowedBy(TimeWindows.of(TimeUnit.SECONDS.toMillis(10)))
48 | .count(Materialized.as("windowed-counts"))
49 | .toStream();
50 |
51 | @SuppressWarnings("unchecked")
52 | KStream, Long>[] branches = anomalies
53 | .branch(
54 | (key, value) -> value > 1,
55 | (key, value) -> value > 0
56 | );
57 |
58 | branches[0].process(AlertSink::new);
59 | branches[1].process(EmailSink::new);
60 |
61 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfig);
62 |
63 | streams.cleanUp();
64 | streams.start();
65 |
66 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
67 | }
68 |
69 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # kafka-streams-on-heroku
2 |
3 | Kafka Streams example on Heroku with a multi-project gradle build
4 |
5 | ## Dependencies
6 |
7 | 1. Postgres
8 | 2. Kafka (+ Zookeeper) 0.10+ (this uses 0.11 brokers against 1.0 client)
9 | 3. Java 8
10 | 4. Gradle 4.3 (use sdkman)
11 |
12 | ## Local Development
13 |
14 | ### Building
15 |
16 | ```
17 | $ ./gradlew clean build
18 | ```
19 |
20 | ### Testing
21 |
22 | ```
23 | $ ./gradlew clean test
24 | ```
25 |
26 | ### Building FatJar Artifacts
27 |
28 | ```
29 | $ ./gradlew clean stage
30 | ```
31 |
32 | ### Running Locally
33 |
34 | Topologies are organized as subprojects. You can run any or all of them
35 |
36 | ```
37 | (start postgres - optional, zookeeper - required, kafka - required)
38 | $ ./gradlew streams-text-processor:run
39 | $ ./gradlew streams-aggregator:run
40 | $ ./gradlew streams-anomaly-detector:run
41 |
42 | ```
43 |
44 | ## Deployment: Heroku
45 |
46 | ### Dependencies
47 |
48 | 1. Postgres
49 | 2. Kafka
50 | 3. Heroku CLI
51 | 4. Heroku Kafka CLI Plugin
52 |
53 | ### Config Vars
54 |
55 | 1. `SENDGRID_API_KEY` (optional via SendGrid addon)
56 | 2. `TESTING_EMAIL` (optional for sinking to a test email using SendGrid addon)
57 |
58 |
59 | ### Setup
60 |
61 | Install the Heroku CLI: https://devcenter.heroku.com/articles/heroku-cli
62 |
63 |
64 | Install the Heroku Kafka CLI Plugin:
65 |
66 | ```
67 | heroku plugins:install heroku-kafka
68 | ```
69 |
70 | Clone the application:
71 |
72 | ```
73 | $ git clone git@github.com:kissaten/kafka-streams-on-heroku.git
74 | ```
75 |
76 | Create the application:
77 |
78 | ```
79 | $ cd kafka-streams-on-heroku
80 | $ heroku apps:create
81 | $ heroku buildpacks:add heroku/ruby
82 | $ heroku buildpacks:add heroku/gradle
83 | ```
84 |
85 | Deploy the application:
86 |
87 | ```
88 | $ git push heroku master
89 | ```
90 |
91 | Run the setup script:
92 |
93 | ```
94 | $ ./setup
95 | ```
96 |
97 | ### Smoke Testing
98 |
99 | ```
100 | $ heroku kafka:topics:write [prefix]textlines "hello world" -a your-app-name
101 | $ heroku pg:psql -c 'select * from windowed_counts' HEROKU_POSTGRESQL_URL -a
102 | ```
103 |
104 | ### Example Use Cases
105 |
106 | Now let's use Kafka Streams with some example use cases. The `data-generators` directory contains some simple Ruby scripts to generate streams of data. Instructions on how to use them are below.
107 |
108 | #### Word Count
109 |
110 | First we'll do word count over a large stream of text. This will produce into Kafka lines from _Alice's Adventures in Wonderland_.
111 |
112 | ```bash
113 | $ heroku run ruby data-generators/text-generator/stream-lines-to-kafka.rb data-generators/text-generator/alice-in-wonderland.txt --app sushi
114 | ```
115 |
116 | Alternatively, if you have Ruby and Bundler installed locally, you can run the data generator locally
117 |
118 | ```bash
119 | $ bundle install --path=vendor/gems
120 | $ cd data-generators/text-generator
121 | $ HEROKU_KAFKA_URL=$(heroku config:get HEROKU_KAFKA_URL) \
122 | HEROKU_KAFKA_CLIENT_CERT=$(heroku config:get HEROKU_KAFKA_CLIENT_CERT) \
123 | HEROKU_KAFKA_CLIENT_CERT_KEY=$(heroku config:get HEROKU_KAFKA_CLIENT_CERT_KEY) \
124 | HEROKU_KAFKA_TRUSTED_CERT=$(heroku config:get HEROKU_KAFKA_TRUSTED_CERT) \
125 | HEROKU_KAFKA_PREFIX=$(heroku config:get HEROKU_KAFKA_PREFIX) \
126 | bundle exec ruby stream-lines-to-kafka.rb alice-in-wonderland.txt
127 | ```
128 |
129 | Now we can see the word count for specific time windows:
130 |
131 | ```bash
132 | $ heroku pg:psql -c 'select * from windowed_counts order by time_window desc' HEROKU_POSTGRESQL_URL
133 | ```
134 |
135 | #### Anomaly Detection
136 |
137 | Let's look at a more interesting use case -- not only because it is more realistic but also because it better showcases continuously updating calculations based on a stream of data. You'll need two separate terminal windows for this.
138 |
139 | In the first one, tail the Heroku application logs
140 | ```
141 | $ heroku logs --tail --app sushi
142 | ```
143 |
144 | In the second one, we'll generate some data. This will produce into Kafka fake log data at a rate of 10 messages per second with a 20% chance of anomaly.
145 |
146 | ```bash
147 | $ heroku run ruby data-generators/log-generator/stream-logs-to-kafka.rb 10 .2 --app sushi
148 | ```
149 |
150 | Alternatively, if you have Ruby and Bundler installed locally, you can run the data generator locally
151 |
152 | ```bash
153 | $ bundle install --path=vendor/gems
154 | $ cd data-generators/log-generator
155 | $ HEROKU_KAFKA_URL=$(heroku config:get HEROKU_KAFKA_URL) \
156 | HEROKU_KAFKA_CLIENT_CERT=$(heroku config:get HEROKU_KAFKA_CLIENT_CERT) \
157 | HEROKU_KAFKA_CLIENT_CERT_KEY=$(heroku config:get HEROKU_KAFKA_CLIENT_CERT_KEY) \
158 | HEROKU_KAFKA_TRUSTED_CERT=$(heroku config:get HEROKU_KAFKA_TRUSTED_CERT) \
159 | HEROKU_KAFKA_PREFIX=$(heroku config:get HEROKU_KAFKA_PREFIX) \
160 | bundle exec ruby stream-logs-to-kafka.rb 10 .2
161 | ```
162 |
163 | Looking at the Heroku applications logs, you will see STDOUT output showing an anomaly has been detected.
164 |
--------------------------------------------------------------------------------
/streams-aggregator/src/main/java/io/jeffchao/streams/aggregator/AggregatorConfig.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.aggregator;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.net.URI;
6 | import java.net.URISyntaxException;
7 | import java.security.KeyStoreException;
8 | import java.security.NoSuchAlgorithmException;
9 | import java.security.cert.CertificateException;
10 | import java.util.List;
11 | import java.util.Optional;
12 | import java.util.Properties;
13 |
14 | import com.google.common.base.Joiner;
15 | import com.google.common.base.Preconditions;
16 | import com.google.common.base.Splitter;
17 | import com.google.common.collect.Lists;
18 | import com.heroku.sdk.EnvKeyStore;
19 | import org.apache.kafka.clients.CommonClientConfigs;
20 | import org.apache.kafka.common.config.SslConfigs;
21 | import org.apache.kafka.common.serialization.Serdes;
22 | import org.apache.kafka.streams.StreamsConfig;
23 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor;
24 | import org.slf4j.Logger;
25 | import org.slf4j.LoggerFactory;
26 |
27 |
28 | class AggregatorConfig extends Properties {
29 |
30 | private static final Logger log = LoggerFactory.getLogger(AggregatorConfig.class);
31 |
32 | private static final String ADDON_SUFFIX = Optional.ofNullable(
33 | System.getenv("ADDON_SUFFIX")).orElse("");
34 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX);
35 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable(
36 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse("");
37 | private static final String HEROKU_KAFKA_URL = String.format("%s_URL", HEROKU_KAFKA);
38 | private static final String HEROKU_KAFKA_TRUSTED_CERT =
39 | String.format("%s_TRUSTED_CERT", HEROKU_KAFKA);
40 | private static final String HEROKU_KAFKA_CLIENT_CERT_KEY =
41 | String.format("%s_CLIENT_CERT_KEY", HEROKU_KAFKA);
42 | private static final String HEROKU_KAFKA_CLIENT_CERT =
43 | String.format("%s_CLIENT_CERT", HEROKU_KAFKA);
44 |
45 | private String bootstrapServers;
46 |
47 | Properties getProperties() throws URISyntaxException, CertificateException,
48 | NoSuchAlgorithmException, KeyStoreException, IOException {
49 | return buildDefaults();
50 | }
51 |
52 | private Properties buildDefaults() throws CertificateException, NoSuchAlgorithmException,
53 | KeyStoreException, IOException, URISyntaxException {
54 | Properties defaultProperties = new Properties();
55 | Properties herokuKafkaConfigVarProperties = buildHerokuKafkaConfigVars();
56 | Properties kafkaStreamsProperties = buildKafkaStreamsDefaults();
57 |
58 | defaultProperties.putAll(herokuKafkaConfigVarProperties);
59 | defaultProperties.putAll(kafkaStreamsProperties);
60 |
61 |
62 | return defaultProperties;
63 | }
64 |
65 | private Properties buildHerokuKafkaConfigVars() throws URISyntaxException, CertificateException,
66 | NoSuchAlgorithmException, KeyStoreException, IOException {
67 | Properties properties = new Properties();
68 | List bootstrapServerList = Lists.newArrayList();
69 |
70 | Iterable kafkaUrl = Splitter.on(",")
71 | .split(Preconditions.checkNotNull(System.getenv(HEROKU_KAFKA_URL)));
72 |
73 | for (String url : kafkaUrl) {
74 | URI uri = new URI(url);
75 | bootstrapServerList.add(String.format("%s:%d", uri.getHost(), uri.getPort()));
76 |
77 | switch (uri.getScheme()) {
78 | case "kafka":
79 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "PLAINTEXT");
80 | break;
81 | case "kafka+ssl":
82 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL");
83 | EnvKeyStore envTrustStore = EnvKeyStore.createWithRandomPassword(
84 | HEROKU_KAFKA_TRUSTED_CERT);
85 | EnvKeyStore envKeyStore = EnvKeyStore.createWithRandomPassword(
86 | HEROKU_KAFKA_CLIENT_CERT_KEY, HEROKU_KAFKA_CLIENT_CERT);
87 |
88 | File trustStoreFile = envTrustStore.storeTemp();
89 | File keyStoreFile = envKeyStore.storeTemp();
90 |
91 | properties.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, envTrustStore.type());
92 | properties.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG,
93 | trustStoreFile.getAbsolutePath());
94 | properties.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, envTrustStore.password());
95 | properties.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, envKeyStore.type());
96 | properties.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, keyStoreFile.getAbsolutePath());
97 | properties.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, envKeyStore.password());
98 | break;
99 | default:
100 | throw new URISyntaxException(uri.getScheme(), "Unknown URI scheme");
101 | }
102 | }
103 |
104 | bootstrapServers = Joiner.on(",").join(bootstrapServerList);
105 |
106 | return properties;
107 | }
108 |
109 | private Properties buildKafkaStreamsDefaults() {
110 | Properties properties = new Properties();
111 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG,
112 | String.format("%saggregator-app", HEROKU_KAFKA_PREFIX));
113 | properties.put(StreamsConfig.CLIENT_ID_CONFIG,
114 | String.format("%saggregator-client", ""));
115 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
116 | properties.put(
117 | StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG,
118 | Serdes.String().getClass().getName());
119 | properties.put(
120 | StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG,
121 | Serdes.String().getClass().getName());
122 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
123 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG,
124 | WallclockTimestampExtractor.class);
125 |
126 | return properties;
127 | }
128 | }
129 |
--------------------------------------------------------------------------------
/streams-text-processor/src/main/java/io/jeffchao/streams/textprocessor/TextProcessorConfig.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.textprocessor;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.net.URI;
6 | import java.net.URISyntaxException;
7 | import java.security.KeyStoreException;
8 | import java.security.NoSuchAlgorithmException;
9 | import java.security.cert.CertificateException;
10 | import java.util.List;
11 | import java.util.Optional;
12 | import java.util.Properties;
13 |
14 | import com.google.common.base.Joiner;
15 | import com.google.common.base.Preconditions;
16 | import com.google.common.base.Splitter;
17 | import com.google.common.collect.Lists;
18 | import com.heroku.sdk.EnvKeyStore;
19 | import org.apache.kafka.clients.CommonClientConfigs;
20 | import org.apache.kafka.common.config.SslConfigs;
21 | import org.apache.kafka.common.serialization.Serdes;
22 | import org.apache.kafka.streams.StreamsConfig;
23 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor;
24 | import org.slf4j.Logger;
25 | import org.slf4j.LoggerFactory;
26 |
27 |
28 | class TextProcessorConfig extends Properties {
29 |
30 | private static final Logger log = LoggerFactory.getLogger(TextProcessorConfig.class);
31 |
32 | private static final String ADDON_SUFFIX = Optional.ofNullable(
33 | System.getenv("ADDON_SUFFIX")).orElse("");
34 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX);
35 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable(
36 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse("");
37 | private static final String HEROKU_KAFKA_URL = String.format("%s_URL", HEROKU_KAFKA);
38 | private static final String HEROKU_KAFKA_TRUSTED_CERT =
39 | String.format("%s_TRUSTED_CERT", HEROKU_KAFKA);
40 | private static final String HEROKU_KAFKA_CLIENT_CERT_KEY =
41 | String.format("%s_CLIENT_CERT_KEY", HEROKU_KAFKA);
42 | private static final String HEROKU_KAFKA_CLIENT_CERT =
43 | String.format("%s_CLIENT_CERT", HEROKU_KAFKA);
44 |
45 | private String bootstrapServers;
46 |
47 | Properties getProperties() throws URISyntaxException, CertificateException,
48 | NoSuchAlgorithmException, KeyStoreException, IOException {
49 | return buildDefaults();
50 | }
51 |
52 | private Properties buildDefaults() throws CertificateException, NoSuchAlgorithmException,
53 | KeyStoreException, IOException, URISyntaxException {
54 | Properties defaultProperties = new Properties();
55 | Properties herokuKafkaConfigVarProperties = buildHerokuKafkaConfigVars();
56 | Properties kafkaStreamsProperties = buildKafkaStreamsDefaults();
57 |
58 | defaultProperties.putAll(herokuKafkaConfigVarProperties);
59 | defaultProperties.putAll(kafkaStreamsProperties);
60 |
61 |
62 | return defaultProperties;
63 | }
64 |
65 | private Properties buildHerokuKafkaConfigVars() throws URISyntaxException, CertificateException,
66 | NoSuchAlgorithmException, KeyStoreException, IOException {
67 | Properties properties = new Properties();
68 | List bootstrapServerList = Lists.newArrayList();
69 |
70 | Iterable kafkaUrl = Splitter.on(",")
71 | .split(Preconditions.checkNotNull(System.getenv(HEROKU_KAFKA_URL)));
72 |
73 | for (String url : kafkaUrl) {
74 | URI uri = new URI(url);
75 | bootstrapServerList.add(String.format("%s:%d", uri.getHost(), uri.getPort()));
76 |
77 | switch (uri.getScheme()) {
78 | case "kafka":
79 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "PLAINTEXT");
80 | break;
81 | case "kafka+ssl":
82 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL");
83 | EnvKeyStore envTrustStore = EnvKeyStore.createWithRandomPassword(
84 | HEROKU_KAFKA_TRUSTED_CERT);
85 | EnvKeyStore envKeyStore = EnvKeyStore.createWithRandomPassword(
86 | HEROKU_KAFKA_CLIENT_CERT_KEY, HEROKU_KAFKA_CLIENT_CERT);
87 |
88 | File trustStoreFile = envTrustStore.storeTemp();
89 | File keyStoreFile = envKeyStore.storeTemp();
90 |
91 | properties.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, envTrustStore.type());
92 | properties.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG,
93 | trustStoreFile.getAbsolutePath());
94 | properties.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, envTrustStore.password());
95 | properties.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, envKeyStore.type());
96 | properties.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, keyStoreFile.getAbsolutePath());
97 | properties.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, envKeyStore.password());
98 | break;
99 | default:
100 | throw new URISyntaxException(uri.getScheme(), "Unknown URI scheme");
101 | }
102 | }
103 |
104 | bootstrapServers = Joiner.on(",").join(bootstrapServerList);
105 |
106 | return properties;
107 | }
108 |
109 | private Properties buildKafkaStreamsDefaults() {
110 | Properties properties = new Properties();
111 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG,
112 | String.format("%stext-processor-app", HEROKU_KAFKA_PREFIX));
113 | properties.put(StreamsConfig.CLIENT_ID_CONFIG,
114 | String.format("%stext-processor-client", HEROKU_KAFKA_PREFIX));
115 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
116 | properties.put(
117 | StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG,
118 | Serdes.String().getClass().getName());
119 | properties.put(
120 | StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG,
121 | Serdes.String().getClass().getName());
122 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
123 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG,
124 | WallclockTimestampExtractor.class);
125 |
126 | return properties;
127 | }
128 | }
129 |
--------------------------------------------------------------------------------
/streams-anomaly-detector/src/main/java/io/jeffchao/streams/anomalydetector/AnomalyDetectorConfig.java:
--------------------------------------------------------------------------------
1 | package io.jeffchao.streams.anomalydetector;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.net.URI;
6 | import java.net.URISyntaxException;
7 | import java.security.KeyStoreException;
8 | import java.security.NoSuchAlgorithmException;
9 | import java.security.cert.CertificateException;
10 | import java.util.List;
11 | import java.util.Optional;
12 | import java.util.Properties;
13 |
14 | import com.google.common.base.Joiner;
15 | import com.google.common.base.Preconditions;
16 | import com.google.common.base.Splitter;
17 | import com.google.common.collect.Lists;
18 | import com.heroku.sdk.EnvKeyStore;
19 | import org.apache.kafka.clients.CommonClientConfigs;
20 | import org.apache.kafka.common.config.SslConfigs;
21 | import org.apache.kafka.common.serialization.Serdes;
22 | import org.apache.kafka.streams.StreamsConfig;
23 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor;
24 | import org.slf4j.Logger;
25 | import org.slf4j.LoggerFactory;
26 |
27 |
28 | class AnomalyDetectorConfig extends Properties {
29 |
30 | private static final Logger log = LoggerFactory.getLogger(AnomalyDetectorConfig.class);
31 |
32 | private static final String ADDON_SUFFIX = Optional.ofNullable(
33 | System.getenv("ADDON_SUFFIX")).orElse("");
34 | private static final String HEROKU_KAFKA = String.format("HEROKU_KAFKA%s", ADDON_SUFFIX);
35 | private static final String HEROKU_KAFKA_PREFIX = Optional.ofNullable(
36 | System.getenv(String.format("%s_PREFIX", HEROKU_KAFKA))).orElse("");
37 | private static final String HEROKU_KAFKA_URL = String.format("%s_URL", HEROKU_KAFKA);
38 | private static final String HEROKU_KAFKA_TRUSTED_CERT =
39 | String.format("%s_TRUSTED_CERT", HEROKU_KAFKA);
40 | private static final String HEROKU_KAFKA_CLIENT_CERT_KEY =
41 | String.format("%s_CLIENT_CERT_KEY", HEROKU_KAFKA);
42 | private static final String HEROKU_KAFKA_CLIENT_CERT =
43 | String.format("%s_CLIENT_CERT", HEROKU_KAFKA);
44 |
45 | private String bootstrapServers;
46 |
47 | Properties getProperties() throws URISyntaxException, CertificateException,
48 | NoSuchAlgorithmException, KeyStoreException, IOException {
49 | return buildDefaults();
50 | }
51 |
52 | private Properties buildDefaults() throws CertificateException, NoSuchAlgorithmException,
53 | KeyStoreException, IOException, URISyntaxException {
54 | Properties defaultProperties = new Properties();
55 | Properties herokuKafkaConfigVarProperties = buildHerokuKafkaConfigVars();
56 | Properties kafkaStreamsProperties = buildKafkaStreamsDefaults();
57 |
58 | defaultProperties.putAll(herokuKafkaConfigVarProperties);
59 | defaultProperties.putAll(kafkaStreamsProperties);
60 |
61 |
62 | return defaultProperties;
63 | }
64 |
65 | private Properties buildHerokuKafkaConfigVars() throws URISyntaxException, CertificateException,
66 | NoSuchAlgorithmException, KeyStoreException, IOException {
67 | Properties properties = new Properties();
68 | List bootstrapServerList = Lists.newArrayList();
69 |
70 | Iterable kafkaUrl = Splitter.on(",")
71 | .split(Preconditions.checkNotNull(System.getenv(HEROKU_KAFKA_URL)));
72 |
73 | for (String url : kafkaUrl) {
74 | URI uri = new URI(url);
75 | bootstrapServerList.add(String.format("%s:%d", uri.getHost(), uri.getPort()));
76 |
77 | switch (uri.getScheme()) {
78 | case "kafka":
79 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "PLAINTEXT");
80 | break;
81 | case "kafka+ssl":
82 | properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL");
83 | EnvKeyStore envTrustStore = EnvKeyStore.createWithRandomPassword(
84 | HEROKU_KAFKA_TRUSTED_CERT);
85 | EnvKeyStore envKeyStore = EnvKeyStore.createWithRandomPassword(
86 | HEROKU_KAFKA_CLIENT_CERT_KEY, HEROKU_KAFKA_CLIENT_CERT);
87 |
88 | File trustStoreFile = envTrustStore.storeTemp();
89 | File keyStoreFile = envKeyStore.storeTemp();
90 |
91 | properties.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, envTrustStore.type());
92 | properties.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG,
93 | trustStoreFile.getAbsolutePath());
94 | properties.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, envTrustStore.password());
95 | properties.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, envKeyStore.type());
96 | properties.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, keyStoreFile.getAbsolutePath());
97 | properties.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, envKeyStore.password());
98 | break;
99 | default:
100 | throw new URISyntaxException(uri.getScheme(), "Unknown URI scheme");
101 | }
102 | }
103 |
104 | bootstrapServers = Joiner.on(",").join(bootstrapServerList);
105 |
106 | return properties;
107 | }
108 |
109 | private Properties buildKafkaStreamsDefaults() {
110 | Properties properties = new Properties();
111 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG,
112 | String.format("%sanomaly-detector-app", HEROKU_KAFKA_PREFIX));
113 | properties.put(StreamsConfig.CLIENT_ID_CONFIG,
114 | String.format("%sanomaly-detector-client", HEROKU_KAFKA_PREFIX));
115 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
116 | properties.put(
117 | StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG,
118 | Serdes.String().getClass().getName());
119 | properties.put(
120 | StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG,
121 | Serdes.String().getClass().getName());
122 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
123 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG,
124 | WallclockTimestampExtractor.class);
125 |
126 | return properties;
127 | }
128 | }
129 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
# Absolute symlink target: follow it directly.
PRG="$link"
else
# Relative symlink target: resolve it against the link's directory.
PRG=`dirname "$PRG"`"/$link"
fi
done
# APP_HOME becomes the physical (symlink-free) directory containing this script.
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

# Print a message without aborting.
warn () {
echo "$*"
}

# Print a message and exit with a failure status.
die () {
echo
echo "$*"
echo
exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar

# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
# No JAVA_HOME: fall back to whatever 'java' is on the PATH.
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`

# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option

if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
# Re-assemble the positional parameters from the converted argsN variables
# (plain /bin/sh has no arrays, hence the enumerated cases up to 9 args).
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi

# Escape application args
# save() emits each argument single-quoted (embedded quotes escaped) so the
# later 'eval set --' reconstructs the original arguments verbatim.
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi

# Replace this shell with the JVM running the Gradle wrapper main class.
exec "$JAVACMD" "$@"
173 |
--------------------------------------------------------------------------------
/codequality/checkstyle.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
71 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
115 |
116 |
117 |
119 |
120 |
121 |
122 |
124 |
125 |
126 |
127 |
129 |
130 |
131 |
132 |
134 |
135 |
136 |
137 |
138 |
140 |
141 |
142 |
143 |
145 |
146 |
147 |
148 |
150 |
151 |
152 |
153 |
155 |
156 |
157 |
158 |
160 |
162 |
164 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
--------------------------------------------------------------------------------
/data-generators/text-generator/alice-in-wonderland.info:
--------------------------------------------------------------------------------
1 | Project Gutenberg’s Alice’s Adventures in Wonderland, by Lewis Carroll
2 |
3 | This eBook is for the use of anyone anywhere at no cost and with
4 | almost no restrictions whatsoever. You may copy it, give it away or
5 | re-use it under the terms of the Project Gutenberg License included
6 | with this eBook or online at www.gutenberg.org
7 |
8 |
9 | Title: Alice’s Adventures in Wonderland
10 |
11 | Author: Lewis Carroll
12 |
13 | Posting Date: June 25, 2008 [EBook #11]
14 | Release Date: March, 1994
15 | Last Updated: October 6, 2016
16 |
17 | Language: English
18 |
19 | Character set encoding: UTF-8
20 |
21 | *** START OF THIS PROJECT GUTENBERG EBOOK ALICE’S ADVENTURES IN WONDERLAND ***
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 | ALICE’S ADVENTURES IN WONDERLAND
33 |
34 | Lewis Carroll
35 |
36 | THE MILLENNIUM FULCRUM EDITION 3.0
37 |
38 |
39 |
40 |
41 |
42 | (see alice-in-wonderland.txt)
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 | End of Project Gutenberg’s Alice’s Adventures in Wonderland, by Lewis Carroll
51 |
52 | *** END OF THIS PROJECT GUTENBERG EBOOK ALICE’S ADVENTURES IN WONDERLAND ***
53 |
54 | ***** This file should be named 11-0.txt or 11-0.zip *****
55 | This and all associated files of various formats will be found in:
56 | http://www.gutenberg.org/1/11/
57 |
58 |
59 |
60 | Updated editions will replace the previous one--the old editions
61 | will be renamed.
62 |
63 | Creating the works from public domain print editions means that no
64 | one owns a United States copyright in these works, so the Foundation
65 | (and you!) can copy and distribute it in the United States without
66 | permission and without paying copyright royalties. Special rules,
67 | set forth in the General Terms of Use part of this license, apply to
68 | copying and distributing Project Gutenberg-tm electronic works to
69 | protect the PROJECT GUTENBERG-tm concept and trademark. Project
70 | Gutenberg is a registered trademark, and may not be used if you
71 | charge for the eBooks, unless you receive specific permission. If you
72 | do not charge anything for copies of this eBook, complying with the
73 | rules is very easy. You may use this eBook for nearly any purpose
74 | such as creation of derivative works, reports, performances and
75 | research. They may be modified and printed and given away--you may do
76 | practically ANYTHING with public domain eBooks. Redistribution is
77 | subject to the trademark license, especially commercial
78 | redistribution.
79 |
80 |
81 |
82 | *** START: FULL LICENSE ***
83 |
84 | THE FULL PROJECT GUTENBERG LICENSE
85 | PLEASE READ THIS BEFORE YOU DISTRIBUTE OR USE THIS WORK
86 |
87 | To protect the Project Gutenberg-tm mission of promoting the free
88 | distribution of electronic works, by using or distributing this work
89 | (or any other work associated in any way with the phrase “Project
90 | Gutenberg”), you agree to comply with all the terms of the Full Project
91 | Gutenberg-tm License (available with this file or online at
92 | http://gutenberg.org/license).
93 |
94 |
95 | Section 1. General Terms of Use and Redistributing Project Gutenberg-tm
96 | electronic works
97 |
98 | 1.A. By reading or using any part of this Project Gutenberg-tm
99 | electronic work, you indicate that you have read, understand, agree to
100 | and accept all the terms of this license and intellectual property
101 | (trademark/copyright) agreement. If you do not agree to abide by all
102 | the terms of this agreement, you must cease using and return or destroy
103 | all copies of Project Gutenberg-tm electronic works in your possession.
104 | If you paid a fee for obtaining a copy of or access to a Project
105 | Gutenberg-tm electronic work and you do not agree to be bound by the
106 | terms of this agreement, you may obtain a refund from the person or
107 | entity to whom you paid the fee as set forth in paragraph 1.E.8.
108 |
109 | 1.B. “Project Gutenberg” is a registered trademark. It may only be
110 | used on or associated in any way with an electronic work by people who
111 | agree to be bound by the terms of this agreement. There are a few
112 | things that you can do with most Project Gutenberg-tm electronic works
113 | even without complying with the full terms of this agreement. See
114 | paragraph 1.C below. There are a lot of things you can do with Project
115 | Gutenberg-tm electronic works if you follow the terms of this agreement
116 | and help preserve free future access to Project Gutenberg-tm electronic
117 | works. See paragraph 1.E below.
118 |
119 | 1.C. The Project Gutenberg Literary Archive Foundation (“the Foundation”
120 | or PGLAF), owns a compilation copyright in the collection of Project
121 | Gutenberg-tm electronic works. Nearly all the individual works in the
122 | collection are in the public domain in the United States. If an
123 | individual work is in the public domain in the United States and you are
124 | located in the United States, we do not claim a right to prevent you from
125 | copying, distributing, performing, displaying or creating derivative
126 | works based on the work as long as all references to Project Gutenberg
127 | are removed. Of course, we hope that you will support the Project
128 | Gutenberg-tm mission of promoting free access to electronic works by
129 | freely sharing Project Gutenberg-tm works in compliance with the terms of
130 | this agreement for keeping the Project Gutenberg-tm name associated with
131 | the work. You can easily comply with the terms of this agreement by
132 | keeping this work in the same format with its attached full Project
133 | Gutenberg-tm License when you share it without charge with others.
134 |
135 | 1.D. The copyright laws of the place where you are located also govern
136 | what you can do with this work. Copyright laws in most countries are in
137 | a constant state of change. If you are outside the United States, check
138 | the laws of your country in addition to the terms of this agreement
139 | before downloading, copying, displaying, performing, distributing or
140 | creating derivative works based on this work or any other Project
141 | Gutenberg-tm work. The Foundation makes no representations concerning
142 | the copyright status of any work in any country outside the United
143 | States.
144 |
145 | 1.E. Unless you have removed all references to Project Gutenberg:
146 |
147 | 1.E.1. The following sentence, with active links to, or other immediate
148 | access to, the full Project Gutenberg-tm License must appear prominently
149 | whenever any copy of a Project Gutenberg-tm work (any work on which the
150 | phrase “Project Gutenberg” appears, or with which the phrase “Project
151 | Gutenberg” is associated) is accessed, displayed, performed, viewed,
152 | copied or distributed:
153 |
154 | This eBook is for the use of anyone anywhere at no cost and with
155 | almost no restrictions whatsoever. You may copy it, give it away or
156 | re-use it under the terms of the Project Gutenberg License included
157 | with this eBook or online at www.gutenberg.org
158 |
159 | 1.E.2. If an individual Project Gutenberg-tm electronic work is derived
160 | from the public domain (does not contain a notice indicating that it is
161 | posted with permission of the copyright holder), the work can be copied
162 | and distributed to anyone in the United States without paying any fees
163 | or charges. If you are redistributing or providing access to a work
164 | with the phrase “Project Gutenberg” associated with or appearing on the
165 | work, you must comply either with the requirements of paragraphs 1.E.1
166 | through 1.E.7 or obtain permission for the use of the work and the
167 | Project Gutenberg-tm trademark as set forth in paragraphs 1.E.8 or
168 | 1.E.9.
169 |
170 | 1.E.3. If an individual Project Gutenberg-tm electronic work is posted
171 | with the permission of the copyright holder, your use and distribution
172 | must comply with both paragraphs 1.E.1 through 1.E.7 and any additional
173 | terms imposed by the copyright holder. Additional terms will be linked
174 | to the Project Gutenberg-tm License for all works posted with the
175 | permission of the copyright holder found at the beginning of this work.
176 |
177 | 1.E.4. Do not unlink or detach or remove the full Project Gutenberg-tm
178 | License terms from this work, or any files containing a part of this
179 | work or any other work associated with Project Gutenberg-tm.
180 |
181 | 1.E.5. Do not copy, display, perform, distribute or redistribute this
182 | electronic work, or any part of this electronic work, without
183 | prominently displaying the sentence set forth in paragraph 1.E.1 with
184 | active links or immediate access to the full terms of the Project
185 | Gutenberg-tm License.
186 |
187 | 1.E.6. You may convert to and distribute this work in any binary,
188 | compressed, marked up, nonproprietary or proprietary form, including any
189 | word processing or hypertext form. However, if you provide access to or
190 | distribute copies of a Project Gutenberg-tm work in a format other than
191 | “Plain Vanilla ASCII” or other format used in the official version
192 | posted on the official Project Gutenberg-tm web site (www.gutenberg.org),
193 | you must, at no additional cost, fee or expense to the user, provide a
194 | copy, a means of exporting a copy, or a means of obtaining a copy upon
195 | request, of the work in its original “Plain Vanilla ASCII” or other
196 | form. Any alternate format must include the full Project Gutenberg-tm
197 | License as specified in paragraph 1.E.1.
198 |
199 | 1.E.7. Do not charge a fee for access to, viewing, displaying,
200 | performing, copying or distributing any Project Gutenberg-tm works
201 | unless you comply with paragraph 1.E.8 or 1.E.9.
202 |
203 | 1.E.8. You may charge a reasonable fee for copies of or providing
204 | access to or distributing Project Gutenberg-tm electronic works provided
205 | that
206 |
207 | - You pay a royalty fee of 20% of the gross profits you derive from
208 | the use of Project Gutenberg-tm works calculated using the method
209 | you already use to calculate your applicable taxes. The fee is
210 | owed to the owner of the Project Gutenberg-tm trademark, but he
211 | has agreed to donate royalties under this paragraph to the
212 | Project Gutenberg Literary Archive Foundation. Royalty payments
213 | must be paid within 60 days following each date on which you
214 | prepare (or are legally required to prepare) your periodic tax
215 | returns. Royalty payments should be clearly marked as such and
216 | sent to the Project Gutenberg Literary Archive Foundation at the
217 | address specified in Section 4, “Information about donations to
218 | the Project Gutenberg Literary Archive Foundation.”
219 |
220 | - You provide a full refund of any money paid by a user who notifies
221 | you in writing (or by e-mail) within 30 days of receipt that s/he
222 | does not agree to the terms of the full Project Gutenberg-tm
223 | License. You must require such a user to return or
224 | destroy all copies of the works possessed in a physical medium
225 | and discontinue all use of and all access to other copies of
226 | Project Gutenberg-tm works.
227 |
228 | - You provide, in accordance with paragraph 1.F.3, a full refund of any
229 | money paid for a work or a replacement copy, if a defect in the
230 | electronic work is discovered and reported to you within 90 days
231 | of receipt of the work.
232 |
233 | - You comply with all other terms of this agreement for free
234 | distribution of Project Gutenberg-tm works.
235 |
236 | 1.E.9. If you wish to charge a fee or distribute a Project Gutenberg-tm
237 | electronic work or group of works on different terms than are set
238 | forth in this agreement, you must obtain permission in writing from
239 | both the Project Gutenberg Literary Archive Foundation and Michael
240 | Hart, the owner of the Project Gutenberg-tm trademark. Contact the
241 | Foundation as set forth in Section 3 below.
242 |
243 | 1.F.
244 |
245 | 1.F.1. Project Gutenberg volunteers and employees expend considerable
246 | effort to identify, do copyright research on, transcribe and proofread
247 | public domain works in creating the Project Gutenberg-tm
248 | collection. Despite these efforts, Project Gutenberg-tm electronic
249 | works, and the medium on which they may be stored, may contain
250 | “Defects,” such as, but not limited to, incomplete, inaccurate or
251 | corrupt data, transcription errors, a copyright or other intellectual
252 | property infringement, a defective or damaged disk or other medium, a
253 | computer virus, or computer codes that damage or cannot be read by
254 | your equipment.
255 |
256 | 1.F.2. LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the “Right
257 | of Replacement or Refund” described in paragraph 1.F.3, the Project
258 | Gutenberg Literary Archive Foundation, the owner of the Project
259 | Gutenberg-tm trademark, and any other party distributing a Project
260 | Gutenberg-tm electronic work under this agreement, disclaim all
261 | liability to you for damages, costs and expenses, including legal
262 | fees. YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT
263 | LIABILITY, BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE
PROVIDED IN PARAGRAPH 1.F.3. YOU AGREE THAT THE FOUNDATION, THE
265 | TRADEMARK OWNER, AND ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE
266 | LIABLE TO YOU FOR ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR
267 | INCIDENTAL DAMAGES EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH
268 | DAMAGE.
269 |
270 | 1.F.3. LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a
271 | defect in this electronic work within 90 days of receiving it, you can
272 | receive a refund of the money (if any) you paid for it by sending a
273 | written explanation to the person you received the work from. If you
274 | received the work on a physical medium, you must return the medium with
275 | your written explanation. The person or entity that provided you with
276 | the defective work may elect to provide a replacement copy in lieu of a
277 | refund. If you received the work electronically, the person or entity
278 | providing it to you may choose to give you a second opportunity to
279 | receive the work electronically in lieu of a refund. If the second copy
280 | is also defective, you may demand a refund in writing without further
281 | opportunities to fix the problem.
282 |
283 | 1.F.4. Except for the limited right of replacement or refund set forth
284 | in paragraph 1.F.3, this work is provided to you ‘AS-IS’ WITH NO OTHER
285 | WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PURPOSE.
287 |
288 | 1.F.5. Some states do not allow disclaimers of certain implied
289 | warranties or the exclusion or limitation of certain types of damages.
290 | If any disclaimer or limitation set forth in this agreement violates the
291 | law of the state applicable to this agreement, the agreement shall be
292 | interpreted to make the maximum disclaimer or limitation permitted by
293 | the applicable state law. The invalidity or unenforceability of any
294 | provision of this agreement shall not void the remaining provisions.
295 |
296 | 1.F.6. INDEMNITY - You agree to indemnify and hold the Foundation, the
297 | trademark owner, any agent or employee of the Foundation, anyone
298 | providing copies of Project Gutenberg-tm electronic works in accordance
299 | with this agreement, and any volunteers associated with the production,
300 | promotion and distribution of Project Gutenberg-tm electronic works,
301 | harmless from all liability, costs and expenses, including legal fees,
302 | that arise directly or indirectly from any of the following which you do
303 | or cause to occur: (a) distribution of this or any Project Gutenberg-tm
304 | work, (b) alteration, modification, or additions or deletions to any
305 | Project Gutenberg-tm work, and (c) any Defect you cause.
306 |
307 |
308 | Section 2. Information about the Mission of Project Gutenberg-tm
309 |
310 | Project Gutenberg-tm is synonymous with the free distribution of
311 | electronic works in formats readable by the widest variety of computers
312 | including obsolete, old, middle-aged and new computers. It exists
313 | because of the efforts of hundreds of volunteers and donations from
314 | people in all walks of life.
315 |
316 | Volunteers and financial support to provide volunteers with the
317 | assistance they need, is critical to reaching Project Gutenberg-tm’s
318 | goals and ensuring that the Project Gutenberg-tm collection will
319 | remain freely available for generations to come. In 2001, the Project
320 | Gutenberg Literary Archive Foundation was created to provide a secure
321 | and permanent future for Project Gutenberg-tm and future generations.
322 | To learn more about the Project Gutenberg Literary Archive Foundation
323 | and how your efforts and donations can help, see Sections 3 and 4
324 | and the Foundation web page at http://www.pglaf.org.
325 |
326 |
327 | Section 3. Information about the Project Gutenberg Literary Archive
328 | Foundation
329 |
330 | The Project Gutenberg Literary Archive Foundation is a non profit
331 | 501(c)(3) educational corporation organized under the laws of the
332 | state of Mississippi and granted tax exempt status by the Internal
333 | Revenue Service. The Foundation’s EIN or federal tax identification
334 | number is 64-6221541. Its 501(c)(3) letter is posted at
335 | http://pglaf.org/fundraising. Contributions to the Project Gutenberg
336 | Literary Archive Foundation are tax deductible to the full extent
337 | permitted by U.S. federal laws and your state’s laws.
338 |
339 | The Foundation’s principal office is located at 4557 Melan Dr. S.
340 | Fairbanks, AK, 99712., but its volunteers and employees are scattered
341 | throughout numerous locations. Its business office is located at
342 | 809 North 1500 West, Salt Lake City, UT 84116, (801) 596-1887, email
343 | business@pglaf.org. Email contact links and up to date contact
344 | information can be found at the Foundation’s web site and official
345 | page at http://pglaf.org
346 |
347 | For additional contact information:
348 | Dr. Gregory B. Newby
349 | Chief Executive and Director
350 | gbnewby@pglaf.org
351 |
352 |
353 | Section 4. Information about Donations to the Project Gutenberg
354 | Literary Archive Foundation
355 |
356 | Project Gutenberg-tm depends upon and cannot survive without wide
357 | spread public support and donations to carry out its mission of
358 | increasing the number of public domain and licensed works that can be
359 | freely distributed in machine readable form accessible by the widest
360 | array of equipment including outdated equipment. Many small donations
361 | ($1 to $5,000) are particularly important to maintaining tax exempt
362 | status with the IRS.
363 |
364 | The Foundation is committed to complying with the laws regulating
365 | charities and charitable donations in all 50 states of the United
366 | States. Compliance requirements are not uniform and it takes a
367 | considerable effort, much paperwork and many fees to meet and keep up
368 | with these requirements. We do not solicit donations in locations
369 | where we have not received written confirmation of compliance. To
370 | SEND DONATIONS or determine the status of compliance for any
371 | particular state visit http://pglaf.org
372 |
373 | While we cannot and do not solicit contributions from states where we
374 | have not met the solicitation requirements, we know of no prohibition
375 | against accepting unsolicited donations from donors in such states who
376 | approach us with offers to donate.
377 |
378 | International donations are gratefully accepted, but we cannot make
379 | any statements concerning tax treatment of donations received from
380 | outside the United States. U.S. laws alone swamp our small staff.
381 |
382 | Please check the Project Gutenberg Web pages for current donation
383 | methods and addresses. Donations are accepted in a number of other
384 | ways including checks, online payments and credit card donations.
385 | To donate, please visit: http://pglaf.org/donate
386 |
387 |
388 | Section 5. General Information About Project Gutenberg-tm electronic
389 | works.
390 |
391 | Professor Michael S. Hart is the originator of the Project Gutenberg-tm
392 | concept of a library of electronic works that could be freely shared
393 | with anyone. For thirty years, he produced and distributed Project
394 | Gutenberg-tm eBooks with only a loose network of volunteer support.
395 |
396 |
397 | Project Gutenberg-tm eBooks are often created from several printed
398 | editions, all of which are confirmed as Public Domain in the U.S.
399 | unless a copyright notice is included. Thus, we do not necessarily
400 | keep eBooks in compliance with any particular paper edition.
401 |
402 |
403 | Most people start at our Web site which has the main PG search facility:
404 |
405 | http://www.gutenberg.org
406 |
407 | This Web site includes information about Project Gutenberg-tm,
408 | including how to make donations to the Project Gutenberg Literary
409 | Archive Foundation, how to help produce our new eBooks, and how to
410 | subscribe to our email newsletter to hear about new eBooks.
411 |
--------------------------------------------------------------------------------