├── .github
│   ├── FUNDING.yml
│   └── images
│       └── test_kafka.png
├── .gitignore
├── LICENSE
├── README.md
├── RRD.png
├── bin
│   ├── 0-download-kafka.sh
│   ├── 1-start-kafka.sh
│   ├── env.sh
│   └── start-spark.sh
├── nodejs-kafka-proxy
│   └── server.js
├── package.json
├── renovate.json
├── requirements.txt
└── spark
    └── spark_server.py
/requirements.txt:
--------------------------------------------------------------------------------
kafka
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
github: duyet
ko_fi: duyet
--------------------------------------------------------------------------------
/RRD.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duyet/realtime-dashboard/HEAD/RRD.png
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
{
  "extends": [
    "config:base"
  ]
}
--------------------------------------------------------------------------------
/.github/images/test_kafka.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duyet/realtime-dashboard/HEAD/.github/images/test_kafka.png
--------------------------------------------------------------------------------
/bin/0-download-kafka.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Fetch Kafka 1.0.0 from archive.apache.org, which keeps all past releases
# (transient mirrors no longer carry this version)
wget http://archive.apache.org/dist/kafka/1.0.0/kafka_2.11-1.0.0.tgz
tar -xzf kafka_2.11-1.0.0.tgz
cd kafka_2.11-1.0.0
--------------------------------------------------------------------------------
/bin/env.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
echo "Usage: source ./bin/env.sh"
echo ""

# Use BASH_SOURCE rather than $0: when this file is sourced, $0 refers to
# the calling shell, not to this script.
export RRD_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")"/..; pwd)"
echo "RRD_HOME = $RRD_HOME"

echo "Done!"
--------------------------------------------------------------------------------
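Because the script only exports a variable, it has to be sourced rather than executed for `RRD_HOME` to stick in the calling shell:

```sh
source ./bin/env.sh
echo $RRD_HOME   # now points at the repository root
```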
/bin/1-start-kafka.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run from the Kafka installation directory ($KAFKA_HOME).

# Start ZooKeeper in the background; without `&` it blocks the shell
# and the Kafka broker below would never start.
bin/zookeeper-server-start.sh config/zookeeper.properties &

# Give ZooKeeper a moment to come up, then start Kafka
sleep 5
bin/kafka-server-start.sh config/server.properties
--------------------------------------------------------------------------------
/bin/start-spark.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Requires SPARK_HOME to be exported first (see README, step 1)
source "$(dirname "$0")"/env.sh

$SPARK_HOME/bin/spark-submit \
  --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \
  $RRD_HOME/spark/spark_server.py \
  localhost:2181 website-collect website-report
--------------------------------------------------------------------------------
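Note that env.sh sets only `RRD_HOME`; `SPARK_HOME` must already be exported (see step 1 of the README), e.g.:

```sh
export SPARK_HOME=$RRD_HOME/spark-2.2.0-bin-hadoop2.7
./bin/start-spark.sh
```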
/package.json:
--------------------------------------------------------------------------------
{
  "name": "realtime-dashboard",
  "version": "1.0.0",
  "description": "",
  "main": "tracking.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/duyetdev/realtime-dashboard.git"
  },
  "author": "",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/duyetdev/realtime-dashboard/issues"
  },
  "homepage": "https://github.com/duyetdev/realtime-dashboard#readme",
  "dependencies": {
    "express": "^4.16.2",
    "kafka-node": "^4.0.0"
  }
}
--------------------------------------------------------------------------------
/nodejs-kafka-proxy/server.js:
--------------------------------------------------------------------------------
const kafka = require('kafka-node')
const express = require('express')
const app = express()

// Parse JSON request bodies so POST /proxy/:topic works
app.use(express.json())

// kafka-node >= 4 removed the ZooKeeper-based `Client`;
// connect to the broker directly with KafkaClient instead.
const Producer = kafka.Producer
const client = new kafka.KafkaClient({ kafkaHost: 'localhost:9092' })
const producer = new Producer(client)

app.get('/', (req, res) => res.send('Kafka Proxy server!'))

app.all('/proxy', (req, res) => {
  res.send('Send message by POST /proxy/:topic')
})

// Forward the request payload (POST body or query string) to the given topic
app.all('/proxy/:topic', (req, res) => {
  const topic = req.params['topic']
  const messages = JSON.stringify(req.method === 'POST' ? req.body : req.query)
  const payloads = [{ topic, messages }]
  console.log('===> Payload data: ', payloads)

  producer.send(payloads, function (err, data) {
    if (err) {
      return res.status(500).json({ error: 1, message: err, topic, data })
    }
    console.log(data)
    res.json({ error: 0, message: 'success', topic, data })
  })
})

producer.on('ready', function () {
  app.listen(3000, () => console.log('Example app listening on port 3000!'))
})

producer.on('error', (err) => console.error('Producer error:', err))
--------------------------------------------------------------------------------
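Once the proxy is up, it can be exercised from the shell. A quick sketch (the topic must already exist; the JSON fields are illustrative only):

```sh
# GET: the query string becomes the Kafka message
curl "http://localhost:3000/proxy/website-collect?event=view&client_id=blog.duyet.net"

# POST: the JSON body becomes the Kafka message
curl -X POST http://localhost:3000/proxy/website-collect \
  -H "Content-Type: application/json" \
  -d '{"client_id": "blog.duyet.net", "event": "view"}'
```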
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Van-Duyet Le

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Typescript v1 declaration files
typings/

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env
--------------------------------------------------------------------------------
/spark/spark_server.py:
--------------------------------------------------------------------------------
1 | """
2 | Counts words in UTF8 encoded, '\n' delimited text received from the network
3 | every second.
4 | Usage: spark_server.py
5 | To run this on your local machine, you need to setup Kafka and create
6 | a producer first, see http://kafka.apache.org/documentation.html#quickstart
7 | and then run the example
8 | `$ bin/spark-submit \
9 | --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \
10 | spark/spark_server.py \
11 | localhost:2181 website-tracking website-report`
12 | """
13 | from __future__ import print_function
14 |
15 | import sys
16 | import json
17 |
18 | from pyspark import SparkContext
19 | from pyspark.streaming import StreamingContext
20 | from pyspark.streaming.kafka import KafkaUtils
21 | from kafka import KafkaProducer
22 |
23 | producer = KafkaProducer(bootstrap_servers='localhost:9092',
24 | value_serializer=lambda v: json.dumps(v)
25 | .encode('utf-8'))
26 |
27 | def get_json(s):
28 | """
29 | Parse JSON from string.
30 | """
31 | try:
32 | return json.loads(s)
33 | except ValueError:
34 | return None
35 |
36 |
37 | def handler(message, output_topic="website-report"):
38 | records = message.collect()
39 | print (records, "===============")
40 | for record in records:
41 | # TODO: Send output throught Kafka back to Node.js dashboard
42 | producer.send(output_topic, record[1])
43 | producer.flush()
44 |
45 |
46 | def kafka_send(topic, message):
47 | print ("11111111111111111111111111111111111111111111")
48 | producer.send(topic, message)
49 | producer.flush()
50 |
51 |
52 | if __name__ == "__main__":
53 | if len(sys.argv) != 4:
54 | print("Usage: spark_server.py ",
55 | file=sys.stderr)
56 | exit(-1)
57 |
58 | sc = SparkContext(appName="RealtimeDashboardTracking")
59 | sc.setLogLevel("WARN")
60 |
61 | ssc = StreamingContext(sc, 3)
62 |
63 | zkQuorum, topic, output_topic = sys.argv[1:]
64 | print ("zkQuorum", zkQuorum)
65 | print ("topic", topic)
66 | print ("output_topic", output_topic)
67 |
68 | kvs = KafkaUtils.createStream(ssc, zkQuorum, "spark-streaming-consumer",
69 | {topic: 1})
70 |
71 | # Format and filter DStream
72 | lines = kvs.map(lambda x: get_json(x[1]))
73 | lines.transform(lambda x: kafka_send(output_topic, {"hits": x}))
74 |
75 | # Send number of hits to output_topic
76 | lines.map(lambda rdd: kafka_send(output_topic, rdd))
77 | lines.pprint()
78 |
79 | ssc.start()
80 | ssc.awaitTermination()
81 |
--------------------------------------------------------------------------------
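To check that the job is actually publishing, watch the report topic with the stock console consumer (run from the Kafka directory; a quick sanity check, not part of the pipeline):

```sh
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic website-report
```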
/README.md:
--------------------------------------------------------------------------------
# Realtime Dashboard

Real-time report dashboard with Apache Kafka, Apache Spark Streaming and Node.js.

**Support**: [ko-fi.com/duyet](https://ko-fi.com/duyet)

![Realtime Dashboard](RRD.png)

# Getting started

## 1. Setup environment

Clone this project:

```sh
git clone https://github.com/duyetdev/realtime-dashboard.git
cd realtime-dashboard/

# Set up env (must be sourced so that RRD_HOME persists in your shell)
source ./bin/env.sh
```

Download [Apache Spark 2.2.0](http://spark.apache.org/downloads.html):

```sh
cd $RRD_HOME
wget http://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
tar -xzf spark-2.2.0-bin-hadoop2.7.tgz
export SPARK_HOME=$RRD_HOME/spark-2.2.0-bin-hadoop2.7
```
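
A quick check that the download is usable (prints the Spark version banner):

```sh
$SPARK_HOME/bin/spark-submit --version
```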

Download Kafka (from archive.apache.org, which keeps all past releases):

```sh
cd $RRD_HOME
wget http://archive.apache.org/dist/kafka/1.0.0/kafka_2.11-1.0.0.tgz
tar -xzf kafka_2.11-1.0.0.tgz
export KAFKA_HOME=$RRD_HOME/kafka_2.11-1.0.0
```

Install Node.js packages:

```sh
npm install
```
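
The Spark job also needs the Python Kafka client listed in `requirements.txt`:

```sh
pip install -r requirements.txt
```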

## 2. Start Kafka Server

Start ZooKeeper and Kafka:

```sh
cd $RRD_HOME/kafka_2.11-1.0.0

# Start ZooKeeper in the background
bin/zookeeper-server-start.sh config/zookeeper.properties &

# Start Kafka in the background
bin/kafka-server-start.sh config/server.properties &
```

Create the topics:

```sh
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic website-collect
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic website-report
```
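
Optionally verify that both topics exist:

```sh
bin/kafka-topics.sh --list --zookeeper localhost:2181
```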

Kafka can't be reached directly over HTTP, so start the **Kafka Proxy**:

```sh
node nodejs-kafka-proxy/server.js

# [2017-11-16 14:24:03,008] INFO Accepted socket connection from /127.0.0.1:42984 (org.apache.zookeeper.server.NIOServerCnxnFactory)
# [2017-11-16 14:24:03,010] WARN Connection request from old client /127.0.0.1:42984; will be dropped if server is in r-o mode (org.apache.zookeeper.server.ZooKeeperServer)
# [2017-11-16 14:24:03,010] INFO Client attempting to establish new session at /127.0.0.1:42984 (org.apache.zookeeper.server.ZooKeeperServer)
# [2017-11-16 14:24:03,025] INFO Established session 0x15fc38ffab40011 with negotiated timeout 30000 for client /127.0.0.1:42984 (org.apache.zookeeper.server.ZooKeeperServer)
# Example app listening on port 3000!
```

Test the Kafka producer and consumer (optional). Open two terminals:

```sh
# Terminal 1
$ bin/kafka-console-producer.sh --broker-list localhost:9092 --topic website-collect
This is a message
This is another message
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "view", "ip":"1.2.3.4", "UA": "Chrome"}
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "click", "ip":"1.2.3.5", "UA": "Firefox"}
```

```sh
# Terminal 2
$ bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic website-collect --from-beginning
This is a message
This is another message
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "view", "ip":"1.2.3.4", "UA": "Chrome"}
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "click", "ip":"1.2.3.5", "UA": "Firefox"}
```

Test the proxy server:

```sh
curl "http://localhost:3000/proxy/website-collect?message=hello"
```

The message shows up in the Kafka consumer:

![Test Kafka](.github/images/test_kafka.png)

## 3. Apache Spark Streaming

Submit the Spark Streaming script:

```sh
# Usage: spark_server.py <zk_quorum> <topic> <output_topic>

$SPARK_HOME/bin/spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \
  $RRD_HOME/spark/spark_server.py \
  localhost:2181 website-collect website-report
```
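
With the streaming job running, events pushed through the proxy are forwarded, batch by batch, to `website-report`; with the console consumer from step 2 pointed at that topic, a quick end-to-end check looks like this:

```sh
curl "http://localhost:3000/proxy/website-collect?event=view&client_id=blog.duyet.net"
```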
--------------------------------------------------------------------------------