├── .github
│   ├── FUNDING.yml
│   └── images
│       └── test_kafka.png
├── .gitignore
├── LICENSE
├── README.md
├── RRD.png
├── bin
│   ├── 0-download-kafka.sh
│   ├── 1-start-kafka.sh
│   ├── env.sh
│   └── start-spark.sh
├── nodejs-kafka-proxy
│   └── server.js
├── package.json
├── renovate.json
├── requirements.txt
└── spark
    └── spark_server.py
/requirements.txt:
--------------------------------------------------------------------------------
kafka
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
github: duyet
ko_fi: duyet
--------------------------------------------------------------------------------
/RRD.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duyet/realtime-dashboard/HEAD/RRD.png
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
{
  "extends": [
    "config:base"
  ]
}
--------------------------------------------------------------------------------
/.github/images/test_kafka.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duyet/realtime-dashboard/HEAD/.github/images/test_kafka.png
--------------------------------------------------------------------------------
/bin/0-download-kafka.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Fetch Kafka 1.0.0 from archive.apache.org, which keeps all past releases
# (transient mirrors no longer carry this version)
wget http://archive.apache.org/dist/kafka/1.0.0/kafka_2.11-1.0.0.tgz
tar -xzf kafka_2.11-1.0.0.tgz
cd kafka_2.11-1.0.0
--------------------------------------------------------------------------------
/bin/env.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
echo "Usage: source ./bin/env.sh"
echo ""

# Use BASH_SOURCE rather than $0: when this file is sourced, $0 refers to
# the calling shell, not to this script.
export RRD_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")"/..; pwd)"
echo "RRD_HOME = $RRD_HOME"

echo "Done!"
--------------------------------------------------------------------------------
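Because the script only exports a variable, it has to be sourced rather than executed for `RRD_HOME` to stick in the calling shell:

```sh
source ./bin/env.sh
echo $RRD_HOME   # now points at the repository root
```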
/bin/1-start-kafka.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run from the Kafka installation directory ($KAFKA_HOME).

# Start ZooKeeper in the background; without `&` it blocks the shell
# and the Kafka broker below would never start.
bin/zookeeper-server-start.sh config/zookeeper.properties &

# Give ZooKeeper a moment to come up, then start Kafka
sleep 5
bin/kafka-server-start.sh config/server.properties
--------------------------------------------------------------------------------
/bin/start-spark.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Requires SPARK_HOME to be exported first (see README, step 1)
source "$(dirname "$0")"/env.sh

$SPARK_HOME/bin/spark-submit \
  --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \
  $RRD_HOME/spark/spark_server.py \
  localhost:2181 website-collect website-report
--------------------------------------------------------------------------------
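Note that env.sh sets only `RRD_HOME`; `SPARK_HOME` must already be exported (see step 1 of the README), e.g.:

```sh
export SPARK_HOME=$RRD_HOME/spark-2.2.0-bin-hadoop2.7
./bin/start-spark.sh
```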
/package.json:
--------------------------------------------------------------------------------
{
  "name": "realtime-dashboard",
  "version": "1.0.0",
  "description": "",
  "main": "tracking.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/duyetdev/realtime-dashboard.git"
  },
  "author": "",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/duyetdev/realtime-dashboard/issues"
  },
  "homepage": "https://github.com/duyetdev/realtime-dashboard#readme",
  "dependencies": {
    "express": "^4.16.2",
    "kafka-node": "^4.0.0"
  }
}
--------------------------------------------------------------------------------
/nodejs-kafka-proxy/server.js:
--------------------------------------------------------------------------------
const kafka = require('kafka-node')
const express = require('express')
const app = express()

// Parse JSON request bodies so POST /proxy/:topic works
app.use(express.json())

// kafka-node >= 4 removed the ZooKeeper-based `Client`;
// connect to the broker directly with KafkaClient instead.
const Producer = kafka.Producer
const client = new kafka.KafkaClient({ kafkaHost: 'localhost:9092' })
const producer = new Producer(client)

app.get('/', (req, res) => res.send('Kafka Proxy server!'))

app.all('/proxy', (req, res) => {
  res.send('Send message by POST /proxy/:topic')
})

// Forward the request payload (POST body or query string) to the given topic
app.all('/proxy/:topic', (req, res) => {
  const topic = req.params['topic']
  const messages = JSON.stringify(req.method === 'POST' ? req.body : req.query)
  const payloads = [{ topic, messages }]
  console.log('===> Payload data: ', payloads)

  producer.send(payloads, function (err, data) {
    if (err) {
      return res.status(500).json({ error: 1, message: err, topic, data })
    }
    console.log(data)
    res.json({ error: 0, message: 'success', topic, data })
  })
})

producer.on('ready', function () {
  app.listen(3000, () => console.log('Example app listening on port 3000!'))
})

producer.on('error', (err) => console.error('Producer error:', err))
--------------------------------------------------------------------------------
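Once the proxy is up, it can be exercised from the shell. A quick sketch (the topic must already exist; the JSON fields are illustrative only):

```sh
# GET: the query string becomes the Kafka message
curl "http://localhost:3000/proxy/website-collect?event=view&client_id=blog.duyet.net"

# POST: the JSON body becomes the Kafka message
curl -X POST http://localhost:3000/proxy/website-collect \
  -H "Content-Type: application/json" \
  -d '{"client_id": "blog.duyet.net", "event": "view"}'
```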
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Van-Duyet Le

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Typescript v1 declaration files
typings/

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env
--------------------------------------------------------------------------------
/spark/spark_server.py:
--------------------------------------------------------------------------------
1 | """
2 | Counts words in UTF8 encoded, '\n' delimited text received from the network
3 | every second.
4 | Usage: spark_server.py
5 | To run this on your local machine, you need to setup Kafka and create
6 | a producer first, see http://kafka.apache.org/documentation.html#quickstart
7 | and then run the example
8 | `$ bin/spark-submit \
9 | --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \
10 | spark/spark_server.py \
11 | localhost:2181 website-tracking website-report`
12 | """
13 | from __future__ import print_function
14 |
15 | import sys
16 | import json
17 |
18 | from pyspark import SparkContext
19 | from pyspark.streaming import StreamingContext
20 | from pyspark.streaming.kafka import KafkaUtils
21 | from kafka import KafkaProducer
22 |
23 | producer = KafkaProducer(bootstrap_servers='localhost:9092',
24 | value_serializer=lambda v: json.dumps(v)
25 | .encode('utf-8'))
26 |
27 | def get_json(s):
28 | """
29 | Parse JSON from string.
30 | """
31 | try:
32 | return json.loads(s)
33 | except ValueError:
34 | return None
35 |
36 |
37 | def handler(message, output_topic="website-report"):
38 | records = message.collect()
39 | print (records, "===============")
40 | for record in records:
41 | # TODO: Send output throught Kafka back to Node.js dashboard
42 | producer.send(output_topic, record[1])
43 | producer.flush()
44 |
45 |
46 | def kafka_send(topic, message):
47 | print ("11111111111111111111111111111111111111111111")
48 | producer.send(topic, message)
49 | producer.flush()
50 |
51 |
52 | if __name__ == "__main__":
53 | if len(sys.argv) != 4:
54 | print("Usage: spark_server.py ",
55 | file=sys.stderr)
56 | exit(-1)
57 |
58 | sc = SparkContext(appName="RealtimeDashboardTracking")
59 | sc.setLogLevel("WARN")
60 |
61 | ssc = StreamingContext(sc, 3)
62 |
63 | zkQuorum, topic, output_topic = sys.argv[1:]
64 | print ("zkQuorum", zkQuorum)
65 | print ("topic", topic)
66 | print ("output_topic", output_topic)
67 |
68 | kvs = KafkaUtils.createStream(ssc, zkQuorum, "spark-streaming-consumer",
69 | {topic: 1})
70 |
71 | # Format and filter DStream
72 | lines = kvs.map(lambda x: get_json(x[1]))
73 | lines.transform(lambda x: kafka_send(output_topic, {"hits": x}))
74 |
75 | # Send number of hits to output_topic
76 | lines.map(lambda rdd: kafka_send(output_topic, rdd))
77 | lines.pprint()
78 |
79 | ssc.start()
80 | ssc.awaitTermination()
81 |
--------------------------------------------------------------------------------
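To check that the job is actually publishing, watch the report topic with the stock console consumer (run from the Kafka directory; a quick sanity check, not part of the pipeline):

```sh
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic website-report
```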
/README.md:
--------------------------------------------------------------------------------
# Realtime Dashboard

Real-time report dashboard with Apache Kafka, Apache Spark Streaming and Node.js.

**Support**: [ko-fi.com/duyet](https://ko-fi.com/duyet)

![Realtime Dashboard](RRD.png)

# Getting started

## 1. Setup environment

Clone this project:

```sh
git clone https://github.com/duyetdev/realtime-dashboard.git
cd realtime-dashboard/

# Set up env (must be sourced so that RRD_HOME persists in your shell)
source ./bin/env.sh
```

Download [Apache Spark 2.2.0](http://spark.apache.org/downloads.html):

```sh
cd $RRD_HOME
wget http://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
tar -xzf spark-2.2.0-bin-hadoop2.7.tgz
export SPARK_HOME=$RRD_HOME/spark-2.2.0-bin-hadoop2.7
```
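
A quick check that the download is usable (prints the Spark version banner):

```sh
$SPARK_HOME/bin/spark-submit --version
```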

Download Kafka (from archive.apache.org, which keeps all past releases):

```sh
cd $RRD_HOME
wget http://archive.apache.org/dist/kafka/1.0.0/kafka_2.11-1.0.0.tgz
tar -xzf kafka_2.11-1.0.0.tgz
export KAFKA_HOME=$RRD_HOME/kafka_2.11-1.0.0
```

Install Node.js packages:

```sh
npm install
```
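
The Spark job also needs the Python Kafka client listed in `requirements.txt`:

```sh
pip install -r requirements.txt
```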

## 2. Start Kafka Server

Start ZooKeeper and Kafka:

```sh
cd $RRD_HOME/kafka_2.11-1.0.0

# Start ZooKeeper in the background
bin/zookeeper-server-start.sh config/zookeeper.properties &

# Start Kafka in the background
bin/kafka-server-start.sh config/server.properties &
```

Create the topics:

```sh
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic website-collect
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic website-report
```
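
Optionally verify that both topics exist:

```sh
bin/kafka-topics.sh --list --zookeeper localhost:2181
```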

Kafka can't be reached directly over HTTP, so start the **Kafka Proxy**:

```sh
node nodejs-kafka-proxy/server.js

# [2017-11-16 14:24:03,008] INFO Accepted socket connection from /127.0.0.1:42984 (org.apache.zookeeper.server.NIOServerCnxnFactory)
# [2017-11-16 14:24:03,010] WARN Connection request from old client /127.0.0.1:42984; will be dropped if server is in r-o mode (org.apache.zookeeper.server.ZooKeeperServer)
# [2017-11-16 14:24:03,010] INFO Client attempting to establish new session at /127.0.0.1:42984 (org.apache.zookeeper.server.ZooKeeperServer)
# [2017-11-16 14:24:03,025] INFO Established session 0x15fc38ffab40011 with negotiated timeout 30000 for client /127.0.0.1:42984 (org.apache.zookeeper.server.ZooKeeperServer)
# Example app listening on port 3000!
```

Test the Kafka producer and consumer (optional). Open two terminals:

```sh
# Terminal 1
$ bin/kafka-console-producer.sh --broker-list localhost:9092 --topic website-collect
This is a message
This is another message
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "view", "ip":"1.2.3.4", "UA": "Chrome"}
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "click", "ip":"1.2.3.5", "UA": "Firefox"}
```

```sh
# Terminal 2
$ bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic website-collect --from-beginning
This is a message
This is another message
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "view", "ip":"1.2.3.4", "UA": "Chrome"}
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "click", "ip":"1.2.3.5", "UA": "Firefox"}
```

Test the proxy server:

```sh
curl "http://localhost:3000/proxy/website-collect?message=hello"
```

The message shows up in the Kafka consumer:

![Test Kafka](.github/images/test_kafka.png)

## 3. Apache Spark Streaming

Submit the Spark Streaming script:

```sh
# Usage: spark_server.py <zk_quorum> <topic> <output_topic>

$SPARK_HOME/bin/spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \
  $RRD_HOME/spark/spark_server.py \
  localhost:2181 website-collect website-report
```
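
With the streaming job running, events pushed through the proxy are forwarded, batch by batch, to `website-report`; with the console consumer from step 2 pointed at that topic, a quick end-to-end check looks like this:

```sh
curl "http://localhost:3000/proxy/website-collect?event=view&client_id=blog.duyet.net"
```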
--------------------------------------------------------------------------------