├── requirements.txt ├── .github ├── FUNDING.yml └── images │ └── test_kafka.png ├── RRD.png ├── renovate.json ├── bin ├── 0-download-kafka.sh ├── env.sh ├── 1-start-kafka.sh └── start-spark.sh ├── package.json ├── nodejs-kafka-proxy └── server.js ├── LICENSE ├── .gitignore ├── spark └── spark_server.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | kafka 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: duyet 2 | ko_fi: duyet 3 | -------------------------------------------------------------------------------- /RRD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duyet/realtime-dashboard/HEAD/RRD.png -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /.github/images/test_kafka.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duyet/realtime-dashboard/HEAD/.github/images/test_kafka.png -------------------------------------------------------------------------------- /bin/0-download-kafka.sh: -------------------------------------------------------------------------------- 1 | wget http://mirrors.viethosting.com/apache/kafka/1.0.0/kafka_2.11-1.0.0.tgz 2 | tar -xzf kafka_2.11-1.0.0.tgz 3 | cd kafka_2.11-1.0.0 4 | -------------------------------------------------------------------------------- /bin/env.sh: -------------------------------------------------------------------------------- 1 | echo "Usage: source ./bin/env.sh" 2 | echo "" 3 | 4 | export 
RRD_HOME="$(cd "$(dirname "$0")"/..; pwd)" 5 | echo "RRD_HOME = $RRD_HOME" 6 | 7 | echo "Done!" 8 | -------------------------------------------------------------------------------- /bin/1-start-kafka.sh: -------------------------------------------------------------------------------- 1 | # Start zookeeper 2 | bin/zookeeper-server-start.sh config/zookeeper.properties 3 | 4 | # Start Kafka 5 | bin/kafka-server-start.sh config/server.properties 6 | -------------------------------------------------------------------------------- /bin/start-spark.sh: -------------------------------------------------------------------------------- 1 | 2 | source "$(dirname "$0")"/env.sh 3 | 4 | $SPARK_HOME/bin/spark-submit \ 5 | --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \ 6 | $RRD_HOME/spark/spark_server.py \ 7 | localhost:2181 website-collect website-report 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "realtime-dashboard", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "tracking.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/duyetdev/realtime-dashboard.git" 12 | }, 13 | "author": "", 14 | "license": "MIT", 15 | "bugs": { 16 | "url": "https://github.com/duyetdev/realtime-dashboard/issues" 17 | }, 18 | "homepage": "https://github.com/duyetdev/realtime-dashboard#readme", 19 | "dependencies": { 20 | "express": "^4.16.2", 21 | "kafka-node": "^4.0.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /nodejs-kafka-proxy/server.js: -------------------------------------------------------------------------------- 1 | const kafka = require('kafka-node') 2 | const express = require('express') 3 | const app = express() 4 | 5 | const Producer = 
kafka.Producer 6 | const client = new kafka.Client() 7 | const producer = new Producer(client) 8 | 9 | app.get('/', (req, res) => res.send('Kafka Proxy server!')) 10 | 11 | app.all('/proxy', (req, res) => { 12 | res.send("Send message by POST /proxy/:topic") 13 | }) 14 | 15 | app.all('/proxy/:topic', (req, res) => { 16 | const topic = req.params['topic']; 17 | const messages = JSON.stringify(req.method == 'POST' ? req.body : req.query); 18 | const payloads = [{ topic, messages }]; 19 | console.log("===> Payload data: ", payloads); 20 | 21 | producer.send(payloads, function (err, data) { 22 | if (err) { 23 | return res.status(999).json({error: 1, message: err, topic, data}) 24 | } else { 25 | console.log(data); 26 | res.json({error:0, message: 'success', topic, data}) 27 | } 28 | }); 29 | }) 30 | 31 | producer.on('ready', function () { 32 | app.listen(3000, () => console.log('Example app listening on port 3000!')) 33 | }) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Van-Duyet Le 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (http://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # Typescript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | -------------------------------------------------------------------------------- /spark/spark_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | Counts words in UTF8 encoded, '\n' delimited text received from the network 3 | every second. 
4 | Usage: spark_server.py 5 | To run this on your local machine, you need to setup Kafka and create 6 | a producer first, see http://kafka.apache.org/documentation.html#quickstart 7 | and then run the example 8 | `$ bin/spark-submit \ 9 | --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \ 10 | spark/spark_server.py \ 11 | localhost:2181 website-tracking website-report` 12 | """ 13 | from __future__ import print_function 14 | 15 | import sys 16 | import json 17 | 18 | from pyspark import SparkContext 19 | from pyspark.streaming import StreamingContext 20 | from pyspark.streaming.kafka import KafkaUtils 21 | from kafka import KafkaProducer 22 | 23 | producer = KafkaProducer(bootstrap_servers='localhost:9092', 24 | value_serializer=lambda v: json.dumps(v) 25 | .encode('utf-8')) 26 | 27 | def get_json(s): 28 | """ 29 | Parse JSON from string. 30 | """ 31 | try: 32 | return json.loads(s) 33 | except ValueError: 34 | return None 35 | 36 | 37 | def handler(message, output_topic="website-report"): 38 | records = message.collect() 39 | print (records, "===============") 40 | for record in records: 41 | # TODO: Send output throught Kafka back to Node.js dashboard 42 | producer.send(output_topic, record[1]) 43 | producer.flush() 44 | 45 | 46 | def kafka_send(topic, message): 47 | print ("11111111111111111111111111111111111111111111") 48 | producer.send(topic, message) 49 | producer.flush() 50 | 51 | 52 | if __name__ == "__main__": 53 | if len(sys.argv) != 4: 54 | print("Usage: spark_server.py ", 55 | file=sys.stderr) 56 | exit(-1) 57 | 58 | sc = SparkContext(appName="RealtimeDashboardTracking") 59 | sc.setLogLevel("WARN") 60 | 61 | ssc = StreamingContext(sc, 3) 62 | 63 | zkQuorum, topic, output_topic = sys.argv[1:] 64 | print ("zkQuorum", zkQuorum) 65 | print ("topic", topic) 66 | print ("output_topic", output_topic) 67 | 68 | kvs = KafkaUtils.createStream(ssc, zkQuorum, "spark-streaming-consumer", 69 | {topic: 1}) 70 | 71 | # Format and filter DStream 72 | 
lines = kvs.map(lambda x: get_json(x[1])) 73 | lines.transform(lambda x: kafka_send(output_topic, {"hits": x})) 74 | 75 | # Send number of hits to output_topic 76 | lines.map(lambda rdd: kafka_send(output_topic, rdd)) 77 | lines.pprint() 78 | 79 | ssc.start() 80 | ssc.awaitTermination() 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Realtime Dashboard 2 | Real-time report dashboard with Apache Kafka, Apache Spark Streaming and Node.js 3 | 4 | **Support** 5 | 6 | 7 | 8 | 9 | ![](RRD.png) 10 | 11 | # Getting started 12 | 13 | ## 1. Setup environment 14 | 15 | Clone this project 16 | 17 | ```sh 18 | git clone https://github.com/duyetdev/realtime-dashboard.git 19 | cd realtime-dashboard/ 20 | 21 | # Setup env 22 | ./bin/env.sh 23 | ``` 24 | 25 | Download [Apache Spark 2.2.0](http://spark.apache.org/downloads.html) 26 | 27 | ```sh 28 | cd $RRD_HOME 29 | wget http://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz 30 | tar -xzf spark-2.2.0-bin-hadoop2.7.tgz 31 | export SPARK_HOME=$RRD_HOME/spark-2.2.0-bin-hadoop2.7 32 | ``` 33 | 34 | Download Kafka 35 | ```sh 36 | cd $RRD_HOME 37 | wget http://mirrors.viethosting.com/apache/kafka/1.0.0/kafka_2.11-1.0.0.tgz 38 | tar -xzf kafka_2.11-1.0.0.tgz 39 | export KAFKA_HOME=$RRD_HOME/kafka_2.11-1.0.0 40 | ``` 41 | 42 | Install Node.js packages 43 | ```sh 44 | npm install 45 | ``` 46 | 47 | ## 2. 
Start Kafka Server 48 | 49 | Start Zookeeper and Kafka 50 | ```sh 51 | cd $RRD_HOME/kafka_2.11-1.0.0 52 | 53 | # Start zookeeper 54 | bin/zookeeper-server-start.sh config/zookeeper.properties & 55 | 56 | # Start Kafka 57 | bin/kafka-server-start.sh config/server.properties & 58 | ``` 59 | 60 | Create Topics 61 | ```sh 62 | bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic website-collect 63 | bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic website-report 64 | ``` 65 | 66 | We can't access Kafka directly via HTTP, so we start **Kafka Proxy** : 67 | ```sh 68 | node nodejs-kafka-proxy/server.js 69 | 70 | # [2017-11-16 14:24:03,008] INFO Accepted socket connection from /127.0.0.1:42984 (org.apache.zookeeper.server.NIOServerCnxnFactory) 71 | # [2017-11-16 14:24:03,010] WARN Connection request from old client /127.0.0.1:42984; will be dropped if server is in r-o mode (org.apache.zookeeper.server.ZooKeeperServer) 72 | # [2017-11-16 14:24:03,010] INFO Client attempting to establish new session at /127.0.0.1:42984 (org.apache.zookeeper.server.ZooKeeperServer) 73 | # [2017-11-16 14:24:03,025] INFO Established session 0x15fc38ffab40011 with negotiated timeout 30000 for client /127.0.0.1:42984 (org.apache.zookeeper.server.ZooKeeperServer) 74 | # Example app listening on port 3000! 


```

Test (Optional) Kafka Producer and Consumer

Open two terminals:
```sh
# Terminal 1
$ bin/kafka-console-producer.sh --broker-list localhost:9092 --topic website-collect
This is a message
This is another message
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "view", "ip":"1.2.3.4", "UA": "Chrome"}
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "click", "ip":"1.2.3.5", "UA": "Firefox"}
```

```sh
# Terminal 2
$ bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic website-collect --from-beginning
This is a message
This is another message
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "view", "ip":"1.2.3.4", "UA": "Chrome"}
{"client_id": "blog.duyet.net", "time": "1510736940", "event": "click", "ip":"1.2.3.5", "UA": "Firefox"}
```

Test the proxy server:
```
http://localhost:3000/proxy/website-collect?message=hello
```

You will see the message arrive in the Kafka consumer:

![](.github/images/test_kafka.png)


## 3. Apache Spark Streaming

![](https://spark.apache.org/docs/latest/img/streaming-arch.png)

Submit the Spark Streaming script

```sh
# Usage: spark_server.py <zkQuorum> <topic> <output_topic>

$SPARK_HOME/bin/spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2 \
    $RRD_HOME/spark/spark_server.py \
    localhost:2181 website-collect website-report
```
--------------------------------------------------------------------------------