├── .gitignore ├── index.js ├── package.json ├── samples ├── server.js ├── producer.js └── consumer.js ├── test └── index.js ├── README.md └── lib └── index.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | samples/offsets 3 | .vscode -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | module.exports = require('./lib/index'); -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kafka-proxy", 3 | "version": "1.0.0", 4 | "scripts": {}, 5 | "description": "WebSockets based proxy for Kafka", 6 | "author": "lawrips ", 7 | "dependencies": { 8 | "debug": "^2.2.0", 9 | "no-kafka": "^2.5.5", 10 | "path-to-regexp": "^1.2.1", 11 | "ws": "^1.0.1" 12 | }, 13 | "main": "server.js", 14 | "devDependencies": { 15 | "commander": "^2.9.0", 16 | "should": "^9.0.2" 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "https://github.com/Microsoft/kafka-proxy-ws.git" 21 | }, 22 | "keywords": [ 23 | "kafka", 24 | "proxy", 25 | "reverse proxy", 26 | "ws", 27 | "websockets", 28 | "websocket", 29 | "socket", 30 | "sockets" 31 | ], 32 | "license": "MIT" 33 | } 34 | -------------------------------------------------------------------------------- /samples/server.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const KafkaProxy = require('../index'); 4 | 5 | console.log('Starting Web Socket Server'); 6 | 7 | // initiate the proxy 8 | let kafkaProxy = new KafkaProxy({ 9 | wsPort: 9999, 10 | kafka: 'localhost:9092/', 11 | idleTimeout: 100, 12 | maxBytes: 1000000, 13 | partition: 0, 14 | //auth: 'thisisapassword' 15 | }); 16 | 17 | kafkaProxy.listen(); 18 | 19 | 
console.log('Started Web Socket Server'); 20 | 21 | // Handle uncaught exceptions 22 | 23 | if(typeof v8debug !== 'object') { 24 | console.log('No debugger attached, listening for uncaught exceptions for logging...'); 25 | 26 | process.on('uncaughtException', function(err) { 27 | console.error('UNCAUGHT EXCEPTION!') 28 | console.error((new Date).toUTCString() + ' uncaughtException:', err.message); 29 | console.error(err.stack); 30 | 31 | // Exit the app on the next tick, incase others are listen to uncaughtException 32 | process.nextTick(function() { 33 | process.exit(1); 34 | }); 35 | }); 36 | } 37 | -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const should = require('should'), 4 | debug = require('debug')('kafka-prox-ws-test'); 5 | 6 | let defaults = { 7 | port : 9999, 8 | kafkaAddress : 'localhost:9092/', 9 | idleTimeout : 1000, 10 | maxBytes : 500000, 11 | recordInterval: 5000, 12 | defaultPartition: 0, 13 | auth: null 14 | } 15 | 16 | describe('kafka proxy unit tests', () => { 17 | it('create constructor with options', (done) => { 18 | var KafkaProxy = require('../index'); 19 | 20 | let kafkaProxy = new KafkaProxy({ 21 | wsPort: defaults.port, 22 | kafka: defaults.kafkaAddress, 23 | idleTimeout: defaults.idleTimeout + 1, 24 | recordInterval: defaults.recordInterval + 1, 25 | maxBytes: defaults.maxBytes + 1, 26 | partition: defaults.defaultPartition + 1, 27 | auth: 'password' 28 | }); 29 | 30 | kafkaProxy.port.should.equal(defaults.port); 31 | kafkaProxy.kafkaAddress.should.equal(defaults.kafkaAddress); 32 | kafkaProxy.idleTimeout.should.equal(defaults.idleTimeout + 1); 33 | kafkaProxy.maxBytes.should.equal(defaults.maxBytes + 1); 34 | kafkaProxy.recordInterval.should.equal(defaults.recordInterval + 1); 35 | kafkaProxy.defaultPartition.should.equal(defaults.defaultPartition + 1); 36 | 
kafkaProxy.auth.should.equal('password'); 37 | done(); 38 | }); 39 | 40 | it('create constructor without options and check defaults are set', (done) => { 41 | var KafkaProxy = require('../index'); 42 | 43 | let kafkaProxy = new KafkaProxy({ 44 | wsPort: defaults.port, 45 | kafka: defaults.kafkaAddress, 46 | }); 47 | 48 | kafkaProxy.port.should.equal(defaults.port); 49 | kafkaProxy.kafkaAddress.should.equal(defaults.kafkaAddress); 50 | kafkaProxy.idleTimeout.should.equal(defaults.idleTimeout); 51 | kafkaProxy.maxBytes.should.equal(defaults.maxBytes); 52 | kafkaProxy.recordInterval.should.equal(defaults.recordInterval); 53 | kafkaProxy.defaultPartition.should.equal(defaults.defaultPartition); 54 | should.not.exist(kafkaProxy.auth); 55 | done(); 56 | }); 57 | 58 | 59 | it('create constructor without an address should throw an error', (done) => { 60 | var KafkaProxy = require('../index'); 61 | 62 | try { 63 | let kafkaProxy = new KafkaProxy({}); 64 | } catch (ex) { 65 | should.exist(ex); 66 | } 67 | done(); 68 | }); 69 | 70 | }); 71 | -------------------------------------------------------------------------------- /samples/producer.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const nokafka = require('no-kafka'), 4 | program = require('commander'); 5 | 6 | let producer; 7 | 8 | program 9 | .option('-t, --topic ', 'topic (required)') 10 | .option('-n, --num [value]', 'number of messages or batches', 100) 11 | .option('-p, --partition [value]', 'option partition (default is 0)') 12 | .parse(process.argv); 13 | 14 | let topic = program.topic; 15 | let numMessages = Number.parseInt(program.num); 16 | let partition = program.partition; 17 | 18 | if (!topic) { 19 | program.outputHelp(); 20 | process.exit(1); 21 | } 22 | 23 | function connectToKafka() { 24 | // connect to kafka 25 | console.log("CONNECTING TO KAFKA") 26 | 27 | // Create producer 28 | producer = new nokafka.Producer({ 29 | connectionString: 
'localhost:9092', 30 | clientId: 'myservice', 31 | codec: nokafka.COMPRESSION_SNAPPY 32 | }); 33 | 34 | // connect to kafka 35 | return producer.init().then(() => { 36 | 37 | // create a batch of messages and send to kafka 38 | let messages = []; 39 | for (var i=0; i { 76 | console.log(`sent ${kafkaMsgs.length} messages to KAFKA TOPIC ${queueName}, partitionId ${partitionId}. Data from topic is ${JSON.stringify(data)}`); 77 | process.exit(1); 78 | }). 79 | catch((err) => { 80 | console.log('err on kafka queue.push: ' + err); 81 | }); 82 | } 83 | 84 | connectToKafka(); 85 | -------------------------------------------------------------------------------- /samples/consumer.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const WebSocket = require('ws'), 4 | fs = require('fs'), 5 | program = require('commander'); 6 | 7 | let server = 'ws://localhost:9999/'; 8 | 9 | let ws = {}; 10 | let offset = 0; 11 | let count = 0; 12 | 13 | let auth = process.env.KAFKA_AUTH; 14 | 15 | program 16 | .option('-t, --topic ', 'topic (required)') 17 | .option('-c, --consumer ', 'consumer group (required)') 18 | .option('-n, --num [value]', 'number of messages or batches', 100) 19 | .option('-o, --offset [value]', 'manually set offset position') 20 | .option('-x, --nooffset', 'rely on server for offset') 21 | .option('-p, --partition [value]', 'option partition (default is 0)') 22 | .parse(process.argv); 23 | 24 | let topic = program.topic; 25 | let consumer = program.consumer; 26 | let numMessages = Number.parseInt(program.num); 27 | let programOffset = program.offset ? 
Number.parseInt(program.offset) : null; 28 | let noOffset = program.nooffset; 29 | let partition = program.partition; 30 | 31 | if (!topic || !consumer) { 32 | program.outputHelp(); 33 | process.exit(1); 34 | } 35 | 36 | // open or create a file 37 | let filePath = './offsets/' + topic + '_offset.txt'; 38 | 39 | // decide whether to send up partition param 40 | let partitionParam = partition ? '&partition=' + partition : ''; 41 | 42 | // if nooffset = false, rely on locally tracked offset 43 | if (!noOffset) { 44 | let loadedOffset = 0; 45 | try { 46 | loadedOffset = Number.parseInt(fs.readFileSync(filePath, 'utf8')) + 1; 47 | } catch (ex) { 48 | fs.writeFileSync(filePath, '0'); 49 | } 50 | let offset; 51 | if (programOffset != null) { 52 | offset = programOffset; 53 | console.log('using offset from program params: ' + programOffset); 54 | } 55 | else { 56 | offset = loadedOffset; 57 | console.log('loading last known offsets from file: ' + loadedOffset); 58 | } 59 | let options = auth ? {headers: { Authorization: auth}} : null; 60 | ws[topic] = new WebSocket(server + '?topic=' + topic + '&consumerGroup=' + consumer + '&offset=' + offset + partitionParam, options); 61 | } 62 | // if nooffset is supplied, rely on server 63 | else { 64 | console.log('nooffset supplied, relying on server'); 65 | console.log(server + '/?topic=' + topic + '&consumerGroup=' + consumer + partitionParam) 66 | let options = auth ? {headers: { Authorization: auth}} : null; 67 | ws[topic] = new WebSocket(server + '?topic=' + topic + '&consumerGroup=' + consumer + partitionParam, options); 68 | } 69 | 70 | ws[topic].on('open', () => { 71 | console.log('Opened socket to server for topic ' + topic); 72 | }); 73 | 74 | ws[topic].on('error', (error) => { 75 | console.log(error); 76 | }); 77 | 78 | ws[topic].on('message', (data, flags) => { 79 | // flags.binary will be set if a binary data is received. 80 | // flags.masked will be set if the data was masked. 
81 | let batch = JSON.parse(data); 82 | offset = batch[batch.length-1].offset; 83 | console.log(`Received a batch of messages from kafka. Size: ${batch.length}, last offset: ${offset}, lastMessage:\n${JSON.stringify(batch[batch.length-1])}`); 84 | }); 85 | 86 | process.on('SIGINT', (something) => { 87 | console.log('Exiting from Ctrl-C... latest offset received from kafka: ' + offset); 88 | fs.writeFileSync(filePath, offset); 89 | process.exit(1); 90 | }); 91 | 92 | process.on('exit', (something) => { 93 | console.log('Exiting from graceful exit... latest offset received from kafka: ' + offset); 94 | fs.writeFileSync(filePath, offset); 95 | process.exit(1); 96 | }); 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #kafka-proxy 2 | 3 | A robust, scalable, high performance WebSockets based proxy for Kafka. 4 | 5 | ``` 6 | 'use strict'; 7 | const KafkaProxy = require('kafka-proxy'); 8 | 9 | let kafkaProxy = new KafkaProxy({ 10 | wsPort: 9999, 11 | kafka: 'localhost:9092/', 12 | }); 13 | 14 | kafkaProxy.listen(); 15 | ``` 16 | 17 | ## Why a proxy for Kafka? 18 | This library adds a few features that Kafka itself doesn’t natively support such as easy connectivity to Kafka over standard web protocols and a central point of management for offsetting, logging, alerting. These capabilities aim to increase agility while in development and can also prove useful in production. 19 | 20 | ##Features 21 | * Enables connectivity to Kafka via WebSockets, leveraging all their benefits such as performance, security, cross-platform, etc. For example, runs over HTTP(S), making it easy to connect to Kafka through a firewall without having to expose the broker address / ports 22 | * Any standard web socket library should work (we test / runn with the excellent [ws](https://www.npmjs.com/package/ws) library in production) 23 | * Multi tenant. 
A single proxy can handle many incoming clients / web socket connections. Client uniqueness is maintained via the topic / consumer group / partition combo.
* Auto offset management. Connect to kafka-proxy by either specifying an offset, or optionally letting the proxy manage the offset for you (recommended for development only at this time - more details below).
* Centralized reporting
* Stable. Observed to be running in production for weeks on end without a dropped web socket connection, processing 10M’s of messages
* High performance. Tested locally on a quad core PC at 30k+ messages / second
* Built on top of the excellent [no-kafka](https://www.npmjs.com/package/no-kafka) library (enabling connection directly to Kafka brokers), so kafka-proxy inherits the ability to set throughput rate (e.g. # of bytes per batch of messages, delay between messages).

## Usage
### Server
First, create a server which will connect to your Kafka broker(s) and also listen for any incoming web socket connections:

```
'use strict';
const KafkaProxy = require('kafka-proxy');

let kafkaProxy = new KafkaProxy({
    wsPort: 9999,
    kafka: 'localhost:9092/',
});

kafkaProxy.listen();
```

### Consuming messages

Then create a web socket client to listen for messages on the topic. This can be done easily through:

1. Your own WebSocket client
2. Using the included consumer.js file in the ./samples directory
3. By installing the [wscat](https://www.npmjs.com/package/wscat) client

This is an example wscat connection string:

```
wscat --connect "ws://127.0.0.1:9999/?topic=test&consumerGroup=group1"
```
That's it! Now whenever messages are sent to your Kafka broker for the topic "test", you'll receive them over this WebSocket.
An optional, but recommended, parameter that can be sent over the WebSocket URL is 'offset'. Kafka-Proxy will automatically maintain an offset for you, but there are cases where it can skip forwards (e.g. if your process crashes during receiving a batch, the whole batch can be marked as read). If you need accurate offset management, best results will be experienced by maintaining your own offset and passing it into the URL each time. For example:

```
wscat --connect "ws://127.0.0.1:9999/?topic=test&consumerGroup=group1&offset=1000"
```

The file ./samples/consumer.js shows an example of managing an offset locally by storing it in a file. Another good option is redis.

### Message format
Messages are received in batches (according to the set batch size) over the WebSocket in the following format:
```
[
{"message":"hello one","offset":225107},
{"message":"hello two","offset":225108},
{"message":"hello three","offset":225109}
]
```

### Startup Options
kafka-proxy can be constructed with the following optional parameters:
```
let kafkaProxy = new KafkaProxy({
    wsPort: 9999, // required
    kafka: 'localhost:9092/', // required
    idleTimeout: 100, // time to wait between batches
    maxBytes: 1000000, // the max size of a batch to be downloaded
    partition: 0, // the default partition to listen to
    auth: 'thisisapassword' // optional authentication
});
```

### Authentication
Basic HTTP authentication can be enabled by setting the "auth" parameter in the constructor. After this is set, it can be sent over the WebSocket. E.g.
```
wscat --connect "ws://127.0.0.1:9999/?topic=test&consumerGroup=group1" -H 'authorization: basic thisisapassword'
```


## Limitations
This is an early project. I started a new, clean repo as the old one had a long and unnecessary commit history.
It's a stable code base and we have this running in production for several months. A couple of notes / limitations: 101 | * This proxy is for receiving messages only. No sending capability yet. Our scenarios mostly have required getting messages off of Kafka to dev machines (rather than sending them back in directly). If there’s demand, I’ll add a sending capability too. 102 | * Make sure you set an appropriate throughput rate (using the **maxBytes** and **idleTimeout** variables) to avoid “back pressure”. If set higher than your client’s ability to process messages. kafka-proxy can send too quickly and crash your client by out of memory. 103 | * Auto offsetting. This is a useful feature but there are cases where messages can be skipped (e.g. if your process crashes halfway through receiving a batch). 104 | * If a batch of messages is sent greater in size than maxBytes, messages will not be consumable until maxBytes is set above this value. Needs investigation as to how to solve programmatically. 105 | 106 | ## Future features 107 | Planned future features: 108 | * Full test suite and CLI tools 109 | * Sending capability (current limited to receiving only) 110 | * Improved auto offset management 111 | * More robust handling of maxBytes e.g. via a warning / error message when exceeded 112 | -------------------------------------------------------------------------------- /lib/index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const kafka = require('no-kafka'), 4 | debug = require('debug')('kafka-ws-proxy'), 5 | querystring = require('querystring'), 6 | WebSocket = require('ws'); 7 | 8 | let errorCount = 0; 9 | 10 | class KafkaProxy { 11 | 12 | // constructor 13 | constructor(config) { 14 | // web socket stuff 15 | this.WebSocketServer = {}; 16 | this.wss = {}; 17 | this.ws = {}; 18 | if (!config.wsPort) { 19 | throw ("web socket port needed, please set e.g. 
{wsPort: 9999}"); 20 | } 21 | this.port = config.wsPort; 22 | 23 | if (!config.kafka) { 24 | throw ("kafka server address needed, please set e.g. {kafka: \"localhost:2181/\"}"); 25 | } 26 | 27 | // tracking variables 28 | this.clients = {} 29 | this.offsets = {}; 30 | this.consumers = {}; 31 | 32 | // config and default settings 33 | this.kafkaAddress = config.kafka; 34 | this.idleTimeout = Number.parseInt(config.idleTimeout || 1000); 35 | this.maxBytes = Number.parseInt(config.maxBytes || 500000); 36 | this.recordInterval = Number.parseInt(config.recordInterval || 5000); 37 | this.defaultPartition = config.partition || 0; 38 | this.auth = config.auth; 39 | } 40 | 41 | listen() { 42 | // setup websocket server 43 | this.WebSocketServer = WebSocket.Server, 44 | this.wss = new this.WebSocketServer({ port: this.port, verifyClient: (info, callback) => this._verifyClient(info, callback)}); 45 | debug('listening on web socket'); 46 | 47 | // once we have a web socket client connected 48 | this.wss.on('connection', (ws) => { 49 | this._registerTopic(ws); 50 | }); 51 | 52 | // error handling 53 | this.wss.on('error', (err) => { 54 | debug('something went wrong: ' + err); 55 | }); 56 | 57 | setInterval(() => { 58 | this._recordMetrics(); 59 | }, this.recordInterval); 60 | } 61 | 62 | // Connect to kafka server and topics 63 | _connectToKafka(consumerGroup, topic) { 64 | debug("Connecting to Kafka") 65 | this.consumers[consumerGroup][topic] = new kafka.SimpleConsumer( 66 | { 67 | 'connectionString': this.kafkaAddress, 68 | 'idleTimeout': this.idleTimeout, 69 | 'maxBytes': this.maxBytes, 70 | groupId: consumerGroup 71 | }); 72 | 73 | return this.consumers[consumerGroup][topic].init(); 74 | } 75 | 76 | _registerTopic(ws) { 77 | debug(`got new incoming web socket connection: ${ws.upgradeReq.url}`); 78 | // store this web socket connection 79 | if (ws.upgradeReq.url) { 80 | // querystrings always come in like this: /?param1=value1¶m2=value2 81 | let query = 
querystring.parse(ws.upgradeReq.url.slice(2)); 82 | let topic = query.topic; 83 | let requestOffset = Number.parseInt(query.offset); 84 | let consumerGroup = query.consumerGroup || 'default'; 85 | let partition = Number.parseInt(query.partition || this.defaultPartition); 86 | 87 | // tracking variables 88 | if (!this.clients[consumerGroup]) this.clients[consumerGroup] = {}; 89 | if (!this.offsets[consumerGroup]) this.offsets[consumerGroup] = {}; 90 | if (!this.consumers[consumerGroup]) this.consumers[consumerGroup] = {}; 91 | 92 | // if a client for this topic is already connected, kill it 93 | if (this.clients[consumerGroup][topic]) { 94 | debug(`client already connected to ${consumerGroup} / ${topic}. disconnecting`); 95 | this._disconnectClient(consumerGroup, topic, partition, () => { 96 | // wait for the kill to complete, then start listening 97 | this._startListening(ws, consumerGroup, topic, partition, requestOffset); 98 | }); 99 | } 100 | else { 101 | // if no existing client, start straight away 102 | this._startListening(ws, consumerGroup, topic, partition, requestOffset); 103 | } 104 | } 105 | } 106 | 107 | _startListening(ws, consumerGroup, topic, partition, requestOffset) { 108 | // Add the client to tracking tables 109 | this.clients[consumerGroup][topic] = this._clientFromWebsocket(ws, consumerGroup, topic); 110 | this.clients[consumerGroup][topic].ws.on('close', () => { 111 | debug(`close socket event raised for ${consumerGroup} / ${topic}`); 112 | this._disconnectClient(consumerGroup, topic, partition, () => { 113 | debug(`socket is closed and cleanup completed for ${consumerGroup} / ${topic}`); 114 | }); 115 | }); 116 | 117 | // save the partition this is no (reserved for future) 118 | this.clients[consumerGroup][topic].partition = partition; 119 | 120 | debug(`[${consumerGroup} / ${topic}] storing incoming connection, querying offset`); 121 | // if topic is not on this subscription 122 | if (!this.consumers[consumerGroup][topic]) { 123 | // 
reconnect to kafka to get the latest topics 124 | this._connectToKafka(consumerGroup, topic).then(() => { 125 | debug('registered consumer to kafka!!!!'); 126 | return this._subscribe(requestOffset, consumerGroup, topic, partition); 127 | }).catch((ex) => { 128 | // if we get an error on connecting, delete and return 129 | debug(`error subscribing to kafka topic. error: ${ex}`); 130 | return this._disconnectClient(consumerGroup, topic, partition, () => { 131 | // delete the consumer so we can try again next time 132 | delete this.consumers[consumerGroup][topic]; 133 | return; 134 | }); 135 | }); 136 | } 137 | else { 138 | return this._subscribe(requestOffset, consumerGroup, topic, partition); 139 | } 140 | } 141 | 142 | _subscribe(requestOffset, consumerGroup, topic, partition) { 143 | if (!requestOffset) { 144 | // if no offset has been supplied by the client, get the offset from kafka 145 | return this._getOffset(consumerGroup, topic, partition).then((result) =>{ 146 | if (result[0].error) { 147 | // the topic exists in kafka, just never had its offset set (i.e. not been read from) 148 | debug(`Error fetching offset for existing topic ${result[0].error}`); 149 | } 150 | 151 | debug(`[${consumerGroup + '/' + topic}] Got offset data: ${result[0].offset}`); 152 | this.offsets[consumerGroup][topic] = result[0].offset; 153 | // Subscribe to the topic 154 | return this.consumers[consumerGroup][topic].subscribe(topic, partition, { offset: (result[0].offset || 0) + 1 }, (messageSet, topicCombo, partition) => { 155 | this.clients[consumerGroup][topic].messageHandler(messageSet, consumerGroup + '/' + topic, partition) 156 | }); 157 | 158 | }).catch((err) => { 159 | debug(`.catch(error) fetching offset for existent topic: ${err}`); 160 | // if we end up here, it means the topic does not exist in kafka. 
reject the web socket 161 | return this._disconnectClient(consumerGroup, topic, partition, () => { 162 | // delete the consumer so we can try again next time 163 | delete this.consumers[consumerGroup][topic]; 164 | return; 165 | }); 166 | }).error((err) => { 167 | debug(`.error(error) fetching offset for existent topic: ${err}`); 168 | return this._disconnectClient(consumerGroup, topic, partition, () => { 169 | // delete the consumer so we can try again next time 170 | delete this.consumers[consumerGroup][topic]; 171 | return; 172 | }); 173 | }); 174 | } 175 | else { 176 | // the offset was supplied by the client 177 | return this.consumers[consumerGroup][topic].subscribe(topic, partition, { offset: requestOffset }, (messageSet, topicCombo, partition) => { 178 | this.clients[consumerGroup][topic].messageHandler(messageSet, consumerGroup + '/' + topic, partition) 179 | }); 180 | } 181 | } 182 | 183 | _getOffset(consumerGroup, topic, partition) { 184 | // get the current cursor position for this topic 185 | return this.consumers[consumerGroup][topic].fetchOffset([ 186 | { 187 | topic: topic, 188 | partition: partition 189 | } 190 | ]); 191 | } 192 | 193 | 194 | _batchMessageHandler(messageSet, topicCombo, partition) { 195 | // workaround... 
nokafka onyl allows three params to be sent to message handlers, so we just concat and then unpack 196 | let consumerGroup = topicCombo.split('/')[0]; 197 | let topic = topicCombo.split('/')[1]; 198 | let batchSize = messageSet.length - 1; 199 | this.clients[consumerGroup][topic].received = this.clients[consumerGroup][topic].received + messageSet.length; 200 | this.clients[consumerGroup][topic].backlog = this.clients[consumerGroup][topic].received - this.clients[consumerGroup][topic].sent; 201 | //debug(`[${consumerGroup} / ${topic}] received offset: ${messageSet[batchSize].offset}`); 202 | 203 | if (this.clients[consumerGroup][topic]) { 204 | this.clients[consumerGroup][topic].ws.send(JSON.stringify( 205 | messageSet.map((message) => { 206 | return { 207 | message: message.message.value.toString(), 208 | offset: message.offset 209 | } 210 | }) 211 | ), (err) => this._handleSendResponse(err, this.clients[consumerGroup][topic], consumerGroup, topic, partition, messageSet[batchSize].offset, messageSet.length)); 212 | } 213 | } 214 | 215 | _outOfRangeHandler(err) { 216 | debug(`Got offset error on kafka: ${JSON.stringify(err) }`); 217 | } 218 | 219 | _errorHandler(err) { 220 | debug(`got error on kafka: ${err}`); 221 | } 222 | 223 | _handleSendResponse(err, client, consumerGroup, topic, partition, offset, batchSize) { 224 | if (err) { 225 | // this case occurs when we have messages from kafka, and trying to send them to a closed web socket 226 | // no need to do anything in this case other than just silently drop the messages 227 | debug(`could not send to: ${consumerGroup} / ${topic} as web socket has disconnected`); 228 | } 229 | else { 230 | if (offset % 10000 == 0) { 231 | //debug(`[${consumerGroup} / ${topic}] sent offset: ${offset}`); 232 | } 233 | // we must check the client is still available before setting the offset 234 | // in the case when we're swapping the connection over to a conflicting web socket, it won't be 235 | if 
(this.clients[consumerGroup][topic]) { 236 | this.clients[consumerGroup][topic].sent = this.clients[consumerGroup][topic].sent + batchSize; 237 | this.clients[consumerGroup][topic].backlog = this.clients[consumerGroup][topic].received - this.clients[consumerGroup][topic].sent; 238 | 239 | // commits will happen after every batch is handled. this will be frequent when messages trickle in and infrequent when we're behind 240 | this.consumers[consumerGroup][topic].commitOffset([{ topic: topic, partition: partition, offset: offset }]) 241 | .catch((ex) => { 242 | debug(`error setting offset for consumer group [${consumerGroup}], topic [${topic}]: ${ex}`); 243 | }); 244 | 245 | this.offsets[consumerGroup][topic] = offset; 246 | } 247 | } 248 | } 249 | 250 | // this case is triggered when we try to send a client which has an error, or a new client connects 251 | _disconnectClient(consumerGroup, topic, partition, callback) { 252 | let client = this.clients[consumerGroup][topic]; 253 | 254 | // this will only be true if we've called disconnect already 255 | if (!client) return; 256 | 257 | // Remove the client from tracking 258 | delete this.clients[consumerGroup][topic]; 259 | debug('removed client from active connection pool'); 260 | 261 | // unsubscribe from kafka 262 | return this.consumers[consumerGroup][topic].unsubscribe([topic], partition).then(() => { 263 | debug(`Successfully unsubscribed!!! from ${topic}`); 264 | 265 | if (this.offsets[consumerGroup][topic]) { 266 | // Save the last knwown offset 267 | debug(`Latest offset is ${this.offsets[consumerGroup][topic]}, writing to kafka`) 268 | this.consumers[consumerGroup][topic].commitOffset([{ topic: topic, partition: partition, offset: this.offsets[consumerGroup][topic] }]); 269 | } 270 | 271 | // if the client is not already closing, forcefully close it... 
this occurs when in the case of conflicting clients 272 | if (client.ws.readyState != WebSocket.CLOSING && client.ws.readyState != WebSocket.CLOSED) { 273 | debug(`force closing websocket ${consumerGroup + '/' + topic}`); 274 | 275 | // ws.close() does work but takes some time to finish sending, so need to listen for a successful close 276 | client.ws.close(); 277 | 278 | // only need to raise the callback via an event handler, if one has been requested 279 | client.ws.on('close', () => { 280 | // once the client has finished sending, it will close and trigger here 281 | // optional callback so we can notify when we're done 282 | debug(`force close of websocket ${consumerGroup + '/' + topic} completed`); 283 | return callback(); 284 | }); 285 | } 286 | else { 287 | return callback(); 288 | } 289 | }); 290 | } 291 | 292 | _clientFromWebsocket(ws, consumerGroup, topic) { 293 | return { 294 | // Client web socket 295 | ws: ws, 296 | // method for handling messages 297 | messageHandler: (messageSet, topic, partition) => this._batchMessageHandler(messageSet, topic, partition), 298 | // Desired offset for the client 299 | offset: (this.offsets[consumerGroup][topic]) || -1, 300 | // Tracking 301 | partition: 0, 302 | // Tracking 303 | received: 0, 304 | sent: 0, 305 | backlog: 0, 306 | createdAt: new Date() 307 | }; 308 | } 309 | 310 | _verifyClient(info, callback) { 311 | // optional authorization 312 | // currently support only basic http 313 | if (this.auth && 314 | (('basic ' + this.auth) != info.req.headers.authorization && ('Basic ' + this.auth) != info.req.headers.authorization)) { 315 | debug(`Unauthorized client connection for ${info.req.url}. 
Rejecting 401`) 316 | return callback(false, 401, "unauthorized"); 317 | } 318 | return callback(true); 319 | } 320 | 321 | _recordMetrics() { 322 | debug(`************ REPORTING **************`); 323 | Object.keys(this.clients).forEach((consumerGroup) => { 324 | debug(`******* Consumer Group: (${consumerGroup}) *********`); 325 | Object.keys(this.clients[consumerGroup]).forEach((topicName) => { 326 | debug(`******* Topic / Partition: ${topicName} / ${this.clients[consumerGroup][topicName].partition} *********`); 327 | debug(`[${topicName}] latest offset: ${this.offsets[consumerGroup][topicName]}`); 328 | debug(`[${topicName}] backlog: ${this.clients[consumerGroup][topicName].backlog}`); 329 | debug(`[${topicName}] received: ${this.clients[consumerGroup][topicName].received}`); 330 | debug(`[${topicName}] sent: ${this.clients[consumerGroup][topicName].sent}`); 331 | let duration = (new Date().getTime() - this.clients[consumerGroup][topicName].createdAt.getTime()) / 1000; 332 | debug(`[${topicName}] duration: ${duration}`); 333 | debug(`[${topicName}] throughput: ${this.clients[consumerGroup][topicName].sent / duration}`); 334 | }); 335 | }); 336 | debug(`************************************`); 337 | } 338 | } 339 | 340 | module.exports = KafkaProxy; 341 | 342 | --------------------------------------------------------------------------------