├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── index.js ├── lib └── responseParser.js ├── package.json └── test ├── bloomd_test.js └── words.txt /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | 10 | pids 11 | logs 12 | results 13 | node_modules 14 | 15 | npm-debug.log 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | language: node_js 3 | node_js: 4 | - '0.10' 5 | - '4.2' 6 | - 'node' 7 | env: 8 | - CXX=g++-4.8 9 | before_install: 10 | - sudo apt-get -qq update 11 | - sudo apt-get install -y scons 12 | - git clone https://armon@github.com/armon/bloomd.git /tmp/bloomd 13 | - pushd /tmp/bloomd 14 | - scons 15 | - sudo cp ./bloomd /usr/local/bin/ 16 | - popd 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and 10 | distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 
17 | For the purposes of this definition, "control" means (i) the power, direct or 18 | indirect, to cause the direction or management of such entity, whether by 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising 23 | permissions granted by this License. 24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation or 30 | translation of a Source form, including but not limited to compiled object code, 31 | generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 
43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 
70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 
85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 
111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are 140 | solely responsible for determining the appropriateness of using or 141 | redistributing the Work and assume any risks associated with Your exercise of 142 | permissions under this License. 143 | 144 | 8. Limitation of Liability. 
145 | 146 | In no event and under no legal theory, whether in tort (including negligence), 147 | contract, or otherwise, unless required by applicable law (such as deliberate 148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 149 | liable to You for damages, including any direct, indirect, special, incidental, 150 | or consequential damages of any character arising as a result of this License or 151 | out of the use or inability to use the Work (including but not limited to 152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 153 | any and all other commercial damages or losses), even if such Contributor has 154 | been advised of the possibility of such damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. 157 | 158 | While redistributing the Work or Derivative Works thereof, You may choose to 159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 160 | other liability obligations and/or rights consistent with this License. However, 161 | in accepting such obligations, You may act only on Your own behalf and on Your 162 | sole responsibility, not on behalf of any other Contributor, and only if You 163 | agree to indemnify, defend, and hold each Contributor harmless for any liability 164 | incurred by, or claims asserted against, such Contributor by reason of your 165 | accepting any such warranty or additional liability. 166 | 167 | END OF TERMS AND CONDITIONS 168 | 169 | APPENDIX: How to apply the Apache License to your work 170 | 171 | To apply the Apache License to your work, attach the following boilerplate 172 | notice, with the fields enclosed by brackets "[]" replaced with your own 173 | identifying information. (Don't include the brackets!) The text should be 174 | enclosed in the appropriate comment syntax for the file format. 
We also 175 | recommend that a file or class name and description of purpose be included on 176 | the same "printed page" as the copyright notice for easier identification within 177 | third-party archives. 178 | 179 | Copyright 2013 Medium 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. 192 | 193 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/Medium/node-bloomd.svg?branch=master)](https://travis-ci.org/Medium/node-bloomd) 2 | node-bloomd 3 | =========== 4 | 5 | A NodeJS client for [Bloomd](https://github.com/armon/bloomd) 6 | 7 | Features 8 | -------- 9 | 10 | * Complete support for all Bloomd's commands. 11 | * Fast performance: insertion of 235k items in ~600ms on a 2010 MBP, over localhost. 12 | * Familiar interface, similar to node-redis 13 | * A number of useful extensions over and above bloomd's default behaviour: 14 | - [set|bulk|check|multi|info]Safe() commands to automatically create a filter if it doesn't exist when running a filter-specific command. 15 | - Squashing non-existent filter errors on drop. 16 | 17 | Install 18 | ------- 19 | 20 | npm install bloomd 21 | 22 | Requirements 23 | ------------ 24 | 25 | node-bloomd uses stream transforms, and therefore requires Node 0.10 or later. 
26 | 27 | Usage 28 | ----- 29 | 30 | Create a client, then call bloomd commands directly on it. A simple example: 31 | 32 | 33 | ```js 34 | var bloomd = require('./index'), 35 | client = bloomd.createClient() 36 | 37 | client.on('error', function (err) { 38 | console.log('Error:' + err) 39 | }) 40 | 41 | function printer(error, data) { 42 | console.log(data) 43 | } 44 | 45 | client.list(null, bloomd.print) 46 | client.create('newFilter', {}, printer) 47 | client.info('newFilter', bloomd.print) 48 | client.check('newFilter', 'monkey', printer) 49 | client.set('newFilter', 'monkey', printer) 50 | client.check('newFilter', 'monkey', printer) 51 | client.bulk('newFilter', ['monkey', 'magic', 'muppet'], printer) 52 | client.multi('newFilter', ['monkey', 'magic', 'muppet'], printer) 53 | client.info('newFilter', bloomd.print) 54 | client.drop('newFilter', printer) 55 | client.dispose() 56 | ``` 57 | 58 | Client Options 59 | -------------- 60 | 61 | A number of config options are available for the client: 62 | 63 | * ```host [127.0.0.1]```: The host of bloomd to connect to. 64 | * ```port [8673]```: The port to connect on. 65 | * ```debug [false]```: Outputs debug information to the log. 66 | * ```reconnectDelay [160]```: The base amount of time in ms to wait between reconnection attempts. This number is multiplied by the current count of reconnection attempts to give a measure of backoff. 67 | * ```maxConnectionAttempts [0]```: The amount of times to try to get a connection to bloomd, after which the client will declare itself unavailable. 0 means no limit. 68 | 69 | Memorable Commands 70 | ------------------ 71 | 72 | Pop quiz: Bulk and Multi - which is used for batch checking, and which is used for batch setting? I 73 | can never remember either. node-bloomd helps out by providing two methods to make it explicit: 74 | ```multiCheck()``` and ```bulkSet()```. Use them. The maintainers of your code will thank you. 
75 | 76 | 'Safe' Commands 77 | --------------- 78 | 79 | Typically, when issuing a ```set```, ```check```, ```bulk```, or ```multi``` command, 80 | bloomd will respond with "Filter does not exist" if the filter has not been created. node-bloomd 81 | provides 'safe' versions of these commands which auto-create the filter in this situation. These 82 | are ```setSafe()```, ```checkSafe()```, ```bulkSafe()```, and ```multiSafe()```. 83 | 84 | The method signatures of these are the same as the non-safe equivalent, with the addition of an optional 85 | createOptions parameter, which can be used to control the configuration of the filter that might be created. 86 | 87 | There is overhead to co-ordinating all this (see below), so if you are sure that a filter exists, 88 | you should use the non-safe version of the command. 89 | 90 | Subsequent commands issued to the same filter are guaranteed to happen after both the creation command 91 | and the safe command that triggered the creation, even if the filter didn't previously exist. For example: 92 | 93 | ```js 94 | var bloomd = require('./index'), 95 | client = bloomd.createClient() 96 | 97 | client.bulkSafe('nonExistent', ['a', 'b', 'c', 'd'], function(error, data) { 98 | console.log('First, we created and bulk set some values') 99 | }, { 100 | prob: 0.01, 101 | capacity: 50000 102 | }) 103 | 104 | client.check('nonExistent', 'a', function (error, data) { 105 | console.log('This will run second, and will be true') 106 | }) 107 | ``` 108 | 109 | In order to do this, when a safe command is issued, subsequent commands on the same filter are held 110 | until we have attempted to create the filter and process the original safe command. 111 | 112 | This requires the use of a per-filter sub-queue, which is then processed when both the create command 113 | and the originating command have completed. While not a huge overhead, it is certainly slower than just 114 | the non-safe version of the command. 
115 | 116 | In order of speed, from fastest to slowest: 117 | 118 | * set(). 119 | * setSafe(), where the filter already exists. 120 | * setSafe() on a non-existent filter. 121 | 122 | Note that a safe command can still fail if the create method fails. Typically, this happens due to bad 123 | creation parameters, such as too low a capacity being chosen. To aid with debugging, in this instance, 124 | the error passed to the safe command's callback will be the reason that the filter creation failed, not 125 | the reason that the safe command failed (which would be, in all cases "Filter does not exist"). Any 126 | subsequent commands that were also queued will still fail with "Filter does not exist". 127 | 128 | Finally, 'safe' is a terrible designation, and I welcome suggestions for a better name. 129 | 130 | Still To Do 131 | ----------- 132 | 133 | * More Error checking. 134 | * Instrumentation and optimisation. 135 | * Better documentation. 136 | * Auto-retry of filter creation when failing due to the filter having recently been dropped. 137 | 138 | Contributions 139 | ------------- 140 | 141 | Questions, comments, bug reports and pull requests are all welcomed. 142 | 143 | In particular, improvements that address any of the tasks on the above 144 | list would be great. 145 | 146 | Author 147 | ------ 148 | 149 | [Jamie Talbot](https://github.com/majelbstoat), supported by 150 | [Medium](https://medium.com). 151 | 152 | License 153 | ------- 154 | 155 | Copyright 2013 [The Obvious Corporation](https://medium.com) 156 | 157 | Licensed under Apache License Version 2.0. Details in the attached LICENSE 158 | file. 
159 | 160 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Obvious Corporation 2 | 3 | var net = require('net'), 4 | events = require('events'), 5 | ResponseParser = require('./lib/responseParser').ResponseParser, 6 | responseTypes = ResponseParser.responseTypes, 7 | util = require('util'), 8 | defaultPort = 8673, 9 | defaultHost = '127.0.0.1' 10 | 11 | /** 12 | * A client for BloomD (https://github.com/armon/bloomd) 13 | * 14 | * Requires Node 0.10's stream transformations. 15 | * 16 | * Opens a single stream and continually writes data to it, offloading the 17 | * resulting data to a parser and then applying a queued callback to the response. 18 | * This relies on the fact that queries to bloomd are answered in the 19 | * order that they were made. 20 | * 21 | * The documentation consistently states that checks will return true if the key 22 | * is in the filter. Of course, this is only 'true' in the context of bloom filters. 23 | * False positives are possible. False negatives are not. 24 | * 25 | * More info here: http://en.wikipedia.org/wiki/Bloom_filter 26 | * 27 | * TODO(jamie) 28 | * + Handle list with prefix after safe commands. 29 | * + Safe creation after dropping 30 | * + Mock service for better testing. 31 | * ? StreamNoDelay configuration 32 | * 33 | * Options are: 34 | * 35 | * debug [false] Emit debug information 36 | * maxConnectionAttempts [0] The number of times to try reconnecting. 0 for infinite. 37 | * reconnectDelay [160] The additional time in ms between each reconnect retry. 38 | * maxErrors [0] The number of internal errors received from bloomd after which time 39 | * the service is marked as unavailable. 0 for infinite. 
/**
 * Builds a client around an existing stream: records the options, creates the
 * empty queues and counters, and subscribes to the stream's lifecycle events.
 * The constructor does not connect the stream itself.
 *
 * @param {Object} stream Stream to bloomd; must emit 'connect', 'error',
 *     'close', 'end' and 'drain'.
 * @param {Object} options Client options. A missing value is treated as {}.
 */
function BloomClient(stream, options) {
  // A caller may construct the client without options; fall back to an empty
  // object so every option below resolves to its default instead of throwing.
  options = options || {}

  this.options = options
  this.responseParser = null

  // Connection handling
  this.disposed = false
  this.connected = false
  this.stream = stream
  this.connectionAttempts = 1
  this.maxConnectionAttempts = options.maxConnectionAttempts || 0
  this.reconnectDelay = options.reconnectDelay || 160
  this.reconnector = null

  // Queue handling
  this.unavailable = false
  this.buffering = true
  this.commandQueue = []
  this.offlineQueue = []
  this.commandsSent = 0
  this.filterQueues = {}

  // Error handling
  this.maxErrors = options.maxErrors || 0
  this.errors = 0

  var self = this

  stream.on('connect', function () {
    self._onConnect()
  })

  stream.on('error', function (err) {
    // Only the message text is forwarded; tolerate an 'error' event that
    // carries no error object.
    self._onError(err && err.message)
  })

  stream.on('close', function () {
    self._connectionClosed('close')
  })

  stream.on('end', function () {
    self._connectionClosed('end')
  })

  stream.on('drain', function () {
    self._drain()
  })

  events.EventEmitter.call(this)
}
util.inherits(BloomClient, events.EventEmitter)

// API

/**
 * Reports the client's buffering flag. The flag starts out true and is set
 * back to true whenever the connection closes.
 *
 * @return {bool}
 */
BloomClient.prototype.isBuffering = function () {
  return this.buffering
}

/**
 * Allows client code to request a reconnection.
 *
 * node-bloomd automatically attempts to connect or reconnect to the bloomd
 * server. However, if a maxConnectionAttempts option is specified, or if the
 * maximum number of internal errors is reached, node-bloomd eventually
 * gives up. This method allows long running processes to request
 * a reconnection in that eventuality.
 */
/**
 * This command is ignored unless the client is marked unavailable, because if
 * it is not marked unavailable, we are still going through the standard retry
 * progression.
 */
BloomClient.prototype.reconnect = function () {
  if (!this.unavailable) {
    return
  }

  // Reset the critical data.
  this.unavailable = false
  this.totalReconnectionTime = 0
  // Zero attempts makes the first retry delay zero (delay = attempts * reconnectDelay).
  this.connectionAttempts = 0
  this.errors = 0
  this._reconnect()
}

/**
 * Closes the connection to BloomD.
 *
 * Sets the disposed flag, which the reconnection logic checks before trying
 * to connect again, and then ends the stream.
 */
BloomClient.prototype.dispose = function () {
  this.disposed = true
  this.stream.end()
}

// Standard Bloomd Commands

/**
 * Creates a named bloom filter.
 *
 * A number of options are available. Stated defaults are for those of
 * a bloomd server with default configuration. Check your server for
 * specifics.
 *
 * prob [0.0001] - The desired probability of false positives.
 * capacity [100000] - The required initial capacity of the filter.
 * in_memory [0] - Whether the filter should exist only in memory, with no disk backing.
 *
 * Each own option is sent as a "key=value" argument after the filter name.
 *
 * The data passed back to the callback will be true on success, null otherwise.
 *
 * @param {string} filterName
 * @param {Object} options
 * @param {Function} callback
 */
BloomClient.prototype.create = function (filterName, options, callback) {
  var self = this
  var args = [filterName]
  var createOptions = options || {}

  // Own keys only, so properties inherited through the prototype chain are
  // never serialised into the command.
  Object.keys(createOptions).forEach(function (key) {
    args.push(key + '=' + createOptions[key])
  })

  this._process('create', filterName, args, responseTypes.CREATE_CONFIRMATION, function (error, data) {
    try {
      // First, run the callback.
      if (callback) {
        callback.call(callback, error, data)
      }
    } finally {
      // Then, clear the filter queue if we have one. This has to happen even
      // when the callback throws, otherwise commands held for this filter
      // would never be released.
      self._clearFilterQueue(filterName)
    }
  })
}

/**
 * Lists filters matching the specified optional prefix.
 *
 * If no prefix is specified, all filters are returned.
 *
 * The data passed back to the callback will be an array of BloomFilter objects.
 *
 * @param {string} prefix
 * @param {Function} callback
 */
BloomClient.prototype.list = function (prefix, callback) {
  var args = prefix ? [prefix] : []
  this._process('list', null, args, responseTypes.FILTER_LIST, callback)
}

/**
 * Drops the specified filter.
 *
 * The data passed back to the callback will be true if the filter was dropped successfully.
 *
 * @param {string} filterName
 * @param {Function} callback
 */
BloomClient.prototype.drop = function (filterName, callback) {
  this._process('drop', filterName, [filterName], responseTypes.DROP_CONFIRMATION, callback)
}

/**
 * Closes a filter.
 *
 * @param {string} filterName
 * @param {Function} callback
 */
BloomClient.prototype.close = function (filterName, callback) {
  this._process('close', filterName, [filterName], responseTypes.CONFIRMATION, callback)
}

/**
 * Clears a filter.
 *
 * @param {string} filterName
 * @param {Function} callback
 */
BloomClient.prototype.clear = function (filterName, callback) {
  this._process('clear', filterName, [filterName], responseTypes.CONFIRMATION, callback)
}

/**
 * Checks to see if a key is set in the filter.
 *
 * The data passed back to the callback will be true if it is
 * in the filter or false, if it is not.
 */
/**
 * @param {string} filterName
 * @param {string} key
 * @param {Function} callback
 */
BloomClient.prototype.check = function (filterName, key, callback) {
  var command = this._buildCheckCommand(filterName, key, callback)
  this._handle(command)
}

/**
 * Tests several keys against a filter in one command.
 *
 * The callback's data is an object map from each key to a boolean
 * indicating its presence in the filter.
 *
 * @param {string} filterName
 * @param {Array} keys
 * @param {Function} callback
 */
BloomClient.prototype.multi = function (filterName, keys, callback) {
  var command = this._buildMultiCommand(filterName, keys, callback)
  this._handle(command)
}

/**
 * Adds a single key to a filter.
 *
 * The callback's data is true when the key was newly set and false when it
 * was already in the filter. The latter is not considered an error.
 *
 * @param {string} filterName
 * @param {string} key
 * @param {Function} callback
 */
BloomClient.prototype.set = function (filterName, key, callback) {
  var command = this._buildSetCommand(filterName, key, callback)
  this._handle(command)
}

/**
 * Adds several keys to a filter in one command.
 *
 * The callback's data is an object map from each key to a boolean: true if
 * the key was newly set, false if it was already in the set.
 *
 * @param {string} filterName
 * @param {Array} keys
 * @param {Function} callback
 */
BloomClient.prototype.bulk = function (filterName, keys, callback) {
  var command = this._buildBulkCommand(filterName, keys, callback)
  this._handle(command)
}

/**
 * Retrieves information about the specified filter.
 *
 * The data passed back to the callback will be a single BloomFilter object.
 *
 * @param {string} filterName
 * @param {Function} callback
 */
BloomClient.prototype.info = function (filterName, callback) {
  var args = [filterName]
  this._process('info', filterName, args, responseTypes.INFO, callback)
}

/**
 * Flushes filters to disk.
 *
 * With a filter name only that filter is flushed; without one the command is
 * sent with no arguments, which flushes all filters.
 *
 * @param {string} filterName
 * @param {Function} callback
 */
BloomClient.prototype.flush = function (filterName, callback) {
  var args = []
  if (filterName) {
    args.push(filterName)
  }
  this._process('flush', filterName, args, responseTypes.CONFIRMATION, callback)
}

// 'Safe' Commands

/** Builds the 'check' command for one key; the response is a single boolean. */
BloomClient.prototype._buildCheckCommand = function (filterName, key, callback) {
  var args = [filterName, key]
  return this._buildCommand('check', filterName, args, responseTypes.BOOL, callback)
}

/** Builds the 'multi' command; the caller's keys array is copied, not mutated. */
BloomClient.prototype._buildMultiCommand = function (filterName, keys, callback) {
  var args = keys.slice(0)
  args.splice(0, 0, filterName)
  return this._buildCommand('multi', filterName, args, responseTypes.BOOL_LIST, callback)
}

/** Builds the 'set' command for one key; the response is a single boolean. */
BloomClient.prototype._buildSetCommand = function (filterName, key, callback) {
  var args = [filterName, key]
  return this._buildCommand('set', filterName, args, responseTypes.BOOL, callback)
}

/** Builds the 'bulk' command; the caller's keys array is copied, not mutated. */
BloomClient.prototype._buildBulkCommand = function (filterName, keys, callback) {
  var args = keys.slice(0)
  args.splice(0, 0, filterName)
  return this._buildCommand('bulk', filterName, args, responseTypes.BOOL_LIST, callback)
}


/**
 * Safe versions of standard functions.
 * Each listed command gets a prototype method named after it with a 'Safe'
 * suffix: setSafe, checkSafe, bulkSafe and multiSafe.
 *
 * @see _makeSafe()
 */
var _safeCommands = ['set', 'check', 'bulk', 'multi']
_safeCommands.forEach(function (name) {
  BloomClient.prototype[name + 'Safe'] = _makeSafe(name)
})

// Extended Commands

/**
 * Alias for bulk, for ease of remembering.
 *
 * Bulk sets many items.
 *
 * @see BloomClient.prototype.bulk
 */
BloomClient.prototype.bulkSet = BloomClient.prototype.bulk
BloomClient.prototype.bulkSetSafe = BloomClient.prototype.bulkSafe

/**
 * Alias for multi, for ease of remembering.
 *
 * Multi checks many items.
 *
 * @see BloomClient.prototype.multi
 */
BloomClient.prototype.multiCheck = BloomClient.prototype.multi
BloomClient.prototype.multiCheckSafe = BloomClient.prototype.multiSafe

// Private Methods

/**
 * Fires when the parser is able to send back a complete response from the server.
 *
 * Because operations are performed in the order they are received, we can safely
 * unshift a command off the queue and use it to match the response to the callback
 * that is waiting for it.
 */
/**
 * Drains every response the parser currently has available, pairing each one
 * with the oldest command on the command queue and invoking that command's
 * callback with (error, data).
 *
 * An error response increments the error counter and yields a
 * 'Bloomd Internal Error'; any other response decrements the counter (never
 * below zero) and is decoded according to the command's response type. Once
 * maxErrors is set and reached, the client is marked unavailable, but only
 * after the command that received the error has had its callback invoked.
 */
BloomClient.prototype._onReadable = function () {
  var response
  while ((response = this.responseParser.read())) {
    var command = this.commandQueue.shift()

    if (!command) {
      // Nothing is waiting for this response (for example, the queue was
      // rejected when the client became unavailable). Drop it rather than
      // dereferencing an undefined command.
      if (this.options.debug) {
        console.warn('Discarding response with no pending command')
      }
      continue
    }

    var error = null
    var data = null
    var errorLimitReached = false

    if (this.options.debug) {
      _timer(command.started, 'Response received for: ' + command.filterName + ' ' + command.arguments[0])
    }

    if (ResponseParser.isError(response)) {
      this.errors++
      errorLimitReached = Boolean(this.maxErrors) && this.errors >= this.maxErrors
      error = new Error('Bloomd Internal Error')
    } else {
      if (this.errors > 0) {
        this.errors--
      }
      try {
        data = _parseCommandResponse(command, response)
      } catch (err) {
        // A command may carry its own, more specific error to report.
        error = command.error || err
      }
    }

    // Callbacks are optional.
    if (command.callback) {
      if (error) {
        error.command = command.arguments
      }
      command.callback(error, data)
    }

    if (errorLimitReached) {
      // Reject everything still outstanding. The loop keeps running so the
      // parser is fully drained; leftover responses hit the guard above.
      this._unavailable()
    }
  }
}

/**
 * Decodes a raw response with the ResponseParser routine that matches the
 * command's response type.
 *
 * @param {Object} command The command the response belongs to.
 * @param {*} response The raw response read from the parser.
 * @return {*} The decoded data.
 * @throws {Error} If the command's response type is not recognised.
 */
function _parseCommandResponse(command, response) {
  switch (command.responseType) {
    case responseTypes.BOOL:
      return ResponseParser.parseBool(response)

    case responseTypes.BOOL_LIST:
      // The keys are everything after the first two entries of the command's
      // arguments (presumably the command name and filter name - confirm
      // against _buildCommand).
      return ResponseParser.parseBoolList(response, command.arguments.slice(2))

    case responseTypes.FILTER_LIST:
      return ResponseParser.parseFilterList(response)

    case responseTypes.CONFIRMATION:
      return ResponseParser.parseConfirmation(response)

    case responseTypes.CREATE_CONFIRMATION:
      return ResponseParser.parseCreateConfirmation(response)

    case responseTypes.DROP_CONFIRMATION:
      return ResponseParser.parseDropConfirmation(response)

    case responseTypes.INFO:
      return ResponseParser.parseInfo(response, command.filterName)

    default:
      throw new Error('Unknown response type: ' + command.responseType)
  }
}

/**
 * Fires when the underlying stream connects.
 */
/**
 * Fires when the underlying stream connects.
 *
 * Installs a fresh response parser on the stream, marks the client as connected
 * and available, announces 'connected', and starts sending buffered commands.
 */
BloomClient.prototype._onConnect = function () {
  if (this.options.debug) {
    console.log('Connected to ' + this.options.host + ':' + this.options.port)
  }

  this.unavailable = false

  // _reconnect() re-uses the same stream, so this handler can run more than once.
  // Detach the parser from any previous connection first, so the stream is never
  // piped into a stale parser that nobody reads from.
  if (this.responseParser) {
    this.stream.unpipe(this.responseParser)
    this.responseParser.removeAllListeners('readable')
  }

  this.responseParser = new ResponseParser(this)
  this.stream.pipe(this.responseParser)

  var self = this
  this.responseParser.on('readable', function() {
    self._onReadable()
  })

  this.connected = true
  this.emit('connected')
  this._drain()
}

/**
 * Fires when there is an error on the underlying stream.
 *
 * @param {*} msg the error reported by the stream; only used for debug output
 */
BloomClient.prototype._onError = function (msg) {
  var message = 'Connection failed to ' + this.options.host + ':' + this.options.port + ' (' + msg + ')'
  if (this.options.debug) {
    console.warn(message)
  }

  this.connected = false
  this._connectionClosed('error')
}

/**
 * Fires when a connection is closed, either through error, or naturally.
 *
 * Switches the client to buffering, announces 'disconnected' and schedules
 * a reconnection attempt.
 *
 * @param {string} reason
 */
BloomClient.prototype._connectionClosed = function (reason) {
  if (this.options.debug) {
    console.warn('Connection closed (' + reason + ')')
  }
  this.buffering = true

  this.emit('disconnected')
  this._reconnect()
}

/**
 * Attempts to reconnect to the underlying stream.
 *
 * Does nothing if a reconnection is already scheduled or the client is unavailable.
 * Marks the client unavailable instead of reconnecting when it has been disposed or
 * the maximum number of connection attempts has been used up.
 */
BloomClient.prototype._reconnect = function () {
  if (this.reconnector || this.unavailable) {
    // A reconnection attempt is already pending, or the client has already been
    // marked unavailable, so there is nothing more to schedule.
    return
  }

  var self = this

  if (this.disposed || (this.maxConnectionAttempts && (this.connectionAttempts >= this.maxConnectionAttempts))) {
    // We've hit the max number of connection attempts, or we have been disposed.
    // Mark the client as unavailable, which will also reject the various queues.
    if (this.options.debug) {
      console.log('Bloomd is unavailable.')
    }
    this._unavailable()
    return
  }

  // Simple linear back-off: the delay grows by `reconnectDelay` ms for every attempt already made.
  var reconnectDelay = this.connectionAttempts * this.reconnectDelay

  this.connectionAttempts++
  this.reconnector = setTimeout(function () {
    if (self.options.debug) {
      console.log('Connecting: attempt (' + self.connectionAttempts + ')')
    }
    // The client may have been disposed while the timer was pending.
    if (!self.disposed) {
      self.stream.connect(self.options.port, self.options.host)
    }
    self.reconnector = null
  }, reconnectDelay)
  // Don't let a pending reconnection timer keep the process alive on its own.
  this.reconnector.unref()
}

/**
 * Marks the client as unavailable.
 *
 * An unavailable client will fail all commands sent to it immediately, as well
 * as fail all commands that have been requested but not responded to.
 */
BloomClient.prototype._unavailable = function() {
  var command
  this.unavailable = true

  // Clear the command queue (sent, awaiting a response).
  while (command = this.commandQueue.shift()) {
    this._rejectCommand(command)
  }

  // Clear the commands that were buffered and never sent.
  while (command = this.offlineQueue.shift()) {
    this._rejectCommand(command)
  }

  // Clear the commands held in per-filter sub-queues.
  for (var filterName in this.filterQueues) {
    var queue = this.filterQueues[filterName]
    while (command = queue.shift()) {
      this._rejectCommand(command)
    }
  }

  this.filterQueues = {}

  // Announce that we are unavailable.
  this.emit('unavailable')
}
/**
 * Rejects a command that cannot be processed.
 *
 * If a callback was specified, it will be called with an error.
 *
 * @param {Object} command
 */
BloomClient.prototype._rejectCommand = function (command) {
  if (this.options.debug) {
    console.log('Rejecting command:', command.arguments[0], command.filterName)
  }
  if (command.callback) {
    command.callback(new Error('Bloomd is unavailable'), null)
  }
}

/**
 * Convenience function to build and handle a command.
 *
 * @param {string} commandName
 * @param {string} filterName
 * @param {Array} args
 * @param {string} responseType one of ResponseParser.responseTypes
 * @param {Function} callback
 */
BloomClient.prototype._process = function (commandName, filterName, args, responseType, callback) {
  this._handle(this._buildCommand(commandName, filterName, args, responseType, callback))
}

/**
 * Prepares a command from the supplied arguments.
 *
 * The returned command's `arguments` array is the command name followed by `args`.
 * It is always a new array; the `args` array passed in is not modified.
 *
 * @param {string} commandName
 * @param {string} filterName
 * @param {Array} args optional; treated as empty when omitted
 * @param {string} responseType one of ResponseParser.responseTypes
 * @param {Function} callback
 * @return {Object} a command with filterName, arguments, responseType and callback
 */
BloomClient.prototype._buildCommand = function (commandName, filterName, args, responseType, callback) {
  return {
    filterName: filterName,
    // Build a fresh array rather than unshifting onto `args`, so the caller's array is left untouched.
    arguments: [commandName].concat(args || []),
    responseType: responseType,
    callback: callback
  }
}
/**
 * Prepares a command to be sent. If the stream is ready to receive a command,
 * sends it immediately, otherwise queues it up to be sent when the stream is ready.
 *
 * A command is rejected outright when the client is unavailable. A command for a
 * filter that currently has a sub-queue is held in that sub-queue, unless it is a
 * 'create' command or `clearing` is set.
 *
 * @param {Object} command
 * @param {boolean} clearing true when the command must bypass the filter
 *     sub-queue (it is being released from it, or is a retry)
 */
BloomClient.prototype._handle = function (command, clearing) {
  var commandName = command.arguments[0]
  var filterName = command.filterName

  if (this.unavailable) {
    this._rejectCommand(command)
    return
  }

  // Only a queue stored on filterQueues itself counts. A plain property lookup would also
  // match inherited members such as 'constructor' or 'toString' for filters with those names.
  var filterQueue = (filterName && Object.prototype.hasOwnProperty.call(this.filterQueues, filterName))
      ? this.filterQueues[filterName]
      : null

  if (filterQueue && ('create' !== commandName) && !clearing) {
    // There are other commands outstanding for this filter, so hold this one until they are processed.
    if (this.options.debug) {
      console.log('Holding command in filter sub-queue:', commandName, filterName)
    }
    filterQueue.push(command)
    return
  }

  if (this.buffering) {
    if (this.options.debug) {
      console.log('Buffering command:', commandName)
    }
    this.offlineQueue.push(command)
  } else {
    if (this.options.debug) {
      console.log('Processing:', commandName)
    }
    this._send(command)
  }
}
/**
 * Writes a command to the stream as a single space-separated line and records it
 * on the command queue, so that the response can be matched to it when it arrives.
 *
 * If the stream reports that its buffer is full, the client switches to buffering.
 *
 * Returns whatever the stream's write() returned, i.e. whether the line was
 * processed entirely.
 *
 * @param {Object} command
 * @return {boolean}
 */
BloomClient.prototype._send = function (command) {
  var commandName = command.arguments[0]
  var flushed = this.stream.write(command.arguments.join(' ') + '\n')

  if (this.options.debug) {
    console.log('Sent:', commandName)
    command.started = process.hrtime()
  }

  this.commandsSent += 1
  this.commandQueue.push(command)

  if (flushed) {
    return flushed
  }

  if (this.options.debug) {
    console.log('Waiting after full buffer:', commandName)
  }
  this.buffering = true
  return flushed
}

/**
 * Processes the offline command queue.
 *
 * Sends buffered commands in order. Stops early, leaving the client buffering,
 * as soon as a send fills the stream's buffer. Once the queue is empty the client
 * stops buffering and emits 'drain'.
 */
BloomClient.prototype._drain = function () {
  while (this.offlineQueue.length > 0) {
    var next = this.offlineQueue.shift()
    if (this.options.debug) {
      console.log('Sending buffered command:', next.arguments[0])
    }

    var flushed = this._send(next)
    if (!flushed) {
      // Buffer was filled from this command. Wait some more.
      return
    }
  }

  this.buffering = false
  this.emit('drain')
}

/**
 * Queues for processing all those commands which were held due to
 * a 'safe' method being invoked.
 */
708 | * 709 | * @param {string} filterName 710 | */ 711 | BloomClient.prototype._clearFilterQueue = function (filterName) { 712 | var filterQueue = this.filterQueues[filterName] 713 | if (!filterQueue) { 714 | return 715 | } 716 | 717 | if (this.options.debug) { 718 | console.log('Clearing filter queue:', filterName) 719 | } 720 | 721 | while (filterQueue.length) { 722 | this._handle(filterQueue.shift(), true) 723 | } 724 | 725 | delete this.filterQueues[filterName] 726 | } 727 | 728 | // Helper Functions 729 | 730 | /** 731 | * Returns a function which is a 'safe' version of the command with the supplied name. That is, if 732 | * the filter doesn't exist when the command is run, attempts to automatically create 733 | * the filter and then re-run the command, transparently to the client. 734 | * 735 | * If there is an error in the creation step, the callback will receive the filter creation 736 | * failure, not the original 'filter not found', to help track down why the creation 737 | * would be failing. 738 | * 739 | * @param {string} command 740 | * @return {Function} 741 | */ 742 | function _makeSafe(commandName) { 743 | var commandBuilder = BloomClient.prototype['_build' + commandName[0].toUpperCase() + commandName.slice(1) + 'Command'] 744 | 745 | return function() { 746 | // This is a function like setSafe() 747 | var self = this 748 | var args = Array.prototype.slice.call(arguments, 0) 749 | var filterName = args[0] 750 | var createOptions = {} 751 | 752 | // Supports optional createOptions as a final parameter. 753 | var callback 754 | if (args[args.length - 1] instanceof Function) { 755 | callback = args.pop() 756 | } else { 757 | createOptions = args.pop() 758 | callback = args.pop() 759 | } 760 | 761 | // Create a separate copy of these arguments, so they don't get munged by later commands 762 | // which modify them. 
763 | var originalArgs = args.slice(0) 764 | originalArgs.push(callback) 765 | 766 | args.push(function (originalError, originalData) { 767 | // This is the callback which catches the response to the original command 768 | // (e.g. safe, check, bulk, multi etc.) 769 | if (originalError && ('Filter does not exist' === originalError.message)) { 770 | // Try to create the filter. The create method will clear the queue when it completes. 771 | self.create(filterName, createOptions, function (createError, createData) { 772 | // This is the callback which catches the response to the create command. 773 | // In it, we tell it to run the command which triggered this creation. 774 | var command = commandBuilder.apply(self, originalArgs) 775 | 776 | // If the creation fails, the triggering action will also fail. 777 | // Store the creation error so we can give useful feedback for why the triggering 778 | // action wasn't successful, despite it being 'safe'. 779 | if (createError) { 780 | command.error = createError 781 | } 782 | 783 | self._handle(command, true) 784 | }) 785 | } else { 786 | // The filter exists, so run the original callback. 787 | callback.call(callback, originalError, originalData) 788 | 789 | self._clearFilterQueue(filterName) 790 | } 791 | }) 792 | 793 | this._handle(commandBuilder.apply(self, args)) 794 | 795 | // Create a queue for this filter, so that subsequent commands to this filter are 796 | // buffered until it is created. 797 | if (!this.filterQueues[filterName]) { 798 | this.filterQueues[filterName] = [] 799 | } 800 | } 801 | } 802 | 803 | /** 804 | * Helper function to time performance in ms. 805 | * 806 | * @param {Array} since A previous call to process.hrtime() 807 | * @param {string} message an optional message 808 | * @return {number} 809 | */ 810 | function _timer(since, message) { 811 | var interval = process.hrtime(since) 812 | var elapsed = (interval[0] * 1000) + (interval[1] / 1000000) 813 | message = message ? 
message + ': ' : '' 814 | console.log(message + elapsed.toFixed(3) + 'ms') 815 | return elapsed 816 | } 817 | 818 | // Exports 819 | 820 | exports.BloomClient = BloomClient 821 | 822 | exports.createClient = function (options) { 823 | options = options || {} 824 | options.host = options.host || defaultHost 825 | options.port = options.port || defaultPort 826 | 827 | var netClient = net.createConnection(options.port, options.host) 828 | return new BloomClient(netClient, options) 829 | } 830 | 831 | exports.timer = _timer 832 | -------------------------------------------------------------------------------- /lib/responseParser.js: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Obvious Corporation 2 | 3 | var stream = require('stream'), 4 | util = require('util') 5 | 6 | /** 7 | * A named bloom filter object. 8 | */ 9 | function BloomFilter() { 10 | this.capacity = null 11 | this.checks = null 12 | this.checkHits = null 13 | this.checkMisses = null 14 | this.name = null 15 | this.pageIns = null 16 | this.pageOuts = null 17 | this.probability = null 18 | this.sets = null 19 | this.setHits = null 20 | this.setMisses = null 21 | this.size = null 22 | this.storage = null 23 | } 24 | 25 | /** 26 | * A parser for data returned by bloomd 27 | * 28 | * Provides a stream transformer to extract responses from 29 | * the server, and static methods to parse them into native 30 | * JS types. 31 | */ 32 | function ResponseParser(client) { 33 | this.client = client 34 | this.lines = [] 35 | this.lineData = '' 36 | this.blockLines = 1 37 | stream.Transform.call(this, {objectMode: true}) 38 | } 39 | util.inherits(ResponseParser, stream.Transform); 40 | 41 | /** 42 | * An enumeration of possible response types. 
/**
 * An enumeration of possible response types.
 */
ResponseParser.responseTypes = {
  BOOL: 'bool',
  BOOL_LIST: 'boolList',
  CONFIRMATION: 'confirmation',
  CREATE_CONFIRMATION: 'createConfirmation',
  DROP_CONFIRMATION: 'dropConfirmation',
  FILTER_LIST: 'filterList',
  INFO: 'info'
}

/**
 * Given a chunk of data, appends onto previously received data,
 * decomposes it into lines and parses those lines into either single
 * or multi-line responses that can be used to populate JS types.
 *
 * A single-line response is pushed as a string. A START ... END block is pushed
 * as an array of the lines between the two markers.
 *
 * @param {Buffer} chunk
 * @param {string} encoding
 * @param {Function} done
 */
ResponseParser.prototype._transform = function (chunk, encoding, done) {

  // Add the chunk to the line buffer
  var data = this.lineData + chunk.toString()

  // A chunk can end between the '\r' and the '\n' of one CRLF terminator. Splitting
  // now would treat the '\r' as a terminator and the '\n' in the next chunk as a second,
  // empty line. Hold a trailing '\r' back until the next chunk shows what follows it.
  var heldBack = ''
  if ('\r' === data.charAt(data.length - 1)) {
    heldBack = '\r'
    data = data.slice(0, -1)
  }

  var lines = data.split(/\r\n|\r|\n/g)

  // If the chunk finishes on a newline, the final line
  // will be empty, otherwise it will be a partially completed
  // line. Either way, we don't want to process it.
  this.lineData = lines.pop() + heldBack

  for (var n = 0, count = lines.length; n < count; n++) {
    this.lines.push(lines[n])
  }

  parseLines:
  while (this.lines.length) {

    if ('START' === this.lines[0]) {
      for (var i = this.blockLines, l = this.lines.length; i < l; i++) {
        if ('END' === this.lines[i]) {
          // Got a full list. Push it (without the START and END markers) and continue parsing.
          this.push(this.lines.splice(0, i + 1).slice(1, -1))

          // Reset the block count for the next block.
          this.blockLines = 1

          // Goto might be considered harmful, but this is a continue ;)
          continue parseLines
        }
      }

      // We had an incomplete list, so we have to wait until we get more data.
      // Remember which line we got to, so we don't have to start from 1 again.
      this.blockLines = i
      break
    } else {
      this.push(this.lines.shift())
    }
  }

  done()
}

// Static Converters

/**
 * Detects an internal error from bloomd.
 *
 * @param {string} data
 * @return {boolean}
 */
ResponseParser.isError = function (data) {
  return 'Internal Error' === data
}

/**
 * Parses a Yes/No response from bloomd into a boolean.
 *
 * @param {string} data
 * @return {boolean}
 * @throws {Error} carrying the response as its message when it is neither Yes nor No
 */
ResponseParser.parseBool = function (data) {
  if ('Yes' === data) {
    return true
  } else if ('No' === data) {
    return false
  } else {
    throw new Error(data)
  }
}

/**
 * Pairs a list of Yes/No responses with the queried keys and
 * returns a map between the two.
 *
 * @param {string} data space-separated Yes/No values
 * @param {Array} keys the keys that were queried, in the same order
 * @return {Object} map of key to boolean
 * @throws {Error} carrying the whole response line when any value is neither Yes nor No
 */
ResponseParser.parseBoolList = function (data, keys) {
  var values = data.split(' '),
    results = {}

  try {
    for (var i = 0, l = values.length; i < l; i++) {
      results[keys[i]] = ResponseParser.parseBool(values[i])
    }
  } catch (err) {
    // If there was an error parsing a bool, make the entire line available for debugging.
    throw new Error(data)
  }

  return results
}

/**
 * Parses a Done response from bloomd into a boolean.
 *
 * @param {string} data
 * @return {boolean} always true
 * @throws {Error} carrying the response as its message when it is not Done
 */
ResponseParser.parseConfirmation = function (data) {
  if ('Done' === data) {
    return true
  } else {
    throw new Error(data)
  }
}
/**
 * Interprets the response to a create command.
 *
 * For create commands, we don't care if the filter already existed, so both
 * Done and Exists count as success.
 *
 * @param {string} data
 * @return {bool} always true
 * @throws {Error} carrying the response as its message for any other response
 */
ResponseParser.parseCreateConfirmation = function (data) {
  var accepted = ('Done' === data) || ('Exists' === data)
  if (!accepted) {
    throw new Error(data)
  }
  return true
}

/**
 * Interprets the response to a drop command.
 *
 * For drop commands, we don't care if the filter existed or not, so both
 * Done and Filter does not exist count as success.
 *
 * @param {string} data
 * @return {bool} always true
 * @throws {Error} carrying the response as its message for any other response
 */
ResponseParser.parseDropConfirmation = function (data) {
  var accepted = ('Done' === data) || ('Filter does not exist' === data)
  if (!accepted) {
    throw new Error(data)
  }
  return true
}

/**
 * Turns a list of filter definition lines into an array of BloomFilter objects.
 *
 * Each line is split on spaces and its first five fields are stored, as strings,
 * in name, probability, storage, capacity and size.
 *
 * @param {Array} data
 * @return {Array}
 * @throws {Error} when data is not an array
 */
ResponseParser.parseFilterList = function (data) {
  if (!Array.isArray(data)) {
    throw new Error(data)
  }

  function toFilter(line) {
    var fields = line.split(' ')
    var parsed = new BloomFilter()
    parsed.name = fields[0]
    parsed.probability = fields[1]
    parsed.storage = fields[2]
    parsed.capacity = fields[3]
    parsed.size = fields[4]
    return parsed
  }

  return data.map(function (line) {
    return toFilter(line)
  })
}

/**
 * Parses filter information into a single BloomFilter objects.
 */
229 | * 230 | * @param {Array} data 231 | * @return {BloomFilter} 232 | */ 233 | ResponseParser.parseInfo = function (data, name) { 234 | if (!Array.isArray(data)) { 235 | throw new Error(data) 236 | } 237 | var filter = new BloomFilter() 238 | for (var i = 0, l = data.length; i < l; i++) { 239 | var definition = data[i].split(' ') 240 | filter[definition[0].replace(/_([a-z])/g, function (g) { return g[1].toUpperCase() })] = definition[1] 241 | } 242 | filter.name = name 243 | return filter 244 | } 245 | 246 | // Exports 247 | 248 | exports.ResponseParser = ResponseParser 249 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bloomd" 3 | ,"license": "Apache-2.0" 4 | , "description": "NodeJS Driver for BloomD" 5 | , "version": "0.2.6" 6 | , "homepage": "https://github.com/obvious/node-bloomd" 7 | , "authors": [ 8 | "Jamie Talbot (https://github.com/majelbstoat)" 9 | ] 10 | , "contributors": [ 11 | 12 | ] 13 | , "keywords": ["bloomd", "bloom", "filter"] 14 | , "main": "index.js" 15 | , "repository": { 16 | "type": "git" 17 | , "url": "https://github.com/obvious/node-bloomd.git" 18 | } 19 | , "dependencies": { 20 | } 21 | , "devDependencies": { 22 | "nodeunit": "0.9.1", 23 | "sleep": "1.1.1" 24 | } 25 | , "scripts": { 26 | "test": "./node_modules/nodeunit/bin/nodeunit test" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /test/bloomd_test.js: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Obvious Corporation 2 | 3 | var bloom = require('../index'), 4 | fs = require('fs'), 5 | assert = require('assert'), 6 | spawn = require('child_process').spawn, 7 | sleep = require('sleep').sleep, 8 | bloomd 9 | 10 | /** 11 | * Delay in ms that we should wait after doing a drop command before issuing 12 | * a create command to 
/**
 * Delay in ms that we should wait after doing a drop command before issuing
 * a create command to the same filter name, due to bloomd's limitations.
 * If tests are failing, try increasing this.
 */
var DROP_THEN_CREATE_DELAY_MS = 200

/**
 * Delay in seconds that we should wait after starting and stopping the bloomd
 * server to make sure it is ready.
 */
var SERVER_START_STOP_TIME = 1

/**
 * Starts bloomd by spawning the 'bloomd' executable, then blocks for
 * SERVER_START_STOP_TIME seconds to give it time to come up.
 *
 * The child process is kept in the module-level `bloomd` variable so that
 * _stopServer() can kill it.
 */
function _startServer() {
  bloomd = spawn('bloomd')
  bloomd.on('close', function (code) {
    // A non-zero exit code is reported as a failure to start.
    if (code) {
      console.log('Bloomd could not start. Is it already running?')
      throw new Error()
    }
  })
  sleep(SERVER_START_STOP_TIME)
}

/**
 * Stops bloomd by killing the process spawned by _startServer(), then blocks
 * for SERVER_START_STOP_TIME seconds.
 */
function _stopServer() {
  bloomd.kill()
  sleep(SERVER_START_STOP_TIME)
}

/**
 * We use different named filters for each, as bloomd does not allow for the creation
 * of a filter while it is deleting one of the same name, and this library doesn't
 * currently handle this case.
 *
 * These tests work by starting and stopping bloomd a couple of times.
 * It assumes 'bloomd' is an executable on the system.
 *
 * Starting and stopping the server throughout the tests introduces a dependency
 * on the ordering of tests, and makes them potentially non-deterministic. This isn't
 * necessarily best practice but it allows us to test the server going away, and
 * sending commands before the server becomes available.
 */
/**
 * Tests that the client can connect to a bloomd instance which becomes
 * available after the client starts up.
 *
 * All commands are issued before the server is started, so they have to be
 * held by the client until it manages to connect.
 */
exports.reconnectsOnConnectionFailure = function (test) {
  var filterName = 'reconnects_on_failure'
  var bloomClient = bloom.createClient()

  bloomClient.setSafe(filterName, 'monkey', function(error, data) {
    test.equals(data, true)
  })

  bloomClient.check(filterName, 'monkey', function (error, data) {
    test.equals(data, true)
  })

  bloomClient.drop(filterName, function (error, data) {
    bloomClient.dispose()

    // Set, Create, Set, Check, Drop
    test.equals(5, bloomClient.commandsSent)
    test.done()
  })

  // Start the server only after all the events have been queued.
  _startServer()
}

/**
 * Tests that an unavailable client rejects both queued commands
 * and ones issued after the fact.
 *
 * Stops the server first, and restarts it once the final assertion has run
 * so that later tests find it running.
 */
exports.unavailableClientRejectsQueuedAndNewCommands = function (test) {
  var filterName = 'unavailable_client_rejection'
  var bloomClient = bloom.createClient({maxConnectionAttempts: 1})

  _stopServer()

  bloomClient.on('connected', function () {
    test.ok(false, 'We should not have been able to connect.')
    test.done()
  })

  // Issued before the client becomes unavailable.
  bloomClient.create(filterName, {}, function (error, data) {
    test.equals('Bloomd is unavailable', error.message, 'Command should have been rejected')
  })

  // Wait until we get the unavailable signal, then try a command.
  bloomClient.on('unavailable', function() {
    bloomClient.set(filterName, 'monkey', function (error, data) {
      test.equals('Bloomd is unavailable', error.message, 'Command should have been rejected')
      test.done()
      _startServer()
    })
  })
}

/**
 * Tests that disposing a client doesn't reconnect
 *
 * The server is stopped, a command is issued and the client is disposed;
 * the command is expected to be rejected as unavailable.
 */
exports.disposedClientDoesNotReconnect = function (test) {
  var filterName = 'disposed_client_reconnected'
  var bloomClient = bloom.createClient()

  _stopServer()

  bloomClient.create(filterName, {}, function (error, data) {
    test.equals('Bloomd is unavailable', error.message, 'Command should have been rejected')
    test.done()
    _startServer()
  })

  bloomClient.dispose()
}

// Server Is Available

/**
 * Tests that calling setSafe on a filter actually calls the original
 * callback if the filter already exists.
 */
exports.setAndCreateTestFilterExists = function (test) {
  var filterName = 'set_and_create_already_exists'
  var bloomClient = bloom.createClient()
  // Flipped by the setSafe callback; checked in the final drop callback.
  var called = false

  // Create a filter.
  bloomClient.create(filterName, {}, function (error, data) {
    test.equals(data, true, 'Failed to create filter')
  })

  bloomClient.setSafe(filterName, 'monkey', function(error, data) {
    test.equals(data, true)
    called = true
  })

  bloomClient.check(filterName, 'monkey', function(error, data) {
    test.equals(data, true)
  })

  // Clean up, and verify the setSafe callback ran before we got here.
  bloomClient.drop(filterName, function() {
    bloomClient.dispose()
    test.equals(called, true, 'The original callback was not called')
    test.done()
  })
}

/**
 * Test that creating a filter when it already exists doesn't throw an
 * error. Technically the create options could be different, but we don't
 * worry about that.
It's unclear what the correct resolution would be 170 | * and we need to support this so that *safe() commands operating on the 171 | * same filter but run from different clients don't cause failure on 172 | * a race. 173 | */ 174 | exports.consecutiveCreateCommandsDoNotFail = function (test) { 175 | var filterName = 'consecutive_creates' 176 | var bloomClient = bloom.createClient() 177 | 178 | bloomClient.drop(filterName, function (error, data) { 179 | bloomClient.create(filterName, {}, function (error, data) { 180 | test.equals(data, true, 'First creation failed') 181 | }) 182 | 183 | bloomClient.create(filterName, {}, function (error, data) { 184 | test.equals(data, true, 'Second creation failed') 185 | }) 186 | 187 | bloomClient.drop(filterName, function (error, data) { 188 | bloomClient.dispose() 189 | test.done() 190 | }) 191 | }) 192 | } 193 | 194 | /** 195 | * Tests the setting of a key on a filter that doesn't exist, that the 196 | * filter is automatically created, that the key is set, and that the 197 | * original callback is still called. 198 | */ 199 | exports.setAndCreateTestFilterDoesNotExist = function (test) { 200 | var filterName = 'set_and_create_non_existent' 201 | var bloomClient = bloom.createClient() 202 | 203 | bloomClient.drop(filterName, function (error, data) { 204 | // This is a bit janky, as we have to drop the bloomClient to 205 | // ensure that the filter doesn't exist beforehand, 206 | // but bloomd has a period of time where you can't create immediately 207 | // after a drop where creation will fail, so we wait for a bit. 208 | // Non-deterministic, but probably ok. 209 | setTimeout(function() { 210 | bloomClient.setSafe(filterName, 'monkey', function(error, data) { 211 | test.equals(data, true) 212 | 213 | // The cleanup drop command also has to come in this callback, 214 | // otherwise it will be in the queue before the create and retry 215 | // commands that are generated by the non-existence of the filter. 
216 | // This is why promises are good. 217 | bloomClient.drop(filterName, function() { 218 | // Drop, Set, Create, Set, Drop 219 | bloomClient.dispose() 220 | test.equals(5, bloomClient.commandsSent) 221 | test.done() 222 | }) 223 | }) 224 | }, DROP_THEN_CREATE_DELAY_MS) 225 | }) 226 | } 227 | 228 | /** 229 | * Tests the checking of a key after we call setSafe. When a filter doesn't exist, setSafe 230 | * automatically creates it, then sets the value. If a client issues a check command after 231 | * a setSafe command, it should return true, even if the filter didn't exist. 232 | */ 233 | exports.checkAfterSetSafe = function (test) { 234 | var filterName = 'check_after_set_safe' 235 | var bloomClient = bloom.createClient() 236 | 237 | bloomClient.drop(filterName, function (error, data) { 238 | // Also janky. 239 | setTimeout(function() { 240 | bloomClient.setSafe(filterName, 'monkey', function(error, data) { 241 | test.equals(data, true) 242 | }) 243 | 244 | bloomClient.check(filterName, 'monkey', function(error, data) { 245 | test.equals(data, true, 'Check after safe set was not true') 246 | }) 247 | 248 | bloomClient.drop(filterName, function() { 249 | // Drop, Set, Create, Set, Check, Drop 250 | bloomClient.dispose() 251 | test.equals(6, bloomClient.commandsSent) 252 | test.done() 253 | }) 254 | }, DROP_THEN_CREATE_DELAY_MS) 255 | }) 256 | } 257 | 258 | /** 259 | * Tests interleaved consecutive safe and non-safe commands, to ensure they run in the specified order. 260 | */ 261 | exports.interleavedSafeNonSafe = function (test) { 262 | var filterName = 'interleaved_safe_non_safe' 263 | var bloomClient = bloom.createClient() 264 | 265 | bloomClient.drop(filterName, function (error, data) { 266 | // Also janky. 
267 | setTimeout(function() { 268 | bloomClient.multiSafe(filterName, ['monkey'], function(error, data) { 269 | test.deepEqual(data, { 270 | monkey: false 271 | }) 272 | }) 273 | 274 | bloomClient.bulk(filterName, ['monkey', 'magic', 'muppet'], function(error, data) { 275 | test.deepEqual(data, { 276 | monkey: true, 277 | magic: true, 278 | muppet: true 279 | }) 280 | }) 281 | 282 | bloomClient.multiSafe(filterName, ['magic', 'muppet', 'moonbeam'], function(error, data) { 283 | test.deepEqual(data, { 284 | magic: true, 285 | muppet: true, 286 | moonbeam: false 287 | }) 288 | }) 289 | 290 | bloomClient.bulkSafe(filterName, ['monkey', 'moonbeam'], function(error, data) { 291 | test.deepEqual(data, { 292 | monkey: false, 293 | moonbeam: true 294 | }) 295 | }) 296 | 297 | bloomClient.multi(filterName, ['monkey', 'magic', 'muppet', 'moonbeam'], function(error, data) { 298 | test.deepEqual(data, { 299 | monkey: true, 300 | magic: true, 301 | muppet: true, 302 | moonbeam: true 303 | }) 304 | }) 305 | 306 | bloomClient.drop(filterName, function() { 307 | // Drop, Multi, Create, Multi, Bulk, Multi, Bulk, Multi, Drop 308 | bloomClient.dispose() 309 | test.equals(9, bloomClient.commandsSent) 310 | test.done() 311 | }) 312 | }, DROP_THEN_CREATE_DELAY_MS) 313 | }) 314 | } 315 | 316 | 317 | /** 318 | * Tests the setting of a key on a filter that doesn't exist, in the situation 319 | * where the creation of the filter fails for some reason. 320 | * 321 | * We can simulate this by using a sufficiently low desired capacity. 322 | */ 323 | exports.setAndCreateTestFilterCannotBeCreated = function (test) { 324 | var filterName = 'set_and_create_error_creating' 325 | var bloomClient = bloom.createClient() 326 | 327 | bloomClient.drop(filterName, function (error, data) { 328 | // Same as prior test, also janky. 
329 | setTimeout(function() { 330 | bloomClient.setSafe(filterName, 'monkey', function(error, data) { 331 | test.equals(error.message, 'Client Error: Bad arguments') 332 | 333 | bloomClient.drop(filterName, function() { 334 | bloomClient.dispose() 335 | test.done() 336 | }) 337 | }, { 338 | // A low capacity will cause a creation failure due to bad arguments. 339 | capacity: 100 340 | }) 341 | }, DROP_THEN_CREATE_DELAY_MS) 342 | }) 343 | } 344 | 345 | /** 346 | * Test insertion and subsequent retrieval of 235k items, into a filter initially 347 | * sized for 20k, forcing multiple resizes. We chain the multi on the callback of 348 | * the bulk in order to get accurate timings. 349 | */ 350 | exports.bulkPerformance = function (test) { 351 | var filterName = 'bulk_performance' 352 | var bloomClient = bloom.createClient() 353 | 354 | // Read in a dictionary. 355 | fs.readFile('./test/words.txt', 'utf8', function (error, data) { 356 | 357 | // Create a filter. 358 | bloomClient.create(filterName, { 359 | prob: 0.0001, 360 | capacity: 20000 361 | }) 362 | 363 | // The last line will be blank. 364 | var lines = data.split('\n') 365 | lines.pop() 366 | 367 | var bulkExpected = {} 368 | var multiExpected = {} 369 | 370 | for (var i = 0, l = lines.length; i < l; i++) { 371 | var line = lines[i] 372 | bulkExpected[line] = true 373 | multiExpected[line] = true 374 | } 375 | 376 | // There are a couple of collisions at this probability. 377 | bulkExpected['choledochotomy'] = false 378 | bulkExpected['ensnarer'] = false 379 | bulkExpected['renunciatory'] = false 380 | bulkExpected['unboundless'] = false 381 | 382 | // Insert lots of data. 383 | var bulkStart = process.hrtime() 384 | bloomClient.bulk(filterName, lines, function (error, data) { 385 | var elapsed = bloom.timer(bulkStart, 'Inserted ' + lines.length + ' items') 386 | 387 | // Totally arbitrary, but should be plenty of room on even a moderate laptop. 
388 | test.ok(elapsed < 1000, 'Bulk set considered too slow') 389 | 390 | test.deepEqual(bulkExpected, data) 391 | 392 | var multiStart = process.hrtime() 393 | bloomClient.multi(filterName, lines, function (error, data) { 394 | var elapsed = bloom.timer(multiStart, 'Retrieved ' + lines.length + ' items') 395 | 396 | // Totally arbitrary, but should be plenty of room on even a moderate laptop. 397 | test.ok(elapsed < 1000, 'Multi check considered too slow') 398 | test.deepEqual(multiExpected, data) 399 | }) 400 | 401 | bloomClient.drop(filterName, function() { 402 | bloomClient.dispose() 403 | test.done() 404 | }) 405 | }) 406 | }) 407 | } 408 | 409 | /** 410 | * Test repeated calls to info, to force data buffering that will 411 | * result in incomplete lists, and give the stream transformation code a workout. 412 | */ 413 | exports.consecutiveInfo = function (test) { 414 | var filterName = 'consecutive_info' 415 | var bloomClient = bloom.createClient() 416 | 417 | // Create a filter. 418 | bloomClient.create(filterName, { 419 | prob: 0.01, 420 | capacity: 20000 421 | }) 422 | 423 | var iterations = 1000 424 | var responseCount = 0 425 | function callback(error, data) { 426 | test.equals(data.name, filterName, 'Did not get back the same thing we put it') 427 | responseCount++ 428 | } 429 | 430 | for (var i = 0; i < iterations; i++) { 431 | bloomClient.info(filterName, callback) 432 | } 433 | 434 | bloomClient.drop(filterName, function() { 435 | bloomClient.dispose() 436 | test.equals(iterations, responseCount, 'Did not iterate the correct number of times') 437 | test.done() 438 | }) 439 | } 440 | 441 | /** 442 | * Perform the canonical steps listed in the bloomd readme 443 | * 444 | * https://github.com/armon/bloomd/blob/master/README.md 445 | */ 446 | exports.canonicalTest = function (test) { 447 | var filterName = 'canonical_test' 448 | var bloomClient = bloom.createClient() 449 | 450 | bloomClient.list(null, function(error, data) { 451 | test.equals(data.length, 
0, 'We have a list, somehow') 452 | }) 453 | 454 | // Create a filter. 455 | bloomClient.create(filterName, {}, function (error, data) { 456 | test.equals(data, true, 'Failed to create filter') 457 | }) 458 | 459 | bloomClient.check(filterName, 'zipzab', function (error, data) { 460 | test.deepEqual(data, false, 'zipzab should not exist') 461 | }) 462 | 463 | bloomClient.set(filterName, 'zipzab', function (error, data) { 464 | test.equals(data, true, 'zipzab should have been created') 465 | }) 466 | 467 | bloomClient.check(filterName, 'zipzab', function (error, data) { 468 | test.equals(data, true, 'zipzab should now exist') 469 | }) 470 | 471 | bloomClient.multi(filterName, ['zipzab', 'blah', 'boo'], function (error, data) { 472 | test.deepEqual(data, { 473 | zipzab: true, 474 | blah: false, 475 | boo: false 476 | }) 477 | }) 478 | 479 | bloomClient.bulk(filterName, ['zipzab', 'blah', 'boo'], function (error, data) { 480 | test.deepEqual(data, { 481 | zipzab: false, 482 | blah: true, 483 | boo: true 484 | }) 485 | }) 486 | 487 | bloomClient.multi(filterName, ['zipzab', 'blah', 'boo'], function (error, data) { 488 | test.deepEqual(data, { 489 | zipzab: true, 490 | blah: true, 491 | boo: true 492 | }) 493 | }) 494 | 495 | bloomClient.list(null, function(error, data) { 496 | test.equals(data.length, 1, 'We had a list, somehow.') 497 | test.equals(data[0].name, filterName) 498 | }) 499 | 500 | bloomClient.drop(filterName, function (error, data) { 501 | test.equals(data, true, 'Failed to drop filter') 502 | }) 503 | 504 | bloomClient.list(null, function(error, data) { 505 | bloomClient.dispose() 506 | test.equals(data.length, 0, 'We had a list, somehow.') 507 | test.done() 508 | }) 509 | 510 | } 511 | 512 | /** 513 | * Dummy test to kill the server and finish up. 514 | */ 515 | exports.stopServer = function (test) { 516 | _stopServer() 517 | test.done() 518 | } 519 | --------------------------------------------------------------------------------