├── .gitignore ├── index.js ├── package.json ├── test ├── imgsrc.js ├── script.js ├── encode.js └── analyze.js ├── filters └── youtube.js ├── README.md └── lib └── bleach.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /* 2 | * bleach 3 | * a minimal html sanitizer 4 | * cam@onswipe.com 5 | */ 6 | 7 | module.exports = require('./lib/bleach.js'); 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Cam Pedersen (http://campedersen.com/)", 3 | "name": "bleach", 4 | "description": "A minimalistic HTML sanitizer", 5 | "version": "0.3.0", 6 | "homepage": "https://github.com/ecto/bleach/issues", 7 | "repository": { 8 | "type": "git", 9 | "url": "git://github.com/ecto/bleach.git" 10 | }, 11 | "main": "index.js", 12 | "engines": { 13 | "node": "*" 14 | }, 15 | "dependencies": { 16 | "he": "0.4.x" 17 | }, 18 | "devDependencies": { 19 | "vows": "0.5.x" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/imgsrc.js: -------------------------------------------------------------------------------- 1 | var vows = require('vows'), 2 | assert = require('assert'), 3 | bleach = require('../lib/bleach'); 4 | 5 | var HTML = '

This is an

'; 6 | 7 | vows.describe('img src tests').addBatch({ 8 | 9 | 'analyze': { 10 | topic: function () { return HTML; }, 11 | 12 | 'gets the correct img src': function ( html ) { 13 | var tags = bleach.analyze( html ); 14 | assert.equal( tags[1].attr[0].value, 'http://site.com?setting=value&othersetting=othervalue' ); 15 | } 16 | } 17 | 18 | }).export(module); 19 | -------------------------------------------------------------------------------- /filters/youtube.js: -------------------------------------------------------------------------------- 1 | module.exports = function(html) { 2 | html = String(html) || ''; 3 | 4 | var match, 5 | matches = [], 6 | regex = //gi; 7 | 8 | while (match = regex.exec(html)) { 9 | matches.push(match); 10 | } delete match; 11 | 12 | matches.forEach(function(match){ 13 | var full = match[0], 14 | id = match[4]; 15 | 16 | var rep = ''; 21 | 22 | html = html.replace(full, rep); 23 | }); 24 | 25 | return html; 26 | } 27 | 28 | -------------------------------------------------------------------------------- /test/script.js: -------------------------------------------------------------------------------- 1 | var vows = require('vows'), 2 | assert = require('assert'), 3 | bleach = require('../lib/bleach'); 4 | 5 | var HTML1 = 'This is HTML with a SCRIPT', 6 | HTML2 = 'This is HTML with a SCRIPT', 7 | HTML3 = 'This is HTML with a SCRIPT'; 8 | 9 | vows.describe('script tests').addBatch({ 10 | 11 | 'whitelist mode': { 12 | topic: function (){ return HTML1; }, 13 | 14 | 'eliminates script tags but keeps listed tags': function (HTML1){ 15 | var HTML = bleach.sanitize(HTML1, {mode: 'white', list:['a']}); 16 | assert.equal(HTML, HTML2); 17 | }, 18 | 19 | 'eliminates all tags when given an empty list': function (HTML1){ 20 | var HTML = bleach.sanitize(HTML1, {mode: 'white', list:[]}); 21 | assert.equal(HTML, HTML3); 22 | } 23 | } 24 | 25 | }).export(module); 26 | -------------------------------------------------------------------------------- 
/test/encode.js: -------------------------------------------------------------------------------- 1 | var vows = require('vows'), 2 | assert = require('assert'), 3 | bleach = require('../lib/bleach'); 4 | 5 | var HTML1 = 'This is HTML with a SCRIPT', 6 | HTML2 = 'This is <a href="#html">HTML</a> with a SCRIPT', 7 | HTML3 = 'This is HTML with a SCRIPT'; 8 | 9 | vows.describe('encode tests').addBatch({ 10 | 11 | 'whitelist mode': { 12 | topic: function (){ return HTML1; }, 13 | 14 | 'eliminates script tags but encodes listed tags': function (HTML1){ 15 | var HTML = bleach.sanitize(HTML1, {mode: 'white', list:['a'], encode_entities: true}); 16 | assert.equal(HTML, HTML2); 17 | }, 18 | 19 | 'eliminates all tags when given an empty list': function (HTML1){ 20 | var HTML = bleach.sanitize(HTML1, {mode: 'white', list:[], encode_entities: true}); 21 | assert.equal(HTML, HTML3); 22 | } 23 | }, 24 | 25 | 'blacklist mode': { 26 | topic: function (){ return HTML1; }, 27 | 28 | 'eliminates listed tags but encodes other tags': function (HTML1){ 29 | var HTML = bleach.sanitize(HTML1, {mode: 'black', list:['script'], encode_entities: true}); 30 | assert.equal(HTML, HTML2); 31 | } 32 | } 33 | 34 | }).export(module); 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bleach 2 | 3 | Sanitize your HTML the easy way! 
4 | 5 | ![bleach](http://i.imgur.com/9qSfd.png) 6 | 7 | ## install 8 | 9 | npm install bleach 10 | 11 | ## test 12 | 13 | vows --spec test/* 14 | 15 | ## example 16 | 17 | Basic: 18 | 19 | ````javascript 20 | 21 | var bleach = require('bleach'); 22 | 23 | var html = bleach.sanitize(aBunchOfHTML); 24 | 25 | console.log(html); 26 | ```` 27 | 28 | Advanced: 29 | 30 | ````javascript 31 | 32 | var bleach = require('bleach'); 33 | 34 | var whitelist = [ 35 | 'a', 36 | 'b', 37 | 'i', 38 | 'em', 39 | 'strong' 40 | ] 41 | 42 | var options = { 43 | mode: 'white', 44 | list: whitelist 45 | } 46 | 47 | var html = bleach.sanitize(aBunchOfHTML, options); 48 | 49 | console.log(html); 50 | ```` 51 | 52 | ## usage 53 | 54 | ### bleach.sanitize(html, options) 55 | 56 | Runs HTML through the sanitizer and returns the sanitized HTML as a string. 57 | 58 | `options` may contain the following optional attributes: 59 | 60 | * `mode` may be set to `'white'` or `'black'` 61 | * `list` is an array containing tags to match against 62 | 63 | `white` mode will remove all tags from `html`, excluding those in `list` 64 | 65 | `black` mode will remove from `html` all tags that are found in `list` 66 | 67 | ### bleach.analyze(html) 68 | 69 | Extracts all tags from HTML and returns them as an array of objects. Example return: 70 | 71 | ````javascript 72 | [ 73 | { 74 | full: '
', 75 | name: 'div', 76 | attr: [ 77 | { name: 'id', value: 'post-119477' } 78 | ] 79 | }, 80 | ... 81 | ] 82 | ```` 83 | 84 | ### bleach.filter(html, filters) 85 | 86 | Passes `html` through a filter function, an array of filter functions applied in order, or the name of a bundled filter, and returns the result. 87 | 88 | ````javascript 89 | 90 | var nyanFilter = function(input){ 91 | return input.replace('cats', 'nyannyannyan'); 92 | } 93 | 94 | console.log( 95 | bleach.filter('cats', nyanFilter) 96 | ); 97 | 98 | // nyannyannyan 99 | 100 | ```` 101 | 102 | ````javascript 103 | 104 | var cutFilter = function(input){ 105 | return input.slice(0, 4); 106 | } 107 | 108 | console.log( 109 | bleach.filter('cats', [ 110 | nyanFilter, 111 | cutFilter 112 | ]) 113 | ); 114 | 115 | // nyan 116 | 117 | ```` 118 | 119 | You may also define longer filters and place them in the ./node_modules/bleach/filters directory. 120 | A sample filter is included that converts YouTube Flash embed objects to iDevice-compatible YouTube iframes. 121 | 122 | ````javascript 123 | 124 | var html = ''; 125 | 126 | console.log( 127 | bleach.filter(html, 'youtube') 128 | ); 129 | 130 | // 131 | 132 | ```` 133 | 134 | Refer to the filters directory for a template. 135 | 136 | ## disclaimer 137 | 138 | This is not a port of the Python **bleach** library - in fact, the two implementations are very different. 139 | 140 | ## license 141 | 142 | (The MIT License) 143 | 144 | Copyright (c) 2011 Cam Pedersen 145 | 146 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 147 | 148 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
149 | 150 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 151 | -------------------------------------------------------------------------------- /test/analyze.js: -------------------------------------------------------------------------------- 1 | var vows = require('vows'), 2 | assert = require('assert'); 3 | bleach = require('../lib/bleach'); 4 | 5 | vows.describe('bleach').addBatch({ 6 | 7 | 'bleach.analyze(html)': { 8 | topic: function(){ return bleach.analyze; }, 9 | 'is a function': function(analyze) { 10 | assert.equal(typeof analyze, 'function'); 11 | }, 12 | 'returns blank array on invalid or missing input': function(analyze) { 13 | assert.deepEqual(analyze({}), []); 14 | assert.deepEqual(analyze([]), []); 15 | assert.deepEqual(analyze(''), []); 16 | }, 17 | 'finds self-closing tags': function(analyze){ 18 | assert.ok(analyze('').length > 0); 19 | }, 20 | 'returns an array': function(analyze) { 21 | assert.isArray(analyze(' ')); 22 | }, 23 | 'extracts attributes': function(analyze){ 24 | assert.equal(analyze('')[0].attr[0], { name: 'type', value: '"text"' }.toString()); 25 | } 26 | }, 27 | 28 | 'bleach.sanitize(html, options)': { 29 | topic: function(){ return bleach.sanitize; }, 30 | 'is a function': function(sanitize) { 31 | assert.equal(typeof sanitize, 'function'); 32 | }, 33 | 'does not require options to be passed in': function(sanitize){ 34 | assert.doesNotThrow(function(){ 35 | sanitize(' '); 36 | }, Error); 37 | }, 38 | 'returns a string': function(sanitize) { 39 | assert.isString(sanitize(' ')); 40 | }, 41 | 'returns blank string on 
invalid or missing input': function(analyze) { 42 | assert.isString(analyze({})); 43 | assert.isString(analyze([])); 44 | assert.isString(analyze('')); 45 | }, 46 | 'whitelist is respected': function(sanitize){ 47 | var whitelist = ['br'], 48 | input = '

hello
world!