├── .gitignore ├── .travis.yml ├── README.md ├── index.js ├── package.json └── test ├── append.txt ├── characters.txt ├── default.txt ├── full.txt ├── index.js ├── mocha.opts ├── snippet.html └── words.txt /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "4.1" 4 | - "4.0" 5 | - "0.12" 6 | - "0.10" 7 | before_install: 8 | - npm install -g npm 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # excerpts [](https://travis-ci.org/gnowoel/excerpts) 2 | 3 | Excerpting words or characters of text from an HTML snippet. 4 | 5 | ## Installation 6 | 7 | ``` 8 | $ npm install excerpts 9 | ``` 10 | 11 | ## Usage 12 | 13 | Given HTML snippet: 14 | 15 | ``` html 16 |
Lorem ipsum dolor sit amet.
17 | ``` 18 | 19 | ### Words 20 | 21 | Excerpting words with the `words` option: 22 | 23 | ```javascript 24 | var excerpts = require('excerpts'); 25 | var text = excerpts(html, { words: 3 }); 26 | //=> Lorem ipsum dolor... 27 | ``` 28 | 29 | ### Characters 30 | 31 | Excerpting characters with the `characters` option: 32 | 33 | ```javascript 34 | var excerpts = require('excerpts'); 35 | var text = excerpts(html, { characters: 10 }); 36 | //=> Lorem ipsum dol... 37 | ``` 38 | 39 | The `words` option takes precedence over the `characters` option. By default, 50 words will be extracted when options are missing. 40 | 41 | ### Appendix 42 | 43 | The appendix can be customized with the `append` option: 44 | 45 | ```javascript 46 | var excerpts = require('excerpts'); 47 | var text = excerpts(html, { words: 3, append: ' >>' }); 48 | //=> Lorem ipsum dolor >> 49 | ``` 50 | 51 | The appendix won't appear when full text has been extracted. 52 | 53 | ## Tests 54 | 55 | ``` 56 | $ npm install 57 | $ npm test 58 | ``` 59 | 60 | ## License 61 | 62 | MIT 63 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var $ = require('cheerio'); 2 | 3 | function excerpts(html, opts) { 4 | html = String(html); 5 | opts = prepare(opts); 6 | 7 | var text = $('').html(html).text().trim() 8 | .replace(/(\r\n|\r|\n|\s)+/g, ' '); 9 | 10 | var excerpt = ''; 11 | 12 | if (opts.characters != null) { 13 | excerpt = text.slice(0, opts.characters); 14 | } 15 | 16 | if (opts.words != null) { 17 | excerpt = text.split(' ').slice(0, opts.words).join(' '); 18 | } 19 | 20 | if (excerpt.length < text.length) { 21 | excerpt += opts.append; 22 | } 23 | 24 | return excerpt; 25 | } 26 | 27 | function prepare(opts) { 28 | opts = opts || {}; 29 | 30 | if (opts.append == null) { 31 | opts.append = '...'; 32 | } 33 | 34 | if (!opts.words && !opts.characters) { 35 | opts.words = 50; 36 | } 37 | 38 | if (opts.words && opts.characters) { 39 | delete opts.characters; 40 | } 41 | 42 | if (opts.words != null) { 43 | opts.words = parseInt(opts.words, 10); 44 | } 45 | 46 | if (opts.characters != null) { 47 | opts.characters = parseInt(opts.characters, 10); 48 | } 49 | 50 | return opts; 51 | } 52 | 53 | module.exports = excerpts; 54 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "excerpts", 3 | "version": "0.0.3", 4 | "description": "Excerpting text of given words or characters from HTML.", 5 | "license": "MIT", 6 | "repository": { 7 | "url": "https://github.com/gnowoel/excerpts" 8 | }, 9 | "scripts": { 10 | "test": "NODE_ENV=test mocha" 11 | }, 12 | "keywords": [ 13 | "excerpt", 14 | "truncate", 15 | "convert", 16 | "html", 17 | "text", 18 | "words", 19 | "characters" 20 | ], 21 | "dependencies": { 22 | "cheerio": "^0.22.0" 23 | }, 24 | "devDependencies": { 25 | "chai": "*", 26 | "mocha": "*" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /test/append.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, test link adipiscing elit. This >> 2 | -------------------------------------------------------------------------------- /test/characters.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, te... 2 | -------------------------------------------------------------------------------- /test/default.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, test link adipiscing elit. This is strong. Nullam dignissim convallis est. Quisque aliquam. This is emphasized. Donec faucibus. Nunc iaculis suscipit dui. 53 = 125. Water is H2O. Nam sit amet sem. Aliquam libero nisi, imperdiet at, tincidunt nec, gravida vehicula, nisl. The New York... 2 | -------------------------------------------------------------------------------- /test/full.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, test link adipiscing elit. This is strong. Nullam dignissim convallis est. Quisque aliquam. This is emphasized. Donec faucibus. Nunc iaculis suscipit dui. 53 = 125. Water is H2O. Nam sit amet sem. Aliquam libero nisi, imperdiet at, tincidunt nec, gravida vehicula, nisl. The New York Times (That’s a citation). Underline. Maecenas ornare tortor. Donec sed tellus eget sapien fringilla nonummy. Mauris a ante. Suspendisse quam sem, consequat at, commodo vitae, feugiat in, nunc. Morbi imperdiet augue quis tellus. HTML and CSS are our tools. Mauris a ante. Suspendisse quam sem, consequat at, commodo vitae, feugiat in, nunc. Morbi imperdiet augue quis tellus. Praesent mattis, massa quis luctus fermentum, turpis mi volutpat justo, eu volutpat enim diam eget metus. To copy a file type COPY filename. Dinner’s at 5:00. Let’s make that 7. This text has been struck. 2 | -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs'); 2 | var path = require('path'); 3 | var assert = require('chai').assert; 4 | var excerpts = require('..'); 5 | 6 | var html = fs.readFileSync(path.join(__dirname, 'snippet.html'), 'utf8'); 7 | 8 | describe('excerpts(html, opts)', function() { 9 | it('should extract 50 words by default', function(done) { 10 | var text = fs.readFileSync(path.join(__dirname, 'default.txt'), 'utf8'); 11 | var excerpt = excerpts(html); 12 | 13 | assert.equal(excerpt, text.trim()); 14 | 15 | done(); 16 | }); 17 | 18 | it('should extract specified words', function(done) { 19 | var text = fs.readFileSync(path.join(__dirname, 'words.txt'), 'utf8'); 20 | var excerpt = excerpts(html, { words: 10 }); 21 | 22 | assert.equal(excerpt, text.trim()); 23 | 24 | done(); 25 | }); 26 | 27 | it('should accept either number or string options', function(done) { 28 | var text = fs.readFileSync(path.join(__dirname, 'words.txt'), 'utf8'); 29 | var excerpt = excerpts(html, { words: '10' }); 30 | 31 | assert.equal(excerpt, text.trim()); 32 | 33 | done(); 34 | }); 35 | 36 | it('should extract specified characters', function(done) { 37 | var text = fs.readFileSync(path.join(__dirname, 'characters.txt'), 'utf8'); 38 | var excerpt = excerpts(html, { characters: 30 }); 39 | 40 | assert.equal(excerpt, text.trim()); 41 | 42 | done(); 43 | }); 44 | 45 | it('should honor words over characters', function(done) { 46 | var text = fs.readFileSync(path.join(__dirname, 'words.txt'), 'utf8'); 47 | var excerpt = excerpts(html, { characters: 30, words: 10 }); 48 | 49 | assert.equal(excerpt, text.trim()); 50 | 51 | done(); 52 | }); 53 | 54 | if ('should replace newlines with a space', function(done) { 55 | var text = fs.readFileSync(path.join(__dirname, 'full.txt'), 'utf8'); 56 | var excerpt = excerpts(html, { words: 10000 }); 57 | 58 | assert.equal(excerpt, text.trim()); 59 | assert.euqal(/\n/.test(excerpt), false); 60 | 61 | done(); 62 | }); 63 | 64 | it('should append ellipses to an excerpt by default', function(done) { 65 | var text = fs.readFileSync(path.join(__dirname, 'words.txt'), 'utf8'); 66 | var excerpt = excerpts(html, { words: 10 }); 67 | 68 | assert.equal(excerpt, text.trim()); 69 | assert.equal(/\.{3}$/.test(excerpt.trim()). true); 70 | 71 | done(); 72 | }); 73 | 74 | it('should customize appendix', function(done) { 75 | var text = fs.readFileSync(path.join(__dirname, 'append.txt'), 'utf8'); 76 | var excerpt = excerpts(html, { words: 10, append: ' >>' }); 77 | 78 | assert.equal(excerpt, text.trim()); 79 | assert.equal(/ >>$/.test(excerpt.trim()). true); 80 | 81 | done(); 82 | }); 83 | 84 | 85 | it('should omit trailing ellipses for full text', function(done) { 86 | var text = fs.readFileSync(path.join(__dirname, 'full.txt'), 'utf8'); 87 | var excerpt = excerpts(html, { words: 10000 }); 88 | 89 | assert.equal(excerpt, text.trim()); 90 | assert.equal(/\.{3}$/.test(excerpt). false); 91 | 92 | done(); 93 | }); 94 | }); 95 | -------------------------------------------------------------------------------- /test/mocha.opts: -------------------------------------------------------------------------------- 1 | --bail 2 | -------------------------------------------------------------------------------- /test/snippet.html: -------------------------------------------------------------------------------- 1 |
Lorem ipsum dolor sit amet, test link adipiscing elit. This is strong. Nullam dignissim convallis est. Quisque aliquam. This is emphasized. Donec faucibus. Nunc iaculis suscipit dui. 53 = 125. Water is H2O. Nam sit amet sem. Aliquam libero nisi, imperdiet at, tincidunt nec, gravida vehicula, nisl. The New York Times (That’s a citation). Underline. Maecenas ornare tortor. Donec sed tellus eget sapien fringilla nonummy. Mauris a ante. Suspendisse quam sem, consequat at, commodo vitae, feugiat in, nunc. Morbi imperdiet augue quis tellus.
2 | 3 |HTML and CSS are our tools. Mauris a ante. Suspendisse quam sem, consequat at, commodo vitae, feugiat in, nunc. Morbi imperdiet augue quis tellus. Praesent mattis, massa quis luctus fermentum, turpis mi volutpat justo, eu volutpat enim diam eget metus. To copy a file type COPY filename
. Dinner’s at 5:00. Let’s make that 7. This text has been struck.