├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── Readme.md ├── index.js ├── package.json └── test └── index.js /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Testing 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-16.04 8 | strategy: 9 | matrix: 10 | node: [ '12', '14' ] 11 | name: Node ${{ matrix.node }} sample 12 | steps: 13 | - uses: actions/checkout@v2 14 | - uses: actions/setup-node@v2 15 | with: 16 | node-version: ${{ matrix.node }} 17 | - run: npm install 18 | - uses: paambaati/codeclimate-action@v2.7.5 19 | if: ${{ matrix.node == '14' }} 20 | env: 21 | CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} 22 | with: 23 | coverageCommand: npm run ci -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | .nyc_output 3 | coverage 4 | package-lock.json 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2021 Martin Heidegger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | [](https://travis-ci.org/martinheidegger/excerpt-html) 2 | [](http://standardjs.com/) 3 | [](https://codeclimate.com/github/martinheidegger/excerpt-html/maintainability) 4 | [](https://codeclimate.com/github/martinheidegger/excerpt-html/test_coverage) 5 | 6 | # excerpt-html 7 | 8 | parses a given html text for a good excerpt. 9 | 10 | # Install 11 | 12 | ``` 13 | $ npm i excerpt-html --save 14 | ``` 15 | 16 | # API usage 17 | 18 | ```JavaScript 19 | var htmlCode = '
Hello world
'; 20 | var excerptHtml = require('excerpt-html'); 21 | var excerpt = excerptHtml(htmlCode); 22 | ``` 23 | 24 | It will either use the first found paragraph or everything up to a 25 | 26 | `<!-- more -->` 27 | 28 | # Options 29 | 30 | You can specify a few options that modify the way the excerpt is parsed: 31 | 32 | ``` JavaScript 33 | excerptHtml(htmlCode, { 34 | moreRegExp: /\s*<!--\s*more\s*-->/i, // Search for the slug 35 | stripTags: true, // Set to false to get html code 36 | pruneLength: 140, // Amount of characters that the excerpt should contain 37 | pruneString: '…', // Character that will be added to the pruned string 38 | pruneSeparator: ' ', // Separator to be used to separate words 39 | }) 40 | ``` 41 | 42 | Note: `pruneLength` and `pruneString` only work when `stripTags` is set to `true` (default). 43 | 44 | # History 45 | 46 | To make this project we detached the code of [metalsmith-better-excerpts](https://github.com/simbo/metalsmith-better-excerpts) from `metalsmith`. 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | /** 3 | * Extracted from https://github.com/simbo/metalsmith-better-excerpts 4 | * (published under MIT license) 5 | */ 6 | 7 | const cheerio = require('cheerio') 8 | const unescapeHTML = require('he').unescape 9 | const stripTags = require('striptags') 10 | const truncate = require('lodash.truncate') 11 | 12 | /** 13 | * retrieve excerpt from file object by extracting contents until a 'more' tag 14 | * @param {string} html file object 15 | * @param {RegExp} regExp 'more' tag regexp 16 | * @return {string} excerpt string or undefined 17 | */ 18 | function getExcerptByMoreTag (html, regExp) { 19 | html = cheerio.load('Hello world
'), 'Hello world') 8 | t.end() 9 | }) 10 | 11 | test('non html example', function (t) { 12 | t.equal(excerptHtml('Hello world'), 'Hello world') 13 | t.end() 14 | }) 15 | 16 | test('more section without html tags', function (t) { 17 | t.equal(excerptHtml('Fancy text it is more than I need Is still here'), 'Fancy text it is more than I need') 18 | t.end() 19 | }) 20 | 21 | test('more section without html tags doesnt work with or without spaces', function (t) { 22 | t.equal(excerptHtml('Fancy text it is more than I need Is still here'), 'Fancy text it is more than I need') 23 | t.end() 24 | }) 25 | 26 | test('cut off by word at default', function (t) { 27 | t.equal(excerptHtml('Hello you', { 28 | pruneLength: 8, 29 | pruneString: '' 30 | }), 'Hello') 31 | t.end() 32 | }) 33 | 34 | test('cut off characters without prune separator', function (t) { 35 | t.equal(excerptHtml('Hello you', { 36 | pruneLength: 8, 37 | pruneString: '', 38 | pruneSeparator: '' 39 | }), 'Hello yo') 40 | t.end() 41 | }) 42 | 43 | test('cut off characters without prune separator', function (t) { 44 | t.equal(excerptHtml('Hello you', { 45 | pruneLength: 8, 46 | pruneSeparator: '' 47 | }), 'Hello y…') 48 | t.end() 49 | }) 50 | 51 | test('strip html tags', function (t) { 52 | t.equal(excerptHtml('This is a fancy world, I think it might be weird to ask me.
', { 53 | stripTags: false 54 | }), 'This is a fancy world, I think it might be weird to ask me.') 55 | t.end() 56 | }) 57 | 58 | test('cropping stripped html tags doesnt work', function (t) { 59 | t.equal(excerptHtml('This is a fancy world, I think it might be weird to ask me.
', { 60 | stripTags: false, 61 | pruneLength: 16 62 | }), 'This is a fancy world, I think it might be weird to ask me.') 63 | t.end() 64 | }) 65 | 66 | test('cropping stripped html tags doesnt work', function (t) { 67 | t.equal(excerptHtml('Hello World
This is not taken
', { 68 | stripTags: false, 69 | pruneLength: 16 70 | }), 'Hello World') 71 | t.end() 72 | }) 73 | 74 | test('cropping stripped html tags doesnt work', function (t) { 75 | t.equal(excerptHtml('Hello World
This is not taken
', { 76 | stripTags: false, 77 | pruneLength: 50 78 | }), 'Hello World') 79 | t.end() 80 | }) 81 | 82 | test('unescaping should work for all characters', function (t) { 83 | t.equal(excerptHtml('Hello & World ö ♥'), 'Hello & World ö ♥') 84 | t.end() 85 | }) 86 | 87 | test('dont prune text if pruneLength is < 1', function (t) { 88 | const longString = 'This is text. This text is longer than 140 characters, the default value for' + 89 | 'this method. If pruneLength is set to a number < 1 it will ignore the default' + 90 | 'limit of 140. Let us make the text a little longer.' 91 | t.equal(excerptHtml(longString, { 92 | pruneLength: -1 93 | }), longString) 94 | t.end() 95 | }) 96 | 97 | test('empty text', function (t) { 98 | t.equal(excerptHtml('', { 99 | pruneLength: -1 100 | }), '') 101 | t.end() 102 | }) 103 | 104 | test('make sure that empty tags are removed', function (t) { 105 | t.equal(excerptHtml('
test
test
' 120 | , {} 121 | ), 122 | 'test' 123 | ) 124 | t.end() 125 | }) 126 | --------------------------------------------------------------------------------