├── .gitignore ├── phpstan.neon ├── .travis.yml ├── CHANGELOG.md ├── phpunit.xml ├── composer.json ├── .github └── workflows │ └── main.yml ├── LICENSE.md ├── README.md ├── tests └── TextSnippetTest.php └── src └── TextSnippet.php /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | vendor 3 | -------------------------------------------------------------------------------- /phpstan.neon: -------------------------------------------------------------------------------- 1 | includes: 2 | - vendor/phpstan/phpstan-nette/extension.neon 3 | - vendor/phpstan/phpstan-nette/rules.neon 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | php: 3 | - '7.1' 4 | - '7.2' 5 | - '7.3' 6 | - '7.4' 7 | 8 | install: 9 | - composer install 10 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 1.0.0 2 | 3 | * Initial release. 4 | 5 | 6 | # 3.0.0 7 | 8 | * Add codestyle checks and modernize code to use proper new PHP features. 
(thanks @janbarasek) 9 | -------------------------------------------------------------------------------- /phpunit.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | ./tests 6 | ./tests/_files 7 | 8 | 9 | 10 | src 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "swisnl/textsnippet", 3 | "description": "Create a snippet of text highlighting a given string", 4 | "type": "library", 5 | "license": "MIT", 6 | "authors": [ 7 | { 8 | "name": "Vincent Kleijnendorst", 9 | "email": "vkleijnendort@swis.nl" 10 | }, 11 | { 12 | "name": "Björn Brala", 13 | "email": "bjorn@swis.nl" 14 | } 15 | ], 16 | "minimum-stability": "stable", 17 | "require": { 18 | "php": ">=7.1" 19 | }, 20 | "require-dev": { 21 | "phpunit/phpunit": "~6.0", 22 | "phpstan/phpstan": "^0.12.18", 23 | "tracy/tracy": "^2.7", 24 | "phpstan/phpstan-nette": "^0.12.6" 25 | }, 26 | "autoload": { 27 | "psr-4": { 28 | "Swis\\": "src/" 29 | } 30 | }, 31 | "autoload-dev": { 32 | "psr-4": { 33 | "Test\\": "tests/" 34 | } 35 | }, 36 | "scripts": { 37 | "phpstan": [ 38 | "vendor/bin/phpstan analyse src -c phpstan.neon --level 6 --no-progress" 39 | ] 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Integrity check 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@master 11 | 12 | - name: Install PHP 13 | uses: shivammathur/setup-php@master 14 | with: 15 | php-version: 7.4 16 | 17 | - name: Install composer deps 18 | run: | 19 | composer create-project nette/code-checker temp/code-checker ^3 --no-progress 20 | composer create-project nette/coding-standard temp/coding-standard ^2 
--no-progress 21 | 22 | # Install app deps 23 | composer install --no-interaction --prefer-dist 24 | 25 | # Check code checker and coding standards 26 | - name: Check coding standards 27 | run: | 28 | php temp/code-checker/code-checker --short-arrays --strict-types --fix --no-progress 29 | php temp/coding-standard/ecs check src --config temp/coding-standard/coding-standard-php71.yml 30 | 31 | - name: Check PHPStan rules 32 | run: composer phpstan 33 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 SWIS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SWIS Text Snippet 2 | 3 | An easy and fast way to create a snippet of text, for example for a search result. It will try to highlight the given words and give you the relevant text around them. 4 | 5 | [![Build Status](https://img.shields.io/travis/swisnl/textsnippet/master.svg)](https://travis-ci.org/swisnl/textsnippet) 6 | [![Latest Stable Version](https://img.shields.io/packagist/v/swisnl/textsnippet.svg)](https://packagist.org/packages/swisnl/textsnippet) 7 | [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://packagist.org/packages/swisnl/textsnippet) 8 | [![Buy us a tree](https://img.shields.io/badge/Treeware-%F0%9F%8C%B3-lightgreen.svg)](https://plant.treeware.earth/swisnl/textsnippet) 9 | 10 | 11 | ## Installation 12 | 13 | Just use Composer to install the package, or download and include the `TextSnippet.php` file. 14 | 15 | ``composer require swisnl/textsnippet`` 16 | 17 | 18 | ## Examples 19 | 20 | Some examples based on a three-paragraph Lorem ipsum text. 21 | 22 | ### Basic usage 23 | 24 | ```php 25 | $snippet = new TextSnippet(); 26 | $snippet->createSnippet('Lorem', $lorumIpsum); 27 | ``` 28 | 29 | Will result in: 30 | 31 | 32 | **Lorem** ipsum dolor sit amet, consectetur adipiscing elit. ... Etiam bibendum **lorem** nec tempus sollicitudin. ... Sed in dapibus **lorem**. ... Nunc turpis ipsum, bibendum quis sodales sed, ullamcorper et **lorem**. Donec et metus hendrerit, interdum elit ut, dignissim dui. 33 | 34 | 35 | ### Setting highlight HTML 36 | 37 | You can set the tags surrounding the highlighted text. The `%word%` tag is required.
38 | 39 | ```php 40 | $snippet = new TextSnippet(); 41 | $snippet->setHighlightTemplate('%word%'); 42 | ``` 43 | 44 | ### Setting min and max words 45 | 46 | Setting the min and max words tells the class to try to keep the number of words between the min and max. 47 | 48 | ```php 49 | // Defaults 50 | $minWords = 30; 51 | $maxWords = 100; 52 | ``` 53 | 54 | Setting min and max words. 55 | 56 | ```php 57 | $snippet = new TextSnippet(); 58 | $snippet->setMinWords(10); 59 | $snippet->setMaxWords(30); 60 | ``` 61 | 62 | There is a known issue if you set min and max very close to each other. It might not find the correct set of words/sentences to fit exactly within the small gap. 63 | 64 | ## Licence 65 | 66 | The MIT License (MIT). Please see [License File](LICENSE.md) for more information. 67 | 68 | This package is [Treeware](https://treeware.earth). If you use it in production, then we ask that you [**buy the world a tree**](https://plant.treeware.earth/swisnl/textsnippet) to thank us for our work. By contributing to the Treeware forest you’ll be creating employment for local families and restoring wildlife habitats. 69 | 70 | ## SWIS :heart: Open Source 71 | 72 | [SWIS](https://www.swis.nl) is a web agency from Leiden, the Netherlands. We love working with open source software. 73 | -------------------------------------------------------------------------------- /tests/TextSnippetTest.php: -------------------------------------------------------------------------------- 1 | createSnippet('Lorem', $this->lorumIpsum); 33 | $this->assertEquals('Lorem ipsum dolor sit amet, consectetur adipiscing elit. ... Etiam bibendum lorem nec tempus sollicitudin. ... Sed in dapibus lorem. ... Nunc turpis ipsum, bibendum quis sodales sed, ullamcorper et lorem.
Donec et metus hendrerit, interdum elit ut, dignissim dui.', $result); 34 | } 35 | 36 | 37 | public function testCorrectHighlight() 38 | { 39 | $snippet = new TextSnippet(); 40 | $snippet->setHighlightTemplate('%word%'); 41 | $result = $snippet->createSnippet('ultrices', $this->lorumIpsum); 42 | 43 | $this->assertEquals('Ut faucibus at nulla a ultrices. ... Nullam ultrices magna ut porta pellentesque. ... Fusce orci quam, faucibus non sem nec, tempor ultrices nibh. Mauris non pharetra leo. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.', $result); 44 | } 45 | 46 | 47 | public function testCorrectDisabledHighlight() 48 | { 49 | $snippet = new TextSnippet(); 50 | $result = $snippet->createSnippet('ultrices', $this->lorumIpsum, false); 51 | $this->assertEquals('Ut faucibus at nulla a ultrices. ... Nullam ultrices magna ut porta pellentesque. ... Fusce orci quam, faucibus non sem nec, tempor ultrices nibh. Mauris non pharetra leo. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.', $result); 52 | } 53 | 54 | 55 | public function testWordVariableIsRequired() 56 | { 57 | $this->expectException('RuntimeException'); 58 | 59 | $snippet = new TextSnippet(); 60 | $snippet->setHighlightTemplate('my broken template'); 61 | } 62 | 63 | 64 | public function minMaxWordsProvider() 65 | { 66 | return [ 67 | [10, 30], 68 | [5, 10], 69 | [30, 40], 70 | [50, 60], 71 | ]; 72 | } 73 | 74 | 75 | /** 76 | * @dataProvider minMaxWordsProvider 77 | * @param $minWords 78 | * @param $maxWords 79 | */ 80 | public function testMinAndMax($minWords, $maxWords) 81 | { 82 | $snippet = new TextSnippet(); 83 | $snippet->setMinWords($minWords); 84 | $snippet->setMaxWords($maxWords); 85 | $result = $snippet->createSnippet('urna', $this->lorumIpsum, false); 86 | $this->assertGreaterThan($minWords, str_word_count($result, 0, implode('', $this->specialChars))); 87 | $this->assertLessThan($maxWords, str_word_count($result, 0, 
implode('', $this->specialChars))); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/TextSnippet.php: -------------------------------------------------------------------------------- 1 | %word%'; 17 | 18 | /** @var int */ 19 | protected $minWords = 30; 20 | 21 | /** @var int */ 22 | protected $maxWords = 100; 23 | 24 | 25 | /** 26 | * Break a text into sentences 27 | * 28 | * @param string $text 29 | * @return string[] 30 | */ 31 | public function breakIntoSentences(string $text): array 32 | { 33 | return preg_split('/(?<=[.?!;:])\s+/', $text, -1, PREG_SPLIT_NO_EMPTY); 34 | } 35 | 36 | 37 | /** 38 | * Set the minimum number of words, returned in the snippet 39 | * 40 | * @param int $minWords 41 | * @return TextSnippet 42 | */ 43 | public function setMinWords(int $minWords): self 44 | { 45 | $this->minWords = $minWords; 46 | 47 | return $this; 48 | } 49 | 50 | 51 | /** 52 | * Set the maximum number of words, returned in the snippet 53 | * 54 | * @param int $maxWords 55 | * @return TextSnippet 56 | */ 57 | public function setMaxWords(int $maxWords): self 58 | { 59 | $this->maxWords = $maxWords; 60 | 61 | return $this; 62 | } 63 | 64 | 65 | /** 66 | * Set the template for the highlighting, for example '%word%' 67 | * 68 | * @param string $template 69 | * @return TextSnippet 70 | * @throws \RuntimeException 71 | */ 72 | public function setHighlightTemplate(string $template): self 73 | { 74 | if (strpos($template, '%word%') === false) { 75 | throw new \RuntimeException('HighlightTemplate should contain "%word%"'); 76 | } 77 | $this->highlightTemplate = $template; 78 | 79 | return $this; 80 | } 81 | 82 | 83 | /** 84 | * Highlight words, while keeping casing and accents 85 | * 86 | * @param string $query 87 | * @param string $text 88 | * @return string 89 | */ 90 | public function highlightMatches(string $query, string $text): string 91 | { 92 | $queryWords = str_word_count($query, 1, implode('', 
$this->specialChars)); 93 | $snippetWords = str_word_count(str_replace('-', ' ', $text), 1, implode('', $this->specialChars)); 94 | $replaces = []; 95 | foreach ($queryWords as $word) { 96 | foreach ($snippetWords as $snippetWord) { 97 | // case-insensitive matching. accent-insensitive matching 98 | if (strtolower(str_replace($this->specialChars, $this->specialReplaces, $word)) === 99 | strtolower(str_replace($this->specialChars, $this->specialReplaces, $snippetWord))) { 100 | $replaces['/\b' . preg_quote($snippetWord, '/') . '\b/'] = str_replace('%word%', $snippetWord, $this->highlightTemplate); 101 | } 102 | } 103 | } 104 | 105 | return preg_replace(array_keys($replaces), array_values($replaces), $text); 106 | } 107 | 108 | 109 | /** 110 | * Create the snippet and highlight matched words 111 | * 112 | * @param string $query 113 | * @param string $text 114 | * @param bool $highlight 115 | * @return string 116 | */ 117 | public function createSnippet(string $query, string $text, bool $highlight = true) 118 | { 119 | $query = htmlspecialchars($query); 120 | $text = strip_tags($text); 121 | $sentences = $this->breakIntoSentences($text); 122 | $matchedSentences = $this->getMatchedSentences($query, $sentences); 123 | $result = ''; 124 | $wordCounter = 0; 125 | $lastKey = key($matchedSentences) - 1; 126 | foreach ($matchedSentences as $key => $sentence) { 127 | $wordCounter += str_word_count($sentence, 0, implode('', $this->specialChars)); 128 | if ($wordCounter < $this->maxWords || $result === '') { 129 | if ((int) $key !== $lastKey + 1) { 130 | // if this sentence is not the next sentence, add ' ... ' 131 | $result .= ' ...'; 132 | } 133 | $result .= ' ' . $sentence; 134 | } 135 | $lastKey = (int) $key; 136 | } 137 | 138 | // Matched text is smaller than [minWords]. 
Try to add next sentences 139 | while ($wordCounter < $this->minWords && isset($sentences[$lastKey + 1]) && str_word_count($sentences[$lastKey + 1], 0, implode('', $this->specialChars)) + $wordCounter < $this->maxWords) { 140 | $result .= ' ' . $sentences[$lastKey + 1]; 141 | $wordCounter += str_word_count($sentences[$lastKey + 1], 0, implode('', $this->specialChars)); 142 | $lastKey++; 143 | } 144 | 145 | // Matched text is possibly still to small. Try to add sentences before the first sentence 146 | $firstKey = key($matchedSentences); 147 | while ($wordCounter < $this->minWords && isset($sentences[$firstKey - 1]) && str_word_count($sentences[$firstKey - 1], 0, implode('', $this->specialChars)) + $wordCounter < $this->maxWords) { 148 | // add this sentence before the current result 149 | $result = $sentences[$firstKey - 1] . ' ' . $result; 150 | $wordCounter += str_word_count($sentences[$firstKey - 1], 0, implode('', $this->specialChars)); 151 | $firstKey--; 152 | } 153 | 154 | if ($highlight === true) { 155 | return $this->highlightMatches($query, trim($result)); 156 | } 157 | 158 | return trim($result); 159 | } 160 | 161 | 162 | /** 163 | * Returns an array of matched sentences against the words in the query 164 | * Keys are the original sentence positions 165 | * 166 | * @param string $query 167 | * @param string[] $sentences 168 | * @return string[] 169 | */ 170 | protected function getMatchedSentences(string $query, array $sentences): array 171 | { 172 | $queryWords = str_word_count($query, 1, implode('', $this->specialChars)); 173 | $matchedSentences = []; 174 | foreach ($queryWords as $word) { 175 | foreach ($sentences as $key => $sentence) { 176 | if (preg_match('/\b' . preg_quote(str_replace($this->specialChars, $this->specialReplaces, $word), '/') . 
'\b/i', str_replace($this->specialChars, $this->specialReplaces, $sentence))) { 177 | // if word is matched in this sentence (word boundary) 178 | $matchedSentences[$key] = $sentence; 179 | } 180 | } 181 | } 182 | 183 | ksort($matchedSentences); 184 | 185 | return $matchedSentences; 186 | } 187 | } 188 | --------------------------------------------------------------------------------