├── .gitignore
├── phpstan.neon
├── .travis.yml
├── CHANGELOG.md
├── phpunit.xml
├── composer.json
├── .github
└── workflows
│ └── main.yml
├── LICENSE.md
├── README.md
├── tests
└── TextSnippetTest.php
└── src
└── TextSnippet.php
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | vendor
3 |
--------------------------------------------------------------------------------
/phpstan.neon:
--------------------------------------------------------------------------------
1 | includes:
2 | - vendor/phpstan/phpstan-nette/extension.neon
3 | - vendor/phpstan/phpstan-nette/rules.neon
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: php
2 | php:
3 | - '7.1'
4 | - '7.2'
5 | - '7.3'
6 | - '7.4'
7 |
8 | install:
9 | - composer install
10 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # 1.0.0
2 |
3 | * Initial release.
4 |
5 |
6 | # 3.0.0
7 |
8 | * Add codestyle checks and modernize code to use proper new PHP features. (thanks @janbarasek)
9 |
--------------------------------------------------------------------------------
/phpunit.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 | ./tests
6 | ./tests/_files
7 |
8 |
9 |
10 | src
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "swisnl/textsnippet",
3 | "description": "Create a snippet of text highlighting a given string",
4 | "type": "library",
5 | "license": "MIT",
6 | "authors": [
7 | {
8 | "name": "Vincent Kleijnendorst",
9 | "email": "vkleijnendort@swis.nl"
10 | },
11 | {
12 | "name": "Björn Brala",
13 | "email": "bjorn@swis.nl"
14 | }
15 | ],
16 | "minimum-stability": "stable",
17 | "require": {
18 | "php": ">=7.1"
19 | },
20 | "require-dev": {
21 | "phpunit/phpunit": "~6.0",
22 | "phpstan/phpstan": "^0.12.18",
23 | "tracy/tracy": "^2.7",
24 | "phpstan/phpstan-nette": "^0.12.6"
25 | },
26 | "autoload": {
27 | "psr-4": {
28 | "Swis\\": "src/"
29 | }
30 | },
31 | "autoload-dev": {
32 | "psr-4": {
33 | "Test\\": "tests/"
34 | }
35 | },
36 | "scripts": {
37 | "phpstan": [
38 | "vendor/bin/phpstan analyse src -c phpstan.neon --level 6 --no-progress"
39 | ]
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: Integrity check
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 |
9 | steps:
10 | - uses: actions/checkout@master
11 |
12 | - name: Install PHP
13 | uses: shivammathur/setup-php@master
14 | with:
15 | php-version: 7.4
16 |
17 | - name: Install composer deps
18 | run: |
19 | composer create-project nette/code-checker temp/code-checker ^3 --no-progress
20 | composer create-project nette/coding-standard temp/coding-standard ^2 --no-progress
21 |
22 | # Install app deps
23 | composer install --no-interaction --prefer-dist
24 |
25 | # Check code checker and coding standards
26 | - name: Check coding standards
27 | run: |
28 | php temp/code-checker/code-checker --short-arrays --strict-types --fix --no-progress
29 | php temp/coding-standard/ecs check src --config temp/coding-standard/coding-standard-php71.yml
30 |
31 | - name: Check PHPStan rules
32 | run: composer phpstan
33 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 SWIS
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SWIS Text Snippet
2 |
3 | Easy and fast way to create a snippet of text, for example for a search result. It will try and highlight the given words and give you the relevant text around it.
4 |
5 | [](https://travis-ci.org/swisnl/textsnippet)
6 | [](https://packagist.org/packages/swisnl/textsnippet)
7 | [](https://packagist.org/packages/swisnl/textsnippet)
8 | [](https://plant.treeware.earth/swisnl/textsnippet)
9 |
10 |
11 | ## Installation
12 |
13 | Just use composer to install the package, or download and include the `TextSnippet.php` file.
14 |
15 | ``composer require swisnl/textsnippet``
16 |
17 |
18 | ## Examples
19 |
20 | Some examples based on a three-paragraph Lorem ipsum text.
21 |
22 | ### Basic usage
23 |
24 | ```php
25 | $snippet = new TextSnippet();
26 | $snippet->createSnippet('Lorem', $lorumIpsum);
27 | ```
28 |
29 | Will result in:
30 |
31 |
32 | **Lorem** ipsum dolor sit amet, consectetur adipiscing elit. ... Etiam bibendum **lorem** nec tempus sollicitudin. ... Sed in dapibus **lorem**. ... Nunc turpis ipsum, bibendum quis sodales sed, ullamcorper et **lorem**. Donec et metus hendrerit, interdum elit ut, dignissim dui.
33 |
34 |
35 | ### Setting highlight html
36 |
37 | You can set the tags surrounding the highlighted text. The `%word%` tag is required.
38 |
39 | ```php
40 | $snippet = new TextSnippet();
41 | $snippet->setHighlightTemplate('%word%');
42 | ```
43 |
44 | ### Setting min and max words
45 |
46 | Setting the min and max words tells the class to try to keep the number of words in the snippet between the min and the max.
47 |
48 | ```php
49 | // Defaults
50 | $minWords = 30;
51 | $maxWords = 100;
52 | ```
53 |
54 | Setting min and max words.
55 |
56 | ```php
57 | $snippet = new TextSnippet();
58 | $snippet->setMinWords(10);
59 | $snippet->setMaxWords(30);
60 | ```
61 |
62 | There is a known issue if you set min and max very close to each other: the class might not find a set of words/sentences whose length falls exactly within that small gap.
63 |
64 | ## Licence
65 |
66 | The MIT License (MIT). Please see [License File](LICENSE.md) for more information.
67 |
68 | This package is [Treeware](https://treeware.earth). If you use it in production, then we ask that you [**buy the world a tree**](https://plant.treeware.earth/swisnl/textsnippet) to thank us for our work. By contributing to the Treeware forest you’ll be creating employment for local families and restoring wildlife habitats.
69 |
70 | ## SWIS :heart: Open Source
71 |
72 | [SWIS](https://www.swis.nl) is a web agency from Leiden, the Netherlands. We love working with open source software.
73 |
--------------------------------------------------------------------------------
/tests/TextSnippetTest.php:
--------------------------------------------------------------------------------
1 | createSnippet('Lorem', $this->lorumIpsum);
33 | $this->assertEquals('Lorem ipsum dolor sit amet, consectetur adipiscing elit. ... Etiam bibendum lorem nec tempus sollicitudin. ... Sed in dapibus lorem. ... Nunc turpis ipsum, bibendum quis sodales sed, ullamcorper et lorem. Donec et metus hendrerit, interdum elit ut, dignissim dui.', $result);
34 | }
35 |
36 |
/**
 * A snippet built with an explicitly configured highlight template must
 * contain the expected sentences around the query word.
 */
public function testCorrectHighlight()
{
    $expected = 'Ut faucibus at nulla a ultrices. ... Nullam ultrices magna ut porta pellentesque. ... Fusce orci quam, faucibus non sem nec, tempor ultrices nibh. Mauris non pharetra leo. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.';

    $textSnippet = new TextSnippet();
    $textSnippet->setHighlightTemplate('%word%');
    $actual = $textSnippet->createSnippet('ultrices', $this->lorumIpsum);

    $this->assertEquals($expected, $actual);
}
45 |
46 |
/**
 * With highlighting switched off (third argument false) the snippet is
 * returned as plain text.
 */
public function testCorrectDisabledHighlight()
{
    $expected = 'Ut faucibus at nulla a ultrices. ... Nullam ultrices magna ut porta pellentesque. ... Fusce orci quam, faucibus non sem nec, tempor ultrices nibh. Mauris non pharetra leo. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.';

    $textSnippet = new TextSnippet();
    $actual = $textSnippet->createSnippet('ultrices', $this->lorumIpsum, false);

    $this->assertEquals($expected, $actual);
}
53 |
54 |
/**
 * A highlight template without the "%word%" placeholder must be rejected
 * with a RuntimeException.
 */
public function testWordVariableIsRequired()
{
    $textSnippet = new TextSnippet();

    // The expectation has to be registered before the call that throws.
    $this->expectException(\RuntimeException::class);

    $textSnippet->setHighlightTemplate('my broken template');
}
62 |
63 |
/**
 * Data sets for testMinAndMax(): each entry is [minWords, maxWords].
 */
public function minMaxWordsProvider()
{
    $dataSets = [];
    $dataSets[] = [10, 30];
    $dataSets[] = [5, 10];
    $dataSets[] = [30, 40];
    $dataSets[] = [50, 60];

    return $dataSets;
}
73 |
74 |
/**
 * The number of words in the generated snippet must lie strictly between
 * the configured minimum and maximum.
 *
 * @dataProvider minMaxWordsProvider
 * @param int $minWords
 * @param int $maxWords
 */
public function testMinAndMax($minWords, $maxWords)
{
    $textSnippet = new TextSnippet();
    $textSnippet->setMinWords($minWords);
    $textSnippet->setMaxWords($maxWords);

    $plainSnippet = $textSnippet->createSnippet('urna', $this->lorumIpsum, false);

    // Count the words once, using the same extra word characters for both bounds.
    $wordCount = str_word_count($plainSnippet, 0, implode('', $this->specialChars));

    $this->assertGreaterThan($minWords, $wordCount);
    $this->assertLessThan($maxWords, $wordCount);
}
89 | }
90 |
--------------------------------------------------------------------------------
/src/TextSnippet.php:
--------------------------------------------------------------------------------
1 | %word%';
17 |
18 | /** @var int */
19 | protected $minWords = 30;
20 |
21 | /** @var int */
22 | protected $maxWords = 100;
23 |
24 |
/**
 * Split a text into sentences.
 *
 * The text is cut at every run of whitespace that directly follows one of
 * the characters . ? ! ; or : so the punctuation stays attached to the
 * sentence before it. Empty pieces are dropped.
 *
 * @param string $text
 * @return string[]
 */
public function breakIntoSentences(string $text): array
{
    // Look-behind: the split point is the whitespace, not the punctuation itself.
    $sentenceBoundary = '/(?<=[.?!;:])\s+/';
    $sentences = preg_split($sentenceBoundary, $text, -1, PREG_SPLIT_NO_EMPTY);

    return $sentences;
}
35 |
36 |
/**
 * Configure the lower bound for the number of words in a snippet.
 *
 * createSnippet() adds neighbouring sentences while the snippet has
 * fewer words than this value.
 *
 * @param int $minWords
 * @return TextSnippet the same instance, so calls can be chained
 */
public function setMinWords(int $minWords): self
{
    $this->minWords = $minWords;

    return $this;
}
49 |
50 |
/**
 * Configure the upper bound for the number of words in a snippet.
 *
 * createSnippet() stops adding sentences once the running word count
 * would reach this value.
 *
 * @param int $maxWords
 * @return TextSnippet the same instance, so calls can be chained
 */
public function setMaxWords(int $maxWords): self
{
    $this->maxWords = $maxWords;

    return $this;
}
63 |
64 |
/**
 * Set the template that is wrapped around every highlighted word.
 *
 * The template must contain the placeholder "%word%", which is replaced
 * by the matched word.
 *
 * @param string $template
 * @return TextSnippet the same instance, so calls can be chained
 * @throws \RuntimeException when the template lacks the "%word%" placeholder
 */
public function setHighlightTemplate(string $template): self
{
    $hasPlaceholder = strpos($template, '%word%') !== false;
    if (!$hasPlaceholder) {
        throw new \RuntimeException('HighlightTemplate should contain "%word%"');
    }

    $this->highlightTemplate = $template;

    return $this;
}
81 |
82 |
/**
 * Highlight words, while keeping casing and accents
 *
 * Every word of $text that equals a word of $query is wrapped in the
 * highlight template. Words are compared case-insensitively and after
 * mapping $this->specialChars to $this->specialReplaces. The word is
 * inserted exactly as it is written in $text.
 *
 * All replacements happen in a single pass, so text inserted by the template
 * is never scanned again, and "$1" / "\1" sequences in the template are kept
 * literally instead of being read as back-references.
 *
 * @param string $query
 * @param string $text
 * @return string the text with highlights; the unchanged text when nothing
 *                matches or the regex engine reports an error
 */
public function highlightMatches(string $query, string $text): string
{
    $charList = implode('', $this->specialChars);

    $normalize = function (string $word): string {
        return strtolower(str_replace($this->specialChars, $this->specialReplaces, $word));
    };

    // Normalised query words as array keys, for cheap lookups below.
    $wanted = [];
    foreach (str_word_count($query, 1, $charList) as $queryWord) {
        $wanted[$normalize($queryWord)] = true;
    }

    // Unicode mode makes \b work next to multi-byte letters, but it needs a
    // valid UTF-8 subject; otherwise fall back to byte-wise matching.
    $unicode = preg_match('//u', $text) === 1;

    $alternatives = [];
    foreach (str_word_count(str_replace('-', ' ', $text), 1, $charList) as $snippetWord) {
        if (isset($alternatives[$snippetWord]) || !isset($wanted[$normalize($snippetWord)])) {
            continue;
        }
        if ($unicode && preg_match('//u', $snippetWord) !== 1) {
            // str_word_count() works on bytes and can cut a multi-byte character
            // in half; such a fragment cannot be used in a UTF-8 pattern.
            continue;
        }
        $alternatives[$snippetWord] = preg_quote($snippetWord, '/');
    }

    if ($alternatives === []) {
        return $text;
    }

    // Longest alternative first, so a shorter word never shadows a longer one.
    usort($alternatives, function (string $a, string $b): int {
        return strlen($b) <=> strlen($a);
    });

    $pattern = '/\b(?:' . implode('|', $alternatives) . ')\b/' . ($unicode ? 'u' : '');
    $template = $this->highlightTemplate;

    $highlighted = preg_replace_callback(
        $pattern,
        function (array $match) use ($template): string {
            return str_replace('%word%', $match[0], $template);
        },
        $text
    );

    // preg_replace_callback() returns null on failure; keep the declared return type.
    return $highlighted ?? $text;
}
107 |
108 |
/**
 * Create the snippet and highlight matched words
 *
 * HTML tags are stripped from $text and the text is split into sentences.
 * The sentences that contain a query word are joined in their original
 * order, with '...' marking every gap between non-adjacent sentences.
 * The first matched sentence is always added; later ones only while the
 * running word count stays below maxWords.
 *
 * If the count is still below minWords, the sentences after the last match
 * and then those before the first match are added, as long as each addition
 * keeps the count below maxWords. When nothing matches, the snippet is built
 * from the first sentences of the text.
 *
 * @param string $query
 * @param string $text
 * @param bool $highlight
 * @return string
 */
public function createSnippet(string $query, string $text, bool $highlight = true)
{
    // The query is only used for matching and is never written to the output,
    // so it is deliberately NOT HTML-escaped: escaping turned "&", "<", ">" and
    // quotes into entity names ("amp", "lt", "quot", ...) that were then
    // searched for as if the user had typed them.
    $text = strip_tags($text);
    $sentences = $this->breakIntoSentences($text);
    $matchedSentences = $this->getMatchedSentences($query, $sentences);
    $charList = implode('', $this->specialChars);

    // key() yields null when nothing matched; treat that as position 0 so the
    // fill-up loop below starts with the first sentence of the text.
    $firstKey = (int) key($matchedSentences);
    $lastKey = $firstKey - 1;

    // Snippet pieces are collected in a list and joined once at the end, so
    // adding a piece at the front cannot produce a double space.
    $parts = [];
    $wordCounter = 0;
    foreach ($matchedSentences as $key => $sentence) {
        $wordCounter += str_word_count($sentence, 0, $charList);
        if ($wordCounter < $this->maxWords || $parts === []) {
            if ((int) $key !== $lastKey + 1) {
                // this sentence does not directly follow the previous one
                $parts[] = '...';
            }
            $parts[] = $sentence;
        }
        $lastKey = (int) $key;
    }

    // Matched text is smaller than [minWords]. Try to add next sentences
    while ($wordCounter < $this->minWords && isset($sentences[$lastKey + 1])) {
        $extraWords = str_word_count($sentences[$lastKey + 1], 0, $charList);
        if ($wordCounter + $extraWords >= $this->maxWords) {
            break;
        }
        $parts[] = $sentences[$lastKey + 1];
        $wordCounter += $extraWords;
        $lastKey++;
    }

    // Matched text is possibly still too small. Try to add sentences before the first sentence
    while ($wordCounter < $this->minWords && isset($sentences[$firstKey - 1])) {
        $extraWords = str_word_count($sentences[$firstKey - 1], 0, $charList);
        if ($wordCounter + $extraWords >= $this->maxWords) {
            break;
        }
        array_unshift($parts, $sentences[$firstKey - 1]);
        $wordCounter += $extraWords;
        $firstKey--;
    }

    $snippet = trim(implode(' ', $parts));

    if ($highlight === true) {
        return $this->highlightMatches($query, $snippet);
    }

    return $snippet;
}
160 |
161 |
/**
 * Collect the sentences that contain at least one word of the query.
 *
 * Query words and sentences are compared after mapping $this->specialChars
 * to $this->specialReplaces, case-insensitively and on word boundaries.
 * The returned array keeps the original sentence positions as keys, in
 * ascending order.
 *
 * @param string $query
 * @param string[] $sentences
 * @return string[]
 */
protected function getMatchedSentences(string $query, array $sentences): array
{
    $charList = implode('', $this->specialChars);

    // One pattern per query word, built once.
    $patterns = [];
    foreach (str_word_count($query, 1, $charList) as $queryWord) {
        $plainWord = str_replace($this->specialChars, $this->specialReplaces, $queryWord);
        $patterns[] = '/\b' . preg_quote($plainWord, '/') . '\b/i';
    }

    $hits = [];
    foreach ($sentences as $position => $sentence) {
        $plainSentence = str_replace($this->specialChars, $this->specialReplaces, $sentence);
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $plainSentence)) {
                // one matching word is enough to keep the sentence
                $hits[$position] = $sentence;
                break;
            }
        }
    }

    ksort($hits);

    return $hits;
}
187 | }
188 |
--------------------------------------------------------------------------------