├── Grapheme.php
├── LICENSE
├── README.md
├── bootstrap.php
├── bootstrap80.php
└── composer.json

/Grapheme.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Polyfill\Intl\Grapheme;

// Use PCRE's built-in \X when the PCRE version is recent enough (>= 8.32 on the
// 8.x line, >= 10.39 on the 10.x line); otherwise fall back to the hand-written
// pattern declared on the class below.
\define('SYMFONY_GRAPHEME_CLUSTER_RX', ((float) \PCRE_VERSION < 10 ? (float) \PCRE_VERSION >= 8.32 : (float) \PCRE_VERSION >= 10.39) ? '\X' : Grapheme::GRAPHEME_CLUSTER_RX);

/**
 * Partial intl implementation in pure PHP.
 *
 * Implemented:
 * - grapheme_extract  - Extract a sequence of grapheme clusters from a text buffer, which must be encoded in UTF-8
 * - grapheme_stripos  - Find position (in grapheme units) of first occurrence of a case-insensitive string
 * - grapheme_stristr  - Returns part of haystack string from the first occurrence of case-insensitive needle to the end of haystack
 * - grapheme_strlen   - Get string length in grapheme units
 * - grapheme_strpos   - Find position (in grapheme units) of first occurrence of a string
 * - grapheme_strripos - Find position (in grapheme units) of last occurrence of a case-insensitive string
 * - grapheme_strrpos  - Find position (in grapheme units) of last occurrence of a string
 * - grapheme_strstr   - Returns part of haystack string from the first occurrence of needle to the end of haystack
 * - grapheme_substr   - Return part of a string
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Grapheme
{
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
    // This regular expression is a work around for http://bugs.exim.org/1279
    public const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';

    // Extra replacements applied after lower-casing when MB_CASE_FOLD_SIMPLE is
    // not available: entry N of the first list is replaced by entry N of the second.
    private const CASE_FOLD = [
        ['µ', 'ſ', "\xCD\x85", 'ς', "\xCF\x90", "\xCF\x91", "\xCF\x95", "\xCF\x96", "\xCF\xB0", "\xCF\xB1", "\xCF\xB5", "\xE1\xBA\x9B", "\xE1\xBE\xBE"],
        ['μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1", 'ι'],
    ];

    /**
     * Extracts a run of grapheme clusters from $s, starting at byte offset $start.
     *
     * @param mixed $s     The text buffer
     * @param int   $size  Budget, interpreted according to $type
     * @param int   $type  GRAPHEME_EXTR_COUNT (number of clusters), GRAPHEME_EXTR_MAXBYTES
     *                     (byte budget) or GRAPHEME_EXTR_MAXCHARS (budget measured with iconv_strlen())
     * @param int   $start Byte offset to start from; a negative value is counted from the end of $s
     * @param mixed $next  Receives $start plus the byte length of the returned string
     *
     * @return string|false The extracted text; false when the text from $start on is empty,
     *                      when $size or the resolved $start is negative, or (before PHP 8)
     *                      when $type is not one of the three supported constants
     *
     * @throws \ValueError On PHP 8+ when $type is not one of the three supported constants
     */
    public static function grapheme_extract($s, $size, $type = \GRAPHEME_EXTR_COUNT, $start = 0, &$next = 0)
    {
        if (0 > $start) {
            $start = \strlen($s) + $start;
        }

        if (!\is_scalar($s)) {
            // Probe substr() with a temporary handler to learn whether it complains
            // about this non-scalar input.
            $hasError = false;
            set_error_handler(function () use (&$hasError) { $hasError = true; });
            try {
                $next = substr($s, $start);
            } finally {
                // Always uninstall the probe handler. On PHP 8 substr() throws a
                // TypeError for e.g. arrays; without this the handler would stay
                // installed and swallow every later warning.
                restore_error_handler();
            }
            if ($hasError) {
                // Repeat the call so the diagnostic reaches the regular error handler.
                substr($s, $start);
                $s = '';
            } else {
                $s = $next;
            }
        } else {
            $s = substr($s, $start);
        }
        $size = (int) $size;
        $type = (int) $type;
        $start = (int) $start;

        if (\GRAPHEME_EXTR_COUNT !== $type && \GRAPHEME_EXTR_MAXBYTES !== $type && \GRAPHEME_EXTR_MAXCHARS !== $type) {
            if (80000 > \PHP_VERSION_ID) {
                return false;
            }

            throw new \ValueError('grapheme_extract(): Argument #3 ($type) must be one of GRAPHEME_EXTR_COUNT, GRAPHEME_EXTR_MAXBYTES, or GRAPHEME_EXTR_MAXCHARS');
        }

        if (!isset($s[0]) || 0 > $size || 0 > $start) {
            return false;
        }
        if (0 === $size) {
            return '';
        }

        $next = $start;

        // The subject is prefixed with "\r\n" and the pieces are read from index 1 on,
        // i.e. the prefix is expected to occupy index 0 of the split result.
        $s = preg_split('/('.SYMFONY_GRAPHEME_CLUSTER_RX.')/u', "\r\n".$s, $size + 1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);

        if (!isset($s[1])) {
            return false;
        }

        $i = 1;
        $ret = '';

        do {
            // Charge the current cluster against the remaining budget.
            if (\GRAPHEME_EXTR_COUNT === $type) {
                --$size;
            } elseif (\GRAPHEME_EXTR_MAXBYTES === $type) {
                $size -= \strlen($s[$i]);
            } else {
                $size -= iconv_strlen($s[$i], 'UTF-8//IGNORE');
            }

            // Only keep the cluster when it still fits in the budget.
            if ($size >= 0) {
                $ret .= $s[$i];
            }
        } while (isset($s[++$i]) && $size > 0);

        $next += \strlen($ret);

        return $ret;
    }

    /**
     * Counts the grapheme clusters in $s.
     *
     * The count is the number of matches of the cluster pattern, as reported by preg_replace().
     *
     * @return int|null The number of clusters; null when nothing matched although $s is not
     *                  the empty string (presumably because $s is not valid UTF-8)
     */
    public static function grapheme_strlen($s)
    {
        preg_replace('/'.SYMFONY_GRAPHEME_CLUSTER_RX.'/u', '', $s, -1, $len);

        return 0 === $len && '' !== $s ? null : $len;
    }

    /**
     * Returns the part of $s selected by $start and $len, both expressed in grapheme clusters.
     *
     * @param mixed    $s     The input string
     * @param int      $start First cluster to return; a negative value is counted from the end
     * @param int|null $len   Number of clusters to return; null means "up to the end", a negative
     *                        value leaves that many clusters off the end
     *
     * @return string|false The selected clusters; when the range is out of bounds the result is
     *                      false before PHP 8 and '' on PHP 8+ (where a too-negative $start is
     *                      clamped to 0 instead)
     */
    public static function grapheme_substr($s, $start, $len = null)
    {
        if (null === $len) {
            $len = 2147483647;
        }

        preg_match_all('/'.SYMFONY_GRAPHEME_CLUSTER_RX.'/u', $s, $clusters);

        $slen = \count($clusters[0]);
        $start = (int) $start;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            $start = 0;
        }
        if ($start >= $slen) {
            return \PHP_VERSION_ID >= 80000 ? '' : false;
        }

        // Number of clusters available from $start to the end of the string.
        $rem = $slen - $start;

        if (0 > $len) {
            $len += $rem;
        }
        if (0 === $len) {
            return '';
        }
        if (0 > $len) {
            return \PHP_VERSION_ID >= 80000 ? '' : false;
        }
        if ($len > $rem) {
            $len = $rem;
        }

        return implode('', \array_slice($clusters[0], $start, $len));
    }

    /**
     * Finds the position (in grapheme units) of the first occurrence of $needle in $s.
     *
     * @return int|false
     */
    public static function grapheme_strpos($s, $needle, $offset = 0)
    {
        return self::grapheme_position($s, $needle, $offset, 0);
    }

    /**
     * Case-insensitive variant of grapheme_strpos().
     *
     * @return int|false
     */
    public static function grapheme_stripos($s, $needle, $offset = 0)
    {
        return self::grapheme_position($s, $needle, $offset, 1);
    }

    /**
     * Finds the position (in grapheme units) of the last occurrence of $needle in $s.
     *
     * @return int|false
     */
    public static function grapheme_strrpos($s, $needle, $offset = 0)
    {
        return self::grapheme_position($s, $needle, $offset, 2);
    }

    /**
     * Case-insensitive variant of grapheme_strrpos().
     *
     * @return int|false
     */
    public static function grapheme_strripos($s, $needle, $offset = 0)
    {
        return self::grapheme_position($s, $needle, $offset, 3);
    }

    /**
     * Delegates to mb_stristr() with the UTF-8 encoding.
     */
    public static function grapheme_stristr($s, $needle, $beforeNeedle = false)
    {
        return mb_stristr($s, $needle, $beforeNeedle, 'UTF-8');
    }

    /**
     * Delegates to mb_strstr() with the UTF-8 encoding.
     */
    public static function grapheme_strstr($s, $needle, $beforeNeedle = false)
    {
        return mb_strstr($s, $needle, $beforeNeedle, 'UTF-8');
    }

    /**
     * Shared implementation of the four grapheme_str*pos() functions.
     *
     * @param mixed $s      The haystack
     * @param mixed $needle The string to look for
     * @param int   $offset Offset in grapheme units; a negative value is counted from the end
     * @param int   $mode   Bit field: bit 0 (value 1) selects a case-insensitive search,
     *                      bit 1 (value 2) selects a search for the last occurrence
     *
     * @return int|false The position in grapheme units, or false when the needle is not found,
     *                   when the haystack does not match /./us, or (before PHP 8) when the
     *                   needle does not match /./us
     */
    private static function grapheme_position($s, $needle, $offset, $mode)
    {
        $needle = (string) $needle;
        if (80000 > \PHP_VERSION_ID && !preg_match('/./us', $needle)) {
            return false;
        }
        $s = (string) $s;
        if (!preg_match('/./us', $s)) {
            return false;
        }
        if ($offset > 0) {
            $s = self::grapheme_substr($s, $offset);
        } elseif ($offset < 0) {
            if (2 > $mode) {
                // Forward search: turn the negative offset into one counted from the start.
                $offset += self::grapheme_strlen($s);
                $s = self::grapheme_substr($s, $offset);
                if (0 > $offset) {
                    $offset = 0;
                }
            } elseif (0 > $offset += self::grapheme_strlen($needle)) {
                // Reverse search: cut the tail of the haystack that must not be searched.
                $s = self::grapheme_substr($s, 0, $offset);
                $offset = 0;
            } else {
                $offset = 0;
            }
        }

        // As UTF-8 is self-synchronizing, and we have ensured the strings are valid UTF-8,
        // we can use normal binary string functions here. For case-insensitive searches,
        // case fold the strings first.
        $caseInsensitive = $mode & 1;
        $reverse = $mode & 2;
        if ($caseInsensitive) {
            // Use the same case folding mode as mbstring does for mb_stripos().
            // Stick to SIMPLE case folding to avoid changing the length of the string, which
            // might result in offsets being shifted.
            $caseMode = \defined('MB_CASE_FOLD_SIMPLE') ? \MB_CASE_FOLD_SIMPLE : \MB_CASE_LOWER;
            $s = mb_convert_case($s, $caseMode, 'UTF-8');
            $needle = mb_convert_case($needle, $caseMode, 'UTF-8');

            if (!\defined('MB_CASE_FOLD_SIMPLE')) {
                $s = str_replace(self::CASE_FOLD[0], self::CASE_FOLD[1], $s);
                $needle = str_replace(self::CASE_FOLD[0], self::CASE_FOLD[1], $needle);
            }
        }
        if ($reverse) {
            $needlePos = strrpos($s, $needle);
        } else {
            $needlePos = strpos($s, $needle);
        }

        // Convert the byte position into grapheme units and add back the skipped offset.
        return false !== $needlePos ? self::grapheme_strlen(substr($s, 0, $needlePos)) + $offset : false;
    }
}

-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-present Fabien Potencier 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Symfony Polyfill / Intl: Grapheme 2 | ================================= 3 | 4 | This component provides a partial, native PHP implementation of the 5 | [Grapheme functions](https://php.net/intl.grapheme) from the 6 | [Intl](https://php.net/intl) extension. 7 | 8 | - [`grapheme_extract`](https://php.net/grapheme_extract): Extract a sequence of grapheme 9 | clusters from a text buffer, which must be encoded in UTF-8 10 | - [`grapheme_stripos`](https://php.net/grapheme_stripos): Find position (in grapheme units) 11 | of first occurrence of a case-insensitive string 12 | - [`grapheme_stristr`](https://php.net/grapheme_stristr): Returns part of haystack string 13 | from the first occurrence of case-insensitive needle to the end of haystack 14 | - [`grapheme_strlen`](https://php.net/grapheme_strlen): Get string length in grapheme units 15 | - [`grapheme_strpos`](https://php.net/grapheme_strpos): Find position (in grapheme units) 16 | of first occurrence of a string 17 | - [`grapheme_strripos`](https://php.net/grapheme_strripos): Find position (in grapheme units) 18 | of last occurrence of a case-insensitive string 19 | - [`grapheme_strrpos`](https://php.net/grapheme_strrpos): Find position (in grapheme units) 20 | of last occurrence of a string 21 | - 
[`grapheme_strstr`](https://php.net/grapheme_strstr): Returns part of haystack string from 22 | the first occurrence of needle to the end of haystack 23 | - [`grapheme_substr`](https://php.net/grapheme_substr): Return part of a string 24 | 25 | More information can be found in the 26 | [main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md). 27 | 28 | License 29 | ======= 30 | 31 | This library is released under the [MIT license](LICENSE). 32 | 

--------------------------------------------------------------------------------
/bootstrap.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

use Symfony\Polyfill\Intl\Grapheme as p;

// Nothing to polyfill when the intl extension is loaded.
if (extension_loaded('intl')) {
    return;
}

// PHP 8+ uses the typed declarations from the dedicated bootstrap file.
if (\PHP_VERSION_ID >= 80000) {
    return require __DIR__.'/bootstrap80.php';
}

// Declare the extraction-type constants unless something already did.
defined('GRAPHEME_EXTR_COUNT') || define('GRAPHEME_EXTR_COUNT', 0);
defined('GRAPHEME_EXTR_MAXBYTES') || define('GRAPHEME_EXTR_MAXBYTES', 1);
defined('GRAPHEME_EXTR_MAXCHARS') || define('GRAPHEME_EXTR_MAXCHARS', 2);

// Each global function is declared only if missing and forwards its arguments
// unchanged to the static method of the same name on the Grapheme class.
if (!function_exists('grapheme_extract')) {
    function grapheme_extract($haystack, $size, $type = 0, $start = 0, &$next = 0)
    {
        return p\Grapheme::grapheme_extract($haystack, $size, $type, $start, $next);
    }
}
if (!function_exists('grapheme_stripos')) {
    function grapheme_stripos($haystack, $needle, $offset = 0)
    {
        return p\Grapheme::grapheme_stripos($haystack, $needle, $offset);
    }
}
if (!function_exists('grapheme_stristr')) {
    function grapheme_stristr($haystack, $needle, $beforeNeedle = false)
    {
        return p\Grapheme::grapheme_stristr($haystack, $needle, $beforeNeedle);
    }
}
if (!function_exists('grapheme_strlen')) {
    function grapheme_strlen($input)
    {
        return p\Grapheme::grapheme_strlen($input);
    }
}
if (!function_exists('grapheme_strpos')) {
    function grapheme_strpos($haystack, $needle, $offset = 0)
    {
        return p\Grapheme::grapheme_strpos($haystack, $needle, $offset);
    }
}
if (!function_exists('grapheme_strripos')) {
    function grapheme_strripos($haystack, $needle, $offset = 0)
    {
        return p\Grapheme::grapheme_strripos($haystack, $needle, $offset);
    }
}
if (!function_exists('grapheme_strrpos')) {
    function grapheme_strrpos($haystack, $needle, $offset = 0)
    {
        return p\Grapheme::grapheme_strrpos($haystack, $needle, $offset);
    }
}
if (!function_exists('grapheme_strstr')) {
    function grapheme_strstr($haystack, $needle, $beforeNeedle = false)
    {
        return p\Grapheme::grapheme_strstr($haystack, $needle, $beforeNeedle);
    }
}
if (!function_exists('grapheme_substr')) {
    function grapheme_substr($string, $offset, $length = null)
    {
        return p\Grapheme::grapheme_substr($string, $offset, $length);
    }
}

--------------------------------------------------------------------------------
/bootstrap80.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

use Symfony\Polyfill\Intl\Grapheme as p;

// Declare the extraction-type constants unless something already did.
defined('GRAPHEME_EXTR_COUNT') || define('GRAPHEME_EXTR_COUNT', 0);
defined('GRAPHEME_EXTR_MAXBYTES') || define('GRAPHEME_EXTR_MAXBYTES', 1);
defined('GRAPHEME_EXTR_MAXCHARS') || define('GRAPHEME_EXTR_MAXCHARS', 2);

// Typed wrappers: nullable arguments are cast to their scalar type before being
// forwarded to the static method of the same name on the Grapheme class.
if (!function_exists('grapheme_extract')) {
    function grapheme_extract(?string $haystack, ?int $size, ?int $type = GRAPHEME_EXTR_COUNT, ?int $offset = 0, &$next = null): string|false
    {
        return p\Grapheme::grapheme_extract((string) $haystack, (int) $size, (int) $type, (int) $offset, $next);
    }
}
if (!function_exists('grapheme_stripos')) {
    function grapheme_stripos(?string $haystack, ?string $needle, ?int $offset = 0): int|false
    {
        return p\Grapheme::grapheme_stripos((string) $haystack, (string) $needle, (int) $offset);
    }
}
if (!function_exists('grapheme_stristr')) {
    function grapheme_stristr(?string $haystack, ?string $needle, ?bool $beforeNeedle = false): string|false
    {
        return p\Grapheme::grapheme_stristr((string) $haystack, (string) $needle, (bool) $beforeNeedle);
    }
}
if (!function_exists('grapheme_strlen')) {
    function grapheme_strlen(?string $string): int|false|null
    {
        return p\Grapheme::grapheme_strlen((string) $string);
    }
}
if (!function_exists('grapheme_strpos')) {
    function grapheme_strpos(?string $haystack, ?string $needle, ?int $offset = 0): int|false
    {
        return p\Grapheme::grapheme_strpos((string) $haystack, (string) $needle, (int) $offset);
    }
}
if (!function_exists('grapheme_strripos')) {
    function grapheme_strripos(?string $haystack, ?string $needle, ?int $offset = 0): int|false
    {
        return p\Grapheme::grapheme_strripos((string) $haystack, (string) $needle, (int) $offset);
    }
}
if (!function_exists('grapheme_strrpos')) {
    function grapheme_strrpos(?string $haystack, ?string $needle, ?int $offset = 0): int|false
    {
        return p\Grapheme::grapheme_strrpos((string) $haystack, (string) $needle, (int) $offset);
    }
}
if (!function_exists('grapheme_strstr')) {
    function grapheme_strstr(?string $haystack, ?string $needle, ?bool $beforeNeedle = false): string|false
    {
        return p\Grapheme::grapheme_strstr((string) $haystack, (string) $needle, (bool) $beforeNeedle);
    }
}
if (!function_exists('grapheme_substr')) {
    function grapheme_substr(?string $string, ?int $offset, ?int $length = null): string|false
    {
        return p\Grapheme::grapheme_substr((string) $string, (int) $offset, $length);
    }
}

-------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "symfony/polyfill-intl-grapheme", 3 | "type": "library", 4 | "description": "Symfony polyfill for intl's grapheme_* functions", 5 | "keywords": ["polyfill", "shim", "compatibility", "portable", "intl", "grapheme"], 6 | "homepage": "https://symfony.com", 7 | "license": "MIT", 8 | "authors": [ 9 | { 10 | "name": "Nicolas Grekas", 11 | "email": "p@tchwork.com" 12 | }, 13 | { 14 | "name": "Symfony Community", 15 | "homepage": "https://symfony.com/contributors" 16 | } 17 | ], 18 | "require": { 19 | "php": ">=7.2" 20 | }, 21 | "autoload": { 22 | "psr-4": { "Symfony\\Polyfill\\Intl\\Grapheme\\": "" }, 23 | "files": [ "bootstrap.php" ] 24 | }, 25 | "suggest": { 26 | "ext-intl": "For best performance" 27 | }, 28 | "minimum-stability": "dev", 29 | "extra": { 30 | "thanks": { 31 | "name": "symfony/polyfill", 32 | "url": "https://github.com/symfony/polyfill" 33 | } 34 | } 35 | } 36 | --------------------------------------------------------------------------------