├── .gitignore ├── LICENSE ├── README.md ├── composer.json ├── demo ├── attachmentDecodeTest.php ├── entitiesDecodeTest.php └── entitiesExtractTest.php ├── docs └── _config.yml └── src └── EntityDecoder.php /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor/ 2 | *.log 3 | /demo/intentitiesDecodeTest.php 4 | .phpdoc/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 LucaDevelop 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # telegram-entities-decoder 2 | [![Build Status](https://scrutinizer-ci.com/g/LucaDevelop/telegram-entities-decoder/badges/build.png?b=master)](https://scrutinizer-ci.com/g/LucaDevelop/telegram-entities-decoder/build-status/master) [![Latest Stable Version](https://img.shields.io/github/v/release/lucadevelop/telegram-entities-decoder?display_name=tag&label=stable)](https://packagist.org/packages/lucadevelop/telegram-entities-decoder) [![Total Downloads](http://poser.pugx.org/lucadevelop/telegram-entities-decoder/downloads)](https://packagist.org/packages/lucadevelop/telegram-entities-decoder) [![Latest Unstable Version](http://poser.pugx.org/lucadevelop/telegram-entities-decoder/v/unstable)](https://packagist.org/packages/lucadevelop/telegram-entities-decoder) [![License](http://poser.pugx.org/lucadevelop/telegram-entities-decoder/license)](https://packagist.org/packages/lucadevelop/telegram-entities-decoder) [![PHP Version Require](http://poser.pugx.org/lucadevelop/telegram-entities-decoder/require/php)](https://packagist.org/packages/lucadevelop/telegram-entities-decoder) 3 | 4 | ![EntityDecoder](https://user-images.githubusercontent.com/68305127/164949030-622a200e-8c18-4480-b8e2-08476801bb90.PNG) 5 | 6 | This class decodes style entities from Telegram bot messages (bold, italic, etc.) into text with inline markup that reproduces (when possible) the 7 | exact style the message originally had when it was sent to the bot. 8 | All this work is necessary because Telegram returns the offset and length of the entities in UTF-16 code units, which are hard to decode correctly in PHP. 9 | 10 | Consider giving this project a ⭐️ Star 11 | 12 | ## Compatibility 13 | PHP >= 7.0 14 | 15 | ## Features 16 | - Decode entities from text messages and attachment captions. 
17 | - Supports all Telegram parse modes (Markdown, HTML and MarkdownV2). HTML has more entropy but it's easily the best and it's recommended. 18 | - Supports emoji in the text field 19 | - Easy to use 20 | 21 | _NOTE: Markdown parse mode is deprecated and no longer up-to-date so it doesn't support all entities. Use MarkdownV2 or HTML._ 22 | 23 | ## Example usage 24 | ``` 25 | $entity_decoder = new EntityDecoder('HTML'); 26 | $decoded_text = $entity_decoder->decode($message); 27 | ``` 28 | _See demo folder for full example_ 29 | 30 | ## Composer 31 | ``` 32 | composer require lucadevelop/telegram-entities-decoder 33 | ``` 34 | Usage: 35 | ``` 36 | require 'vendor/autoload.php'; 37 | use lucadevelop\TelegramEntitiesDecoder\EntityDecoder; 38 | [...] 39 | $entity_decoder = new EntityDecoder('HTML'); 40 | $decoded_text = $entity_decoder->decode($message); 41 | ``` 42 | 43 | ## Credits 44 | - Telegram docs: https://core.telegram.org/bots/api#formatting-options 45 | - Inspired By: https://github.com/php-telegram-bot/core/issues/544#issuecomment-564950430 46 | 47 | ## Contacts 48 | ![Telegram](https://telegram.org/favicon.ico) [@LucaDevelop](https://t.me/LucaDevelop) 49 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "lucadevelop/telegram-entities-decoder", 3 | "description": "Telegram entities decoder in PHP", 4 | "type": "library", 5 | "require": { 6 | "php": ">=7.0.0", 7 | "ext-mbstring": "*" 8 | }, 9 | "license": "MIT", 10 | "authors": [ 11 | { 12 | "name": "LucaDevelop", 13 | "email": "lsirri.gm@gmail.com" 14 | } 15 | ], 16 | "minimum-stability": "dev", 17 | "autoload": { 18 | "psr-4": { "lucadevelop\\TelegramEntitiesDecoder\\": "src" } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /demo/attachmentDecodeTest.php: 
/**
 * Rebuilds Telegram-formatted text (HTML, Markdown or MarkdownV2) from the
 * plain text of a message and its list of entities.
 *
 * Example usage:
 *   $entity_decoder = new EntityDecoder('HTML');
 *   $decoded_text   = $entity_decoder->decode($message);
 *
 * @author LucaDevelop
 * @access public
 * @see https://github.com/LucaDevelop/telegram-entities-decoder
 */

namespace lucadevelop\TelegramEntitiesDecoder;

class EntityDecoder
{
    /** Entity types this class knows how to render; every other type is ignored. */
    private $entitiesToParse = ['bold', 'italic', 'code', 'pre', 'text_mention', 'text_link', 'strikethrough', 'underline', 'spoiler', 'blockquote', 'custom_emoji'];

    /** Entities of the message currently being processed (reset on every call). */
    private $entities = [];

    /** Output style: 'HTML', 'Markdown' or 'MarkdownV2'. */
    private $style;

    /** Opening delimiters that do not depend on entity data, indexed by style then entity type. */
    private static $startDelimiters = [
        'Markdown' => [
            'bold' => '*',
            'italic' => '_',
            'code' => '`',
            'text_mention' => '[',
            'text_link' => '[',
        ],
        'HTML' => [
            'bold' => '<b>',
            'italic' => '<i>',
            'underline' => '<u>',
            'strikethrough' => '<s>',
            'spoiler' => '<span class="tg-spoiler">',
            'code' => '<code>',
            'blockquote' => '<blockquote>',
        ],
        'MarkdownV2' => [
            'bold' => '*',
            'italic' => '_',
            'spoiler' => '||',
            'code' => '`',
            'underline' => '__',
            'strikethrough' => '~',
            'text_mention' => '[',
            'text_link' => '[',
            'custom_emoji' => '![',
            'blockquote' => '>',
        ],
    ];

    /** Closing delimiters that do not depend on entity data, indexed by style then entity type. */
    private static $stopDelimiters = [
        'Markdown' => [
            'bold' => '*',
            'italic' => '_',
            'code' => '`',
            'pre' => "\n```",
        ],
        'HTML' => [
            'bold' => '</b>',
            'italic' => '</i>',
            'underline' => '</u>',
            'strikethrough' => '</s>',
            'spoiler' => '</span>',
            'code' => '</code>',
            'text_mention' => '</a>',
            'text_link' => '</a>',
            'custom_emoji' => '</tg-emoji>',
            'blockquote' => '</blockquote>',
        ],
        'MarkdownV2' => [
            'bold' => '*',
            'italic' => '_',
            'spoiler' => '||',
            'code' => '`',
            'pre' => "\n```",
            'underline' => '__',
            'strikethrough' => '~',
        ],
    ];

    /**
     * @param string $style Either 'HTML', 'Markdown' or 'MarkdownV2'.
     *
     * @throws \InvalidArgumentException if the provided style name is invalid.
     */
    public function __construct(string $style = 'HTML')
    {
        if (!in_array($style, ['HTML', 'MarkdownV2', 'Markdown'], true)) {
            throw new \InvalidArgumentException("Wrong style name");
        }
        $this->style = $style;
    }

    /**
     * Decode entities and return the text with inline markup for the chosen style.
     *
     * When the message has no entities or no text, the original text is
     * returned untouched (it is NOT escaped).
     *
     * @param object $message message object to reconstruct entities from (json decoded without assoc).
     * @return string
     *
     * @throws \Exception if $message is not an object.
     * @throws \InvalidArgumentException if the message text is not valid UTF-8.
     */
    public function decode($message): string
    {
        return $this->run($message)['text'];
    }

    /**
     * Extract every top-level formatted fragment of the message.
     *
     * Each returned string is one outermost entity (with any nested entities
     * inside it) rendered in the chosen style; unformatted text between
     * entities is skipped.
     *
     * @param object $message message object to reconstruct entities from (json decoded without assoc).
     * @return array list of rendered fragments, empty when there are no entities or no text.
     *
     * @throws \Exception if $message is not an object.
     * @throws \InvalidArgumentException if the message text is not valid UTF-8.
     */
    public function extractAllEntities($message): array
    {
        return $this->run($message)['entities'];
    }

    /**
     * Shared driver for decode() and extractAllEntities().
     *
     * @return array ['text' => string, 'entities' => string[]]
     */
    private function run($message)
    {
        $text = $this->loadMessage($message);
        if (empty($this->entities) || $text === '') {
            return ['text' => $text, 'entities' => []];
        }

        // Overriding subclasses may rely on mbstring defaulting to UTF-8, so the
        // internal encoding is still switched, but it is now restored even when
        // the walk throws.
        $previousEncoding = mb_internal_encoding();
        mb_internal_encoding('UTF-8');
        try {
            return $this->walk($text);
        } finally {
            if ($previousEncoding) {
                mb_internal_encoding($previousEncoding);
            }
        }
    }

    /**
     * Validate the message, load its entities into $this->entities and return its text.
     *
     * Caption entities take precedence over text entities, and text takes
     * precedence over caption, as in the Telegram message layout where only
     * one of the two pairs is present.
     */
    private function loadMessage($message)
    {
        if (!is_object($message)) {
            throw new \Exception('message must be an object');
        }

        // Always start from a clean slate: entities left over from a previous
        // call must never be applied to this message.
        $this->entities = [];
        if (!empty($message->entities)) {
            $this->entities = $message->entities;
        }
        if (!empty($message->caption_entities)) {
            $this->entities = $message->caption_entities;
        }

        // isset + !== '' instead of !empty(): the text "0" is a valid message.
        if (isset($message->text) && $message->text !== '') {
            return (string) $message->text;
        }
        if (isset($message->caption) && $message->caption !== '') {
            return (string) $message->caption;
        }
        return '';
    }

    /**
     * Walk the text one user-perceived character at a time, opening and
     * closing entities at the UTF-16 offsets Telegram reported.
     *
     * @return array ['text' => full decoded text, 'entities' => top-level fragments]
     */
    private function walk($text)
    {
        $fullText = '';
        $fragment = '';
        $fragments = [];
        $opened = [];
        $position = 0;

        foreach ($this->splitCharAndLength($text) as $unit) {
            $end = $position + $unit['length'];
            $starting = $this->checkForEntityStart($position);
            $stopping = $this->checkForEntityStop($end);

            if ($starting !== false) {
                foreach ($starting as $entity) {
                    $opened[] = $entity;
                    $startString = $this->getEntityStartString($entity);
                    $fullText .= $startString;
                    $fragment .= $startString;
                }
            }

            // A character sitting on an entity boundary is always treated as
            // being inside an entity.
            $insideEntity = $starting !== false || $stopping !== false || !empty($opened);
            $escaped = $this->escapeSpecialChars($unit['char'], $insideEntity, $opened);
            $fullText .= $escaped;
            if ($insideEntity) {
                $fragment .= $escaped;
            }

            if ($stopping !== false) {
                // MarkdownV2 cannot tell "___" apart when italic and underline
                // close together; Telegram's docs require this exact sequence.
                if ($this->style === 'MarkdownV2' && $this->checkMarkdownV2AmbiguousEntities($stopping)) {
                    $fullText .= "_\r__";
                    $fragment .= "_\r__";
                    array_pop($opened);
                    array_pop($opened);
                    if (empty($opened)) {
                        $fragments[] = $fragment;
                        $fragment = '';
                    }
                }
                foreach ($stopping as $entity) {
                    $stopString = $this->getEntityStopString($entity);
                    $fullText .= $stopString;
                    $fragment .= $stopString;
                    array_pop($opened);
                    if (empty($opened)) {
                        $fragments[] = $fragment;
                        $fragment = '';
                    }
                }
            }

            $position = $end;
        }

        // Entities whose end never matched a character boundary are closed at
        // the end of the text, innermost first, and reported as ONE fragment.
        if (!empty($opened)) {
            foreach (array_reverse($opened) as $entity) {
                $stopString = $this->getEntityStopString($entity);
                $fullText .= $stopString;
                $fragment .= $stopString;
            }
            $fragments[] = $fragment;
        }

        return ['text' => $fullText, 'entities' => $fragments];
    }

    /**
     * Split the text into user-perceived characters, each with its length in
     * UTF-16 code units.
     *
     * Variation selector-16 and the emoji skin-tone modifiers are glued to the
     * preceding character; a zero width joiner glues the characters on both of
     * its sides together.
     *
     * @param string $string UTF-8 text.
     * @return array list of ['char' => string, 'length' => int]
     *
     * @throws \InvalidArgumentException if $string is not valid UTF-8.
     */
    protected function splitCharAndLength($string)
    {
        $codePoints = preg_split('//u', (string) $string, -1, PREG_SPLIT_NO_EMPTY);
        if ($codePoints === false) {
            throw new \InvalidArgumentException('text is not valid UTF-8');
        }

        // Compared as UTF-8 strings: a UTF-16 hex dump of these astral code
        // points would be a surrogate pair, never "1f3fb".
        $modifiers = ["\u{FE0F}", "\u{1F3FB}", "\u{1F3FC}", "\u{1F3FD}", "\u{1F3FE}", "\u{1F3FF}"];
        $zeroWidthJoiner = "\u{200D}";

        $clusters = [];
        $joinNext = false;
        foreach ($codePoints as $codePoint) {
            $last = count($clusters) - 1;
            $isModifier = in_array($codePoint, $modifiers, true);
            $isJoiner = $codePoint === $zeroWidthJoiner;

            // Nothing to attach to when the text starts with a modifier/ZWJ:
            // open a new cluster instead of writing to index -1.
            if ($last >= 0 && ($isModifier || $isJoiner || $joinNext)) {
                $clusters[$last] .= $codePoint;
            } else {
                $clusters[] = $codePoint;
            }

            if ($isJoiner) {
                $joinNext = true;
            } elseif (!$isModifier) {
                // A modifier directly after a ZWJ keeps the join pending.
                $joinNext = false;
            }
        }

        $data = [];
        foreach ($clusters as $cluster) {
            $data[] = ['char' => $cluster, 'length' => $this->getUTF16CodePointsLength($cluster)];
        }
        return $data;
    }

    /**
     * Apply Telegram escape rules for the chosen style.
     *
     * @param string $char         character (cluster) to escape.
     * @param bool   $isEntityOpen whether the character is inside an entity.
     * @param array  $entities     currently opened entities, outermost first.
     * @return string
     */
    protected function escapeSpecialChars($char, $isEntityOpen, $entities)
    {
        switch ($this->style) {
            case 'HTML':
                // Telegram requires exactly these three replacements outside tags.
                return strtr($char, ['&' => '&amp;', '<' => '&lt;', '>' => '&gt;']);

            case 'Markdown':
                if ($isEntityOpen && !empty($entities)) {
                    $outermost = reset($entities);
                    // Legacy Markdown cannot escape inside an entity: close it,
                    // emit the escaped character, then reopen it.
                    if (($char === '*' || $char === '_') && $char === $this->getEntityStartString($outermost)) {
                        return $char . '\\' . $char . $char;
                    }
                    return $char;
                }
                return in_array($char, ['*', '_', '[', '`'], true) ? '\\' . $char : $char;

            case 'MarkdownV2':
                if ($char === "\n") {
                    foreach ($entities as $entity) {
                        if ($entity->type === 'blockquote') {
                            // Every line of a block quotation must start with '>'.
                            return $char . '>';
                        }
                    }
                }
                $special = ['_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!', '\\'];
                return in_array($char, $special, true) ? '\\' . $char : $char;

            default:
                return $char;
        }
    }

    /**
     * Get the opening string of the entity for the chosen style.
     *
     * @param object $entity Telegram MessageEntity.
     * @return string empty when the style cannot express the entity type.
     */
    protected function getEntityStartString($entity)
    {
        $type = $entity->type;

        if ($this->style === 'HTML') {
            switch ($type) {
                case 'pre':
                    return isset($entity->language)
                        ? '<pre><code class="language-' . $this->escapeHtmlAttribute($entity->language) . '">'
                        : '<pre>';
                case 'text_mention':
                    return '<a href="tg://user?id=' . $this->escapeHtmlAttribute($entity->user->id) . '">';
                case 'text_link':
                    return '<a href="' . $this->escapeHtmlAttribute($entity->url) . '">';
                case 'custom_emoji':
                    return '<tg-emoji emoji-id="' . $this->escapeHtmlAttribute($entity->custom_emoji_id) . '">';
            }
        } elseif ($type === 'pre') {
            // Same fence for Markdown and MarkdownV2.
            return '```' . (isset($entity->language) ? $entity->language : '') . "\n";
        }

        return isset(self::$startDelimiters[$this->style][$type])
            ? self::$startDelimiters[$this->style][$type]
            : '';
    }

    /**
     * Check if there are entities that start at the given position and return them.
     *
     * @param int $pos offset in UTF-16 code units.
     * @return array|false matching entities in message order, or false when none.
     */
    protected function checkForEntityStart($pos)
    {
        $position = (int) $pos;
        $found = $this->filterEntities(static function ($entity) use ($position) {
            return (int) $entity->offset === $position;
        });
        return !empty($found) ? $found : false;
    }

    /**
     * Get the closing string of the entity for the chosen style.
     *
     * @param object $entity Telegram MessageEntity.
     * @return string empty when the style cannot express the entity type.
     */
    protected function getEntityStopString($entity)
    {
        $type = $entity->type;

        if ($this->style === 'HTML') {
            if ($type === 'pre') {
                return (isset($entity->language) ? '</code>' : '') . '</pre>';
            }
        } else {
            switch ($type) {
                case 'text_mention':
                    return '](tg://user?id=' . $entity->user->id . ')';
                case 'text_link':
                    return '](' . $this->escapeLinkTarget($entity->url) . ')';
                case 'custom_emoji':
                    if ($this->style === 'MarkdownV2') {
                        return '](tg://emoji?id=' . $entity->custom_emoji_id . ')';
                    }
                    break;
            }
        }

        return isset(self::$stopDelimiters[$this->style][$type])
            ? self::$stopDelimiters[$this->style][$type]
            : '';
    }

    /**
     * Check if there are entities that end at the given position and return
     * them (reversed, because nested entities close innermost first).
     *
     * @param int $pos offset in UTF-16 code units.
     * @return array|false matching entities, or false when none.
     */
    protected function checkForEntityStop($pos)
    {
        $position = (int) $pos;
        $found = $this->filterEntities(static function ($entity) use ($position) {
            return (int) $entity->offset + (int) $entity->length === $position;
        });
        return !empty($found) ? array_reverse($found) : false;
    }

    /**
     * Check for ambiguous entities in MarkdownV2 style (see Telegram docs).
     *
     * When exactly two italic/underline entities close together, they are
     * removed from $entitiesToCheck so the caller can emit the dedicated
     * closing sequence for them instead.
     *
     * @param array $entitiesToCheck entities closing at the same position (modified in place).
     * @return bool true when the ambiguous pair was found and removed.
     */
    protected function checkMarkdownV2AmbiguousEntities(&$entitiesToCheck)
    {
        $isAmbiguousType = static function ($entity) {
            return $entity->type === 'italic' || $entity->type === 'underline';
        };

        if (count(array_filter($entitiesToCheck, $isAmbiguousType)) !== 2) {
            return false;
        }

        $remaining = [];
        foreach ($entitiesToCheck as $entity) {
            if (!$isAmbiguousType($entity)) {
                $remaining[] = $entity;
            }
        }
        $entitiesToCheck = $remaining;
        return true;
    }

    /**
     * Count the UTF-16 code units of the character passed (despite the
     * method name, these are code units, not code points).
     *
     * @param string $char UTF-8 character or cluster.
     * @return int 0 for an empty string.
     */
    protected function getUTF16CodePointsLength($char)
    {
        // Source encoding is stated explicitly so the result does not depend
        // on the mbstring internal encoding. Two bytes (four hex digits) per unit.
        $utf16 = mb_convert_encoding((string) $char, 'UTF-16BE', 'UTF-8');
        return intdiv(strlen(bin2hex($utf16)), 4);
    }

    /**
     * Return the supported entities of the current message accepted by $matches, in message order.
     */
    private function filterEntities(callable $matches)
    {
        $found = [];
        foreach ($this->entities as $entity) {
            if ($matches($entity) && in_array($entity->type, $this->entitiesToParse, true)) {
                $found[] = $entity;
            }
        }
        return $found;
    }

    /**
     * Escape a value for use inside a double-quoted HTML attribute.
     */
    private function escapeHtmlAttribute($value)
    {
        return htmlspecialchars((string) $value, ENT_COMPAT, 'UTF-8');
    }

    /**
     * Escape a link target for the "(...)" part of an inline link.
     *
     * MarkdownV2 requires ')' and '\' to be escaped there; legacy Markdown has
     * no escaping inside the target, so the URL is returned unchanged.
     */
    private function escapeLinkTarget($url)
    {
        if ($this->style !== 'MarkdownV2') {
            return $url;
        }
        return strtr((string) $url, ['\\' => '\\\\', ')' => '\\)']);
    }
}