├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── composer.json └── src └── KubAT └── PhpSimple ├── HtmlDomParser.php └── lib └── simple_html_dom.php /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | I'm not the maintainer of the PHP Simple HTML DOM Parser project (https://sourceforge.net/projects/simplehtmldom/) 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Jakub Stawowy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | php-simple-html-dom-parser 2 | ========================== 3 | 4 | Version 1.9.1 - PHP 7.3 compatible 5 | PHP Simple HTML DOM Parser changelog: https://sourceforge.net/projects/simplehtmldom/files/simplehtmldom/1.9.1/ 6 | 7 | 8 | Install 9 | ------- 10 | 11 | ``` 12 | composer require kub-at/php-simple-html-dom-parser 13 | ``` 14 | 15 | Usage 16 | ----- 17 | 18 | ```php 19 | use KubAT\PhpSimple\HtmlDomParser; 20 | 21 | ... 22 | $dom = HtmlDomParser::str_get_html( $str ); 23 | or 24 | $dom = HtmlDomParser::file_get_html( $file_name ); 25 | 26 | $elems = $dom->find($elem_name); 27 | ... 28 | 29 | ``` 30 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kub-at/php-simple-html-dom-parser", 3 | "description": "PHP Simple HTML DOM Parser with namespace and PHP 7.3 compatible", 4 | "keywords": ["html", "dom", "simple"], 5 | "homepage": "http://simplehtmldom.sourceforge.net/", 6 | "type": "library", 7 | "license": "MIT", 8 | "authors": [ 9 | { 10 | "name": "S.C. Chen", 11 | "email": "me578022@gmail.com" 12 | }, 13 | { 14 | "name": "Jakub Stawowy", 15 | "email": "Kub-AT@users.noreply.github.com" 16 | } 17 | ], 18 | "require": { 19 | "php": ">=5.3.2" 20 | }, 21 | "autoload": { 22 | "psr-0": { "KubAT\\PhpSimple\\HtmlDomParser": "src/" } 23 | } 24 | } -------------------------------------------------------------------------------- /src/KubAT/PhpSimple/HtmlDomParser.php: -------------------------------------------------------------------------------- 1 | $maxLen) { 89 | $dom->clear(); 90 | return false; 91 | } 92 | 93 | return $dom->load($contents, $lowercase, $stripRN); 94 | } 95 | 96 | function str_get_html( 97 | $str, 98 | $lowercase = true, 99 | $forceTagsClosed = true, 100 | $target_charset = DEFAULT_TARGET_CHARSET, 101 | $stripRN = true, 102 | $defaultBRText = DEFAULT_BR_TEXT, 103 | $defaultSpanText = DEFAULT_SPAN_TEXT) 104 | { 105 | $dom = new simple_html_dom( 106 | null, 107 | $lowercase, 108 | $forceTagsClosed, 109 | $target_charset, 110 | $stripRN, 111 | $defaultBRText, 112 | $defaultSpanText 113 | ); 114 | 115 | if (empty($str) || strlen($str) > MAX_FILE_SIZE) { 116 | $dom->clear(); 117 | return false; 118 | } 119 | 120 | return $dom->load($str, $lowercase, $stripRN); 121 | } 122 | 123 | function dump_html_tree($node, $show_attr = true, $deep = 0) 124 | { 125 | $node->dump($node); 126 | } 127 | 128 | class simple_html_dom_node 129 | { 130 | public $nodetype = HDOM_TYPE_TEXT; 131 | public $tag = 'text'; 132 | public $attr = array(); 133 | public $children = array(); 134 | public $nodes = array(); 135 | public $parent = null; 136 | public $_ = array(); 137 | public $tag_start = 0; 138 | private $dom = null; 139 | 140 | function __construct($dom) 141 | { 142 | $this->dom = $dom; 143 | $dom->nodes[] = $this; 144 | } 145 | 146 | function __destruct() 147 | { 148 | $this->clear(); 149 | } 150 | 151 | function __toString() 152 | { 153 | return $this->outertext(); 154 | } 155 | 156 | function clear() 157 | { 158 | $this->dom = null; 159 | $this->nodes = null; 160 | $this->parent = null; 161 | $this->children = null; 162 | } 163 | 164 | function dump($show_attr = true, $depth = 0) 165 | { 166 | echo str_repeat("\t", $depth) . $this->tag; 167 | 168 | if ($show_attr && count($this->attr) > 0) { 169 | echo '('; 170 | foreach ($this->attr as $k => $v) { 171 | echo "[$k]=>\"$v\", "; 172 | } 173 | echo ')'; 174 | } 175 | 176 | echo "\n"; 177 | 178 | if ($this->nodes) { 179 | foreach ($this->nodes as $node) { 180 | $node->dump($show_attr, $depth + 1); 181 | } 182 | } 183 | } 184 | 185 | function dump_node($echo = true) 186 | { 187 | $string = $this->tag; 188 | 189 | if (count($this->attr) > 0) { 190 | $string .= '('; 191 | foreach ($this->attr as $k => $v) { 192 | $string .= "[$k]=>\"$v\", "; 193 | } 194 | $string .= ')'; 195 | } 196 | 197 | if (count($this->_) > 0) { 198 | $string .= ' $_ ('; 199 | foreach ($this->_ as $k => $v) { 200 | if (is_array($v)) { 201 | $string .= "[$k]=>("; 202 | foreach ($v as $k2 => $v2) { 203 | $string .= "[$k2]=>\"$v2\", "; 204 | } 205 | $string .= ')'; 206 | } else { 207 | $string .= "[$k]=>\"$v\", "; 208 | } 209 | } 210 | $string .= ')'; 211 | } 212 | 213 | if (isset($this->text)) { 214 | $string .= " text: ({$this->text})"; 215 | } 216 | 217 | $string .= ' HDOM_INNER_INFO: '; 218 | 219 | if (isset($node->_[HDOM_INFO_INNER])) { 220 | $string .= "'" . $node->_[HDOM_INFO_INNER] . "'"; 221 | } else { 222 | $string .= ' NULL '; 223 | } 224 | 225 | $string .= ' children: ' . count($this->children); 226 | $string .= ' nodes: ' . count($this->nodes); 227 | $string .= ' tag_start: ' . $this->tag_start; 228 | $string .= "\n"; 229 | 230 | if ($echo) { 231 | echo $string; 232 | return; 233 | } else { 234 | return $string; 235 | } 236 | } 237 | 238 | function parent($parent = null) 239 | { 240 | // I am SURE that this doesn't work properly. 241 | // It fails to unset the current node from it's current parents nodes or 242 | // children list first. 243 | if ($parent !== null) { 244 | $this->parent = $parent; 245 | $this->parent->nodes[] = $this; 246 | $this->parent->children[] = $this; 247 | } 248 | 249 | return $this->parent; 250 | } 251 | 252 | function has_child() 253 | { 254 | return !empty($this->children); 255 | } 256 | 257 | function children($idx = -1) 258 | { 259 | if ($idx === -1) { 260 | return $this->children; 261 | } 262 | 263 | if (isset($this->children[$idx])) { 264 | return $this->children[$idx]; 265 | } 266 | 267 | return null; 268 | } 269 | 270 | function first_child() 271 | { 272 | if (count($this->children) > 0) { 273 | return $this->children[0]; 274 | } 275 | return null; 276 | } 277 | 278 | function last_child() 279 | { 280 | if (count($this->children) > 0) { 281 | return end($this->children); 282 | } 283 | return null; 284 | } 285 | 286 | function next_sibling() 287 | { 288 | if ($this->parent === null) { 289 | return null; 290 | } 291 | 292 | $idx = array_search($this, $this->parent->children, true); 293 | 294 | if ($idx !== false && isset($this->parent->children[$idx + 1])) { 295 | return $this->parent->children[$idx + 1]; 296 | } 297 | 298 | return null; 299 | } 300 | 301 | function prev_sibling() 302 | { 303 | if ($this->parent === null) { 304 | return null; 305 | } 306 | 307 | $idx = array_search($this, $this->parent->children, true); 308 | 309 | if ($idx !== false && $idx > 0) { 310 | return $this->parent->children[$idx - 1]; 311 | } 312 | 313 | return null; 314 | } 315 | 316 | function find_ancestor_tag($tag) 317 | { 318 | global $debug_object; 319 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 320 | 321 | if ($this->parent === null) { 322 | return null; 323 | } 324 | 325 | $ancestor = $this->parent; 326 | 327 | while (!is_null($ancestor)) { 328 | if (is_object($debug_object)) { 329 | $debug_object->debug_log(2, 'Current tag is: ' . $ancestor->tag); 330 | } 331 | 332 | if ($ancestor->tag === $tag) { 333 | break; 334 | } 335 | 336 | $ancestor = $ancestor->parent; 337 | } 338 | 339 | return $ancestor; 340 | } 341 | 342 | function innertext() 343 | { 344 | if (isset($this->_[HDOM_INFO_INNER])) { 345 | return $this->_[HDOM_INFO_INNER]; 346 | } 347 | 348 | if (isset($this->_[HDOM_INFO_TEXT])) { 349 | return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 350 | } 351 | 352 | $ret = ''; 353 | 354 | foreach ($this->nodes as $n) { 355 | $ret .= $n->outertext(); 356 | } 357 | 358 | return $ret; 359 | } 360 | 361 | function outertext() 362 | { 363 | global $debug_object; 364 | 365 | if (is_object($debug_object)) { 366 | $text = ''; 367 | 368 | if ($this->tag === 'text') { 369 | if (!empty($this->text)) { 370 | $text = ' with text: ' . $this->text; 371 | } 372 | } 373 | 374 | $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text); 375 | } 376 | 377 | if ($this->tag === 'root') { 378 | return $this->innertext(); 379 | } 380 | 381 | // todo: What is the use of this callback? Remove? 382 | if ($this->dom && $this->dom->callback !== null) { 383 | call_user_func_array($this->dom->callback, array($this)); 384 | } 385 | 386 | if (isset($this->_[HDOM_INFO_OUTER])) { 387 | return $this->_[HDOM_INFO_OUTER]; 388 | } 389 | 390 | if (isset($this->_[HDOM_INFO_TEXT])) { 391 | return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 392 | } 393 | 394 | $ret = ''; 395 | 396 | if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) { 397 | $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup(); 398 | } 399 | 400 | if (isset($this->_[HDOM_INFO_INNER])) { 401 | // todo:
should either never have HDOM_INFO_INNER or always 402 | if ($this->tag !== 'br') { 403 | $ret .= $this->_[HDOM_INFO_INNER]; 404 | } 405 | } elseif ($this->nodes) { 406 | foreach ($this->nodes as $n) { 407 | $ret .= $this->convert_text($n->outertext()); 408 | } 409 | } 410 | 411 | if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) { 412 | $ret .= 'tag . '>'; 413 | } 414 | 415 | return $ret; 416 | } 417 | 418 | function text() 419 | { 420 | if (isset($this->_[HDOM_INFO_INNER])) { 421 | return $this->_[HDOM_INFO_INNER]; 422 | } 423 | 424 | switch ($this->nodetype) { 425 | case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 426 | case HDOM_TYPE_COMMENT: return ''; 427 | case HDOM_TYPE_UNKNOWN: return ''; 428 | } 429 | 430 | if (strcasecmp($this->tag, 'script') === 0) { return ''; } 431 | if (strcasecmp($this->tag, 'style') === 0) { return ''; } 432 | 433 | $ret = ''; 434 | 435 | // In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed 436 | // for some span tags, and some p tags) $this->nodes is set to NULL. 437 | // NOTE: This indicates that there is a problem where it's set to NULL 438 | // without a clear happening. 439 | // WHY is this happening? 440 | if (!is_null($this->nodes)) { 441 | foreach ($this->nodes as $n) { 442 | // Start paragraph after a blank line 443 | if ($n->tag === 'p') { 444 | $ret = trim($ret) . "\n\n"; 445 | } 446 | 447 | $ret .= $this->convert_text($n->text()); 448 | 449 | // If this node is a span... add a space at the end of it so 450 | // multiple spans don't run into each other. This is plaintext 451 | // after all. 452 | if ($n->tag === 'span') { 453 | $ret .= $this->dom->default_span_text; 454 | } 455 | } 456 | } 457 | return $ret; 458 | } 459 | 460 | function xmltext() 461 | { 462 | $ret = $this->innertext(); 463 | $ret = str_ireplace('', '', $ret); 465 | return $ret; 466 | } 467 | 468 | function makeup() 469 | { 470 | // text, comment, unknown 471 | if (isset($this->_[HDOM_INFO_TEXT])) { 472 | return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); 473 | } 474 | 475 | $ret = '<' . $this->tag; 476 | $i = -1; 477 | 478 | foreach ($this->attr as $key => $val) { 479 | ++$i; 480 | 481 | // skip removed attribute 482 | if ($val === null || $val === false) { continue; } 483 | 484 | $ret .= $this->_[HDOM_INFO_SPACE][$i][0]; 485 | 486 | //no value attr: nowrap, checked selected... 487 | if ($val === true) { 488 | $ret .= $key; 489 | } else { 490 | switch ($this->_[HDOM_INFO_QUOTE][$i]) 491 | { 492 | case HDOM_QUOTE_DOUBLE: $quote = '"'; break; 493 | case HDOM_QUOTE_SINGLE: $quote = '\''; break; 494 | default: $quote = ''; 495 | } 496 | 497 | $ret .= $key 498 | . $this->_[HDOM_INFO_SPACE][$i][1] 499 | . '=' 500 | . $this->_[HDOM_INFO_SPACE][$i][2] 501 | . $quote 502 | . $val 503 | . $quote; 504 | } 505 | } 506 | 507 | $ret = $this->dom->restore_noise($ret); 508 | return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>'; 509 | } 510 | 511 | function find($selector, $idx = null, $lowercase = false) 512 | { 513 | $selectors = $this->parse_selector($selector); 514 | if (($count = count($selectors)) === 0) { return array(); } 515 | $found_keys = array(); 516 | 517 | // find each selector 518 | for ($c = 0; $c < $count; ++$c) { 519 | // The change on the below line was documented on the sourceforge 520 | // code tracker id 2788009 521 | // used to be: if (($levle=count($selectors[0]))===0) return array(); 522 | if (($levle = count($selectors[$c])) === 0) { return array(); } 523 | if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); } 524 | 525 | $head = array($this->_[HDOM_INFO_BEGIN] => 1); 526 | $cmd = ' '; // Combinator 527 | 528 | // handle descendant selectors, no recursive! 529 | for ($l = 0; $l < $levle; ++$l) { 530 | $ret = array(); 531 | 532 | foreach ($head as $k => $v) { 533 | $n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k]; 534 | //PaperG - Pass this optional parameter on to the seek function. 535 | $n->seek($selectors[$c][$l], $ret, $cmd, $lowercase); 536 | } 537 | 538 | $head = $ret; 539 | $cmd = $selectors[$c][$l][4]; // Next Combinator 540 | } 541 | 542 | foreach ($head as $k => $v) { 543 | if (!isset($found_keys[$k])) { 544 | $found_keys[$k] = 1; 545 | } 546 | } 547 | } 548 | 549 | // sort keys 550 | ksort($found_keys); 551 | 552 | $found = array(); 553 | foreach ($found_keys as $k => $v) { 554 | $found[] = $this->dom->nodes[$k]; 555 | } 556 | 557 | // return nth-element or array 558 | if (is_null($idx)) { return $found; } 559 | elseif ($idx < 0) { $idx = count($found) + $idx; } 560 | return (isset($found[$idx])) ? $found[$idx] : null; 561 | } 562 | 563 | protected function seek($selector, &$ret, $parent_cmd, $lowercase = false) 564 | { 565 | global $debug_object; 566 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 567 | 568 | list($tag, $id, $class, $attributes, $cmb) = $selector; 569 | $nodes = array(); 570 | 571 | if ($parent_cmd === ' ') { // Descendant Combinator 572 | // Find parent closing tag if the current element doesn't have a closing 573 | // tag (i.e. void element) 574 | $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0; 575 | if ($end == 0) { 576 | $parent = $this->parent; 577 | while (!isset($parent->_[HDOM_INFO_END]) && $parent !== null) { 578 | $end -= 1; 579 | $parent = $parent->parent; 580 | } 581 | $end += $parent->_[HDOM_INFO_END]; 582 | } 583 | 584 | // Get list of target nodes 585 | $nodes_start = $this->_[HDOM_INFO_BEGIN] + 1; 586 | $nodes_count = $end - $nodes_start; 587 | $nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true); 588 | } elseif ($parent_cmd === '>') { // Child Combinator 589 | $nodes = $this->children; 590 | } elseif ($parent_cmd === '+' 591 | && $this->parent 592 | && in_array($this, $this->parent->children)) { // Next-Sibling Combinator 593 | $index = array_search($this, $this->parent->children, true) + 1; 594 | if ($index < count($this->parent->children)) 595 | $nodes[] = $this->parent->children[$index]; 596 | } elseif ($parent_cmd === '~' 597 | && $this->parent 598 | && in_array($this, $this->parent->children)) { // Subsequent Sibling Combinator 599 | $index = array_search($this, $this->parent->children, true); 600 | $nodes = array_slice($this->parent->children, $index); 601 | } 602 | 603 | // Go throgh each element starting at this element until the end tag 604 | // Note: If this element is a void tag, any previous void element is 605 | // skipped. 606 | foreach($nodes as $node) { 607 | $pass = true; 608 | 609 | // Skip root nodes 610 | if(!$node->parent) { 611 | $pass = false; 612 | } 613 | 614 | // Handle 'text' selector 615 | if($pass && $tag === 'text' && $node->tag === 'text') { 616 | $ret[array_search($node, $this->dom->nodes, true)] = 1; 617 | unset($node); 618 | continue; 619 | } 620 | 621 | // Skip if node isn't a child node (i.e. text nodes) 622 | if($pass && !in_array($node, $node->parent->children, true)) { 623 | $pass = false; 624 | } 625 | 626 | // Skip if tag doesn't match 627 | if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') { 628 | $pass = false; 629 | } 630 | 631 | // Skip if ID doesn't exist 632 | if ($pass && $id !== '' && !isset($node->attr['id'])) { 633 | $pass = false; 634 | } 635 | 636 | // Check if ID matches 637 | if ($pass && $id !== '' && isset($node->attr['id'])) { 638 | // Note: Only consider the first ID (as browsers do) 639 | $node_id = explode(' ', trim($node->attr['id']))[0]; 640 | 641 | if($id !== $node_id) { $pass = false; } 642 | } 643 | 644 | // Check if all class(es) exist 645 | if ($pass && $class !== '' && is_array($class) && !empty($class)) { 646 | if (isset($node->attr['class'])) { 647 | $node_classes = explode(' ', $node->attr['class']); 648 | 649 | if ($lowercase) { 650 | $node_classes = array_map('strtolower', $node_classes); 651 | } 652 | 653 | foreach($class as $c) { 654 | if(!in_array($c, $node_classes)) { 655 | $pass = false; 656 | break; 657 | } 658 | } 659 | } else { 660 | $pass = false; 661 | } 662 | } 663 | 664 | // Check attributes 665 | if ($pass 666 | && $attributes !== '' 667 | && is_array($attributes) 668 | && !empty($attributes)) { 669 | foreach($attributes as $a) { 670 | list ( 671 | $att_name, 672 | $att_expr, 673 | $att_val, 674 | $att_inv, 675 | $att_case_sensitivity 676 | ) = $a; 677 | 678 | // Handle indexing attributes (i.e. "[2]") 679 | /** 680 | * Note: This is not supported by the CSS Standard but adds 681 | * the ability to select items compatible to XPath (i.e. 682 | * the 3rd element within it's parent). 683 | * 684 | * Note: This doesn't conflict with the CSS Standard which 685 | * doesn't work on numeric attributes anyway. 686 | */ 687 | if (is_numeric($att_name) 688 | && $att_expr === '' 689 | && $att_val === '') { 690 | $count = 0; 691 | 692 | // Find index of current element in parent 693 | foreach ($node->parent->children as $c) { 694 | if ($c->tag === $node->tag) ++$count; 695 | if ($c === $node) break; 696 | } 697 | 698 | // If this is the correct node, continue with next 699 | // attribute 700 | if ($count === (int)$att_name) continue; 701 | } 702 | 703 | // Check attribute availability 704 | if ($att_inv) { // Attribute should NOT be set 705 | if (isset($node->attr[$att_name])) { 706 | $pass = false; 707 | break; 708 | } 709 | } else { // Attribute should be set 710 | // todo: "plaintext" is not a valid CSS selector! 711 | if ($att_name !== 'plaintext' 712 | && !isset($node->attr[$att_name])) { 713 | $pass = false; 714 | break; 715 | } 716 | } 717 | 718 | // Continue with next attribute if expression isn't defined 719 | if ($att_expr === '') continue; 720 | 721 | // If they have told us that this is a "plaintext" 722 | // search then we want the plaintext of the node - right? 723 | // todo "plaintext" is not a valid CSS selector! 724 | if ($att_name === 'plaintext') { 725 | $nodeKeyValue = $node->text(); 726 | } else { 727 | $nodeKeyValue = $node->attr[$att_name]; 728 | } 729 | 730 | if (is_object($debug_object)) { 731 | $debug_object->debug_log(2, 732 | 'testing node: ' 733 | . $node->tag 734 | . ' for attribute: ' 735 | . $att_name 736 | . $att_expr 737 | . $att_val 738 | . ' where nodes value is: ' 739 | . $nodeKeyValue 740 | ); 741 | } 742 | 743 | // If lowercase is set, do a case insensitive test of 744 | // the value of the selector. 745 | if ($lowercase) { 746 | $check = $this->match( 747 | $att_expr, 748 | strtolower($att_val), 749 | strtolower($nodeKeyValue), 750 | $att_case_sensitivity 751 | ); 752 | } else { 753 | $check = $this->match( 754 | $att_expr, 755 | $att_val, 756 | $nodeKeyValue, 757 | $att_case_sensitivity 758 | ); 759 | } 760 | 761 | if (is_object($debug_object)) { 762 | $debug_object->debug_log(2, 763 | 'after match: ' 764 | . ($check ? 'true' : 'false') 765 | ); 766 | } 767 | 768 | if (!$check) { 769 | $pass = false; 770 | break; 771 | } 772 | } 773 | } 774 | 775 | // Found a match. Add to list and clear node 776 | if ($pass) $ret[$node->_[HDOM_INFO_BEGIN]] = 1; 777 | unset($node); 778 | } 779 | // It's passed by reference so this is actually what this function returns. 780 | if (is_object($debug_object)) { 781 | $debug_object->debug_log(1, 'EXIT - ret: ', $ret); 782 | } 783 | } 784 | 785 | protected function match($exp, $pattern, $value, $case_sensitivity) 786 | { 787 | global $debug_object; 788 | if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} 789 | 790 | if ($case_sensitivity === 'i') { 791 | $pattern = strtolower($pattern); 792 | $value = strtolower($value); 793 | } 794 | 795 | switch ($exp) { 796 | case '=': 797 | return ($value === $pattern); 798 | case '!=': 799 | return ($value !== $pattern); 800 | case '^=': 801 | return preg_match('/^' . preg_quote($pattern, '/') . '/', $value); 802 | case '$=': 803 | return preg_match('/' . preg_quote($pattern, '/') . '$/', $value); 804 | case '*=': 805 | return preg_match('/' . preg_quote($pattern, '/') . '/', $value); 806 | case '|=': 807 | /** 808 | * [att|=val] 809 | * 810 | * Represents an element with the att attribute, its value 811 | * either being exactly "val" or beginning with "val" 812 | * immediately followed by "-" (U+002D). 813 | */ 814 | return strpos($value, $pattern) === 0; 815 | case '~=': 816 | /** 817 | * [att~=val] 818 | * 819 | * Represents an element with the att attribute whose value is a 820 | * whitespace-separated list of words, one of which is exactly 821 | * "val". If "val" contains whitespace, it will never represent 822 | * anything (since the words are separated by spaces). Also if 823 | * "val" is the empty string, it will never represent anything. 824 | */ 825 | return in_array($pattern, explode(' ', trim($value)), true); 826 | } 827 | return false; 828 | } 829 | 830 | protected function parse_selector($selector_string) 831 | { 832 | global $debug_object; 833 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 834 | 835 | /** 836 | * Pattern of CSS selectors, modified from mootools (https://mootools.net/) 837 | * 838 | * Paperg: Add the colon to the attribute, so that it properly finds 839 | * like google does. 840 | * 841 | * Note: if you try to look at this attribute, you MUST use getAttribute 842 | * since $dom->x:y will fail the php syntax check. 843 | * 844 | * Notice the \[ starting the attribute? and the @? following? This 845 | * implies that an attribute can begin with an @ sign that is not 846 | * captured. This implies that an html attribute specifier may start 847 | * with an @ sign that is NOT captured by the expression. Farther study 848 | * is required to determine of this should be documented or removed. 849 | * 850 | * Matches selectors in this order: 851 | * 852 | * [0] - full match 853 | * 854 | * [1] - tag name 855 | * ([\w:\*-]*) 856 | * Matches the tag name consisting of zero or more words, colons, 857 | * asterisks and hyphens. 858 | * 859 | * [2] - id name 860 | * (?:\#([\w-]+)) 861 | * Optionally matches a id name, consisting of an "#" followed by 862 | * the id name (one or more words and hyphens). 863 | * 864 | * [3] - class names (including dots) 865 | * (?:\.([\w\.-]+))? 866 | * Optionally matches a list of classs, consisting of an "." 867 | * followed by the class name (one or more words and hyphens) 868 | * where multiple classes can be chained (i.e. ".foo.bar.baz") 869 | * 870 | * [4] - attributes 871 | * ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)? 872 | * Optionally matches the attributes list 873 | * 874 | * [5] - separator 875 | * ([\/, >+~]+) 876 | * Matches the selector list separator 877 | */ 878 | // phpcs:ignore Generic.Files.LineLength 879 | $pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is"; 880 | 881 | preg_match_all( 882 | $pattern, 883 | trim($selector_string) . ' ', // Add final ' ' as pseudo separator 884 | $matches, 885 | PREG_SET_ORDER 886 | ); 887 | 888 | if (is_object($debug_object)) { 889 | $debug_object->debug_log(2, 'Matches Array: ', $matches); 890 | } 891 | 892 | $selectors = array(); 893 | $result = array(); 894 | 895 | foreach ($matches as $m) { 896 | $m[0] = trim($m[0]); 897 | 898 | // Skip NoOps 899 | if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; } 900 | 901 | // Convert to lowercase 902 | if ($this->dom->lowercase) { 903 | $m[1] = strtolower($m[1]); 904 | } 905 | 906 | // Extract classes 907 | if ($m[3] !== '') { $m[3] = explode('.', $m[3]); } 908 | 909 | /* Extract attributes (pattern based on the pattern above!) 910 | 911 | * [0] - full match 912 | * [1] - attribute name 913 | * [2] - attribute expression 914 | * [3] - attribute value 915 | * [4] - case sensitivity 916 | * 917 | * Note: Attributes can be negated with a "!" prefix to their name 918 | */ 919 | if($m[4] !== '') { 920 | preg_match_all( 921 | "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is", 922 | trim($m[4]), 923 | $attributes, 924 | PREG_SET_ORDER 925 | ); 926 | 927 | // Replace element by array 928 | $m[4] = array(); 929 | 930 | foreach($attributes as $att) { 931 | // Skip empty matches 932 | if(trim($att[0]) === '') { continue; } 933 | 934 | $inverted = (isset($att[1][0]) && $att[1][0] === '!'); 935 | $m[4][] = array( 936 | $inverted ? substr($att[1], 1) : $att[1], // Name 937 | (isset($att[2])) ? $att[2] : '', // Expression 938 | (isset($att[3])) ? $att[3] : '', // Value 939 | $inverted, // Inverted Flag 940 | (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity 941 | ); 942 | } 943 | } 944 | 945 | // Sanitize Separator 946 | if ($m[5] !== '' && trim($m[5]) === '') { // Descendant Separator 947 | $m[5] = ' '; 948 | } else { // Other Separator 949 | $m[5] = trim($m[5]); 950 | } 951 | 952 | // Clear Separator if it's a Selector List 953 | if ($is_list = ($m[5] === ',')) { $m[5] = ''; } 954 | 955 | // Remove full match before adding to results 956 | array_shift($m); 957 | $result[] = $m; 958 | 959 | if ($is_list) { // Selector List 960 | $selectors[] = $result; 961 | $result = array(); 962 | } 963 | } 964 | 965 | if (count($result) > 0) { $selectors[] = $result; } 966 | return $selectors; 967 | } 968 | 969 | function __get($name) 970 | { 971 | if (isset($this->attr[$name])) { 972 | return $this->convert_text($this->attr[$name]); 973 | } 974 | switch ($name) { 975 | case 'outertext': return $this->outertext(); 976 | case 'innertext': return $this->innertext(); 977 | case 'plaintext': return $this->text(); 978 | case 'xmltext': return $this->xmltext(); 979 | default: return array_key_exists($name, $this->attr); 980 | } 981 | } 982 | 983 | function __set($name, $value) 984 | { 985 | global $debug_object; 986 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 987 | 988 | switch ($name) { 989 | case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value; 990 | case 'innertext': 991 | if (isset($this->_[HDOM_INFO_TEXT])) { 992 | return $this->_[HDOM_INFO_TEXT] = $value; 993 | } 994 | return $this->_[HDOM_INFO_INNER] = $value; 995 | } 996 | 997 | if (!isset($this->attr[$name])) { 998 | $this->_[HDOM_INFO_SPACE][] = array(' ', '', ''); 999 | $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; 1000 | } 1001 | 1002 | $this->attr[$name] = $value; 1003 | } 1004 | 1005 | function __isset($name) 1006 | { 1007 | switch ($name) { 1008 | case 'outertext': return true; 1009 | case 'innertext': return true; 1010 | case 'plaintext': return true; 1011 | } 1012 | //no value attr: nowrap, checked selected... 1013 | return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]); 1014 | } 1015 | 1016 | function __unset($name) 1017 | { 1018 | if (isset($this->attr[$name])) { unset($this->attr[$name]); } 1019 | } 1020 | 1021 | function convert_text($text) 1022 | { 1023 | global $debug_object; 1024 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } 1025 | 1026 | $converted_text = $text; 1027 | 1028 | $sourceCharset = ''; 1029 | $targetCharset = ''; 1030 | 1031 | if ($this->dom) { 1032 | $sourceCharset = strtoupper($this->dom->_charset); 1033 | $targetCharset = strtoupper($this->dom->_target_charset); 1034 | } 1035 | 1036 | if (is_object($debug_object)) { 1037 | $debug_object->debug_log(3, 1038 | 'source charset: ' 1039 | . $sourceCharset 1040 | . ' target charaset: ' 1041 | . $targetCharset 1042 | ); 1043 | } 1044 | 1045 | if (!empty($sourceCharset) 1046 | && !empty($targetCharset) 1047 | && (strcasecmp($sourceCharset, $targetCharset) != 0)) { 1048 | // Check if the reported encoding could have been incorrect and the text is actually already UTF-8 1049 | if ((strcasecmp($targetCharset, 'UTF-8') == 0) 1050 | && ($this->is_utf8($text))) { 1051 | $converted_text = $text; 1052 | } else { 1053 | $converted_text = iconv($sourceCharset, $targetCharset, $text); 1054 | } 1055 | } 1056 | 1057 | // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output. 1058 | if ($targetCharset === 'UTF-8') { 1059 | if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") { 1060 | $converted_text = substr($converted_text, 3); 1061 | } 1062 | 1063 | if (substr($converted_text, -3) === "\xef\xbb\xbf") { 1064 | $converted_text = substr($converted_text, 0, -3); 1065 | } 1066 | } 1067 | 1068 | return $converted_text; 1069 | } 1070 | 1071 | static function is_utf8($str) 1072 | { 1073 | $c = 0; $b = 0; 1074 | $bits = 0; 1075 | $len = strlen($str); 1076 | for($i = 0; $i < $len; $i++) { 1077 | $c = ord($str[$i]); 1078 | if($c > 128) { 1079 | if(($c >= 254)) { return false; } 1080 | elseif($c >= 252) { $bits = 6; } 1081 | elseif($c >= 248) { $bits = 5; } 1082 | elseif($c >= 240) { $bits = 4; } 1083 | elseif($c >= 224) { $bits = 3; } 1084 | elseif($c >= 192) { $bits = 2; } 1085 | else { return false; } 1086 | if(($i + $bits) > $len) { return false; } 1087 | while($bits > 1) { 1088 | $i++; 1089 | $b = ord($str[$i]); 1090 | if($b < 128 || $b > 191) { return false; } 1091 | $bits--; 1092 | } 1093 | } 1094 | } 1095 | return true; 1096 | } 1097 | 1098 | function get_display_size() 1099 | { 1100 | global $debug_object; 1101 | 1102 | $width = -1; 1103 | $height = -1; 1104 | 1105 | if ($this->tag !== 'img') { 1106 | return false; 1107 | } 1108 | 1109 | // See if there is aheight or width attribute in the tag itself. 1110 | if (isset($this->attr['width'])) { 1111 | $width = $this->attr['width']; 1112 | } 1113 | 1114 | if (isset($this->attr['height'])) { 1115 | $height = $this->attr['height']; 1116 | } 1117 | 1118 | // Now look for an inline style. 1119 | if (isset($this->attr['style'])) { 1120 | // Thanks to user gnarf from stackoverflow for this regular expression. 1121 | $attributes = array(); 1122 | 1123 | preg_match_all( 1124 | '/([\w-]+)\s*:\s*([^;]+)\s*;?/', 1125 | $this->attr['style'], 1126 | $matches, 1127 | PREG_SET_ORDER 1128 | ); 1129 | 1130 | foreach ($matches as $match) { 1131 | $attributes[$match[1]] = $match[2]; 1132 | } 1133 | 1134 | // If there is a width in the style attributes: 1135 | if (isset($attributes['width']) && $width == -1) { 1136 | // check that the last two characters are px (pixels) 1137 | if (strtolower(substr($attributes['width'], -2)) === 'px') { 1138 | $proposed_width = substr($attributes['width'], 0, -2); 1139 | // Now make sure that it's an integer and not something stupid. 1140 | if (filter_var($proposed_width, FILTER_VALIDATE_INT)) { 1141 | $width = $proposed_width; 1142 | } 1143 | } 1144 | } 1145 | 1146 | // If there is a width in the style attributes: 1147 | if (isset($attributes['height']) && $height == -1) { 1148 | // check that the last two characters are px (pixels) 1149 | if (strtolower(substr($attributes['height'], -2)) == 'px') { 1150 | $proposed_height = substr($attributes['height'], 0, -2); 1151 | // Now make sure that it's an integer and not something stupid. 1152 | if (filter_var($proposed_height, FILTER_VALIDATE_INT)) { 1153 | $height = $proposed_height; 1154 | } 1155 | } 1156 | } 1157 | 1158 | } 1159 | 1160 | // Future enhancement: 1161 | // Look in the tag to see if there is a class or id specified that has 1162 | // a height or width attribute to it. 1163 | 1164 | // Far future enhancement 1165 | // Look at all the parent tags of this image to see if they specify a 1166 | // class or id that has an img selector that specifies a height or width 1167 | // Note that in this case, the class or id will have the img subselector 1168 | // for it to apply to the image. 1169 | 1170 | // ridiculously far future development 1171 | // If the class or id is specified in a SEPARATE css file thats not on 1172 | // the page, go get it and do what we were just doing for the ones on 1173 | // the page. 1174 | 1175 | $result = array( 1176 | 'height' => $height, 1177 | 'width' => $width 1178 | ); 1179 | 1180 | return $result; 1181 | } 1182 | 1183 | function save($filepath = '') 1184 | { 1185 | $ret = $this->outertext(); 1186 | 1187 | if ($filepath !== '') { 1188 | file_put_contents($filepath, $ret, LOCK_EX); 1189 | } 1190 | 1191 | return $ret; 1192 | } 1193 | 1194 | function addClass($class) 1195 | { 1196 | if (is_string($class)) { 1197 | $class = explode(' ', $class); 1198 | } 1199 | 1200 | if (is_array($class)) { 1201 | foreach($class as $c) { 1202 | if (isset($this->class)) { 1203 | if ($this->hasClass($c)) { 1204 | continue; 1205 | } else { 1206 | $this->class .= ' ' . $c; 1207 | } 1208 | } else { 1209 | $this->class = $c; 1210 | } 1211 | } 1212 | } else { 1213 | if (is_object($debug_object)) { 1214 | $debug_object->debug_log(2, 'Invalid type: ', gettype($class)); 1215 | } 1216 | } 1217 | } 1218 | 1219 | function hasClass($class) 1220 | { 1221 | if (is_string($class)) { 1222 | if (isset($this->class)) { 1223 | return in_array($class, explode(' ', $this->class), true); 1224 | } 1225 | } else { 1226 | if (is_object($debug_object)) { 1227 | $debug_object->debug_log(2, 'Invalid type: ', gettype($class)); 1228 | } 1229 | } 1230 | 1231 | return false; 1232 | } 1233 | 1234 | function removeClass($class = null) 1235 | { 1236 | if (!isset($this->class)) { 1237 | return; 1238 | } 1239 | 1240 | if (is_null($class)) { 1241 | $this->removeAttribute('class'); 1242 | return; 1243 | } 1244 | 1245 | if (is_string($class)) { 1246 | $class = explode(' ', $class); 1247 | } 1248 | 1249 | if (is_array($class)) { 1250 | $class = array_diff(explode(' ', $this->class), $class); 1251 | if (empty($class)) { 1252 | $this->removeAttribute('class'); 1253 | } else { 1254 | $this->class = implode(' ', $class); 1255 | } 1256 | } 1257 | } 1258 | 1259 | function getAllAttributes() 1260 | { 1261 | return $this->attr; 1262 | } 1263 | 1264 | function getAttribute($name) 1265 | { 1266 | return $this->__get($name); 1267 | } 1268 | 1269 | function setAttribute($name, $value) 1270 | { 1271 | $this->__set($name, $value); 1272 | } 1273 | 1274 | function hasAttribute($name) 1275 | { 1276 | return $this->__isset($name); 1277 | } 1278 | 1279 | function removeAttribute($name) 1280 | { 1281 | $this->__set($name, null); 1282 | } 1283 | 1284 | function remove() 1285 | { 1286 | if ($this->parent) { 1287 | $this->parent->removeChild($this); 1288 | } 1289 | } 1290 | 1291 | function removeChild($node) 1292 | { 1293 | $nidx = array_search($node, $this->nodes, true); 1294 | $cidx = array_search($node, $this->children, true); 1295 | $didx = array_search($node, $this->dom->nodes, true); 1296 | 1297 | if ($nidx !== false && $cidx !== false && $didx !== false) { 1298 | 1299 | foreach($node->children as $child) { 1300 | $node->removeChild($child); 1301 | } 1302 | 1303 | foreach($node->nodes as $entity) { 1304 | $enidx = array_search($entity, $node->nodes, true); 1305 | $edidx = array_search($entity, $node->dom->nodes, true); 1306 | 1307 | if ($enidx !== false && $edidx !== false) { 1308 | unset($node->nodes[$enidx]); 1309 | unset($node->dom->nodes[$edidx]); 1310 | } 1311 | } 1312 | 1313 | unset($this->nodes[$nidx]); 1314 | unset($this->children[$cidx]); 1315 | unset($this->dom->nodes[$didx]); 1316 | 1317 | $node->clear(); 1318 | 1319 | } 1320 | } 1321 | 1322 | function getElementById($id) 1323 | { 1324 | return $this->find("#$id", 0); 1325 | } 1326 | 1327 | function getElementsById($id, $idx = null) 1328 | { 1329 | return $this->find("#$id", $idx); 1330 | } 1331 | 1332 | function getElementByTagName($name) 1333 | { 1334 | return $this->find($name, 0); 1335 | } 1336 | 1337 | function getElementsByTagName($name, $idx = null) 1338 | { 1339 | return $this->find($name, $idx); 1340 | } 1341 | 1342 | function parentNode() 1343 | { 1344 | return $this->parent(); 1345 | } 1346 | 1347 | function childNodes($idx = -1) 1348 | { 1349 | return $this->children($idx); 1350 | } 1351 | 1352 | function firstChild() 1353 | { 1354 | return $this->first_child(); 1355 | } 1356 | 1357 | function lastChild() 1358 | { 1359 | return $this->last_child(); 1360 | } 1361 | 1362 | function nextSibling() 1363 | { 1364 | return $this->next_sibling(); 1365 | } 1366 | 1367 | function previousSibling() 1368 | { 1369 | return $this->prev_sibling(); 1370 | } 1371 | 1372 | function hasChildNodes() 1373 | { 1374 | return $this->has_child(); 1375 | } 1376 | 1377 | function nodeName() 1378 | { 1379 | return $this->tag; 1380 | } 1381 | 1382 | function appendChild($node) 1383 | { 1384 | $node->parent($this); 1385 | return $node; 1386 | } 1387 | 1388 | } 1389 | 1390 | class simple_html_dom 1391 | { 1392 | public $root = null; 1393 | public $nodes = array(); 1394 | public $callback = null; 1395 | public $lowercase = false; 1396 | public $original_size; 1397 | public $size; 1398 | 1399 | protected $pos; 1400 | protected $doc; 1401 | protected $char; 1402 | 1403 | protected $cursor; 1404 | protected $parent; 1405 | protected $noise = array(); 1406 | protected $token_blank = " \t\r\n"; 1407 | protected $token_equal = ' =/>'; 1408 | protected $token_slash = " />\r\n\t"; 1409 | protected $token_attr = ' >'; 1410 | 1411 | public $_charset = ''; 1412 | public $_target_charset = ''; 1413 | 1414 | protected $default_br_text = ''; 1415 | 1416 | public $default_span_text = ''; 1417 | 1418 | protected $self_closing_tags = array( 1419 | 'area' => 1, 1420 | 'base' => 1, 1421 | 'br' => 1, 1422 | 'col' => 1, 1423 | 'embed' => 1, 1424 | 'hr' => 1, 1425 | 'img' => 1, 1426 | 'input' => 1, 1427 | 'link' => 1, 1428 | 'meta' => 1, 1429 | 'param' => 1, 1430 | 'source' => 1, 1431 | 'track' => 1, 1432 | 'wbr' => 1 1433 | ); 1434 | protected $block_tags = array( 1435 | 'body' => 1, 1436 | 'div' => 1, 1437 | 'form' => 1, 1438 | 'root' => 1, 1439 | 'span' => 1, 1440 | 'table' => 1 1441 | ); 1442 | protected $optional_closing_tags = array( 1443 | // Not optional, see 1444 | // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element 1445 | 'b' => array('b' => 1), 1446 | 'dd' => array('dd' => 1, 'dt' => 1), 1447 | // Not optional, see 1448 | // https://www.w3.org/TR/html/grouping-content.html#the-dl-element 1449 | 'dl' => array('dd' => 1, 'dt' => 1), 1450 | 'dt' => array('dd' => 1, 'dt' => 1), 1451 | 'li' => array('li' => 1), 1452 | 'optgroup' => array('optgroup' => 1, 'option' => 1), 1453 | 'option' => array('optgroup' => 1, 'option' => 1), 1454 | 'p' => array('p' => 1), 1455 | 'rp' => array('rp' => 1, 'rt' => 1), 1456 | 'rt' => array('rp' => 1, 'rt' => 1), 1457 | 'td' => array('td' => 1, 'th' => 1), 1458 | 'th' => array('td' => 1, 'th' => 1), 1459 | 'tr' => array('td' => 1, 'th' => 1, 'tr' => 1), 1460 | ); 1461 | 1462 | function __construct( 1463 | $str = null, 1464 | $lowercase = true, 1465 | $forceTagsClosed = true, 1466 | $target_charset = DEFAULT_TARGET_CHARSET, 1467 | $stripRN = true, 1468 | $defaultBRText = DEFAULT_BR_TEXT, 1469 | $defaultSpanText = DEFAULT_SPAN_TEXT, 1470 | $options = 0) 1471 | { 1472 | if ($str) { 1473 | if (preg_match('/^http:\/\//i', $str) || is_file($str)) { 1474 | $this->load_file($str); 1475 | } else { 1476 | $this->load( 1477 | $str, 1478 | $lowercase, 1479 | $stripRN, 1480 | $defaultBRText, 1481 | $defaultSpanText, 1482 | $options 1483 | ); 1484 | } 1485 | } 1486 | // Forcing tags to be closed implies that we don't trust the html, but 1487 | // it can lead to parsing errors if we SHOULD trust the html. 1488 | if (!$forceTagsClosed) { 1489 | $this->optional_closing_array = array(); 1490 | } 1491 | 1492 | $this->_target_charset = $target_charset; 1493 | } 1494 | 1495 | function __destruct() 1496 | { 1497 | $this->clear(); 1498 | } 1499 | 1500 | function load( 1501 | $str, 1502 | $lowercase = true, 1503 | $stripRN = true, 1504 | $defaultBRText = DEFAULT_BR_TEXT, 1505 | $defaultSpanText = DEFAULT_SPAN_TEXT, 1506 | $options = 0) 1507 | { 1508 | global $debug_object; 1509 | 1510 | // prepare 1511 | $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText); 1512 | 1513 | // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037 1514 | // Script tags removal now preceeds style tag removal. 1515 | // strip out