├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── composer.json
└── src
└── KubAT
└── PhpSimple
├── HtmlDomParser.php
└── lib
└── simple_html_dom.php
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | I'm not the maintainer of the PHP Simple HTML DOM Parser project (https://sourceforge.net/projects/simplehtmldom/)
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Jakub Stawowy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | php-simple-html-dom-parser
2 | ==========================
3 |
4 | Version 1.9.1 - PHP 7.3 compatible
5 | PHP Simple HTML DOM Parser changelog: https://sourceforge.net/projects/simplehtmldom/files/simplehtmldom/1.9.1/
6 |
7 |
8 | Install
9 | -------
10 |
11 | ```
12 | composer require kub-at/php-simple-html-dom-parser
13 | ```
14 |
15 | Usage
16 | -----
17 |
18 | ```php
19 | use KubAT\PhpSimple\HtmlDomParser;
20 |
21 | // ...
22 | $dom = HtmlDomParser::str_get_html( $str );
23 | // or
24 | $dom = HtmlDomParser::file_get_html( $file_name );
25 |
26 | $elems = $dom->find($elem_name);
27 | // ...
28 |
29 | ```
30 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "kub-at/php-simple-html-dom-parser",
3 | "description": "PHP Simple HTML DOM Parser with namespace and PHP 7.3 compatible",
4 | "keywords": ["html", "dom", "simple"],
5 | "homepage": "http://simplehtmldom.sourceforge.net/",
6 | "type": "library",
7 | "license": "MIT",
8 | "authors": [
9 | {
10 | "name": "S.C. Chen",
11 | "email": "me578022@gmail.com"
12 | },
13 | {
14 | "name": "Jakub Stawowy",
15 | "email": "Kub-AT@users.noreply.github.com"
16 | }
17 | ],
18 | "require": {
19 | "php": ">=5.3.2"
20 | },
21 | "autoload": {
22 | "psr-0": { "KubAT\\PhpSimple\\HtmlDomParser": "src/" }
23 | }
24 | }
--------------------------------------------------------------------------------
/src/KubAT/PhpSimple/HtmlDomParser.php:
--------------------------------------------------------------------------------
1 | $maxLen) {
89 | $dom->clear();
90 | return false;
91 | }
92 |
93 | return $dom->load($contents, $lowercase, $stripRN);
94 | }
95 |
/**
 * Build a DOM object from a string of HTML.
 *
 * A simple_html_dom instance is created with the given options and the markup
 * is handed to its load() method.
 *
 * @param mixed  $str             Markup to parse. Scalars and objects that
 *                                implement __toString() are cast to string.
 * @param bool   $lowercase       Forwarded to the simple_html_dom constructor and to load().
 * @param bool   $forceTagsClosed Forwarded to the simple_html_dom constructor.
 * @param string $target_charset  Forwarded to the simple_html_dom constructor.
 * @param bool   $stripRN         Forwarded to the simple_html_dom constructor and to load().
 * @param string $defaultBRText   Forwarded to the simple_html_dom constructor.
 * @param string $defaultSpanText Forwarded to the simple_html_dom constructor.
 *
 * @return mixed The result of load(), or false when the input is empty, not
 *               convertible to a string, or longer than MAX_FILE_SIZE bytes.
 */
function str_get_html(
    $str,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    $dom = new simple_html_dom(
        null,
        $lowercase,
        $forceTagsClosed,
        $target_charset,
        $stripRN,
        $defaultBRText,
        $defaultSpanText
    );

    // empty() would also reject the valid one-character document "0", and
    // strlen() must never see arrays or plain objects, so normalise first.
    $stringable = is_scalar($str)
        || (is_object($str) && method_exists($str, '__toString'));
    $html = $stringable ? (string) $str : '';

    if ($html === '' || strlen($html) > MAX_FILE_SIZE) {
        $dom->clear();
        return false;
    }

    return $dom->load($html, $lowercase, $stripRN);
}
122 |
/**
 * Print the tree below a node by delegating to the node's dump() method.
 *
 * @param object $node      Node exposing dump($show_attr, $depth).
 * @param bool   $show_attr Forwarded to dump() as its $show_attr argument.
 * @param int    $deep      Forwarded to dump() as the starting depth.
 *
 * @return void
 */
function dump_html_tree($node, $show_attr = true, $deep = 0)
{
    // The node itself used to be passed as $show_attr, which silently
    // discarded both options given by the caller.
    $node->dump($show_attr, $deep);
}
127 |
128 | class simple_html_dom_node
129 | {
130 | public $nodetype = HDOM_TYPE_TEXT;
131 | public $tag = 'text';
132 | public $attr = array();
133 | public $children = array();
134 | public $nodes = array();
135 | public $parent = null;
136 | public $_ = array();
137 | public $tag_start = 0;
138 | private $dom = null;
139 |
/**
 * Create a node that belongs to the given DOM.
 *
 * @param object $dom Owning DOM object; the new node is appended to its
 *                    $nodes list and keeps a reference to it.
 */
function __construct($dom)
{
    // Registering with the owner and remembering it are independent steps.
    $dom->nodes[] = $this;
    $this->dom = $dom;
}
145 |
/**
 * Drop the node's references (see clear()) when the object is destroyed.
 */
function __destruct()
{
    $this->clear();
}
150 |
/**
 * String conversion yields the node's outer text.
 *
 * @return string Result of outertext().
 */
function __toString()
{
    $html = $this->outertext();

    return $html;
}
155 |
/**
 * Release every reference this node holds to other nodes and to its DOM.
 *
 * After this call $parent, $children, $nodes and the owning DOM are all null.
 *
 * @return void
 */
function clear()
{
    // Tree links first, owner last; the assignments are independent.
    $this->parent = null;
    $this->children = null;
    $this->nodes = null;
    $this->dom = null;
}
163 |
/**
 * Echo this node and, recursively, everything in its $nodes list.
 *
 * Each node is printed on its own line, indented with one tab per level.
 *
 * @param bool $show_attr When true, attributes are appended as
 *                        ([name]=>"value", ...).
 * @param int  $depth     Indentation level of this node.
 *
 * @return void
 */
function dump($show_attr = true, $depth = 0)
{
    $line = str_repeat("\t", $depth) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        $pairs = '';
        foreach ($this->attr as $name => $value) {
            $pairs .= "[$name]=>\"$value\", ";
        }
        $line .= '(' . $pairs . ')';
    }

    echo $line . "\n";

    // $nodes may be empty or null (after clear()); nothing to descend into.
    if (!$this->nodes) {
        return;
    }

    foreach ($this->nodes as $child) {
        $child->dump($show_attr, $depth + 1);
    }
}
184 |
/**
 * Produce a one-line debug description of this node.
 *
 * The line contains the tag, the attributes, the internal info array ($_),
 * the text (if any), the stored inner text, the number of children and
 * nodes, and the tag start offset.
 *
 * @param bool $echo When true the line is echoed and nothing is returned;
 *                   when false the line is returned instead.
 *
 * @return string|null The description when $echo is false, otherwise null.
 */
function dump_node($echo = true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"$v\", ";
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"$v2\", ";
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"$v\", ";
            }
        }
        $string .= ')';
    }

    if (isset($this->text)) {
        $string .= " text: ({$this->text})";
    }

    $string .= ' HDOM_INNER_INFO: ';

    // This used to read an undefined $node variable, so the stored inner
    // text was never shown.
    if (isset($this->_[HDOM_INFO_INNER])) {
        $string .= "'" . $this->_[HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    // clear() sets both lists to null; count(null) warns on PHP 7.2+ and
    // throws on PHP 8, so report 0 in that case.
    $children_count = is_array($this->children) ? count($this->children) : 0;
    $nodes_count = is_array($this->nodes) ? count($this->nodes) : 0;

    $string .= ' children: ' . $children_count;
    $string .= ' nodes: ' . $nodes_count;
    $string .= ' tag_start: ' . $this->tag_start;
    $string .= "\n";

    if ($echo) {
        echo $string;
        return;
    }

    return $string;
}
237 |
/**
 * Get the parent node, or move this node under a new parent.
 *
 * When a parent is given, the node is first removed from the $nodes and
 * $children lists of its current parent (if any) and then appended to the
 * lists of the new one, so it is never listed under two parents and is not
 * duplicated when re-attached to the same parent.
 *
 * @param object|null $parent New parent node, or null to only read.
 *
 * @return object|null The (possibly new) parent node.
 */
function parent($parent = null)
{
    if ($parent !== null) {
        $previous = $this->parent;

        if ($previous !== null) {
            // Detach from the old parent; array_splice() keeps the lists
            // densely indexed, which the sibling lookups rely on.
            foreach (array('nodes', 'children') as $list) {
                if (!is_array($previous->$list)) {
                    continue;
                }

                $position = array_search($this, $previous->$list, true);

                if ($position !== false) {
                    array_splice($previous->$list, $position, 1);
                }
            }
        }

        $this->parent = $parent;
        $this->parent->nodes[] = $this;
        $this->parent->children[] = $this;
    }

    return $this->parent;
}
251 |
/**
 * Tell whether this node has at least one child element.
 *
 * @return bool True when $children is a non-empty list.
 */
function has_child()
{
    return (bool) $this->children;
}
256 |
/**
 * Return all children, or the child at a given position.
 *
 * @param int $idx Position of the wanted child; -1 (the default) returns
 *                 the whole $children list.
 *
 * @return mixed The $children list, a single child, or null when no child
 *               exists at $idx.
 */
function children($idx = -1)
{
    if ($idx === -1) {
        return $this->children;
    }

    return isset($this->children[$idx]) ? $this->children[$idx] : null;
}
269 |
/**
 * Return the first child element.
 *
 * @return object|null The first entry of $children, or null when the node
 *                     has no children (including after clear()).
 */
function first_child()
{
    // isset() copes with $children being null after clear(); count(null)
    // warns on PHP 7.2+ and throws a TypeError on PHP 8.
    return isset($this->children[0]) ? $this->children[0] : null;
}
277 |
/**
 * Return the last child element.
 *
 * @return object|null The last entry of $children, or null when the node
 *                     has no children (including after clear()).
 */
function last_child()
{
    // $children is null after clear(); count(null) warns on PHP 7.2+ and
    // throws a TypeError on PHP 8.
    if (!is_array($this->children) || count($this->children) === 0) {
        return null;
    }

    // Work on a copy so end() does not move the internal pointer of the
    // node's own list.
    $children = $this->children;

    return end($children);
}
285 |
/**
 * Return the element that follows this node in its parent's $children.
 *
 * @return object|null The next sibling, or null when there is no parent,
 *                     the node is not listed as a child, or it is the last
 *                     child.
 */
function next_sibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if ($position === false) {
        return null;
    }

    $next = $position + 1;

    return isset($siblings[$next]) ? $siblings[$next] : null;
}
300 |
/**
 * Return the element that precedes this node in its parent's $children.
 *
 * @return object|null The previous sibling, or null when there is no
 *                     parent, the node is not listed as a child, or it is
 *                     the first child.
 */
function prev_sibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if ($position === false || $position <= 0) {
        return null;
    }

    return $siblings[$position - 1];
}
315 |
/**
 * Walk up the parent chain and return the closest ancestor with a given tag.
 *
 * @param string $tag Tag name to look for (compared with ===).
 *
 * @return object|null The nearest matching ancestor, or null when none of
 *                     the ancestors carries that tag.
 */
function find_ancestor_tag($tag)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    for ($ancestor = $this->parent; $ancestor !== null; $ancestor = $ancestor->parent) {
        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'Current tag is: ' . $ancestor->tag);
        }

        if ($ancestor->tag === $tag) {
            return $ancestor;
        }
    }

    // Ran off the top of the tree without a match.
    return null;
}
341 |
/**
 * Return the markup contained inside this node.
 *
 * Resolution order: an explicitly stored inner text, then the stored text
 * of a text-like node (with noise restored by the DOM), and finally the
 * concatenated outer text of every entry in $nodes.
 *
 * @return string
 */
function innertext()
{
    $info = $this->_;

    if (isset($info[HDOM_INFO_INNER])) {
        return $info[HDOM_INFO_INNER];
    }

    if (isset($info[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($info[HDOM_INFO_TEXT]);
    }

    $html = '';

    foreach ($this->nodes as $child) {
        $html .= $child->outertext();
    }

    return $html;
}
360 |
/**
 * Return the markup of this node including its own tags.
 *
 * Resolution order: the root node yields its inner text; otherwise the DOM
 * callback (if any) is invoked, then an explicitly stored outer text or the
 * stored text of a text-like node is returned. Failing that the markup is
 * rebuilt from the opening tag, the inner content and the closing tag.
 *
 * @return string
 */
function outertext()
{
    global $debug_object;

    if (is_object($debug_object)) {
        $text = '';

        if ($this->tag === 'text') {
            if (!empty($this->text)) {
                $text = ' with text: ' . $this->text;
            }
        }

        $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text);
    }

    if ($this->tag === 'root') {
        return $this->innertext();
    }

    // todo: What is the use of this callback? Remove?
    if ($this->dom && $this->dom->callback !== null) {
        call_user_func_array($this->dom->callback, array($this));
    }

    if (isset($this->_[HDOM_INFO_OUTER])) {
        return $this->_[HDOM_INFO_OUTER];
    }

    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    $ret = '';

    // Opening tag: only when this node has a begin index that still refers
    // to a node of the DOM (isset() avoids undefined-index notices).
    if ($this->dom
        && isset($this->_[HDOM_INFO_BEGIN])
        && isset($this->dom->nodes[$this->_[HDOM_INFO_BEGIN]])) {
        $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
    }

    if (isset($this->_[HDOM_INFO_INNER])) {
        // todo: <br> should either never have HDOM_INFO_INNER or always
        if ($this->tag !== 'br') {
            $ret .= $this->_[HDOM_INFO_INNER];
        }
    } elseif ($this->nodes) {
        foreach ($this->nodes as $n) {
            $ret .= $this->convert_text($n->outertext());
        }
    }

    // Closing tag; an end index of 0 marks an element without one.
    if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) {
        $ret .= '</' . $this->tag . '>';
    }

    return $ret;
}
417 |
/**
 * Return the plain-text content of this node.
 *
 * An explicitly stored inner text wins. Text nodes return their stored text
 * (noise restored by the DOM); comments, unknown nodes, <script> and <style>
 * yield an empty string. For everything else the text of all entries in
 * $nodes is concatenated, with a blank line before each <p> and the DOM's
 * default span text after each <span>.
 *
 * @return string
 */
function text()
{
    if (isset($this->_[HDOM_INFO_INNER])) {
        return $this->_[HDOM_INFO_INNER];
    }

    $type = $this->nodetype;

    if ($type == HDOM_TYPE_TEXT) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    if ($type == HDOM_TYPE_COMMENT || $type == HDOM_TYPE_UNKNOWN) {
        return '';
    }

    if (strcasecmp($this->tag, 'script') === 0
        || strcasecmp($this->tag, 'style') === 0) {
        return '';
    }

    $plain = '';

    // $nodes has been observed to be null for some element nodes (certain
    // span and p tags) without clear() having been called; the cause is
    // unknown, so the case is simply tolerated here.
    if ($this->nodes !== null) {
        foreach ($this->nodes as $child) {
            // A paragraph starts after a blank line.
            if ($child->tag === 'p') {
                $plain = trim($plain) . "\n\n";
            }

            $plain .= $this->convert_text($child->text());

            // Separate consecutive spans so their text does not run
            // together in the plain-text output.
            if ($child->tag === 'span') {
                $plain .= $this->dom->default_span_text;
            }
        }
    }

    return $plain;
}
459 |
/**
 * Return the inner text with CDATA section markers removed.
 *
 * NOTE(review): the replacement arguments were lost in this copy of the file
 * (the call had degenerated to str_ireplace('', '', ...), a no-op, and one
 * line was missing). The CDATA markers below are the presumed original
 * intent; confirm against the upstream library.
 *
 * @return string Inner text without "<![CDATA[" and "]]>".
 */
function xmltext()
{
    $ret = $this->innertext();
    $ret = str_ireplace('<![CDATA[', '', $ret);
    $ret = str_replace(']]>', '', $ret);
    return $ret;
}
467 |
/**
 * Rebuild the opening tag of this node, including its attributes.
 *
 * Text-like nodes (text, comment, unknown) return their stored text instead.
 * Spacing and quoting of each attribute are taken from the info arrays
 * HDOM_INFO_SPACE and HDOM_INFO_QUOTE, indexed by the attribute's position
 * in $attr.
 *
 * @return string The opening tag (or the stored text), noise restored.
 */
function makeup()
{
    // text, comment, unknown
    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    $html = '<' . $this->tag;
    $position = 0;

    foreach ($this->attr as $name => $value) {
        // The position counts every attribute, removed ones included.
        $i = $position++;

        // Removed attributes are not written out.
        if ($value === null || $value === false) {
            continue;
        }

        $html .= $this->_[HDOM_INFO_SPACE][$i][0];

        // Attributes without a value: nowrap, checked, selected...
        if ($value === true) {
            $html .= $name;
            continue;
        }

        $quote_type = $this->_[HDOM_INFO_QUOTE][$i];

        if ($quote_type == HDOM_QUOTE_DOUBLE) {
            $quote = '"';
        } elseif ($quote_type == HDOM_QUOTE_SINGLE) {
            $quote = '\'';
        } else {
            $quote = '';
        }

        $html .= $name
            . $this->_[HDOM_INFO_SPACE][$i][1]
            . '='
            . $this->_[HDOM_INFO_SPACE][$i][2]
            . $quote
            . $value
            . $quote;
    }

    return $this->dom->restore_noise($html) . $this->_[HDOM_INFO_ENDSPACE] . '>';
}
510 |
/**
 * Find the descendants of this node that match a CSS-like selector.
 *
 * The selector is parsed into one chain per comma-separated alternative.
 * Each chain is resolved level by level, without recursion, by calling
 * seek() on the nodes matched so far. Results of all chains are merged and
 * returned in the order of their index in the DOM's node list.
 *
 * @param string   $selector  Selector string.
 * @param int|null $idx       Null to get all matches; otherwise the index
 *                            of the wanted match (negative counts from the
 *                            end).
 * @param bool     $lowercase Forwarded to seek().
 *
 * @return mixed Array of nodes when $idx is null; otherwise a single node,
 *               or null when no match exists at that index.
 */
function find($selector, $idx = null, $lowercase = false)
{
    $selectors = $this->parse_selector($selector);
    if (count($selectors) === 0) { return array(); }

    $found_keys = array();

    foreach ($selectors as $chain) {
        // An empty chain aborts the whole search (sourceforge tracker id
        // 2788009 changed this check to look at every chain, not only the
        // first).
        $depth = count($chain);
        if ($depth === 0) { return array(); }
        if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); }

        $head = array($this->_[HDOM_INFO_BEGIN] => 1);
        $combinator = ' ';

        for ($level = 0; $level < $depth; ++$level) {
            $matched = array();

            foreach ($head as $key => $unused) {
                $start = ($key === -1) ? $this->dom->root : $this->dom->nodes[$key];
                $start->seek($chain[$level], $matched, $combinator, $lowercase);
            }

            $head = $matched;
            $combinator = $chain[$level][4]; // combinator towards the next level
        }

        foreach ($head as $key => $unused) {
            if (!isset($found_keys[$key])) {
                $found_keys[$key] = 1;
            }
        }
    }

    // Keys are indexes into the DOM's node list.
    ksort($found_keys);

    $found = array();
    foreach ($found_keys as $key => $unused) {
        $found[] = $this->dom->nodes[$key];
    }

    if ($idx === null) {
        return $found;
    }

    if ($idx < 0) {
        $idx = count($found) + $idx;
    }

    return isset($found[$idx]) ? $found[$idx] : null;
}
562 |
/**
 * Collect the nodes that match one selector step, relative to this node.
 *
 * The candidate set depends on the combinator that led to this step:
 *   ' '  every node between this node's begin index and its end index
 *   '>'  this node's children
 *   '+'  the sibling immediately following this node
 *   '~'  every sibling following this node
 * Each candidate is then tested against tag, id, classes and attributes.
 *
 * @param array  $selector   One parsed step as produced by parse_selector():
 *                           tag, id, classes, attributes, next combinator.
 * @param array  $ret        Result set, passed by reference; matches are
 *                           recorded as node index => 1.
 * @param string $parent_cmd Combinator that led to this step.
 * @param bool   $lowercase  When true, class names of the node and
 *                           attribute values are compared in lower case.
 *
 * @return void Results are delivered through $ret.
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    list($tag, $id, $class, $attributes) = $selector;
    $nodes = array();

    if ($parent_cmd === ' ') { // Descendant Combinator
        // Find parent closing tag if the current element doesn't have a
        // closing tag (i.e. void element)
        $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
        if ($end == 0) {
            $parent = $this->parent;
            // Test for null first and guard the final read: walking past the
            // top of the tree used to dereference null.
            while ($parent !== null && !isset($parent->_[HDOM_INFO_END])) {
                $end -= 1;
                $parent = $parent->parent;
            }
            if ($parent !== null) {
                $end += $parent->_[HDOM_INFO_END];
            }
        }

        // Get list of target nodes
        $nodes_start = $this->_[HDOM_INFO_BEGIN] + 1;
        $nodes_count = $end - $nodes_start;
        $nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true);
    } elseif ($parent_cmd === '>') { // Child Combinator
        $nodes = $this->children;
    } elseif (($parent_cmd === '+' || $parent_cmd === '~') && $this->parent) {
        // Sibling combinators. The lookup is strict: a loose comparison of
        // node objects compares their (cyclic) property graphs.
        $index = array_search($this, $this->parent->children, true);

        if ($index !== false) {
            if ($parent_cmd === '+') { // Next-Sibling Combinator
                if (isset($this->parent->children[$index + 1])) {
                    $nodes[] = $this->parent->children[$index + 1];
                }
            } else { // Subsequent Sibling Combinator
                // Only siblings AFTER this node qualify; the node itself
                // must not be a candidate.
                $nodes = array_slice($this->parent->children, $index + 1);
            }
        }
    }

    // Go through each candidate.
    // Note: If this element is a void tag, any previous void element is
    // skipped.
    foreach ($nodes as $node) {
        // Skip root nodes
        if (!$node->parent) {
            continue;
        }

        // Handle 'text' selector
        if ($tag === 'text' && $node->tag === 'text') {
            $ret[array_search($node, $this->dom->nodes, true)] = 1;
            continue;
        }

        // Skip if node isn't a child node (i.e. text nodes)
        if (!in_array($node, $node->parent->children, true)) {
            continue;
        }

        // Skip if tag doesn't match
        if ($tag !== '' && $tag !== $node->tag && $tag !== '*') {
            continue;
        }

        // Check ID
        if ($id !== '') {
            if (!isset($node->attr['id'])) {
                continue;
            }

            // Note: Only consider the first ID (as browsers do). A temporary
            // variable is used because function-call array dereferencing
            // needs PHP 5.4 while the package declares PHP >= 5.3.2.
            $node_ids = explode(' ', trim($node->attr['id']));

            if ($id !== $node_ids[0]) {
                continue;
            }
        }

        // Check if all class(es) exist
        if ($class !== '' && is_array($class) && !empty($class)) {
            if (!isset($node->attr['class'])) {
                continue;
            }

            $node_classes = explode(' ', $node->attr['class']);

            if ($lowercase) {
                $node_classes = array_map('strtolower', $node_classes);
            }

            foreach ($class as $c) {
                // Strict: loosely compared numeric-looking class names
                // (e.g. "1e1" and "10") would otherwise be considered equal.
                if (!in_array($c, $node_classes, true)) {
                    continue 2;
                }
            }
        }

        // Check attributes
        if ($attributes !== '' && is_array($attributes) && !empty($attributes)) {
            foreach ($attributes as $a) {
                list (
                    $att_name,
                    $att_expr,
                    $att_val,
                    $att_inv,
                    $att_case_sensitivity
                ) = $a;

                // Handle indexing attributes (i.e. "[2]")
                /**
                 * Note: This is not supported by the CSS Standard but adds
                 * the ability to select items compatible to XPath (i.e.
                 * the 3rd element within it's parent).
                 *
                 * Note: This doesn't conflict with the CSS Standard which
                 * doesn't work on numeric attributes anyway.
                 */
                if (is_numeric($att_name)
                    && $att_expr === ''
                    && $att_val === '') {
                    $count = 0;

                    // Find index of current element in parent
                    foreach ($node->parent->children as $c) {
                        if ($c->tag === $node->tag) { ++$count; }
                        if ($c === $node) { break; }
                    }

                    // If this is the correct node, continue with next
                    // attribute
                    if ($count === (int)$att_name) { continue; }
                }

                // Check attribute availability
                if ($att_inv) { // Attribute should NOT be set
                    if (isset($node->attr[$att_name])) {
                        continue 2;
                    }
                } else { // Attribute should be set
                    // todo: "plaintext" is not a valid CSS selector!
                    if ($att_name !== 'plaintext'
                        && !isset($node->attr[$att_name])) {
                        continue 2;
                    }
                }

                // Continue with next attribute if expression isn't defined
                if ($att_expr === '') { continue; }

                // A "plaintext" attribute is matched against the node's
                // plain text instead of a real attribute.
                // todo "plaintext" is not a valid CSS selector!
                if ($att_name === 'plaintext') {
                    $nodeKeyValue = $node->text();
                } else {
                    $nodeKeyValue = $node->attr[$att_name];
                }

                if (is_object($debug_object)) {
                    $debug_object->debug_log(2,
                        'testing node: '
                        . $node->tag
                        . ' for attribute: '
                        . $att_name
                        . $att_expr
                        . $att_val
                        . ' where nodes value is: '
                        . $nodeKeyValue
                    );
                }

                // If lowercase is set, do a case insensitive test of
                // the value of the selector.
                if ($lowercase) {
                    $check = $this->match(
                        $att_expr,
                        strtolower($att_val),
                        strtolower($nodeKeyValue),
                        $att_case_sensitivity
                    );
                } else {
                    $check = $this->match(
                        $att_expr,
                        $att_val,
                        $nodeKeyValue,
                        $att_case_sensitivity
                    );
                }

                if (is_object($debug_object)) {
                    $debug_object->debug_log(2,
                        'after match: '
                        . ($check ? 'true' : 'false')
                    );
                }

                if (!$check) {
                    continue 2;
                }
            }
        }

        // Every test passed: record the node by its begin index.
        $ret[$node->_[HDOM_INFO_BEGIN]] = 1;
    }

    // It's passed by reference so this is actually what this function returns.
    if (is_object($debug_object)) {
        $debug_object->debug_log(1, 'EXIT - ret: ', $ret);
    }
}
784 |
/**
 * Test an attribute value against a selector value.
 *
 * Supported operators:
 *   =   equal                      !=  not equal
 *   ^=  value starts with pattern  $=  value ends with pattern
 *   *=  value contains pattern
 *   |=  value is exactly the pattern, or starts with the pattern
 *       immediately followed by "-"
 *   ~=  value is a whitespace-separated list of words, one of which is
 *       exactly the pattern
 *
 * @param string $exp              Operator, one of the above.
 * @param string $pattern          Value given in the selector.
 * @param string $value            Value found on the node.
 * @param string $case_sensitivity 'i' compares case-insensitively.
 *
 * @return mixed Truthy on match, falsy otherwise (the preg_match() based
 *               operators return its integer result). Unknown operators
 *               yield false.
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
{
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    if ($case_sensitivity === 'i') {
        $pattern = strtolower($pattern);
        $value = strtolower($value);
    }

    switch ($exp) {
        case '=':
            return ($value === $pattern);
        case '!=':
            return ($value !== $pattern);
        case '^=':
            return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
        case '$=':
            return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
        case '*=':
            return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
        case '|=':
            // A bare prefix test would also accept e.g. "english" for
            // [lang|=en]; the prefix must be followed by a hyphen.
            return $value === $pattern
                || strpos($value, $pattern . '-') === 0;
        case '~=':
            // An empty pattern never matches anything, and words may be
            // separated by any run of whitespace.
            if ($pattern === '') {
                return false;
            }

            $words = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);

            return in_array($pattern, $words, true);
    }

    return false;
}
829 |
/**
 * Split a selector string into selector chains.
 *
 * The result is a list with one entry per comma-separated alternative. Each
 * entry is a list of steps, and each step is an array of
 *   [0] tag name (lower-cased when the DOM is in lowercase mode)
 *   [1] id
 *   [2] class names: '' or an array of names
 *   [3] attributes: '' or an array of
 *       [name, expression, value, inverted flag, case-sensitivity flag]
 *   [4] combinator leading to the next step ('' at the end of a chain)
 *
 * The main pattern is modified from mootools (https://mootools.net/) and
 * captures, in this order:
 *   [1] tag name     ([\w:\*-]*)          words, colons, asterisks, hyphens
 *   [2] id           (?:\#([\w-]+))?      "#" followed by the id
 *   [3] class names  (?:\.([\w\.-]+))?    "." followed by one or more names
 *                                         chained with dots (".foo.bar")
 *   [4] attributes   one or more [...] groups
 *   [5] separator    ([\/, >+~]+)
 *
 * Paperg: the colon is allowed in attribute names. Such attributes must be
 * read with getAttribute, since $dom->x:y is not valid PHP.
 *
 * An attribute specifier may start with an "@" that is not captured; whether
 * that should be documented or removed needs further study. Attribute names
 * can be negated with a "!" prefix.
 *
 * @param string $selector_string Selector as passed to find().
 *
 * @return array List of selector chains as described above.
 */
protected function parse_selector($selector_string)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    // phpcs:ignore Generic.Files.LineLength
    $pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is";

    preg_match_all(
        $pattern,
        trim($selector_string) . ' ', // Add final ' ' as pseudo separator
        $matches,
        PREG_SET_ORDER
    );

    if (is_object($debug_object)) {
        $debug_object->debug_log(2, 'Matches Array: ', $matches);
    }

    $selectors = array();
    $chain = array();

    foreach ($matches as $part) {
        // Skip NoOps
        $full_match = trim($part[0]);
        if ($full_match === '' || $full_match === '/' || $full_match === '//') {
            continue;
        }

        // Tag names follow the DOM's lowercase setting.
        if ($this->dom->lowercase) {
            $part[1] = strtolower($part[1]);
        }

        // ".foo.bar" was captured as "foo.bar"; split into single names.
        if ($part[3] !== '') {
            $part[3] = explode('.', $part[3]);
        }

        /* Attributes (pattern based on the main pattern above):
         * [0] full match, [1] name, [2] expression, [3] value,
         * [4] case sensitivity
         */
        if ($part[4] !== '') {
            preg_match_all(
                "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
                trim($part[4]),
                $attributes,
                PREG_SET_ORDER
            );

            $parsed = array();

            foreach ($attributes as $att) {
                // Skip empty matches
                if (trim($att[0]) === '') { continue; }

                $inverted = (isset($att[1][0]) && $att[1][0] === '!');
                $parsed[] = array(
                    $inverted ? substr($att[1], 1) : $att[1], // Name
                    (isset($att[2])) ? $att[2] : '', // Expression
                    (isset($att[3])) ? $att[3] : '', // Value
                    $inverted, // Inverted Flag
                    (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
                );
            }

            // Replace the raw attribute string by the parsed list.
            $part[4] = $parsed;
        }

        // Sanitize the separator: pure whitespace means "descendant",
        // anything else is the trimmed combinator.
        $separator = trim($part[5]);
        if ($part[5] !== '' && $separator === '') {
            $separator = ' ';
        }

        // A comma ends the current chain and carries no combinator.
        $is_list = ($separator === ',');
        if ($is_list) {
            $separator = '';
        }
        $part[5] = $separator;

        // Remove full match before adding to results
        array_shift($part);
        $chain[] = $part;

        if ($is_list) { // Selector List
            $selectors[] = $chain;
            $chain = array();
        }
    }

    if (count($chain) > 0) {
        $selectors[] = $chain;
    }

    return $selectors;
}
968 |
/**
 * Magic read access to attributes and virtual text properties.
 *
 * A set attribute of that name wins and is returned charset-converted.
 * Otherwise outertext, innertext, plaintext and xmltext map to the methods
 * of the same purpose; any other name yields a boolean telling whether the
 * attribute key exists.
 *
 * @param string $name Property name.
 *
 * @return mixed
 */
function __get($name)
{
    if (isset($this->attr[$name])) {
        return $this->convert_text($this->attr[$name]);
    }

    if ($name === 'outertext') {
        return $this->outertext();
    }

    if ($name === 'innertext') {
        return $this->innertext();
    }

    if ($name === 'plaintext') {
        return $this->text();
    }

    if ($name === 'xmltext') {
        return $this->xmltext();
    }

    return array_key_exists($name, $this->attr);
}
982 |
/**
 * Magic write access to attributes and virtual text properties.
 *
 * outertext is stored as the node's outer text. innertext replaces the
 * stored text of a text-like node, or is stored as inner text otherwise.
 * Any other name sets an attribute; an attribute that is not set yet also
 * gets default spacing and double quotes for later output.
 *
 * @param string $name  Property name.
 * @param mixed  $value New value.
 *
 * @return mixed The assigned value for outertext/innertext, otherwise null.
 */
function __set($name, $value)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    if ($name === 'outertext') {
        return $this->_[HDOM_INFO_OUTER] = $value;
    }

    if ($name === 'innertext') {
        $slot = isset($this->_[HDOM_INFO_TEXT]) ? HDOM_INFO_TEXT : HDOM_INFO_INNER;
        return $this->_[$slot] = $value;
    }

    if (!isset($this->attr[$name])) {
        // Formatting info used by makeup() when writing the attribute.
        $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
        $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
    }

    $this->attr[$name] = $value;
}
1004 |
/**
 * Magic isset() support for attributes and virtual text properties.
 *
 * Every virtual property served by __get() (outertext, innertext, plaintext
 * and xmltext) is reported as set. For any other name the attribute key
 * decides, so attributes without a value (nowrap, checked, selected...)
 * count as set too.
 *
 * @param string $name Property name.
 *
 * @return bool
 */
function __isset($name)
{
    // xmltext was missing here although __get() answers it.
    $virtual = array('outertext', 'innertext', 'plaintext', 'xmltext');

    if (in_array($name, $virtual, true)) {
        return true;
    }

    // array_key_exists() already covers every case isset() would accept.
    return array_key_exists($name, $this->attr);
}
1015 |
/**
 * Magic unset() support: remove an attribute that is currently set.
 *
 * @param string $name Attribute name.
 *
 * @return void
 */
function __unset($name)
{
    if (!isset($this->attr[$name])) {
        return;
    }

    unset($this->attr[$name]);
}
1020 |
/**
 * Convert text from the document's charset to the target charset.
 *
 * Both charsets are read from the owning DOM. Nothing is converted when
 * either is unknown, when they are equal, or when the target is UTF-8 and
 * the text already is valid UTF-8 (the reported source charset is then
 * assumed to be wrong). For a UTF-8 target, a byte order mark at the start
 * or end of the result is removed.
 *
 * @param mixed $text Text to convert.
 *
 * @return mixed The converted text; the input unchanged when no conversion
 *               applies or the conversion fails.
 */
function convert_text($text)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    $converted_text = $text;

    $sourceCharset = '';
    $targetCharset = '';

    if ($this->dom) {
        $sourceCharset = strtoupper($this->dom->_charset);
        $targetCharset = strtoupper($this->dom->_target_charset);
    }

    if (is_object($debug_object)) {
        $debug_object->debug_log(3,
            'source charset: '
            . $sourceCharset
            . ' target charaset: '
            . $targetCharset
        );
    }

    if (!empty($sourceCharset)
        && !empty($targetCharset)
        && (strcasecmp($sourceCharset, $targetCharset) != 0)) {
        // Check if the reported encoding could have been incorrect and the
        // text is actually already UTF-8
        if ((strcasecmp($targetCharset, 'UTF-8') == 0)
            && ($this->is_utf8($text))) {
            $converted_text = $text;
        } else {
            // iconv() returns false when the text cannot be converted;
            // keep the original text in that case instead of losing it.
            $result = iconv($sourceCharset, $targetCharset, $text);

            if ($result !== false) {
                $converted_text = $result;
            }
        }
    }

    // Make sure no byte order mark ends up in UTF-8 output.
    if ($targetCharset === 'UTF-8') {
        if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
            $converted_text = substr($converted_text, 3);
        }

        if (substr($converted_text, -3) === "\xef\xbb\xbf") {
            $converted_text = substr($converted_text, 0, -3);
        }
    }

    return $converted_text;
}
1070 |
/**
 * Tell whether a string is valid UTF-8.
 *
 * Validation is delegated to PCRE: with the "u" modifier preg_match() fails
 * on any subject that is not well-formed UTF-8. The former hand-written
 * check accepted the lone byte 0x80 as ASCII, as well as overlong and
 * obsolete 5/6-byte sequences.
 *
 * @param string $str Bytes to check.
 *
 * @return bool True for valid UTF-8 (including pure ASCII and the empty
 *              string), false otherwise.
 */
static function is_utf8($str)
{
    // The empty pattern matches every valid subject, so the result only
    // reflects whether the subject passed UTF-8 validation.
    return preg_match('//u', $str) === 1;
}
1097 |
/**
 * Work out the display size of an <img> element from its own markup.
 *
 * The `width` / `height` attributes win. For a dimension that has no
 * attribute, the inline `style` attribute is consulted and a whole-pixel
 * declaration (e.g. `width: 120px`) is used instead. Stylesheets, classes
 * and ids are not taken into account.
 *
 * @return array|false False when this node is not an `img` tag; otherwise
 *                     array('height' => ..., 'width' => ...) in which a
 *                     dimension that could not be determined is -1.
 */
function get_display_size()
{
    if ($this->tag !== 'img') {
        return false;
    }

    // Start from the tag's own attributes; -1 marks "unknown".
    $size = array(
        'height' => isset($this->attr['height']) ? $this->attr['height'] : -1,
        'width' => isset($this->attr['width']) ? $this->attr['width'] : -1
    );

    if (!isset($this->attr['style'])) {
        return $size;
    }

    // Split the inline style into "property: value" declarations.
    // Thanks to user gnarf from stackoverflow for this regular expression.
    preg_match_all(
        '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
        $this->attr['style'],
        $matches,
        PREG_SET_ORDER
    );

    $declarations = array();

    foreach ($matches as $match) {
        // CSS property names are case-insensitive, and the value group can
        // capture whitespace in front of the ';', which would otherwise
        // hide the 'px' suffix.
        $declarations[strtolower($match[1])] = trim($match[2]);
    }

    foreach ($size as $dimension => $current) {
        // Only fall back to the style when the attribute was absent.
        if ($current != -1 || !isset($declarations[$dimension])) {
            continue;
        }

        $value = $declarations[$dimension];

        // Only pixel lengths are understood.
        if (strtolower(substr($value, -2)) !== 'px') {
            continue;
        }

        $pixels = substr($value, 0, -2);

        // FILTER_VALIDATE_INT yields int 0 for "0", so compare against
        // false explicitly instead of relying on truthiness.
        if (filter_var($pixels, FILTER_VALIDATE_INT) !== false) {
            $size[$dimension] = $pixels;
        }
    }

    // Possible future work: resolve sizes coming from class/id rules, from
    // ancestor selectors that target img, or from external stylesheets.

    return $size;
}
1182 |
/**
 * Render this node with outertext() and optionally write the result to disk.
 *
 * @param string $filepath Destination path; the default '' skips writing.
 *
 * @return string The markup returned by outertext().
 */
function save($filepath = '')
{
    $markup = $this->outertext();

    if ($filepath === '') {
        return $markup;
    }

    // LOCK_EX: take an exclusive lock while the file is being written.
    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
1193 |
/**
 * Add one or more class names to this element's `class` attribute.
 *
 * Names that are already present are left alone. Empty names (produced by
 * leading, trailing or repeated spaces in a string argument) are ignored so
 * they cannot introduce stray spaces or an empty `class` attribute.
 *
 * @param string|array $class Space-separated class names, or a list of
 *                            names. Any other type is reported to the debug
 *                            log (when one is installed) and ignored.
 *
 * @return void
 */
function addClass($class)
{
    // Without this declaration $debug_object is an undefined local and the
    // invalid-type report below can never fire.
    global $debug_object;

    if (is_string($class)) {
        $class = explode(' ', $class);
    }

    if (!is_array($class)) {
        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'Invalid type: ', gettype($class));
        }

        return;
    }

    foreach ($class as $c) {
        if ($c === '') {
            continue;
        }

        if (!isset($this->class)) {
            $this->class = $c;
        } elseif (!$this->hasClass($c)) {
            $this->class .= ' ' . $c;
        }
    }
}
1218 |
/**
 * Check whether this element's `class` attribute contains a class name.
 *
 * The attribute is split on single spaces and compared strictly, so the
 * match is case-sensitive and must be exact.
 *
 * @param string $class Class name to look for. Any other type is reported
 *                      to the debug log (when one is installed).
 *
 * @return bool True when the name is present; false when it is absent, when
 *              there is no `class` attribute, or when $class is not a string.
 */
function hasClass($class)
{
    // Without this declaration $debug_object is an undefined local and the
    // invalid-type report below can never fire.
    global $debug_object;

    if (!is_string($class)) {
        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'Invalid type: ', gettype($class));
        }

        return false;
    }

    return isset($this->class)
        && in_array($class, explode(' ', $this->class), true);
}
1233 |
/**
 * Remove class names from this element's `class` attribute.
 *
 * @param string|array|null $class Space-separated names or a list of names
 *                                 to remove; null (default) removes the
 *                                 whole attribute. Other types are ignored.
 *
 * @return void
 */
function removeClass($class = null)
{
    // Nothing to do when the element carries no class attribute.
    if (!isset($this->class)) {
        return;
    }

    if (is_null($class)) {
        $this->removeAttribute('class');
        return;
    }

    $unwanted = is_string($class) ? explode(' ', $class) : $class;

    if (!is_array($unwanted)) {
        return;
    }

    $remaining = array_diff(explode(' ', $this->class), $unwanted);

    // Drop the attribute altogether once no class name is left.
    if (empty($remaining)) {
        $this->removeAttribute('class');
        return;
    }

    $this->class = implode(' ', $remaining);
}
1258 |
/**
 * Return the contents of this node's `attr` property.
 *
 * @return mixed Whatever is stored in $this->attr.
 */
function getAllAttributes()
{
    $attributes = $this->attr;

    return $attributes;
}
1263 |
/**
 * Read an attribute; delegates to the magic __get() accessor.
 *
 * @param string $name Attribute name.
 *
 * @return mixed Whatever __get() returns for $name.
 */
function getAttribute($name)
{
    $value = $this->__get($name);

    return $value;
}
1268 |
/**
 * Write an attribute; delegates to the magic __set() accessor.
 *
 * @param string $name  Attribute name.
 * @param mixed  $value Value handed to __set().
 *
 * @return void
 */
function setAttribute($name, $value)
{
    // Same effect as assigning through property syntax.
    $this->__set($name, $value);
}
1273 |
/**
 * Test for an attribute; delegates to the magic __isset() accessor.
 *
 * @param string $name Attribute name.
 *
 * @return mixed Whatever __isset() returns for $name.
 */
function hasAttribute($name)
{
    $present = $this->__isset($name);

    return $present;
}
1278 |
/**
 * Remove an attribute by passing null for it to the magic __set() accessor.
 *
 * NOTE(review): presumably __set() treats null as "delete the attribute";
 * confirm against its implementation.
 *
 * @param string $name Attribute name.
 *
 * @return void
 */
function removeAttribute($name)
{
    $noValue = null;

    $this->__set($name, $noValue);
}
1283 |
/**
 * Detach this node from its parent by asking the parent to remove it.
 * Does nothing when the node has no parent.
 *
 * @return void
 */
function remove()
{
    if (!$this->parent) {
        return;
    }

    $this->parent->removeChild($this);
}
1290 |
/**
 * Remove a child element, together with everything below it, from this
 * node and from the owning DOM's node list.
 *
 * Nothing happens unless $node is found in this node's `nodes`, in its
 * `children` and in `$this->dom->nodes`. Entries are unset in place, so
 * the affected arrays are not re-indexed.
 *
 * @param object $node The child node to remove.
 *
 * @return void
 */
function removeChild($node)
{
    $nodeKey = array_search($node, $this->nodes, true);
    $childKey = array_search($node, $this->children, true);
    $domKey = array_search($node, $this->dom->nodes, true);

    if ($nodeKey === false || $childKey === false || $domKey === false) {
        return;
    }

    // Remove the child's own child elements first (recursively).
    foreach ($node->children as $grandchild) {
        $node->removeChild($grandchild);
    }

    // Then drop whatever is still listed in the child's node list.
    foreach ($node->nodes as $leftover) {
        $leftoverKey = array_search($leftover, $node->nodes, true);
        $leftoverDomKey = array_search($leftover, $node->dom->nodes, true);

        if ($leftoverKey === false || $leftoverDomKey === false) {
            continue;
        }

        unset($node->nodes[$leftoverKey]);
        unset($node->dom->nodes[$leftoverDomKey]);
    }

    // Finally unlink the child itself and release it.
    unset($this->nodes[$nodeKey]);
    unset($this->children[$childKey]);
    unset($this->dom->nodes[$domKey]);

    $node->clear();
}
1321 |
/**
 * Find the first match for the selector "#<id>".
 *
 * @param string $id Element id, without the leading '#'.
 *
 * @return mixed Result of find() for index 0.
 */
function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
1326 |
/**
 * Run find() with the selector "#<id>".
 *
 * @param string   $id  Element id, without the leading '#'.
 * @param int|null $idx Index forwarded to find(); null by default.
 *
 * @return mixed Result of find().
 */
function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
1331 |
/**
 * Find the first match for a tag-name selector.
 *
 * @param string $name Tag name, passed to find() unchanged.
 *
 * @return mixed Result of find() for index 0.
 */
function getElementByTagName($name)
{
    $first = $this->find($name, 0);

    return $first;
}
1336 |
/**
 * Run find() with a tag-name selector.
 *
 * @param string   $name Tag name, passed to find() unchanged.
 * @param int|null $idx  Index forwarded to find(); null by default.
 *
 * @return mixed Result of find().
 */
function getElementsByTagName($name, $idx = null)
{
    $found = $this->find($name, $idx);

    return $found;
}
1341 |
/**
 * camelCase alias of parent().
 *
 * @return mixed Whatever parent() returns.
 */
function parentNode()
{
    $parent = $this->parent();

    return $parent;
}
1346 |
/**
 * camelCase alias of children().
 *
 * @param int $idx Index forwarded to children(); -1 by default.
 *
 * @return mixed Whatever children() returns.
 */
function childNodes($idx = -1)
{
    $children = $this->children($idx);

    return $children;
}
1351 |
/**
 * camelCase alias of first_child().
 *
 * @return mixed Whatever first_child() returns.
 */
function firstChild()
{
    $first = $this->first_child();

    return $first;
}
1356 |
/**
 * camelCase alias of last_child().
 *
 * @return mixed Whatever last_child() returns.
 */
function lastChild()
{
    $last = $this->last_child();

    return $last;
}
1361 |
/**
 * camelCase alias of next_sibling().
 *
 * @return mixed Whatever next_sibling() returns.
 */
function nextSibling()
{
    $next = $this->next_sibling();

    return $next;
}
1366 |
/**
 * camelCase alias of prev_sibling().
 *
 * @return mixed Whatever prev_sibling() returns.
 */
function previousSibling()
{
    $previous = $this->prev_sibling();

    return $previous;
}
1371 |
/**
 * camelCase alias of has_child().
 *
 * @return mixed Whatever has_child() returns.
 */
function hasChildNodes()
{
    $hasChildren = $this->has_child();

    return $hasChildren;
}
1376 |
/**
 * Return this node's tag name as stored in the `tag` property.
 *
 * @return mixed The value of $this->tag.
 */
function nodeName()
{
    $name = $this->tag;

    return $name;
}
1381 |
/**
 * Attach a node to this one by calling $node->parent($this).
 *
 * NOTE(review): presumably parent() with an argument re-parents the node
 * and registers it with the new parent; confirm against parent().
 *
 * @param object $node The node to attach.
 *
 * @return object The same $node, to allow chaining.
 */
function appendChild($node)
{
    $newParent = $this;
    $node->parent($newParent);

    return $node;
}
1387 |
1388 | }
1389 |
1390 | class simple_html_dom
1391 | {
// --- Public document state -------------------------------------------

public $root = null;
public $nodes = array();
public $callback = null;
public $lowercase = false;
public $original_size;
public $size;

// --- Parser working state ----------------------------------------------

protected $pos;
protected $doc;
protected $char;

protected $cursor;
protected $parent;
protected $noise = array();

// Character sets used while tokenising. NOTE(review): the exact role of
// each set is defined by the parsing methods; confirm there before
// changing any of them.
protected $token_blank = " \t\r\n";
protected $token_equal = ' =/>';
protected $token_slash = " />\r\n\t";
protected $token_attr = ' >';

// --- Character sets ------------------------------------------------------

public $_charset = '';
// Assigned by the constructor from its $target_charset argument.
public $_target_charset = '';

// --- Replacement texts -----------------------------------------------------

protected $default_br_text = '';

public $default_span_text = '';

// --- Tag tables (tag name => 1, used as lookup sets) -----------------------

// Elements that never take a closing tag.
protected $self_closing_tags = array(
    'area' => 1,
    'base' => 1,
    'br' => 1,
    'col' => 1,
    'embed' => 1,
    'hr' => 1,
    'img' => 1,
    'input' => 1,
    'link' => 1,
    'meta' => 1,
    'param' => 1,
    'source' => 1,
    'track' => 1,
    'wbr' => 1
);

protected $block_tags = array(
    'body' => 1,
    'div' => 1,
    'form' => 1,
    'root' => 1,
    'span' => 1,
    'table' => 1
);

// Maps a tag name to a set of tag names.
// NOTE(review): presumably "an open <key> is implicitly closed when one of
// the listed tags starts"; confirm against the parser.
protected $optional_closing_tags = array(
    // Not optional, see
    // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
    'b' => array('b' => 1),
    'dd' => array('dd' => 1, 'dt' => 1),
    // Not optional, see
    // https://www.w3.org/TR/html/grouping-content.html#the-dl-element
    'dl' => array('dd' => 1, 'dt' => 1),
    'dt' => array('dd' => 1, 'dt' => 1),
    'li' => array('li' => 1),
    'optgroup' => array('optgroup' => 1, 'option' => 1),
    'option' => array('optgroup' => 1, 'option' => 1),
    'p' => array('p' => 1),
    'rp' => array('rp' => 1, 'rt' => 1),
    'rt' => array('rp' => 1, 'rt' => 1),
    'td' => array('td' => 1, 'th' => 1),
    'th' => array('td' => 1, 'th' => 1),
    'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
);
1461 |
/**
 * Create a DOM and, when $str is given, load it immediately.
 *
 * $str is handed to load_file() when it starts with http:// or https://
 * or names an existing file; otherwise it is parsed as markup via load().
 *
 * @param string|null $str             URL, file path or markup; a falsy
 *                                     value creates an empty DOM.
 * @param bool        $lowercase       Forwarded to load().
 * @param bool        $forceTagsClosed When false, the optional-closing-tag
 *                                     table is emptied.
 * @param string      $target_charset  Stored in $_target_charset.
 * @param bool        $stripRN         Forwarded to load().
 * @param string      $defaultBRText   Forwarded to load().
 * @param string      $defaultSpanText Forwarded to load().
 * @param int         $options         Forwarded to load().
 */
function __construct(
    $str = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    // Forcing tags to be closed implies that we don't trust the html, but
    // it can lead to parsing errors if we SHOULD trust the html.
    // This targets the declared $optional_closing_tags property and runs
    // before anything is loaded, so the setting is in place for the markup
    // given to the constructor (presumably the parser consults this table).
    if (!$forceTagsClosed) {
        $this->optional_closing_tags = array();
    }

    if ($str) {
        $isUrl = preg_match('/^https?:\/\//i', $str) === 1;

        // Skip the file-system probe for strings that cannot be a path;
        // is_file() complains about over-long names such as whole documents.
        $isFile = !$isUrl
            && strlen($str) <= PHP_MAXPATHLEN
            && is_file($str);

        if ($isUrl || $isFile) {
            $this->load_file($str);
        } else {
            $this->load(
                $str,
                $lowercase,
                $stripRN,
                $defaultBRText,
                $defaultSpanText,
                $options
            );
        }
    }

    $this->_target_charset = $target_charset;
}
1494 |
/**
 * Call clear() when the object is destroyed.
 */
function __destruct()
{
    // All teardown work lives in clear(); this only triggers it.
    $this->clear();
}
1499 |
1500 | function load(
1501 | $str,
1502 | $lowercase = true,
1503 | $stripRN = true,
1504 | $defaultBRText = DEFAULT_BR_TEXT,
1505 | $defaultSpanText = DEFAULT_SPAN_TEXT,
1506 | $options = 0)
1507 | {
1508 | global $debug_object;
1509 |
1510 | // prepare
1511 | $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
1512 |
1513 | // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1514 | // Script tags removal now preceeds style tag removal.
1515 | // strip out