├── application ├── config │ └── hooks.php ├── libraries │ └── CI_Minifier.php └── third_party │ ├── JSPacker.php │ └── Simple_html_dom.php ├── README.md └── LICENSE /application/config/hooks.php: -------------------------------------------------------------------------------- 1 | '', 17 | 'function' => 'CI_Minifier_Hook_Loader', 18 | 'filename' => '', 19 | 'filepath' => '' 20 | ); 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CodeIgniter Minifier - A HTML / CSS / Javascript Minification Library 2 | 3 | ![Screenshot](http://i.imgur.com/L5Cps84.png) 4 | 5 | Compress and minify output for your CodeIgniter framework websites. This library supports CodeIgniter 3 only. It minifies not just HTML, but also CSS and Javascript. 6 | 7 | It also works with `$this->output->cache($n);` to save minified content into the cache files. 8 | 9 | ------------------------------------ 10 | ### Change Logs 11 | * ver 1.0 - first release 12 | * ver 1.1 - Add javascript obfuscator (Dean Edwards' version) 13 | * ver 1.2 - Add PHP Simple Dom parser to parse "script" and "style" tags. It is an alternative if the default parser (DOMDocument) causes your Javascript to not work. 14 | 15 | -------------------------------------- 16 | 17 | ### Step 1: Load CI_Minifier library 18 | 19 | Copy CI_Minifier.php to the libraries folder, and then load the CI_Minifier library in one of the following ways. 20 | 21 | 22 | (1) Load CI_Minifier library in Controller. 
23 | ```php 24 | $this->load->library('CI_Minifier'); 25 | ``` 26 | (2) Load CI_Minifier library in config/autoload.php (recommended) 27 | ```php 28 | $autoload['libraries'] = array('CI_Minifier'); 29 | ``` 30 | 31 | ### Step 2: Enable Hooks in config/config.php 32 | ```php 33 | $config['enable_hooks'] = TRUE; 34 | ``` 35 | ### Step 3: Define a 'display_override' hook in config/hooks.php 36 | ```php 37 | $hook['display_override'][] = array( 38 | 'class' => '', 39 | 'function' => 'CI_Minifier_Hook_Loader', 40 | 'filename' => '', 41 | 'filepath' => '' 42 | ); 43 | ``` 44 | Keep "class", "filename" and "filepath" fields blank. 45 | 46 | ---------------------------------------- 47 | ## Options 48 | 49 | CodeIgniter Minifier has the following options. Pass either the "option number" or the "option string" to init() to choose whether HTML, CSS and Javascript are minified. 50 | 51 | ------------------------ 52 | 53 | Option strings can be combined, separated by commas, for example: 'html,js' 54 | 55 | | option number | option string | HTML | CSS | Javascript | 56 | | ------------- | ------------- | ------------- | ------------- | ------------- | 57 | | 0 | off | x | x | x | 58 | | 1 | html,css,js | o | o | o | 59 | | 2 | html,css | o | o | x | 60 | | 3 | html,js | o | x | o | 61 | | 4 | html | o | x | x | 62 | | 5 | css,js | x | o | o | 63 | | 6 | css | x | o | x | 64 | | 7 | js | x | x | o | 65 | 66 | Note that setting the option to '1' or 'html,css,js' is unnecessary, because it is the default. 67 | 68 | Here are some examples: 69 | ```php 70 | // Keep original output, nothing changes. Use it on pages you do not want to minify. 
71 | $this->ci_minifier->init(0); 72 | // same as 73 | $this->ci_minifier->init('off'); 74 | ``` 75 | ```php 76 | // Minify html only 77 | $this->ci_minifier->init(4); 78 | // same as 79 | $this->ci_minifier->init('html'); 80 | ``` 81 | ```php 82 | // Minify html and css, but not javascript 83 | $this->ci_minifier->init(2); 84 | // same as 85 | $this->ci_minifier->init('html,css'); 86 | ``` 87 | #### Enable Javascript obfuscator 88 | 89 | ![Screenshot](http://i.imgur.com/PRGEKHj.png) 90 | 91 | enable_obfuscator($level = 2) 92 | ```php 93 | /** 94 | * @param int $level - default: 2 95 | * @return bool 96 | */ 97 | $this->ci_minifier->enable_obfuscator(); 98 | ``` 99 | | option level | obfuscation type | 100 | | ------------- | ------------- | 101 | | 0 | None | 102 | | 1 | Numeric | 103 | | 2 | Normal | 104 | | 3 | High ASCII | 105 | 106 | 107 | The Javascript obfuscator is off by default. To use this feature, copy `JSPacker.php` to the `/application/third_party/` folder, then call `$this->ci_minifier->enable_obfuscator();` in your Controller. 108 | 109 | -------------------------------------- 110 | 111 | #### Use PHP Simple Dom parser to parse "script" and "style" tags 112 | 113 | 1. Put Simple_html_dom.php in the /application/third_party folder 114 | 2. Call $this->ci_minifier->set_domparser(2); in your Controller. 
115 | 116 | option value: 1 (default, PHP built-in Dom parser - DOMDocument) 117 | option value: 2 (PHP Simple Dom parser) 118 | 119 | -------------------------------------------------- 120 | ## API 121 | 122 | #### html() 123 | Minify HTML string 124 | ```php 125 | /** 126 | * @param $input 127 | * @return string 128 | */ 129 | $this->ci_minifier->html($input); 130 | ``` 131 | 132 | #### css() 133 | Minify CSS string 134 | ```php 135 | /** 136 | * @param $input 137 | * @return string 138 | */ 139 | $this->ci_minifier->css($input); 140 | ``` 141 | 142 | #### js() 143 | Minify Javascript string 144 | ```php 145 | /** 146 | * @param $input 147 | * @return string 148 | */ 149 | $this->ci_minifier->js($input); 150 | ``` 151 | ***Be careful: This method doesn't support "javascript automatic semicolon insertion". You must add the semicolons yourself, otherwise your javascript code will not work and will generate error messages***. 152 | 153 | ### js_packer() 154 | 155 | Minify Javascript string using JSPacker (Dean Edwards' version) 156 | ```php 157 | /** 158 | * @param $input 159 | * @param $level 160 | * @return string 161 | */ 162 | $this->ci_minifier->js_packer($input, $level = 2); 163 | ``` 164 | ### Success example 165 | 166 | Original code: 167 | ```javascript 168 | 174 | ``` 175 | After minifying 176 | ```javascript 177 | 178 | ``` 179 | ### Failure example 180 | 181 | The original code works in popular browsers because those browsers support "javascript automatic semicolon insertion". 182 | ```javascript 183 | 189 | ``` 190 | After minifying, this code will generate an error because of the missing semicolons. 191 | ```javascript 192 | 193 | ``` 194 | ### Ideas 195 | 196 | Minifying all Javascript snippets is good, but it breaks Google AdSense's TOS, so how can you minify all of them except Google AdSense? 
/**
 * CodeIgniter Minifier - minifies the HTML, CSS and Javascript of the final output.
 *
 * Usage notes (carried over from the file header and the end of the README):
 *   - The Javascript obfuscator (Dean Edwards' packer) is off by default. Put
 *     JSPacker.php in application/third_party and call
 *     $this->ci_minifier->enable_obfuscator() in the Controller.
 *   - To parse "script" / "style" tags with PHP Simple Dom parser instead of
 *     DOMDocument, put Simple_html_dom.php in application/third_party and call
 *     $this->ci_minifier->set_domparser(2) in the Controller.
 *   - A script tag carrying data-minify-level="0" is left untouched; the values
 *     1-3 select the obfuscator encoding level for that single tag.
 *
 * License: GPL version 3
 */
class CI_Minifier
{
    private static $enable_html = true;
    private static $enable_js = true;
    private static $enable_css = true;

    /**
     * @var bool Pack script tags with JSPacker instead of minifyJS().
     */
    private static $enable_obfuscator = false;

    /**
     * @var int Obfuscation level (0-3) used for script tags that do not set data-minify-level.
     */
    private static $obfuscator_level = 2;

    /*
     * 1: DOMDocument
     * 2: simplehtmldom
     */
    public static $dom_parser = 2;

    /**
     * Set a level type to handle the output.
     *
     * @param int|string $level 0: original output, nothing changes.
     *                          1: minify html, css and javascript.
     *                          2: minify html, css
     *                          3: minify html, javascript
     *                          4: minify html
     *                          5: minify css and javascript
     *                          6: minify css
     *                          7: minify javascript
     *
     *                          A string accepts 'js', 'css', 'html' and 'off'; several values can be
     *                          combined, separated by commas, for example: 'html,js'.
     *                          Any other number enables everything; any other type disables everything.
     */
    public function init($level)
    {
        self::$enable_html = false;
        self::$enable_css = false;
        self::$enable_js = false;

        if (is_numeric($level)) {
            switch ($level) {
                case 0:
                    break;
                case 2:
                    self::$enable_html = true;
                    self::$enable_css = true;
                    break;
                case 3:
                    self::$enable_html = true;
                    self::$enable_js = true;
                    break;
                case 4:
                    self::$enable_html = true;
                    break;
                case 5:
                    self::$enable_css = true;
                    self::$enable_js = true;
                    break;
                case 6:
                    self::$enable_css = true;
                    break;
                case 7:
                    self::$enable_js = true;
                    break;
                case 1:
                default:
                    self::$enable_html = true;
                    self::$enable_css = true;
                    self::$enable_js = true;
                    break;
            }
        } elseif (is_string($level)) {
            // Option names are matched case-insensitively; 'off' (or anything unknown) enables nothing.
            foreach (explode(',', strtolower(str_replace(' ', '', $level))) as $type) {
                if ($type == 'html') {
                    self::$enable_html = true;
                } elseif ($type == 'css') {
                    self::$enable_css = true;
                } elseif ($type == 'js') {
                    self::$enable_js = true;
                }
            }
        }
    }

    /**
     * Choose the DOM parser used to find "script" and "style" tags.
     *
     * @param int $type 1: DOMDocument, 2: PHP Simple Dom parser
     * @return bool false when parser 2 was requested but Simple_html_dom.php could not be
     *              loaded (the parser is then reset to 1), true otherwise.
     */
    public function set_domparser($type = 1)
    {
        self::$dom_parser = $type;

        // output() instantiates \SimpleDom\Simple_html_dom, so that is the class to look for.
        if ($type == 2 && !self::loadThirdParty('SimpleDom\\Simple_html_dom', 'Simple_html_dom.php')) {
            self::$dom_parser = 1;
            return false;
        }

        return true;
    }

    /**
     * Pack script tags with JSPacker instead of the regex based minifier.
     *
     * @param int $level 0: None, 1: Numeric, 2: Normal (default), 3: High ASCII
     * @return bool false when JSPacker.php could not be loaded (obfuscation stays off).
     */
    public function enable_obfuscator($level = 2)
    {
        if (!self::loadThirdParty('JSPacker', 'JSPacker.php')) {
            self::$enable_obfuscator = false;
            return false;
        }

        self::$enable_obfuscator = true;
        // Remembered so that tags without data-minify-level use the configured level.
        self::$obfuscator_level = self::normalizeLevel($level, 2);

        return true;
    }

    /**
     * CI Minifier - Output handler
     *
     * Reads the final output from the CodeIgniter Output class, minifies it according to
     * the enabled options, stores it back and displays it.
     *
     * @return void
     */
    public static function output()
    {
        ini_set('pcre.recursion_limit', '16777');

        $CI =& get_instance();

        $buffer = $CI->output->get_output();
        $new_buffer = null;

        // DOMDocument::loadHTML() rejects an empty document, so empty output is passed through.
        if (is_string($buffer) && trim($buffer) !== '') {
            if (self::$enable_js || self::$enable_css) {
                $new_buffer = self::minifyEmbeddedCode($buffer);
            }
            if (self::$enable_html) {
                $html = self::minifyHTML(is_string($new_buffer) ? $new_buffer : $buffer);
                // preg_replace() yields null on a PCRE error; keep what we already have then.
                if (is_string($html)) {
                    $new_buffer = $html;
                }
            }
        }

        if (!is_string($new_buffer)) {
            $new_buffer = $buffer;
        }
        $CI->output->set_output($new_buffer);
        $CI->output->_display();
    }

    /**
     * Include a class file from application/third_party unless the class is already available.
     *
     * `include` only raises a warning when the file is missing (it does not throw), so the
     * file is checked explicitly and the class is verified afterwards.
     *
     * @param string $class class expected to be defined by the file
     * @param string $file  file name inside APPPATH . 'third_party/'
     * @return bool
     */
    private static function loadThirdParty($class, $file)
    {
        if (class_exists($class)) {
            return true;
        }

        $path = APPPATH . 'third_party/' . $file;
        if (!is_file($path)) {
            return false;
        }
        include_once $path;

        return class_exists($class, false);
    }

    /**
     * Turn a level given by the caller or a data-minify-level attribute into 0-3.
     *
     * @param mixed $level   number, numeric string, or '' / false / null when absent
     * @param int   $default used when $level is absent, not numeric or out of range
     * @return int
     */
    private static function normalizeLevel($level, $default)
    {
        if (is_string($level)) {
            $level = trim($level);
        }
        if (!is_numeric($level)) {
            return $default;
        }
        $level = (int) $level;

        return ($level >= 0 && $level <= 3) ? $level : $default;
    }

    /**
     * Minify the content of script and/or style tags with the selected DOM parser.
     *
     * Falls back to DOMDocument when the Simple Dom parser is selected but cannot be loaded.
     *
     * @param string $buffer
     * @return string|null null when no DOM parser is available
     */
    private static function minifyEmbeddedCode($buffer)
    {
        if (self::$dom_parser == 2 && self::loadThirdParty('SimpleDom\\Simple_html_dom', 'Simple_html_dom.php')) {
            return self::minifyWithSimpleDom($buffer);
        }

        // "Class 'DOMDocument' not found" means php-xml is missing
        // (for CentOS, run "yum install php-xml").
        if (class_exists('DOMDocument')) {
            return self::minifyWithDomDocument($buffer);
        }

        return null;
    }

    /**
     * DOMDocument based pass over script and style tags.
     *
     * @param string $buffer
     * @return string|false result of DOMDocument::saveHTML()
     */
    private static function minifyWithDomDocument($buffer)
    {
        // prevent DOMDocument::loadHTML error; the previous mode is restored below
        $previous = libxml_use_internal_errors(true);

        $dom = new DOMDocument;
        $dom->loadHTML($buffer, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

        if (self::$enable_js) {
            foreach ($dom->getElementsByTagName('script') as $script) {
                $tag_level = $script->getAttribute('data-minify-level');
                if ($tag_level != '0' && !empty($script->nodeValue)) {
                    $script->nodeValue = self::minifyScript($script->nodeValue, $tag_level);
                }
            }
        }

        if (self::$enable_css) {
            foreach ($dom->getElementsByTagName('style') as $style) {
                if (!empty($style->nodeValue)) {
                    $css = self::minifyCSS($style->nodeValue);
                    // Never wipe a style tag because the regex engine gave up.
                    if (is_string($css)) {
                        $style->nodeValue = $css;
                    }
                }
            }
        }

        $html = $dom->saveHTML();

        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $html;
    }

    /**
     * PHP Simple Dom parser based pass over script and style tags.
     *
     * @param string $buffer
     * @return string
     */
    private static function minifyWithSimpleDom($buffer)
    {
        $dom = new \SimpleDom\Simple_html_dom();
        $dom->load($buffer, true, false);

        if (self::$enable_js) {
            foreach ($dom->find('script') as $script) {
                $tag_level = $script->{'data-minify-level'};
                if ($tag_level !== '0' && !empty($script->innertext)) {
                    $script->innertext = self::minifyScript($script->innertext, $tag_level);
                }
            }
        }

        if (self::$enable_css) {
            foreach ($dom->find('style') as $style) {
                if (!empty($style->innertext)) {
                    $css = self::minifyCSS($style->innertext);
                    if (is_string($css)) {
                        $style->innertext = $css;
                    }
                }
            }
        }

        return $dom->save();
    }

    /**
     * Minify or pack the body of one script tag.
     *
     * @param string $code
     * @param mixed  $tag_level value of the tag's data-minify-level attribute, if any
     * @return string the original code when minification fails
     */
    private static function minifyScript($code, $tag_level)
    {
        if (self::$enable_obfuscator) {
            $result = self::packerJS($code, self::normalizeLevel($tag_level, self::$obfuscator_level));
        } else {
            $result = self::minifyJS($code);
        }

        return is_string($result) ? $result : $code;
    }

    /**
     * Minify the HTML text
     *
     * NOTE(review): several patterns below contain empty `()` and `(?)` groups in the copy
     * under review, which looks like `<...>` fragments lost during extraction. They are
     * reproduced unchanged here - compare with the upstream file before relying on them.
     *
     * @param string $input
     * @return mixed
     *
     * @link https://github.com/mecha-cms/mecha-cms/blob/master/engine/kernel/converter.php
     * @author Taufik Nurrohman
     * @license GPL version 3 License Copyright
     *
     */
    private static function minifyHTML($input)
    {
        if (trim($input) === "") {
            return $input;
        }

        // Remove extra white-space(s) between HTML attribute(s)
        $input = preg_replace_callback(
            '#<([^\/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(\/?)>#s',
            function ($matches) {
                return '<' . $matches[1] . preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s', ' $1$2', $matches[2]) . $matches[3] . '>';
            },
            str_replace("\r", "", $input)
        );

        // Minify inline CSS declaration(s)
        if (strpos($input, ' style=') !== false) {
            $input = preg_replace_callback(
                '#<([^<]+?)\s+style=([\'"])(.*?)\2(?=[\/\s>])#s',
                function ($matches) {
                    return '<' . $matches[1] . ' style=' . $matches[2] . self::minifyCSS($matches[3]) . $matches[2];
                },
                $input
            );
        }
        return preg_replace(
            array(
                // t = text
                // o = tag open
                // c = tag close
                // Keep important white-space(s) after self-closing HTML tag(s)
                '#<(img|input)(>| .*?>)#s',

                // Remove a line break and two or more white-space(s) between tag(s)
                '#()|(>)(?:\n*|\s{2,})(<)|^\s*|\s*$#s',
                '#()|(?)\s+(<\/.*?>)|(<[^\/]*?>)\s+(?!\<)#s', // t+c || o+t
                '#()|(<[^\/]*?>)\s+(<[^\/]*?>)|(<\/.*?>)\s+(<\/.*?>)#s', // o+o || c+c
                '#()|(<\/.*?>)\s+(\s)(?!\<)|(?)\s+(\s)(<[^\/]*?\/?>)|(<[^\/]*?\/?>)\s+(\s)(?!\<)#s', // c+t || t+o || o+t -- separated by long white-space(s)
                '#()|(<[^\/]*?>)\s+(<\/.*?>)#s', // empty tag
                '#<(img|input)(>| .*?>)<\/\1\x1A>#s', // reset previous fix
                '#( ) (?![<\s])#', // clean up ...

                // Force line-break with ` ` or ` `
                '#&\#(?:10|xa);#',

                // Force white-space with ` ` or ` `
                '#&\#(?:32|x20);#',

                // Remove HTML comment(s) except IE comment(s)
                '#\s*\s*|(?)\n+(?=\<[^!])#s'
            ),
            array(
                "<$1$2",
                '$1$2$3',
                '$1$2$3',
                '$1$2$3$4$5',
                '$1$2$3$4$5$6$7',
                '$1$2$3',
                '<$1$2',
                '$1 ',
                "\n",
                ' ',
                ""
            ),
            $input);
    }

    /**
     * Minify the CSS text
     *
     * NOTE(review): the "Minify string value" pattern below looks like two patterns fused
     * together in the copy under review (there is one pattern fewer than replacements).
     * It is reproduced unchanged - compare with the upstream file before relying on it.
     *
     * @param string $input
     * @return mixed
     *
     * @link http://ideone.com/Q5USEF + improvement(s)
     * @author Unknown, improved by Taufik Nurrohman
     * @license GPL version 3 License Copyright
     */
    private static function minifyCSS($input)
    {
        if (trim($input) === "") {
            return $input;
        }

        // Force white-space(s) in `calc()`
        if (strpos($input, 'calc(') !== false) {
            $input = preg_replace_callback('#(?<=[\s:])calc\(\s*(.*?)\s*\)#',
                function ($matches) {
                    return 'calc(' . preg_replace('#\s+#', "\x1A", $matches[1]) . ')';
                },
                $input
            );
        }

        return preg_replace(
            array(
                // Remove comment(s)
                '#("(?:[^"\\\]++|\\\.)*+"|\'(?:[^\'\\\\]++|\\\.)*+\')|\/\*(?!\!)(?>.*?\*\/)|^\s*|\s*$#s',

                // Remove unused white-space(s)
                '#("(?:[^"\\\]++|\\\.)*+"|\'(?:[^\'\\\\]++|\\\.)*+\'|\/\*(?>.*?\*\/))|\s*+;\s*+(})\s*+|\s*+([*$~^|]?+=|[{};,>~+]|\s*+-(?![0-9\.])|!important\b)\s*+|([[(:])\s++|\s++([])])|\s++(:)\s*+(?!(?>[^{}"\']++|"(?:[^"\\\]++|\\\.)*+"|\'(?:[^\'\\\\]++|\\\.)*+\')*+{)|^\s++|\s++\z|(\s)\s+#si',

                // Replace `0(cm|em|ex|in|mm|pc|pt|px|vh|vw|%)` with `0`
                '#(?<=[\s:])(0)(cm|em|ex|in|mm|pc|pt|px|vh|vw|%)#si',

                // Replace `:0 0 0 0` with `:0`
                '#:(0\s+0|0\s+0\s+0\s+0)(?=[;\}]|\!important)#i',

                // Replace `background-position:0` with `background-position:0 0`
                '#(background-position):0(?=[;\}])#si',

                // Replace `0.6` with `.6`, but only when preceded by a white-space or `=`, `:`, `,`, `(`, `-`
                '#(?<=[\s=:,\(\-]|&\#32;)0+\.(\d+)#s',

                // Minify string value
                '#(\/\*(?>.*?\*\/))|(?.*?\*\/))|(\burl\()([\'"])([^\s]+?)\3(\))#si',

                // Minify HEX color code (hex digits are 0-9 and a-f; the previous class
                // `[a-f0-6]` skipped every colour containing 7, 8 or 9)
                '#(?<=[\s=:,\(]\#)([a-f0-9]+)\1([a-f0-9]+)\2([a-f0-9]+)\3#i',

                // Replace `(border|outline):none` with `(border|outline):0`
                '#(?<=[\{;])(border|outline):none(?=[;\}\!])#',

                // Remove empty selector(s)
                '#(\/\*(?>.*?\*\/))|(^|[\{\}])(?:[^\s\{\}]+)\{\}#s',
                '#\x1A#'
            ),
            array(
                '$1',
                '$1$2$3$4$5$6$7',
                '$1',
                ':0',
                '$1:0 0',
                '.$1',
                '$1$3',
                '$1$2$4$5',
                '$1$2$3',
                '$1:0',
                '$1$2',
                ' '
            ),
            $input
        );
    }

    /**
     * Minify the Javascript text
     *
     * Be careful:
     * This method doesn't support "javascript automatic semicolon insertion", you must add semicolon by yourself,
     * otherwise your javascript code will not work and generate error messages.
     *
     * NOTE(review): the first pattern below looks like two patterns fused together in the
     * copy under review (there is one pattern fewer than replacements). It is reproduced
     * unchanged - compare with the upstream file before relying on it.
     *
     * @param string $input
     * @return mixed
     *
     * @link https://github.com/mecha-cms/mecha-cms/blob/master/engine/kernel/converter.php
     * @author Taufik Nurrohman
     * @license GPL version 3 License Copyright
     */
    private static function minifyJS($input)
    {
        if (trim($input) === "") {
            return $input;
        }

        return preg_replace(
            array(
                // Remove comment(s)
                '#\s*("(?:[^"\\\]++|\\\.)*+"|\'(?:[^\'\\\\]++|\\\.)*+\')\s*|\s*\/\*(?!\!|@cc_on)(?>[\s\S]*?\*\/)\s*|\s*(?.*?\*\/)|\/(?!\/)[^\n\r]*?\/(?=[\s.,;]|[gimuy]|$))|\s*([!%&*\(\)\-=+\[\]\{\}|;:,.<>?\/])\s*#s',

                // Remove the last semicolon
                '#;+\}#',

                // Minify object attribute(s) except JSON attribute(s). From `{'foo':'bar'}` to `{foo:'bar'}`
                '#([\{,])([\'])(\d+|[a-z_]\w*)\2(?=\:)#i',

                // --ibid. From `foo['bar']` to `foo.bar`
                '#([\w\)\]])\[([\'"])([a-z_]\w*)\2\]#i',

                // Replace `true` with `!0`
                '#(?<=return |[=:,\(\[])true\b#',

                // Replace `false` with `!1`
                '#(?<=return |[=:,\(\[])false\b#',

                // Clean up ...
                '#\s*(\/\*|\*\/)\s*#'
            ),
            array(
                '$1',
                '$1$2',
                '}',
                '$1$3',
                '$1.$3',
                '!0',
                '!1',
                '$1'
            ),
            $input
        );
    }

    /**
     * Another minification engine by Dean Edwards.
     *
     * A fresh JSPacker is created for every script: JSPacker::pack() registers its parsing
     * passes on each call, so a reused instance would run them again on later scripts.
     *
     * @param string $input
     * @param int    $level 0: None, 1: Numeric, 2: Normal (default), 3: High ASCII
     * @return string
     * @throws RuntimeException when JSPacker.php is not available
     */
    private static function packerJS($input, $level = 2)
    {
        if (!self::loadThirdParty('JSPacker', 'JSPacker.php')) {
            throw new RuntimeException('JSPacker.php was not found in ' . APPPATH . 'third_party/');
        }

        // obfuscation level => JSPacker encoding
        $encodings = array(0 => 0, 1 => 10, 2 => 62, 3 => 95);
        $packer = new JSPacker($input, $encodings[self::normalizeLevel($level, 2)], true, false);

        return $packer->pack();
    }

    /**
     * Alias for static minifyHTML()
     *
     * @param $input
     * @return mixed
     */
    public function html($input)
    {
        return self::minifyHTML($input);
    }

    /**
     * Alias for static minifyCSS()
     *
     * @param $input
     * @return mixed
     */
    public function css($input)
    {
        return self::minifyCSS($input);
    }

    /**
     * Alias for static minifyJS()
     *
     * @param $input
     * @return mixed
     */
    public function js($input)
    {
        return self::minifyJS($input);
    }

    /**
     * Alias for static packerJS()
     *
     * @param $input
     * @param int $level 0: None, 1: Numeric, 2: Normal (default), 3: High ASCII
     * @return mixed
     */
    public function js_packer($input, $level = 2)
    {
        return self::packerJS($input, $level);
    }
}


// This global function is only used by the "display_override" hook in /config/hooks.php
// Please add the following setting in /config/hooks.php

# $hook['display_override'][] = array(
#     'class' => '',
#     'function' => 'CI_Minifier_Hook_Loader',
#     'filename' => '',
#     'filepath' => ''
# );

# For getting more control of output, but there is no way to pass variables to hook function and class..
583 | # Finally I decided to use a class instead of a function, but a function that can be called by 584 | # /system/core/Hook.php (line:259) is still needed. It simply delegates to CI_Minifier::output(). 585 | 586 | function CI_Minifier_Hook_Loader() 587 | { 588 | return CI_Minifier::output(); 589 | } 590 | -------------------------------------------------------------------------------- /application/third_party/JSPacker.php: -------------------------------------------------------------------------------- 1 | pack(); 26 | * 27 | * or 28 | * 29 | * $myPacker = new JSPacker($script, 'Normal', true, false); 30 | * $packed = $myPacker->pack(); 31 | * 32 | * or (default values) 33 | * 34 | * $myPacker = new JSPacker($script); 35 | * $packed = $myPacker->pack(); 36 | * 37 | * 38 | * params of the constructor : 39 | * $script: the JavaScript to pack, string. 40 | * $encoding: level of encoding, int or string : 41 | * 0,10,62,95 or 'None', 'Numeric', 'Normal', 'High ASCII'. 42 | * default: 62. 43 | * $fastDecode: include the fast decoder in the packed result, boolean. 44 | * default : true. 45 | * $specialChars: whether you have flagged your private and local variables 46 | * in the script, boolean. 47 | * default: false. 48 | * 49 | * The pack() method returns the compressed JavaScript, as a string. 50 | * 51 | * see http://dean.edwards.name/packer/usage/ for more information. 52 | * 53 | * Notes : 54 | * # need PHP 5 . Tested with PHP 5.1.2, 5.1.3, 5.1.4, 5.2.3 55 | * 56 | * # The packed result may be different than with the Dean Edwards 57 | * version, but with the same length. The reason is that the PHP 58 | * function usort does not necessarily preserve the 59 | * original order of two equal members. The Javascript sort function 60 | * in fact preserves this order (but that's not required by the 61 | * ECMAScript standard). So the encoded keywords order can be 62 | * different in the two results. 63 | * 64 | * # Be careful with the 'High ASCII' Level encoding if you use 65 | * UTF-8 in your files... 
66 | */ 67 | 68 | 69 | class JSPacker { 70 | // constants 71 | const IGNORE = '$1'; 72 | 73 | // validate parameters 74 | private $_script = ''; 75 | private $_encoding = 62; 76 | private $_fastDecode = true; 77 | private $_specialChars = false; 78 | 79 | private $LITERAL_ENCODING = array( 80 | 'None' => 0, 81 | 'Numeric' => 10, 82 | 'Normal' => 62, 83 | 'High ASCII' => 95 84 | ); 85 | 86 | public function __construct($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false) 87 | { 88 | $this->_script = $_script . "\n"; 89 | if (array_key_exists($_encoding, $this->LITERAL_ENCODING)) 90 | $_encoding = $this->LITERAL_ENCODING[$_encoding]; 91 | $this->_encoding = min((int)$_encoding, 95); 92 | $this->_fastDecode = $_fastDecode; 93 | $this->_specialChars = $_specialChars; 94 | } 95 | 96 | public function load_script($_script) 97 | { 98 | $this->_script = $_script . "\n"; 99 | } 100 | 101 | public function set_encoding($_encoding) 102 | { 103 | $this->_encoding = $_encoding; 104 | } 105 | public function pack() { 106 | $this->_addParser('_basicCompression'); 107 | if ($this->_specialChars) 108 | $this->_addParser('_encodeSpecialChars'); 109 | if ($this->_encoding) 110 | $this->_addParser('_encodeKeywords'); 111 | 112 | // go! 
113 | return $this->_pack($this->_script); 114 | } 115 | 116 | // apply all parsing routines 117 | private function _pack($script) { 118 | for ($i = 0; isset($this->_parsers[$i]); $i++) { 119 | $script = call_user_func(array(&$this,$this->_parsers[$i]), $script); 120 | } 121 | return $script; 122 | } 123 | 124 | // keep a list of parsing functions, they'll be executed all at once 125 | private $_parsers = array(); 126 | private function _addParser($parser) { 127 | $this->_parsers[] = $parser; 128 | } 129 | 130 | // zero encoding - just removal of white space and comments 131 | private function _basicCompression($script) { 132 | $parser = new ParseMaster(); 133 | // make safe 134 | $parser->escapeChar = '\\'; 135 | // protect strings 136 | $parser->add('/\'[^\'\\n\\r]*\'/', self::IGNORE); 137 | $parser->add('/"[^"\\n\\r]*"/', self::IGNORE); 138 | // remove comments 139 | $parser->add('/\\/\\/[^\\n\\r]*[\\n\\r]/', ' '); 140 | $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' '); 141 | // protect regular expressions 142 | $parser->add('/\\s+(\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?)/', '$2'); // IGNORE 143 | $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?/', self::IGNORE); 144 | // remove: ;;; doSomething(); 145 | if ($this->_specialChars) $parser->add('/;;;[^\\n\\r]+[\\n\\r]/'); 146 | // remove redundant semi-colons 147 | $parser->add('/\\(;;\\)/', self::IGNORE); // protect for (;;) loops 148 | $parser->add('/;+\\s*([};])/', '$2'); 149 | // apply the above 150 | $script = $parser->exec($script); 151 | 152 | // remove white-space 153 | $parser->add('/(\\b|\\x24)\\s+(\\b|\\x24)/', '$2 $3'); 154 | $parser->add('/([+\\-])\\s+([+\\-])/', '$2 $3'); 155 | $parser->add('/\\s+/', ''); 156 | // done 157 | return $parser->exec($script); 158 | } 159 | 160 | private function _encodeSpecialChars($script) { 161 | $parser = new ParseMaster(); 162 | // replace: $name -> n, $$name -> na 163 | $parser->add('/((\\x24+)([a-zA-Z$_]+))(\\d*)/', 164 | 
array('fn' => '_replace_name') 165 | ); 166 | // replace: _name -> _0, double-underscore (__name) is ignored 167 | $regexp = '/\\b_[A-Za-z\\d]\\w*/'; 168 | // build the word list 169 | $keywords = $this->_analyze($script, $regexp, '_encodePrivate'); 170 | // quick ref 171 | $encoded = $keywords['encoded']; 172 | 173 | $parser->add($regexp, 174 | array( 175 | 'fn' => '_replace_encoded', 176 | 'data' => $encoded 177 | ) 178 | ); 179 | return $parser->exec($script); 180 | } 181 | 182 | private function _encodeKeywords($script) { 183 | // escape high-ascii values already in the script (i.e. in strings) 184 | if ($this->_encoding > 62) 185 | $script = $this->_escape95($script); 186 | // create the parser 187 | $parser = new ParseMaster(); 188 | $encode = $this->_getEncoder($this->_encoding); 189 | // for high-ascii, don't encode single character low-ascii 190 | $regexp = ($this->_encoding > 62) ? '/\\w\\w+/' : '/\\w+/'; 191 | // build the word list 192 | $keywords = $this->_analyze($script, $regexp, $encode); 193 | $encoded = $keywords['encoded']; 194 | 195 | // encode 196 | $parser->add($regexp, 197 | array( 198 | 'fn' => '_replace_encoded', 199 | 'data' => $encoded 200 | ) 201 | ); 202 | if (empty($script)) return $script; 203 | else { 204 | //$res = $parser->exec($script); 205 | //$res = $this->_bootStrap($res, $keywords); 206 | //return $res; 207 | return $this->_bootStrap($parser->exec($script), $keywords); 208 | } 209 | } 210 | 211 | private function _analyze($script, $regexp, $encode) { 212 | // analyse 213 | // retreive all words in the script 214 | $all = array(); 215 | preg_match_all($regexp, $script, $all); 216 | $_sorted = array(); // list of words sorted by frequency 217 | $_encoded = array(); // dictionary of word->encoding 218 | $_protected = array(); // instances of "protected" words 219 | $all = $all[0]; // simulate the javascript comportement of global match 220 | if (!empty($all)) { 221 | $unsorted = array(); // same list, not sorted 222 | $protected 
= array(); // "protected" words (dictionary of word->"word") 223 | $value = array(); // dictionary of charCode->encoding (eg. 256->ff) 224 | $this->_count = array(); // word->count 225 | $i = count($all); $j = 0; //$word = null; 226 | // count the occurrences - used for sorting later 227 | do { 228 | --$i; 229 | $word = '$' . $all[$i]; 230 | if (!isset($this->_count[$word])) { 231 | $this->_count[$word] = 0; 232 | $unsorted[$j] = $word; 233 | // make a dictionary of all of the protected words in this script 234 | // these are words that might be mistaken for encoding 235 | //if (is_string($encode) && method_exists($this, $encode)) 236 | $values[$j] = call_user_func(array(&$this, $encode), $j); 237 | $protected['$' . $values[$j]] = $j++; 238 | } 239 | // increment the word counter 240 | $this->_count[$word]++; 241 | } while ($i > 0); 242 | // prepare to sort the word list, first we must protect 243 | // words that are also used as codes. we assign them a code 244 | // equivalent to the word itself. 245 | // e.g. if "do" falls within our encoding range 246 | // then we store keywords["do"] = "do"; 247 | // this avoids problems when decoding 248 | $i = count($unsorted); 249 | do { 250 | $word = $unsorted[--$i]; 251 | if (isset($protected[$word]) /*!= null*/) { 252 | $_sorted[$protected[$word]] = substr($word, 1); 253 | $_protected[$protected[$word]] = true; 254 | $this->_count[$word] = 0; 255 | } 256 | } while ($i); 257 | 258 | // sort the words by frequency 259 | // Note: the javascript and php version of sort can be different : 260 | // in php manual, usort : 261 | // " If two members compare as equal, 262 | // their order in the sorted array is undefined." 263 | // so the final packed script is different of the Dean's javascript version 264 | // but equivalent. 265 | // the ECMAscript standard does not guarantee this behaviour, 266 | // and thus not all browsers (e.g. Mozilla versions dating back to at 267 | // least 2003) respect this. 
usort($unsorted, array(&$this, '_sortWords'));
        $j = 0;
        // because there are "protected" words in the list
        //  we must add the sorted words around them
        do {
            if (!isset($_sorted[$i]))
                $_sorted[$i] = substr($unsorted[$j++], 1);
            $_encoded[$_sorted[$i]] = $values[$i];
        } while (++$i < count($unsorted));
    }
    return array(
        'sorted'  => $_sorted,
        'encoded' => $_encoded,
        'protected' => $_protected);
}

// word => number of occurrences; read by _sortWords()
private $_count = array();

/**
 * usort() comparator: orders words by descending occurrence count.
 *
 * @param string $match1 key into $this->_count
 * @param string $match2 key into $this->_count
 * @return int negative, zero or positive, as usort() expects
 */
private function _sortWords($match1, $match2) {
    return $this->_count[$match2] - $this->_count[$match1];
}

/**
 * Build the boot function used for loading and decoding.
 *
 * Assembles the JavaScript unpacker from the JSFUNCTION_* templates, packs
 * that unpacker itself with a nested JSPacker, and wraps everything in an
 * "eval(<unpacker>(<arguments>))" statement.
 *
 * @param string $packed   the packed script
 * @param array  $keywords array with a 'sorted' list of words and a
 *                         'protected' map whose keys are indexes into 'sorted'
 * @return string the eval(...) statement, terminated by a newline
 */
private function _bootStrap($packed, $keywords) {
    $ENCODE = $this->_safeRegExp('$encode\\($count\\)');

    // $packed: the packed script, as a single-quoted JavaScript string literal
    $packed = "'" . $this->_escape($packed) . "'";

    // $ascii: base for encoding (never 0, even when there are no words)
    $ascii = min(count($keywords['sorted']), $this->_encoding);
    if ($ascii == 0) $ascii = 1;

    // $count: number of words contained in the script
    $count = count($keywords['sorted']);

    // $keywords: list of words contained in the script
    // (protected words are blanked out of the transmitted list)
    foreach ($keywords['protected'] as $i=>$value) {
        $keywords['sorted'][$i] = '';
    }
    // order the words by index, then emit them as a JavaScript
    // expression that rebuilds the array: 'a|b|c'.split('|')
    ksort($keywords['sorted']);
    $keywords = "'" . implode('|',$keywords['sorted']) . "'.split('|')";

    // pick the JavaScript encoder template and specialise it:
    // "_encoding" becomes the $ascii argument and the self-reference
    // "arguments.callee" becomes the $encode variable
    $encode = ($this->_encoding > 62) ? '_encode95' : $this->_getEncoder($ascii);
    $encode = $this->_getJSFunction($encode);
    $encode = preg_replace('/_encoding/','$ascii', $encode);
    $encode = preg_replace('/arguments\\.callee/','$encode', $encode);
    $inline = '\\$count' . ($ascii > 10 ? '.toString(\\$ascii)' : '');

    // $decode: code snippet to speed up decoding
    if ($this->_fastDecode) {
        // create the decoder
        $decode = $this->_getJSFunction('_decodeBody');
        if ($this->_encoding > 62)
            $decode = preg_replace('/\\\\w/', '[\\xa1-\\xff]', $decode);
        // perform the encoding inline for lower ascii values
        elseif ($ascii < 36)
            $decode = preg_replace($ENCODE, $inline, $decode);
        // special case: when $count==0 there are no keywords. I want to keep
        //  the basic shape of the unpacking function so i'll frig the code...
        if ($count == 0)
            $decode = preg_replace($this->_safeRegExp('($count)\\s*=\\s*1'), '$1=0', $decode, 1);
    }

    // boot function
    $unpack = $this->_getJSFunction('_unpack');
    if ($this->_fastDecode) {
        // insert the decoder right after the first "{" of the unpacker
        $this->buffer = $decode;
        $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastDecode'), $unpack, 1);
    }
    $unpack = preg_replace('/"/', "'", $unpack);
    if ($this->_encoding > 62) { // high-ascii
        // get rid of the word-boundaries for regexp matches
        $unpack = preg_replace('/\'\\\\\\\\b\'\s*\\+|\\+\s*\'\\\\\\\\b\'/', '', $unpack);
    }
    if ($ascii > 36 || $this->_encoding > 62 || $this->_fastDecode) {
        // insert the encode function
        $this->buffer = $encode;
        $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastEncode'), $unpack, 1);
    } else {
        // perform the encoding inline
        $unpack = preg_replace($ENCODE, $inline, $unpack);
    }
    // pack the boot function too
    $unpackPacker = new JSPacker($unpack, 0, false, true);
    $unpack = $unpackPacker->pack();

    // arguments
    $params = array($packed, $ascii, $count, $keywords);
    if ($this->_fastDecode) {
        $params[] = 0;
        $params[] = '{}';
    }
    $params = implode(',', $params);

    // the whole thing
    return 'eval(' . $unpack . '(' . $params . "))\n";
}

// scratch value handed to the preg_replace_callback helpers below
private $buffer;

/**
 * Callback: replaces a matched "{" by "{" + buffered decoder snippet + ";".
 *
 * @param array $match matches from preg_replace_callback (unused)
 * @return string
 */
private function _insertFastDecode($match) {
    return '{' . $this->buffer . ';';
}

/**
 * Callback: replaces a matched "{" by "{$encode=" + buffered encoder + ";".
 *
 * @param array $match matches from preg_replace_callback (unused)
 * @return string
 */
private function _insertFastEncode($match) {
    return '{$encode=' . $this->buffer . ';';
}

/**
 * Name of the smallest encoder able to represent $ascii distinct values.
 *
 * @param int $ascii encoding base
 * @return string '_encode10', '_encode36', '_encode62' or '_encode95'
 */
private function _getEncoder($ascii) {
    if ($ascii > 62) {
        return '_encode95';
    }
    if ($ascii > 36) {
        return '_encode62';
    }
    if ($ascii > 10) {
        return '_encode36';
    }
    return '_encode10';
}

// zero encoding
// characters: 0123456789
private function _encode10($charCode) {
    return $charCode;
}

// inherent base36 support
// characters: 0123456789abcdefghijklmnopqrstuvwxyz
private function _encode36($charCode) {
    return base_convert($charCode, 10, 36);
}

// hitch a ride on base36 and add the upper case alpha characters
// characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
private function _encode62($charCode) {
    $res = '';
    if ($charCode >= $this->_encoding) {
        // most significant digits first
        $res = $this->_encode62((int)($charCode / $this->_encoding));
    }
    $charCode = $charCode % $this->_encoding;

    if ($charCode > 35) {
        // 36..61 => 'A'..'Z' (chr(65) is 'A')
        return $res . chr($charCode + 29);
    }
    return $res . base_convert($charCode, 10, 36);
}

// use high-ascii values
// characters: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ
private function _encode95($charCode) {
    $res = '';
    if ($charCode >= $this->_encoding) {
        // Truncate the quotient exactly as _encode62() does. Recursing with
        // a float gave the same digits, but only through an implicit
        // float-to-int conversion in "%", which PHP 8.1+ reports as deprecated.
        $res = $this->_encode95((int)($charCode / $this->_encoding));
    }

    return $res . chr(($charCode % $this->_encoding) + 161);
}

/**
 * Turn a literal pattern into a "/"-delimited regular expression in which
 * every "$" is escaped (the JavaScript templates use $-prefixed names).
 *
 * @param string $string pattern body
 * @return string
 */
private function _safeRegExp($string) {
    return '/'.preg_replace('/\$/', '\\\$', $string).'/';
}

// encoder for private names: prefixes the index with an underscore
private function _encodePrivate($charCode) {
    return "_" . $charCode;
}

// protect characters used by the parser
// (puts a backslash in front of every backslash and single quote)
private function _escape($script) {
    return preg_replace('/([\\\\\'])/', '\\\$1', $script);
}

// protect high-ascii characters already in the script
private function _escape95($script) {
    return preg_replace_callback(
        '/[\\xa1-\\xff]/',
        array(&$this, '_escape95Bis'),
        $script
    );
}

/**
 * Callback of _escape95(): rewrites one high-ascii byte as "\x" + hex code.
 *
 * @param array $match matches from preg_replace_callback; $match[0] is the byte
 * @return string
 */
private function _escape95Bis($match) {
    // preg_replace_callback passes an array of matches, so the matched byte
    // is $match[0]. ord() on the array itself fails.
    return '\x' . dechex(ord($match[0]));
}


/**
 * Fetch the JavaScript template stored in the class constant
 * JSFUNCTION<$aName>.
 *
 * @param string $aName constant suffix including the leading underscore,
 *                      e.g. '_unpack'
 * @return string the template, or '' when no such constant is defined
 */
private function _getJSFunction($aName) {
    if (defined('self::JSFUNCTION'.$aName))
        return constant('self::JSFUNCTION'.$aName);
    else
        return '';
}

// JavaScript functions used.
// Note : in Dean's version, these functions are converted
// with 'String(aFunctionName);'.
// This internal conversion completes the original code, e.g. :
// 'while (aBool) anAction();' is converted to
// 'while (aBool) { anAction(); }'.
// The JavaScript functions below are corrected accordingly.

// unpacking function - this is the boot strap function
//  data extracted from this packing routine is passed to
//  this function when decoded in the target
// NOTE : written without the final ';'.
const JSFUNCTION_unpack =

'function($packed, $ascii, $count, $keywords, $encode, $decode) {
    while ($count--) {
        if ($keywords[$count]) {
            $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
        }
    }
    return $packed;
}';
// Brace-less variant of the same function, kept for reference
// (presumably the form used in Dean's original - TODO confirm):
/*
'function($packed, $ascii, $count, $keywords, $encode, $decode) {
    while ($count--)
        if ($keywords[$count])
            $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
    return $packed;
}';
*/

// code-snippet inserted into the unpacker to speed up decoding
// (_bootStrap() splices it in right after the first "{" of the unpacker
// when fast decoding is enabled)
const JSFUNCTION_decodeBody =
//_decode = function() {
// does the browser support String.replace where the
//  replacement value is a function?

'    if (!\'\'.replace(/^/, String)) {
        // decode all the values we need
        while ($count--) {
            $decode[$encode($count)] = $keywords[$count] || $encode($count);
        }
        // global replacement function
        $keywords = [function ($encoded) {return $decode[$encoded]}];
        // generic match
        $encode = function () {return \'\\\\w+\'};
        // reset the loop counter - we are now doing a global replace
        $count = 1;
    }
';
//};
// Compact variant of the same snippet, kept for reference:
/*
'    if (!\'\'.replace(/^/, String)) {
        // decode all the values we need
        while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
        // global replacement function
        $keywords = [function ($encoded) {return $decode[$encoded]}];
        // generic match
        $encode = function () {return\'\\\\w+\'};
        // reset the loop counter - we are now doing a global replace
        $count = 1;
    }';
*/

// zero encoding: the JavaScript encoder returns the index unchanged
// characters: 0123456789
const JSFUNCTION_encode10 =
'function($charCode) {
    return $charCode;
}';//;';

// inherent base36 support
// characters: 0123456789abcdefghijklmnopqrstuvwxyz
const JSFUNCTION_encode36 =
'function($charCode) {
    return $charCode.toString(36);
}';//;';

// hitch a ride on base36 and add the upper case alpha characters
// characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
const JSFUNCTION_encode62 =
'function($charCode) {
    return ($charCode < _encoding ? \'\' : arguments.callee(parseInt($charCode / _encoding))) +
    (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
}';

// use high-ascii values
// characters: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ
const JSFUNCTION_encode95 =
'function($charCode) {
    return ($charCode < _encoding ? \'\' : arguments.callee($charCode / _encoding)) +
        String.fromCharCode($charCode % _encoding + 161);
}';

}


/**
 * Multi-pattern search/replace engine.
 *
 * Patterns registered with add() are joined into a single alternation by
 * exec(); every match is dispatched to the replacement registered for the
 * alternative that matched.
 */
class ParseMaster {
    public $ignoreCase = false;
    public $escapeChar = '';

    // constants: indexes into one stored pattern entry
    const EXPRESSION = 0;
    const REPLACEMENT = 1;
    const LENGTH = 2;

    // used to determine nesting levels
    private $GROUPS = '/\\(/';//g
    private $SUB_REPLACE = '/\\$\\d/';
    private $INDEXED = '/^\\$\\d+$/';
    private $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/';
    private $ESCAPE = '/\\\./';//g
    private $QUOTE = '/\'/';
    private $DELETED = '/\\x01[^\\x01]*\\x01/';//g

    /**
     * Register a pattern and its replacement.
     *
     * @param string $expression  regular expression including its delimiters
     *                            (exec() strips the first and last character);
     *                            an empty value registers '/^$/'
     * @param mixed  $replacement a literal string; a string containing "$n"
     *                            references; or an array with an 'fn' key
     *                            (method name) and an optional 'data' key,
     *                            which _replacement() calls with the match
     */
    public function add($expression, $replacement = '') {
        // count the number of sub-expressions
        //  - add one because each pattern is itself a sub-expression
        $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);

        // treat only strings $replacement
        if (is_string($replacement)) {
            // does the pattern deal with sub-expressions?
            if (preg_match($this->SUB_REPLACE, $replacement)) {
                // a simple lookup? (e.g. "$2")
                if (preg_match($this->INDEXED, $replacement)) {
                    // store the index (used for fast retrieval of matched strings)
                    $replacement = (int)(substr($replacement, 1)) - 1;
                } else { // a complicated lookup (e.g. "Hello $2 $1")
                    // build a function to do the lookup
                    $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement))
                             ? '"' : "'";
                    $replacement = array(
                        'fn' => '_backReferences',
                        'data' => array(
                            'replacement' => $replacement,
                            'length' => $length,
                            'quote' => $quote
                        )
                    );
                }
            }
        }
        // pass the modified arguments
        if (!empty($expression)) $this->_add($expression, $replacement, $length);
        else $this->_add('/^$/', $replacement, $length);
    }

    /**
     * Run every registered pattern over $string in a single pass.
     *
     * @param string $string text to process
     * @return string the processed text
     */
    public function exec($string) {
        // execute the global replacement
        $this->_escaped = array();

        $string = $this->_escape($string, $this->escapeChar);

        // With no pattern registered the combined expression would be the
        // invalid regex "/", so the replacement pass is skipped entirely.
        if (!empty($this->_patterns)) {
            // simulate the _patterns.toString of Dean: wrap each pattern
            // (minus its delimiters) in a group and join them with "|"
            $alternatives = array();
            foreach ($this->_patterns as $reg) {
                $alternatives[] = '(' . substr($reg[self::EXPRESSION], 1, -1) . ')';
            }
            $regexp = '/' . implode('|', $alternatives) . '/';
            $regexp .= ($this->ignoreCase) ? 'i' : '';

            $string = preg_replace_callback(
                $regexp,
                array($this, '_replacement'),
                $string
            );
        }

        $string = $this->_unescape($string, $this->escapeChar);

        // drop the \x01...\x01 spans that _replacement() marked for deletion
        return preg_replace($this->DELETED, '', $string);
    }

    public function reset() {
        // clear the patterns collection so that this object may be re-used
        $this->_patterns = array();
    }

    // private
    private $_escaped = array();  // escaped characters
    private $_patterns = array(); // patterns stored by index

    // create and add a new pattern to the patterns collection
    // (stored as array(EXPRESSION, REPLACEMENT, LENGTH))
    private function _add() {
        $arguments = func_get_args();
        $this->_patterns[] = $arguments;
    }

    /**
     * preg_replace_callback handler for exec(): finds which registered
     * pattern produced the match and returns its replacement.
     *
     * @param array $arguments matches of the combined expression
     * @return string
     */
    private function _replacement($arguments) {
        if (empty($arguments)) return '';

        $i = 1; $j = 0;
        // loop through the patterns
        while (isset($this->_patterns[$j])) {
            $pattern = $this->_patterns[$j++];
            // do we have a result?
            if (isset($arguments[$i]) && ($arguments[$i] != '')) {
                $replacement = $pattern[self::REPLACEMENT];

                if (is_array($replacement) && isset($replacement['fn'])) {
                    // delegate to a method of this class
                    if (isset($replacement['data'])) $this->buffer = $replacement['data'];
                    return call_user_func(array($this, $replacement['fn']), $arguments, $i);

                } elseif (is_int($replacement)) {
                    // simple "$n" lookup; PCRE omits trailing groups that
                    // did not take part in the match, so default to ''
                    $index = $replacement + $i;
                    return isset($arguments[$index]) ? $arguments[$index] : '';

                }
                $delete = ($this->escapeChar == '' ||
                           strpos($arguments[$i], $this->escapeChar) === false)
                        ? '' : "\x01" . $arguments[$i] . "\x01";
                return $delete . $replacement;

            // skip over references to sub-expressions
            } else {
                $i += $pattern[self::LENGTH];
            }
        }
        // no pattern group carried the match
        return '';
    }

    /**
     * Expand the "$n" references of a composite replacement string.
     * "$n" maps to $match[$offset + n - 1], i.e. "$1" is the pattern's own
     * group and "$2" its first sub-group.
     *
     * @param array $match  matches of the combined expression
     * @param int   $offset index of the matching pattern's own group
     * @return string
     */
    private function _backReferences($match, $offset) {
        $replacement = $this->buffer['replacement'];
        // highest reference first, so that "$1" never clobbers "$10"
        for ($n = $this->buffer['length']; $n > 0; $n--) {
            $index = $offset + $n - 1;
            // groups omitted by PCRE expand to an empty string
            $value = isset($match[$index]) ? $match[$index] : '';
            $replacement = str_replace('$' . $n, $value, $replacement);
        }
        return $replacement;
    }

    // replacement callback: builds a replacement name from sub-groups
    // 1..4 of the matching pattern
    private function _replace_name($match, $offset){
        $length = strlen($match[$offset + 2]);
        $start = $length - max($length - strlen($match[$offset + 3]), 0);
        return substr($match[$offset + 1], $start, $length) . $match[$offset + 4];
    }

    // replacement callback: looks the matched text up in the buffered map
    private function _replace_encoded($match, $offset) {
        return $this->buffer[$match[$offset]];
    }


    // php : we cannot pass additional data to preg_replace_callback,
    // and we cannot use &$this in create_function, so let's go to lower level
    private $buffer;

    // encode escaped characters: every "<escapeChar><char>" pair is reduced
    // to "<escapeChar>" and the character is remembered in $_escaped
    private function _escape($string, $escapeChar) {
        if ($escapeChar) {
            $this->buffer = $escapeChar;
            return preg_replace_callback(
                '/\\' . $escapeChar . '(.)' .'/',
                array($this, '_escapeBis'),
                $string
            );

        } else {
            return $string;
        }
    }
    private function _escapeBis($match) {
        $this->_escaped[] = $match[1];
        return $this->buffer;
    }

    // decode escaped characters: every "<escapeChar>" gets the next
    // remembered character appended again
    private function _unescape($string, $escapeChar) {
        if ($escapeChar) {
            $regexp = '/'.'\\'.$escapeChar.'/';
            $this->buffer = array('escapeChar'=> $escapeChar, 'i' => 0);
            return preg_replace_callback
            (
                $regexp,
                array($this, '_unescapeBis'),
                $string
            );

        } else {
            return $string;
        }
    }
    private function _unescapeBis() {
        if (isset($this->_escaped[$this->buffer['i']])
            && $this->_escaped[$this->buffer['i']] != '')
        {
             $temp = $this->_escaped[$this->buffer['i']];
        } else {
            $temp = '';
        }
        $this->buffer['i']++;
        return $this->buffer['escapeChar'] . $temp;
    }

    // strip backslash-escaped characters so they are not counted
    private function _internalEscape($string) {
        return preg_replace($this->ESCAPE, '', $string);
    }
}

-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works.
By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /application/third_party/Simple_html_dom.php: -------------------------------------------------------------------------------- 1 | size is the "real" number of bytes the dom was created from. 19 | * but for most purposes, it's a really good estimation. 20 | * Paperg - Added the forceTagsClosed to the dom constructor. Forcing tags closed is great for malformed html, but it CAN lead to parsing errors. 21 | * Allow the user to tell us how much they trust the html. 22 | * Paperg add the text and plaintext to the selectors for the find syntax. plaintext implies text in the innertext of a node.
text implies that the tag is a text node. 23 | * This allows for us to find tags based on the text they contain. 24 | * Create find_ancestor_tag to see if a tag is - at any level - inside of another specific tag. 25 | * Paperg: added parse_charset so that we know about the character set of the source document. 26 | * NOTE: If the user's system has a routine called get_last_retrieve_url_contents_content_type available, we will assume it's returning the content-type header from the 27 | * last transfer or curl_exec, and we will parse that and use it in preference to any other method of charset detection. 28 | * 29 | * Found infinite loop in the case of broken html in restore_noise. Rewrote to protect from that. 30 | * PaperG (John Schlick) Added get_display_size for "IMG" tags. 31 | * 32 | * Licensed under The MIT License 33 | * Redistributions of files must retain the above copyright notice. 34 | * 35 | * @author S.C. Chen 36 | * @author John Schlick 37 | * @author Rus Carroll 38 | * @version 1.5 ($Rev: 196 $) 39 | * @package PlaceLocalInclude 40 | * @subpackage Simple_html_dom 41 | */ 42 | 43 | /** 44 | * All of the Defines for the classes below. 45 | * @author S.C.
Chen 46 | */ 47 | define('HDOM_TYPE_ELEMENT', 1); 48 | define('HDOM_TYPE_COMMENT', 2); 49 | define('HDOM_TYPE_TEXT', 3); 50 | define('HDOM_TYPE_ENDTAG', 4); 51 | define('HDOM_TYPE_ROOT', 5); 52 | define('HDOM_TYPE_UNKNOWN', 6); 53 | define('HDOM_QUOTE_DOUBLE', 0); 54 | define('HDOM_QUOTE_SINGLE', 1); 55 | define('HDOM_QUOTE_NO', 3); 56 | define('HDOM_INFO_BEGIN', 0); 57 | define('HDOM_INFO_END', 1); 58 | define('HDOM_INFO_QUOTE', 2); 59 | define('HDOM_INFO_SPACE', 3); 60 | define('HDOM_INFO_TEXT', 4); 61 | define('HDOM_INFO_INNER', 5); 62 | define('HDOM_INFO_OUTER', 6); 63 | define('HDOM_INFO_ENDSPACE',7); 64 | define('DEFAULT_TARGET_CHARSET', 'UTF-8'); 65 | define('DEFAULT_BR_TEXT', "\r\n"); 66 | define('DEFAULT_SPAN_TEXT', " "); 67 | define('MAX_FILE_SIZE', 12600000); 68 | // helper functions 69 | // ----------------------------------------------------------------------------- 70 | // get html dom from file 71 | // $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1. 72 | function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 73 | { 74 | // We DO force the tags to be terminated. 75 | $dom = new Simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); 76 | // For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done. 77 | $contents = file_get_contents($url, $use_include_path, $context, $offset); 78 | // Paperg - use our own mechanism for getting the contents as we want to control the timeout. 79 | //$contents = retrieve_url_contents($url); 80 | if (empty($contents) || strlen($contents) > MAX_FILE_SIZE) 81 | { 82 | return false; 83 | } 84 | // The second parameter can force the selectors to all be lowercase. 
85 | $dom->load($contents, $lowercase, $stripRN); 86 | return $dom; 87 | } 88 | 89 | // get html dom from string 90 | function str_get_html($str, $lowercase=true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 91 | { 92 | $dom = new Simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); 93 | if (empty($str) || strlen($str) > MAX_FILE_SIZE) 94 | { 95 | $dom->clear(); 96 | return false; 97 | } 98 | $dom->load($str, $lowercase, $stripRN); 99 | return $dom; 100 | } 101 | 102 | // dump html dom tree 103 | function dump_html_tree($node, $show_attr=true, $deep=0) 104 | { 105 | $node->dump($node); 106 | } 107 | 108 | 109 | /** 110 | * simple html dom node 111 | * PaperG - added ability for "find" routine to lowercase the value of the selector. 112 | * PaperG - added $tag_start to track the start position of the tag in the total byte index 113 | * 114 | * @package PlaceLocalInclude 115 | */ 116 | class Simple_html_dom_node 117 | { 118 | public $nodetype = HDOM_TYPE_TEXT; 119 | public $tag = 'text'; 120 | public $attr = array(); 121 | public $children = array(); 122 | public $nodes = array(); 123 | public $parent = null; 124 | // The "info" array - see HDOM_INFO_... for what each element contains. 125 | public $_ = array(); 126 | public $tag_start = 0; 127 | private $dom = null; 128 | 129 | function __construct($dom) 130 | { 131 | $this->dom = $dom; 132 | $dom->nodes[] = $this; 133 | } 134 | 135 | function __destruct() 136 | { 137 | $this->clear(); 138 | } 139 | 140 | function __toString() 141 | { 142 | return $this->outertext(); 143 | } 144 | 145 | // clean up memory due to php5 circular references memory leak... 
/**
 * Release the references held by this node.
 *
 * Sets the owning dom, the node list, the parent and the children to null.
 */
function clear()
{
    $this->dom = null;
    $this->nodes = null;
    $this->parent = null;
    $this->children = null;
}

/**
 * Echo this node's tag (optionally followed by its attributes), then
 * recurse into every entry of $this->nodes one level deeper.
 *
 * @param bool $show_attr When true, attributes are printed after the tag.
 * @param int  $deep      Current depth; determines the leading indent.
 */
function dump($show_attr=true, $deep=0)
{
    $lead = str_repeat(' ', $deep);

    echo $lead.$this->tag;
    if ($show_attr && count($this->attr)>0)
    {
        echo '(';
        // Values are read through property access ($this->$k) rather than
        // from $v directly.
        foreach ($this->attr as $k=>$v)
            echo "[$k]=>\"".$this->$k.'", ';
        echo ')';
    }
    echo "\n";

    if ($this->nodes)
    {
        foreach ($this->nodes as $c)
        {
            $c->dump($show_attr, $deep+1);
        }
    }
}

/**
 * Debugging helper: describe this single node (tag, attributes, the
 * HDOM_INFO_* array, text, inner info, child/node counts, tag_start).
 *
 * @param bool $echo When true the description is echoed and nothing is
 *                   returned; when false the description is returned.
 * @return string|null
 */
function dump_node($echo=true)
{
    $string = $this->tag;
    if (count($this->attr)>0)
    {
        $string .= '(';
        foreach ($this->attr as $k=>$v)
        {
            $string .= "[$k]=>\"".$this->$k.'", ';
        }
        $string .= ')';
    }
    if (count($this->_)>0)
    {
        $string .= ' $_ (';
        foreach ($this->_ as $k=>$v)
        {
            if (is_array($v))
            {
                $string .= "[$k]=>(";
                foreach ($v as $k2=>$v2)
                {
                    $string .= "[$k2]=>\"".$v2.'", ';
                }
                $string .= ")";
            } else {
                $string .= "[$k]=>\"".$v.'", ';
            }
        }
        $string .= ")";
    }

    if (isset($this->text))
    {
        $string .= " text: (" . $this->text . ")";
    }

    $string .= " HDOM_INNER_INFO: '";
    // Fixed: the original tested an undefined local $node here, so this
    // branch could never be taken and the output was always ' NULL '.
    if (isset($this->_[HDOM_INFO_INNER]))
    {
        $string .= $this->_[HDOM_INFO_INNER] . "'";
    }
    else
    {
        $string .= ' NULL ';
    }

    $string .= " children: " . count($this->children);
    $string .= " nodes: " . count($this->nodes);
    $string .= " tag_start: " . $this->tag_start;
    $string .= "\n";

    if ($echo)
    {
        echo $string;
        return;
    }
    else
    {
        return $string;
    }
}

/**
 * Return the parent of this node.
 *
 * If a node is passed in, it becomes the new parent and this node is
 * appended to that parent's nodes and children lists.
 *
 * @param Simple_html_dom_node|null $parent New parent, or null to only read.
 * @return Simple_html_dom_node|null
 */
function parent($parent=null)
{
    // I am SURE that this doesn't work properly.
    // It fails to unset the current node from its current parent's nodes or children list first.
    if ($parent !== null)
    {
        $this->parent = $parent;
        $this->parent->nodes[] = $this;
        $this->parent->children[] = $this;
    }

    return $this->parent;
}

/**
 * Tell whether this node has at least one child.
 *
 * @return bool
 */
function has_child()
{
    return !empty($this->children);
}

/**
 * Return the children of this node.
 *
 * @param int $idx -1 (default) returns the whole children array; any other
 *                 value returns the child at that index, or null if unset.
 * @return array|Simple_html_dom_node|null
 */
function children($idx=-1)
{
    if ($idx===-1)
    {
        return $this->children;
    }
    if (isset($this->children[$idx])) return $this->children[$idx];
    return null;
}

/**
 * Return the first child of this node, or null when there is none.
 */
function first_child()
{
    if (count($this->children)>0)
    {
        return $this->children[0];
    }
    return null;
}

/**
 * Return the last child of this node, or null when there is none.
 */
function last_child()
{
    if (($count=count($this->children))>0)
    {
        return $this->children[$count-1];
    }
    return null;
}

/**
 * Return the sibling that follows this node in its parent's children list,
 * or null when there is no parent or no following sibling.
 */
function next_sibling()
{
    if ($this->parent===null)
    {
        return null;
    }

    // Locate this node's own position among the parent's children.
    $idx = 0;
    $count = count($this->parent->children);
    while ($idx<$count && $this!==$this->parent->children[$idx])
    {
        ++$idx;
    }
    if (++$idx>=$count)
    {
        return null;
    }
    return $this->parent->children[$idx];
}

/**
 * Return the sibling that precedes this node in its parent's children list,
 * or null when there is no parent or no preceding sibling.
 */
function prev_sibling()
{
    if ($this->parent===null) return null;
    $idx = 0;
    $count = count($this->parent->children);
    while ($idx<$count && $this!==$this->parent->children[$idx])
        ++$idx;
    if (--$idx<0) return null;
    return $this->parent->children[$idx];
}

/**
 * Locate a specific ancestor tag on the path to the root.
 *
 * The search starts with this node itself and follows ->parent until a node
 * whose tag equals $tag is found.
 *
 * @param string $tag Tag name to look for (compared with ==).
 * @return Simple_html_dom_node|null The matching node, or null if the chain
 *                                   ends without a match.
 */
function find_ancestor_tag($tag)
{
    global $debugObject;
    if (is_object($debugObject)) { $debugObject->debugLogEntry(1); }

    // Start by including ourselves in the comparison.
    $returnDom = $this;

    while (!is_null($returnDom))
    {
        if (is_object($debugObject)) { $debugObject->debugLog(2, "Current tag is: " . $returnDom->tag); }

        if ($returnDom->tag == $tag)
        {
            break;
        }
        $returnDom = $returnDom->parent;
    }
    return $returnDom;
}

/**
 * Get this node's inner html.
 *
 * Precedence: an explicit HDOM_INFO_INNER value, then the node's own
 * HDOM_INFO_TEXT passed through the dom's restore_noise(), otherwise the
 * concatenated outertext() of every entry in $this->nodes.
 *
 * @return string
 */
function innertext()
{
    if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    $ret = '';
    foreach ($this->nodes as $n)
        $ret .= $n->outertext();
    return $ret;
}

/**
 * Get this node's outer text (begin tag, inner content, end tag).
 *
 * @return string
 */
function outertext()
{
    global $debugObject;
    if (is_object($debugObject))
    {
        $text = '';
        if ($this->tag == 'text')
        {
            if (!empty($this->text))
            {
                $text = " with text: " . $this->text;
            }
        }
        $debugObject->debugLog(1, 'Innertext of tag: ' . $this->tag . $text);
    }

    if ($this->tag==='root') return $this->innertext();

    // trigger callback
    if ($this->dom && $this->dom->callback!==null)
    {
        call_user_func_array($this->dom->callback, array($this));
    }

    if (isset($this->_[HDOM_INFO_OUTER])) return $this->_[HDOM_INFO_OUTER];
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    // render begin tag
    if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]])
    {
        $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
    } else {
        $ret = "";
    }

    // render inner text
    if (isset($this->_[HDOM_INFO_INNER]))
    {
        // If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added.
        if ($this->tag != "br")
        {
            $ret .= $this->_[HDOM_INFO_INNER];
        }
    } else {
        if ($this->nodes)
        {
            foreach ($this->nodes as $n)
            {
                $ret .= $this->convert_text($n->outertext());
            }
        }
    }

    // render end tag
    // Restored: the closing-tag literal had been stripped from this
    // expression, leaving it syntactically invalid.
    if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END]!=0)
        $ret .= '</'.$this->tag.'>';
    return $ret;
}

/**
 * Get this node's plain text.
 *
 * Comment and unknown nodes, and script/style elements, yield an empty
 * string. Text nodes yield their noise-restored text. Other nodes yield the
 * converted text of their child nodes.
 *
 * @return string
 */
function text()
{
    if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
    switch ($this->nodetype)
    {
        case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
        case HDOM_TYPE_COMMENT: return '';
        case HDOM_TYPE_UNKNOWN: return '';
    }
    if (strcasecmp($this->tag, 'script')===0) return '';
    if (strcasecmp($this->tag, 'style')===0) return '';

    $ret = '';
    // In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed for some span tags, and some p tags) $this->nodes is set to NULL.
    // NOTE: This indicates that there is a problem where it's set to NULL without a clear happening.
    // WHY is this happening?
    if (!is_null($this->nodes))
    {
        foreach ($this->nodes as $n)
        {
            $ret .= $this->convert_text($n->text());
        }

        // If this node is a span... add a space at the end of it so multiple spans don't run into each other. This is plaintext after all.
        if ($this->tag == "span")
        {
            $ret .= $this->dom->default_span_text;
        }
    }
    return $ret;
}

/**
 * Get this node's inner text with the CDATA wrapper markers removed.
 *
 * @return string
 */
function xmltext()
{
    $ret = $this->innertext();
    // Restored: both marker literals (and the second replace call) had been
    // stripped, leaving a no-op replacement of '' with ''.
    $ret = str_ireplace('<![CDATA[', '', $ret);
    $ret = str_replace(']]>', '', $ret);
    return $ret;
}

/**
 * Build this node's opening tag text, including its attributes.
 *
 * Text, comment and unknown nodes return their noise-restored text instead.
 *
 * @return string
 */
function makeup()
{
    // text, comment, unknown
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    $ret = '<'.$this->tag;
    // $i tracks the attribute's position so the matching spacing and quote
    // info can be looked up; it advances even for skipped attributes.
    $i = -1;

    foreach ($this->attr as $key=>$val)
    {
        ++$i;

        // skip removed attribute
        if ($val===null || $val===false)
            continue;

        $ret .= $this->_[HDOM_INFO_SPACE][$i][0];
        //no value attr: nowrap, checked selected...
        if ($val===true)
            $ret .= $key;
        else {
            switch ($this->_[HDOM_INFO_QUOTE][$i])
            {
                case HDOM_QUOTE_DOUBLE: $quote = '"'; break;
                case HDOM_QUOTE_SINGLE: $quote = '\''; break;
                default: $quote = '';
            }
            $ret .= $key.$this->_[HDOM_INFO_SPACE][$i][1].'='.$this->_[HDOM_INFO_SPACE][$i][2].$quote.$val.$quote;
        }
    }
    $ret = $this->dom->restore_noise($ret);
    return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
}

// find elements by css selector
//PaperG - added ability for find to lowercase the value of the selector.
/**
 * Find the nodes below this node that match a CSS-style selector.
 *
 * The selector string is split by parse_selector() into comma-separated
 * groups, each of which is a chain of descendant steps. Every group is
 * resolved on its own, starting from this node, and the matches of all
 * groups are merged and ordered by their index in $this->dom->nodes.
 *
 * Note: the early exits (nothing parsed, an empty group, or a node that
 * has no HDOM_INFO_BEGIN entry) return an empty array even when $idx is
 * given.
 *
 * @param string   $selector  Selector string to evaluate.
 * @param int|null $idx       null returns every match as an array; an
 *                            integer returns that single match, with
 *                            negative values counting from the end.
 * @param bool     $lowercase Passed through to seek(), which then compares
 *                            selector values case-insensitively.
 * @return mixed Array of matching nodes when $idx is null, otherwise the
 *               selected node or null when that position does not exist.
 */
function find($selector, $idx=null, $lowercase=false)
{
    $groups = $this->parse_selector($selector);
    if (count($groups) === 0)
    {
        return array();
    }

    // Node indexes (keys) of everything matched so far, across all groups.
    $matched_keys = array();

    foreach ($groups as $steps)
    {
        // Sourceforge code tracker id 2788009: the size check must look at
        // the current group, not always at the first one.
        if (count($steps) === 0)
        {
            return array();
        }
        if (!isset($this->_[HDOM_INFO_BEGIN]))
        {
            return array();
        }

        // The search frontier starts with this node only.
        $frontier = array($this->_[HDOM_INFO_BEGIN] => 1);

        // Walk the descendant chain iteratively: each step is applied to
        // every node collected by the previous step.
        foreach ($steps as $step)
        {
            $next = array();
            foreach (array_keys($frontier) as $key)
            {
                // Key -1 is mapped to the document root instead of an entry
                // in the nodes list.
                if ($key === -1)
                {
                    $origin = $this->dom->root;
                }
                else
                {
                    $origin = $this->dom->nodes[$key];
                }
                // seek() reports its matches through $next (by reference).
                $origin->seek($step, $next, $lowercase);
            }
            $frontier = $next;
        }

        // Array union: keys that were already recorded are left untouched.
        $matched_keys += $frontier;
    }

    // Order the result by node index.
    ksort($matched_keys);

    $found = array();
    foreach (array_keys($matched_keys) as $key)
    {
        $found[] = $this->dom->nodes[$key];
    }

    // Either hand back the whole list or pick the requested element.
    if ($idx === null)
    {
        return $found;
    }
    if ($idx < 0)
    {
        $idx += count($found);
    }
    return isset($found[$idx]) ? $found[$idx] : null;
}

// seek for given conditions
// PaperG - added parameter to allow for case insensitive testing of the value of a selector.
555 | protected function seek($selector, &$ret, $lowercase=false) 556 | { 557 | global $debugObject; 558 | if (is_object($debugObject)) { $debugObject->debugLogEntry(1); } 559 | 560 | list($tag, $key, $val, $exp, $no_key) = $selector; 561 | 562 | // xpath index 563 | if ($tag && $key && is_numeric($key)) 564 | { 565 | $count = 0; 566 | foreach ($this->children as $c) 567 | { 568 | if ($tag==='*' || $tag===$c->tag) { 569 | if (++$count==$key) { 570 | $ret[$c->_[HDOM_INFO_BEGIN]] = 1; 571 | return; 572 | } 573 | } 574 | } 575 | return; 576 | } 577 | 578 | $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0; 579 | if ($end==0) { 580 | $parent = $this->parent; 581 | while (!isset($parent->_[HDOM_INFO_END]) && $parent!==null) { 582 | $end -= 1; 583 | $parent = $parent->parent; 584 | } 585 | $end += $parent->_[HDOM_INFO_END]; 586 | } 587 | 588 | for ($i=$this->_[HDOM_INFO_BEGIN]+1; $i<$end; ++$i) { 589 | $node = $this->dom->nodes[$i]; 590 | 591 | $pass = true; 592 | 593 | if ($tag==='*' && !$key) { 594 | if (in_array($node, $this->children, true)) 595 | $ret[$i] = 1; 596 | continue; 597 | } 598 | 599 | // compare tag 600 | if ($tag && $tag!=$node->tag && $tag!=='*') {$pass=false;} 601 | // compare key 602 | if ($pass && $key) { 603 | if ($no_key) { 604 | if (isset($node->attr[$key])) $pass=false; 605 | } else { 606 | if (($key != "plaintext") && !isset($node->attr[$key])) $pass=false; 607 | } 608 | } 609 | // compare value 610 | if ($pass && $key && $val && $val!=='*') { 611 | // If they have told us that this is a "plaintext" search then we want the plaintext of the node - right? 612 | if ($key == "plaintext") { 613 | // $node->plaintext actually returns $node->text(); 614 | $nodeKeyValue = $node->text(); 615 | } else { 616 | // this is a normal search, we want the value of that attribute of the tag. 617 | $nodeKeyValue = $node->attr[$key]; 618 | } 619 | if (is_object($debugObject)) {$debugObject->debugLog(2, "testing node: " . $node->tag . 
" for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} 620 | 621 | //PaperG - If lowercase is set, do a case insensitive test of the value of the selector. 622 | if ($lowercase) { 623 | $check = $this->match($exp, strtolower($val), strtolower($nodeKeyValue)); 624 | } else { 625 | $check = $this->match($exp, $val, $nodeKeyValue); 626 | } 627 | if (is_object($debugObject)) {$debugObject->debugLog(2, "after match: " . ($check ? "true" : "false"));} 628 | 629 | // handle multiple class 630 | if (!$check && strcasecmp($key, 'class')===0) { 631 | foreach (explode(' ',$node->attr[$key]) as $k) { 632 | // Without this, there were cases where leading, trailing, or double spaces lead to our comparing blanks - bad form. 633 | if (!empty($k)) { 634 | if ($lowercase) { 635 | $check = $this->match($exp, strtolower($val), strtolower($k)); 636 | } else { 637 | $check = $this->match($exp, $val, $k); 638 | } 639 | if ($check) break; 640 | } 641 | } 642 | } 643 | if (!$check) $pass = false; 644 | } 645 | if ($pass) $ret[$i] = 1; 646 | unset($node); 647 | } 648 | // It's passed by reference so this is actually what this function returns. 
649 | if (is_object($debugObject)) {$debugObject->debugLog(1, "EXIT - ret: ", $ret);} 650 | } 651 | 652 | protected function match($exp, $pattern, $value) { 653 | global $debugObject; 654 | if (is_object($debugObject)) {$debugObject->debugLogEntry(1);} 655 | 656 | switch ($exp) { 657 | case '=': 658 | return ($value===$pattern); 659 | case '!=': 660 | return ($value!==$pattern); 661 | case '^=': 662 | return preg_match("/^".preg_quote($pattern,'/')."/", $value); 663 | case '$=': 664 | return preg_match("/".preg_quote($pattern,'/')."$/", $value); 665 | case '*=': 666 | if ($pattern[0]=='/') { 667 | return preg_match($pattern, $value); 668 | } 669 | return preg_match("/".$pattern."/i", $value); 670 | } 671 | return false; 672 | } 673 | 674 | protected function parse_selector($selector_string) { 675 | global $debugObject; 676 | if (is_object($debugObject)) {$debugObject->debugLogEntry(1);} 677 | 678 | // pattern of CSS selectors, modified from mootools 679 | // Paperg: Add the colon to the attrbute, so that it properly finds like google does. 680 | // Note: if you try to look at this attribute, yo MUST use getAttribute since $dom->x:y will fail the php syntax check. 681 | // Notice the \[ starting the attbute? and the @? following? This implies that an attribute can begin with an @ sign that is not captured. 682 | // This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression. 683 | // farther study is required to determine of this should be documented or removed. 684 | // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; 685 | $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; 686 | preg_match_all($pattern, trim($selector_string).' 
', $matches, PREG_SET_ORDER); 687 | if (is_object($debugObject)) {$debugObject->debugLog(2, "Matches Array: ", $matches);} 688 | 689 | $selectors = array(); 690 | $result = array(); 691 | //print_r($matches); 692 | 693 | foreach ($matches as $m) { 694 | $m[0] = trim($m[0]); 695 | if ($m[0]==='' || $m[0]==='/' || $m[0]==='//') continue; 696 | // for browser generated xpath 697 | //if ($m[1]==='tbody') continue; 698 | 699 | list($tag, $key, $val, $exp, $no_key) = array($m[1], null, null, '=', false); 700 | if (!empty($m[2])) {$key='id'; $val=$m[2];} 701 | if (!empty($m[3])) {$key='class'; $val=$m[3];} 702 | if (!empty($m[4])) {$key=$m[4];} 703 | if (!empty($m[5])) {$exp=$m[5];} 704 | if (!empty($m[6])) {$val=$m[6];} 705 | 706 | // convert to lowercase 707 | if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key);} 708 | //elements that do NOT have the specified attribute 709 | if (isset($key[0]) && $key[0]==='!') {$key=substr($key, 1); $no_key=true;} 710 | 711 | $result[] = array($tag, $key, $val, $exp, $no_key); 712 | if (trim($m[7])===',') { 713 | $selectors[] = $result; 714 | $result = array(); 715 | } 716 | } 717 | if (count($result)>0) 718 | $selectors[] = $result; 719 | return $selectors; 720 | } 721 | 722 | function __get($name) { 723 | if (isset($this->attr[$name])) 724 | { 725 | return $this->convert_text($this->attr[$name]); 726 | } 727 | switch ($name) { 728 | case 'outertext': return $this->outertext(); 729 | case 'innertext': return $this->innertext(); 730 | case 'plaintext': return $this->text(); 731 | case 'xmltext': return $this->xmltext(); 732 | default: return array_key_exists($name, $this->attr); 733 | } 734 | } 735 | 736 | function __set($name, $value) { 737 | switch ($name) { 738 | case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value; 739 | case 'innertext': 740 | if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value; 741 | return $this->_[HDOM_INFO_INNER] = $value; 742 | } 743 | if 
(!isset($this->attr[$name])) { 744 | $this->_[HDOM_INFO_SPACE][] = array(' ', '', ''); 745 | $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; 746 | } 747 | $this->attr[$name] = $value; 748 | } 749 | 750 | function __isset($name) { 751 | switch ($name) { 752 | case 'outertext': return true; 753 | case 'innertext': return true; 754 | case 'plaintext': return true; 755 | } 756 | //no value attr: nowrap, checked selected... 757 | return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]); 758 | } 759 | 760 | function __unset($name) { 761 | if (isset($this->attr[$name])) 762 | unset($this->attr[$name]); 763 | } 764 | 765 | // PaperG - Function to convert the text from one character set to another if the two sets are not the same. 766 | function convert_text($text) 767 | { 768 | global $debugObject; 769 | if (is_object($debugObject)) {$debugObject->debugLogEntry(1);} 770 | 771 | $converted_text = $text; 772 | 773 | $sourceCharset = ""; 774 | $targetCharset = ""; 775 | 776 | if ($this->dom) 777 | { 778 | $sourceCharset = strtoupper($this->dom->_charset); 779 | $targetCharset = strtoupper($this->dom->_target_charset); 780 | } 781 | if (is_object($debugObject)) {$debugObject->debugLog(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} 782 | 783 | if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0)) 784 | { 785 | // Check if the reported encoding could have been incorrect and the text is actually already UTF-8 786 | if ((strcasecmp($targetCharset, 'UTF-8') == 0) && ($this->is_utf8($text))) 787 | { 788 | $converted_text = $text; 789 | } 790 | else 791 | { 792 | $converted_text = iconv($sourceCharset, $targetCharset, $text); 793 | } 794 | } 795 | 796 | // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output. 
797 | if ($targetCharset == 'UTF-8') 798 | { 799 | if (substr($converted_text, 0, 3) == "\xef\xbb\xbf") 800 | { 801 | $converted_text = substr($converted_text, 3); 802 | } 803 | if (substr($converted_text, -3) == "\xef\xbb\xbf") 804 | { 805 | $converted_text = substr($converted_text, 0, -3); 806 | } 807 | } 808 | 809 | return $converted_text; 810 | } 811 | 812 | /** 813 | * Returns true if $string is valid UTF-8 and false otherwise. 814 | * 815 | * @param mixed $str String to be tested 816 | * @return boolean 817 | */ 818 | static function is_utf8($str) 819 | { 820 | $c=0; $b=0; 821 | $bits=0; 822 | $len=strlen($str); 823 | for($i=0; $i<$len; $i++) 824 | { 825 | $c=ord($str[$i]); 826 | if($c > 128) 827 | { 828 | if(($c >= 254)) return false; 829 | elseif($c >= 252) $bits=6; 830 | elseif($c >= 248) $bits=5; 831 | elseif($c >= 240) $bits=4; 832 | elseif($c >= 224) $bits=3; 833 | elseif($c >= 192) $bits=2; 834 | else return false; 835 | if(($i+$bits) > $len) return false; 836 | while($bits > 1) 837 | { 838 | $i++; 839 | $b=ord($str[$i]); 840 | if($b < 128 || $b > 191) return false; 841 | $bits--; 842 | } 843 | } 844 | } 845 | return true; 846 | } 847 | /* 848 | function is_utf8($string) 849 | { 850 | //this is buggy 851 | return (utf8_encode(utf8_decode($string)) == $string); 852 | } 853 | */ 854 | 855 | /** 856 | * Function to try a few tricks to determine the displayed size of an img on the page. 857 | * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types. 858 | * 859 | * @author John Schlick 860 | * @version April 19 2012 861 | * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out. 862 | */ 863 | function get_display_size() 864 | { 865 | global $debugObject; 866 | 867 | $width = -1; 868 | $height = -1; 869 | 870 | if ($this->tag !== 'img') 871 | { 872 | return false; 873 | } 874 | 875 | // See if there is aheight or width attribute in the tag itself. 
876 | if (isset($this->attr['width'])) 877 | { 878 | $width = $this->attr['width']; 879 | } 880 | 881 | if (isset($this->attr['height'])) 882 | { 883 | $height = $this->attr['height']; 884 | } 885 | 886 | // Now look for an inline style. 887 | if (isset($this->attr['style'])) 888 | { 889 | // Thanks to user gnarf from stackoverflow for this regular expression. 890 | $attributes = array(); 891 | preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER); 892 | foreach ($matches as $match) { 893 | $attributes[$match[1]] = $match[2]; 894 | } 895 | 896 | // If there is a width in the style attributes: 897 | if (isset($attributes['width']) && $width == -1) 898 | { 899 | // check that the last two characters are px (pixels) 900 | if (strtolower(substr($attributes['width'], -2)) == 'px') 901 | { 902 | $proposed_width = substr($attributes['width'], 0, -2); 903 | // Now make sure that it's an integer and not something stupid. 904 | if (filter_var($proposed_width, FILTER_VALIDATE_INT)) 905 | { 906 | $width = $proposed_width; 907 | } 908 | } 909 | } 910 | 911 | // If there is a width in the style attributes: 912 | if (isset($attributes['height']) && $height == -1) 913 | { 914 | // check that the last two characters are px (pixels) 915 | if (strtolower(substr($attributes['height'], -2)) == 'px') 916 | { 917 | $proposed_height = substr($attributes['height'], 0, -2); 918 | // Now make sure that it's an integer and not something stupid. 919 | if (filter_var($proposed_height, FILTER_VALIDATE_INT)) 920 | { 921 | $height = $proposed_height; 922 | } 923 | } 924 | } 925 | 926 | } 927 | 928 | // Future enhancement: 929 | // Look in the tag to see if there is a class or id specified that has a height or width attribute to it. 
930 | 931 | // Far future enhancement 932 | // Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width 933 | // Note that in this case, the class or id will have the img subselector for it to apply to the image. 934 | 935 | // ridiculously far future development 936 | // If the class or id is specified in a SEPARATE css file thats not on the page, go get it and do what we were just doing for the ones on the page. 937 | 938 | $result = array('height' => $height, 939 | 'width' => $width); 940 | return $result; 941 | } 942 | 943 | // camel naming conventions 944 | function getAllAttributes() {return $this->attr;} 945 | function getAttribute($name) {return $this->__get($name);} 946 | function setAttribute($name, $value) {$this->__set($name, $value);} 947 | function hasAttribute($name) {return $this->__isset($name);} 948 | function removeAttribute($name) {$this->__set($name, null);} 949 | function getElementById($id) {return $this->find("#$id", 0);} 950 | function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);} 951 | function getElementByTagName($name) {return $this->find($name, 0);} 952 | function getElementsByTagName($name, $idx=null) {return $this->find($name, $idx);} 953 | function parentNode() {return $this->parent();} 954 | function childNodes($idx=-1) {return $this->children($idx);} 955 | function firstChild() {return $this->first_child();} 956 | function lastChild() {return $this->last_child();} 957 | function nextSibling() {return $this->next_sibling();} 958 | function previousSibling() {return $this->prev_sibling();} 959 | function hasChildNodes() {return $this->has_child();} 960 | function nodeName() {return $this->tag;} 961 | function appendChild($node) {$node->parent($this); return $node;} 962 | 963 | } 964 | 965 | /** 966 | * simple html dom parser 967 | * Paperg - in the find routine: allow us to specify that we want case insensitive testing of the value 
of the selector. 968 | * Paperg - change $size from protected to public so we can easily access it 969 | * Paperg - added ForceTagsClosed in the constructor which tells us whether we trust the html or not. Default is to NOT trust it. 970 | * 971 | * @package PlaceLocalInclude 972 | */ 973 | class Simple_html_dom 974 | { 975 | public $root = null; 976 | public $nodes = array(); 977 | public $callback = null; 978 | public $lowercase = false; 979 | // Used to keep track of how large the text was when we started. 980 | public $original_size; 981 | public $size; 982 | protected $pos; 983 | protected $doc; 984 | protected $char; 985 | protected $cursor; 986 | protected $parent; 987 | protected $noise = array(); 988 | protected $token_blank = " \t\r\n"; 989 | protected $token_equal = ' =/>'; 990 | protected $token_slash = " />\r\n\t"; 991 | protected $token_attr = ' >'; 992 | // Note that this is referenced by a child node, and so it needs to be public for that node to see this information. 993 | public $_charset = ''; 994 | public $_target_charset = ''; 995 | protected $default_br_text = ""; 996 | public $default_span_text = ""; 997 | 998 | // use isset instead of in_array, performance boost about 30%... 999 | protected $self_closing_tags = array('img'=>1, 'br'=>1, 'input'=>1, 'meta'=>1, 'link'=>1, 'hr'=>1, 'base'=>1, 'embed'=>1, 'spacer'=>1); 1000 | protected $block_tags = array('root'=>1, 'body'=>1, 'form'=>1, 'div'=>1, 'span'=>1, 'table'=>1, 'tbody'=>1); 1001 | // Known sourceforge issue #2977341 1002 | // B tags that are not closed cause us to return everything to the end of the document. 
1003 | protected $optional_closing_tags = array( 1004 | 'tr'=>array('tr'=>1, 'td'=>1, 'th'=>1), 1005 | 'th'=>array('th'=>1), 1006 | 'td'=>array('td'=>1), 1007 | 'li'=>array('li'=>1), 1008 | 'dt'=>array('dt'=>1, 'dd'=>1), 1009 | 'dd'=>array('dd'=>1, 'dt'=>1), 1010 | 'dl'=>array('dd'=>1, 'dt'=>1), 1011 | 'p'=>array('p'=>1), 1012 | 'nobr'=>array('nobr'=>1), 1013 | 'b'=>array('b'=>1), 1014 | 'option'=>array('option'=>1), 1015 | ); 1016 | 1017 | function __construct($str=null, $lowercase=true, $forceTagsClosed=true, $target_charset=DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 1018 | { 1019 | if ($str) 1020 | { 1021 | if (preg_match("/^http:\/\//i",$str) || is_file($str)) 1022 | { 1023 | $this->load_file($str); 1024 | } 1025 | else 1026 | { 1027 | $this->load($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); 1028 | } 1029 | } 1030 | // Forcing tags to be closed implies that we don't trust the html, but it can lead to parsing errors if we SHOULD trust the html. 1031 | if (!$forceTagsClosed) { 1032 | $this->optional_closing_array=array(); 1033 | } 1034 | $this->_target_charset = $target_charset; 1035 | } 1036 | 1037 | function __destruct() 1038 | { 1039 | $this->clear(); 1040 | } 1041 | 1042 | // load html from string 1043 | function load($str, $lowercase=true, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 1044 | { 1045 | global $debugObject; 1046 | 1047 | // prepare 1048 | $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); 1049 | // strip out comments 1050 | $this->remove_noise("''is"); 1051 | // strip out cdata 1052 | $this->remove_noise("''is", true); 1053 | // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037 1054 | // Script tags removal now preceeds style tag removal. 1055 | // strip out