├── .github └── workflows │ └── tests.yml ├── .gitignore ├── LICENSE ├── PrettyMin.php ├── README.md ├── Tests ├── PrettyMinTest.php └── benchmark.php ├── composer.json └── phpunit.xml.dist /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | workflow_dispatch: 11 | 12 | jobs: 13 | php: 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | php: [8.0, 8.1, 8.2, 8.3] 19 | dependency-version: [prefer-lowest, prefer-stable] 20 | 21 | steps: 22 | - name: checkout code 23 | uses: actions/checkout@v4 24 | 25 | - name: setup PHP 26 | uses: shivammathur/setup-php@v2 27 | with: 28 | php-version: ${{ matrix.php }} 29 | coverage: xdebug 30 | 31 | - name: install dependencies 32 | run: composer update --${{ matrix.dependency-version }} 33 | 34 | - name: run tests 35 | run: php vendor/bin/phpunit 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | composer.lock 3 | composer.phar 4 | vendor 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2025 Christoph Singer 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies 
or substantial portions of the Software. 12 | 13 | The software is provided "as is", without warranty of any kind, express or 14 | implied, including but not limited to the warranties of merchantability, 15 | fitness for a particular purpose and noninfringement. In no event shall the 16 | authors or copyright holders be liable for any claim, damages or other 17 | liability, whether in an action of contract, tort or otherwise, arising from, 18 | out of or in connection with the software or the use or other dealings in 19 | the software. 20 | -------------------------------------------------------------------------------- /PrettyMin.php: --------------------------------------------------------------------------------
<?php
// NOTE(review): everything from the opening tag down to the first statement of
// the constructor was lost when this listing was extracted. The namespace, the
// imports, the two properties and the constructor head below are reconstructed
// from composer.json's autoload section and from the names the visible code
// uses (OptionsResolver, JSMin, CSSmin, $this->doc, $this->options).
// Confirm against the repository before relying on them.

namespace Wa72\HtmlPrettymin;

use JSMin\JSMin;
use Symfony\Component\OptionsResolver\OptionsResolver;
use tubalmartin\CssMin\Minifier as CSSmin;

/**
 * HTML minifier and indenter that works directly on the DOM tree of a document.
 *
 * Return types are documented in PHPDoc only: the methods are overridable, and
 * adding native return types would break subclasses that override them.
 */
class PrettyMin
{
    /**
     * The document currently being processed; set by load().
     *
     * @var \DOMDocument
     */
    protected $doc;

    /**
     * Options resolved in the constructor (see configureOptions() for the keys).
     *
     * @var array
     */
    protected $options;

    /**
     * @param array $options Overrides for the defaults declared in configureOptions()
     */
    public function __construct($options = [])
    {
        $resolver = new OptionsResolver();
        $this->configureOptions($resolver);
        $this->options = $resolver->resolve($options);
    }

    /**
     * Declare the supported option keys and their default values.
     *
     * @param OptionsResolver $resolver
     */
    public function configureOptions(OptionsResolver $resolver)
    {
        $resolver->setDefaults([
            'minify_js' => true,
            'minify_css' => true,
            'remove_comments' => true,
            // Comments whose text matches one of these patterns are kept.
            // (The misspelled key is part of the public option API.)
            'remove_comments_exeptions' => ['/^\[if /'],
            'keep_whitespace_around' => [
                // keep whitespace around inline elements
                'b', 'big', 'i', 'small', 'tt',
                'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var',
                'a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'span', 'sub', 'sup',
                'button', 'input', 'label', 'select', 'textarea'
            ],
            'keep_whitespace_in' => ['script', 'style', 'pre'],
            'remove_empty_attributes' => ['style', 'class'],
            'indent_characters' => "\t"
        ]);
    }

    /**
     * Load an HTML document
     *
     * A \DOMDocument is used as-is and a \DOMElement contributes its owner
     * document, so both are modified in place by the later operations.
     * An \SplFileInfo or a string is parsed into a new document.
     *
     * @param \DOMDocument|\DOMElement|\SplFileInfo|string $html
     * @return PrettyMin
     */
    public function load($html)
    {
        if ($html instanceof \DOMDocument) {
            $d = $html;
        } elseif ($html instanceof \DOMElement) {
            $d = $html->ownerDocument;
        } elseif ($html instanceof \SplFileInfo) {
            $d = new \DOMDocument();
            $d->preserveWhiteSpace = false;
            $d->validateOnParse = true;
            $d->loadHTMLFile($html->getPathname());
        } else {
            $d = new \DOMDocument();
            $d->preserveWhiteSpace = false;
            $d->validateOnParse = true;
            $d->loadHTML($html);
        }
        $d->formatOutput = false;
        $d->normalizeDocument();
        $this->doc = $d;
        return $this;
    }

    /**
     * Minify the loaded HTML document
     *
     * The keys minify_js, minify_css, remove_comments and
     * remove_empty_attributes may be overridden for this call only; each
     * defaults to the value given to the constructor. Whitespace removal
     * always runs.
     *
     * @param array $options
     * @return PrettyMin
     */
    public function minify($options = [])
    {
        $resolver = new OptionsResolver();
        $resolver->setDefaults([
            'minify_js' => $this->options['minify_js'],
            'minify_css' => $this->options['minify_css'],
            'remove_comments' => $this->options['remove_comments'],
            'remove_empty_attributes' => $this->options['remove_empty_attributes']
        ]);
        $options = $resolver->resolve($options);

        if ($options['minify_js']) {
            $this->minifyJavascript();
        }
        if ($options['minify_css']) {
            $this->minifyCss();
        }
        if ($options['remove_comments']) {
            $this->removeComments();
        }
        if ($options['remove_empty_attributes']) {
            // Hand over the resolved value so that a per-call attribute list
            // is honoured, not just its truthiness.
            $this->removeEmptyAttributes($options['remove_empty_attributes']);
        }

        $this->removeWhitespace();

        return $this;
    }

    /**
     * nicely indent HTML code
     *
     * @return PrettyMin
     */
    public function indent()
    {
        $this->removeWhitespace();
        $root = $this->doc->documentElement;
        // A document without a root element has nothing to indent.
        if ($root !== null) {
            $this->indentRecursive($root, 0);
        }
        return $this;
    }

    /**
     * Get the DOMDocument
     *
     * @return \DOMDocument
     */
    public function getDomDocument()
    {
        return $this->doc;
    }

    /**
     * Get the HTML code as string
     *
     * This is a shortcut for calling $this->getDomDocument()->saveHTML()
     *
     * @return string
     */
    public function saveHtml()
    {
        return $this->doc->saveHTML();
    }

    /**
     * Minify the content of every script element with JSMin.
     *
     * The element's type attribute is not inspected. A script element with
     * neither content nor a src attribute is removed from the document.
     */
    protected function minifyJavascript()
    {
        $to_be_removed = [];
        /** @var \DOMElement $element */
        foreach ($this->doc->getElementsByTagName('script') as $element) {
            $code = $element->textContent;
            $element->nodeValue = '';
            // Compare against '' so that a script consisting of "0" is not
            // mistaken for an empty one.
            if (trim($code) !== '') {
                $element->appendChild($this->doc->createCDATASection(JSMin::minify($code)));
            } elseif (!$element->hasAttribute('src')) {
                // Removal is deferred until the iteration is finished.
                $to_be_removed[] = $element;
            }
        }
        foreach ($to_be_removed as $element) {
            $element->parentNode->removeChild($element);
        }
    }

    /**
     * Minify the content of every style element with CSSmin.
     *
     * A style element without content is removed from the document.
     */
    protected function minifyCss()
    {
        $to_be_removed = [];
        $minifier = null;
        /** @var \DOMElement $element */
        foreach ($this->doc->getElementsByTagName('style') as $element) {
            $code = $element->nodeValue;
            $element->nodeValue = '';
            if (trim($code) !== '') {
                // One minifier instance is enough for the whole document.
                $minifier = $minifier ?? new CSSmin();
                $element->appendChild($this->doc->createCDATASection(trim($minifier->run($code))));
            } else {
                // Removal is deferred until the iteration is finished.
                $to_be_removed[] = $element;
            }
        }
        foreach ($to_be_removed as $element) {
            $element->parentNode->removeChild($element);
        }
    }

    /**
     * Remove the given attributes wherever their trimmed value is empty.
     *
     * @param string|string[]|null $attributes Attribute name(s); null uses the
     *                                         'remove_empty_attributes' option
     */
    protected function removeEmptyAttributes($attributes = null)
    {
        if ($attributes === null) {
            $attributes = $this->options['remove_empty_attributes'];
        }
        if (!$attributes) {
            return;
        }
        if (is_string($attributes)) {
            $attributes = [$attributes];
        }
        if (!is_array($attributes)) {
            return;
        }

        $xpath = new \DOMXPath($this->doc);
        foreach ($attributes as $attr) {
            /** @var \DOMElement $el */
            foreach ($xpath->query('//*[@' . $attr . ']') as $el) {
                if (trim($el->getAttribute($attr)) === '') {
                    $el->removeAttribute($attr);
                }
            }
        }
    }

    /**
     * Remove all comments except those matching one of the exception patterns.
     *
     * @param string[]|null $exception_patterns PCRE patterns; null uses the
     *                                          'remove_comments_exeptions' option
     */
    protected function removeComments($exception_patterns = null)
    {
        if ($exception_patterns === null) {
            $exception_patterns = $this->options['remove_comments_exeptions'];
        }
        $xpath = new \DOMXPath($this->doc);
        /** @var \DOMNode $comment */
        foreach ($xpath->query('//comment()') as $comment) {
            $remove = true;
            foreach ($exception_patterns as $exception) {
                if (preg_match($exception, $comment->textContent)) {
                    $remove = false;
                    break;
                }
            }
            if ($remove) {
                $comment->parentNode->removeChild($comment);
            }
        }
    }

    /**
     * Collapse and trim whitespace in text nodes.
     *
     * Text inside 'keep_whitespace_in' elements is left alone. Elsewhere,
     * line breaks and tabs become spaces and runs of spaces collapse to one.
     * Leading/trailing space is trimmed unless the text sits inside, or next
     * to, a 'keep_whitespace_around' element. Text nodes that end up empty
     * are removed.
     *
     * originally based on http://stackoverflow.com/a/18260955
     */
    protected function removeWhitespace()
    {
        $keep_in = $this->options['keep_whitespace_in'];
        $keep_around = $this->options['keep_whitespace_around'];
        $xpath = new \DOMXPath($this->doc);

        // Skip text below any ancestor whose whitespace must be preserved.
        $ancestor_tests = array_map(function ($nodeName) {
            return "ancestor::$nodeName";
        }, $keep_in);
        $filter = implode(' or ', $ancestor_tests);
        $query = $filter ? "//*[not($filter)]/text()" : '//text()';

        /** @var \DOMNode $node */
        foreach ($xpath->query($query) as $node) {
            $parent = $node->parentNode;
            if (in_array($parent->nodeName, $keep_in, true)) {
                continue;
            }

            $text = str_replace(["\r", "\n", "\t"], ' ', $node->nodeValue);
            // A single pass with an explicit quantifier: it cannot loop and
            // does not depend on a literal made of exactly two spaces.
            $text = preg_replace('/ {2,}/', ' ', $text) ?? $text;

            if (!in_array($parent->nodeName, $keep_around, true)) {
                $previous = $node->previousSibling;
                if (!($previous && in_array($previous->nodeName, $keep_around, true))) {
                    $text = ltrim($text);
                }
                $next = $node->nextSibling;
                if (!($next && in_array($next->nodeName, $keep_around, true))) {
                    $text = rtrim($text);
                }
            }

            if ($text === '') {
                $parent->removeChild($node);
            } else {
                $node->nodeValue = $text;
            }
        }
    }

    /**
     * indent HTML code
     *
     * Inserts text nodes made of a newline plus $depth copies of the
     * 'indent_characters' option before nodes that start on their own line,
     * and before the closing tag of nodes whose children were indented.
     *
     * originally based on http://stackoverflow.com/a/18260955
     *
     * @param \DOMNode $currentNode
     * @param int $depth
     * @return bool Whether $currentNode itself was treated as indented
     */
    protected function indentRecursive(\DOMNode $currentNode, $depth)
    {
        $indent_characters = $this->options['indent_characters'];
        $line_start = "\n" . str_repeat($indent_characters, $depth);

        // Text nodes stay on the line of their surrounding markup.
        $indentCurrent = ($currentNode->nodeType !== XML_TEXT_NODE);
        $indentChildren = true;
        $indentClosingTag = false;

        if (in_array($currentNode->nodeName, $this->options['keep_whitespace_in'], true)) {
            // Content is left untouched; the closing tag gets its own line
            // only when the content already spans several lines.
            $indentCurrent = true;
            $indentChildren = false;
            $indentClosingTag = str_contains((string) $currentNode->nodeValue, "\n");
        }

        if (in_array($currentNode->nodeName, $this->options['keep_whitespace_around'], true)) {
            $indentCurrent = false;
        }

        if ($indentCurrent && $depth > 0) {
            // Indenting a node consists of inserting before it a new text node
            // containing a newline followed by the indentation for its depth.
            $currentNode->parentNode->insertBefore(
                $currentNode->ownerDocument->createTextNode($line_start),
                $currentNode
            );
        }

        if ($indentCurrent && $indentChildren && $currentNode->childNodes) {
            // Iterate over a snapshot: the recursion inserts sibling text nodes.
            foreach (iterator_to_array($currentNode->childNodes) as $childNode) {
                // The result for the last child decides about the closing tag.
                $indentClosingTag = $this->indentRecursive($childNode, $depth + 1);
            }
        }

        if ($indentClosingTag) {
            // If children have been indented, then the closing tag
            // of the current node must also be indented.
            $last = $currentNode->lastChild;
            $ends_with_text = $last
                && ($last->nodeType === XML_CDATA_SECTION_NODE || $last->nodeType === XML_TEXT_NODE);
            if ($ends_with_text && preg_match('/\n\s?$/', $last->textContent)) {
                // Reuse the trailing line break instead of adding a second one.
                $last->nodeValue = preg_replace('/\n\s?$/', $line_start, $last->nodeValue);
            } else {
                $currentNode->appendChild($currentNode->ownerDocument->createTextNode($line_start));
            }
        }

        return $indentCurrent;
    }
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HTML Pretty-Min 2 | =============== 3 | 4 | ![tests](https://github.com/wasinger/html-pretty-min/actions/workflows/tests.yml/badge.svg?branch=master) 5 | [![Latest Version](http://img.shields.io/packagist/v/wa72/html-pretty-min.svg)](https://packagist.org/packages/wa72/html-pretty-min) 6 | 7 | 8 | HTML Pretty-Min is a PHP library for minifying and prettyprinting (indenting) HTML documents 9 | that works directly on the DOM tree of an HTML document. 
10 | 11 | Currently, it has the following features: 12 | 13 | - **Prettyprint**: 14 | - Indent Block-level elements, do not indent inline elements 15 | 16 | - **Minify**: 17 | - Remove whitespace and newlines 18 | - Compress embedded Javascript using [mrclay/jsmin-php](https://packagist.org/packages/mrclay/jsmin-php) 19 | - Compress embedded CSS using [tubalmartin/cssmin](https://packagist.org/packages/tubalmartin/cssmin) 20 | - Remove some attributes when their value is empty (by default "style" and "class" attributes) 21 | - Remove comments, except those matching some given regular expressions (by default, IE conditional comments are kept) 22 | 23 | Installation 24 | ------------ 25 | 26 | HTML Pretty-Min is listed on [Packagist](https://packagist.org/packages/wa72/html-pretty-min). 27 | 28 | `composer require wa72/html-pretty-min` 29 | 30 | Usage 31 | ----- 32 | 33 | ```php 34 | <?php 35 | use Wa72\HtmlPrettymin\PrettyMin; 36 | 37 | $pm = new PrettyMin(); 38 | 39 | $output = $pm 40 | ->load($html) // $html may be a \DOMDocument, a \DOMElement, a string containing HTML code, 41 | // or an \SplFileInfo pointing to an HTML document 42 | ->minify() 43 | ->saveHtml(); 44 | ``` 45 | 46 | For prettyprinting, call the `indent()` method instead of `minify()`. 47 | 48 | 49 | The `PrettyMin()` constructor can be given an associative options array. 
Here are the possible option keys and their default values: 50 | ``` 51 | 'minify_js' => true, 52 | 'minify_css' => true, 53 | 'remove_comments' => true, 54 | 'remove_comments_exeptions' => ['/^\[if /'], 55 | 'keep_whitespace_around' => [ 56 | // keep whitespace around all inline elements 57 | 'b', 'big', 'i', 'small', 'tt', 58 | 'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var', 59 | 'a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'span', 'sub', 'sup', 60 | 'button', 'input', 'label', 'select', 'textarea' 61 | ], 62 | 'keep_whitespace_in' => ['script', 'style', 'pre'], 63 | 'remove_empty_attributes' => ['style', 'class'], 64 | 'indent_characters' => "\t" 65 | ``` 66 | 67 | 68 | **Attention**: Because the formatting is done directly on the DOM tree, a DOMDocument object given to the `load()` method 69 | will be modified: 70 | 71 | ```php 72 | $dom_document = new \DOMDocument('1.0', 'UTF-8'); 73 | $dom_document->loadHTML('...some html code...'); 74 | 75 | $pm->load($dom_document)->minify(); 76 | 77 | echo $dom_document->saveHTML(); // Will output the minified, not the original, document 78 | ``` 79 | -------------------------------------------------------------------------------- /Tests/PrettyMinTest.php: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | Test 19 | 20 | 29 | 34 | 35 | 36 |

Test

37 |
38 |

This is bold 39 | Text. 40 | And some more text, still in the same paragraph. 41 | Inline tag whith whitespace at the end but not after. 42 |

This is another paragraph with a link. 43 |

44 |
45 |
46 |
47 | 48 | 49 | HTML; 50 | return $html; 51 | } 52 | public function testMinify() 53 | { 54 | $pm = new PrettyMin(); 55 | $pm->load($this->getHtmlDocument()); 56 | $pm->minify(); 57 | 58 | $expected = << 60 | Test

Test

This is bold Text. And some more text, still in the same paragraph. Inline tag whith whitespace at the end but not after.

This is another paragraph with a link.

61 | 62 | HTML; 63 | 64 | 65 | $this->assertEquals($expected, $pm->saveHtml()); 66 | } 67 | 68 | public function testLoadOnDOMDocument() 69 | { 70 | $doc = new \DOMDocument(); 71 | $doc->loadHTML("Test
"); 72 | $pm = new PrettyMin(); 73 | $pm->load($doc); 74 | $pm->minify(); 75 | 76 | $expected = << 78 | Test
79 | 80 | HTML; 81 | 82 | 83 | $this->assertEquals($expected, $pm->saveHtml()); 84 | } 85 | 86 | public function testGetDomDocument() 87 | { 88 | $doc = new \DOMDocument(); 89 | $doc->loadHTML("Test
"); 90 | $pm = new PrettyMin(); 91 | $pm->load($doc); 92 | $pm->minify(); 93 | 94 | $this->assertInstanceOf('\DOMDocument', $pm->getDomDocument()); 95 | } 96 | 97 | public function testIndent() 98 | { 99 | $pm = new PrettyMin(); 100 | $pm->load($this->getHtmlDocument()); 101 | $pm->indent(); 102 | $expected = << 104 | 105 | 106 | Test 107 | 108 | 117 | 122 | 123 | 124 |

Test

125 |
126 |
127 |

This is bold Text. And some more text, still in the same paragraph. Inline tag whith whitespace at the end but not after.

128 |

This is another paragraph with a link.

129 |
130 |
131 |
132 | 133 | 134 | 135 | HTML; 136 | 137 | $this->assertEquals($expected, $pm->saveHtml()); 138 | } 139 | 140 | public function testIndentWithSpaces() 141 | { 142 | $pm = new PrettyMin(['indent_characters' => ' ']); 143 | $pm->load($this->getHtmlDocument()); 144 | $pm->indent(); 145 | $expected = << 147 | 148 | 149 | Test 150 | 151 | 160 | 165 | 166 | 167 |

Test

168 |
169 |
170 |

This is bold Text. And some more text, still in the same paragraph. Inline tag whith whitespace at the end but not after.

171 |

This is another paragraph with a link.

172 |
173 |
174 |
175 | 176 | 177 | 178 | HTML; 179 | 180 | $this->assertEquals($expected, $pm->saveHtml()); 181 | } 182 | 183 | public function testPre() 184 | { 185 | $pre = << 190 | if test 191 | do this 192 | endif 193 | 194 | 195 | if test 196 | do this 197 | endif 198 | 199 | 200 | if test 201 | do this 202 | endif 203 | 204 | HTML; 205 | 206 | $html = "
{$pre}
"; 207 | 208 | $pm = new PrettyMin(); 209 | $pm->load($html); 210 | $pm->indent(); 211 | 212 | // Contents of the
 section needs to match perfectly
213 |         preg_match('#
(.*)
#ms', $pm->saveHtml(), $match); 214 | 215 | $this->assertEquals(trim($pre), trim($match[1])); // Trailing and leading whitespace is allowed to be different 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /Tests/benchmark.php: -------------------------------------------------------------------------------- 1 | query('//*[@' . $attr . ']') as $el) { 16 | if (trim($el->getAttribute($attr)) == '') { 17 | $el->removeAttribute($attr); 18 | } 19 | } 20 | } 21 | }); 22 | 23 | runTest('Select empty attributes using XPath string functions', function($d) { 24 | $xpath = new \DOMXPath($d); 25 | foreach (['style', 'class'] as $attr) { 26 | /** @var \DOMElement $el */ 27 | foreach ($xpath->query('//*[string-length(normalize-space(@' . $attr . ')) = 0]') as $el) { 28 | $el->removeAttribute($attr); 29 | } 30 | } 31 | }); 32 | 33 | runTest('Select empty attributes using XPath | operator and PHP test for empty string', function($d) { 34 | $xpath = new \DOMXPath($d); 35 | $query = ''; 36 | foreach (['style', 'class'] as $no => $attr) { 37 | $query .= ($no == 0 ? '' : ' | ') . '//@' . $attr; 38 | } 39 | 40 | // echo "Query: $query \n"; 41 | /** @var \DOMNode $attr */ 42 | foreach ($xpath->query($query) as $no => $attr) { 43 | // if ($no < 10) echo "Found attr " . $attr->nodeName . " with value: " . $attr->textContent . "\n"; 44 | if (trim($attr->textContent) == '') { 45 | $attr->parentNode->removeAttribute($attr->nodeName); 46 | } 47 | } 48 | }); 49 | 50 | runTest('Remove whitespace v1', function($d) { 51 | $x = new \DOMXPath($d); 52 | $keep_whitespace_in = ['pre', 'style', 'script']; 53 | $keep_whitespace_around = ['a', 'b', 'i']; 54 | $nodeList = $x->query("//text()"); 55 | foreach($nodeList as $node) { 56 | /** @var \DOMNode $node */ 57 | 58 | if (in_array($node->parentNode->nodeName, $keep_whitespace_in)) { 59 | continue; 60 | }; 61 | 62 | // 1. "Trim" each text node by removing its leading and trailing spaces and newlines. 
63 | // Modified by CS: keep whitespace around inline elements 64 | if (in_array($node->parentNode->nodeName, $keep_whitespace_around)) { 65 | $replacement = ' '; 66 | } else { 67 | $replacement = ''; 68 | } 69 | 70 | $r_replacement = $replacement; 71 | if ($node->previousSibling && in_array($node->previousSibling->nodeName, $keep_whitespace_around)) { 72 | $r_replacement = ' '; 73 | } 74 | $node->nodeValue = preg_replace('/^[\s\r\n]+/', $r_replacement, $node->nodeValue); 75 | 76 | $l_replacement = $replacement; 77 | if ($node->nextSibling && in_array($node->nextSibling->nodeName, $keep_whitespace_around)) { 78 | $l_replacement = ' '; 79 | } 80 | $node->nodeValue = preg_replace('/[\s\r\n]+$/', $l_replacement, $node->nodeValue); 81 | 82 | $node->nodeValue = preg_replace('/[\s]+/', ' ', $node->nodeValue); 83 | 84 | 85 | // 2. Resulting text node may have become "empty" (zero length nodeValue) after trim. If so, remove it from the dom. 86 | if((strlen($node->nodeValue) == 0)) { 87 | $node->parentNode->removeChild($node); 88 | } 89 | } 90 | }); 91 | 92 | 93 | runTest('Remove whitespace v2', function($d) { 94 | $x = new \DOMXPath($d); 95 | $keep_whitespace_in = ['pre', 'style', 'script']; 96 | $keep_whitespace_around = ['a', 'b', 'i']; 97 | $nodeList = $x->query("//text()"); 98 | foreach($nodeList as $node) { 99 | /** @var \DOMNode $node */ 100 | 101 | if (in_array($node->parentNode->nodeName, $keep_whitespace_in)) { 102 | continue; 103 | }; 104 | 105 | $node->nodeValue = str_replace(["\r", "\n", "\t"], ' ', $node->nodeValue); 106 | $node->nodeValue = preg_replace('/ {2,}/', ' ', $node->nodeValue); 107 | 108 | // 1. "Trim" each text node by removing its leading and trailing spaces and newlines. 
109 | if (!($node->previousSibling && in_array($node->previousSibling->nodeName, $keep_whitespace_around))) { 110 | $node->nodeValue = ltrim($node->nodeValue); 111 | } 112 | 113 | if (!($node->nextSibling && in_array($node->nextSibling->nodeName, $keep_whitespace_around))) { 114 | $node->nodeValue = rtrim($node->nodeValue); 115 | } 116 | 117 | if((strlen($node->nodeValue) == 0)) { 118 | $node->parentNode->removeChild($node); 119 | } 120 | } 121 | }); 122 | 123 | 124 | runTest('Remove whitespace v3', function($d) { 125 | $x = new \DOMXPath($d); 126 | $keep_whitespace_in = ['pre', 'style', 'script']; 127 | $keep_whitespace_around = ['a', 'b', 'i']; 128 | $nodeList = $x->query("//text()"); 129 | foreach($nodeList as $node) { 130 | /** @var \DOMNode $node */ 131 | 132 | if (in_array($node->parentNode->nodeName, $keep_whitespace_in)) { 133 | continue; 134 | }; 135 | 136 | $node->nodeValue = str_replace(["\r", "\n", "\t"], ' ', $node->nodeValue); 137 | while (strpos($node->nodeValue, ' ') !== false) { 138 | $node->nodeValue = str_replace(' ', ' ', $node->nodeValue); 139 | } 140 | 141 | 142 | // 1. "Trim" each text node by removing its leading and trailing spaces and newlines. 
143 | if (!($node->previousSibling && in_array($node->previousSibling->nodeName, $keep_whitespace_around))) { 144 | $node->nodeValue = ltrim($node->nodeValue); 145 | } 146 | 147 | if (!($node->nextSibling && in_array($node->nextSibling->nodeName, $keep_whitespace_around))) { 148 | $node->nodeValue = rtrim($node->nodeValue); 149 | } 150 | 151 | if((strlen($node->nodeValue) == 0)) { 152 | $node->parentNode->removeChild($node); 153 | } 154 | } 155 | }); 156 | 157 | runTest('Remove whitespace v4', function($d) { 158 | $x = new \DOMXPath($d); 159 | $keep_whitespace_in = ['pre', 'style', 'script']; 160 | $keep_whitespace_around = ['a', 'b', 'i']; 161 | $nodeList = $x->query("//text()"); 162 | foreach($nodeList as $node) { 163 | /** @var \DOMNode $node */ 164 | 165 | if (in_array($node->parentNode->nodeName, $keep_whitespace_in)) { 166 | continue; 167 | }; 168 | 169 | $node->nodeValue = str_replace(["\r", "\n", "\t"], ' ', $node->nodeValue); 170 | while (strpos($node->nodeValue, ' ') !== false) { 171 | $node->nodeValue = str_replace(' ', ' ', $node->nodeValue); 172 | } 173 | 174 | 175 | // 1. "Trim" each text node by removing its leading and trailing spaces and newlines. 176 | if (substr($node->nodeValue, 0, 1) == ' ' && !($node->previousSibling && in_array($node->previousSibling->nodeName, $keep_whitespace_around))) { 177 | $node->nodeValue = ltrim($node->nodeValue); 178 | } 179 | 180 | if (substr($node->nodeValue, -1) == ' ' && !($node->nextSibling && in_array($node->nextSibling->nodeName, $keep_whitespace_around))) { 181 | $node->nodeValue = rtrim($node->nodeValue); 182 | } 183 | 184 | if((strlen($node->nodeValue) == 0)) { 185 | $node->parentNode->removeChild($node); 186 | } 187 | } 188 | }); 189 | 190 | ////// helper functions ////// 191 | 192 | function runTest($description, callable $function) 193 | { 194 | echo "\n" . 
$description; 195 | $d = createTestDocument(); 196 | $begin = microtime(true); 197 | call_user_func($function, $d); 198 | $runtime = microtime(true) - $begin; 199 | echo "\nRuntime: $runtime\n"; 200 | } 201 | 202 | /** 203 | * @return DOMDocument 204 | */ 205 | function createTestDocument() 206 | { 207 | $d = new \DOMDocument('1.0', 'UTF-8'); 208 | $html = 'Benchmark'; 209 | $d->loadHTML($html); 210 | $body = $d->getElementsByTagName('body')->item(0); 211 | $text = <<createElement('p', $text); 223 | $p->appendChild($d->createElement('b', ' Lorem ipsum dolor sit amet, consetetur sadipscing elitr ')); 224 | $p->appendChild($d->createElement('a', 'Stet clita kasd gubergren')); 225 | $p->appendChild($d->createTextNode(' At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd 226 | gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. ')); 227 | $p->setAttribute('class', ($i % 2 ? 'test' : '')); 228 | $p->setAttribute('style', ($i % 2 ? ' ' : 'color: red;')); 229 | $body->appendChild ($p); 230 | } 231 | return $d; 232 | } 233 | 234 | 235 | 236 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wa72/html-pretty-min", 3 | "description":"HTML minifier and indenter that works on the DOM tree", 4 | "type":"library", 5 | "license":"MIT", 6 | "require": { 7 | "php": ">=8", 8 | "mrclay/jsmin-php": "^2.3", 9 | "tubalmartin/cssmin": "^4", 10 | "symfony/options-resolver": ">=2.3" 11 | }, 12 | "require-dev": { 13 | "phpunit/phpunit": "^9" 14 | }, 15 | "autoload":{ 16 | "psr-4":{ 17 | "Wa72\\HtmlPrettymin\\": "" 18 | } 19 | }, 20 | "autoload-dev": { 21 | "psr-4": { 22 | "Wa72\\HtmlPrettymin\\Tests\\": "Tests" 23 | } 24 | }, 25 | "authors": [ 26 | { 27 | "name": "Christoph Singer", 28 | "email": "singer@webagentur72.de" 29 | } 30 | ] 31 | } 32 | 
-------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ./Tests/ 7 | 8 | 9 | 10 | 11 | 12 | ./PrettyMin.php 13 | 14 | 15 | 16 | --------------------------------------------------------------------------------