├── .build.props ├── .gitattributes ├── .gitchange ├── .github ├── CONTRIBUTING.md └── ISSUE_TEMPLATE.md ├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── LICENSE.txt ├── README.md ├── _config.yml ├── build.xml ├── composer.json ├── composer.lock ├── src ├── .htaccess └── includes │ ├── classes │ ├── Benchmark.php │ ├── Core.php │ └── HookApi.php │ ├── phar-stub.php │ └── stub.php └── tests ├── .htaccess └── test.php /.build.props: -------------------------------------------------------------------------------- 1 | project_title = WebSharks HTML Compressor 2 | 3 | project_owner = websharks 4 | project_slug = html-compressor 5 | 6 | project_text_domain = html-compressor 7 | project_slack_channel = html-compressor 8 | 9 | project_namespace = WebSharks\\HtmlCompressor 10 | project_sub_namespace = HtmlCompressor 11 | 12 | project_version = %y%m%d.%now 13 | 14 | project_php_required_version = 5.4 15 | project_php_tested_up_to_version = ${php.version} 16 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Text/Binary ---------------------------------------------------------------------------------------------------------- 2 | 3 | # Default 4 | 5 | * text=auto 6 | 7 | # Text 8 | 9 | *.md text 10 | *.txt text 11 | 12 | # Logs 13 | 14 | *.log text 15 | 16 | # YAML 17 | 18 | *.yml 19 | 20 | # Properties. 
21 | 22 | *.properties text 23 | 24 | # Translations 25 | 26 | *.pot text 27 | 28 | # SQL 29 | 30 | *.sql text 31 | *.sqlite text 32 | 33 | # Templates 34 | 35 | *.tmpl text 36 | *.tpl text 37 | 38 | # Config 39 | 40 | *.admins text 41 | *.cfg text 42 | *.cnf text 43 | *.conf text 44 | *.htaccess text 45 | *.htaccess-apache text 46 | *.htpasswd text 47 | *.ini text 48 | 49 | # CSS 50 | 51 | *.css text 52 | *.sass text 53 | *.scss text 54 | *.less text 55 | *.css.map text 56 | 57 | # JavaScript 58 | 59 | *.js text 60 | *.ts text 61 | *.jsx text 62 | *.tsx text 63 | *.json text 64 | *.js.map text 65 | 66 | # Applescript 67 | 68 | *.applescript text 69 | *.scpt binary 70 | 71 | # PHP 72 | 73 | *.inc text 74 | *.php text 75 | *.php4 text 76 | *.php5 text 77 | *.php6 text 78 | *.phps text 79 | *.x-php text 80 | 81 | # ASP 82 | 83 | *.asp text 84 | *.aspx text 85 | 86 | # Perl 87 | 88 | *.cgi text 89 | *.pl text 90 | 91 | # HTML/XML 92 | 93 | *.dtd text 94 | *.hta text 95 | *.htc text 96 | *.htm text 97 | *.html text 98 | *.shtml text 99 | *.xhtml text 100 | *.xml text 101 | *.xsl text 102 | *.xslt text 103 | *.xsd text 104 | 105 | # Documents 106 | 107 | *.csv text 108 | *.doc binary 109 | *.docx binary 110 | *.odt binary 111 | *.pdf binary 112 | *.rtf binary 113 | *.xls binary 114 | 115 | # Images 116 | 117 | *.ai binary 118 | *.bmp binary 119 | *.eps binary 120 | *.gif binary 121 | *.ico binary 122 | *.jpe binary 123 | *.jpeg binary 124 | *.jpg binary 125 | *.png binary 126 | *.webp binary 127 | *.psd binary 128 | *.svg text 129 | *.tif binary 130 | *.tiff binary 131 | *.pspimage binary 132 | 133 | # Audio 134 | 135 | *.mid binary 136 | *.midi binary 137 | *.mp3 binary 138 | *.wav binary 139 | *.wma binary 140 | 141 | # Video 142 | 143 | *.avi binary 144 | *.flv binary 145 | *.ogg binary 146 | *.ogv binary 147 | *.mp4 binary 148 | *.mov binary 149 | *.mpg binary 150 | *.mpeg binary 151 | *.qt binary 152 | *.webm binary 153 | *.wmv binary 154 | *.fla binary 155 | *.swf 
binary 156 | *.blend binary 157 | 158 | # Fonts 159 | 160 | *.eot binary 161 | *.otf binary 162 | *.ttf binary 163 | *.woff binary 164 | 165 | # Archives 166 | 167 | *.7z binary 168 | *.dmg binary 169 | *.gtar binary 170 | *.gz binary 171 | *.iso binary 172 | *.jar binary 173 | *.phar binary 174 | *.rar binary 175 | *.tar binary 176 | *.tgz binary 177 | *.zip binary 178 | 179 | # Other 180 | 181 | *.bat text 182 | *.bash text 183 | *.bin binary 184 | *.class binary 185 | *.com binary 186 | *.dll binary 187 | *.exe binary 188 | *.sh text 189 | *.so binary 190 | 191 | # Export Ignore — this follows `.gitignore` almost exactly. ------------------------------------------------------------ 192 | # The only exceptions are that we don't ignore `node_modules/` or `vendor/`. 193 | 194 | # Local 195 | 196 | .~* export-ignore 197 | 198 | # Logs 199 | 200 | *.log export-ignore 201 | 202 | # Backups 203 | 204 | *~ export-ignore 205 | *.bak export-ignore 206 | 207 | # Vagrant 208 | 209 | .vagrant/ export-ignore 210 | 211 | # TypeScript 212 | 213 | typings/ export-ignore 214 | 215 | # IntelliJ 216 | 217 | .idea/ export-ignore 218 | 219 | # Sublime 220 | 221 | *.sublime-project export-ignore 222 | *.sublime-workspace export-ignore 223 | 224 | # SASS 225 | 226 | .sass-cache/ export-ignore 227 | 228 | # Elastic Beanstalk 229 | 230 | .elasticbeanstalk/ export-ignore 231 | 232 | # CTAGs 233 | 234 | *.ctags export-ignore 235 | *.tags export-ignore 236 | 237 | # VCS 238 | 239 | .git/ export-ignore 240 | 241 | .svn/ export-ignore 242 | _svn/ export-ignore 243 | 244 | CVS/ export-ignore 245 | .cvsignore export-ignore 246 | 247 | .bzr/ export-ignore 248 | .bzrignore export-ignore 249 | 250 | .hg/ export-ignore 251 | .hgignore export-ignore 252 | 253 | SCCS/ export-ignore 254 | RCS/ export-ignore 255 | 256 | # PC Files 257 | 258 | $RECYCLE.BIN/ export-ignore 259 | Desktop.ini export-ignore 260 | Thumbs.db export-ignore 261 | ehthumbs.db export-ignore 262 | 263 | # Mac Files 264 | 265 | 
.AppleDB export-ignore 266 | .AppleDouble export-ignore 267 | .AppleDesktop export-ignore 268 | .com.apple.timemachine.donotpresent export-ignore 269 | Network Trash Folder export-ignore 270 | Temporary Items export-ignore 271 | .LSOverride export-ignore 272 | .Spotlight-V100 export-ignore 273 | .VolumeIcon.icns export-ignore 274 | .TemporaryItems export-ignore 275 | .fseventsd export-ignore 276 | .DS_Store export-ignore 277 | .Trashes export-ignore 278 | .apdisk export-ignore 279 | Icon? export-ignore 280 | Icons -export-ignore 281 | ._* export-ignore 282 | 283 | # Export Ignore — in addition to what we have in `.gitignore`. --------------------------------------------------------- 284 | 285 | # Assets 286 | 287 | /assets/ export-ignore 288 | 289 | # LFS Storage (Assets) ------------------------------------------------------------------------------------------------- 290 | 291 | /assets/** filter=lfs diff=lfs merge=lfs -text 292 | 293 | # ---------------------------------------------------------------------------------------------------------------------- 294 | 295 | # 296 | # Put your rules in custom comment markers. 297 | # 298 | -------------------------------------------------------------------------------- /.gitchange: -------------------------------------------------------------------------------- 1 | 1440139757 2 | 853d9120b42cf4f2eabe2f35ea18a6ace7170ea6:58482bb88a66f5.16980470 3 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Before Posting an Issue 2 | 3 | - Try to add as much detail as possible. Be specific! 4 | - If you're requesting a new feature, explain why you'd like it to be added. 5 | - Search this repository (top of the page) to be sure it has not been fixed or reported already. 6 | - GitHub issues ARE NOT FOR SUPPORT! 
If you have questions, please visit the website 7 | where you downloaded the software and use an official support channel for customers. 8 | 9 | ## Before You Report a Bug 10 | 11 | - Use the latest stable release of the software. 12 | - Disable all other components to ensure it's a real bug and not a conflict. 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## EXPLANATION OF THE ISSUE 2 | 3 | 4 | 5 | ## STEPS TO REPRODUCE THE ISSUE 6 | 7 | 8 | 9 | ## BEHAVIOR THAT I EXPECTED 10 | 11 | 12 | 13 | ## BEHAVIOR THAT I OBSERVED 14 | 15 | 16 | 17 | 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore --------------------------------------------------------------------------------------------------------------- 2 | 3 | # Local 4 | 5 | .~* 6 | 7 | # Logs 8 | 9 | *.log 10 | 11 | # Backups 12 | 13 | *~ 14 | *.bak 15 | 16 | # Vagrant 17 | 18 | .vagrant/ 19 | 20 | # TypeScript 21 | 22 | typings/ 23 | 24 | # IntelliJ 25 | 26 | .idea/ 27 | 28 | # Sublime 29 | 30 | *.sublime-project 31 | *.sublime-workspace 32 | 33 | # Vendor 34 | 35 | vendor/ 36 | 37 | # NodeJS 38 | 39 | node_modules/ 40 | 41 | # SASS 42 | 43 | .sass-cache/ 44 | 45 | # Elastic Beanstalk 46 | 47 | .elasticbeanstalk/ 48 | 49 | # CTAGs 50 | 51 | *.ctags 52 | *.tags 53 | 54 | # VCS 55 | 56 | .git/ 57 | 58 | .svn/ 59 | _svn/ 60 | 61 | CVS/ 62 | .cvsignore 63 | 64 | .bzr/ 65 | .bzrignore 66 | 67 | .hg/ 68 | .hgignore 69 | 70 | SCCS/ 71 | RCS/ 72 | 73 | # PC Files 74 | 75 | $RECYCLE.BIN/ 76 | Desktop.ini 77 | Thumbs.db 78 | ehthumbs.db 79 | 80 | # Mac Files 81 | 82 | .AppleDB 83 | .AppleDouble 84 | .AppleDesktop 85 | .com.apple.timemachine.donotpresent 86 | Network Trash Folder 87 | Temporary Items 88 | .LSOverride 89 | 
.Spotlight-V100 90 | .VolumeIcon.icns 91 | .TemporaryItems 92 | .fseventsd 93 | .DS_Store 94 | .Trashes 95 | .apdisk 96 | Icon? 97 | !Icons 98 | ._* 99 | 100 | # ---------------------------------------------------------------------------------------------------------------------- 101 | 102 | # 103 | # Put your rules in custom comment markers. 104 | # 105 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "phings"] 2 | path = phings 3 | url = https://github.com/websharks/phings.git 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## v170420.24924 2 | 3 | - **Bug Fix:** The HTML Compressor was returning an empty string upon encountering an invalid UTF-8 sequence. See [Issue #871](https://github.com/websharks/comet-cache/issues/871) reported by a Comet Cache user. 4 | 5 | ## v161208 6 | 7 | - **Bug Fix:** Undefined variable `$caSe_insensitive`, corrected in this release. 8 | 9 | ## v161207 10 | 11 | - Adding new config option: `amp_exclusions_enable` (default is enabled). This improves compatibility with [Accelerated Mobile Pages](https://www.ampproject.org/). When this option is enabled and the URI being compressed ends with `/amp/`, or the document contains a top-level `<html ⚡>` tag (`<html amp>` is accepted as well), then features which are incompatible with [Accelerated Mobile Pages](https://www.ampproject.org/) will be disabled accordingly, regardless of your other settings. See [Issue #695](https://github.com/websharks/comet-cache/issues/695) in the Comet Cache repo. 12 | - Adding `isDocAmpd()` conditional check against current URI & document. 13 | - Adding automatic AMP feature exclusions for improved AMP compatibility. 14 | - Optimizing for speed by removing unnecessary calls to `unset()`.
15 | - Enhancing unicode compatibility by taking full advantage of all `mb_*()` functions. 16 | - Adding multibyte compatible `pregQuote()`. 17 | - Adding multibyte compatible `replaceOnce()`. 18 | - Adding multibyte compatible `substrReplace()`. 19 | - Updating all regex patterns to add the `/u` flag for unicode compatibility. 20 | - Updating minimum PHP requirement. Now requires PHP v5.4+ in support of short array syntax `[]`. 21 | 22 | ## v161108 23 | 24 | - **Enhancing Exclusion Tests:** Now testing exclusion rules against full open tag instead of only the CSS or JS URL and content. This allows for exclusion rules that exclude CSS or JS based on an opening tag attribute; e.g., `` tags. 236 | 237 | - (array)`js_exclusions` Defaults to `array()`. If you have some JS files (or inline scripts) that should NOT be included by compression routines, please specify an array of search tokens to exclude. Search tokens are compared to external JS file `src` values (i.e., URLs or paths). Search tokens are also compared to the contents of any inline `<script>` tags in cases where compression is possible. 238 | 239 | - (array)`uri_exclusions` Defaults to `array()`. If you have some URLs that should NOT be included by compression routines, please specify an array of search tokens to exclude. Search tokens are compared to the current URI (i.e., everything after the domain name). 240 | 241 | **NOTE:** Search tokens should be string literals. The HTML Compressor currently does NOT support wildcards or regex in search tokens. If you need to use regex patterns instead of search tokens, please use a `regex_` prefix when you define the option key; e.g., `'regex_js_exclusions' => '/special\-script\.js/i'`. This works for `regex_css_exclusions` and `regex_uri_exclusions` too. 242 | 243 | ---- 244 | 245 | - (boolean)`disable_built_in_css_exclusions` Defaults to `FALSE`. There are a few built-in exclusion patterns that are updated (improved) with each new release of the HTML Compressor.
It is recommended that you leave these enabled at all times. However, if you prefer to turn them off (i.e., to *only* exclude the patterns you specify), you can set this to a `TRUE` value to disable all of those which are built-in. Built-in exclusions deal with things that should almost always be excluded; for any site. If you'd like to see the list of built-in patterns you can read the `$built_in_regex_css_exclusion_patterns` class property. It's an array of regular expressions that are compared to external CSS file `href` values (i.e., URLs or paths); and they are also compared to the contents of any inline `<style>` tags. 246 | 247 | - (boolean)`disable_built_in_js_exclusions` Defaults to `FALSE`. There are a few built-in exclusion patterns that are updated (improved) with each new release of the HTML Compressor. It is recommended that you leave these enabled at all times. However, if you prefer to turn them off (i.e., to *only* exclude the patterns you specify), you can set this to a `TRUE` value to disable all of those which are built-in. Built-in exclusions deal with things that should almost always be excluded; for any site. If you'd like to see the list of built-in patterns you can read the `$built_in_regex_js_exclusion_patterns` class property. It's an array of regular expressions that are compared to external JS file `src` values (i.e., URLs or paths); and they are also compared to the contents of any inline `<script>` tags. 248 | 249 | - (boolean)`disable_built_in_uri_exclusions` Defaults to `FALSE`. There are a few built-in exclusion patterns that are updated (improved) with each new release of the HTML Compressor. It is recommended that you leave these enabled at all times. However, if you prefer to turn them off (i.e., to *only* exclude the patterns you specify), you can set this to a `TRUE` value to disable all of those which are built-in. Built-in exclusions deal with things that should almost always be excluded; for any site.
If you'd like to see the list of built-in patterns you can read the `$built_in_regex_uri_exclusion_patterns` class property. It's an array of regular expressions that are compared to URIs (i.e., everything after the domain). 250 | 251 | ---- 252 | 253 | ##### The following options can be used to set up custom cache directories/URLs. 254 | 255 | *NOTE: under most circumstances, the built-in default values will do just fine.* 256 | 257 | - (string)`cache_expiration_time` Defaults to a value of `14 days`. You can use anything compatible with PHP's `strtotime()` function. 258 | NOTE: This expiration time is mostly irrelevant, because the HTML Compressor uses an internal checksum, and it also checks `filemtime()` before 259 | using an existing cache file. The HTML Compressor class also handles the automatic cleanup of your cache directories to keep them from growing too large over time. 260 | Therefore, unless you have VERY little disk space there is no reason to set this to a lower value (even if your site changes dynamically quite often). 261 | If anything, you might like to increase this value which could help to further reduce server load. 262 | 263 | **There are two scenarios where a cache regeneration occurs.** 264 | 265 | 1. When a cache file expires (based on your expiration time). 266 | 2. A cache file MUST be regenerated because there is a checksum mismatch (e.g., the content changed dynamically). 267 | 268 | In short, `cache_expiration_time` controls the first scenario; i.e., the absolute maximum amount of time that a cache file can ever live (or be used); and this will impact the automatic cleanup routine too of course. 269 | 270 | - (string)`cache_dir_public` Absolute server path to a local cache directory that is available over HTTP (i.e., publicly accessible). 271 | If you exclude this, there are two default handlers.
If `WP_CONTENT_DIR` is defined (i.e., you are running this within WordPress), 272 | then your public cache directory will be located under: `wp-content/htmlc/cache/public`. Otherwise, this will default to 273 | a value of `$_SERVER['DOCUMENT_ROOT']/htmlc/cache/public`. 274 | 275 | - (string)`cache_dir_url_public` A publicly available URL which leads to your `cache_dir_public`. 276 | If you exclude this option, an automatic detection is used (i.e., a best guess based on `cache_dir_public`). 277 | 278 | - (string)`cache_dir_private` Absolute server path to a local cache directory that is NOT available over HTTP (i.e., private/hidden). 279 | If you exclude this, there are two default handlers. If `WP_CONTENT_DIR` is defined (i.e., you are running this within WordPress), 280 | then your private cache directory will be located under: `wp-content/htmlc/cache/private`. Otherwise, this will default to 281 | a value of `$_SERVER['DOCUMENT_ROOT']/htmlc/cache/private`. 282 | 283 | - (string)`cache_dir_url_private` Not applicable. This option exists internally but URLs to the private cache directory 284 | are never generated. Therefore, under normal circumstances you can ignore this option value altogether. 285 | 286 | - (boolean)`cleanup_cache_dirs` Defaults to TRUE. By default, cache directories are cleaned up automatically 287 | over time; i.e., at semi-random intervals and also based on your `cache_expiration_time`. If you would prefer to clean up 288 | the cache on your own, you can set this to a FALSE value. 289 | 290 | ---- 291 | 292 | ##### The following options can be used to specify the current URL. 293 | 294 | *NOTE: it is normally NOT necessary to supply any of these values.* 295 | 296 | - (string)`current_url_scheme` When this class is running as a web application, we can detect this value automatically.
297 | That said, if you intend to run this class outside of a web server environment you will need to tell the compressor 298 | what the current URL scheme would be if the file you are compressing was being served as a web page. 299 | This should be set to one of `https` or `http`. 300 | 301 | - (string)`current_url_host` When this class is running as a web application, we can detect this value automatically. 302 | That said, if you intend to run this class outside of a web server environment you will need to tell the compressor 303 | what the current URL host would be if the file you are compressing was being served as a web page. 304 | This should be set to something like `www.example.com`. 305 | 306 | - (string)`current_url_uri` When this class is running as a web application, we can detect this value automatically. 307 | That said, if you intend to run this class outside of a web server environment you will need to tell the compressor 308 | what the current URI (i.e., path and query string) would be if the file you are compressing was being served as a web page. 309 | This should be set to something like `/path/to/page/?one=1&two=2`. 310 | 311 | ---- 312 | 313 | ##### The following options control compression behavior. 314 | 315 | *NOTE: compression routines are applied in the same order as these options are listed below.* 316 | 317 | - (boolean)`compress_combine_head_body_css` TRUE by default. If you prefer NOT to combine CSS files into a single HTTP connection, 318 | please set this to a FALSE value. This can be helpful if your site (for whatever reason) is incompatible with the CSS compress-combine routines. 319 | NOTE: if you disable this due to an incompatibility, please report it via GitHub so the issue can be resolved for everyone. 320 | 321 | - (boolean)`compress_combine_head_js` TRUE by default. If you prefer NOT to combine JS files into a single HTTP connection, 322 | please set this to a FALSE value. 
This can be helpful if your site (for whatever reason) is incompatible with the JS compress-combine routines. 323 | NOTE: if you disable this due to an incompatibility, please report it via GitHub so the issue can be resolved for everyone. 324 | 325 | - (boolean)`compress_combine_footer_js` TRUE by default. If you prefer NOT to combine JS files in the footer into a single HTTP connection, 326 | please set this to a FALSE value. This can be helpful if your site (for whatever reason) is incompatible with the JS compress-combine routines. 327 | NOTE: if you disable this due to an incompatibility, please report it via GitHub so the issue can be resolved for everyone. 328 | 329 | - (boolean)`compress_combine_remote_css_js` TRUE by default. If you prefer NOT to combine CSS/JS files from remote resource locations 330 | please set this to a FALSE value. By default, the options: `compress_combine_head_body_css`, `compress_combine_head_js`, `compress_combine_footer_js` will recursively combine all resources (including those from remote locations). 331 | If you set this to a FALSE value, all remote (externally hosted resources; e.g., those from CDNs or other remote URLs) will be excluded automatically to prevent remote off-site connections from taking place. 332 | 333 | - (boolean)`compress_inline_js_code` TRUE by default. If you prefer NOT to compress inline JS code (i.e., minify the contents of inline ` 15 | 32 | 38 | 39 | 40 | Testing one, two, three. 
41 | 42 | 43 | HTML; 44 | 45 | $html_compressor_options = array( 46 | 'css_exclusions' => array(), 47 | 'js_exclusions' => array('.php?'), 48 | 'uri_exclusions' => array(), 49 | 50 | 'cache_dir_url_public' => 'http://example.com/cache/public', 51 | 'cache_dir_public' => dirname(__FILE__).'/.~cache/public', 52 | 'cache_dir_private' => dirname(__FILE__).'/.~cache/private', 53 | 54 | 'current_url_scheme' => 'http', 55 | 'current_url_host' => 'www.example.com', 56 | 'current_url_uri' => '/test.php?one=1&two=2', 57 | 58 | 'compress_combine_head_body_css' => true, 59 | 'compress_combine_head_js' => true, 60 | 'compress_combine_footer_js' => true, 61 | 'compress_inline_js_code' => true, 62 | 'compress_css_code' => true, 63 | 'compress_js_code' => true, 64 | 'compress_html_code' => true, 65 | 66 | 'benchmark' => true, 67 | ); 68 | require_once dirname(dirname(__FILE__)).'/src/includes/stub.php'; 69 | # require_once dirname(dirname(__FILE__)).'/.~build/websharks-html-compressor.phar'; 70 | $html_compressor = new WebSharks\HtmlCompressor\Core($html_compressor_options); 71 | $html = $html_compressor->compress($html); 72 | 73 | echo $html; 74 | --------------------------------------------------------------------------------