├── .github ├── dependabot.yml └── workflows │ ├── ci-version.yml │ └── ci.yml ├── src ├── functions.rs ├── html_writer.rs ├── errors.rs ├── lib.rs └── html_minifier_helper.rs ├── Cargo.toml ├── LICENSE ├── rustfmt.toml ├── tests ├── data │ ├── index.min.html │ └── w3schools.com_tryhow_css_example_website.htm ├── mix.rs └── steps.rs ├── .gitignore └── README.md /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" -------------------------------------------------------------------------------- /src/functions.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt::{self, Formatter}, 3 | str::from_utf8_unchecked, 4 | }; 5 | 6 | #[inline] 7 | pub(crate) fn str_bytes_fmt(v: &[u8], f: &mut Formatter) -> Result<(), fmt::Error> { 8 | f.write_fmt(format_args!("{:?}", unsafe { from_utf8_unchecked(v) })) 9 | } 10 | 11 | #[inline] 12 | pub(crate) const fn is_whitespace(e: u8) -> bool { 13 | matches!(e, 0x09..=0x0D | 0x1C..=0x20) 14 | } 15 | 16 | #[inline] 17 | pub(crate) const fn is_ascii_control(e: u8) -> bool { 18 | matches!(e, 0..=8 | 11..=31 | 127) 19 | } 20 | -------------------------------------------------------------------------------- /src/html_writer.rs: -------------------------------------------------------------------------------- 1 | use std::io::Write; 2 | 3 | use crate::HTMLMinifierError; 4 | 5 | /// Implement this trait to build a HTML writer. 6 | pub trait HTMLWriter { 7 | fn push(&mut self, e: u8) -> Result<(), HTMLMinifierError>; 8 | fn push_bytes(&mut self, bytes: &[u8]) -> Result<(), HTMLMinifierError>; 9 | } 10 | 11 | impl HTMLWriter for W { 12 | #[inline] 13 | fn push(&mut self, e: u8) -> Result<(), HTMLMinifierError> { 14 | Ok(self.write_all(&[e])?) 
15 | } 16 | 17 | #[inline] 18 | fn push_bytes(&mut self, bytes: &[u8]) -> Result<(), HTMLMinifierError> { 19 | Ok(self.write_all(bytes)?) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | error::Error, 3 | fmt::{self, Display, Formatter}, 4 | io, 5 | }; 6 | 7 | /// Errors for `HTMLMinifier`. 8 | #[derive(Debug)] 9 | pub enum HTMLMinifierError { 10 | CSSError(&'static str), 11 | IOError(io::Error), 12 | } 13 | 14 | impl From for HTMLMinifierError { 15 | #[inline] 16 | fn from(error: io::Error) -> Self { 17 | HTMLMinifierError::IOError(error) 18 | } 19 | } 20 | 21 | impl Display for HTMLMinifierError { 22 | #[inline] 23 | fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> { 24 | match self { 25 | HTMLMinifierError::CSSError(error) => Display::fmt(error, f), 26 | HTMLMinifierError::IOError(error) => Display::fmt(error, f), 27 | } 28 | } 29 | } 30 | 31 | impl Error for HTMLMinifierError {} 32 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "html-minifier" 3 | version = "5.0.0" 4 | authors = ["Magic Len "] 5 | edition = "2021" 6 | rust-version = "1.60" 7 | repository = "https://github.com/magiclen/html-minifier" 8 | homepage = "https://magiclen.org/html-minifier" 9 | keywords = ["html", "minify", "sctipt", "style"] 10 | categories = ["encoding"] 11 | description = "This library can help you generate and minify your HTML code at the same time. It also supports to minify JS and CSS in ` 8 | 9 | 10 |
11 |

My Website

12 |

A responsive website created by me.

13 |
14 | 20 |
21 |
22 |

About Me

23 |
Photo of me:
24 |
Image
25 |

Some text about me in culpa qui officia deserunt mollit anim..

26 |

More Text

27 |

Lorem ipsum dolor sit ame.

28 |
Image
29 |
30 |
Image
31 |
32 |
Image
33 |
34 |
35 |

TITLE HEADING

36 |
Title description, Dec 7, 2017
37 |
Image
38 |

Some text..

39 |

Sunt in culpa qui officia deserunt mollit anim id est laborum consectetur adipiscing elit, sed do eiusmod 40 | tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation 41 | ullamco.

42 |
43 |

TITLE HEADING

44 |
Title description, Sep 2, 2017
45 |
Image
46 |

Some text..

47 |

Sunt in culpa qui officia deserunt mollit anim id est laborum consectetur adipiscing elit, sed do eiusmod 48 | tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation 49 | ullamco.

50 |
51 |
52 | 55 | 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Intellij+all ### 2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | # User-specific stuff 6 | .idea/**/workspace.xml 7 | .idea/**/tasks.xml 8 | .idea/**/usage.statistics.xml 9 | .idea/**/dictionaries 10 | .idea/**/shelf 11 | 12 | # AWS User-specific 13 | .idea/**/aws.xml 14 | 15 | # Generated files 16 | .idea/**/contentModel.xml 17 | 18 | # Sensitive or high-churn files 19 | .idea/**/dataSources/ 20 | .idea/**/dataSources.ids 21 | .idea/**/dataSources.local.xml 22 | .idea/**/sqlDataSources.xml 23 | .idea/**/dynamic.xml 24 | .idea/**/uiDesigner.xml 25 | .idea/**/dbnavigator.xml 26 | 27 | # Gradle 28 | .idea/**/gradle.xml 29 | .idea/**/libraries 30 | 31 | # Gradle and Maven with auto-import 32 | # When using Gradle or Maven with auto-import, you should exclude module files, 33 | # since they will be recreated, and may cause churn. Uncomment if using 34 | # auto-import. 
35 | # .idea/artifacts 36 | # .idea/compiler.xml 37 | # .idea/jarRepositories.xml 38 | # .idea/modules.xml 39 | # .idea/*.iml 40 | # .idea/modules 41 | # *.iml 42 | # *.ipr 43 | 44 | # CMake 45 | cmake-build-*/ 46 | 47 | # Mongo Explorer plugin 48 | .idea/**/mongoSettings.xml 49 | 50 | # File-based project format 51 | *.iws 52 | 53 | # IntelliJ 54 | out/ 55 | 56 | # mpeltonen/sbt-idea plugin 57 | .idea_modules/ 58 | 59 | # JIRA plugin 60 | atlassian-ide-plugin.xml 61 | 62 | # Cursive Clojure plugin 63 | .idea/replstate.xml 64 | 65 | # SonarLint plugin 66 | .idea/sonarlint/ 67 | 68 | # Crashlytics plugin (for Android Studio and IntelliJ) 69 | com_crashlytics_export_strings.xml 70 | crashlytics.properties 71 | crashlytics-build.properties 72 | fabric.properties 73 | 74 | # Editor-based Rest Client 75 | .idea/httpRequests 76 | 77 | # Android studio 3.1+ serialized cache file 78 | .idea/caches/build_file_checksums.ser 79 | 80 | ### Intellij+all Patch ### 81 | # Ignore everything but code style settings and run configurations 82 | # that are supposed to be shared within teams. 
83 | 84 | .idea/* 85 | 86 | !.idea/codeStyles 87 | !.idea/runConfigurations 88 | 89 | ### Rust ### 90 | # Generated by Cargo 91 | # will have compiled files and executables 92 | debug/ 93 | target/ 94 | 95 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 96 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 97 | Cargo.lock 98 | 99 | # These are backup files generated by rustfmt 100 | **/*.rs.bk 101 | 102 | # MSVC Windows builds of rustc generate these, which store debugging information 103 | *.pdb 104 | 105 | ### Vim ### 106 | # Swap 107 | [._]*.s[a-v][a-z] 108 | !*.svg # comment out if you don't need vector files 109 | [._]*.sw[a-p] 110 | [._]s[a-rt-v][a-z] 111 | [._]ss[a-gi-z] 112 | [._]sw[a-p] 113 | 114 | # Session 115 | Session.vim 116 | Sessionx.vim 117 | 118 | # Temporary 119 | .netrwhist 120 | *~ 121 | # Auto-generated tag files 122 | tags 123 | # Persistent undo 124 | [._]*.un~ 125 | 126 | ### VisualStudioCode ### 127 | .vscode/* 128 | !.vscode/settings.json 129 | !.vscode/tasks.json 130 | !.vscode/launch.json 131 | !.vscode/extensions.json 132 | !.vscode/*.code-snippets 133 | 134 | # Local History for Visual Studio Code 135 | .history/ 136 | 137 | # Built Visual Studio Code Extensions 138 | *.vsix 139 | 140 | ### VisualStudioCode Patch ### 141 | # Ignore all local history of files 142 | .history 143 | .ionide -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HTML Minifier 2 | ==================== 3 | 4 | [![CI](https://github.com/magiclen/html-minifier/actions/workflows/ci.yml/badge.svg)](https://github.com/magiclen/html-minifier/actions/workflows/ci.yml) 5 | 6 | This library can help you generate and minify your HTML code at the same time. It also supports to minify JS and CSS in ` 122 | 123 | 124 | 125 |
126 |

My Website

127 |

A responsive website created by me.

128 |
129 | 130 | 136 | 137 |
138 |
139 |

About Me

140 |
Photo of me:
141 |
Image
142 |

Some text about me in culpa qui officia deserunt mollit anim..

143 |

More Text

144 |

Lorem ipsum dolor sit ame.

145 |
Image
146 |
147 |
Image
148 |
149 |
Image
150 |
151 |
152 |

TITLE HEADING

153 |
Title description, Dec 7, 2017
154 |
Image
155 |

Some text..

156 |

Sunt in culpa qui officia deserunt mollit anim id est laborum consectetur adipiscing elit, sed do eiusmod 157 | tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation 158 | ullamco.

159 |
160 |

TITLE HEADING

161 |
Title description, Sep 2, 2017
162 |
Image
163 |

Some text..

164 |

Sunt in culpa qui officia deserunt mollit anim id est laborum consectetur adipiscing elit, sed do eiusmod 165 | tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation 166 | ullamco.

167 |
168 |
169 | 170 | 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | # HTML Minifier 3 | 4 | This library can help you generate and minify your HTML code at the same time. It also supports to minify JS and CSS in `"#, 264 | ) 265 | .unwrap(); 266 | 267 | assert_eq!( 268 | "" 270 | .as_bytes(), 271 | html_minifier.get_html() 272 | ); 273 | } 274 | 275 | html_minifier.reset(); 276 | 277 | { 278 | html_minifier 279 | .digest( 280 | r#""#, 292 | ) 293 | .unwrap(); 294 | 295 | assert_eq!( 296 | r#""#.as_bytes(), 297 | html_minifier.get_html() 298 | ); 299 | } 300 | } 301 | 302 | #[test] 303 | fn minify_javascript() { 304 | let mut html_minifier = HTMLMinifier::new(); 305 | 306 | { 307 | html_minifier 308 | .digest( 309 | r#""#, 313 | ) 314 | .unwrap(); 315 | 316 | assert_eq!(b"", html_minifier.get_html()); 317 | } 318 | 319 | html_minifier.reset(); 320 | 321 | { 322 | html_minifier 323 | .digest( 324 | r#""#, 328 | ) 329 | .unwrap(); 330 | 331 | assert_eq!( 332 | r#""#.as_bytes(), 333 | html_minifier.get_html() 334 | ); 335 | } 336 | } 337 | 338 | #[test] 339 | fn minify_javascript_css() { 340 | let mut html_minifier = HTMLMinifier::new(); 341 | 342 | { 343 | html_minifier 344 | .digest( 345 | r#""#, 360 | ) 361 | .unwrap(); 362 | 363 | assert_eq!( 364 | "" 366 | .as_bytes(), 367 | html_minifier.get_html() 368 | ); 369 | } 370 | } 371 | 372 | #[test] 373 | fn preserve_pre() { 374 | let mut html_minifier = HTMLMinifier::new(); 375 | 376 | { 377 | html_minifier 378 | .digest( 379 | r#"
380 |     
381 |         1234567
382 |     
383 |
384 | 1234567 385 |
386 |
387 |         1234567
388 |     
"#, 389 | ) 390 | .unwrap(); 391 | 392 | assert_eq!( 393 | r#"
394 |     
395 |         1234567
396 |     
397 |
398 | 1234567 399 |
400 |
401 |         1234567
402 |     
"# 403 | .as_bytes(), 404 | html_minifier.get_html() 405 | ); 406 | } 407 | } 408 | 409 | #[test] 410 | fn preserve_code() { 411 | let mut html_minifier = HTMLMinifier::new(); 412 | html_minifier.set_minify_code(false); 413 | 414 | { 415 | html_minifier 416 | .digest( 417 | r#" 418 | 419 | 1234567 420 | 421 |
422 | 1234567 423 |
424 | 425 | 1234567 426 | "#, 427 | ) 428 | .unwrap(); 429 | 430 | assert_eq!( 431 | r#" 432 | 433 | 1234567 434 | 435 |
436 | 1234567 437 |
438 | 439 | 1234567 440 | "# 441 | .as_bytes(), 442 | html_minifier.get_html() 443 | ); 444 | } 445 | } 446 | 447 | #[test] 448 | fn preserve_textarea() { 449 | let mut html_minifier = HTMLMinifier::new(); 450 | 451 | { 452 | html_minifier 453 | .digest( 454 | r#" 459 |
460 | 1234567 461 |
462 | "#, 465 | ) 466 | .unwrap(); 467 | 468 | assert_eq!( 469 | r#" 474 |
475 | 1234567 476 |
477 | "# 480 | .as_bytes(), 481 | html_minifier.get_html() 482 | ); 483 | } 484 | } 485 | 486 | #[test] 487 | fn preserve_unsupported_script_type() { 488 | let mut html_minifier = HTMLMinifier::new(); 489 | 490 | { 491 | html_minifier 492 | .digest( 493 | r#""#, 497 | ) 498 | .unwrap(); 499 | 500 | assert_eq!( 501 | r#""# 505 | .as_bytes(), 506 | html_minifier.get_html() 507 | ); 508 | } 509 | } 510 | 511 | #[test] 512 | fn preserve_unsupported_style_type() { 513 | let mut html_minifier = HTMLMinifier::new(); 514 | 515 | { 516 | html_minifier 517 | .digest( 518 | r#""#, 530 | ) 531 | .unwrap(); 532 | 533 | assert_eq!( 534 | r#""# 546 | .as_bytes(), 547 | html_minifier.get_html() 548 | ); 549 | } 550 | } 551 | -------------------------------------------------------------------------------- /tests/steps.rs: -------------------------------------------------------------------------------- 1 | use std::str::from_utf8_unchecked; 2 | 3 | use html_minifier::HTMLMinifier; 4 | 5 | fn test_enabled_all_options(cases: &[(&str, &str)]) { 6 | for (index, (expect, test)) in cases.iter().copied().enumerate() { 7 | let mut html_minifier = HTMLMinifier::new(); 8 | html_minifier.digest(test).unwrap(); 9 | assert_eq!(expect.as_bytes(), html_minifier.get_html(), "case {}", index); 10 | } 11 | 12 | let mut buffer = [0u8; 8]; 13 | 14 | for (index, (expect, test)) in cases.iter().copied().enumerate() { 15 | let mut html_minifier = HTMLMinifier::new(); 16 | 17 | for c in test.chars() { 18 | html_minifier.digest(c.encode_utf8(&mut buffer)).unwrap(); 19 | } 20 | 21 | assert_eq!(expect.as_bytes(), html_minifier.get_html(), "case-chunk-1 {}", index); 22 | } 23 | 24 | for (index, (expect, test)) in cases.iter().copied().enumerate() { 25 | let mut html_minifier = HTMLMinifier::new(); 26 | 27 | let mut chars = test.chars(); 28 | 29 | while let Some(c) = chars.next() { 30 | let mut length = c.encode_utf8(&mut buffer).len(); 31 | 32 | if let Some(c) = chars.next() { 33 | length = length + 
c.encode_utf8(&mut buffer[length..]).len(); 34 | } 35 | 36 | html_minifier.digest(unsafe { from_utf8_unchecked(&buffer[..length]) }).unwrap(); 37 | } 38 | 39 | assert_eq!(expect.as_bytes(), html_minifier.get_html(), "case-chunk-2 {}", index); 40 | } 41 | } 42 | 43 | fn test_disabled_all_options(cases: &[(&str, &str)]) { 44 | for (index, (expect, test)) in cases.iter().copied().enumerate() { 45 | let mut html_minifier = HTMLMinifier::new(); 46 | html_minifier.set_remove_comments(false); 47 | html_minifier.set_minify_code(false); 48 | 49 | html_minifier.digest(test).unwrap(); 50 | assert_eq!(expect.as_bytes(), html_minifier.get_html(), "case {}", index); 51 | } 52 | 53 | let mut buffer = [0u8; 8]; 54 | 55 | for (index, (expect, test)) in cases.iter().copied().enumerate() { 56 | let mut html_minifier = HTMLMinifier::new(); 57 | html_minifier.set_remove_comments(false); 58 | html_minifier.set_minify_code(false); 59 | 60 | for c in test.chars() { 61 | html_minifier.digest(c.encode_utf8(&mut buffer)).unwrap(); 62 | } 63 | 64 | assert_eq!(expect.as_bytes(), html_minifier.get_html(), "case-chunk-1 {}", index); 65 | } 66 | 67 | for (index, (expect, test)) in cases.iter().copied().enumerate() { 68 | let mut html_minifier = HTMLMinifier::new(); 69 | html_minifier.set_remove_comments(false); 70 | html_minifier.set_minify_code(false); 71 | 72 | let mut chars = test.chars(); 73 | 74 | while let Some(c) = chars.next() { 75 | let mut length = c.encode_utf8(&mut buffer).len(); 76 | 77 | if let Some(c) = chars.next() { 78 | length = length + c.encode_utf8(&mut buffer[length..]).len(); 79 | } 80 | 81 | html_minifier.digest(unsafe { from_utf8_unchecked(&buffer[..length]) }).unwrap(); 82 | } 83 | 84 | assert_eq!(expect.as_bytes(), html_minifier.get_html(), "case-chunk-2 {}", index); 85 | } 86 | } 87 | 88 | #[test] 89 | fn initial() { 90 | const CASES: [(&str, &str); 3] = [("", ""), ("", " "), ("1", " 1")]; 91 | 92 | test_enabled_all_options(&CASES); 93 | } 94 | 95 | #[test] 96 | fn 
initial_remain_one_whitespace() { 97 | const CASES: [(&str, &str); 2] = [("1", "1 "), ("1", "1\t")]; 98 | 99 | test_enabled_all_options(&CASES); 100 | } 101 | 102 | #[test] 103 | fn initial_ignore_whitespace() { 104 | const CASES: [(&str, &str); 2] = [("1 23", "1 23"), ("1 234 567", "1 234 567")]; 105 | 106 | test_enabled_all_options(&CASES); 107 | } 108 | 109 | #[test] 110 | fn start_tag_initial() { 111 | const CASES: [(&str, &str); 6] = 112 | [("", "<>"), ("123", "123<>"), (""), ("123", "123"), ("", "")]; 129 | 130 | test_enabled_all_options(&CASES); 131 | } 132 | 133 | #[test] 134 | fn start_tag_in() { 135 | const CASES: [(&str, &str); 6] = [ 136 | ("", ""), 139 | ("", ""), 140 | ("", ""), 153 | ("", ""), 166 | ("", ""), 167 | ("", ""), 179 | ("", ""), 212 | ("", "")]; 221 | 222 | test_enabled_all_options(&CASES); 223 | } 224 | 225 | #[test] 226 | fn tag_end() { 227 | const CASES: [(&str, &str); 4] = [ 228 | ("", ""), 230 | ("", ""), 232 | ]; 233 | 234 | test_enabled_all_options(&CASES); 235 | } 236 | 237 | #[test] 238 | fn doctype() { 239 | const CASES: [(&str, &str); 5] = [ 240 | ("", ""), 243 | ("", ""), 244 | ("", "23"), ("", "")]; 253 | 254 | test_enabled_all_options(&CASES); 255 | 256 | const CASES2: [(&str, &str); 2] = [("123", "123"), ("", "")]; 257 | 258 | test_disabled_all_options(&CASES2); 259 | } 260 | 261 | #[test] 262 | fn script_default() { 263 | const CASES: [(&str, &str); 2] = [ 264 | ( 265 | "", 266 | "", 267 | ), 268 | ( 269 | "", 270 | "", 271 | ), 272 | ]; 273 | 274 | test_enabled_all_options(&CASES); 275 | } 276 | 277 | #[test] 278 | fn script_javascript() { 279 | const CASES: [(&str, &str); 3] = [ 280 | ( 281 | "", 282 | "", 283 | ), 284 | ("", ""), 285 | ("", ""), 286 | ]; 287 | 288 | test_enabled_all_options(&CASES); 289 | } 290 | 291 | #[test] 292 | fn style_default() { 293 | const CASES: [(&str, &str); 2] = [ 294 | ( 295 | "", 302 | "", 309 | ), 310 | ( 311 | "", 318 | "", 325 | ), 326 | ]; 327 | 328 | test_enabled_all_options(&CASES); 
329 | } 330 | 331 | #[test] 332 | fn style_css() { 333 | const CASES: [(&str, &str); 3] = [ 334 | ( 335 | "", 336 | "", 343 | ), 344 | ( 345 | "", 346 | "", 353 | ), 354 | ( 355 | "", 356 | "", 357 | ), 358 | ]; 359 | 360 | test_enabled_all_options(&CASES); 361 | } 362 | 363 | #[test] 364 | fn pre() { 365 | const CASES: [(&str, &str); 2] = [ 366 | ("
   alert('1234!')    ;   
", "
   alert('1234!')    ;   
"), 367 | ("
   alert('1234!')    ;   
", "
   alert('1234!')    ;   
"), 368 | ]; 369 | 370 | test_enabled_all_options(&CASES); 371 | } 372 | 373 | #[test] 374 | fn code() { 375 | const CASES: [(&str, &str); 2] = [ 376 | (" alert('1234!') ; ", " alert('1234!') ; "), 377 | (" alert('1234!') ; ", " alert('1234!') ; "), 378 | ]; 379 | 380 | test_enabled_all_options(&CASES); 381 | 382 | const CASES2: [(&str, &str); 2] = [ 383 | (" alert('1234!') ; ", " alert('1234!') ; "), 384 | (" alert('1234!') ; ", " alert('1234!') ; "), 385 | ]; 386 | 387 | test_disabled_all_options(&CASES2); 388 | } 389 | 390 | #[test] 391 | fn textarea() { 392 | const CASES: [(&str, &str); 2] = [ 393 | ( 394 | "", 395 | "", 396 | ), 397 | ( 398 | "", 399 | "", 400 | ), 401 | ]; 402 | 403 | test_enabled_all_options(&CASES); 404 | } 405 | 406 | // TODO -----Width 2----- 407 | 408 | #[test] 409 | fn width_2_initial() { 410 | const CASES: [(&str, &str); 1] = [("é", " é")]; 411 | 412 | test_enabled_all_options(&CASES); 413 | } 414 | 415 | #[test] 416 | fn width_2_initial_remain_one_whitespace() { 417 | const CASES: [(&str, &str); 2] = [("é", "é "), ("é", "é\t")]; 418 | 419 | test_enabled_all_options(&CASES); 420 | } 421 | 422 | #[test] 423 | fn width_2_initial_ignore_whitespace() { 424 | const CASES: [(&str, &str); 2] = [("é éé", "é éé"), ("é ééé ééé", "é ééé ééé")]; 425 | 426 | test_enabled_all_options(&CASES); 427 | } 428 | 429 | #[test] 430 | fn width_2_start_tag_initial() { 431 | const CASES: [(&str, &str); 1] = [("<é >", "<é >")]; 432 | 433 | test_enabled_all_options(&CASES); 434 | } 435 | 436 | #[test] 437 | fn width_2_end_tag_initial() { 438 | const CASES: [(&str, &str); 1] = [("", "")]; 439 | 440 | test_enabled_all_options(&CASES); 441 | } 442 | 443 | #[test] 444 | fn width_2_start_tag() { 445 | const CASES: [(&str, &str); 1] = [("", "")]; 446 | 447 | test_enabled_all_options(&CASES); 448 | } 449 | 450 | #[test] 451 | fn width_2_end_tag() { 452 | const CASES: [(&str, &str); 1] = [("", "")]; 453 | 454 | test_enabled_all_options(&CASES); 455 | } 456 | 457 | 
#[test] 458 | fn width_2_start_tag_in() { 459 | const CASES: [(&str, &str); 1] = [("", "")]; 460 | 461 | test_enabled_all_options(&CASES); 462 | } 463 | 464 | #[test] 465 | fn width_2_start_tag_attribute_name() { 466 | const CASES: [(&str, &str); 1] = [("", "")]; 467 | 468 | test_enabled_all_options(&CASES); 469 | } 470 | 471 | #[test] 472 | fn width_2_start_tag_attribute_name_waiting_value() { 473 | const CASES: [(&str, &str); 1] = [("", "")]; 474 | 475 | test_enabled_all_options(&CASES); 476 | } 477 | 478 | #[test] 479 | fn width_2_start_tag_attribute_value_initial() { 480 | const CASES: [(&str, &str); 1] = [("", "")]; 481 | 482 | test_enabled_all_options(&CASES); 483 | } 484 | 485 | #[test] 486 | fn width_2_start_tag_quoted_attribute_value() { 487 | const CASES: [(&str, &str); 1] = [("", "")]; 488 | 489 | test_enabled_all_options(&CASES); 490 | } 491 | 492 | #[test] 493 | fn width_2_start_tag_unquoted_attribute_value() { 494 | const CASES: [(&str, &str); 1] = [("", "")]; 495 | 496 | test_enabled_all_options(&CASES); 497 | } 498 | 499 | #[test] 500 | fn width_2_tag_end() { 501 | const CASES: [(&str, &str); 3] = 502 | [("", "")]; 503 | 504 | test_enabled_all_options(&CASES); 505 | } 506 | 507 | #[test] 508 | fn width_2_doctype() { 509 | const CASES: [(&str, &str); 1] = [("éé"), ("", "")]; 517 | 518 | test_enabled_all_options(&CASES); 519 | 520 | const CASES2: [(&str, &str); 2] = [("ééé", "ééé"), ("", "")]; 521 | 522 | test_disabled_all_options(&CASES2); 523 | } 524 | 525 | #[test] 526 | fn width_2_script_default() { 527 | const CASES: [(&str, &str); 2] = [ 528 | ( 529 | "", 530 | "", 531 | ), 532 | ( 533 | "", 534 | "", 535 | ), 536 | ]; 537 | 538 | test_enabled_all_options(&CASES); 539 | } 540 | 541 | #[test] 542 | fn width_2_script_javascript() { 543 | const CASES: [(&str, &str); 3] = [ 544 | ( 545 | "", 546 | "", 547 | ), 548 | ("", ""), 549 | ("", ""), 550 | ]; 551 | 552 | test_enabled_all_options(&CASES); 553 | } 554 | 555 | // TODO -----Width n (3 & 4)----- 
556 | 557 | #[test] 558 | fn width_n_initial() { 559 | const CASES: [(&str, &str); 1] = [("中", " 中")]; 560 | 561 | test_enabled_all_options(&CASES); 562 | } 563 | 564 | #[test] 565 | fn width_n_initial_remain_one_whitespace() { 566 | const CASES: [(&str, &str); 2] = [("中", "中 "), ("中", "中\t")]; 567 | 568 | test_enabled_all_options(&CASES); 569 | } 570 | 571 | #[test] 572 | fn width_n_initial_ignore_whitespace() { 573 | const CASES: [(&str, &str); 14] = [ 574 | ("中 中中", "中 中中"), 575 | ("中 中中中 中中中", "中 中中中 中中中"), 576 | ("中\n中", "中\n\t 中"), 577 | ("中\n中", "中 \n\t 中"), 578 | ("中\na", "中\n\t a"), 579 | ("中\na", "中 \n\t a"), 580 | ("a\n中", "a\n\t 中"), 581 | ("a\n中", "a \n\t 中"), 582 | ("中 中", "中\t 中"), 583 | ("中 中", "中 \t 中"), 584 | ("中 a", "中\t a"), 585 | ("中 a", "中 \t a"), 586 | ("a 中", "a\t 中"), 587 | ("a 中", "a \t 中"), 588 | ]; 589 | 590 | test_enabled_all_options(&CASES); 591 | } 592 | 593 | #[test] 594 | fn width_n_start_tag_initial() { 595 | const CASES: [(&str, &str); 1] = [("<中 >", "<中 >")]; 596 | 597 | test_enabled_all_options(&CASES); 598 | } 599 | 600 | #[test] 601 | fn width_n_end_tag_initial() { 602 | const CASES: [(&str, &str); 1] = [("", "")]; 603 | 604 | test_enabled_all_options(&CASES); 605 | } 606 | 607 | #[test] 608 | fn width_n_start_tag() { 609 | const CASES: [(&str, &str); 1] = [("", "")]; 610 | 611 | test_enabled_all_options(&CASES); 612 | } 613 | 614 | #[test] 615 | fn width_n_end_tag() { 616 | const CASES: [(&str, &str); 1] = [("", "")]; 617 | 618 | test_enabled_all_options(&CASES); 619 | } 620 | 621 | #[test] 622 | fn width_n_start_tag_in() { 623 | const CASES: [(&str, &str); 1] = [("", "")]; 624 | 625 | test_enabled_all_options(&CASES); 626 | } 627 | 628 | #[test] 629 | fn width_n_start_tag_attribute_name() { 630 | const CASES: [(&str, &str); 1] = [("", "")]; 631 | 632 | test_enabled_all_options(&CASES); 633 | } 634 | 635 | #[test] 636 | fn width_n_start_tag_attribute_name_waiting_value() { 637 | const CASES: [(&str, &str); 1] = [("", "")]; 
638 | 639 | test_enabled_all_options(&CASES); 640 | } 641 | 642 | #[test] 643 | fn width_n_start_tag_attribute_value_initial() { 644 | const CASES: [(&str, &str); 1] = [("", "")]; 645 | 646 | test_enabled_all_options(&CASES); 647 | } 648 | 649 | #[test] 650 | fn width_n_start_tag_quoted_attribute_value() { 651 | const CASES: [(&str, &str); 1] = [("", "")]; 652 | 653 | test_enabled_all_options(&CASES); 654 | } 655 | 656 | #[test] 657 | fn width_n_start_tag_unquoted_attribute_value() { 658 | const CASES: [(&str, &str); 1] = [("", "")]; 659 | 660 | test_enabled_all_options(&CASES); 661 | } 662 | 663 | #[test] 664 | fn width_n_tag_end() { 665 | const CASES: [(&str, &str); 3] = 666 | [("", "")]; 667 | 668 | test_enabled_all_options(&CASES); 669 | } 670 | 671 | #[test] 672 | fn width_n_doctype() { 673 | const CASES: [(&str, &str); 1] = [("中中"), ("", "")]; 681 | 682 | test_enabled_all_options(&CASES); 683 | 684 | const CASES2: [(&str, &str); 2] = 685 | [("中中中", "中中中"), ("", "")]; 686 | 687 | test_disabled_all_options(&CASES2); 688 | } 689 | 690 | #[test] 691 | fn width_n_script_default() { 692 | const CASES: [(&str, &str); 2] = [ 693 | ( 694 | "", 695 | "", 696 | ), 697 | ( 698 | "", 699 | "", 700 | ), 701 | ]; 702 | 703 | test_enabled_all_options(&CASES); 704 | } 705 | 706 | #[test] 707 | fn width_n_script_javascript() { 708 | const CASES: [(&str, &str); 3] = [ 709 | ( 710 | "", 711 | "", 712 | ), 713 | ("", ""), 714 | ("", ""), 715 | ]; 716 | 717 | test_enabled_all_options(&CASES); 718 | } 719 | -------------------------------------------------------------------------------- /src/html_minifier_helper.rs: -------------------------------------------------------------------------------- 1 | use std::{borrow::Cow, str::from_utf8_unchecked}; 2 | 3 | use cow_utils::CowUtils; 4 | use educe::Educe; 5 | pub use minifier::{css, js}; 6 | 7 | use crate::{functions::*, HTMLMinifierError, HTMLWriter}; 8 | 9 | #[derive(Educe, Debug, Copy, Clone, Eq, PartialEq)] 10 | #[educe(Default)] 
11 | enum Step { 12 | #[educe(Default)] 13 | Initial, 14 | InitialRemainOneWhitespace, 15 | InitialIgnoreWhitespace, 16 | StartTagInitial, 17 | EndTagInitial, 18 | StartTag, 19 | StartTagIn, 20 | StartTagAttributeName, 21 | StartTagAttributeNameWaitingValue, 22 | StartTagAttributeValueInitial, 23 | StartTagUnquotedAttributeValue, 24 | StartTagQuotedAttributeValue, 25 | EndTag, 26 | TagEnd, 27 | Doctype, 28 | Comment, 29 | ScriptDefault, 30 | ScriptJavaScript, 31 | StyleDefault, 32 | StyleCSS, 33 | Pre, 34 | Code, 35 | Textarea, 36 | } 37 | 38 | /// This struct helps you generate and minify your HTML code at the same time. The output destination is outside this struct. 39 | #[derive(Educe, Clone)] 40 | #[educe(Debug, Default(new))] 41 | pub struct HTMLMinifierHelper { 42 | #[educe(Default = true)] 43 | /// Remove HTML comments. 44 | pub remove_comments: bool, 45 | #[educe(Default = true)] 46 | /// Minify the content in the `code` element. 47 | pub minify_code: bool, 48 | 49 | // Buffers 50 | #[educe(Debug(method = "str_bytes_fmt"))] 51 | buffer: Vec, 52 | #[educe(Debug(method = "str_bytes_fmt"))] 53 | tag: Vec, 54 | #[educe(Debug(method = "str_bytes_fmt"))] 55 | attribute_type: Vec, 56 | 57 | // Steps 58 | step: Step, 59 | step_counter: u8, 60 | 61 | // Temp 62 | quote: u8, 63 | last_space: u8, 64 | 65 | // Flags 66 | quoted_value_spacing: bool, 67 | quoted_value_empty: bool, 68 | in_handled_attribute: bool, 69 | in_attribute_type: bool, 70 | } 71 | 72 | impl HTMLMinifierHelper { 73 | #[inline] 74 | fn set_flags_by_attribute(&mut self) { 75 | match self.buffer.as_slice() { 76 | b"class" => { 77 | self.in_handled_attribute = true; 78 | self.in_attribute_type = false; 79 | }, 80 | b"type" => match self.tag.as_slice() { 81 | b"script" | b"style" => { 82 | self.in_handled_attribute = true; 83 | self.in_attribute_type = true; 84 | }, 85 | _ => (), 86 | }, 87 | _ => { 88 | self.in_handled_attribute = false; 89 | self.in_attribute_type = false; 90 | }, 91 | } 92 | } 93 | 
94 | #[inline] 95 | fn finish_buffer(&mut self) { 96 | if self.in_attribute_type { 97 | if let Cow::Owned(attribute_value) = html_escape::decode_html_entities(unsafe { 98 | from_utf8_unchecked(&self.attribute_type) 99 | }) { 100 | self.attribute_type = attribute_value.into_bytes(); 101 | } 102 | 103 | if let Cow::Owned(attribute_value) = 104 | unsafe { from_utf8_unchecked(&self.attribute_type) }.cow_to_ascii_lowercase() 105 | { 106 | self.attribute_type = attribute_value.into_bytes(); 107 | } 108 | } 109 | } 110 | 111 | #[inline] 112 | fn end_start_tag_and_get_next_step( 113 | &mut self, 114 | out: &mut impl HTMLWriter, 115 | text_bytes: &[u8], 116 | start: &mut usize, 117 | p: usize, 118 | ) -> Result { 119 | let step = match self.tag.as_slice() { 120 | b"script" => { 121 | self.step_counter = 0; 122 | 123 | match self.attribute_type.as_slice() { 124 | b"" | b"application/javascript" | b"module" => { 125 | out.push_bytes(&text_bytes[*start..=p])?; 126 | *start = p + 1; 127 | 128 | self.attribute_type.clear(); 129 | self.buffer.clear(); 130 | 131 | Step::ScriptJavaScript 132 | }, 133 | _ => { 134 | self.attribute_type.clear(); 135 | 136 | Step::ScriptDefault 137 | }, 138 | } 139 | }, 140 | b"style" => { 141 | self.step_counter = 0; 142 | 143 | match self.attribute_type.as_slice() { 144 | b"" | b"text/css" => { 145 | out.push_bytes(&text_bytes[*start..=p])?; 146 | *start = p + 1; 147 | 148 | self.attribute_type.clear(); 149 | self.buffer.clear(); 150 | 151 | Step::StyleCSS 152 | }, 153 | _ => { 154 | self.attribute_type.clear(); 155 | 156 | Step::StyleDefault 157 | }, 158 | } 159 | }, 160 | b"pre" => { 161 | self.step_counter = 0; 162 | Step::Pre 163 | }, 164 | b"code" => { 165 | if self.minify_code { 166 | self.last_space = 0; 167 | 168 | Step::InitialRemainOneWhitespace 169 | } else { 170 | self.step_counter = 0; 171 | Step::Code 172 | } 173 | }, 174 | b"textarea" => { 175 | self.step_counter = 0; 176 | Step::Textarea 177 | }, 178 | _ => { 179 | self.last_space = 
0; 180 | 181 | Step::InitialRemainOneWhitespace 182 | }, 183 | }; 184 | 185 | Ok(step) 186 | } 187 | } 188 | 189 | impl HTMLMinifierHelper { 190 | /// Reset this html minifier helper. The option settings and allocated memory will be preserved. 191 | #[inline] 192 | pub fn reset(&mut self) { 193 | self.step = Step::default(); 194 | 195 | self.attribute_type.clear(); 196 | } 197 | 198 | /// Input some text to generate HTML code. It is not necessary to input a full HTML text at once. 199 | pub fn digest, W: HTMLWriter>( 200 | &mut self, 201 | text: S, 202 | out: &mut W, 203 | ) -> Result<(), HTMLMinifierError> { 204 | let text_bytes = text.as_ref(); 205 | let text_length = text_bytes.len(); 206 | 207 | let mut start = 0; 208 | let mut p = 0; 209 | 210 | while p < text_length { 211 | let e = text_bytes[p]; 212 | 213 | if e <= 0x7F { 214 | // ASCII 215 | if is_ascii_control(e) { 216 | out.push_bytes(&text_bytes[start..p])?; 217 | start = p + 1; 218 | } else { 219 | match self.step { 220 | Step::Initial => { 221 | // ? 222 | match e { 223 | b'<' => { 224 | out.push_bytes(&text_bytes[start..p])?; 225 | start = p + 1; 226 | 227 | self.step = Step::StartTagInitial; 228 | }, 229 | _ => { 230 | if is_whitespace(e) { 231 | debug_assert_eq!(start, p); 232 | start = p + 1; 233 | } else { 234 | self.last_space = 0; 235 | self.step = Step::InitialRemainOneWhitespace; 236 | } 237 | }, 238 | } 239 | }, 240 | Step::InitialRemainOneWhitespace => { 241 | // a? 242 | if is_whitespace(e) { 243 | out.push_bytes(&text_bytes[start..p])?; 244 | start = p + 1; 245 | 246 | self.last_space = e; 247 | 248 | self.step = Step::InitialIgnoreWhitespace; 249 | } else if e == b'<' { 250 | out.push_bytes(&text_bytes[start..p])?; 251 | start = p + 1; 252 | 253 | self.step = Step::StartTagInitial; 254 | } else { 255 | self.last_space = 0; 256 | } 257 | }, 258 | Step::InitialIgnoreWhitespace => { 259 | // a ? 
260 | match e { 261 | b'\n' => { 262 | debug_assert_eq!(start, p); 263 | start = p + 1; 264 | 265 | if self.last_space > 0 { 266 | self.last_space = b'\n'; 267 | } 268 | }, 269 | 0x09 | 0x0B..=0x0D | 0x1C..=0x20 => { 270 | debug_assert_eq!(start, p); 271 | start = p + 1; 272 | }, 273 | b'<' => { 274 | // This can just push ' ', but the minified HTML would be ugly 275 | if self.last_space == b'\n' { 276 | out.push(b'\n')?; 277 | } else if self.last_space > 0 { 278 | out.push(b' ')?; 279 | } 280 | 281 | out.push_bytes(&text_bytes[start..p])?; 282 | start = p + 1; 283 | 284 | self.step = Step::StartTagInitial; 285 | }, 286 | _ => { 287 | if self.last_space == b'\n' { 288 | out.push(b'\n')?; 289 | } else if self.last_space > 0 { 290 | out.push(b' ')?; 291 | } 292 | 293 | self.last_space = 0; 294 | self.step = Step::InitialRemainOneWhitespace; 295 | }, 296 | } 297 | }, 298 | Step::StartTagInitial => { 299 | debug_assert_eq!(start, p); 300 | 301 | // { 304 | start = p + 1; 305 | 306 | self.step = Step::EndTagInitial; 307 | }, 308 | b'!' 
=> { 309 | // ' => { 316 | // <> 317 | start = p + 1; 318 | 319 | self.last_space = 0; 320 | self.step = Step::InitialRemainOneWhitespace; 321 | }, 322 | _ => { 323 | out.push(b'<')?; 324 | 325 | if is_whitespace(e) { 326 | out.push_bytes(&text_bytes[start..p])?; 327 | start = p + 1; 328 | 329 | self.last_space = e; 330 | 331 | self.step = Step::InitialIgnoreWhitespace; 332 | } else { 333 | self.tag.clear(); 334 | self.tag.push(e.to_ascii_lowercase()); 335 | 336 | self.step = Step::StartTag; 337 | } 338 | }, 339 | } 340 | }, 341 | Step::EndTagInitial => { 342 | // ' => { 345 | // 346 | start = p + 1; 347 | 348 | self.last_space = 0; 349 | self.step = Step::InitialRemainOneWhitespace; 350 | }, 351 | _ => { 352 | out.push_bytes(b" { 367 | // self.step = Step::TagEnd, 379 | b'>' => { 380 | self.buffer.clear(); // the buffer may be used for the `type` attribute 381 | 382 | self.step = self.end_start_tag_and_get_next_step( 383 | out, text_bytes, &mut start, p, 384 | )?; 385 | }, 386 | _ => self.tag.push(e.to_ascii_lowercase()), 387 | } 388 | } 389 | }, 390 | Step::StartTagIn => { 391 | // { 394 | if self.last_space > 0 { 395 | out.push(b' ')?; 396 | } 397 | 398 | self.step = Step::TagEnd; 399 | }, 400 | b'>' => { 401 | self.step = self.end_start_tag_and_get_next_step( 402 | out, text_bytes, &mut start, p, 403 | )?; 404 | }, 405 | _ => { 406 | if is_whitespace(e) { 407 | debug_assert_eq!(start, p); 408 | start = p + 1; 409 | } else { 410 | out.push(b' ')?; 411 | 412 | self.buffer.clear(); 413 | self.buffer.push(e.to_ascii_lowercase()); 414 | 415 | self.step = Step::StartTagAttributeName; 416 | } 417 | }, 418 | } 419 | }, 420 | Step::StartTagAttributeName => { 421 | // self.step = Step::TagEnd, 424 | b'>' => { 425 | self.step = self.end_start_tag_and_get_next_step( 426 | out, text_bytes, &mut start, p, 427 | )?; 428 | }, 429 | b'=' => { 430 | out.push_bytes(&text_bytes[start..p])?; 431 | start = p + 1; 432 | 433 | self.set_flags_by_attribute(); 434 | 435 | self.step = 
Step::StartTagAttributeValueInitial; 436 | }, 437 | _ => { 438 | if is_whitespace(e) { 439 | out.push_bytes(&text_bytes[start..p])?; 440 | start = p + 1; 441 | 442 | self.step = Step::StartTagAttributeNameWaitingValue; 443 | } else { 444 | self.buffer.push(e.to_ascii_lowercase()); 445 | } 446 | }, 447 | } 448 | }, 449 | Step::StartTagAttributeNameWaitingValue => { 450 | // self.step = Step::TagEnd, 453 | b'>' => { 454 | self.step = self.end_start_tag_and_get_next_step( 455 | out, text_bytes, &mut start, p, 456 | )?; 457 | }, 458 | b'=' => { 459 | out.push_bytes(&text_bytes[start..p])?; 460 | start = p + 1; 461 | 462 | self.set_flags_by_attribute(); 463 | 464 | self.step = Step::StartTagAttributeValueInitial; 465 | }, 466 | _ => { 467 | if is_whitespace(e) { 468 | debug_assert_eq!(start, p); 469 | start = p + 1; 470 | } else { 471 | out.push(b' ')?; 472 | 473 | self.buffer.clear(); 474 | self.buffer.push(e.to_ascii_lowercase()); 475 | 476 | self.step = Step::StartTagAttributeName; 477 | } 478 | }, 479 | } 480 | }, 481 | Step::StartTagAttributeValueInitial => { 482 | // { 487 | self.step = Step::TagEnd; 488 | }, 489 | b'>' => { 490 | self.step = self.end_start_tag_and_get_next_step( 491 | out, text_bytes, &mut start, p, 492 | )?; 493 | }, 494 | b'"' | b'\'' => { 495 | self.quoted_value_spacing = false; 496 | self.quoted_value_empty = true; 497 | 498 | start = p + 1; 499 | 500 | self.quote = e; 501 | self.step = Step::StartTagQuotedAttributeValue; 502 | }, 503 | _ => { 504 | if is_whitespace(e) { 505 | start = p + 1; 506 | } else { 507 | if self.in_attribute_type { 508 | self.attribute_type.push(e); 509 | } 510 | 511 | out.push(b'=')?; 512 | 513 | self.step = Step::StartTagUnquotedAttributeValue; 514 | } 515 | }, 516 | } 517 | }, 518 | Step::StartTagQuotedAttributeValue => { 519 | // match e { 633 | b'-' => { 634 | start = p + 1; 635 | 636 | self.step_counter = 1; 637 | }, 638 | _ => { 639 | out.push_bytes(b" match e { 645 | b'-' => { 646 | if !self.remove_comments { 
647 | out.push_bytes(b"