/**
 * A simple HTML formatter.
 *
 * Walks the input one character at a time and re-emits it with every
 * block-level tag, comment and run of text on its own line, indented with one
 * tab per level of nesting. Inline elements (see $inline_tags) are copied
 * through verbatim up to the next '>'.
 *
 * Example:
 *
 *     $format = new Format();
 *     $formatted_html = $format->HTML($html);
 *     echo $formatted_html;
 *
 * NOTE(review): the class header and property declarations were not visible
 * in the source under review. The class name is inferred from the file name
 * (format.php) and the property list from usage in the methods below -
 * confirm against the original file before merging.
 */
class Format
{
    /** Raw markup being formatted. */
    private $input = '';

    /** Formatted markup accumulated so far. */
    private $output = '';

    /** Cursor: byte offset into $input of the character being processed. */
    private $input_index = 0;

    /** Current indentation depth, in tabs. */
    private $tabs = 0;

    /** TRUE while the cursor is between the '<' and '>' of a tag. */
    private $in_tag = false;

    /** TRUE while the cursor is inside an HTML comment. */
    private $in_comment = false;

    /** TRUE while the cursor is inside a run of text content. */
    private $in_content = false;

    /** TRUE from an inline start tag until the next '>' after that tag closes. */
    private $inline_tag = false;

    /** Elements that never have an end tag and so must not increase the indent. */
    private static $closed_tags = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    /** Elements that are kept on the same line as their content. */
    private static $inline_tags = array(
        'title', 'a', 'span', 'abbr', 'acronym', 'b', 'basefont', 'bdo', 'big',
        'cite', 'code', 'dfn', 'em', 'font', 'i', 'kbd', 'q', 's', 'samp',
        'small', 'strike', 'strong', 'sub', 'sup', 'textarea', 'tt', 'u',
        'var', 'del', 'pre',
    );

    /**
     * Format an HTML string.
     *
     * @param string $input Markup to format.
     * @return string The re-indented markup. Every line the formatter starts
     *                begins with "\n", so the result normally has a leading
     *                newline unless the input opens with a doctype.
     */
    public function HTML($input)
    {
        $this->input = (string) $input;
        $this->output = '';

        // Reset parser state so one instance can format several documents
        // without the second inheriting indentation from the first.
        $this->tabs = 0;
        $this->in_tag = false;
        $this->in_comment = false;
        $this->in_content = false;
        $this->inline_tag = false;

        $starting_index = 0;

        // When a doctype is present, everything up to the first '>' is copied
        // through untouched and formatting starts after it.
        if (preg_match('/<\!doctype/i', $this->input)) {
            $doctype_end = strpos($this->input, '>');
            if ($doctype_end !== false) {
                $starting_index = $doctype_end + 1;
                $this->output .= substr($this->input, 0, $starting_index);
            }
        }

        $length = strlen($this->input);

        for ($this->input_index = $starting_index; $this->input_index < $length; $this->input_index++) {
            if ($this->in_comment) {
                $this->parse_comment();
                continue;
            }
            if ($this->in_tag) {
                $this->parse_inner_tag();
                continue;
            }
            if ($this->inline_tag) {
                $this->parse_inner_inline_tag();
                continue;
            }

            $char = $this->input[$this->input_index];

            // Existing line breaks and tabs are discarded; the formatter
            // supplies its own. Spaces are kept.
            if ($char === "\r" || $char === "\n" || $char === "\t") {
                continue;
            }

            if ($char === '<') {
                if (!$this->is_inline_tag()) {
                    $this->in_content = false;
                }
                $this->parse_tag();
            } elseif (!$this->in_content) {
                // First character of a text run: start a fresh indented line.
                $this->output .= $this->newline_indent();
                $this->in_content = true;
            }

            $this->output .= $char;
        }

        return $this->output;
    }

    /**
     * Copy one character of a comment, leaving comment mode at '-->'.
     */
    private function parse_comment()
    {
        if ($this->is_end_comment()) {
            $this->in_comment = false;
            $this->output .= '-->';
            // Advance by two only: the loop's own increment consumes the third
            // character of '-->'. Advancing by three dropped the character
            // that follows the comment.
            $this->input_index += 2;
        } else {
            $this->output .= $this->input[$this->input_index];
        }
    }

    /**
     * Copy one character from inside a tag, leaving tag mode at '>'.
     */
    private function parse_inner_tag()
    {
        $char = $this->input[$this->input_index];

        if ($char === '>') {
            $this->in_tag = false;
        }

        $this->output .= $char;
    }

    /**
     * Copy one character of an inline element's content; the next '>' ends
     * inline mode and undoes the indent added by the inline start tag.
     */
    private function parse_inner_inline_tag()
    {
        $char = $this->input[$this->input_index];

        if ($char === '>') {
            $this->inline_tag = false;
            $this->decrement_tabs();
        }

        $this->output .= $char;
    }

    /**
     * Handle a '<' at the cursor: classify it as comment, end tag or start
     * tag, emit any line break it needs and update the indent level.
     * The '<' itself is written by the caller.
     */
    private function parse_tag()
    {
        if ($this->is_comment()) {
            $this->output .= $this->newline_indent();
            $this->in_comment = true;
            return;
        }

        if ($this->is_end_tag()) {
            $this->in_tag = true;
            $this->inline_tag = false;
            $this->decrement_tabs();
            // An end tag stays on the current line when it closes an inline
            // element or an element with no content.
            if (!$this->is_inline_tag() && !$this->is_tag_empty()) {
                $this->output .= $this->newline_indent();
            }
            return;
        }

        $this->in_tag = true;
        if (!$this->in_content && !$this->inline_tag) {
            $this->output .= $this->newline_indent();
        }
        if (!$this->is_closed_tag()) {
            $this->tabs++;
        }
        if ($this->is_inline_tag()) {
            $this->inline_tag = true;
        }
    }

    /**
     * Whether the '<' at the cursor opens an end tag ("</") or a "<!" construct.
     *
     * "<!" is treated like an end tag, as in the original code.
     * NOTE(review): the reason is not evident from this file - confirm intent.
     *
     * Only the character directly after the cursor is inspected. The earlier
     * implementation kept scanning up to the first '>', which misread a "</"
     * inside an attribute value and read past the end of the input.
     */
    private function is_end_tag()
    {
        $next = substr($this->input, $this->input_index + 1, 1);

        return $next === '/' || $next === '!';
    }

    /**
     * Reduce the indent level by one, never going below zero.
     */
    private function decrement_tabs()
    {
        $this->tabs = max(0, $this->tabs - 1);
    }

    /**
     * Whether a comment opener ("<!--") starts at the cursor.
     */
    private function is_comment()
    {
        // substr() simply returns a shorter string near the end of the input,
        // so there is no out-of-range offset access.
        return substr($this->input, $this->input_index, 4) === '<!--';
    }

    /**
     * Whether a comment terminator ("-->") starts at the cursor.
     */
    private function is_end_comment()
    {
        return substr($this->input, $this->input_index, 3) === '-->';
    }

    /**
     * Whether the end tag at the cursor closes an element with no content,
     * i.e. only whitespace lies between it and its own start tag.
     */
    private function is_tag_empty()
    {
        // +2 skips the leading "</" of the end tag.
        $current_tag = $this->get_current_tag($this->input_index + 2);
        $in_tag = false;

        // Walk backwards from just before the end tag.
        for ($input_index = $this->input_index - 1; $input_index >= 0; $input_index--) {
            $char = $this->input[$input_index];

            if (!$in_tag) {
                if ($char === '>') {
                    $in_tag = true;
                } elseif (!$this->is_whitespace($char)) {
                    // Real content precedes the end tag.
                    return false;
                }
            } elseif ($char === '<') {
                // Reached the start of the preceding tag. A preceding end tag
                // reads as "/name" and therefore never matches.
                return $current_tag === $this->get_current_tag($input_index + 1);
            }
        }

        return true;
    }

    /**
     * Read a tag name starting at the given offset.
     *
     * '<' characters are skipped and reading stops at '>' or whitespace; a
     * leading '/' is kept, which is_tag_empty() relies on.
     *
     * @param int $input_index Offset into the input to start reading from.
     * @return string Lower-cased name, '' if none.
     */
    private function get_current_tag($input_index)
    {
        $length = strlen($this->input);
        $current_tag = '';

        for (; $input_index < $length; $input_index++) {
            $char = $this->input[$input_index];

            if ($char === '<') {
                continue;
            }
            if ($char === '>' || $this->is_whitespace($char)) {
                break;
            }
            $current_tag .= $char;
        }

        return strtolower($current_tag);
    }

    /**
     * Whether the start tag at the cursor is a void element (no end tag).
     */
    private function is_closed_tag()
    {
        return in_array($this->tag_name_at_cursor(), self::$closed_tags, true);
    }

    /**
     * Whether the start or end tag at the cursor names an inline element.
     */
    private function is_inline_tag()
    {
        return in_array($this->tag_name_at_cursor(), self::$inline_tags, true);
    }

    /**
     * Lower-cased element name of the tag at the cursor.
     *
     * Skips the leading '<' and any '/', then reads up to the first
     * whitespace, '>' or '/', so "<br>", "<br/>" and "<br />" all yield "br".
     *
     * @return string Element name, '' if the input ends right after '<'.
     */
    private function tag_name_at_cursor()
    {
        $length = strlen($this->input);
        $start = $this->input_index;

        while ($start < $length && ($this->input[$start] === '<' || $this->input[$start] === '/')) {
            $start++;
        }

        $name_length = strcspn($this->input, "> \t\n\r\f\v/", $start);

        return strtolower((string) substr($this->input, $start, $name_length));
    }

    /**
     * Whether a single character is whitespace (space, tab, LF, CR, FF, VT).
     */
    private function is_whitespace($char)
    {
        return strpos(" \t\n\r\f\v", $char) !== false;
    }

    /**
     * A line break followed by the current indentation.
     */
    private function newline_indent()
    {
        return "\n" . str_repeat("\t", $this->tabs);
    }
}