├── .gitignore ├── HtmlTokenizer.zig ├── Layout.md ├── README.md ├── Refcounted.zig ├── alext.zig ├── build.zig ├── build.zig.zon ├── dom.zig ├── font ├── schrift.zig └── times-new-roman.ttf ├── html-css-renderer.template.html ├── htmlid.zig ├── imagerenderer.zig ├── layout.zig ├── lint.zig ├── make-renderer-webpage.zig ├── render.zig ├── revit.zig ├── test ├── hello.html └── svg.html ├── testrunner.zig ├── wasmrenderer.zig └── x11renderer.zig /.gitignore: -------------------------------------------------------------------------------- 1 | .zig-cache/ 2 | zig-out/ 3 | /dep/ 4 | /htmlidmaps.zig 5 | -------------------------------------------------------------------------------- /HtmlTokenizer.zig: -------------------------------------------------------------------------------- 1 | /// An html5 tokenizer. 2 | /// Implements the state machine described here: 3 | /// https://html.spec.whatwg.org/multipage/parsing.html#tokenization 4 | /// This tokenizer does not perform any processing/allocation, it simply 5 | /// splits the input text into higher-level tokens. 
/// A byte range into the original input text.
/// `start` is inclusive, `limit` is exclusive.
pub const Span = struct {
    start: usize,
    limit: usize,
    /// Returns the substring of `text` covered by this span.
    /// `text` must be the same buffer the span was produced from.
    pub fn slice(self: Span, text: []const u8) []const u8 {
        return text[self.start..self.limit];
    }
};
50 | value_raw: ?Span, 51 | }, 52 | comment: Span, 53 | // TODO: maybe combine multiple utf8-encoded chars in a single string 54 | char: Span, 55 | parse_error: enum { 56 | unexpected_null_character, 57 | invalid_first_character_of_tag_name, 58 | incorrectly_opened_comment, 59 | missing_end_tag_name, 60 | eof_before_tag_name, 61 | eof_in_doctype, 62 | eof_in_tag, 63 | eof_in_comment, 64 | missing_whitespace_before_doctype_name, 65 | unexpected_character_in_attribute_name, 66 | missing_attribute_value, 67 | unexpected_solidus_in_tag, 68 | abrupt_closing_of_empty_comment, 69 | }, 70 | 71 | pub const Doctype = struct { 72 | // NOTE: process name_raw by replacing 73 | // - upper-case ascii alpha with lower case (add 0x20) 74 | // - 0 with U+FFFD 75 | name_raw: ?Span, 76 | force_quirks: bool, 77 | //public_id: usize, 78 | //system_id: usize, 79 | }; 80 | 81 | pub fn start(self: Token) ?usize { 82 | return switch (self) { 83 | .start_tag => |t| t.start, // todo: subtract 1 for '<'? 84 | .end_tag => |t| t.start, // todo: subtract 2 for ''? 
/// Decode the next UTF-8 codepoint from the input into
/// `current_input_character` and advance `ptr` past it.
/// At end of input, sets `current_input_character.len` to 0 and leaves
/// `val` undefined; callers use `len == 0` as the EOF check.
fn consume(self: *HtmlTokenizer) !void {
    if (self.ptr == self.limit) {
        self.current_input_character = .{ .len = 0, .val = undefined };
        return;
    }
    const len = try std.unicode.utf8CodepointSequenceLength(self.ptr[0]);
    // reject a codepoint whose continuation bytes would run past the buffer
    if (@intFromPtr(self.ptr) + len > @intFromPtr(self.limit))
        return error.Utf8ExpectedContinuation;
    self.current_input_character = .{ .len = len, .val = try std.unicode.utf8Decode(self.ptr[0 .. len]) };
    self.ptr += len;
}
// why isn't this pub in std.unicode?
const Utf8DecodeError = error {
    Utf8ExpectedContinuation,
    Utf8OverlongEncoding,
    Utf8EncodesSurrogateHalf,
    Utf8CodepointTooLarge,
};

/// Returns the next token, or null at end of input.
/// A single input position can produce up to two tokens (e.g. a parse
/// error plus the character it applies to); the second one is stashed
/// in `deferred_token` and handed out on the following call.
pub fn next(self: *HtmlTokenizer) Utf8DecodeError!?Token {
    //std.log.info("next: offset={}", .{@intFromPtr(self.ptr) - @intFromPtr(self.start)});
    if (self.deferred_token) |t| {
        const token_copy = t;
        self.deferred_token = null;
        return token_copy;
    }
    const result = (self.next2() catch |err| switch (err) {
        // Why does std.unicode have both these errors?
        error.CodepointTooLarge => return error.Utf8CodepointTooLarge,
        // states marked NotImpl in next2 are tokenizer features not written yet
        error.NotImpl => @panic("not implemented"),
        else => |e| return e,
    }) orelse return null;
    if (result.deferred) |d| {
        self.deferred_token = d;
    }
    return result.token;
}
209 | if (self.current_input_character.len == 0) { 210 | self.state = .eof; 211 | const limit = @intFromPtr(self.ptr) - @intFromPtr(self.start); 212 | return .{ 213 | .token = .{ .parse_error = .eof_before_tag_name }, 214 | .deferred = .{ .char = .{ 215 | .start = tag_open_start, 216 | .limit = limit, 217 | } }, 218 | }; 219 | } 220 | switch (self.current_input_character.val) { 221 | '!' => self.state = .markup_declaration_open, 222 | '/' => self.state = .{ .end_tag_open = tag_open_start }, 223 | '?' => return error.NotImpl, 224 | else => |c| if (isAsciiAlpha(c)) { 225 | self.state = .{ 226 | .tag_name = .{ 227 | .is_end = false, 228 | .start = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 229 | }, 230 | }; 231 | } else { 232 | self.state = .data; 233 | self.ptr -= self.current_input_character.len; 234 | return .{ 235 | .token = .{ .parse_error = .invalid_first_character_of_tag_name }, 236 | .deferred = .{ .char = .{ 237 | .start = tag_open_start, 238 | // TODO: hopefully the '<' was only 1 byte! 239 | .limit = tag_open_start + 1, 240 | } }, 241 | }; 242 | }, 243 | } 244 | }, 245 | .end_tag_open => |tag_open_start| { 246 | const save_previous_char_len = self.current_input_character.len; 247 | try self.consume(); 248 | if (self.current_input_character.len == 0) { 249 | // NOTE: this is implemented differently from the spec so we only need to 250 | // support 1 deferred token, but, should result in the same tokens. 251 | self.state = .data; 252 | self.ptr -= save_previous_char_len; 253 | return .{ 254 | .token = .{ .parse_error = .eof_before_tag_name }, 255 | .deferred = .{ .char = .{ 256 | .start = tag_open_start, 257 | // TODO: hopefully the '<' was only 1 byte! 
258 | .limit = tag_open_start + 1, 259 | } }, 260 | }; 261 | } 262 | switch (self.current_input_character.val) { 263 | '>' => { 264 | self.state = .data; 265 | return .{ .token = .{ .parse_error = .missing_end_tag_name } }; 266 | }, 267 | else => |c| if (isAsciiAlpha(c)) { 268 | self.state = .{ 269 | .tag_name = .{ 270 | .is_end = true, 271 | .start = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 272 | }, 273 | }; 274 | } else { 275 | self.state = .bogus_comment; 276 | return .{ .token = .{ .parse_error = .invalid_first_character_of_tag_name } }; 277 | }, 278 | } 279 | }, 280 | .tag_name => |tag_state| { 281 | try self.consume(); 282 | if (self.current_input_character.len == 0) { 283 | self.state = .eof; 284 | return .{ .token = .{ .parse_error = .eof_in_tag } }; 285 | } 286 | switch (self.current_input_character.val) { 287 | '\t', '\n', form_feed, ' ' => { 288 | self.state = .before_attribute_name; 289 | const name_span = Span{ 290 | .start = tag_state.start, 291 | .limit = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 292 | }; 293 | return 294 | if (tag_state.is_end) .{ .token = .{ .end_tag = name_span } } 295 | else .{ .token = .{ .start_tag = name_span } }; 296 | }, 297 | '/' => self.state = .self_closing_start_tag, 298 | '>' => { 299 | self.state = .data; 300 | const name_span = Span{ 301 | .start = tag_state.start, 302 | .limit = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 303 | }; 304 | return 305 | if (tag_state.is_end) .{ .token = .{ .end_tag = name_span } } 306 | else .{ .token = .{ .start_tag = name_span } }; 307 | }, 308 | 0 => return .{ .token = .{ .parse_error = .unexpected_null_character } }, 309 | else => {}, 310 | } 311 | }, 312 | .self_closing_start_tag => { 313 | try self.consume(); 314 | if (self.current_input_character.len == 0) { 315 | self.state = .eof; 316 | return .{ .token = .{ .parse_error = .eof_in_tag } }; 317 | } else 
switch (self.current_input_character.val) { 318 | '>' => { 319 | self.state = .data; 320 | return .{ .token = .{ 321 | // TODO: can we assume the start will be 2 bytes back? 322 | .start_tag_self_closed = @intFromPtr(self.ptr) - 2 - @intFromPtr(self.start), 323 | }}; 324 | }, 325 | else => { 326 | self.state = .before_attribute_name; 327 | self.ptr -= self.current_input_character.len; 328 | return .{ .token = .{ .parse_error = .unexpected_solidus_in_tag } }; 329 | }, 330 | } 331 | }, 332 | .before_attribute_name => { 333 | try self.consume(); 334 | if (self.current_input_character.len == 0) { 335 | self.state = .after_attribute_name; 336 | } else switch (self.current_input_character.val) { 337 | '\t', '\n', form_feed, ' ' => {}, 338 | '/', '>' => { 339 | self.ptr -= self.current_input_character.len; 340 | self.state = .after_attribute_name; 341 | }, 342 | '=' => { 343 | // unexpected_equals_sign_before_attribute_name 344 | return error.NotImpl; 345 | }, 346 | else => self.state = .{ 347 | .attribute_name = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 348 | }, 349 | } 350 | }, 351 | .attribute_name => |start| { 352 | try self.consume(); 353 | if (self.current_input_character.len == 0) { 354 | self.state = .after_attribute_name; 355 | } else switch (self.current_input_character.val) { 356 | '\t', '\n', form_feed, ' ', '/', '>' => { 357 | self.ptr -= self.current_input_character.len; 358 | // TODO: pass something to after_attribute_name like start/limit? 
359 | // .start = start, 360 | // .limit = @intFromPtr(self.ptr) - @intFromPtr(self.start), 361 | self.state = .after_attribute_name; 362 | }, 363 | '=' => self.state = .{ .before_attribute_value = .{ 364 | .start = start, 365 | .limit = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 366 | }}, 367 | '"', '\'', '<' => return .{ .token = .{ .parse_error = .unexpected_character_in_attribute_name } }, 368 | else => {}, 369 | } 370 | }, 371 | .after_attribute_name => return error.NotImpl, 372 | .before_attribute_value => |name_span| { 373 | try self.consume(); 374 | if (self.current_input_character.len == 0) { 375 | self.state = .{ .attribute_value_unquoted = .{ .name_raw = name_span } }; 376 | } else switch (self.current_input_character.val) { 377 | '\t', '\n', form_feed, ' ' => {}, 378 | '"' => self.state = .{ .attribute_value = .{ 379 | .name_raw = name_span, 380 | .quote = .double, 381 | .start = @intFromPtr(self.ptr) - @intFromPtr(self.start), 382 | } }, 383 | '\'' => self.state = .{ .attribute_value = .{ 384 | .name_raw = name_span, 385 | .quote = .single, 386 | .start = @intFromPtr(self.ptr) - @intFromPtr(self.start), 387 | } }, 388 | '>' => { 389 | self.state = .data; 390 | return .{ 391 | .token = .{ .parse_error = .missing_attribute_value }, 392 | // TODO: return an attribute name without a value? 393 | //.deferred = .{ .attribute = .{ .name = ..., .value = null } }, 394 | }; 395 | }, 396 | else => { 397 | self.ptr -= self.current_input_character.len; 398 | self.state = .{ .attribute_value_unquoted = .{ 399 | .name_raw = name_span, 400 | }}; 401 | }, 402 | } 403 | }, 404 | .attribute_value => |attr_state| { 405 | try self.consume(); 406 | if (self.current_input_character.len == 0) { 407 | self.state = .eof; 408 | // NOTE: spec doesn't say to emit the current tag? 
409 | return .{ .token = .{ .parse_error = .eof_in_tag } }; 410 | } else switch (self.current_input_character.val) { 411 | '"' => switch (attr_state.quote) { 412 | .double => { 413 | self.state = .after_attribute_value; 414 | return .{ .token = .{ .attr = .{ 415 | .name_raw = attr_state.name_raw, 416 | .value_raw = .{ 417 | .start = attr_state.start, 418 | .limit = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 419 | }, 420 | }}}; 421 | }, 422 | .single => return error.NotImpl, 423 | }, 424 | '\'' => switch (attr_state.quote) { 425 | .double => return error.NotImpl, 426 | .single => return error.NotImpl, 427 | }, 428 | // TODO: the spec says the tokenizer should handle "character references" here, but, 429 | // that would require allocation, so, we should probably handle that elsewhere 430 | //'&' => return error.NotImpl, 431 | 0 => return .{ .token = .{ .parse_error = .unexpected_null_character } }, 432 | else => {}, 433 | } 434 | }, 435 | .attribute_value_unquoted => { 436 | return error.NotImpl; 437 | }, 438 | .after_attribute_value => { 439 | try self.consume(); 440 | if (self.current_input_character.len == 0) { 441 | self.state = .eof; 442 | // NOTE: spec doesn't say to emit the current tag? 
443 | return .{ .token = .{ .parse_error = .eof_in_tag } }; 444 | } else switch (self.current_input_character.val) { 445 | '\t', '\n', form_feed, ' ' => self.state = .before_attribute_name, 446 | '>' => { 447 | self.state = .data; 448 | }, 449 | '/' => self.state = .self_closing_start_tag, 450 | else => |c| std.debug.panic("todo c={}", .{c}), 451 | } 452 | }, 453 | .markup_declaration_open => { 454 | if (self.nextCharsAre("--")) { 455 | self.ptr += 2; 456 | self.state = .{ .comment_start = @intFromPtr(self.ptr) - @intFromPtr(self.start) }; 457 | } else if (self.nextCharsAre(DOCTYPE)) { 458 | self.ptr += DOCTYPE.len; 459 | self.state = .doctype; 460 | } else if (self.nextCharsAre("[CDATA[")) { 461 | return error.NotImpl; 462 | } else { 463 | self.state = .bogus_comment; 464 | return .{ .token = .{ .parse_error = .incorrectly_opened_comment } }; 465 | } 466 | }, 467 | .character_reference => { 468 | return error.NotImpl; 469 | }, 470 | .doctype => { 471 | try self.consume(); 472 | if (self.current_input_character.len == 0) { 473 | self.state = .eof; 474 | return .{ 475 | .token = .{ .parse_error = .eof_in_doctype }, 476 | .deferred = .{ .doctype = .{ 477 | .force_quirks = true, 478 | .name_raw = null, 479 | }}, 480 | }; 481 | } 482 | switch (self.current_input_character.val) { 483 | '\t', '\n', form_feed, ' ' => self.state = .before_doctype_name, 484 | '>' => { 485 | self.ptr -= self.current_input_character.len; 486 | self.state = .before_doctype_name; 487 | }, 488 | else => { 489 | self.ptr -= self.current_input_character.len; 490 | self.state = .before_doctype_name; 491 | return .{ .token = .{ .parse_error = .missing_whitespace_before_doctype_name } }; 492 | }, 493 | } 494 | }, 495 | .before_doctype_name => { 496 | try self.consume(); 497 | if (self.current_input_character.len == 0) { 498 | self.state = .eof; 499 | return .{ 500 | .token = .{ .parse_error = .eof_in_doctype }, 501 | .deferred = .{ .doctype = .{ 502 | .force_quirks = true, 503 | .name_raw = null, 504 
| }} 505 | }; 506 | } 507 | switch (self.current_input_character.val) { 508 | '\t', '\n', form_feed, ' ' => {}, 509 | 0 => { 510 | self.state = .{ .doctype_name = .{ 511 | .name_offset = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 512 | }}; 513 | return .{ .token = .{ .parse_error = .unexpected_null_character } }; 514 | }, 515 | '>' => { 516 | self.ptr -= self.current_input_character.len; 517 | self.state = .data; 518 | return .{ .token = .{ .doctype = .{ 519 | .force_quirks = true, 520 | .name_raw = null, 521 | }}}; 522 | }, 523 | else => { 524 | // NOTE: same thing for isAsciiAlphaUpper since we post-process the name 525 | self.state = .{ .doctype_name = .{ 526 | .name_offset = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 527 | }}; 528 | } 529 | } 530 | }, 531 | .doctype_name => |doctype_state| { 532 | try self.consume(); 533 | if (self.current_input_character.len == 0) { 534 | self.state = .eof; 535 | return .{ 536 | .token = .{ .parse_error = .eof_in_doctype }, 537 | .deferred = .{ .doctype = .{ 538 | .force_quirks = true, 539 | .name_raw = null, 540 | }}, 541 | }; 542 | } 543 | switch (self.current_input_character.val) { 544 | '\t', '\n', form_feed, ' ' => { 545 | self.state = .{ .after_doctype_name = .{ 546 | .name_offset = doctype_state.name_offset, 547 | .name_limit = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 548 | }}; 549 | }, 550 | '>' => { 551 | self.state = .data; 552 | return .{ .token = .{ .doctype = .{ 553 | .name_raw = .{ 554 | .start = doctype_state.name_offset, 555 | .limit = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 556 | }, 557 | .force_quirks = false, 558 | }}}; 559 | }, 560 | 0 => return .{ .token = .{ .parse_error = .unexpected_null_character } }, 561 | else => {}, 562 | } 563 | }, 564 | .after_doctype_name => { 565 | return error.NotImpl; 566 | }, 567 | .comment_start => 
|comment_start| { 568 | try self.consume(); 569 | if (self.current_input_character.len == 0) { 570 | self.ptr -= self.current_input_character.len; 571 | self.state = .{ .comment = comment_start }; 572 | } else switch (self.current_input_character.val) { 573 | '-' => self.state = .comment_start_dash, 574 | '>' => { 575 | self.state = .data; 576 | return .{ .token = .{ .parse_error = .abrupt_closing_of_empty_comment } }; 577 | }, 578 | else => { 579 | self.ptr -= self.current_input_character.len; 580 | self.state = .{ .comment = comment_start }; 581 | }, 582 | } 583 | }, 584 | .comment_start_dash => { 585 | return error.NotImpl; 586 | }, 587 | .comment => |comment_start| { 588 | try self.consume(); 589 | if (self.current_input_character.len == 0) { 590 | self.state = .eof; 591 | return .{ 592 | .token = .{ .parse_error = .eof_in_comment }, 593 | .deferred = .{ .comment = .{ 594 | .start = comment_start, 595 | .limit = @intFromPtr(self.ptr) - @intFromPtr(self.start), 596 | } }, 597 | }; 598 | } 599 | switch (self.current_input_character.val) { 600 | '<' => return error.NotImpl, 601 | '-' => self.state = .{ .comment_end_dash = .{ 602 | .start = comment_start, 603 | .limit = @intFromPtr(self.ptr) - self.current_input_character.len - @intFromPtr(self.start), 604 | }}, 605 | 0 => return error.NotImpl, 606 | else => {}, 607 | } 608 | }, 609 | .comment_end_dash => |comment_span| { 610 | try self.consume(); 611 | if (self.current_input_character.len == 0) { 612 | self.state = .eof; 613 | return .{ 614 | .token = .{ .parse_error = .eof_in_comment }, 615 | .deferred = .{ .comment = comment_span }, 616 | }; 617 | } 618 | switch (self.current_input_character.val) { 619 | '-' => self.state = .{ .comment_end = comment_span }, 620 | else => { 621 | self.ptr -= self.current_input_character.len; 622 | self.state = .{ .comment = comment_span.start }; 623 | }, 624 | } 625 | }, 626 | .comment_end => |comment_span| { 627 | try self.consume(); 628 | if (self.current_input_character.len == 
/// True for an ASCII lower-case letter ('a'-'z').
fn isAsciiAlphaLower(c: u21) bool {
    return switch (c) {
        'a'...'z' => true,
        else => false,
    };
}
/// True for an ASCII upper-case letter ('A'-'Z').
fn isAsciiAlphaUpper(c: u21) bool {
    return switch (c) {
        'A'...'Z' => true,
        else => false,
    };
}
/// True for any ASCII letter.
fn isAsciiAlpha(c: u21) bool {
    return isAsciiAlphaUpper(c) or isAsciiAlphaLower(c);
}
27 | Note that percentage sizes are a percentage of the size of the parent container. 28 | This means the size comes from the parent container rather than the content. 29 | 30 | From the defaults above, the top-level elements get their width from the viewport and their 31 | height from their content, meaning that HTML behaves differently in the X/Y direction by default. 32 | 33 | > NOTE: for `display: inline-block` elements, `width: auto` means `max-content` I think? 34 | you can see this by setting display to `inline-block` on the body and see that its 35 | width will grow to fit its content like it normally does in the y direction. 36 | 37 | Also note that `display: flex` seems to behave like `display: block` in this respect, namely, 38 | that by default its width is `100%` (even for elements who default to `display: inline-block` like `span`) 39 | and its height is `max-content` (I think?). 40 | 41 | NOTE: fit-content is a value between min/max content determined by this conditional: 42 | ``` 43 | if available >= max-content 44 | fit-content = max-content 45 | if available >= min-content 46 | fit-content = available 47 | else 48 | fit-content = min-content 49 | ``` 50 | 51 | ## Flexbox 52 | 53 | There's a "main axis" and "cross axis". 54 | Set `display: flex` to make an element a "flex container". 55 | All its "direct children" become "flex items". 
56 | 57 | ### Flex Container Properties 58 | 59 | #### flex-direction: direction to place items 60 | 61 | - row: left to right 62 | - row-reverse: right to left 63 | - column: top to bottom 64 | - coloumn-reverse: bottom to top 65 | 66 | #### justify-content: where to put the "extra space" on the main axis 67 | 68 | - flex-start (default): items packed to start so all "extra space" at the end 69 | - flex-end: items packed to end so all "extra space" at the start 70 | - center: "extra space" evenly split between start/end 71 | - space-between: "extra space" evenly split between all items 72 | - space-evenly: "exta space" evently split between and around all items 73 | - space-around (dumb): like space-evenly but start/end space is halfed 74 | 75 | #### align-items: how to align (or stretch) items on the cross axis 76 | 77 | - flex-start 78 | - flex-end 79 | - center 80 | - baseline: all items aligned so their "baselines" align 81 | - stretch 82 | 83 | 84 | By default flexbox only has a single main axis, the following properties apply to flex containers 85 | that allow multiple lines: 86 | 87 | #### flex-wrap 88 | 89 | - nowrap (default): keep all items on the same main axis, may cause overflow 90 | - wrap: allow multiple "main axis" 91 | - wrap-reverse: new axis are added in the "opposite cross direction" of a normal wrap 92 | for example, for flex-direction "row", new wrapped lines would go 93 | on top of the previous line instead of below. 94 | 95 | ### align-content: where to put the "extra space" on the cross axis 96 | 97 | Note that this is only applicable when wrapping multiple lines. 98 | 99 | Same values as "justify-content" except it doesn't have "space-evenly" 100 | and it adds "stretch", which is the default. 101 | 102 | #### flex-flow 103 | 104 | Shorthand for `flex-direction` and `flex-wrap`. 105 | 106 | ### Flex Item Properties 107 | 108 | #### order: set the item's "order group" 109 | 110 | All items in a lower "order group" come first. 
111 | The default "order group" is 0. 112 | Order can be negative. 113 | 114 | #### align-self: how to align (or strech) this item on the cross axis 115 | 116 | Same as "align-items" on the container except it affects this one item. 117 | 118 | 119 | ### Flex Layout Algorithm 120 | 121 | See if I can come up with a set of steps that can be done independently of each other to layout a flexbox. 122 | 123 | - Step ?: if there is "extra space" on the main axis, position items based on justify-content 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Html Css Renderer 2 | 3 | An HTML/CSS Renderer. 4 | -------------------------------------------------------------------------------- /Refcounted.zig: -------------------------------------------------------------------------------- 1 | const Refcounted = @This(); 2 | 3 | const std = @import("std"); 4 | const arc = std.log.scoped(.arc); 5 | 6 | const Metadata = struct { 7 | refcount: usize, 8 | }; 9 | const alloc_prefix_len = std.mem.alignForward(usize, @sizeOf(Metadata), @alignOf(Metadata)); 10 | 11 | data_ptr: [*]u8, 12 | pub fn alloc(allocator: std.mem.Allocator, len: usize) error{OutOfMemory}!Refcounted { 13 | const alloc_len = Refcounted.alloc_prefix_len + len; 14 | const full = try allocator.alignedAlloc(u8, @alignOf(Refcounted.Metadata), alloc_len); 15 | const buf = Refcounted{ .data_ptr = full.ptr + Refcounted.alloc_prefix_len }; 16 | buf.getMetadataRef().refcount = 1; 17 | arc.debug( 18 | "alloc {} (full={}) returning data_ptr 0x{x}", 19 | .{len, alloc_len, @intFromPtr(buf.data_ptr)}, 20 | ); 21 | return buf; 22 | } 23 | pub fn getMetadataRef(self: Refcounted) *Metadata { 24 | const addr = @intFromPtr(self.data_ptr); 25 | return @ptrFromInt(addr - alloc_prefix_len); 26 | } 27 | pub fn addRef(self: Refcounted) void { 28 | // TODO: what is AtomicOrder supposed to be? 
pub const unmanaged = struct {
    /// Best-effort shrink of `self`'s allocation so capacity == items.len.
    /// If the allocator cannot resize the block in place, the extra
    /// capacity is simply kept; no error is reported either way.
    pub fn finalize(comptime T: type, self: *std.ArrayListUnmanaged(T), allocator: std.mem.Allocator) void {
        const old_memory = self.allocatedSlice();
        if (allocator.resize(old_memory, self.items.len)) {
            self.capacity = self.items.len;
        }
    }
};
allocIdMapSource(b.allocator)); 10 | 11 | { 12 | const exe = b.addExecutable(.{ 13 | .name = "lint", 14 | .root_source_file = b.path("lint.zig"), 15 | .target = target, 16 | .optimize = optimize, 17 | }); 18 | exe.step.dependOn(&gen_id_maps.step); 19 | b.installArtifact(exe); 20 | } 21 | 22 | { 23 | const exe = b.addExecutable(.{ 24 | .name = "imagerenderer", 25 | .root_source_file = b.path("imagerenderer.zig"), 26 | .target = target, 27 | .optimize = optimize, 28 | }); 29 | exe.step.dependOn(&gen_id_maps.step); 30 | const install = b.addInstallArtifact(exe, .{}); 31 | b.step("image", "build/install imagerenderer").dependOn(&install.step); 32 | } 33 | 34 | const zigx_dep = b.dependency("zigx", .{}); 35 | const zigx_mod = zigx_dep.module("zigx"); 36 | 37 | { 38 | const exe = b.addExecutable(.{ 39 | .name = "x11renderer", 40 | .root_source_file = b.path("x11renderer.zig"), 41 | .target = target, 42 | .optimize = optimize, 43 | }); 44 | exe.step.dependOn(&gen_id_maps.step); 45 | exe.root_module.addImport("x11", zigx_mod); 46 | const install = b.addInstallArtifact(exe, .{}); 47 | b.step("x11", "build/install the x11renderer").dependOn(&install.step); 48 | } 49 | 50 | { 51 | const exe = b.addExecutable(.{ 52 | .name = "wasmrenderer", 53 | .root_source_file = b.path("wasmrenderer.zig"), 54 | .target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .os_tag = .freestanding }), 55 | .optimize = optimize, 56 | }); 57 | exe.entry = .disabled; 58 | //exe.export_table = true; 59 | exe.root_module.export_symbol_names = &.{ 60 | "alloc", 61 | "release", 62 | "onResize", 63 | "loadHtml", 64 | }; 65 | 66 | const make_exe = b.addExecutable(.{ 67 | .name = "make-renderer-webpage", 68 | .root_source_file = b.path("make-renderer-webpage.zig"), 69 | .target = b.graph.host, 70 | }); 71 | const run = b.addRunArtifact(make_exe); 72 | run.addArtifactArg(exe); 73 | run.addFileArg(b.path("html-css-renderer.template.html")); 74 | run.addArg(b.pathJoin(&.{ b.install_path, "html-css-renderer.html" 
}));
    b.step("wasm", "build the wasm-based renderer").dependOn(&run.step);
}

// testrunner: installed by the "test" step, then run once per listed html
// file; each run must print exactly "Success\n" for the step to pass.
{
    const exe = b.addExecutable(.{
        .name = "testrunner",
        .root_source_file = b.path("testrunner.zig"),
        .target = target,
        .optimize = optimize,
    });
    exe.step.dependOn(&gen_id_maps.step);
    const install = b.addInstallArtifact(exe, .{});
    const test_step = b.step("test", "run tests");
    test_step.dependOn(&install.step); // make testrunner easily accessible
    inline for ([_][]const u8{"hello.html"}) |test_filename| {
        const run_step = b.addRunArtifact(exe);
        run_step.addFileArg(b.path("test/" ++ test_filename));
        run_step.expectStdOutEqual("Success\n");
        test_step.dependOn(&run_step.step);
    }
}
}

/// Renders the generated htmlidmaps.zig source into an allocator-owned
/// slice. Caller owns the returned memory. Failures (only OOM is possible
/// here) are treated as fatal via `catch unreachable`, which is tolerable
/// in build-time code but would be a bug in library code.
fn allocIdMapSource(allocator: std.mem.Allocator) []const u8 {
    var src = std.ArrayList(u8).init(allocator);
    defer src.deinit();
    writeIdMapSource(src.writer()) catch unreachable;
    return src.toOwnedSlice() catch unreachable;
}

/// Writes the complete htmlidmaps.zig source: a file header plus one
/// comptime string-map namespace for tag ids and one for attribute ids.
fn writeIdMapSource(writer: anytype) !void {
    try writer.writeAll(
        \\const std = @import("std");
        \\const htmlid = @import("htmlid.zig");
        \\
    );
    try writeIdEnum(writer, htmlid.TagId, "tag");
    try writeIdEnum(writer, htmlid.AttrId, "attr");
}
/// Emits one `pub const <name>` namespace containing a ComptimeStringMap
/// keyed by the lower-cased enum field names, mapping back to the enum
/// values. @setEvalBranchQuota is raised because the map is built at
/// comptime over many entries.
fn writeIdEnum(writer: anytype, comptime Enum: type, name: []const u8) !void {
    try writer.print(
        \\
        \\pub const {s} = struct {{
        \\ pub const Enum = {s};
        \\ pub const map = blk: {{
        \\ @setEvalBranchQuota(6000);
        \\ break :blk std.ComptimeStringMap(Enum, .{{
        \\
    , .{name, @typeName(Enum)});
    inline for (@typeInfo(Enum).Enum.fields) |field| {
        var lower_buf: [field.name.len]u8 = undefined;
        // map keys are lower-case; enum field names may contain upper case
        for (field.name, 0..)
|c, i| {
            lower_buf[i] = std.ascii.toLower(c);
        }
        // field.name itself (original casing) is the enum value; fmtId
        // escapes names that are not valid zig identifiers
        try writer.print(" .{{ \"{s}\", .{} }},\n", .{lower_buf, std.zig.fmtId(field.name)});
    }
    try writer.writeAll(
        \\ });
        \\ };
        \\};
        \\
    );
}
-------------------------------------------------------------------------------- /build.zig.zon: --------------------------------------------------------------------------------
.{
    .name = "html-css-renderer",
    .version = "0.0.0",
    .minimum_zig_version = "0.13.0",
    .dependencies = .{
        .zigx = .{
            .url = "https://github.com/marler8997/zigx/archive/f09fd6fa5d593c759c0d9d35db4dfb5a150d366a.tar.gz",
            .hash = "122026f249798ac9b3ab3561b94d8460aaf1fb3487a5cb7c021387db1136cf08936d",
        },
    },
    .paths = .{
        "",
    },
}
-------------------------------------------------------------------------------- /dom.zig: --------------------------------------------------------------------------------
// see https://dom.spec.whatwg.org/#nodes
const std = @import("std");

const HtmlTokenizer = @import("HtmlTokenizer.zig");
const Token = HtmlTokenizer.Token;

const htmlid = @import("htmlid.zig");
const TagId = htmlid.TagId;
const AttrId = htmlid.AttrId;
const SvgTagId = htmlid.SvgTagId;
const SvgAttrId = htmlid.SvgAttrId;

const htmlidmaps = @import("htmlidmaps.zig");

// The types below are placeholders mirroring WHATWG DOM interface names;
// most members are commented out and not implemented yet.
pub const EventTarget = struct {

    // ctor
    // addEventListener
    // removeEventListener
    // dispatchEvent

};

pub const EventListener = struct {
    // handleEvent
};

pub const EventListenerOptions = struct {
    capture: bool = false,
};

pub const AddEventListenerOptions = struct {
    base: EventListenerOptions,
    passive: bool,
    once: bool = false,
    //signal: AbortSignal,
};

pub const DOMString = struct {
};
pub const USVString = struct {
};

pub const GetRootNodeOptions = struct {
    composed: bool = false,
};

// Node-type constants and commented-out members from the DOM spec's Node
// interface; only the numeric constants are defined so far.
pub const NodeInterface = struct {
    pub const ELEMENT_NODE = 1;
    pub const ATTRIBUTE_NODE = 2;
    pub const TEXT_NODE = 3;
    pub const CDATA_SECTION_NODE = 4;
    pub const ENTITY_REFERENCE_NODE = 5; // legacy
    pub const ENTITY_NODE = 6; // legacy
    pub const PROCESSING_INSTRUCTION_NODE = 7;
    pub const COMMENT_NODE = 8;
    pub const DOCUMENT_NODE = 9;
    pub const DOCUMENT_TYPE_NODE = 10;
    pub const DOCUMENT_FRAGMENT_NODE = 11;
    pub const NOTATION_NODE = 12; // legacy

    // eventTarget: EventTarget,
    // nodeType: u16,
    // nodeName: DOMString,
    // baseURI: USVString,
    // isConnected: bool,
    // ownerDocument: ?Document,
    // parentNode: ?Node,
    // parentElement: ?Element,

    //pub fn nodeType(node: Node) u16 { ... }

    // fn getRootNode(options: GetRootNodeOptions) Node {
    //     _ = options;
    //     @panic("todo");
    // }

};

pub const Document = struct {
    node: Node,
};

pub const Element = struct {
    node: Node,
};

/// True for tags whose default CSS display is block-level.
pub fn defaultDisplayIsBlock(id: TagId) bool {
    return switch (id) {
        .address, .article, .aside, .blockquote, .canvas, .dd, .div,
        .dl, .dt, .fieldset, .figcaption, .figure, .footer, .form,
        .h1, .h2, .h3, .h4, .h5, .h6, .header, .hr, .li, .main, .nav,
        .noscript, .ol, .p, .pre, .section, .table, .tfoot, .ul, .video,
        => true,
        else => false,
    };
}

/// An element that can never have content
pub fn isVoidElement(id: TagId) bool {
    return switch (id) {
        .area, .base, .br, .col, .command, .embed, .hr, .img, .input,
        .keygen, .link, .meta, .param, .source, .track, .wbr,
        => true,
        else => false,
    };
}

/// Case-insensitive lookup of `name` in one of the generated id maps
/// (htmlidmaps.tag / htmlidmaps.attr). Returns null for unknown names
/// and for names longer than the stack buffer.
fn lookupIdIgnoreCase(comptime map_namespace: type, name: []const u8) ?map_namespace.Enum {
    // need enough room for the max
// tag name
    var buf: [20]u8 = undefined;
    // names longer than any known id cannot match; reject instead of
    // overflowing the buffer
    if (name.len > buf.len) return null;
    for (name, 0..) |c, i| {
        buf[i] = std.ascii.toLower(c);
    }
    return map_namespace.map.get(buf[0 .. name.len]);
}
/// Case-insensitive tag-name lookup; null when not a known HTML tag.
fn lookupTagIgnoreCase(name: []const u8) ?TagId {
    return lookupIdIgnoreCase(htmlidmaps.tag, name);
}
/// Case-insensitive attribute-name lookup; null when not a known attribute.
fn lookupAttrIgnoreCase(name: []const u8) ?AttrId {
    return lookupIdIgnoreCase(htmlidmaps.attr, name);
}

/// One element of the flat (non-tree) DOM representation: start/end tags,
/// attributes, and text all refer back into the source via spans.
pub const Node = union(enum) {
    start_tag: struct {
        id: TagId,
        //self_closing: bool,
        // TODO: maybe make this a u32?
        parent_index: usize,
    },
    end_tag: TagId,
    attr: struct {
        // attribute values are kept as source spans; null when the
        // attribute has no value
        id: AttrId,
        value: ?HtmlTokenizer.Span,
    },
    text: HtmlTokenizer.Span,
};

const ParseOptions = struct {
    context: ?*anyopaque = null,
    on_error: ?*const fn(context: ?*anyopaque, msg: []const u8) void = null,

    // allows void elements like