33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2022-present Kevin Newton
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/test/fixture/erb_syntax_unformatted.html.erb:
--------------------------------------------------------------------------------
1 | <% this = "avoids line break after expression"-%>
2 | <%# This is an ERB-comment https://stackoverflow.com/a/25626629 this answer describes ERB and erubis syntax%>
3 | <%== rails_raw_output%>
4 | <%-"this only works in ERB not erubis"%>
5 | <% # This should be written on one line %>
6 | <%#
7 | This is a comment
8 | It can be mutiline
9 | Treat it as a comment
10 | %>
11 |
12 | <% if this -%>
13 | <%= form.submit -%>
14 | <% elsif that -%>
15 | <%= form.submit -%>
16 | <% else -%>
17 | <%= form.submit -%>
18 | <% end -%>
19 |
20 | <%- if this %>
21 | <%= form.submit -%>
22 | <%- elsif that %>
23 | <%= form.submit -%>
24 | <%- else %>
25 | <%= form.submit -%>
26 | <%- end %>
27 |
28 | <%= link_to(link, text) do -%>
29 |
51 |
--------------------------------------------------------------------------------
/test/fixture/erb_syntax_formatted.html.erb:
--------------------------------------------------------------------------------
1 | <% this = "avoids line break after expression" -%>
2 | <%# This is an ERB-comment https://stackoverflow.com/a/25626629 this answer describes ERB and erubis syntax%>
3 | <%== rails_raw_output %>
4 | <%- "this only works in ERB not erubis" %>
5 | <% # This should be written on one line %>
6 | <%#
7 | This is a comment
8 | It can be mutiline
9 | Treat it as a comment
10 | %>
11 |
12 | <% if this -%>
13 | <%= form.submit -%>
14 | <% elsif that -%>
15 | <%= form.submit -%>
16 | <% else -%>
17 | <%= form.submit -%>
18 | <% end -%>
19 |
20 | <%- if this %>
21 | <%= form.submit -%>
22 | <%- elsif that %>
23 | <%= form.submit -%>
24 | <%- else %>
25 | <%= form.submit -%>
26 | <%- end %>
27 |
28 | <%= link_to(link, text) do -%>
29 |
This is some text <%= variable %> and the special value after
"
209 | expected =
210 | "
This is some text <%= variable %> and the special value after
\n"
211 |
212 | assert_formatting(source, expected)
213 | end
214 |
# An output tag that ends with an inline Ruby comment must be left unchanged.
def test_erb_with_comment
  template = "<%= what # This is a comment %>\n"

  assert_formatting(template, template)
end
220 |
# A tag that holds nothing but a Ruby comment must be left unchanged.
def test_erb_only_ruby_comment
  template = "<% # This should be written on one line %>\n"

  assert_formatting(template, template)
end
226 |
# A single-line ERB comment tag must be left unchanged.
def test_erb_comment
  template = "<%# This should be written on one line %>\n"

  assert_formatting(template, template)
end
232 |
# Each line of a multi-line ERB comment loses its leading whitespace and the
# output gains a trailing newline.
def test_erb_multiline_comment
  unformatted =
    "<%#\n This is the first\n This is the second\n This is the third %>"
  formatted =
    "<%#\nThis is the first\nThis is the second\nThis is the third %>\n"

  assert_formatting(unformatted, formatted)
end
241 |
242 | def test_erb_ternary_as_argument_without_parentheses
243 | source =
244 | "<%= f.submit( f.object.id.present? ? t('buttons.titles.save'):t('buttons.titles.create')) %>"
245 | expected = <<~EXPECTED
246 | <%=
247 | f.submit(
248 | f.object.id.present? ? t("buttons.titles.save") : t("buttons.titles.create")
249 | )
250 | %>
251 | EXPECTED
252 |
253 | assert_formatting(source, expected)
254 | end
255 |
# Inline tags keep their surrounding text, while the body of a `do` block is
# moved onto its own line.
def test_erb_whitespace
  unformatted =
    "<%= 1 %>,<%= 2 %>What\n<%= link_to(url) do %>Very long link Very long link Very long link Very long link<% end %>"
  formatted =
    "<%= 1 %>,<%= 2 %>What\n<%= link_to(url) do %>\n Very long link Very long link Very long link Very long link\n<% end %>\n"

  assert_formatting(unformatted, formatted)
end
264 |
# Block arguments after `do` are kept and a space is inserted before the
# closing delimiter.
def test_erb_block_do_arguments
  unformatted = "<%= link_to(url) do |link, other_arg|%>Whaaaaaaat<% end %>"
  formatted =
    "<%= link_to(url) do |link, other_arg| %>\n Whaaaaaaat\n<% end %>\n"

  assert_formatting(unformatted, formatted)
end
272 |
273 | def test_erb_newline
274 | source = "<%= what if this %>\n
hej
"
275 | expected = "<%= what if this %>\n
hej
\n"
276 |
277 | assert_formatting(source, expected)
278 | end
279 |
# A blank line between consecutive tags is preserved by the formatter.
def test_erb_group_blank_line
  template = "<%= hello %>\n<%= heya %>\n\n<%# breaks the group %>\n"

  assert_formatting(template, template)
end
285 |
# Leading blank lines at the top of the template are dropped.
def test_erb_empty_first_line
  unformatted = "\n\n<%= what %>\n"
  formatted = "<%= what %>\n"

  assert_formatting(unformatted, formatted)
end
292 |
293 | def test_parsing_column_position
294 | example = <<~HTML
295 |
296 | <% if condition %>
297 |
A
298 | <% end %>
299 | <%= yes %>
302 |
303 | HTML
304 | parsed = ERB.parse(example)
305 | elements = parsed.elements
306 |
307 | assert_equal(1, elements.size)
308 |
309 | ul = elements.first
310 |
311 | assert_equal(1, ul.location.start_line)
312 | assert_equal(8, ul.location.end_line)
313 | assert_equal(0, ul.location.start_char)
314 | assert_equal(0, ul.location.start_column)
315 | assert_equal(5, ul.location.end_column)
316 | assert_equal(3, ul.elements.size)
317 |
318 | if_node = ul.elements.first
319 |
320 | assert_equal(2, if_node.location.start_line)
321 | assert_equal(4, if_node.location.end_line)
322 | assert_equal(2, if_node.location.start_column)
323 |
324 | comment_node = ul.elements[1]
325 |
326 | assert_equal(5, comment_node.location.start_line)
327 | assert_equal(7, comment_node.location.end_line)
328 | assert_equal(2, comment_node.location.start_column)
329 | assert_equal(6, comment_node.location.end_column)
330 | end
331 | end
332 | end
333 |
--------------------------------------------------------------------------------
/lib/syntax_tree/erb/format.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | module SyntaxTree
4 | module ERB
5 | class Format < Visitor
6 | attr_reader :q
7 |
# Creates a formatter that writes into the pretty-printer +q+.
# @inside_html_attributes starts out false; visit_opening_tag toggles it and
# visit_block reads it.
def initialize(q)
  @inside_html_attributes = false
  @q = q
end
12 |
# Visit a Token node. Text and whitespace tokens are written verbatim; all
# other token types are stripped of surrounding whitespace first.
def visit_token(node)
  verbatim = %i[text whitespace].include?(node.type)
  q.text(verbatim ? node.value : node.value.strip)
end
21 |
# Visit a Document node: render its children ordered by their start offset
# and finish with a forced line break.
def visit_document(node)
  ordered_children =
    node.child_nodes.sort_by { |child| child.location.start_char }

  handle_child_nodes(ordered_children)
  q.breakable(force: true)
end
31 |
# Renders a block: its opening, its indented child elements and, when
# present, its closing.
#
# A dependent block is one that follows after a "main one", e.g. <% else %>.
# It is rendered directly, whereas a main block is wrapped in its own group
# which (outside of HTML attributes) forces the parent group to break.
def visit_block(node, dependent: false)
  render = lambda do
    visit(node.opening)

    with_break = breakable_inside(node)
    if node.elements.any?
      q.indent do
        q.breakable("") if with_break
        handle_child_nodes(node.elements)
      end
    end

    if node.closing
      q.breakable("") if with_break
      visit(node.closing)
    end
  end

  return render.call if dependent

  q.group do
    q.break_parent unless @inside_html_attributes
    render.call
  end
end
61 |
# Renders an HTML node's opening tag, children and closing tag.
#
# When +group+ is truthy the breaks around the children are empty-width
# (`q.breakable("")`); otherwise they are regular breakables. Breaks are only
# emitted when breakable_inside(node) says so.
def visit_html_groupable(node, group)
  visit(node.opening)

  unless node.elements.size == 0
    with_break = breakable_inside(node)
    emit_break = -> { group ? q.breakable("") : q.breakable }

    q.indent do
      emit_break.call if with_break
      handle_child_nodes(node.elements)
    end

    emit_break.call if with_break
  end

  visit(node.closing)
end
83 |
# Visit an HtmlNode. The tags are grouped together when the node has no
# children, when any child is CharData, or when its only child is an ErbNode;
# otherwise the node is rendered without a surrounding group.
def visit_html(node)
  children = node.elements
  grouped =
    children.size == 0 ||
      children.any? { |child| child.is_a?(SyntaxTree::ERB::CharData) } ||
      (children.size == 1 && children.first.is_a?(SyntaxTree::ERB::ErbNode))

  if grouped
    q.group { visit_html_groupable(node, true) }
  else
    visit_html_groupable(node, false)
  end
end
99 |
# Visit an ErbBlock node; rendered as a main (non-dependent) block.
def visit_erb_block(node)
  visit_block(node, dependent: false)
end
103 |
# Visit an ErbIf node; rendered as a main (non-dependent) block.
def visit_erb_if(node)
  visit_block(node, dependent: false)
end
107 |
# Visit an elsif branch; rendered as a dependent block, i.e. without opening
# a group of its own.
def visit_erb_elsif(node)
  as_dependent = true
  visit_block(node, dependent: as_dependent)
end
111 |
# Visit an else branch; rendered as a dependent block, i.e. without opening
# a group of its own.
def visit_erb_else(node)
  as_dependent = true
  visit_block(node, dependent: as_dependent)
end
115 |
# Visit an ErbCase node; rendered as a main (non-dependent) block.
def visit_erb_case(node)
  visit_block(node, dependent: false)
end
119 |
# Visit a when branch; rendered as a dependent block, i.e. without opening
# a group of its own.
def visit_erb_case_when(node)
  as_dependent = true
  visit_block(node, dependent: as_dependent)
end
123 |
# Visit an ErbNode node.
#
# Between the opening and closing tag one group is emitted containing either
# a single space (blank content, no keyword), the keyword surrounded by
# spaces (blank content with keyword), or the formatted Ruby content followed
# by a breakable. That breakable is skipped for `do` closings because
# visit_erb_do_close inserts its own.
def visit_erb(node)
  visit(node.opening_tag)

  q.group do
    if node.content.blank?
      q.text(" ")
      if node.keyword
        visit(node.keyword)
        q.text(" ")
      end
    else
      visit_erb_content(node.content, keyword: node.keyword)
      q.breakable unless node.closing_tag.is_a?(ErbDoClose)
    end
  end

  visit(node.closing_tag)
end
143 |
# Visit an ErbDoClose node, i.e. a closing token that still carries the
# trailing "do ..." part of the Ruby code in front of the delimiter.
def visit_erb_do_close(node)
  raw = node.closing.value
  closing = raw.end_with?("-%>") ? "-%>" : "%>"

  # The last entry of the current group is the ERB content group, and the
  # last entry of that group is its indent.
  content_group = q.current_group.contents.last
  content_indent = content_group.contents.last

  # Append the "do" at the end of Ruby code (within the same group)
  q.with_target(content_indent.contents) do
    q.text(" ")
    q.text(raw.gsub(closing, "").rstrip)
  end

  # Add a breakable space after the indent, but within the same group
  q.with_target(content_group.contents) { q.breakable }

  q.text(closing)
end
159 |
# Visit an ErbClose node by visiting the closing token it wraps.
def visit_erb_close(node)
  closing_token = node.closing
  visit(closing_token)
end
163 |
# Visit an ErbYield node. The node itself is not inspected; the literal
# keyword is always written.
def visit_erb_yield(node)
  keyword_text = "yield"
  q.text(keyword_text)
end
167 |
# Visit an ErbEnd node: opening tag, keyword and closing tag, separated by
# single spaces.
def visit_erb_end(node)
  parts = [node.opening_tag, node.keyword, node.closing_tag]

  parts.each_with_index do |part, index|
    q.text(" ") unless index.zero?
    visit(part)
  end
end
176 |
# Visit an ErbContent node: format each Ruby statement and write it, one
# statement per (forced) line, inside an indent.
#
# +keyword+ is the optional keyword token of the surrounding ERB tag; it is
# handed to the first statement only.
def visit_erb_content(node, keyword: nil)
  # Reject all VoidStmt to avoid empty lines
  statements = child_nodes_without_void_statements(node)
  return if statements.empty?

  pending_keyword = keyword
  hard_break = -> { q.breakable(force: true) }

  q.indent do
    q.breakable
    q.seplist(statements, hard_break) do |statement|
      formatted =
        format_statement_with_keyword_prefix(
          statement,
          keyword: pending_keyword
        )
      # Pass the keyword only to the first child node
      pending_keyword = nil
      output_rows(formatted.split("\n"))
    end
  end
end
193 |
# Visit an HtmlNode::OpeningTag node.
#
# While the tag is being rendered @inside_html_attributes is true (read by
# visit_block). The flag is reset in an `ensure` so that an exception raised
# by a nested visit cannot leave the formatter stuck in "inside attributes"
# mode.
def visit_opening_tag(node)
  @inside_html_attributes = true

  q.group do
    visit(node.opening)
    visit(node.name)

    if node.attributes.any?
      q.indent do
        q.breakable
        q.seplist(node.attributes, -> { q.breakable }) do |attribute|
          visit(attribute)
        end
      end

      # Only add breakable if we have attributes
      q.breakable(node.closing.value == "/>" ? " " : "")
    elsif node.closing.value == "/>"
      # Need a space before end-tag for self-closing
      q.text(" ")
    end

    # If element is a valid void element, but not currently self-closing
    # format to be self-closing
    q.text(" /") if node.is_void_element? && node.closing.value == ">"

    visit(node.closing)
  end
ensure
  @inside_html_attributes = false
end
224 |
# Visit an HtmlNode::ClosingTag node: opening token, name and closing token
# are rendered back to back inside one group.
def visit_closing_tag(node)
  q.group do
    [node.opening, node.name, node.closing].each { |part| visit(part) }
  end
end
233 |
# Visit an Attribute node: key, equals sign and value are rendered back to
# back inside one group.
def visit_attribute(node)
  q.group do
    [node.key, node.equals, node.value].each { |part| visit(part) }
  end
end
242 |
# Visit a HtmlString node. The contents are wrapped in double quotes and
# written one after another; the separator callback writes nothing.
def visit_html_string(node)
  no_separator = -> { "" }

  q.group do
    q.text("\"")
    q.seplist(node.contents, no_separator) { |part| visit(part) }
    q.text("\"")
  end
end
251 |
# Visit an HtmlComment node by visiting the token it wraps.
def visit_html_comment(node)
  comment_token = node.token
  visit(comment_token)
end
255 |
# Visit an ErbComment node. The comment is split on newlines; each line is
# written without its leading whitespace, separated by breakables.
def visit_erb_comment(node)
  lines = node.token.value.split("\n")
  line_break = -> { q.breakable }

  q.seplist(lines, line_break) { |line| q.text(line.lstrip) }
end
261 |
# Visit a CharData node. Whitespace-only data is skipped; anything else is
# written verbatim.
def visit_char_data(node)
  text = node.value.value
  q.text(text) unless text.strip.empty?
end
268 |
# Visit a NewLine node: one break for a single newline, two (i.e. one blank
# line) when the node counts more than one.
def visit_new_line(node)
  breaks = node.count > 1 ? 2 : 1
  breaks.times { q.breakable(force: :skip_parent_break) }
end
273 |
# Visit a Doctype node: the opening token, a space, the name, and the closing
# token, all inside one group.
def visit_doctype(node)
  q.group do
    visit(node.opening)
    q.text(" ")
    [node.name, node.closing].each { |part| visit(part) }
  end
end
284 |
285 | private
286 |
# Decides whether a break is emitted between a node's opening/closing and its
# children.
#
# For an HtmlNode: true unless the first child is exactly a CharData, or when
# the opening tag is followed by at least one newline. For any other Block:
# always true. Returns nil for every other node.
def breakable_inside(node)
  case node
  when SyntaxTree::ERB::HtmlNode
    node.elements.first.class != SyntaxTree::ERB::CharData ||
      node_new_line_count(node.opening) > 0
  when SyntaxTree::ERB::Block
    true
  end
end
295 |
# Emits the break(s) that follow +node+ when it is rendered outside a group.
#
# - more than one trailing newline: a breakable plus a blank-line break
# - exactly one trailing newline: a breakable
# - no trailing newline: a breakable only if there is a next node and neither
#   node is CharData
def breakable_between(node, next_node)
  new_lines = node_new_line_count(node)

  if new_lines > 1
    q.breakable
    q.breakable(force: :skip_parent_break)
  elsif new_lines == 1
    q.breakable
  elsif next_node &&
        [node, next_node].none? { |n| n.is_a?(SyntaxTree::ERB::CharData) }
    q.breakable
  end
end
309 |
# Emits the break(s) that follow +node+ when it is rendered inside a group.
#
# One trailing newline gives one forced break, more than one gives two. With
# no trailing newline an empty-width breakable is emitted, but only if there
# is a next node and neither node is CharData.
def breakable_between_group(node, next_node)
  new_lines = node_new_line_count(node)

  if new_lines >= 1
    forced_breaks = new_lines == 1 ? 1 : 2
    forced_breaks.times { q.breakable(force: true) }
  elsif next_node && !node.is_a?(SyntaxTree::ERB::CharData) &&
        !next_node.is_a?(SyntaxTree::ERB::CharData)
    q.breakable("")
  end
end
323 |
# Returns the `count` of the node's `new_line`, or 0 when the node does not
# respond to `new_line`, has none, or the count itself is nil.
def node_new_line_count(node)
  return 0 unless node.respond_to?(:new_line)

  node.new_line&.count || 0
end
327 |
# Renders a list of sibling nodes.
#
# Consecutive nodes accepted by node_should_group are collected and rendered
# together via handle_group; every other node is visited directly and
# followed by breakable_between. The last node is rendered without its
# trailing newline.
def handle_child_nodes(child_nodes)
  if child_nodes.size == 1
    visit(child_nodes.first.without_new_line)
    return
  end

  last_index = child_nodes.size - 1
  pending = []

  child_nodes.each_with_index do |child, index|
    # Last element should not have new lines
    current = index == last_index ? child.without_new_line : child

    if node_should_group(current)
      pending << current
    else
      # Render all group elements before the current node
      handle_group(pending, break_after: true)
      pending = []

      # Render the current node
      visit(current)
      breakable_between(current, child_nodes[index + 1])
    end
  end

  # Handle group if we have any nodes left
  handle_group(pending, break_after: false)
end
361 |
# Renders collected group nodes. Nothing happens for an empty list; a single
# node is rendered without a group, several nodes share one group. When
# +break_after+ is set, the break following the last node is emitted too.
def handle_group(nodes, break_after:)
  return if nodes.empty?

  if nodes.size == 1
    handle_group_nodes(nodes)
  else
    q.group { handle_group_nodes(nodes) }
  end

  breakable_between_group(nodes.last, nil) if break_after
end
373 |
# Visits every node of a group, emitting the group break between each node
# and its successor (nothing is emitted after the last node).
def handle_group_nodes(nodes)
  nodes.each_with_index do |node, index|
    visit(node)

    following = nodes[index + 1]
    breakable_between_group(node, following) unless following.nil?
  end
end
382 |
# True for the node types that are collected into groups: CharData and
# ErbNode (including their subclasses).
def node_should_group(node)
  groupable = [SyntaxTree::ERB::CharData, SyntaxTree::ERB::ErbNode]
  groupable.any? { |klass| node.is_a?(klass) }
end
387 |
# Returns the statements of the parsed Ruby held by +node+ without VoidStmt
# entries; an empty array when there is no parsed value or no statements.
def child_nodes_without_void_statements(node)
  statements = node.value&.statements&.child_nodes || []
  statements.reject { |statement| statement.is_a?(SyntaxTree::VoidStmt) }
end
393 |
# Formats +statement+ as Ruby source, taking the ERB tag's keyword into
# account.
#
# Without a keyword the statement is formatted as is. For if/unless/elsif/
# case/when the statement is wrapped in the matching syntax tree node with a
# void body, formatted, and the trailing "\nend" is cut off again. Any other
# keyword is written to the printer directly, followed by a breakable, and
# the statement is formatted as is.
def format_statement_with_keyword_prefix(statement, keyword: nil)
  kind = keyword&.value
  return format_statement(statement) if kind.nil?

  wrapper =
    case kind
    when "if"
      SyntaxTree::IfNode.new(
        predicate: statement,
        statements: void_body,
        consequent: nil,
        location: keyword.location
      )
    when "unless"
      SyntaxTree::UnlessNode.new(
        predicate: statement,
        statements: void_body,
        consequent: nil,
        location: keyword.location
      )
    when "elsif"
      SyntaxTree::Elsif.new(
        predicate: statement,
        statements: void_body,
        consequent: nil,
        location: keyword.location
      )
    when "case"
      SyntaxTree::Case.new(
        keyword:
          SyntaxTree::Kw.new(value: "case", location: keyword.location),
        value: statement,
        consequent: void_body,
        location: keyword.location
      )
    when "when"
      SyntaxTree::When.new(
        arguments: statement.contents,
        statements: void_body,
        consequent: nil,
        location: keyword.location
      )
    end

  if wrapper.nil?
    # Keyword without a dedicated wrapper node: emit it verbatim.
    q.text(keyword.value)
    q.breakable
    return format_statement(statement)
  end

  format_statement(wrapper).delete_suffix("\nend")
end
450 |
# Formats a single Ruby statement with a fresh SyntaxTree::Formatter limited
# to SyntaxTree::ERB::MAX_WIDTH and returns the resulting source, with every
# ErbYield placeholder replaced by "yield".
def format_statement(statement)
  formatter =
    SyntaxTree::Formatter
      .new("", [], SyntaxTree::ERB::MAX_WIDTH)
      .tap do |ruby_formatter|
        ruby_formatter.format(statement)
        ruby_formatter.flush
      end

  rendered = formatter.output.join
  rendered.gsub(SyntaxTree::ERB::ErbYield::PLACEHOLDER, "yield")
end
463 |
# Writes the given rows: nothing for an empty list, plain text for a single
# row, and rows separated by forced breaks otherwise.
def output_rows(rows)
  case rows.size
  when 0
    nil
  when 1
    q.text(rows.first)
  else
    q.seplist(rows, -> { q.breakable(force: true) }) { |row| q.text(row) }
  end
end
471 |
# Builds a Location whose line, char and column fields are all zero, for
# synthetic nodes that have no position in the source.
def fake_location
  fields = %i[
    start_line
    start_char
    start_column
    end_line
    end_char
    end_column
  ]

  Location.new(**fields.to_h { |field| [field, 0] })
end
482 |
# Builds a Statements node containing a single VoidStmt; used as the empty
# body of the wrapper nodes in format_statement_with_keyword_prefix.
def void_body
  void_statement = SyntaxTree::VoidStmt.new(location: fake_location)

  SyntaxTree::Statements.new(body: [void_statement], location: fake_location)
end
489 | end
490 | end
491 | end
492 |
--------------------------------------------------------------------------------
/lib/syntax_tree/erb/nodes.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | module SyntaxTree
4 | module ERB
# A parent node that contains a bit of shared functionality.
class Node
  # Formats this node into the pretty-printer +q+ using the Format visitor.
  def format(q)
    formatter = Format.new(q)
    formatter.visit(self)
  end

  # Pretty-prints this node into +q+ using the PrettyPrint visitor.
  def pretty_print(q)
    printer = PrettyPrint.new(q)
    printer.visit(self)
  end

  # Plain nodes carry no trailing new line, so the node itself is returned.
  def without_new_line
    self
  end

  # Nodes are never skipped by default.
  def skip?
    false
  end
end
23 |
# A Token is any kind of lexical token from the source. It has a type, a
# value which is a subset of the source, and a location describing where it
# sits in the source.
class Token < Node
  attr_reader :type, :value, :location

  def initialize(type:, value:, location:)
    @type, @value, @location = type, value, location
  end

  # Dispatches to the visitor's token handler.
  def accept(visitor)
    visitor.visit_token(self)
  end

  # Tokens are leaves.
  def child_nodes
    []
  end

  alias deconstruct child_nodes

  # Hash-pattern support; +keys+ is ignored and all attributes are returned.
  def deconstruct_keys(keys)
    { type: @type, value: @value, location: @location }
  end
end
50 |
# The Document node is the top of the syntax tree.
# It contains any number of:
# - Text
# - HtmlNode
# - ErbNodes
class Document < Node
  attr_reader :elements, :location

  def initialize(elements:, location:)
    @elements, @location = elements, location
  end

  # Dispatches to the visitor's document handler.
  def accept(visitor)
    visitor.visit_document(self)
  end

  # The document's elements with nil entries removed.
  def child_nodes
    present = [*elements]
    present.compact
  end

  alias deconstruct child_nodes

  # Hash-pattern support; +keys+ is ignored and all attributes are returned.
  def deconstruct_keys(keys)
    { elements: @elements, location: @location }
  end
end
78 |
# This is a base class for a Node that can also hold an appended
# new line.
class Element < Node
  attr_reader(:new_line, :location)

  def initialize(new_line:, location:)
    @new_line, @location = new_line, location
  end

  # Returns a copy of this element, built from deconstruct_keys, whose
  # new_line is nil.
  def without_new_line
    attributes = deconstruct_keys([]).merge(new_line: nil)
    self.class.new(**attributes)
  end

  # Hash-pattern support; +keys+ is ignored and all attributes are returned.
  def deconstruct_keys(keys)
    { new_line: new_line, location: location }
  end
end
97 |
# This is a base class for a block that contains:
# - an opening
# - optional elements
# - optional closing
class Block < Node
  attr_reader(:opening, :elements, :closing, :location)

  # +elements+ defaults to an empty array when omitted or nil.
  def initialize(opening:, location:, elements: nil, closing: nil)
    @opening = opening
    @location = location
    @closing = closing
    @elements = elements || []
  end

  # Dispatches to the visitor's generic block handler.
  def accept(visitor)
    visitor.visit_block(self)
  end

  # Opening, elements and closing, in that order, without nil entries.
  def child_nodes
    [opening, *elements, closing].compact
  end

  # The new line after the block is the one carried by its closing, when the
  # closing is able to carry one; nil otherwise.
  def new_line
    return unless closing.respond_to?(:new_line)

    closing.new_line
  end

  # Returns a copy of this block whose closing has its new line removed.
  def without_new_line
    attributes =
      deconstruct_keys([]).merge(closing: closing&.without_new_line)
    self.class.new(**attributes)
  end

  alias deconstruct child_nodes

  # Hash-pattern support; +keys+ is ignored and all attributes are returned.
  def deconstruct_keys(keys)
    {
      opening: opening,
      elements: elements,
      closing: closing,
      location: location
    }
  end
end
140 |
# An element is a child of the document. It contains an opening tag, any
# optional content within the tag, and a closing tag. It can also
# potentially contain an opening tag that self-closes, in which case the
# content and closing tag will be nil.
class HtmlNode < Block
  # These elements do not require a closing tag
  # https://developer.mozilla.org/en-US/Glossary/Void_element
  # Frozen so the shared list cannot be mutated by accident.
  HTML_VOID_ELEMENTS = %w[
    area
    base
    br
    col
    embed
    hr
    img
    input
    link
    meta
    param
    source
    track
    wbr
  ].freeze

  # The opening tag of an element. It contains the opening character (<),
  # the name of the element, any optional attributes, and the closing
  # token (either > or />).
  class OpeningTag < Element
    attr_reader :opening, :name, :attributes, :closing

    def initialize(
      opening:,
      name:,
      attributes:,
      closing:,
      new_line:,
      location:
    )
      super(new_line: new_line, location: location)
      @opening = opening
      @name = name
      @attributes = attributes
      @closing = closing
    end

    # Dispatches to the visitor's opening-tag handler.
    def accept(visitor)
      visitor.visit_opening_tag(self)
    end

    def child_nodes
      [opening, name, *attributes, closing]
    end

    # True when the tag name is one of HTML_VOID_ELEMENTS.
    def is_void_element?
      HTML_VOID_ELEMENTS.include?(name.value)
    end

    alias deconstruct child_nodes

    # Hash-pattern support; +keys+ is ignored and all attributes are
    # returned.
    def deconstruct_keys(keys)
      super.merge(
        opening: opening,
        name: name,
        attributes: attributes,
        closing: closing
      )
    end
  end

  # The closing tag of an element. It contains the opening character (<),
  # the name of the element, and the closing character (>).
  class ClosingTag < Element
    attr_reader :opening, :name, :closing

    def initialize(opening:, name:, closing:, location:, new_line:)
      super(new_line: new_line, location: location)
      @opening = opening
      @name = name
      @closing = closing
    end

    # Dispatches to the visitor's closing-tag handler.
    def accept(visitor)
      visitor.visit_closing_tag(self)
    end

    def child_nodes
      [opening, name, closing]
    end

    alias deconstruct child_nodes

    # Hash-pattern support; +keys+ is ignored and all attributes are
    # returned.
    def deconstruct_keys(keys)
      super.merge(opening: opening, name: name, closing: closing)
    end
  end

  # The node itself is never reported as a void element; that question is
  # answered by its OpeningTag.
  def is_void_element?
    false
  end

  # Returns a copy of the node without its trailing new line. The new line
  # lives on the closing tag, or on the opening tag when there is no closing
  # tag.
  def without_new_line
    self.class.new(
      **deconstruct_keys([]).merge(
        opening: closing.nil? ? opening.without_new_line : opening,
        closing: closing&.without_new_line
      )
    )
  end

  # The HTML-closing tag is responsible for new lines after the node; the
  # opening tag takes over when there is no closing tag.
  def new_line
    closing.nil? ? opening.new_line : closing.new_line
  end

  # Dispatches to the visitor's HTML handler.
  def accept(visitor)
    visitor.visit_html(self)
  end
end
259 |
# An ERB tag: an opening tag, an optional keyword token, the Ruby content
# (stored as an ErbContent) and a closing tag.
class ErbNode < Element
  attr_reader :opening_tag, :keyword, :content, :closing_tag

  def initialize(
    opening_tag:,
    keyword:,
    content:,
    closing_tag:,
    new_line:,
    location:
  )
    super(new_line: new_line, location: location)
    @opening_tag = opening_tag
    # prune whitespace from keyword
    @keyword =
      if keyword
        Token.new(
          type: keyword.type,
          value: keyword.value.strip,
          location: keyword.location
        )
      end

    # prepare_content reads the keyword and opening tag assigned above and may
    # reset @keyword to nil when the keyword turns out to be part of the Ruby
    # code.
    @content = prepare_content(content)
    @closing_tag = closing_tag
  end

  # Dispatches to the visitor's ERB handler.
  def accept(visitor)
    visitor.visit_erb(self)
  end

  def child_nodes
    [opening_tag, keyword, content, closing_tag].compact
  end

  # The new line after the node is the one carried by its closing tag.
  def new_line
    closing_tag&.new_line
  end

  # Returns a copy of the node whose closing tag has its new line removed. A
  # missing closing tag is tolerated, matching #new_line.
  def without_new_line
    self.class.new(
      **deconstruct_keys([]).merge(
        closing_tag: closing_tag&.without_new_line
      )
    )
  end

  alias deconstruct child_nodes

  # Hash-pattern support; +keys+ is ignored and all attributes are returned.
  def deconstruct_keys(keys)
    super.merge(
      opening_tag: opening_tag,
      keyword: keyword,
      content: content,
      closing_tag: closing_tag
    )
  end

  private

  # Turns the raw content into an ErbContent (parsing it as Ruby).
  #
  # An existing ErbContent is returned unchanged. If parsing fails, a second
  # attempt is made with the keyword prepended; on success the keyword is
  # dropped from the node. If that fails as well, a ParseError is raised with
  # its position translated relative to the ERB tag.
  def prepare_content(content)
    if content.is_a?(ErbContent)
      content
    else
      content ||= []

      if !content.empty? && keyword&.value == "when"
        # "when" accepts multiple comma-separated arguments, so let's try
        # to make them parsable.
        ErbContent.new(value: ["[", *content, "]"])
      else
        ErbContent.new(value: content)
      end
    end
  rescue SyntaxTree::Parser::ParseError
    # Try to add the keyword to see if it parses
    begin
      result = ErbContent.new(value: [keyword, *content])
      @keyword = nil

      result
    rescue SyntaxTree::Parser::ParseError => error
      opening_location = opening_tag.location
      content_location = content.first&.location || opening_location
      # NOTE(review): the column branch compares the tag's absolute start line
      # with the parser's line number — confirm that this is intended.
      raise(
        SyntaxTree::Parser::ParseError.new(
          "Could not parse ERB-tag: #{error.message}",
          opening_location.start_line + error.lineno - 1,
          (
            if opening_location.start_line == error.lineno
              opening_location.start_column + error.column - 1
            else
              content_location.start_column + error.column - 1
            end
          )
        )
      )
    end
  end
end
361 |
# A Block that is dispatched to the visitor's ERB-block handler.
class ErbBlock < Block
  # Same signature as Block#initialize; all arguments are forwarded as is.
  def initialize(opening:, location:, elements: nil, closing: nil)
    super
  end

  def accept(visitor)
    visitor.visit_erb_block(self)
  end
end
376 |
# The closing part of an ERB tag, wrapping the closing token.
class ErbClose < Element
  attr_reader :closing

  def initialize(closing:, new_line:, location:)
    super(new_line: new_line, location: location)
    @closing = closing
  end

  # Dispatches to the visitor's ERB-close handler.
  def accept(visitor)
    visitor.visit_erb_close(self)
  end

  # Reported as a leaf; the wrapped token is not listed.
  def child_nodes
    []
  end

  alias deconstruct child_nodes

  # Hash-pattern support; +keys+ is ignored and all attributes are returned.
  def deconstruct_keys(keys)
    attributes = super
    attributes.merge(closing: closing)
  end
end
399 |
# An ErbClose that is dispatched to the visitor's dedicated `do`-close
# handler instead of the generic close handler.
class ErbDoClose < ErbClose
  def accept(visitor)
    visitor.visit_erb_do_close(self)
  end
end
405 |
# Common parent of the control-flow blocks (if/case and their branches); adds
# no behaviour of its own.
class ErbControl < Block; end
408 |
# An `if` block.
#
# opening: ErbNode
# elements: [[HtmlNode | ErbNode | CharDataNode]]
# closing: [nil | ErbElsif | ErbElse]
class ErbIf < ErbControl
  # Dispatches to the visitor's if handler.
  def accept(visitor)
    visitor.visit_erb_if(self)
  end
end
417 |
# An `unless` block; handled by the visitor exactly like an `if` block.
#
# opening: ErbNode
# elements: [[HtmlNode | ErbNode | CharDataNode]]
# closing: [nil | ErbElsif | ErbElse]
class ErbUnless < ErbIf
  # Dispatches to the visitor's if handler.
  def accept(visitor)
    visitor.visit_erb_if(self)
  end
end
426 |
# An `elsif` branch.
class ErbElsif < ErbIf
  # Dispatches to the visitor's if handler.
  # NOTE(review): Format also defines visit_erb_elsif, which this dispatch
  # never reaches — confirm that routing through visit_erb_if is intended.
  def accept(visitor)
    visitor.visit_erb_if(self)
  end
end
432 |
# An `else` branch.
class ErbElse < ErbIf
  # Dispatches to the visitor's else handler.
  def accept(visitor)
    visitor.visit_erb_else(self)
  end
end
438 |
# The ERB tag that ends a block.
class ErbEnd < ErbNode
  # Dispatches to the visitor's end handler.
  def accept(visitor)
    visitor.visit_erb_end(self)
  end

  # Reported as a leaf, unlike a regular ErbNode.
  def child_nodes
    []
  end

  # Re-aliased so that deconstruct follows the override above.
  alias deconstruct child_nodes
end
450 |
# A `case` block.
#
# opening: ErbNode
# elements: [[HtmlNode | ErbNode | CharDataNode]]
# closing: [nil | ErbCaseWhen | ErbElse | ErbEnd]
class ErbCase < ErbControl
  # Dispatches to the visitor's case handler.
  def accept(visitor)
    visitor.visit_erb_case(self)
  end
end
459 |
# A `when` branch of a case block.
#
# opening: ErbNode
# elements: [[HtmlNode | ErbNode | CharDataNode]]
# closing: [nil | ErbCaseWhen | ErbElse | ErbEnd]
class ErbCaseWhen < ErbControl
  # Dispatches to the visitor's when handler.
  def accept(visitor)
    visitor.visit_erb_case_when(self)
  end
end
468 |
# The Ruby code inside an ERB tag, stored as the result of SyntaxTree.parse.
class ErbContent < Node
  attr_reader(:value)

  # +value+ is either a source string or an array of parts. Array parts are
  # joined into one string: Tokens contribute their value, ErbYield nodes the
  # yield placeholder, anything else is used as is. The stripped source is
  # then parsed.
  def initialize(value:)
    source =
      if value.is_a?(Array)
        parts =
          value.map do |token|
            case token
            when Token
              token.value
            when ErbYield
              ErbYield::PLACEHOLDER
            else
              token
            end
          end
        parts.join
      else
        value
      end

    @value = SyntaxTree.parse(source.strip)
  end

  # True when there is no parsed value, or when every parsed statement is a
  # VoidStmt.
  def blank?
    return true if value.nil?

    value.statements.child_nodes.all? do |node|
      node.is_a?(SyntaxTree::VoidStmt)
    end
  end

  # Dispatches to the visitor's ERB-content handler.
  def accept(visitor)
    visitor.visit_erb_content(self)
  end

  def child_nodes
    [@value].compact
  end

  alias deconstruct child_nodes

  # Hash-pattern support; +keys+ is ignored.
  def deconstruct_keys(keys)
    { value: value }
  end
end
513 |
# A `yield` inside ERB content. While the surrounding Ruby is parsed and
# formatted it is represented by PLACEHOLDER, which the formatter replaces
# with "yield" afterwards.
class ErbYield < Element
  PLACEHOLDER = "qqqqy"

  def initialize(new_line:, location:)
    super(new_line: new_line, location: location)
  end

  # Dispatches to the visitor's yield handler.
  def accept(visitor)
    visitor.visit_erb_yield(self)
  end

  # ErbYield is a leaf.
  def child_nodes
    []
  end

  # Array-pattern support, consistent with the other node classes.
  alias deconstruct child_nodes
end
528 |
# An HtmlAttribute is a key-value pair within a tag. It contains the key, the
# equals sign, and the value.
class HtmlAttribute < Node
  attr_reader :key, :equals, :value, :location

  def initialize(key:, equals:, value:, location:)
    @key, @equals, @value = key, equals, value
    @location = location
  end

  # Dispatches to the visitor's attribute handler.
  def accept(visitor)
    visitor.visit_attribute(self)
  end

  def child_nodes
    [key, equals, value]
  end

  alias deconstruct child_nodes

  # Hash-pattern support; +keys+ is ignored and all attributes are returned.
  def deconstruct_keys(keys)
    { key: @key, equals: @equals, value: @value, location: @location }
  end
end
555 |
# An attribute value. Its contents may mix plain text with ERB tags. The
# opening and closing quotes are nil when the parser builds the string from
# an unquoted name.
class HtmlString < Node
  attr_reader :opening, :contents, :closing, :location

  def initialize(opening:, contents:, closing:, location:)
    @opening, @contents, @closing, @location =
      opening, contents, closing, location
  end

  def accept(visitor)
    visitor.visit_html_string(self)
  end

  # Only the contents count as children; the quotes are left out.
  def child_nodes
    [*@contents]
  end

  alias deconstruct child_nodes

  def deconstruct_keys(keys)
    {
      opening: @opening,
      contents: @contents,
      closing: @closing,
      location: @location
    }
  end
end
586 |
# An HTML comment, kept as the single token that the lexer produced for it.
class HtmlComment < Element
  attr_reader :token

  def initialize(token:, new_line:, location:)
    super(new_line: new_line, location: location)
    @token = token
  end

  def accept(visitor)
    visitor.visit_html_comment(self)
  end

  # A comment has no children.
  def child_nodes
    []
  end

  alias deconstruct child_nodes

  # The keys provided by Element plus :token.
  def deconstruct_keys(keys)
    inherited = super
    inherited.merge(token: @token)
  end
end
609 |
# An ERB comment, kept as the single token that the lexer produced for it.
class ErbComment < Element
  attr_reader :token

  def initialize(token:, new_line:, location:)
    super(new_line: new_line, location: location)
    @token = token
  end

  def accept(visitor)
    visitor.visit_erb_comment(self)
  end

  # A comment has no children.
  def child_nodes
    []
  end

  alias deconstruct child_nodes

  # The keys provided by Element plus :token.
  def deconstruct_keys(keys)
    inherited = super
    inherited.merge(token: @token)
  end
end
632 |
# Plain text or whitespace found within an element, wrapped as one token.
class CharData < Element
  attr_reader :value

  def initialize(value:, new_line:, location:)
    super(new_line: new_line, location: location)
    @value = value
  end

  def accept(visitor)
    visitor.visit_char_data(self)
  end

  def child_nodes
    [@value]
  end

  alias deconstruct child_nodes

  # The keys provided by Element plus :value.
  def deconstruct_keys(keys)
    inherited = super
    inherited.merge(value: @value)
  end

  # True when the text is empty once surrounding whitespace is stripped.
  def skip?
    text = value.value
    text.strip.empty?
  end

  # Returns a copy of this node that has no new line attached and whose text
  # has its trailing whitespace removed.
  def without_new_line
    attributes = deconstruct_keys([])
    trimmed =
      Token.new(
        type: value.type,
        location: value.location,
        value: value.value.rstrip
      )

    self.class.new(**attributes.merge(new_line: nil, value: trimmed))
  end
end
676 |
# A line break in the template together with the number of newline
# characters it covers.
class NewLine < Node
  attr_reader :count, :location

  def initialize(location:, count:)
    @location, @count = location, count
  end

  def accept(visitor)
    visitor.visit_new_line(self)
  end

  # A new line has no children.
  def child_nodes
    []
  end

  alias deconstruct child_nodes

  def deconstruct_keys(keys)
    { location: @location, count: @count }
  end
end
699 |
# The document type declaration: a special tag that states the type of the
# document. It is made of the opening of the declaration, the name of the
# document type and the closing of the tag.
class Doctype < Element
  attr_reader :opening, :name, :closing

  def initialize(opening:, name:, closing:, new_line:, location:)
    super(new_line: new_line, location: location)
    @opening, @name, @closing = opening, name, closing
  end

  def accept(visitor)
    visitor.visit_doctype(self)
  end

  # The three parts in source order, leaving out any that are nil.
  def child_nodes
    [@opening, @name, @closing].compact
  end

  alias deconstruct child_nodes

  # The keys provided by Element plus :opening, :name and :closing.
  def deconstruct_keys(keys)
    inherited = super
    inherited.merge(opening: @opening, name: @name, closing: @closing)
  end
end
728 | end
729 | end
730 |
--------------------------------------------------------------------------------
/lib/syntax_tree/erb/parser.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | module SyntaxTree
4 | module ERB
5 | class Parser
# Raised when the next token is not of the type a parsing step asked for.
# Optional elements rescue it inside the parser; when nothing rescues it, it
# propagates and ends the parse. It derives from
# SyntaxTree::Parser::ParseError, the error class this parser raises for
# every other problem as well.
class MissingTokenError < SyntaxTree::Parser::ParseError
end
15 |
# source: the template text handed to the parser.
# tokens: the enumerator of tokens built by make_tokens.
attr_reader :source, :tokens

# Stores the template and prepares the parser state: no doctype has been
# seen yet and parsing starts outside of any tag.
def initialize(source)
  @source = source
  @found_doctype = false
  @erb_context = :outside
  @tokens = make_tokens
end
24 |
# Parses the whole template and returns a Document holding the top-level
# elements. The document location runs from the first element to the last
# one and is nil when there are no elements.
def parse
  nodes = many { parse_any_tag }
  span = nodes.any? ? nodes.first.location.to(nodes.last.location) : nil

  Document.new(elements: nodes, location: span)
end
33 |
# Debugging aid: prints one line per token in the form
# "type value index line", with the value shown through #inspect.
def debug_tokens
  @tokens.each do |type, text, offset, line_number|
    puts("#{type} #{text.inspect} #{offset} #{line_number}")
  end
end
39 |
40 | private
41 |
# Parses the next element that may appear between tags and returns it.
#
# The candidate parsers are tried in a fixed order and the first one that
# yields a result wins. Stand-alone new lines and elements that report
# skip? are dropped, after which the next element is tried.
#
# Raises MissingTokenError when no candidate matches and
# SyntaxTree::Parser::ParseError when a second doctype shows up.
def parse_any_tag
  candidates = %i[
    parse_doctype
    parse_html_comment
    parse_erb_tag
    parse_erb_comment
    parse_html_element
    parse_new_line
    parse_chardata
  ]

  loop do
    tag =
      atleast do
        found = nil
        candidates.each do |candidate|
          found = maybe { send(candidate) }
          break if found
        end
        found
      end

    if tag.is_a?(Doctype)
      if @found_doctype
        raise(
          SyntaxTree::Parser::ParseError.new(
            "Duplicate doctype declaration",
            tag.location.start_line,
            tag.location.start_column
          )
        )
      end

      @found_doctype = true
    end

    # A new line on its own is not an element
    next if tag.is_a?(NewLine)

    # Elements may ask to be left out, e.g. empty CharData
    return tag unless tag.skip?
  end
end
73 |
# Builds the token stream for +source+.
#
# Returns an Enumerator that yields five values per token: the type (a
# Symbol), the matched text, the character index, the line number (starting
# at 1) and the column index. The last token is :EOF with nil as its text.
#
# A stack of states decides which rules apply; the top entry is one of:
#   :outside             - between tags (text, comments, new lines)
#   :inside              - within an HTML tag or a doctype declaration
#   :erb_start           - directly after the opening of an ERB tag
#   :erb                 - the Ruby code of an ERB tag
#   :string_single_quote - within a single-quoted attribute value
#   :string_double_quote - within a double-quoted attribute value
#
# Raises SyntaxTree::Parser::ParseError when no rule matches.
def make_tokens
  Enumerator.new do |enum|
    index = 0
    column_index = 0
    line = 1
    state = %i[outside]

    while index < source.length
      case state.last
      in :outside
        case source[index..]
        when /\A\n{2,}/
          # two or more newlines should be ONE blank line
          enum.yield(:blank_line, $&, index, line, column_index)
          line += $&.count("\n")
        when /\A\n/
          # newlines
          enum.yield(:new_line, $&, index, line, column_index)
          line += 1
        when /\A<!--(.|\r?\n)*?-->/m
          # comments
          # <!-- this is a comment -->
          enum.yield(:html_comment, $&, index, line, column_index)
          line += $&.count("\n")
        when /\A<!DOCTYPE/, /\A<!doctype/
          # document type tags
          # <!DOCTYPE
          enum.yield(:doctype, $&, index, line, column_index)
          state << :inside
        when /\A<%#[\s\S]*?%>/
          # An ERB-comment
          # <%# this is an ERB comment %>
          enum.yield(:erb_comment, $&, index, line, column_index)
          # ERB comments may span several lines
          line += $&.count("\n")
        when /\A<%={1,2}/, /\A<%-/, /\A<%/
          # the beginning of an ERB tag
          # <%
          # <%=, <%==
          enum.yield(:erb_open, $&, index, line, column_index)
          state << :erb_start
          line += $&.count("\n")
        when %r{\A</}
          # the beginning of a closing tag
          # </
          enum.yield(:slash_open, $&, index, line, column_index)
          state << :inside
        when /\A</
          # the beginning of an opening tag
          # <
          enum.yield(:open, $&, index, line, column_index)
          state << :inside
        when /\A(?: |\t|\r)+/m
          # whitespace
          enum.yield(:whitespace, $&, index, line, column_index)
        when /\A(?!\s+$)[^<\n]+/
          # plain text content, but do not allow only white space
          # abc
          enum.yield(:text, $&, index, line, column_index)
        else
          raise(
            SyntaxTree::Parser::ParseError.new(
              "Unexpected character: #{source[index]}",
              line,
              column_index
            )
          )
        end
      in :erb_start
        case source[index..]
        when /\A\s*(if|unless|elsif|else|case|when|end)\b/
          # A control keyword right after the opening of the tag. The word
          # boundary keeps identifiers that merely start with a keyword,
          # such as end_date, from being taken for the keyword itself.
          keyword = Regexp.last_match(1)
          enum.yield(:"erb_#{keyword}", $&, index, line, column_index)
          # The leading whitespace may contain newlines
          line += $&.count("\n")
          state.pop
          state << :erb
        else
          # If we get here, then we did not have any special
          # keyword in the erb-tag.
          state.pop
          state << :erb
          next
        end
      in :erb
        case source[index..]
        when /\A[\n]+/
          # newlines
          enum.yield(:erb_code, $&, index, line, column_index)
          line += $&.count("\n")
        when /\Ado\b(\s*\|[\w\s,]+\|)?\s*-?%>/
          enum.yield(:erb_do_close, $&, index, line, column_index)
          state.pop
        when /\A-?%>/
          enum.yield(:erb_close, $&, index, line, column_index)
          state.pop
        when /\Ayield\b/
          enum.yield(:erb_yield, $&, index, line, column_index)
        when /\A[\p{L}\w]*\b/
          # Split by word boundary while parsing the code
          # This allows us to separate what_to_do vs do
          enum.yield(:erb_code, $&, index, line, column_index)
        else
          # Any other character becomes a token of its own
          enum.yield(:erb_code, source[index], index, line, column_index)
          index += 1
          column_index += 1
          next
        end
      in :string_single_quote
        case source[index..]
        when /\A(?: |\t|\n|\r\n)+/m
          enum.yield(:whitespace, $&, index, line, column_index)
          line += $&.count("\n")
        when /\A\'/
          # the end of a quoted string
          enum.yield(
            :string_close_single_quote,
            $&,
            index,
            line,
            column_index
          )
          state.pop
        when /\A<%[=]?/
          # the beginning of an ERB tag
          # <%
          enum.yield(:erb_open, $&, index, line, column_index)
          state << :erb_start
        when /\A[^<']+/
          # plain text content
          # abc
          enum.yield(:text, $&, index, line, column_index)
        else
          raise(
            SyntaxTree::Parser::ParseError.new(
              "Unexpected character, #{source[index]}, when looking for closing single quote",
              line,
              column_index
            )
          )
        end
      in :string_double_quote
        case source[index..]
        when /\A(?: |\t|\n|\r\n)+/m
          enum.yield(:whitespace, $&, index, line, column_index)
          line += $&.count("\n")
        when /\A\"/
          # the end of a quoted string
          enum.yield(
            :string_close_double_quote,
            $&,
            index,
            line,
            column_index
          )
          state.pop
        when /\A<%[=]?/
          # the beginning of an ERB tag
          # <%
          enum.yield(:erb_open, $&, index, line, column_index)
          state << :erb_start
        when /\A[^<"]+/
          # plain text content
          # abc
          enum.yield(:text, $&, index, line, column_index)
        else
          raise(
            SyntaxTree::Parser::ParseError.new(
              "Unexpected character, #{source[index]}, when looking for closing double quote",
              line,
              column_index
            )
          )
        end
      in :inside
        case source[index..]
        when /\A[ \t\r\n]+/
          # whitespace, skipped without producing a token
          line += $&.count("\n")
        when /\A-?%>/
          # the end of an ERB tag
          # -%> or %>
          enum.yield(:erb_close, $&, index, line, column_index)
          state.pop
        when /\A>/
          # the end of a tag
          # >
          enum.yield(:close, $&, index, line, column_index)
          state.pop
        when /\A\?>/
          # the end of a tag
          # ?>
          enum.yield(:special_close, $&, index, line, column_index)
          state.pop
        when %r{\A/>}
          # the end of a self-closing tag
          enum.yield(:slash_close, $&, index, line, column_index)
          state.pop
        when %r{\A/}
          # a forward slash
          # /
          enum.yield :slash, $&, index, line, column_index
        when /\A=/
          # an equals sign
          # =
          enum.yield :equals, $&, index, line, column_index
        when /\A[@#]*[:\w\.\-\_]+\b/
          # a name for an element or an attribute
          # strong, vue-component-kebab, VueComponentPascal
          # abc, #abc, @abc, :abc
          enum.yield :name, $&, index, line, column_index
        when /\A<%={1,2}/, /\A<%-/, /\A<%/
          # the beginning of an ERB tag
          # <%
          enum.yield :erb_open, $&, index, line, column_index
          state << :erb_start
        when /\A"/
          # the beginning of a string
          enum.yield(
            :string_open_double_quote,
            $&,
            index,
            line,
            column_index
          )
          state << :string_double_quote
        when /\A'/
          # the beginning of a string
          enum.yield(
            :string_open_single_quote,
            $&,
            index,
            line,
            column_index
          )
          state << :string_single_quote
        else
          raise(
            SyntaxTree::Parser::ParseError.new(
              "Unexpected character, #{source[index]}, when parsing HTML- or ERB-tag",
              line,
              column_index
            )
          )
        end
      end

      index += $&.length

      # After a match that contains a newline the column restarts; it equals
      # the number of characters that follow the last newline.
      last_newline = $&.rindex("\n")
      column_index =
        if last_newline
          $&.length - last_newline - 1
        else
          column_index + $&.length
        end
    end

    enum.yield(:EOF, nil, index, line, column_index)
  end
end
347 |
# Consumes the next token when it has the expected type.
#
# expected: the token type (a Symbol) that has to come next.
#
# Returns a Token whose Location covers the matched text. For text that
# contains newlines, end_line is advanced by their number and end_column is
# the number of characters after the last newline.
#
# Raises MissingTokenError, carrying the line and column of the offending
# token, when the next token has a different type. Nothing is consumed in
# that case.
def consume(expected)
  type, value, index, line, column = tokens.peek

  if expected != type
    raise(
      MissingTokenError.new(
        "expected #{expected} got #{type}",
        line,
        column
      )
    )
  end

  tokens.next

  last_newline = value.rindex("\n")
  end_column =
    if last_newline
      value.length - last_newline - 1
    else
      column + value.length
    end

  Token.new(
    type: type,
    value: value,
    location:
      Location.new(
        start_char: index,
        end_char: index + value.length,
        start_line: line,
        end_line: line + value.count("\n"),
        start_column: column,
        end_column: end_column
      )
  )
end
383 |
# Runs the block, which is expected to consume some tokens, and returns its
# result. A MissingTokenError raised by the block is turned into nil; any
# other error passes through.
def maybe
  begin
    yield
  rescue MissingTokenError
    nil
  end
end
391 |
# Runs the block and insists on a result: whatever the block returns is
# handed back, unless it is nil, in which case a MissingTokenError is
# raised.
def atleast
  outcome = yield
  return outcome unless outcome.nil?

  raise(MissingTokenError.new("No matching token", nil, nil))
end
402 |
# Calls the block over and over and collects every result. The first
# MissingTokenError raised by the block ends the collection; the results
# gathered up to that point are returned.
def many
  collected = []

  loop do
    collected << yield
  rescue MissingTokenError
    break
  end

  collected
end
418 |
# Parses elements one after another until one of them is an instance of
# any of the given classes. Which elements are allowed depends on the
# current ERB context (:string, :inside or :outside).
#
# Returns every parsed element in order; the one that ended the run comes
# last.
def parse_until_erb(classes:)
  collected = []

  loop do
    node =
      case @erb_context
      when :string
        atleast do
          maybe { consume(:text) } || maybe { consume(:whitespace) } ||
            maybe { parse_erb_tag }
        end
      when :inside
        atleast { maybe { parse_erb_tag } || maybe { parse_html_attribute } }
      when :outside
        parse_any_tag
      end

    collected << node

    reached_end = classes.any? { |cls| node.is_a?(cls) }
    break if reached_end
  end

  collected
end
443 |
# Parses an opening tag: "<", the tag name, any mix of attributes and ERB
# tags, and the closing ">" or "/>". A new line and whitespace that follow
# are consumed as well.
#
# Raises SyntaxTree::Parser::ParseError when the name starts with "@", ":"
# or "#", and MissingTokenError when an expected token is absent.
def parse_html_opening_tag
  open_token = consume(:open)
  tag_name = consume(:name)

  if tag_name.value.match?(/\A[@:#]/)
    raise(
      SyntaxTree::Parser::ParseError.new(
        "Invalid HTML-tag name #{tag_name.value}",
        tag_name.location.start_line,
        tag_name.location.start_column
      )
    )
  end

  # ERB tags found from here on are parsed as part of the tag itself
  @erb_context = :inside
  attribute_list =
    many { atleast { maybe { parse_erb_tag } || maybe { parse_html_attribute } } }
  @erb_context = :outside

  close_token =
    atleast { maybe { consume(:close) } || maybe { consume(:slash_close) } }

  trailing_new_line = maybe { parse_new_line }

  # Parse any whitespace after new lines
  maybe { consume(:whitespace) }

  HtmlNode::OpeningTag.new(
    opening: open_token,
    name: tag_name,
    attributes: attribute_list,
    closing: close_token,
    location: open_token.location.to(close_token.location),
    new_line: trailing_new_line
  )
end
488 |
# Parses a closing tag, made of "</", the tag name and ">", together with
# a new line that may follow it.
def parse_html_closing
  open_token = consume(:slash_open)
  tag_name = consume(:name)
  close_token = consume(:close)
  trailing_new_line = maybe { parse_new_line }

  HtmlNode::ClosingTag.new(
    opening: open_token,
    name: tag_name,
    closing: close_token,
    location: open_token.location.to(close_token.location),
    new_line: trailing_new_line
  )
end
504 |
# Parses a complete HTML element. A self-closing tag or a void element
# consists of its opening tag only. Every other element runs up to its
# closing tag, and everything in between becomes its child elements.
#
# Raises SyntaxTree::Parser::ParseError when the closing tag is missing or
# names a different element.
def parse_html_element
  opening = parse_html_opening_tag

  if opening.closing.value == "/>" || opening.is_void_element?
    return HtmlNode.new(opening: opening, location: opening.location)
  end

  children = many { parse_any_tag }
  closing = maybe { parse_html_closing }

  if closing.nil?
    raise(
      SyntaxTree::Parser::ParseError.new(
        "Missing closing tag for <#{opening.name.value}>",
        opening.location.start_line,
        opening.location.start_column
      )
    )
  end

  unless closing.name.value == opening.name.value
    raise(
      SyntaxTree::Parser::ParseError.new(
        "Expected closing tag for <#{opening.name.value}> but got <#{closing.name.value}>",
        closing.location.start_line,
        closing.location.start_column
      )
    )
  end

  HtmlNode.new(
    opening: opening,
    elements: children,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
544 |
# Builds the node for a `case` or `when` tag. Elements are parsed until the
# next `when`, `else` or `end` tag; that tag becomes the closing of the new
# node and is not part of its elements.
#
# erb_node: the already parsed ERB tag that holds the keyword.
#
# Raises SyntaxTree::Parser::ParseError when no such tag follows or when
# the keyword is neither `case` nor `when`.
def parse_erb_case(erb_node)
  terminators = [ErbCaseWhen, ErbElse, ErbEnd]
  elements = maybe { parse_until_erb(classes: terminators) } || []
  erb_tag = elements.pop

  if terminators.none? { |cls| erb_tag.is_a?(cls) }
    location = erb_tag&.location || erb_node.location
    raise(
      SyntaxTree::Parser::ParseError.new(
        "No matching ERB-tag for the <% #{erb_node.keyword.value} %>",
        location.start_line,
        location.start_column
      )
    )
  end

  node_class =
    case erb_node.keyword.type
    when :erb_case
      ErbCase
    when :erb_when
      ErbCaseWhen
    end

  if node_class.nil?
    raise(
      SyntaxTree::Parser::ParseError.new(
        "No matching when- or else-tag for the case-tag",
        erb_node.location.start_line,
        erb_node.location.start_column
      )
    )
  end

  node_class.new(
    opening: erb_node,
    elements: elements,
    closing: erb_tag,
    location: erb_node.location.to(erb_tag.location)
  )
end
589 |
# Builds the node for an `if`, `unless` or `elsif` tag. Elements are parsed
# until the next `elsif`, `else` or `end` tag; that tag becomes the closing
# of the new node and is not part of its elements.
#
# erb_node: the already parsed ERB tag that holds the keyword.
#
# Raises SyntaxTree::Parser::ParseError when no such tag follows or when
# the keyword is none of the three.
def parse_erb_if(erb_node)
  # Skip any leading whitespace
  maybe { consume(:whitespace) }

  elements =
    maybe { parse_until_erb(classes: [ErbElsif, ErbElse, ErbEnd]) } || []
  erb_tag = elements.pop

  if !erb_tag.is_a?(ErbControl) && !erb_tag.is_a?(ErbEnd)
    raise(
      SyntaxTree::Parser::ParseError.new(
        "No matching ERB-tag for the <% if %>",
        erb_node.location.start_line,
        erb_node.location.start_column
      )
    )
  end

  node_class =
    case erb_node.keyword.type
    when :erb_if
      ErbIf
    when :erb_unless
      ErbUnless
    when :erb_elsif
      ErbElsif
    end

  if node_class.nil?
    raise(
      SyntaxTree::Parser::ParseError.new(
        "No matching <% elsif %> or <% else %> for the <% if %>",
        erb_node.location.start_line,
        erb_node.location.start_column
      )
    )
  end

  node_class.new(
    opening: erb_node,
    elements: elements,
    closing: erb_tag,
    location: erb_node.location.to(erb_tag.location)
  )
end
641 |
# Builds the node for an `else` tag. Elements are parsed up to the `end`
# tag, which becomes the closing of the new node.
#
# erb_node: the already parsed ERB tag that holds the keyword.
#
# Raises SyntaxTree::Parser::ParseError when no `end` tag follows.
def parse_erb_else(erb_node)
  body = maybe { parse_until_erb(classes: [ErbEnd]) } || []
  terminator = body.pop

  unless terminator.is_a?(ErbEnd)
    raise(
      SyntaxTree::Parser::ParseError.new(
        "No matching <% end %> for the <% else %>",
        erb_node.location.start_line,
        erb_node.location.start_column
      )
    )
  end

  ErbElse.new(
    opening: erb_node,
    elements: body,
    closing: terminator,
    location: erb_node.location.to(terminator.location)
  )
end
664 |
# Turns an already parsed `end` tag into an ErbEnd. The delimiters, keyword
# and location are taken from erb_node, the content is left out (nil) and a
# new line that follows the tag is attached.
def parse_erb_end(erb_node)
  trailing_new_line = maybe { parse_new_line }

  ErbEnd.new(
    opening_tag: erb_node.opening_tag,
    keyword: erb_node.keyword,
    content: nil,
    closing_tag: erb_node.closing_tag,
    new_line: trailing_new_line,
    location: erb_node.location
  )
end
677 |
# Parses one ERB tag and whatever construct it opens.
#
# After the opening delimiter an optional control keyword is read, followed
# by the code up to the closing delimiter. What is returned depends on the
# keyword:
#   if / unless / elsif -> the result of parse_erb_if
#   case / when         -> the result of parse_erb_case
#   else                -> the result of parse_erb_else
#   end                 -> the result of parse_erb_end
#   no keyword          -> an ErbBlock when the tag closes with `do`,
#                          otherwise the plain ErbNode
#
# Raises MissingTokenError when the next token does not open an ERB tag, and
# SyntaxTree::Parser::ParseError when the tag is not closed or when a `do`
# block has no matching `end` tag.
def parse_erb_tag
  opening_tag = consume(:erb_open)
  keyword =
    maybe { consume(:erb_if) } || maybe { consume(:erb_unless) } ||
      maybe { consume(:erb_elsif) } || maybe { consume(:erb_else) } ||
      maybe { consume(:erb_end) } || maybe { consume(:erb_case) } ||
      maybe { consume(:erb_when) }

  content = parse_until_erb_close
  closing_tag = content.pop

  unless closing_tag.is_a?(ErbClose)
    # Both the keyword and the last piece of content may be absent here, so
    # neither is dereferenced blindly while the error is being built.
    error_location = (closing_tag || opening_tag).location
    raise(
      SyntaxTree::Parser::ParseError.new(
        "No matching closing tag for the <% #{keyword&.value} %>",
        error_location.start_line,
        error_location.start_column
      )
    )
  end

  new_line = maybe { parse_new_line }

  erb_node =
    ErbNode.new(
      opening_tag: opening_tag,
      keyword: keyword,
      content: content,
      closing_tag: closing_tag,
      new_line: new_line,
      location: opening_tag.location.to(closing_tag.location)
    )

  case erb_node.keyword&.type
  when :erb_if, :erb_unless, :erb_elsif
    parse_erb_if(erb_node)
  when :erb_case, :erb_when
    parse_erb_case(erb_node)
  when :erb_else
    parse_erb_else(erb_node)
  when :erb_end
    parse_erb_end(erb_node)
  else
    if closing_tag.is_a?(ErbDoClose)
      # A tag that closes with `do` opens a block that runs to its `end` tag
      elements = maybe { parse_until_erb(classes: [ErbEnd]) } || []
      erb_end = elements.pop

      unless erb_end.is_a?(ErbEnd)
        raise(
          SyntaxTree::Parser::ParseError.new(
            "No matching <% end %> for the <% do %>",
            erb_node.location.start_line,
            erb_node.location.start_column
          )
        )
      end

      ErbBlock.new(
        opening: erb_node,
        elements: elements,
        closing: erb_end,
        location: erb_node.location.to(erb_end.location)
      )
    else
      erb_node
    end
  end
end
746 |
# Collects the pieces of an ERB tag up to and including the piece that
# closes it. A piece is a closing delimiter (with or without `do`), a
# `yield` or a token of plain code.
#
# Returns the pieces in order; the closing one comes last.
def parse_until_erb_close
  pieces = []

  loop do
    piece =
      atleast do
        maybe { parse_erb_do_close } || maybe { parse_erb_close } ||
          maybe { parse_erb_yield } || maybe { consume(:erb_code) }
      end

    pieces << piece
    break if piece.is_a?(ErbClose)
  end

  pieces
end
764 |
# Most tag parsers call this once they are done with their tag. It
#   1. parses the new line(s) that follow the tag, and
#   2. consumes the whitespace after those new lines, which is dropped.
#
# Returns a NewLine that records how many newline characters were matched.
# Raises MissingTokenError when the next token is not a line break.
def parse_new_line
  break_token =
    atleast { maybe { consume(:blank_line) } || maybe { consume(:new_line) } }

  maybe { consume(:whitespace) }

  newline_count = break_token.value.count("\n")
  NewLine.new(location: break_token.location, count: newline_count)
end
782 |
# Parses the closing delimiter of an ERB tag together with a new line that
# may follow it.
def parse_erb_close
  close_token = consume(:erb_close)
  trailing_new_line = maybe { parse_new_line }

  ErbClose.new(
    location: close_token.location,
    new_line: trailing_new_line,
    closing: close_token
  )
end
794 |
# Parses a closing delimiter that is preceded by `do`, i.e. the end of a
# tag that opens a block, together with a new line that may follow it.
def parse_erb_do_close
  close_token = consume(:erb_do_close)
  trailing_new_line = maybe { parse_new_line }

  ErbDoClose.new(
    location: close_token.location,
    new_line: trailing_new_line,
    closing: close_token
  )
end
806 |
# Parses a `yield` keyword inside an ERB tag together with a new line that
# may follow it.
def parse_erb_yield
  yield_token = consume(:erb_yield)
  trailing_new_line = maybe { parse_new_line }

  ErbYield.new(location: yield_token.location, new_line: trailing_new_line)
end
814 |
# Parses the value of an attribute. A quoted value may hold text,
# whitespace and ERB tags between its quotes. Without an opening quote a
# single name token is taken as the whole value and both quotes are nil.
#
# Raises MissingTokenError when neither form is present or when the closing
# quote does not match the opening one.
def parse_html_string
  open_quote =
    maybe { consume(:string_open_double_quote) } ||
      maybe { consume(:string_open_single_quote) }

  if open_quote.nil?
    bare = consume(:name)

    return(
      HtmlString.new(
        opening: nil,
        contents: [bare],
        closing: nil,
        location: bare.location
      )
    )
  end

  # ERB tags found between the quotes are parsed as part of the string
  @erb_context = :string
  pieces =
    many do
      atleast do
        maybe { consume(:text) } || maybe { consume(:whitespace) } ||
          maybe { parse_erb_tag }
      end
    end
  @erb_context = :inside

  double_quoted = open_quote.type == :string_open_double_quote
  close_quote =
    consume(
      double_quoted ? :string_close_double_quote : :string_close_single_quote
    )

  HtmlString.new(
    opening: open_quote,
    contents: pieces,
    closing: close_quote,
    location: open_quote.location.to(close_quote.location)
  )
end
859 |
# Parses one attribute of a tag. An attribute is either a bare name or a
# name followed by an equals sign and a value.
def parse_html_attribute
  name_token = consume(:name)
  equals_token = maybe { consume(:equals) }

  if equals_token.nil?
    # An attribute without a value, like `disabled`
    return(
      HtmlAttribute.new(
        key: name_token,
        equals: nil,
        value: nil,
        location: name_token.location
      )
    )
  end

  attribute_value = parse_html_string

  HtmlAttribute.new(
    key: name_token,
    equals: equals_token,
    value: attribute_value,
    location: name_token.location.to(attribute_value.location)
  )
end
882 |
# Parses a run of character data: any sequence of text, whitespace and
# quote tokens. Several tokens are merged into one :text token that spans
# all of them. A new line after the run is consumed and attached.
#
# Returns a CharData, or nil when no such token came next.
def parse_chardata
  pieces =
    many do
      atleast do
        maybe { consume(:string_open_double_quote) } ||
          maybe { consume(:string_open_single_quote) } ||
          maybe { consume(:string_close_double_quote) } ||
          maybe { consume(:string_close_single_quote) } ||
          maybe { consume(:text) } || maybe { consume(:whitespace) }
      end
    end

  merged = pieces.first
  if pieces.size > 1
    merged =
      Token.new(
        type: :text,
        value: pieces.map(&:value).join(""),
        location: pieces.first.location.to(pieces.last.location)
      )
  end

  trailing_new_line = maybe { parse_new_line }

  return unless merged&.value

  CharData.new(
    value: merged,
    location: merged.location,
    new_line: trailing_new_line
  )
end
916 |
# Parses a document type declaration: its opening, the name of the document
# type and the closing ">", together with a new line that may follow.
def parse_doctype
  open_token = consume(:doctype)
  type_name = consume(:name)
  close_token = consume(:close)
  trailing_new_line = maybe { parse_new_line }

  Doctype.new(
    opening: open_token,
    name: type_name,
    closing: close_token,
    new_line: trailing_new_line,
    location: open_token.location.to(close_token.location)
  )
end
932 |
# Parses an HTML comment, which arrives as a single token, together with a
# new line that may follow it.
def parse_html_comment
  comment_token = consume(:html_comment)
  trailing_new_line = maybe { parse_new_line }

  HtmlComment.new(
    token: comment_token,
    new_line: trailing_new_line,
    location: comment_token.location
  )
end
944 |
# Parses an ERB comment, which arrives as a single token, together with a
# new line that may follow it.
def parse_erb_comment
  comment_token = consume(:erb_comment)
  trailing_new_line = maybe { parse_new_line }

  ErbComment.new(
    token: comment_token,
    new_line: trailing_new_line,
    location: comment_token.location
  )
end
956 | end
957 | end
958 | end
959 |
--------------------------------------------------------------------------------