├── .rspec
├── lib
├── rehtml
│ ├── version.rb
│ ├── scanner.rb
│ ├── elements.rb
│ ├── builder.rb
│ ├── tokenizer.rb
│ └── entities.rb
└── rehtml.rb
├── Rakefile
├── .travis.yml
├── Gemfile
├── .gitignore
├── spec
├── spec_helper.rb
├── rehtml_scanner_spec.rb
├── rehtml_parser_spec.rb
└── rehtml_tokenizer_spec.rb
├── gen_entities.rb
├── rehtml.gemspec
├── LICENSE.txt
└── README.md
/.rspec:
--------------------------------------------------------------------------------
1 | --format documentation
2 | --color
3 |
--------------------------------------------------------------------------------
/lib/rehtml/version.rb:
--------------------------------------------------------------------------------
1 | module REHTML
2 | VERSION = "0.0.1"
3 | end
4 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler/gem_tasks"
2 | require "rspec/core/rake_task"
3 |
4 | RSpec::Core::RakeTask.new(:spec)
5 |
6 | task :default => :spec
7 |
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: ruby
2 | rvm:
3 | - 1.9.3
4 | - 1.8.7
5 | - 2.1.1
6 | - 2.0.0
7 | - jruby-19mode
8 | # - rbx-2.1.1 Gem bundler is not installed
9 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 |
3 | # Specify your gem's dependencies in rehtml.gemspec
4 | gemspec
5 |
6 | group :test do
7 | gem 'coveralls', :require => false, :platforms => [ :ruby_20 ]
8 | end
9 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.gem
2 | *.rbc
3 | .bundle
4 | .config
5 | .yardoc
6 | Gemfile.lock
7 | InstalledFiles
8 | _yardoc
9 | coverage
10 | doc/
11 | lib/bundler/man
12 | pkg
13 | rdoc
14 | spec/reports
15 | test/tmp
16 | test/version_tmp
17 | tmp
18 | bin
19 |
--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
1 | $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2 | require 'rubygems'
3 | require 'rehtml'
4 | begin
5 | require 'coveralls'
6 | Coveralls.wear!
7 | rescue LoadError
8 | end
9 |
10 | require 'rspec/expectations'
11 |
12 |
13 |
--------------------------------------------------------------------------------
/lib/rehtml.rb:
--------------------------------------------------------------------------------
1 | require 'rehtml/version'
2 | require 'rehtml/tokenizer'
3 | require 'rehtml/builder'
4 |
module REHTML
  # Parse an HTML string and return the document built from it.
  #
  # html - the HTML source as a String.
  #
  # Returns whatever REXMLBuilder#doc holds once every token produced by
  # Tokenizer has been fed to the builder.
  def self.to_rexml(html)
    builder = REXMLBuilder.new
    tokens = Tokenizer.new(html)
    builder.parse(tokens)
    builder.doc
  end
end
13 |
--------------------------------------------------------------------------------
/lib/rehtml/scanner.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | require 'strscan'
module REHTML
  # StringScanner with helpers that return the text located *before* the
  # next occurrence of a pattern.
  class Scanner < StringScanner
    # Same as #scan_before with +or_eos+ enabled: when +regex+ never matches,
    # the rest of the input is consumed and returned.
    #
    # regex      - pattern to search for.
    # move_after - when true the scan pointer is left after the match,
    #              otherwise it is left at the start of the match.
    def scan_before_or_eos(regex, move_after=false)
      scan_before(regex, true, move_after)
    end

    # Consume input up to the next match of +regex+ and return the text that
    # precedes the match (the matched text itself is never part of the result).
    #
    # regex      - pattern to search for.
    # or_eos     - when true and nothing matches, consume and return the rest
    #              of the input; when false return nil and do not move.
    # move_after - when true the scan pointer is left after the match,
    #              otherwise it is rewound to the start of the match.
    #
    # After a successful search #matched still holds the matched text; after
    # falling back to end-of-input #matched is nil (terminate resets it).
    def scan_before(regex, or_eos=false, move_after=false)
      text = scan_until(regex)
      if text
        matched_chars = matched.size
        # pos is a byte offset, so rewind by the byte length of the match;
        # rewinding by the character count breaks on multibyte matches.
        self.pos -= matched_size unless move_after
        # Explicit length instead of text[0...-n]: with a zero-width match
        # (n == 0) the range form would return "" instead of the whole text.
        return text[0, text.size - matched_chars]
      end
      return nil unless or_eos
      text = rest
      terminate
      text
    end
  end
end
23 |
--------------------------------------------------------------------------------
/spec/rehtml_scanner_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 | require 'rehtml/scanner'
3 | describe REHTML::Scanner do
4 | describe "scan aabcd" do
5 | let(:scanner){ REHTML::Scanner.new("aabcd") }
6 | it "scan_before" do
7 | expect(scanner.scan_before(/b/)).to eq("aa")
8 | expect(scanner.check(/b/)).to eq("b")
9 | expect(scanner.scan(/b/)).to eq("b")
10 | expect(scanner.scan(/b/)).to eq(nil)
11 | end
12 | it "scan_before_or_eos" do
13 | expect(scanner.scan_before_or_eos(/z/)).to eq("aabcd")
14 | expect(scanner.eos?).to eq(true)
15 | end
16 | it "scan_before_or_eos move_after" do
17 | expect(scanner.scan_before_or_eos(/b/,true)).to eq("aa")
18 | expect(scanner.rest).to eq("cd")
19 | end
20 | end
21 | end
22 |
--------------------------------------------------------------------------------
/lib/rehtml/elements.rb:
--------------------------------------------------------------------------------
module REHTML
  # Common superclass of the token classes below.
  class Node; end

  # A run of character data.
  class Text < Node
    attr_reader :value

    # value - the text content.
    def initialize(value)
      @value = value
    end
  end

  # A Text token that originated from a CDATA section.
  class CData < Text; end

  # A tag with its name, attribute hash and "empty" marker.
  class Tag < Node
    attr_reader :name
    attr_reader :attributes

    # name       - tag name.
    # attributes - attribute name/value pairs.
    # empty      - flag reported back by #empty?.
    def initialize(name,attributes,empty)
      @name, @attributes, @empty = name, attributes, empty
    end

    # Returns the +empty+ flag given to the constructor.
    def empty?
      @empty
    end
  end

  # A closing tag; carries the same data as Tag.
  class EndTag < Tag; end

  # A processing instruction, split into target and content.
  class Instruction < Node
    attr_reader :target
    attr_reader :content

    def initialize(target,content)
      @target, @content = target, content
    end

    # True when the target, compared case-insensitively, is "xml".
    def is_xml_decl?
      'XML' == target.upcase
    end
  end

  # A comment; +string+ is the comment text.
  class Comment < Node
    attr_reader :string

    def initialize(string)
      @string = string
    end
  end

  # A document type declaration; it stores no data of its own.
  class DocType < Node
    def initialize; end
  end
end
46 |
--------------------------------------------------------------------------------
/gen_entities.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
require 'json'
require 'open-uri'
require 'kconv'

# Generates lib/rehtml/entities.rb (REHTML::ENTITIES::MAP and ::REGEXP) from
# the W3C entity table, then loads the generated file and reports every
# entity name the generated REGEXP fails to match.

url = "http://www.w3.org/TR/html5/entities.json"
fname = File.join(File.dirname(__FILE__),"lib/rehtml/entities.rb")

puts "Generate #{fname} from #{url}"

# read
# URI#read (open-uri) fetches the body and closes the connection itself;
# Kernel#open no longer opens URLs on current Ruby versions.
# Only the names that end with ";" are kept.
json = JSON.parse(URI.parse(url).read).delete_if{|k,v|
  k !~ /;$/
}

# write source
entities = json.map{|k,v|
  "\"#{k.gsub(/^&/,'').gsub(/;$/,'')}\" => #{v["codepoints"].inspect}.pack( \"U*\" )"
}
max_size = json.keys.map{|a|a.length}.max
File.open(fname,"w"){|f|
  # The hexadecimal branch accepts both "x"/"X" and upper-case digits so that
  # references such as &#x3C; or &#X3c; are recognised as well.
  f.write <<-CODE
module REHTML
  module ENTITIES
    # generate from #{url} on #{Time.now}
    MAP = {
      #{entities.join(",\n      ")}
    }
    REGEXP = /\\&(?:([a-zA-Z][a-zA-Z0-9]{1,#{max_size-1}})|#([0-9]{1,7})|#[xX]([0-9a-fA-F]{1,6}));/
  end
end
  CODE
}

# check
require fname
json.keys.each{|m|
  puts "#{m} is not match #{REHTML::ENTITIES::REGEXP}" if m !~ REHTML::ENTITIES::REGEXP
}
puts "done."
41 |
--------------------------------------------------------------------------------
/rehtml.gemspec:
--------------------------------------------------------------------------------
1 | # coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'rehtml/version'
# The description is the README text starting at the "Pure Ruby" paragraph and
# ending before the first "##" heading. File.read closes the README handle,
# whereas a bare open(...).read leaves it open until garbage collection.
description = File.read(File.dirname(__FILE__)+"/README.md").gsub(/^.*\n(Pure Ruby)/m,'\1').gsub(/\n##.*/m,"")

Gem::Specification.new do |spec|
  spec.name = "rehtml"
  spec.version = REHTML::VERSION
  spec.authors = ["nazoking"]
  spec.email = ["nazoking@gmail.com"]
  # The summary is the first line of the description.
  spec.summary = description.split(/\n/)[0].strip
  spec.description = description
  spec.homepage = "https://github.com/nazoking/rehtml"
  spec.license = "MIT"

  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
26 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 nazoking
2 |
3 | MIT License
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 |
--------------------------------------------------------------------------------
/spec/rehtml_parser_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 | require 'rehtml'
3 |
4 | class ReHTML
5 | def initialize(str)
6 | @str = str
7 | end
8 | def to_s
9 | "parse [#{@str}]"
10 | end
11 | def to_rexml
12 | REHTML.to_rexml(@str).to_s
13 | end
14 | def doc
15 | REHTML.to_rexml(@str)
16 | end
17 | end
18 | def parse(str)
19 | ReHTML.new(str)
20 | end
21 |
22 | describe parse(%[
html a]) do
23 | its(:to_rexml){ should eq(%[ html a]) }
24 | end
25 | describe parse(%[html]) do
26 | its(:to_rexml){ should eq(%[html]) }
27 | end
28 | describe parse(%[htmla]) do
29 | its(:to_rexml){ should eq(%[htmla]) }
30 | its("doc.xml_decl.writethis"){ should be_false }
31 | end
32 | describe parse(%[ a]) do
33 | its(:to_rexml){ should eq(%[ a]) }
34 | its("doc.xml_decl.writethis"){ should be_true }
35 | end
36 | describe parse(%[]) do
37 | its(:to_rexml){ should eq(%[]) }
38 | end
39 | =begin
40 | describe %[index.jsp] do
41 | it{
42 | doc = REHTML.to_rexml(open(File.join(File.dirname(__FILE__),'files','login.jsp')).read)
43 | formatter = REXML::Formatters::Pretty.new
44 | formatter.write(doc.root, $stdout)
45 | }
46 | end
47 | =end
48 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # REHTML
2 |
3 | [Gem Version](http://allthebadges.io/nazoking/rehtml/badge_fury)
4 | [Build Status](https://travis-ci.org/nazoking/rehtml)
5 | [Coverage Status](https://coveralls.io/r/nazoking/rehtml?branch=master)
6 | [Code Climate](https://codeclimate.com/github/nazoking/rehtml)
7 | [Dependency Status](https://gemnasium.com/nazoking/rehtml)
8 |
9 | Pure Ruby html parser.
10 |
11 | This library parses HTML and builds a REXML document.
12 |
13 | Nokogiri is very convenient, but its installation is complex because it needs to build a native library, which makes it unsuitable for use with Chef.
14 |
15 | ## Installation
16 |
17 | Add this line to your application's Gemfile:
18 |
19 | gem 'rehtml'
20 |
21 | And then execute:
22 |
23 | $ bundle
24 |
25 | Or install it yourself as:
26 |
27 | $ gem install rehtml
28 |
29 | ## Usage
30 |
31 | ```
32 | doc = REHTML.to_rexml(open('https://github.com/nazoking/rehtml').read)
33 | ```
34 |
35 | ## Contributing
36 |
37 | 1. Fork it ( http://github.com/nazoking/rehtml/fork )
38 | 2. Create your feature branch (`git checkout -b my-new-feature`)
39 | 3. Commit your changes (`git commit -am 'Add some feature'`)
40 | 4. Push to the branch (`git push origin my-new-feature`)
41 | 5. Create new Pull Request
42 |
--------------------------------------------------------------------------------
/lib/rehtml/builder.rb:
--------------------------------------------------------------------------------
1 | require 'rexml/document'
2 |
module REHTML
  # Builds a REXML::Document out of the tokens handed over by a tokenizer.
  class REXMLBuilder
    # Tags that never become the current insertion point; their end tags are
    # ignored.
    EMPTY_TAGS=Set.new %w[area base br col embed hr img input keygen link meta param source track wbr isindex basefont]
    # Tags whose nested tags are stored as plain text (see #to_rexml).
    CDATA_TAGS=Set.new %w[script style textarea xmp title]
    # The document built by the last call to #parse.
    attr_reader :doc

    # Build a new document from +tokenizer+.
    #
    # tokenizer - any object whose #next returns the next token, or nil/false
    #             once the input is exhausted.
    def parse(tokenizer)
      @doc = REXML::Document.new
      @pos = @doc
      while (node = tokenizer.next)
        append(node)
      end
    end

    # Append one token to the document and update the insertion point.
    #
    # node - an EndTag closes the nearest open element with the same name
    #        (it is ignored when no such element is open or when the name is
    #        in EMPTY_TAGS); every other token is converted with #to_rexml
    #        and added at the current insertion point.
    def append(node)
      if node.is_a?(EndTag)
        return if empty_tag?(node.name)
        ancestor = @pos
        while ancestor.parent && ancestor.name != node.name
          ancestor = ancestor.parent
        end
        @pos = ancestor.parent if ancestor.name == node.name
      else
        rexml = to_rexml(node)

        # A second top-level element would give the document two roots, so the
        # existing content is nested under an <html> element first.
        if rexml.is_a?(REXML::Element) && @pos == @doc && @doc.root
          if @doc.root.name != 'html'
            html = REXML::Element.new
            html.name = "html"
            first = @doc.root.index_in_parent - 1
            # Move every node from the old root onwards into the new <html>.
            while (child = @doc.delete_at(first))
              @doc.delete_element(child) if child.is_a?(REXML::Element)
              html << child
            end
            @doc << html
            @pos = html
          end
          @pos = @doc.root
        end
        @pos << rexml
        # Descend into the new element unless it cannot have content.
        if rexml.is_a?(REXML::Element) && !empty_tag?(node.name) && !node.empty?
          @pos = rexml
        end
      end
    end

    private

    # Convert a token into the REXML node that represents it.
    #
    # Raises RuntimeError for a token of an unknown class.
    def to_rexml(node)
      case node
      when CData
        # CData is a subclass of Text, so it has to be tested before Text;
        # otherwise this branch can never be reached.
        REXML::CData.new(node.value)
      when Text
        REXML::Text.new(node.value, true)
      when Instruction
        if node.is_xml_decl? && (@doc.xml_decl.nil? || !@doc.xml_decl.writethis)
          begin
            # NOTE(review): the source passed to REXML::Document.new is an
            # empty string, so node.content is not used here - confirm this is
            # intended.
            return REXML::Document.new("").xml_decl
          rescue REXML::ParseException
          end
        end
        REXML::Instruction.new(node.target,node.content)
      when DocType
        # The doctype is kept as a comment holding its raw source text.
        REXML::Comment.new(node.raw)
      when Comment
        REXML::Comment.new(node.string)
      when Tag
        if cdata_tag?(@pos.name)
          # Inside a CDATA_TAGS element a tag is stored as its raw text.
          REXML::Text.new(node.raw, true)
        else
          xml = REXML::Element.new
          xml.name = node.name
          xml.add_attributes(node.attributes)
          xml
        end
      else
        raise "unknown node type #{node}"
      end
    end

    # True when +tagname+ is listed in EMPTY_TAGS.
    def empty_tag?(tagname)
      EMPTY_TAGS.include?(tagname)
    end

    # True when +tagname+ is listed in CDATA_TAGS.
    def cdata_tag?(tagname)
      CDATA_TAGS.include?(tagname)
    end
  end
end
97 |
--------------------------------------------------------------------------------
/lib/rehtml/tokenizer.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | require 'rehtml/scanner'
3 | require 'rehtml/elements'
4 | require 'rehtml/entities'
5 |
6 | module REHTML
# Mixin that records where in the scanned source a token came from.
#
# raw       - the source text of the token.
# start_pos - scanner position where the token starts.
# end_pos   - scanner position just after the token.
module TokenInfo
  attr_reader :raw, :start_pos, :end_pos

  # Store the span from +bpos+ to the scanner's current position.
  #
  # bpos    - scanner position at which the token started.
  # scanner - scanner that has just consumed the token; must respond to
  #           #pos and #string.
  #
  # Returns the raw source text of the token.
  def set_token_info(bpos,scanner)
    @start_pos = bpos
    @end_pos = scanner.pos
    source = scanner.string
    length = @end_pos - @start_pos
    # Scanner positions are byte offsets, so the source has to be sliced by
    # bytes; character indexing returns the wrong span for multibyte text.
    # String#byteslice is missing on very old Rubies, where strings are
    # indexed by byte anyway.
    @raw = if source.respond_to?(:byteslice)
             source.byteslice(@start_pos, length)
           else
             source[@start_pos, length]
           end
  end
end
15 | class Tokenizer
# Set up a tokenizer that reads from the given HTML text.
#
# html - the source to tokenize.
def initialize(html)
  # Scanner position where the next token will start.
  @bpos = 0
  @scanner = Scanner.new(html)
end
21 |
# Read one token from the input.
#
# Returns the token, or +nil+ once the whole input has been consumed.
def next
  return if @scanner.eos?
  # "<" directly followed by a non-space character opens markup; anything
  # else is text.
  token = if @scanner.check(/<\S/)
            scan_element
          else
            scan_text
          end
  add_parse_info(token)
end
28 |
29 | private
# Attach source-position data (TokenInfo) to +node+, covering the input
# consumed since the previous token, and remember where the next one starts.
#
# Returns +node+.
def add_parse_info(node)
  node.extend(TokenInfo).set_token_info(@bpos, @scanner)
  @bpos = @scanner.pos
  node
end
36 |
# Consume one character unconditionally plus everything up to the next "<"
# and return it as a Text token with character references decoded.
def scan_text
  first_char = @scanner.getch
  remainder = @scanner.scan(/[^<]*/)
  Text.new(decode("#{first_char}#{remainder}"))
end
40 |
# Decode the HTML character references contained in +html+.
#
# Named references are looked up in ENTITIES::MAP and left untouched when the
# name is unknown; decimal and hexadecimal references are converted to the
# character with that code point.
#
# Returns a new String.
def decode(html)
  html.gsub(ENTITIES::REGEXP) do
    match = Regexp.last_match
    if match[1]
      ENTITIES::MAP[match[1]] || match[0]
    elsif match[2]
      decode_codepoint(match[2].to_i(10))
    elsif match[3]
      decode_codepoint(match[3].to_i(16))
    else
      match[0]
    end
  end
end

# Encode +codepoint+ as UTF-8. Surrogates and values above U+10FFFF cannot be
# encoded as valid UTF-8, so they become U+FFFD (replacement character), as
# HTML parsers do for such references.
def decode_codepoint(codepoint)
  invalid = codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)
  [invalid ? 0xFFFD : codepoint].pack('U')
end
59 |
60 | def scan_element
61 | if @scanner.scan(//,true)
63 | Comment.new(comment)
64 | elsif @scanner.scan(//,true))
66 | elsif @scanner.scan(//,true)
70 | Comment.new(comment)
71 | elsif @scanner.scan(/<\?/) # PI or xml decl
72 | scan_pi
73 | else
74 | scan_tag
75 | end
76 | end
77 |
# Scan a start or end tag at the current position.
#
# Tag and attribute names are lower-cased; attribute values have their
# character references decoded. An attribute without a value is stored
# with "".
#
# Returns an EndTag when the name is preceded by "/", otherwise a Tag. The
# third constructor argument tells whether a "/" was found right before the
# end of the tag.
def scan_tag
  @scanner.scan(/<(\/)?([^\x20\x09\x0A\x0C\x0D>]*)/)
  closing = !@scanner[1].nil?
  tag_name = @scanner[2]
  attributes = {}
  loop do
    @scanner.skip(/[\x20\x09\x0A\x0C\x0D]/)
    key = @scanner.scan_before_or_eos(/[=>\x20\x09\x0A\x0C\x0D]|\/>/)
    stop = @scanner.matched
    # End of the tag (or of the input): whatever was read is a bare attribute.
    if stop.nil? || stop == '>' || stop == '/>'
      attributes[key.downcase] = "" unless key.empty?
      break
    end
    @scanner.skip(/[\x20\x09\x0A\x0C\x0D]/)
    value =
      if @scanner.scan(/=/)
        @scanner.skip(/[\x20\x09\x0A\x0C\x0D]/)
        if @scanner.scan(/['"]/)
          # Quoted value: read up to the same quote character and step over it.
          closing_quote = Regexp.compile(Regexp.quote(@scanner.matched))
          @scanner.scan_before_or_eos(closing_quote, true)
        else
          @scanner.scan_before_or_eos(/[>\x20\x09\x0A\x0C\x0D]|\/>/)
        end
      else
        ""
      end
    attributes[key.downcase] = decode(value) unless key.empty?
  end
  self_closing = !@scanner.scan(/\//).nil?
  @scanner.skip(/>/)
  (closing ? EndTag : Tag).new(tag_name.downcase, attributes, self_closing)
end
# Scan the remainder of a processing instruction.
#
# The target is the longest run of XML name characters at the current
# position ("" when there is none); the content is everything up to "?>",
# or up to the end of input when the instruction is not closed.
def scan_pi
  # http://www.w3.org/TR/REC-xml/#NT-Name
  target = @scanner.scan(/([-:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD0-9\u00B7\u0300-\u036F\u203F-\u2040]+)/)
  target ||= ""
  content = @scanner.scan_before_or_eos(/\?>/,true)
  Instruction.new(target, content)
end
# Skip a doctype declaration: everything up to and including the next ">"
# (or up to the end of input) is consumed and its text is discarded.
#
# Returns a DocType token that carries no data of its own.
def scan_doctype
  # TODO complex doctype
  # https://github.com/ruby/ruby/blob/master/lib/rexml/parsers/baseparser.rb#L258
  # source = REXML::Source.new(doctype)
  # parser = REXML::Parsers::BaseParser.new(soucre)
  # while parser.document_status == in_doctype
  # parser.pull_event
  @scanner.scan_before_or_eos(/>/,true)
  DocType.new
end
129 | end
130 | end
131 |
--------------------------------------------------------------------------------
/spec/rehtml_tokenizer_spec.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | require 'spec_helper'
3 | require 'rehtml/tokenizer'
4 |
5 | class TokenizeHelper
6 | def initialize(msg,str=nil);
7 | @msg = str.nil? ? "" : " #{msg}"
8 | @str = str.nil? ? msg : str
9 | end
10 | def to_s; "tokenize#{@msg} {#{@str}}"; end
11 | def first_token; REHTML::Tokenizer.new(@str).next; end
12 | def token_size
13 | t = REHTML::Tokenizer.new(@str)
14 | i = 0
15 | i += 1 until t.next.nil?
16 | i
17 | end
18 | def token(num)
19 | t = REHTML::Tokenizer.new(@str)
20 | num.times{|ii|
21 | token = t.next
22 | raise "token size is #{ii}" if token.nil?
23 | }
24 | t.next
25 | end
26 | def method_missing(name, *args)
27 | if name.to_s =~ /^token(\d+)$/
28 | token($1.to_i-1)
29 | else
30 | first_token.send(name, *args)
31 | end
32 | end
33 | end
34 | def tokenize(msg,str=nil); TokenizeHelper.new(msg,str); end
35 |
36 | describe tokenize(%[]) do
37 | its("first_token.raw"){ should eq(%[]) }
38 | its("first_token"){ should be_a(REHTML::Tag) }
39 | its(:name){ should eq("a") }
40 | its(:attributes){ should eq({
41 | "type"=>"checkbox",
42 | "name"=>"be evil",
43 | "value"=>"yes",
44 | "disabled"=>""}) }
45 | its(:token_size){ should eq(1) }
46 | end
47 | describe tokenize(%[]) do
48 | its(:first_token){ should be_a(REHTML::Instruction) }
49 | its(:first_token){ should be_is_xml_decl }
50 | its(:token_size){ should eq(1) }
51 | end
52 | describe tokenize(%[]) do
53 | its(:token_size){ should eq(1) }
54 | its(:first_token){ should be_a(REHTML::Instruction) }
55 | its(:target){ should eq("php") }
56 | its(:content){ should eq(" hoge") }
57 | it{ should_not be_is_xml_decl }
58 | end
59 | describe tokenize(%[ huga?>]) do
60 | its(:token_size){ should eq(1) }
61 | its(:first_token){ should be_a(REHTML::Instruction) }
62 | its(:target){ should eq("") }
63 | its(:content){ should eq(" huga") }
64 | it{ should_not be_is_xml_decl }
65 | end
66 | describe tokenize(%{}) do
67 | its(:token_size){ should eq(1) }
68 | its(:first_token){ should be_a(REHTML::Comment) }
69 | its("first_token.string"){ should eq(" comment ") }
70 | end
71 | describe tokenize(%{abc &a; & & − ' }) do
72 | its(:token_size){ should eq(1) }
73 | its(:first_token){ should be_a(REHTML::Text) }
74 | its(:value){ should eq(%[abc &a; & & − ' ]) }
75 | end
76 | describe tokenize(%{}) do
77 | its(:token_size){ should eq(1) }
78 | its(:first_token){ should be_a(REHTML::CData) }
79 | its(:value){ should eq(" cdata ") }
80 | end
81 | describe tokenize("unclosed comment",%[