├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── jsoner.gemspec
├── lib
│   ├── jsoner.rb
│   └── jsoner
│       ├── table.rb
│       ├── table_factory.rb
│       └── version.rb
└── spec
    ├── fixtures
    │   ├── json.rb
    │   ├── table.html
    │   ├── table.rb
    │   ├── table_extend.html
    │   └── table_extend.rb
    ├── jsoner
    │   ├── table_factory_spec.rb
    │   └── table_spec.rb
    └── jsoner_spec.rb
/.gitignore:
--------------------------------------------------------------------------------
1 | *.gem
2 | *.rbc
3 | .bundle
4 | .config
5 | .yardoc
6 | Gemfile.lock
7 | InstalledFiles
8 | _yardoc
9 | coverage
10 | doc/
11 | lib/bundler/man
12 | pkg
13 | rdoc
14 | spec/reports
15 | test/tmp
16 | test/version_tmp
17 | tmp
18 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | rvm:
2 | - 1.9.3
3 | - 2.0.0
4 |
5 | script: "bundle exec rspec spec/"
6 |
7 | notifications:
8 | email:
9 | - simlegate@163.com
10 |
11 | branches:
12 | only:
13 | - master
14 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## v0.0.2
2 | * Ignore elements outside the table element
3 | * Use the first table element when the parsed HTML contains more than one
4 |
5 | ## v0.0.3
6 | * Parse an HTML file that contains an HTML table
7 | * Handle files that are empty or contain no table element
8 |
9 | ## v0.0.4
10 | * Add RSpec tests that check the integrity of the HTML table
11 | * Provide an exception message for NotFullTable
12 | * Parse a URL whose page contains an HTML table
13 |
14 | ## v0.1.0
15 | * Support Ruby 2.0.0
16 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source 'https://rubygems.org'

# Dependencies are declared in jsoner.gemspec.
gemspec
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013 simlegate
2 |
3 | MIT License
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Jsoner
2 |
3 | Serialize HTML tables into JSON in Ruby.
4 | [Gem Version](http://badge.fury.io/rb/jsoner) [Build Status](https://travis-ci.org/simlegate/jsoner)
5 |
6 | ## Installation
7 |
8 | Add this line to your application's Gemfile:
9 |
10 | gem 'jsoner'
11 |
12 | And then execute:
13 |
14 | $ bundle
15 |
16 | Or install it yourself as:
17 |
18 | $ gem install jsoner
19 |
20 | ## Usage
21 |
22 | ```ruby
23 | html = <<-eohtml
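24 | <table>
25 | <thead>
26 | <tr>
27 | <th>First Name</th>
28 | <th>Last Name</th>
29 | <th>Points</th>
30 | </tr>
31 | </thead>
32 | <tr>
33 | <td>Jill</td>
34 | <td>Smith</td>
35 | <td>50</td>
36 | </tr><tr>
37 | <td>Eve</td>
38 | <td>Jackson</td>
39 | <td>94</td>
40 | </tr><tr>
41 | <td>John</td>
42 | <td>Doe</td>
43 | <td>80</td>
44 | </tr><tr>
45 | <td>Adam</td>
46 | <td>Johnson</td>
47 | <td>67</td>
48 | </tr>
49 | </table>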
50 | eohtml
51 |
52 | # Convert the HTML table into JSON
53 |
54 | json = Jsoner.parse(html)
55 |
56 | # output json =>
57 | #
58 | # [ {"First Name"=>"Jill", "Last Name"=>"Smith", "Points"=>"50"},
59 | # {"First Name"=>"Eve", "Last Name"=>"Jackson", "Points"=>"94"},
60 | # {"First Name"=>"John", "Last Name"=>"Doe", "Points"=>"80"},
61 | # {"First Name"=>"Adam", "Last Name"=>"Johnson", "Points"=>"67"} ]
62 |
63 | ```
64 | Alternatively, `Jsoner` can parse an HTML file that contains an HTML table:
65 |
66 | ```ruby
67 | require 'open-uri'
68 | # table.html must exist; adjust the path so it points at your file.
69 |
70 | table = Jsoner.parse(open('table.html'))
71 |
72 | # output json =>
73 | #
74 | # [ {"First Name"=>"Jill", "Last Name"=>"Smith", "Points"=>"50"},
75 | # {"First Name"=>"Eve", "Last Name"=>"Jackson", "Points"=>"94"},
76 | # {"First Name"=>"John", "Last Name"=>"Doe", "Points"=>"80"},
77 | # {"First Name"=>"Adam", "Last Name"=>"Johnson", "Points"=>"67"} ]
78 |
79 | ```
80 |
81 | Or you can parse a table fetched from a URL:
82 |
83 | ```ruby
84 | require 'open-uri'
85 |
86 | table = Jsoner.parse(open('http://www.w3school.com.cn/tiy/t.asp?f=html_table_headers'))
87 |
88 | # output json =>
89 | #
90 | # [{"姓名"=>"Bill Gates", "电话"=>"555 77 855"}]
91 | ```
92 | ## THANKS
93 |
94 | [table-to-json](https://github.com/lightswitch05/table-to-json) written by [@lightswitch05](https://github.com/lightswitch05) in JavaScript.
95 |
96 | ## Contributing
97 |
98 | 1. Fork it
99 | 2. Create your feature branch (`git checkout -b my-new-feature`)
100 | 3. Commit your changes (`git commit -am 'Add some feature'`)
101 | 4. Push to the branch (`git push origin my-new-feature`)
102 | 5. Create new Pull Request
103 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler/gem_tasks"
2 |
--------------------------------------------------------------------------------
/jsoner.gemspec:
--------------------------------------------------------------------------------
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'jsoner/version'

# Gem specification for jsoner: name, version, packaged files and dependencies.
Gem::Specification.new do |spec|
  spec.name          = "jsoner"
  spec.version       = Jsoner::VERSION
  spec.authors       = ["simlegate"]
  spec.email         = ["simlegate@163.com"]
  spec.description   = %q{Serialize HTML tables into JSON in Ruby}
  spec.summary       = %q{Serialize HTML tables into JSON in Ruby}
  spec.homepage      = "https://github.com/simlegate/jsoner"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and tests are derived from that list.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/jsoner.rb does `require 'nokogiri'` at load time, so nokogiri has to be
  # installed together with the gem. As a development dependency it would be
  # skipped for users, and `require 'jsoner'` would fail with a LoadError.
  spec.add_runtime_dependency "nokogiri", "~> 1.6.0"

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec", "~> 2.14.1"
end
--------------------------------------------------------------------------------
/lib/jsoner.rb:
--------------------------------------------------------------------------------
require "jsoner/version"
require "jsoner/table"
require "jsoner/table_factory"

require 'nokogiri'

# Serializes the first HTML table found in a document into JSON.
module Jsoner

  # Raised when the HTML does not contain a complete table.
  #
  # The default text is supplied through the constructor rather than by
  # overriding #message, so #message, #to_s and #inspect agree with each other
  # and a caller may still pass its own message to +raise+.
  class NotFullTable < StandardError
    DEFAULT_MESSAGE = 'Incomplete HTML Table'

    def initialize(msg = DEFAULT_MESSAGE)
      super
    end
  end

  class << self

    # Parses +html+ and returns its first table as a JSON String.
    #
    # html - whatever Nokogiri::HTML.parse accepts; the specs pass both
    #        Strings and opened files.
    #
    # Raises Jsoner::NotFullTable when TableFactory.check rejects the document.
    def parse(html)
      document = Nokogiri::HTML.parse(html)
      # TableFactory.check either returns a factory or raises, so no nil
      # branch is needed here.
      Table.new(TableFactory.check(document)).to_json
    end

    # Returns the contents of +html+ when File.file? reports it as a regular
    # file (a path or an open File), otherwise returns +html+ unchanged.
    def filter html
      return html unless File.file? html

      File.open(html) { |file| file.read }
    end

  end
end
--------------------------------------------------------------------------------
/lib/jsoner/table.rb:
--------------------------------------------------------------------------------
require 'json'

module Jsoner
  # Wraps the header/body Hash produced by a factory and serializes it as a
  # JSON array containing one object per body row.
  class Table

    # factory - any object whose #create returns a Hash of the form
    #           { :header => [String, ...], :body => [[String, ...], ...] }
    def initialize factory
      @table = factory.create
    end

    # Number of body rows; the header row is not counted.
    def row_number
      @table[:body].count
    end

    # Returns the table as a JSON String.
    #
    # Arguments are forwarded to Array#to_json. The JSON generator calls
    # to_json(state) on nested objects, so accepting them lets a Table be
    # embedded in a larger structure (e.g. JSON.generate([table])).
    def to_json(*args)
      convert.to_json(*args)
    end

    #
    # Convert the Hash from the factory into an Array of Hashes, one per body
    # row, ready to be serialized into JSON.
    #
    # Example:
    #   table = { :header => ["First Name", "Last Name", "Points"],
    #             :body   => [["Jill", "Smith", "50"],
    #                         ["Eve", "Jackson", "94"],
    #                         ["John", "Doe", "80"],
    #                         ["Adam", "Johnson", "67"]] }
    #
    # Output:
    #   [{"First Name"=>"Jill", "Last Name"=>"Smith", "Points"=>"50"},
    #    {"First Name"=>"Eve", "Last Name"=>"Jackson", "Points"=>"94"},
    #    {"First Name"=>"John", "Last Name"=>"Doe", "Points"=>"80"},
    #    {"First Name"=>"Adam", "Last Name"=>"Johnson", "Points"=>"67"}]
    def convert
      header = @table[:header]

      # Pair each header cell with the cell in the same column of the row:
      #   Hash[["a", "b", "c"].zip(["d", "e", "f"])]
      #   # => {"a" => "d", "b" => "e", "c" => "f"}
      @table[:body].map { |row| Hash[header.zip(row)] }
    end

    private :convert
  end
end
--------------------------------------------------------------------------------
/lib/jsoner/table_factory.rb:
--------------------------------------------------------------------------------
module Jsoner
  #
  # Builds the Hash below from a document parsed by Nokogiri:
  #   table = { :header => ["First Name", "Last Name", "Score"],
  #             :body   => [["Jill", "Smith", "50"],
  #                         ["Eve", "Jackson", "94"],
  #                         ["John", "Doe", "80"],
  #                         ["Adam", "Johnson", "67"]] }
  class TableFactory

    # doc - parsed document responding to #search. Only its first table is used.
    #
    # Raises Jsoner::NotFullTable when the document contains no table at all
    # (previously this surfaced as a NoMethodError on nil).
    def initialize doc
      table = remove_housing(doc)
      raise Jsoner::NotFullTable if table.nil?

      @table_rows = table.search('tr')
    end

    # Returns { :header => [...], :body => [[...], ...] }.
    def create
      { :header => build_header, :body => build_body }
    end

    # Text of every +th+ cell in the first row; empty when the table has no rows.
    def build_header
      first_row = @table_rows.first
      return [] if first_row.nil?

      first_row.search('th').map(&:content)
    end

    # Text of the +td+ cells of every row after the first, one Array per row.
    def build_body
      @table_rows.drop(1).map do |row|
        row.search('td').map(&:content)
      end
    end

    #
    # Check whether the first table of +doc+ is full.
    # A full table includes +tr+, +td+ and +th+ elements.
    #
    # Returns a TableFactory for +doc+; raises Jsoner::NotFullTable when the
    # table is missing or lacks any of those elements.
    def self.check doc
      table = doc.search('table').first
      # A document without any table is as incomplete as one lacking cells.
      raise Jsoner::NotFullTable if table.nil?
      raise Jsoner::NotFullTable if %w[tr td th].any? { |tag| table.search(tag).empty? }

      TableFactory.new doc
    end

    private

    #
    # Drop everything around the table: returns the first +table+ element of
    # +doc+, or nil when there is none. When the parsed HTML contains several
    # tables, only the first one is kept.
    def remove_housing doc
      doc.search('table').first
    end
  end
end
--------------------------------------------------------------------------------
/lib/jsoner/version.rb:
--------------------------------------------------------------------------------
module Jsoner
  # Gem version, read by jsoner.gemspec. Frozen so it cannot be mutated at runtime.
  # NOTE(review): CHANGELOG.md already lists v0.1.0 - confirm whether this
  # value should be bumped before the next release.
  VERSION = "0.0.4".freeze
end
--------------------------------------------------------------------------------
/spec/fixtures/json.rb:
--------------------------------------------------------------------------------
1 | require 'json'
2 | def pre_json
3 | [ {"First Name"=>"Jill", "Last Name"=>"Smith", "Points"=>"50"},
4 | {"First Name"=>"Eve", "Last Name"=>"Jackson", "Points"=>"94"},
5 | {"First Name"=>"John", "Last Name"=>"Doe", "Points"=>"80"},
6 | {"First Name"=>"Adam", "Last Name"=>"Johnson", "Points"=>"67"} ]
7 | end
8 |
9 | def json
10 | pre_json.to_json
11 | end
12 |
--------------------------------------------------------------------------------
/spec/fixtures/table.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | First Name |
5 | Last Name |
6 | Points |
7 |
8 |
9 |
10 | Jill |
11 | Smith |
12 | 50 |
13 |
14 | Eve |
15 | Jackson |
16 | 94 |
17 |
18 | John |
19 | Doe |
20 | 80 |
21 |
22 | Adam |
23 | Johnson |
24 | 67 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/spec/fixtures/table.rb:
--------------------------------------------------------------------------------
1 | def table_str
2 | <<-eohtml
3 |
4 |
5 |
6 | First Name |
7 | Last Name |
8 | Points |
9 |
10 |
11 |
12 | Jill |
13 | Smith |
14 | 50 |
15 |
16 | Eve |
17 | Jackson |
18 | 94 |
19 |
20 | John |
21 | Doe |
22 | 80 |
23 |
24 | Adam |
25 | Johnson |
26 | 67 |
27 |
28 |
29 | eohtml
30 | end
31 |
--------------------------------------------------------------------------------
/spec/fixtures/table_extend.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Jill |
5 | Smith |
6 | 50 |
7 |
8 | Eve |
9 | Jackson |
10 | 94 |
11 |
12 | John |
13 | Doe |
14 | 80 |
15 |
16 | Adam |
17 | Johnson |
18 | 67 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/spec/fixtures/table_extend.rb:
--------------------------------------------------------------------------------
1 | def table_extend_str
2 | <<-eohtml
3 |
4 |
5 |
6 |
7 | First Name |
8 | Last Name |
9 | Points |
10 |
11 |
12 |
13 | Jill |
14 | Smith |
15 | 50 |
16 |
17 | Eve |
18 | Jackson |
19 | 94 |
20 |
21 | John |
22 | Doe |
23 | 80 |
24 |
25 | Adam |
26 | Johnson |
27 | 67 |
28 |
29 |
30 |
32 |
33 | eohtml
34 | end
35 |
36 | def no_td_table
37 | <<-eohtml
38 |
39 |
40 |
41 | First Name |
42 | Last Name |
43 | Points |
44 |
45 |
46 | eohtml
47 | end
48 |
49 | def no_tr_table
50 | <<-eohtml
51 |
53 | eohtml
54 | end
55 |
56 | def no_th_table
57 | <<-eohtml
58 |
59 |
60 |
61 | Jill |
62 | Smith |
63 | 50 |
64 |
65 | Eve |
66 | Jackson |
67 | 94 |
68 |
69 | John |
70 | Doe |
71 | 80 |
72 |
73 | Adam |
74 | Johnson |
75 | 67 |
76 |
77 |
78 | eohtml
79 | end
80 |
81 |
--------------------------------------------------------------------------------
/spec/jsoner/table_factory_spec.rb:
--------------------------------------------------------------------------------
# Fixtures live in spec/fixtures, one directory above this file. Globbing
# spec/jsoner/fixtures matched nothing, so this spec only worked when another
# spec file happened to load the fixtures first.
Dir["#{File.dirname(__FILE__)}/../fixtures/*.rb"].each {|file| require file }
require "#{File.dirname(__FILE__)}/../../lib/jsoner"

describe 'build Hash below from doc parsed by Nokogiki' do

  before :each do
    @obj = Nokogiri::HTML.parse(table_str)
    @factory = Jsoner::TableFactory.new(@obj)
  end

  it "@table_rows should be instance of NodeSet" do
    @factory.instance_variable_get(:@table_rows).should be_instance_of Nokogiri::XML::NodeSet
  end

  context "Building header" do
    it "should return Array" do
      @factory.build_header.should be_instance_of Array
    end

    it "should match data from fixtures" do
      @factory.build_header.should == ["First Name", "Last Name", "Points"]
    end
  end

  context "Building body" do
    it "should return Double Dimensional Array" do
      @factory.build_body.should be_instance_of Array
      @factory.build_body.each{ |column| column.should be_instance_of Array }
    end
  end

  context "Building full hash defined table" do
    it "should have _header_ key" do
      @factory.create.should have_key :header
    end

    # Previously asserted :header again, so the :body key was never checked.
    it "should have _body_ key" do
      @factory.create.should have_key :body
    end

    it "data mapped by _header_ key should match fixtures" do
      @factory.create[:header].should == ["First Name", "Last Name", "Points"]
    end

    it "data mapped by _body_ key should match fixtures" do
      @factory.create[:body].should == [["Jill", "Smith", "50"],
                                        ["Eve", "Jackson", "94"],
                                        ["John", "Doe", "80"],
                                        ["Adam", "Johnson", "67"]]
    end

    # TODO testing when having no header in table
  end

  context "check integrity of HTML table" do

    it "when having no tr element" do
      doc = Nokogiri::HTML.parse(no_tr_table)
      expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
    end

    it "when having no td element" do
      doc = Nokogiri::HTML.parse(no_td_table)
      expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
    end

    it "when having no th element" do
      doc = Nokogiri::HTML.parse(no_th_table)
      expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
    end

    it "should return factory if integrity" do
      doc = Nokogiri::HTML.parse(table_str)
      Jsoner::TableFactory.check(doc).should be_instance_of Jsoner::TableFactory
    end
  end

  context "filter HTML" do

    before :each do
      @ext_obj = Nokogiri::HTML.parse(table_extend_str)
      @ext_factory = Jsoner::TableFactory.new(@ext_obj)
    end

    it "@ext_obj should have two tables" do
      @ext_obj.search('table').count.should == 2
    end

    it "second table have no descendant nodes" do
      @ext_obj.search('table').last.search('descendant').should be_empty
    end

    it "first table should have _th_ elements" do
      @ext_obj.search('table').first.search('th').count.should == 3
    end

    it "should get first table after search table default" do
      @ext_factory.instance_variable_get(:@table_rows).search('tr').count.should == 5
    end
  end
end
--------------------------------------------------------------------------------
/spec/jsoner/table_spec.rb:
--------------------------------------------------------------------------------
# Fixtures live in spec/fixtures, one directory above this file. Globbing
# spec/jsoner/fixtures matched nothing, so this spec only worked when another
# spec file happened to load the fixtures first.
Dir["#{File.dirname(__FILE__)}/../fixtures/*.rb"].each {|file| require file }
require "#{File.dirname(__FILE__)}/../../lib/jsoner"

describe "parse structure of table" do

  before :each do
    obj = Nokogiri::HTML.parse(table_str)
    factory = Jsoner::TableFactory.new(obj)
    @table = Jsoner::Table.new(factory)
  end

  context "the number of row beside header in table" do
    it "should == 4 " do
      @table.row_number.should == 4
    end
  end

  context "convert Hash from factory" do
    it "_convert_ should private method" do
      expect{ @table.convert }.to raise_error NoMethodError
    end

    it "should be instance of Array" do
      @table.send(:convert).should be_instance_of Array
    end

    it "everyone should be instance of Hash in Array" do
      @table.send(:convert).map{ |h| h.should be_instance_of Hash}
    end

    # The description used to say _Score_, but the fixture column that is
    # asserted below is "Points".
    it "everyone should have keys including _First Name_,_Last Name_,_Points_ in Array" do
      @table.send(:convert).map{ |h| h.should have_key "First Name"}
      @table.send(:convert).map{ |h| h.should have_key "Last Name"}
      @table.send(:convert).map{ |h| h.should have_key "Points"}
    end

    it "match full data" do
      @table.send(:convert).should == pre_json
    end
  end
end
--------------------------------------------------------------------------------
/spec/jsoner_spec.rb:
--------------------------------------------------------------------------------
# encoding: utf-8
Dir["#{File.dirname(__FILE__)}/fixtures/*.rb"].each {|file| require file }
require "#{File.dirname(__FILE__)}/../lib/jsoner"

require 'open-uri'
require 'json'

describe Jsoner do

  # Path of a file under spec/fixtures.
  def fixture_path(name)
    "#{File.dirname(__FILE__)}/fixtures/#{name}"
  end

  it "should match data from json of fixtures" do
    Jsoner.parse(table_str).should == json
  end

  # Files are opened in block form so the handle is closed once parsed.
  it "should parse file including table" do
    File.open(fixture_path("table.html")) { |file| Jsoner.parse(file) }.should == json
  end

  it "should raise error when having no full table in HTML file" do
    expect {
      File.open(fixture_path("table_extend.html")) { |file| Jsoner.parse(file) }
    }.to raise_error Jsoner::NotFullTable
  end

  # http://www.w3school.com.cn/tiy/t.asp?f=html_table_headers
  # NOTE: this example needs network access, and relies on open-uri's
  # Kernel#open accepting URLs (removed in Ruby 3.0).
  it "should parse Link include table" do
    # Named +parsed+ so it does not shadow the +json+ fixture helper.
    parsed = open("http://www.w3school.com.cn/tiy/t.asp?f=html_table_headers") { |page| Jsoner.parse(page) }
    JSON.parse(parsed).should == [{"姓名"=>"Bill Gates", "电话"=>"555 77 855"}]
  end

  context "data parsed" do
    it "it is HTML file" do
      File.open(fixture_path("table.html")) { |file| Jsoner.filter(file) }.should == table_str
    end

    it "it is String" do
      Jsoner.filter("").should == ""
    end
  end
end
--------------------------------------------------------------------------------