├── VERSION ├── spec ├── spec.opts ├── .gitignore ├── test-files │ ├── example2.ttl │ ├── example2.html │ ├── example3.ttl │ ├── example1.ttl │ ├── example3.html │ ├── example1.html │ └── test-registry.json ├── spec_helper.rb ├── format_spec.rb ├── suite_spec.rb ├── expansion_spec.rb ├── suite_helper.rb ├── reader_spec.rb └── rdfa_reader_spec.rb ├── AUTHORS ├── lib └── rdf │ ├── .gitignore │ ├── microdata │ ├── vocab.rb │ ├── version.rb │ ├── registry.rb │ ├── rdfa_reader.rb │ ├── format.rb │ ├── expansion.rb │ ├── reader │ │ └── nokogiri.rb │ └── reader.rb │ └── microdata.rb ├── .coveralls.yml ├── .gitignore ├── dependencyci.yml ├── examples ├── itemref.html ├── inventory-level.html ├── locomotive.html ├── svg.svg ├── goodreads.html ├── to_jsonld.html ├── blog_posting.html └── events_schema_org.html ├── .yardopts ├── etc ├── registry.json ├── doap.ttl ├── doap.html └── doap.nt ├── .github └── workflows │ ├── generate-docs.yml │ └── ci.yml ├── UNLICENSE ├── Gemfile ├── Rakefile ├── rdf-microdata.gemspec ├── CONTRIBUTING.md ├── History.md ├── script ├── tc └── parse └── README.md /VERSION: -------------------------------------------------------------------------------- 1 | 3.3.0 2 | -------------------------------------------------------------------------------- /spec/spec.opts: -------------------------------------------------------------------------------- 1 | --colour -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | * Gregg Kellogg -------------------------------------------------------------------------------- /lib/rdf/.gitignore: -------------------------------------------------------------------------------- 1 | /rdfxml 2 | /rdfxml.rb 3 | -------------------------------------------------------------------------------- /spec/.gitignore: -------------------------------------------------------------------------------- 1 | /uri-cache/ 2 | 
/htmldata 3 | /spec-tests 4 | -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | repo_token: Me6Vqlj2P292rGH0feiRAVt6hc6jwjndv 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.yardoc/ 2 | /doc/ 3 | /pkg/ 4 | /.rbx/ 5 | Gemfile.lock 6 | /.byebug_history 7 | coverage 8 | -------------------------------------------------------------------------------- /dependencyci.yml: -------------------------------------------------------------------------------- 1 | platform: 2 | Rubygems: 3 | rdf-isomorphic: 4 | tests: 5 | unmaintained: skip -------------------------------------------------------------------------------- /examples/itemref.html: -------------------------------------------------------------------------------- 1 |
2 |

1

3 |
4 |
5 |

test

6 |

2

7 |
8 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --title "Microdata reader for RDF.rb." 2 | --output-dir doc/yard 3 | --protected 4 | --no-private 5 | --hide-void-return 6 | --markup markdown 7 | --readme README.md 8 | - 9 | History.md 10 | AUTHORS 11 | VERSION 12 | UNLICENSE 13 | -------------------------------------------------------------------------------- /examples/inventory-level.html: -------------------------------------------------------------------------------- 1 |
2 | Availability: 4 | In-stock 5 | 6 |
7 | -------------------------------------------------------------------------------- /examples/locomotive.html: -------------------------------------------------------------------------------- 1 |
3 |
Name: 4 |
Tank Locomotive (DB 80) 5 |
Product code: 6 |
33041 7 |
Scale: 8 |
HO 9 |
Digital: 10 |
Delta 11 |
12 | -------------------------------------------------------------------------------- /examples/svg.svg: -------------------------------------------------------------------------------- 1 | 2 | A microdata in SVG test 3 | This is a test case for an svg file to see whether microdata processing tools actually process the attributes defined in microdata the same way in SVG as they do in HTML 4 | 5 | -------------------------------------------------------------------------------- /etc/registry.json: -------------------------------------------------------------------------------- 1 | { 2 | "http://schema.org/": { 3 | "properties": { 4 | "additionalType": {"subPropertyOf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"} 5 | } 6 | }, 7 | "https://schema.org/": { 8 | "properties": { 9 | "additionalType": {"subPropertyOf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"} 10 | } 11 | }, 12 | "http://microformats.org/profile/hcard": {} 13 | } 14 | -------------------------------------------------------------------------------- /lib/rdf/microdata/vocab.rb: -------------------------------------------------------------------------------- 1 | module RDF 2 | class MD < Vocabulary("http://www.w3.org/ns/md#") # Vocabulary rooted at the md# namespace; declares the single property `item` 3 | property :item, 4 | label: "item", 5 | comment: "List of items", 6 | type: "rdf:Property", 7 | range: "rdf:List" 8 | end 9 | class HCard < Vocabulary("http://microformats.org/profile/hcard#"); end # Empty vocabulary class for the hCard profile namespace 10 | class HCalendar < Vocabulary("http://microformats.org/profile/hcalendar"); end # NOTE(review): unlike HCard, this base URI has no trailing "#"; presumably term names get appended straight onto "hcalendar" - confirm this is intended 11 | end 12 | -------------------------------------------------------------------------------- /spec/test-files/example2.ttl: -------------------------------------------------------------------------------- 1 | @prefix md: . 2 | @prefix hcard: . 3 | @prefix rdfa: . 4 | 5 | [ a ; 6 | hcard:fn "Princeton"; 7 | hcard:n [ hcard:given-name "Princeton" ]; 8 | hcard:adr _:a 9 | ] . 10 | [ a ; 11 | hcard:fn "Trekkie"; 12 | hcard:n [ hcard:given-name "Trekkie" ]; 13 | hcard:adr _:a 14 | ] . 
15 | 16 | _:a hcard:street-address "Avenue Q" . 17 | -------------------------------------------------------------------------------- /lib/rdf/microdata/version.rb: -------------------------------------------------------------------------------- 1 | module RDF::Microdata::VERSION 2 | VERSION_FILE = File.join(File.expand_path(File.dirname(__FILE__)), "..", "..", "..", "VERSION") 3 | MAJOR, MINOR, TINY, EXTRA = File.read(VERSION_FILE).chop.split(".") 4 | 5 | STRING = [MAJOR, MINOR, TINY, EXTRA].compact.join('.') 6 | 7 | ## 8 | # @return [String] 9 | def self.to_s() STRING end 10 | 11 | ## 12 | # @return [String] 13 | def self.to_str() STRING end 14 | 15 | ## 16 | # @return [Array(Integer, Integer, Integer)] 17 | def self.to_a() STRING.split(".") end 18 | end 19 | -------------------------------------------------------------------------------- /spec/test-files/example2.html: -------------------------------------------------------------------------------- 1 |

2 | Both 3 | 4 | Princeton 7 | 8 | and 9 | 10 | Trekkie 13 | 14 | live at 15 | 16 | Avenue Q. 17 | 18 |

19 | -------------------------------------------------------------------------------- /spec/test-files/example3.ttl: -------------------------------------------------------------------------------- 1 | @prefix md: . 2 | @prefix rdf: . 3 | @prefix schema: . 4 | @prefix rdfa: . 5 | 6 | [ a schema:MusicPlaylist; 7 | schema:name "Classic Rock Playlist"; 8 | schema:byArtist "Lynard Skynard", "AC/DC"; 9 | schema:numTracks "2"; 10 | schema:tracks 11 | [ a schema:MusicRecording; 12 | schema:byArtist "Lynard Skynard"; 13 | schema:name "Sweet Home Alabama"; 14 | schema:url ], 15 | [ a schema:MusicRecording; 16 | schema:byArtist "AC/DC"; 17 | schema:name "Shook you all Night Long"; 18 | schema:url ] 19 | ] . 20 | -------------------------------------------------------------------------------- /spec/test-files/example1.ttl: -------------------------------------------------------------------------------- 1 | @prefix dc: . 2 | @prefix md: . 3 | @prefix frbr: . 4 | @prefix rdfa: . 5 | 6 | a frbr:Work ; 7 | dc:creator "Wil Wheaton"@en ; 8 | dc:title "Just a Geek"@en ; 9 | frbr:realization , 10 | . 11 | 12 | a frbr:Expression ; 13 | dc:type . 14 | 15 | a frbr:Expression ; 16 | dc:type . 
17 | -------------------------------------------------------------------------------- /.github/workflows/generate-docs.yml: -------------------------------------------------------------------------------- 1 | name: Build & deploy documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | workflow_dispatch: 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | name: Update gh-pages with docs 11 | steps: 12 | - name: Clone repository 13 | uses: actions/checkout@v3 14 | - name: Set up Ruby 15 | uses: ruby/setup-ruby@v1 16 | with: 17 | ruby-version: "3.1" 18 | - name: Install required gem dependencies 19 | run: gem install yard --no-document 20 | - name: Build YARD Ruby Documentation 21 | run: yardoc 22 | - name: Deploy 23 | uses: peaceiris/actions-gh-pages@v3 24 | with: 25 | github_token: ${{ secrets.GITHUB_TOKEN }} 26 | publish_dir: ./doc/yard 27 | publish_branch: gh-pages 28 | -------------------------------------------------------------------------------- /spec/test-files/example3.html: -------------------------------------------------------------------------------- 1 |
2 | Classic Rock Playlist 3 | 4 |

Including works by 5 | Lynard Skynard and 6 | AC/DC

. 7 | 8 |
9 | 1.Sweet Home Alabama - 10 | Lynard Skynard 11 | 12 |
13 | 14 |
15 | 2.Shook you all Night Long - 16 | AC/DC 17 | 18 |
19 |
20 | -------------------------------------------------------------------------------- /spec/test-files/example1.html: -------------------------------------------------------------------------------- 1 |
5 |
Title
6 |
Just a Geek
7 |
By
8 |
Wil Wheaton
9 |
Format
10 |
14 | 15 | Print 16 |
17 |
21 | 22 | Ebook 23 |
24 |
25 | -------------------------------------------------------------------------------- /examples/goodreads.html: -------------------------------------------------------------------------------- 1 | 25 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This workflow runs continuous CI across different versions of ruby on all branches and pull requests to develop. 
2 | 3 | name: CI 4 | on: 5 | push: 6 | branches: [ '**' ] 7 | pull_request: 8 | branches: [ develop ] 9 | workflow_dispatch: 10 | 11 | jobs: 12 | tests: 13 | name: Ruby ${{ matrix.ruby }} 14 | if: "contains(github.event.commits[0].message, '[ci skip]') == false" 15 | runs-on: ubuntu-latest 16 | env: 17 | CI: true 18 | ALLOW_FAILURES: ${{ endsWith(matrix.ruby, 'head') || matrix.ruby == 'jruby' }} 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | ruby: ['3.0', 3.1, 3.2, 3.3, ruby-head, jruby] 23 | steps: 24 | - name: Clone repository 25 | uses: actions/checkout@v3 26 | - name: Set up Ruby 27 | uses: ruby/setup-ruby@v1 28 | with: 29 | ruby-version: ${{ matrix.ruby }} 30 | - name: Install dependencies 31 | run: ruby --version; bundle install --jobs 4 --retry 3 32 | - name: Run tests 33 | run: ruby --version; bundle exec rspec spec || $ALLOW_FAILURES 34 | - name: Coveralls GitHub Action 35 | uses: coverallsapp/github-action@v2 36 | if: "matrix.ruby == '3.3'" 37 | with: 38 | github-token: ${{ secrets.GITHUB_TOKEN }} 39 | -------------------------------------------------------------------------------- /lib/rdf/microdata.rb: -------------------------------------------------------------------------------- 1 | $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..'))) 2 | require 'rdf' 3 | 4 | module RDF 5 | ## 6 | # **`RDF::Microdata`** is a Microdata extension for RDF.rb. 
7 | # 8 | # @example Requiring the `RDF::Microdata` module 9 | # require 'rdf/microdata' 10 | # 11 | # @example Parsing RDF statements from an HTML file 12 | # RDF::Microdata::Reader.open("etc/doap.html") do |reader| 13 | # reader.each_statement do |statement| 14 | # puts statement.inspect 15 | # end 16 | # end 17 | # 18 | # @see https://ruby-rdf.github.io/rdf/ 19 | # @see https://www.w3.org/TR/2011/WD-microdata-20110525/ 20 | # 21 | # @author [Gregg Kellogg](https://greggkellogg.net/) 22 | module Microdata 23 | USES_VOCAB = RDF::URI("http://www.w3.org/ns/rdfa#usesVocabulary") 24 | DEFAULT_REGISTRY = File.expand_path("../../../etc/registry.json", __FILE__) 25 | 26 | require 'rdf/microdata/format' 27 | require 'rdf/microdata/vocab' 28 | autoload :Expansion, 'rdf/microdata/expansion' 29 | autoload :Profile, 'rdf/microdata/profile' # NOTE(review): the repository tree lists no lib/rdf/microdata/profile.rb - confirm this autoload target still exists 30 | autoload :RdfaReader, 'rdf/microdata/rdfa_reader' 31 | autoload :Reader, 'rdf/microdata/reader' 32 | autoload :Registry, 'rdf/microdata/registry' 33 | autoload :VERSION, 'rdf/microdata/version' 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "rdf", git: "https://github.com/ruby-rdf/rdf", branch: "develop" 6 | gem "rdf-rdfa", git: "https://github.com/ruby-rdf/rdf-rdfa", branch: "develop" 7 | gem "rdf-xsd", git: "https://github.com/ruby-rdf/rdf-xsd", branch: "develop" 8 | 9 | group :development do 10 | gem "json-ld", git: "https://github.com/ruby-rdf/json-ld", branch: "develop" 11 | gem 'ebnf', git: "https://github.com/dryruby/ebnf", branch: "develop" 12 | gem 'rdf-aggregate-repo', git: "https://github.com/ruby-rdf/rdf-aggregate-repo", branch: "develop" 13 | gem 'rdf-isomorphic', git: "https://github.com/ruby-rdf/rdf-isomorphic", branch: "develop" 14 | gem "rdf-spec", git: "https://github.com/ruby-rdf/rdf-spec", branch: "develop" 15 
| gem 'rdf-turtle', git: "https://github.com/ruby-rdf/rdf-turtle", branch: "develop" 16 | gem 'rdf-vocab', git: "https://github.com/ruby-rdf/rdf-vocab", branch: "develop" 17 | gem 'sxp', git: "https://github.com/dryruby/sxp.rb", branch: "develop" 18 | end 19 | 20 | group :debug do 21 | gem "byebug", platform: :mri 22 | end 23 | 24 | group :test do 25 | gem 'simplecov', '~> 0.22', platforms: :mri 26 | gem 'simplecov-lcov', '~> 0.8', platforms: :mri 27 | end 28 | -------------------------------------------------------------------------------- /etc/doap.ttl: -------------------------------------------------------------------------------- 1 | @base . 2 | @prefix dc: . 3 | @prefix doap: . 4 | @prefix foaf: . 5 | @prefix rdf: . 6 | @prefix xsd: . 7 | 8 | <> a doap:Project; 9 | doap:name "RDF::Microdata"; 10 | doap:shortdesc "Microdata reader for Ruby RDF.rb."@en; 11 | doap:description "RDF::Microdata is an Microdata reader for Ruby using the RDF.rb library suite."@en; 12 | dc:creator ; 13 | doap:blog ; 14 | doap:bug-database ; 15 | doap:category ; 16 | doap:created "2011-08-29"^^xsd:date; 17 | doap:developer ; 18 | doap:documenter ; 19 | doap:download-page <>; 20 | doap:homepage ; 21 | doap:implements ; 22 | doap:license ; 23 | doap:mailing-list ; 24 | doap:maintainer ; 25 | doap:programming-language "Ruby"; 26 | foaf:creator . 
27 | -------------------------------------------------------------------------------- /spec/test-files/test-registry.json: -------------------------------------------------------------------------------- 1 | { 2 | "@comment": "This registry defines several artificial vocabularies used when running tests in the microdata to RDF test suite", 3 | "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#": { 4 | "properties": { 5 | "entries": {"multipleValues": "list"} 6 | } 7 | }, 8 | "http://foo/": {}, 9 | "http://foo#": {}, 10 | "http://schema.org/": { 11 | "properties": { 12 | "additionalType": {"subPropertyOf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"} 13 | } 14 | }, 15 | "https://schema.org/": { 16 | "properties": { 17 | "additionalType": {"subPropertyOf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"} 18 | } 19 | }, 20 | "http://microformats.org/profile/hcard": {}, 21 | "http://vocabulary.unordered/": { 22 | "propertyURI": "vocabulary", 23 | "multipleValues": "unordered", 24 | "properties": { 25 | "list": {"multipleValues": "list"}, 26 | "typed": {"datatype": "http://typed"} 27 | } 28 | }, 29 | "http://expansion/": { 30 | "propertyURI": "vocabulary", 31 | "multipleValues": "unordered", 32 | "properties": { 33 | "equivalentProperty": { 34 | "equivalentProperty": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" 35 | }, 36 | "subPropertyOf": { 37 | "subPropertyOf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/to_jsonld.html: -------------------------------------------------------------------------------- 1 | 2 | My Blog 3 |
4 |
5 |

Progress report

6 |

7 | 8 |
9 |

All in all, he's doing well with his swim lessons. The biggest thing was he had trouble 10 | putting his head in, but we got it down.

11 |
12 |

Comments

13 |
14 | 15 |
16 |

Posted by: 17 | Greg 18 |

19 |

20 |
21 |

Ha!

22 |
23 |
24 | 25 |
26 |

Posted by: 27 | Charlotte 28 |

29 |

30 |
31 |

When you say "we got it down"...

32 |
33 |
34 |
35 | -------------------------------------------------------------------------------- /examples/blog_posting.html: -------------------------------------------------------------------------------- 1 | 2 | My Blog 3 |
4 |
5 |

Progress report

6 |

7 | 8 |
9 |

All in all, he's doing well with his swim lessons. The biggest thing was he had trouble 10 | putting his head in, but we got it down.

11 |
12 |

Comments

13 |
14 | 15 |
16 |

Posted by: 17 | Greg 18 |

19 |

20 |
21 |

Ha!

22 |
23 |
24 | 25 |
26 |

Posted by: 27 | Charlotte 28 |

29 |

30 |
31 |

When you say "we got it down"...

32 |
33 |
34 |
35 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'yard' 3 | require 'rspec/core/rake_task' 4 | 5 | namespace :gem do 6 | desc "Build the rdf-microdata-#{File.read('VERSION').chomp}.gem file" 7 | task :build do 8 | sh "gem build rdf-microdata.gemspec && mv rdf-microdata-#{File.read('VERSION').chomp}.gem pkg/" 9 | end 10 | 11 | desc "Release the rdf-microdata-#{File.read('VERSION').chomp}.gem file" 12 | task :release do 13 | sh "gem push pkg/rdf-microdata-#{File.read('VERSION').chomp}.gem" 14 | end 15 | end 16 | 17 | desc 'Run specifications' 18 | RSpec::Core::RakeTask.new(:spec) do |spec| 19 | spec.rspec_opts = %w(--options spec/spec.opts) if File.exists?('spec/spec.opts') 20 | end 21 | 22 | desc "Run specs through RCov" 23 | RSpec::Core::RakeTask.new("spec:rcov") do |spec| 24 | spec.rcov = true 25 | spec.rcov_opts = %q[--exclude "spec"] 26 | end 27 | 28 | namespace :doc do 29 | YARD::Rake::YardocTask.new 30 | 31 | desc "Generate HTML report specs" 32 | RSpec::Core::RakeTask.new("spec") do |spec| 33 | spec.rspec_opts = ["--format", "html", "-o", "doc/spec.html"] 34 | end 35 | end 36 | 37 | task default: :spec 38 | task specs: :spec 39 | 40 | desc "Generate etc/doap.{nt,ttl} from etc/doap.html." 41 | task :doap do 42 | require 'rdf/microdata' 43 | require 'rdf/turtle' 44 | require 'rdf/ntriples' 45 | g = RDF::Graph.load("etc/doap.html") 46 | RDF::NTriples::Writer.open("etc/doap.nt") {|w| w < 2 | 3 | 4 | schema_events 5 | 6 | 7 | 8 |

schema_events

9 | 10 |
11 | 15 | 16 | 17 | Thu, 04/21/21 18 | 8:00 p.m. 19 | 20 |
21 | 24 |
25 | Philadelphia, 26 | PA 27 |
28 |
29 | 30 |
31 | Priced from: $35 32 | 1938 tickets left 33 |
34 |
35 | 36 |
37 |
Pravir Gupta
38 | 39 | 40 | Last modified: Thu Jul 21 17:35:01 PDT 2011 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | $:.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 2 | $:.unshift File.dirname(__FILE__) 3 | 4 | require "bundler/setup" 5 | require 'rspec' 6 | require 'rdf/isomorphic' 7 | require 'rdf/turtle' 8 | require 'rdf/spec' 9 | require 'rdf/spec/matchers' 10 | 11 | ::RSpec.configure do |c| 12 | c.filter_run focus: true 13 | c.run_all_when_everything_filtered = true 14 | c.exclusion_filter = { 15 | } 16 | c.include(RDF::Spec::Matchers) 17 | end 18 | 19 | begin 20 | require 'simplecov' 21 | require 'simplecov-lcov' 22 | 23 | SimpleCov::Formatter::LcovFormatter.config do |config| 24 | #Coveralls is coverage by default/lcov. Send info results 25 | config.report_with_single_file = true 26 | config.single_report_path = 'coverage/lcov.info' 27 | end 28 | 29 | SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new([ 30 | SimpleCov::Formatter::HTMLFormatter, 31 | SimpleCov::Formatter::LcovFormatter 32 | ]) 33 | SimpleCov.start do 34 | add_filter "/spec/" 35 | end 36 | rescue LoadError => e 37 | STDERR.puts "Coverage Skipped: #{e.message}" 38 | end 39 | require 'rdf/microdata' 40 | 41 | # Heuristically detect the input stream 42 | def detect_format(stream) 43 | # Got to look into the file to see 44 | if stream.is_a?(IO) || stream.is_a?(StringIO) 45 | stream.rewind 46 | string = stream.read(1000) 47 | stream.rewind 48 | else 49 | string = stream.to_s 50 | end 51 | case string 52 | when / "https://ruby-rdf.github.io/rdf-microdata", 15 | "bug_tracker_uri" => "https://github.com/ruby-rdf/rdf-microdata/issues", 16 | "homepage_uri" => "https://github.com/ruby-rdf/rdf-microdata", 17 | "mailing_list_uri" => "https://lists.w3.org/Archives/Public/public-rdf-ruby/", 18 | "source_code_uri" => 
"https://github.com/ruby-rdf/rdf-microdata", 19 | } 20 | 21 | gem.authors = %w(Gregg Kellogg) 22 | gem.email = 'public-rdf-ruby@w3.org' 23 | 24 | gem.platform = Gem::Platform::RUBY 25 | gem.files = %w(AUTHORS README.md UNLICENSE VERSION) + Dir.glob('lib/**/*.rb') + Dir.glob('etc/*') 26 | gem.require_paths = %w(lib) 27 | 28 | gem.required_ruby_version = '>= 3.0' 29 | gem.requirements = [] 30 | gem.add_runtime_dependency 'rdf', '~> 3.3' 31 | gem.add_runtime_dependency 'rdf-rdfa', '~> 3.3' 32 | gem.add_runtime_dependency 'rdf-xsd', '~> 3.3' 33 | gem.add_runtime_dependency 'htmlentities', '~> 4.3' 34 | gem.add_runtime_dependency 'nokogiri' , '~> 1.15', '>= 1.15.4' 35 | 36 | gem.add_development_dependency 'equivalent-xml' , '~> 0.6' 37 | gem.add_development_dependency 'getoptlong', '~> 0.2' 38 | gem.add_development_dependency 'yard' , '~> 0.9' 39 | gem.add_development_dependency 'rspec', '~> 3.12' 40 | gem.add_development_dependency 'rspec-its', '~> 1.3' 41 | 42 | gem.add_development_dependency 'rdf-spec', '~> 3.3' 43 | gem.add_development_dependency 'rdf-turtle', '~> 3.3' 44 | gem.add_development_dependency 'rdf-isomorphic', '~> 3.3' 45 | gem.add_development_dependency 'json-ld', '~> 3.3' 46 | 47 | gem.post_install_message = nil 48 | end 49 | -------------------------------------------------------------------------------- /etc/doap.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Microdata reader for Ruby. 5 | 6 | 7 | 8 | 9 |

Project description for RDF::Microdata.

10 |

11 | RDF::Microdata is an Microdata reader for Ruby using the RDF.rb library suite. 12 |

13 |
14 |
Creator
15 | Gregg Kellogg 17 |
18 |
Created
19 |
Blog
http://greggkellogg.net/
20 |
Bug DB
21 | 22 | https://github.com/ruby-rdf/rdf-microdata/issues 23 | 24 |
25 |
Category
26 | Resource Description Framework 27 | for 28 | Ruby 29 |
30 |
Implements
31 | Microdata to RDF 32 |
33 |
Download
34 | https://rubygems.org/gems/rdf-microdata 35 |
36 |
Home Page
37 | https://github.com/ruby-rdf/rdf-microdata 38 |
39 |
License
40 | Public Domain 41 |
42 |
Mailing List
43 | http://lists.w3.org/Archives/Public/public-rdf-ruby/ 44 |
45 |
46 | 47 | 48 | -------------------------------------------------------------------------------- /etc/doap.nt: -------------------------------------------------------------------------------- 1 | . 2 | "Microdata reader for Ruby."@en . 3 | "\n RDF::Microdata is an Microdata reader for Ruby using the RDF.rb library suite.\n "@en . 4 | "RDF::Microdata" . 5 | . 6 | . 7 | . 8 | . 9 | . 10 | "2011-08-29"^^ . 11 | . 12 | . 13 | . 14 | "Ruby" . 15 | . 16 | . 17 | . 18 | . 19 | . 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | Community contributions are essential for keeping Ruby RDF great. We want to keep it as easy as possible to contribute changes that get things working in your environment. There are a few guidelines that we need contributors to follow so that we can have a chance of keeping on top of things. 4 | 5 | ## Development 6 | 7 | This repository uses [Git Flow](https://github.com/nvie/gitflow) to manage development and release activity. All submissions _must_ be on a feature branch based on the _develop_ branch to ease staging and integration. 8 | 9 | * Create or respond to an issue on the [GitHub Repository](https://github.com/ruby-rdf/rdf-microdata/issues) 10 | * Fork and clone the repo: 11 | `git clone git@github.com:your-username/rdf-microdata.git` 12 | * Install bundle: 13 | `bundle install` 14 | * Create tests in RSpec and make sure you achieve at least 90% code coverage for the feature you're adding or behavior being modified. 15 | * Push to your fork and [submit a pull request][pr]. 16 | 17 | ## Do's and Don'ts 18 | * Do your best to adhere to the existing coding conventions and idioms. 19 | * Don't use hard tabs, and don't leave trailing whitespace on any line. 20 | Before committing, run `git diff --check` to make sure of this. 
21 | * Do document every method you add using [YARD][] annotations. Read the 22 | [tutorial][YARD-GS] or just look at the existing code for examples. 23 | * Don't touch the `.gemspec` or `VERSION` files. If you need to change them, 24 | do so on your private branch only. 25 | * Do feel free to add yourself to the `CREDITS` file and the 26 | corresponding list in the `README`. Alphabetical order applies. 27 | * Don't touch the `AUTHORS` file. If your contributions are significant 28 | enough, be assured we will eventually add you in there. 29 | * Do note that in order for us to merge any non-trivial changes (as a rule 30 | of thumb, additions larger than about 15 lines of code), we need an 31 | explicit [public domain dedication][PDD] on record from you, 32 | which you will be asked to agree to on the first commit to a repo within the organization. 33 | Note that the agreement applies to all repos in the [Ruby RDF](https://github.com/ruby-rdf/) organization. 34 | 35 | [YARD]: https://yardoc.org/ 36 | [YARD-GS]: https://rubydoc.info/docs/yard/file/docs/GettingStarted.md 37 | [PDD]: https://unlicense.org/#unlicensing-contributions 38 | [pr]: https://github.com/ruby-rdf/rdf-microdata/compare/ 39 | -------------------------------------------------------------------------------- /History.md: -------------------------------------------------------------------------------- 1 | ### 0.2.5 2 | * If RDFa is loaded, don't assert text/html and :html content-type/extension, as RDFa will call out based on presence of @itemscope 3 | 4 | ### 0.2.4 5 | * Update contextual case based on LC spec change to use current_name instead of current_type when creating a predicate URI in contextual. 6 | * Add hCard and hCalendar vocabulary definitions. 7 | * Make sure registry_uri is processed for each invocation, allowing it to be passed as a parameter. 8 | * Add --registry argument to script/parse to allow it to be specified. 
9 | 10 | ### 0.2.3 11 | * Update to latest processing rules, including the use of a registry. 12 | * Updated microdata namespace. 13 | * Matcher output in TTL. 14 | * Complete REXML and Nokogiri proxies. 15 | * Added etc/registry.json as a copy of the registry used internally. 16 | * Update examples. 17 | * Parse with linkeddata options. 18 | * Use bundler for specs, if installed. 19 | * Always place md:item in a list. 20 | * Don't use nokogiri with jruby. 21 | * Depend on Nokogiri only for development. 22 | * Some examples. 23 | * Progress on separating HTML parsing to Nokogiri and REXML. 24 | * Sync with first HTML Data TF version of spec. 25 | * Recognize @datetime values with lexical form of xsd:duration and generate appropriately typed lite... 26 | * Generate lists for multi-valued properties. 27 | * Remove fallback_name and change fallback_type to current_type 28 | * Simplify generate_triples logic by removing old type and URI munging. 29 | 30 | ### 0.2.2 31 | * Remove non @item* processing 32 | * Sync to HTML Data TF version of spec: http://dvcs.w3.org/hg/htmldata/raw-file/24af1cde0da1/microdata-rdf/index.html 33 | ### 0.2.2 34 | * RDF.rb 0.3.4 compatibility. 35 | * Added format detection. 36 | 37 | ### 0.2.1 38 | * Fixed support for using the document base-uri to resolve relative URIs. 39 | 40 | ### 0.2.0 41 | * There is no longer any official way to generate RDF and use gem as an experimentation platform. 42 | * Use rdf_term-type property generation and remove option to set it. 43 | * Don't generate triple for html\>head\>title 44 | * \@datetime values are scanned lexically to find appropriate datatype 45 | 46 | ### 0.1.3 47 | * Fixed ruby 1.8 regular expression bug. 48 | 49 | ### 0.1.2 50 | * Added :rdf\_terms option to Reader to generate more RDF-friendly URIs from terms. 51 | 52 | ### 0.1.1 53 | * Fixed problem generating appropriate property URIs in Ruby 1.8. 54 | 55 | ### 0.1.0 56 | * Complete parser generates RDF. 
57 | 58 | ### 0.0.1 59 | * Initial release 60 | -------------------------------------------------------------------------------- /spec/format_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift "." 3 | require 'spec_helper' 4 | require 'rdf/spec/format' 5 | 6 | describe RDF::Microdata::Format do 7 | it_behaves_like 'an RDF::Format' do 8 | let(:format_class) {RDF::Microdata::Format} 9 | end 10 | 11 | describe ".for" do 12 | formats = [ 13 | :microdata, 14 | 'etc/doap.html', 15 | {:file_name => 'etc/doap.html'}, 16 | {file_extension: 'html'}, 17 | {:content_type => 'text/html'}, 18 | ].each do |arg| 19 | it "discovers with #{arg.inspect}" do 20 | expect([RDF::Microdata::Format, RDF::RDFa::Format]).to include RDF::Format.for(arg) 21 | end 22 | end 23 | 24 | { 25 | microdata: '
', 26 | }.each do |sym, str| 27 | it "detects #{sym}" do 28 | expect(described_class.for {str}).to eq described_class 29 | end 30 | end 31 | end 32 | 33 | describe "#to_sym" do 34 | specify {expect(described_class.to_sym).to eq :microdata} 35 | end 36 | 37 | # This interferes with RDFa. 38 | describe "#to_uri", skip: true do 39 | specify {expect(described_class.to_uri).to eq RDF::URI('http://www.w3.org/ns/formats/microdata')} 40 | end 41 | 42 | describe ".detect" do 43 | { 44 | itemprop: '
', 45 | itemtype: '
', 46 | itemref: '
', 47 | itemscope: '
', 48 | itemid: '
', 49 | }.each do |sym, str| 50 | it "detects #{sym}" do 51 | expect(described_class.detect(str)).to be_truthy 52 | end 53 | end 54 | 55 | { 56 | :n3 => "@prefix foo: .\nfoo:bar = { } .", 57 | nquads: " . ", 58 | rdfxml: '', 59 | jsonld: '{"@context" => "foo"}', 60 | :about => '
', 61 | :typeof => '
', 62 | resource: '
', 63 | :vocab => '
', 64 | :prefix => '
', 65 | property: '
', 66 | :ntriples => "
.", 67 | :multi_line => '\n \n "literal"\n .', 68 | :turtle => "@prefix foo: .\n foo:a foo:b .", 69 | :STRING_LITERAL1 => %( 'literal' .), 70 | :STRING_LITERAL2 => %( "literal" .), 71 | :STRING_LITERAL_LONG1 => %( '''\nliteral\n''' .), 72 | :STRING_LITERAL_LONG2 => %( """\nliteral\n""" .), 73 | }.each do |sym, str| 74 | it "does not detect #{sym}" do 75 | expect(described_class.detect(str)).to be_falsey 76 | end 77 | end 78 | end 79 | end 80 | -------------------------------------------------------------------------------- /spec/suite_spec.rb: -------------------------------------------------------------------------------- 1 | $:.unshift "." 2 | require 'spec_helper' 3 | 4 | describe RDF::Microdata::Reader do 5 | # W3C Microdata Test suite from FIXME 6 | describe "w3c microdata tests" do 7 | require 'suite_helper' 8 | MANIFEST = Fixtures::SuiteTest::BASE + "manifest.jsonld" 9 | 10 | {native: :native, RDFa: :rdfa}.each do |w, sym| 11 | describe w do 12 | Fixtures::SuiteTest::Manifest.open(MANIFEST) do |m| 13 | describe m.label do 14 | m.entries.each do |t| 15 | specify "#{t.name}: #{t.comment}" do 16 | t.logger = ::RDF::Spec.logger 17 | t.logger.info t.inspect 18 | t.logger.info "source:\n#{t.input}" 19 | 20 | if sym == :rdfa 21 | %w(0002 0003 0052 0053 0054 0067).include?(t.name.split.last) && skip("Not valid test for RDFa") 22 | %w(0026 0044).include?(t.name.split.last) && skip("Difference in subject for head/body elements") 23 | %w(0071 0073 0074).include?(t.name.split.last) && skip("No vocabulary expansion") 24 | %w(0075 0078).include?(t.name.split.last) && skip("Differences in number parsing") 25 | %w(0081 0082 0084).include?(t.name.split.last) && skip("No @itemprop-reverse") 26 | %w(0064).include?(t.name.split.last) && pending("Double use of itemref with different vocabularies") 27 | %w(0085).include?(t.name.split.last) && pending("Broke in Nokogiri 13.0") 28 | end 29 | 30 | reader = RDF::Microdata::Reader.open(t.action, 31 | base_uri: t.action, 32 | strict: 
true, 33 | validate: t.negative_test?, 34 | registry: t.registry, 35 | vocab_expansion: t.vocab_expansion, 36 | logger: t.logger, 37 | sym => true # Invoke appropriate writer 38 | ) 39 | expect(reader).to be_a RDF::Reader 40 | graph = RDF::Repository.new 41 | 42 | if t.positive_test? 43 | begin 44 | graph << reader 45 | rescue Exception => e 46 | expect(e.message).to produce("Not exception #{e.inspect}\n#{e.backtrace.join("\n")}", t.logger) 47 | end 48 | if t.evaluate? 49 | 50 | # Remove any rdfa:usesVocabulary property 51 | graph.query({predicate: RDF::RDFA.usesVocabulary}) do |st| 52 | graph.delete!(st) 53 | end 54 | output_graph = RDF::Graph.load(t.result, base_uri: t.action) 55 | expect(graph).to be_equivalent_graph(output_graph, t) 56 | else 57 | expect(graph).to be_a(RDF::Enumerable) 58 | end 59 | else 60 | expect { 61 | graph << reader 62 | expect(graph.dump(:ntriples)).to eql "not this" 63 | }.to raise_error(RDF::ReaderError) 64 | end 65 | end 66 | end 67 | end 68 | end 69 | end 70 | end 71 | end 72 | end unless ENV['CI'] # Skip for continuous integration -------------------------------------------------------------------------------- /lib/rdf/microdata/registry.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | module RDF::Microdata 3 | 4 | # Interface to registry 5 | class Registry 6 | # @return [RDF::URI] Prefix of vocabulary 7 | attr_reader :uri 8 | 9 | # @return [Hash] properties 10 | attr_reader :properties 11 | 12 | ## 13 | # Initialize the registry from a URI or file path 14 | # 15 | # @param [String] registry_uri 16 | def self.load_registry(registry_uri) 17 | return if @registry_uri == registry_uri 18 | 19 | json = RDF::Util::File.open_file(registry_uri) { |f| ::JSON.load(f) } 20 | 21 | @prefixes = {} 22 | json.each do |prefix, elements| 23 | next unless elements.is_a?(Hash) 24 | properties = elements.fetch("properties", {}) 25 | @prefixes[prefix] = Registry.new(prefix, properties) 26 | end 
27 | @registry_uri = registry_uri 28 | end 29 | 30 | ## 31 | # Initialize registry for a particular prefix URI 32 | # 33 | # @param [RDF::URI] prefixURI 34 | # @param [Hash] properties ({}) 35 | def initialize(prefixURI, properties = {}) 36 | @uri = prefixURI 37 | @properties = properties 38 | @property_base = prefixURI.to_s 39 | # Append a '#' for fragment if necessary 40 | @property_base += '#' unless %w(/ #).include?(@property_base[-1,1]) 41 | end 42 | 43 | ## 44 | # Find a registry entry given a type URI 45 | # 46 | # @param [RDF::URI] type 47 | # @return [Registry] 48 | def self.find(type) 49 | @prefixes ||= {} 50 | k = @prefixes.keys.detect {|key| type.to_s.index(key) == 0 } 51 | @prefixes[k] if k 52 | end 53 | 54 | ## 55 | # Generate a predicateURI given a `name` 56 | # 57 | # @param [#to_s] name 58 | # @param [RDF::URI] base_uri base URI for resolving `name`. 59 | # @return [RDF::URI] 60 | def predicateURI(name, base_uri) 61 | u = RDF::URI(name) 62 | # 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_ 63 | return u if u.absolute? 64 | 65 | n = frag_escape(name) 66 | if uri.nil? 67 | # 2) If current vocabulary from context is null, there can be no current vocabulary. 68 | # Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name 69 | u = RDF::URI(base_uri.to_s) 70 | u.fragment = frag_escape(name) 71 | u 72 | else 73 | # 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/). 
74 | RDF::URI(@property_base + n) 75 | end 76 | end 77 | 78 | ## 79 | # Yield a equivalentProperty or subPropertyOf if appropriate 80 | # 81 | # @param [RDF::URI] predicateURI 82 | # @yield equiv 83 | # @yieldparam [RDF::URI] equiv 84 | def expand(predicateURI) 85 | tok = tokenize(predicateURI) 86 | if @properties[tok].is_a?(Hash) 87 | value = @properties[tok].fetch("subPropertyOf", nil) 88 | value ||= @properties[tok].fetch("equivalentProperty", nil) 89 | 90 | Array(value).each {|equiv| yield RDF::URI(equiv)} 91 | end 92 | end 93 | 94 | ## 95 | # Turn a predicateURI into a simple token 96 | # @param [RDF::URI] predicateURI 97 | # @return [String] 98 | def tokenize(predicateURI) 99 | predicateURI.to_s.sub(@property_base, '') 100 | end 101 | 102 | ## 103 | # Fragment escape a name 104 | def frag_escape(name) 105 | name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase} 106 | end 107 | end 108 | 109 | end -------------------------------------------------------------------------------- /script/tc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'rubygems' 3 | $:.unshift(File.expand_path("../../lib", __FILE__)) 4 | require "bundler/setup" 5 | require 'logger' 6 | require 'rdf/microdata' 7 | require 'rdf/isomorphic' 8 | require 'rdf/vocab' 9 | require File.expand_path("../../spec/spec_helper", __FILE__) 10 | require File.expand_path("../../spec/suite_helper", __FILE__) 11 | require 'getoptlong' 12 | 13 | def run_tc(tc, **options) 14 | input_uri = tc.action 15 | 16 | STDOUT.write "run #{tc.attributes['@id']}" 17 | 18 | if options[:verbose] 19 | puts("\nTestCase: #{tc.inspect}") 20 | puts("\nInput:\n" + tc.input) 21 | puts("\nExpected:\n" + tc.expected) if tc.result 22 | end 23 | 24 | begin 25 | puts "open #{input_uri}" if options[:verbose] 26 | options = { 27 | base_uri: input_uri, 28 | registry: tc.registry 29 | }.merge(options) 30 | options[:validate] = 
!tc.positive_test? 31 | 32 | reader = RDF::Microdata::Reader.open(input_uri, **options) 33 | result = nil 34 | graph = RDF::Graph.new 35 | 36 | if tc.positive_test? 37 | begin 38 | graph << reader 39 | puts("\nTurtle:\n" + graph.dump(:ttl, standard_prefixes: true)) if options[:verbose] 40 | rescue Exception => e 41 | STDERR.puts "Unexpected exception: #{e.inspect}" if options[:verbose] 42 | result = "failed" 43 | end 44 | else 45 | begin 46 | graph << reader 47 | STDERR.puts "Expected exception" if options[:verbose] 48 | result = "failed" 49 | rescue RDF::ReaderError 50 | result = "passed" 51 | end 52 | end 53 | 54 | if tc.evaluate? && result.nil? 55 | # Remove any rdfa:usesVocabulary property 56 | graph.query({predicate: RDF::RDFA.usesVocabulary}) do |st| 57 | graph.delete!(st) 58 | end 59 | output_graph = RDF::Graph.load(tc.result, base_uri: tc.action) 60 | result = graph.isomorphic_with?(output_graph) ? "passed" : "failed" 61 | else 62 | result ||= "passed" 63 | end 64 | 65 | rescue Exception => e 66 | STDERR.puts "#{"exception:" unless options[:quiet]}: #{e}" 67 | if options[:quiet] 68 | return 69 | else 70 | raise 71 | end 72 | end 73 | 74 | options[:result_count][result] ||= 0 75 | options[:result_count][result] += 1 76 | 77 | puts " #{result}" 78 | end 79 | 80 | logger = Logger.new(STDERR) 81 | logger.level = Logger::WARN 82 | logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity}: #{msg}\n"} 83 | 84 | options = { 85 | output: STDOUT, 86 | logger: logger 87 | } 88 | 89 | opts = GetoptLong.new( 90 | ["--help", "-?", GetoptLong::NO_ARGUMENT], 91 | ["--debug", GetoptLong::NO_ARGUMENT], 92 | ["--quiet", GetoptLong::NO_ARGUMENT], 93 | ["--validate", GetoptLong::NO_ARGUMENT], 94 | ["--verbose", GetoptLong::NO_ARGUMENT] 95 | ) 96 | 97 | def help(**options) 98 | puts "Usage: #{$0} [options] [test-number ...]" 99 | puts "Options:" 100 | puts " --dump: Dump raw output, otherwise serialize to Ruby" 101 | puts " --debug Display detailed debug 
output" 102 | puts " --expand Expand graph with vocab_expansion option" 103 | puts " --quiet: Minimal output" 104 | puts " --validate: Validate input" 105 | puts " --verbose: Verbose processing" 106 | puts " --help,-?: This message" 107 | exit(0) 108 | end 109 | 110 | opts.each do |opt, arg| 111 | case opt 112 | when '--help' then help(**options) 113 | when '--debug' then logger.level = Logger::DEBUG 114 | when '--format' then options[:format] = arg.to_sym 115 | when '--quiet' 116 | options[:quiet] = true 117 | logger.level = Logger::FATAL 118 | when '--validate' then options[:validate] = true 119 | when '--verbose' then options[:verbose] = true 120 | end 121 | end 122 | 123 | result_count = {} 124 | 125 | Fixtures::SuiteTest::Manifest.open(Fixtures::SuiteTest::BASE + "manifest.jsonld") do |m| 126 | m.entries.each do |tc| 127 | next unless ARGV.empty? || ARGV.any? {|n| tc.id.match(/#{n}/)} 128 | run_tc(tc, result_count: result_count, **options) 129 | end 130 | end 131 | 132 | result_count.each do |result, count| 133 | puts "#{result}: #{count}" 134 | end 135 | -------------------------------------------------------------------------------- /lib/rdf/microdata/rdfa_reader.rb: -------------------------------------------------------------------------------- 1 | require 'rdf/rdfa' 2 | 3 | module RDF::Microdata 4 | ## 5 | # Update DOM to turn Microdata into RDFa and parse using the RDFa Reader 6 | class RdfaReader < RDF::RDFa::Reader 7 | # The transformed DOM using RDFa 8 | # @return [RDF::HTML::Document] 9 | attr_reader :rdfa 10 | 11 | def self.format(klass = nil) 12 | if klass.nil? 13 | RDF::Microdata::Format 14 | else 15 | super 16 | end 17 | end 18 | 19 | ## 20 | # Initializes the RdfaReader instance. 
21 | # 22 | # @param [IO, File, String] input 23 | # the input stream to read 24 | # @param [Hash{Symbol => Object}] options 25 | # any additional options (see `RDF::Reader#initialize`) 26 | # @return [reader] 27 | # @yield [reader] `self` 28 | # @yieldparam [RDF::Reader] reader 29 | # @yieldreturn [void] ignored 30 | # @raise [RDF::ReaderError] if _validate_ 31 | def initialize(input = $stdin, **options, &block) 32 | @options = options 33 | log_debug('', "using RDFa transformation reader") 34 | 35 | input = case input 36 | when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input 37 | else 38 | # Try to detect charset from input 39 | options[:encoding] ||= input.charset if input.respond_to?(:charset) 40 | 41 | # Otherwise, default is utf-8 42 | options[:encoding] ||= 'utf-8' 43 | options[:encoding] = options[:encoding].to_s if options[:encoding] 44 | begin 45 | input = input.read if input.respond_to?(:read) 46 | ::Nokogiri::HTML5(input.force_encoding(options[:encoding]), max_parse_errors: 1000) 47 | rescue LoadError, NoMethodError 48 | ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding]) 49 | end 50 | end 51 | 52 | # For all members having @itemscope 53 | input.css("[itemscope]").each do |item| 54 | # Get @itemtypes to create @type and @vocab 55 | item.attribute('itemscope').remove 56 | if item['itemtype'] 57 | # Only absolute URLs 58 | types = item.attribute('itemtype'). 59 | remove. 60 | to_s. 61 | split(/\s+/). 62 | select {|t| RDF::URI(t).absolute?} 63 | 64 | item['typeof'] = types.join(' ') unless types.empty? 
65 | if vocab = types.first 66 | vocab = begin 67 | type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') 68 | Registry.new(type_vocab) if type_vocab 69 | end 70 | item['vocab'] = vocab.uri.to_s if vocab 71 | end 72 | end 73 | item['typeof'] ||= '' 74 | 75 | # Change each itemid attribute to an resource attribute with the same value 76 | if item['itemid'] 77 | id = item.attribute('itemid').remove 78 | item['resource'] = id 79 | end 80 | end 81 | 82 | # Add @resource for all itemprop values of object based on a @data value 83 | input.css("object[itemprop][data]").each do |item| 84 | item['resource'] ||= item['data'] 85 | end 86 | 87 | # Replace all @itemprop values with @property 88 | input.css("[itemprop]").each {|item| item['property'] = item.attribute('itemprop').remove} 89 | 90 | # Wrap all @itemref properties 91 | input.css("[itemref]").each do |item| 92 | item_vocab = item['vocab'] || item.ancestors.detect {|a| a.attribute('vocab')} 93 | item_vocab = item_vocab.to_s if item_vocab 94 | 95 | item.attribute('itemref').remove.to_s.split(/\s+/).each do |ref| 96 | if referenced = input.css("##{ref}") 97 | # Add @vocab to referenced using the closest ansestor having @vocab of item. 98 | # If the element with id reference has no resource attribute, add a resource attribute whose value is a NUMBER SIGN U+0023 followed by reference to the element. 99 | # If the element with id reference has no typeof attribute, add a typeof="rdfa:Pattern" attribute to the element. 100 | referenced.wrap(%(
)) 101 | 102 | # Add a link child element to the element that represents the item, with a rel="rdfa:copy" attribute and an href attribute whose value is a NUMBER SIGN U+0023 followed by reference 103 | link = ::Nokogiri::XML::Node.new('link', input) 104 | link['rel'] = 'rdfa:copy' 105 | link['href'] = "##{ref}" 106 | item << link 107 | end 108 | end 109 | end 110 | 111 | @rdfa = input 112 | log_debug('', "Transformed document: #{input.to_html}") 113 | 114 | options = options.merge( 115 | library: :nokogiri, 116 | reference_folding: true, 117 | host_language: :html5, 118 | version: :"rdfa1.1") 119 | 120 | # Rely on RDFa reader 121 | super(input, **options, &block) 122 | end 123 | end 124 | end -------------------------------------------------------------------------------- /spec/expansion_spec.rb: -------------------------------------------------------------------------------- 1 | $:.unshift "." 2 | require 'spec_helper' 3 | 4 | # Class for abstract testing of module 5 | class ExpansionTester 6 | include RDF::Microdata::Expansion 7 | include RDF::Enumerable 8 | include RDF::Util::Logger 9 | 10 | attr_reader :id, :repo, :action, :result, :options 11 | attr_accessor :format 12 | 13 | def initialize(name) 14 | @id = name 15 | @repo = RDF::Repository.new 16 | @options = {logger: RDF::Spec.logger} 17 | 18 | super() 19 | end 20 | 21 | def graph 22 | @repo 23 | end 24 | 25 | def each_statement(&block); @repo.each_statement(&block); end 26 | 27 | def load(elements) 28 | result = nil 29 | elements.each do |context, ttl| 30 | case context 31 | when :default 32 | @action = ttl 33 | @repo << parse(ttl) 34 | when :result 35 | result = ttl 36 | result = parse(ttl) 37 | end 38 | end 39 | 40 | result 41 | end 42 | 43 | def parse(ttl) 44 | RDF::Graph.new << RDF::Turtle::Reader.new(ttl, 45 | logger: false, 46 | prefixes: { 47 | foaf: RDF::URI("http://xmlns.com/foaf/0.1/"), 48 | owl: RDF::OWL.to_uri, 49 | rdf: RDF.to_uri, 50 | rdfa: RDF::RDFA.to_uri, 51 | rdfs: RDF::RDFS.to_uri, 52 | 
xsd: RDF::XSD.to_uri, 53 | ex: RDF::URI("http://example.org/vocab#"), 54 | nil => "http://example.org/", 55 | }) 56 | end 57 | end 58 | 59 | describe RDF::Microdata::Expansion do 60 | let(:logger) {RDF::Spec.logger} 61 | 62 | describe :owl_entailment do 63 | { 64 | "empty" => { 65 | default: %q(), 66 | result: %q() 67 | }, 68 | "simple" => { 69 | default: %q(:a a rdfs:Class .), 70 | result: %q(:a a rdfs:Class .) 71 | }, 72 | "prp-spo1" => { 73 | default: %q( 74 | <#me> :name "Gregg Kellogg" . 75 | :name rdfs:subPropertyOf foaf:name . 76 | ), 77 | result: %q( 78 | <#me> :name "Gregg Kellogg"; foaf:name "Gregg Kellogg" . 79 | :name rdfs:subPropertyOf foaf:name . 80 | ) 81 | }, 82 | "prp-eqp1" => { 83 | default: %q( 84 | <#me> :name "Gregg Kellogg" . 85 | :name owl:equivalentProperty foaf:name . 86 | ), 87 | result: %q( 88 | <#me> :name "Gregg Kellogg"; foaf:name "Gregg Kellogg" . 89 | :name owl:equivalentProperty foaf:name . 90 | ) 91 | }, 92 | "prp-eqp2" => { 93 | default: %q( 94 | <#me> foaf:name "Gregg Kellogg" . 95 | :name owl:equivalentProperty foaf:name . 96 | ), 97 | result: %q( 98 | <#me> :name "Gregg Kellogg"; foaf:name "Gregg Kellogg" . 99 | :name owl:equivalentProperty foaf:name . 100 | ) 101 | }, 102 | }.each do |test, elements| 103 | it test do 104 | mt = ExpansionTester.new(test) 105 | result = mt.load(elements) 106 | mt.send(:owl_entailment, mt.repo) 107 | expect(mt.graph).to be_equivalent_graph(result, mt) 108 | end 109 | end 110 | end 111 | 112 | describe :expand do 113 | { 114 | "simple" => { 115 | default: %q( rdfa:usesVocabulary ex: .), 116 | result: %q( rdfa:usesVocabulary ex: .) 117 | }, 118 | "prp-spo1" => { 119 | default: %q( 120 | rdfa:usesVocabulary ex: . 121 | <#me> ex:name "Gregg Kellogg" . 122 | ex:name rdfs:subPropertyOf foaf:name . 123 | ), 124 | result: %q( 125 | rdfa:usesVocabulary ex: . 126 | <#me> ex:name "Gregg Kellogg"; 127 | foaf:name "Gregg Kellogg" . 128 | ex:name rdfs:subPropertyOf foaf:name . 
129 | ) 130 | }, 131 | "prp-eqp1" => { 132 | default: %q( 133 | rdfa:usesVocabulary ex: . 134 | <#me> ex:name "Gregg Kellogg" . 135 | ex:name owl:equivalentProperty foaf:name . 136 | ), 137 | result: %q( 138 | rdfa:usesVocabulary ex: . 139 | <#me> ex:name "Gregg Kellogg"; 140 | foaf:name "Gregg Kellogg" . 141 | ex:name owl:equivalentProperty foaf:name . 142 | ) 143 | }, 144 | "prp-eqp2" => { 145 | default: %q( 146 | rdfa:usesVocabulary ex: . 147 | <#me> foaf:name "Gregg Kellogg" . 148 | ex:name owl:equivalentProperty foaf:name . 149 | ), 150 | result: %q( 151 | rdfa:usesVocabulary ex: . 152 | <#me> ex:name "Gregg Kellogg"; 153 | foaf:name "Gregg Kellogg" . 154 | ex:name owl:equivalentProperty foaf:name . 155 | ) 156 | }, 157 | }.each do |test, elements| 158 | it test do 159 | mt = ExpansionTester.new(test) 160 | result = mt.load(elements) 161 | vocab = RDF::URI("http://example.org/vocab#") 162 | graph = RDF::Graph.new 163 | expect(RDF::Graph).to receive(:new).at_least(1).times.and_return(graph) 164 | graph = mt.expand 165 | expect(graph).to be_equivalent_graph(result, mt) 166 | end 167 | end 168 | end 169 | end 170 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RDF::Microdata reader/writer 2 | 3 | [Microdata][] parser for RDF.rb. 
4 | 5 | [![Gem Version](https://badge.fury.io/rb/rdf-microdata.svg)](https://badge.fury.io/rb/rdf-microdata) 6 | [![Build Status](https://github.com/ruby-rdf/rdf-microdata/workflows/CI/badge.svg?branch=develop)](https://github.com/ruby-rdf/rdf-microdata/actions?query=workflow%3ACI) 7 | [![Coverage Status](https://coveralls.io/repos/ruby-rdf/rdf-microdata/badge.svg?branch=develop)](https://coveralls.io/github/ruby-rdf/rdf-microdata?branch=develop) 8 | [![Gitter chat](https://badges.gitter.im/ruby-rdf/rdf.png)](https://gitter.im/ruby-rdf/rdf) 9 | 10 | ## DESCRIPTION 11 | RDF::Microdata is a Microdata reader for Ruby using the [RDF.rb][RDF.rb] library suite. 12 | 13 | ## FEATURES 14 | RDF::Microdata parses [Microdata][] into statements or triples using the rules defined in [Microdata RDF][]. 15 | 16 | * Microdata parser. 17 | * Uses Nokogiri for parsing HTML 18 | 19 | Install with 'gem install rdf-microdata' 20 | 21 | ### Living implementation 22 | Microdata to RDF transformation is undergoing active development. This implementation attempts to be up-to-date 23 | as of the time of release, and is being used in developing the [Microdata RDF][] specification. 24 | 25 | This implementation includes support for the proposed [``@itemprop-reverse``](https://www.w3.org/wiki/WebSchemas/InverseProperties#Proposed_Action:_New_attribute_.40itemprop-reverse) attribute. 26 | 27 | ### Microdata Registry 28 | The parser uses a built-in version of the [Microdata RDF][] registry. 29 | 30 | ## Usage 31 | 32 | ### Reading RDF data in the Microdata format 33 | 34 | require 'rdf/microdata' 35 | graph = RDF::Graph.load("etc/doap.html", format: :microdata) 36 | 37 | ### Reading using content-negotiation 38 | 39 | require 'rdf/microdata' 40 | graph = RDF::Graph.load("etc/doap.html", content_type: "text/html") 41 | 42 | ## Note 43 | This spec is based on the W3C HTML Data Task Force specification and does not support 44 | GRDDL-type triple generation, such as for html>head>title anchor tags. 
45 | 46 | If the `RDFa` parser is available, {RDF::Microdata::Format} will not assert content type `text/html` or file extension `.html`, as this is also asserted by RDFa. Instead, the RDFa reader will invoke the microdata reader if an `@itemscope` attribute is detected. 47 | 48 | ## Dependencies 49 | * [RDF.rb](https://rubygems.org/gems/rdf) (~> 3.3) 50 | * [RDF::RDFa](https://rubygems.org/gems/rdf-rdfa) (~> 3.3) 51 | * [RDF::XSD](https://rubygems.org/gems/rdf-xsd) (~> 3.3) 52 | * [HTMLEntities](https://rubygems.org/gems/htmlentities) ('~> 4.3') 53 | * [Nokogiri](https://rubygems.org/gems/nokogiri) (~> 1.15) 54 | 55 | ## Documentation 56 | Full documentation available on [Rubydoc.info][Microdata doc] 57 | 58 | ### Principal Classes 59 | * {RDF::Microdata::Format} 60 | Asserts :html format, text/html mime-type and .html file extension. 61 | * {RDF::Microdata::Reader} 62 | * {RDF::Microdata::Reader::Nokogiri} 63 | 64 | 65 | ### RDFa-based Reader 66 | There is an experimental reader based on transforming Microdata to RDFa within the DOM. To invoke 67 | this, add the `rdfa: true` option to the {RDF::Microdata::Reader.new}, or 68 | use {RDF::Microdata::RdfaReader} directly. 69 | 70 | The reader exposes a `#rdfa` method, which can be used to retrieve the transformed HTML+RDFa 71 | 72 | ## Resources 73 | * [RDF.rb][RDF.rb] 74 | * [Documentation](https://ruby-rdf.github.io/rdf-microdata/) 75 | * [History](file:History.md) 76 | * [Microdata][] 77 | * [Microdata RDF][] 78 | 79 | ## Author 80 | * [Gregg Kellogg](https://github.com/gkellogg) - 81 | 82 | ## Change Log 83 | 84 | See [Release Notes on GitHub](https://github.com/ruby-rdf/rdf-microdata/releases) 85 | 86 | ## Contributing 87 | 88 | * Do your best to adhere to the existing coding conventions and idioms. 89 | * Don't use hard tabs, and don't leave trailing whitespace on any line. 90 | * Do document every method you add using [YARD][] annotations. 
Read the 91 | [tutorial][YARD-GS] or just look at the existing code for examples. 92 | * Don't touch the `.gemspec`, `VERSION` or `AUTHORS` files. If you need to 93 | change them, do so on your private branch only. 94 | * Do feel free to add yourself to the `CREDITS` file and the corresponding 95 | list in the the `README`. Alphabetical order applies. 96 | * Do note that in order for us to merge any non-trivial changes (as a rule 97 | of thumb, additions larger than about 15 lines of code), we need an 98 | explicit [public domain dedication][PDD] on record from you, 99 | which you will be asked to agree to on the first commit to a repo within the organization. 100 | Note that the agreement applies to all repos in the [Ruby RDF](https://github.com/ruby-rdf/) organization. 101 | 102 | ## License 103 | 104 | This is free and unencumbered public domain software. For more information, 105 | see or the accompanying {file:UNLICENSE} file. 106 | 107 | ## FEEDBACK 108 | 109 | * gregg@greggkellogg.net 110 | * 111 | * 112 | * 113 | 114 | [RDF.rb]: https://github.com/ruby-rdf/rdf 115 | [YARD]: https://yardoc.org/ 116 | [YARD-GS]: https://rubydoc.info/docs/yard/file/docs/GettingStarted.md 117 | [PDD]: https://unlicense.org/#unlicensing-contributions 118 | [Microdata]: https://dev.w3.org/html5/md/Overview.html "HTML Microdata" 119 | [Microdata RDF]: https://dvcs.w3.org/hg/htmldata/raw-file/default/microdata-rdf/index.html "Microdata to RDF" 120 | [Microdata doc]: https://ruby-rdf.github.io/rdf-microdata/frames 121 | -------------------------------------------------------------------------------- /script/parse: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'rubygems' 3 | $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), "..", 'lib'))) 4 | require "bundler/setup" 5 | require 'logger' 6 | require 'rdf/microdata' 7 | begin 8 | require 'linkeddata' 9 | rescue LoadError 10 | require 'rdf/turtle' 11 | 
end 12 | require 'getoptlong' 13 | require 'open-uri' 14 | 15 | def run(input, options) 16 | reader_class = RDF::Reader.for(options[:input_format].to_sym) 17 | raise "Reader not found for #{$input_format}" unless reader_class 18 | 19 | start = Time.new 20 | num = 0 21 | 22 | case options[:output_format] 23 | when :ntriples, :quiet 24 | reader_class.new(input, options).each do |statement| 25 | num += 1 26 | if options[:quiet] 27 | #print "." 28 | else 29 | options[:output].puts statement.to_ntriples 30 | end 31 | end 32 | when :rdfa 33 | xsl = Nokogiri::XSLT(%( 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | ).gsub(/^ /, '')) 79 | reader_class.new(input, options.merge(rdfa: true)) do |reader| 80 | reader.rdfa.xpath("//text()").each do |txt| 81 | txt.content = txt.content.to_s.strip 82 | end 83 | options[:output].puts xsl.apply_to(reader.rdfa).to_s 84 | end 85 | when :inspect 86 | reader_class.new(input, options).each do |statement| 87 | num += 1 88 | options[:output].puts statement.inspect 89 | end 90 | else 91 | r = reader_class.new(input, options) 92 | g = RDF::Graph.new << r 93 | num = g.count 94 | options[:output].puts g.dump(options[:output_format], options.merge(prefixes: r.prefixes)) 95 | end 96 | STDERR.puts 97 | secs = Time.new - start 98 | STDERR.puts "Parsed #{num} statements in #{secs} seconds @ #{num/secs} statements/second." 99 | rescue 100 | fname = input.respond_to?(:path) ? 
input.path : "-stdin-" 101 | STDERR.puts("Error in #{fname}") 102 | raise 103 | end 104 | 105 | logger = Logger.new(STDERR) 106 | logger.level = Logger::WARN 107 | logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity}: #{msg}\n"} 108 | 109 | options = { 110 | verbose: false, 111 | validate: false, 112 | rdfa: false, 113 | logger: logger, 114 | output: STDOUT, 115 | output_format: :turtle, 116 | input_format: :microdata, 117 | standard_prefixes: true, 118 | base_uri: "http://example.com", 119 | } 120 | input = nil 121 | 122 | opts = GetoptLong.new( 123 | ["--debug", GetoptLong::NO_ARGUMENT], 124 | ["--execute", "-e", GetoptLong::REQUIRED_ARGUMENT], 125 | ["--format", GetoptLong::REQUIRED_ARGUMENT], 126 | ["--input-format", GetoptLong::REQUIRED_ARGUMENT], 127 | ["--output", "-o", GetoptLong::REQUIRED_ARGUMENT], 128 | ["--quiet", GetoptLong::NO_ARGUMENT], 129 | ["--registry", GetoptLong::REQUIRED_ARGUMENT], 130 | ["--template", GetoptLong::REQUIRED_ARGUMENT], 131 | ["--uri", GetoptLong::REQUIRED_ARGUMENT], 132 | ["--validate", GetoptLong::NO_ARGUMENT], 133 | ["--verbose", GetoptLong::NO_ARGUMENT] 134 | ) 135 | opts.each do |opt, arg| 136 | case opt 137 | when '--debug' then logger.level = Logger::DEBUG 138 | when '--execute' then input = arg 139 | when '--format' then options[:output_format] = arg.to_sym 140 | when '--input-format' then options[:input_format] = arg.to_sym 141 | when '--quiet' 142 | options[:quiet] = options[:quiet].to_i + 1 143 | logger.level = Logger::FATAL 144 | when '--output' then options[:output] = File.open(arg, "w") 145 | when '--registry' then options[:registry_uri] = arg 146 | when '--template' then options[:haml] = arg.to_sym 147 | when '--uri' then options[:base_uri] = arg 148 | when '--verbose' then options[:verbose] = true 149 | when '--validate' then options[:validate] = true 150 | end 151 | end 152 | 153 | if ARGV.empty? 154 | s = input ? 
input : $stdin.read 155 | run(StringIO.new(s), options) 156 | else 157 | ARGV.each do |test_file| 158 | run(Kernel.open(test_file), options) 159 | end 160 | end 161 | -------------------------------------------------------------------------------- /lib/rdf/microdata/format.rb: -------------------------------------------------------------------------------- 1 | # Attempt to load RDF::RDFa first, so that RDF::Format.for(:rdfa) is defined 2 | begin 3 | require 'rdf/rdfa' 4 | rescue LoadError 5 | # Soft error 6 | end 7 | 8 | module RDF::Microdata 9 | ## 10 | # Microdata format specification. 11 | # 12 | # @example Obtaining a Microdata format class 13 | # RDF::Format.for(:microdata) #=> RDF::Microdata::Format 14 | # RDF::Format.for("etc/foaf.html") 15 | # RDF::Format.for(:file_name => "etc/foaf.html") 16 | # RDF::Format.for(file_extension: "html") 17 | # RDF::Format.for(:content_type => "text/html") 18 | # 19 | # @example Obtaining serialization format MIME types 20 | # RDF::Format.content_types #=> {"text/html" => [RDF::Microdata::Format]} 21 | # 22 | # @see https://www.w3.org/TR/rdf-testcases/#ntriples 23 | class Format < RDF::Format 24 | content_encoding 'utf-8' 25 | 26 | # Only define content type if RDFa is not available. 27 | # The Microdata processor will be launched from there 28 | # otherwise. 29 | unless RDF::Format.for(:rdfa) 30 | content_type 'text/html;q=0.5', 31 | extension: :html, 32 | uri: 'http://www.w3.org/ns/formats/microdata' 33 | end 34 | reader { RDF::Microdata::Reader } 35 | 36 | ## 37 | # Sample detection to see if it matches Microdata (not RDF/XML or RDFa) 38 | # 39 | # Use a text sample to detect the format of an input file. Sub-classes implement 40 | # a matcher sufficient to detect probably format matches, including disambiguating 41 | # between other similar formats. 42 | # 43 | # @param [String] sample Beginning several bytes (~ 1K) of input. 
44 | # @return [Boolean] 45 | def self.detect(sample) 46 | !!sample.match(/<[^>]*(itemprop|itemtype|itemref|itemscope|itemid)[^>]*>/m) 47 | end 48 | 49 | ## 50 | # Hash of CLI commands appropriate for this format 51 | # @return [Hash{Symbol => Hash}] 52 | def self.cli_commands 53 | { 54 | "to-rdfa": { 55 | description: "Transform HTML+Microdata into HTML+RDFa", 56 | parse: false, 57 | help: "to-rdfa files ...\nTransform HTML+Microdata into HTML+RDFa", 58 | filter: { 59 | format: :microdata 60 | }, 61 | option_use: {output_format: :disabled}, 62 | lambda: ->(files, **options) do 63 | out = options[:output] || $stdout 64 | xsl = Nokogiri::XSLT(%( 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | ).gsub(/^ /, '')) 110 | if files.empty? 111 | # If files are empty, either use options[::evaluate] 112 | input = options[:evaluate] ? 
StringIO.new(options[:evaluate]) : STDIN 113 | input.set_encoding(options.fetch(:encoding, Encoding::UTF_8)) 114 | RDF::Microdata::Reader.new(input, **options.merge(rdfa: true)) do |reader| 115 | reader.rdfa.xpath("//text()").each do |txt| 116 | txt.content = txt.content.to_s.strip 117 | end 118 | out.puts xsl.apply_to(reader.rdfa).to_s 119 | end 120 | else 121 | files.each do |file| 122 | RDF::Microdata::Reader.open(file, **options.merge(rdfa: true)) do |reader| 123 | reader.rdfa.xpath("//text()").each do |txt| 124 | txt.content = txt.content.to_s.strip 125 | end 126 | out.puts xsl.apply_to(reader.rdfa).to_s 127 | end 128 | end 129 | end 130 | end 131 | }, 132 | } 133 | end 134 | end 135 | end 136 | -------------------------------------------------------------------------------- /lib/rdf/microdata/expansion.rb: -------------------------------------------------------------------------------- 1 | module RDF::Microdata 2 | ## 3 | # The Expansion module performs a subset of OWL entailment rules on the base class, 4 | # which implementes RDF::Readable. 5 | module Expansion 6 | ## 7 | # Perform vocabulary expansion on the resulting default graph. 8 | # 9 | # Vocabulary expansion relies on a sub-set of OWL [OWL2-PROFILES](http://www.w3.org/TR/2009/REC-owl2-profiles-20091027/#Reasoning_in_OWL_2_RL_and_RDF_Graphs_using_Rules) entailment to add triples to the default graph based on rules and property/class relationships described in referenced vocabularies. 10 | # 11 | # For all objects that are the target of an rdfa:usesVocabulary property, load the IRI into a repository. 12 | # 13 | # Subsequently, perform OWL expansion using rules prp-spo1, prp-eqp1, and prp-eqp2 placing resulting triples into the default graph. Iterate on this step until no more triples are added. 14 | # 15 | # @example 16 | # scm-spo 17 | # {pq rdfs:subPropertyOf pw . pw rdfs:subPropertyOf p3} 18 | # => {p1 rdfs:subPropertyOf p3} 19 | # 20 | # rdprp-spo1fs7 21 | # {p1 rdfs:subPropertyOf p2 . 
x p1 y} => {x p2 y} 22 | # 23 | # @return [RDF::Graph] 24 | # @see [OWL2 PROFILES][] 25 | def expand 26 | repo = RDF::Repository.new 27 | repo << self # Add default graph 28 | 29 | log_debug("expand") {"Loaded #{repo.size} triples into default graph"} 30 | 31 | repo = owl_entailment(repo) 32 | 33 | # Return graph with default graph 34 | graph = RDF::Graph.new 35 | repo.statements.each {|st| graph << st} 36 | graph 37 | end 38 | 39 | def rule(name, &block) 40 | Rule.new(name, **@options, &block) 41 | end 42 | 43 | ## 44 | # An entailment rule 45 | # 46 | # Takes a list of antecedent patterns used to find solutions against a queryable 47 | # object. Yields each consequent with bindings from the solution 48 | class Rule 49 | include RDF::Util::Logger 50 | 51 | # @!attribute [r] 52 | # @return [Array] patterns necessary to invoke this rule 53 | attr_reader :antecedents 54 | 55 | # @!attribute [r] consequents 56 | # @return [Array] result of this rule 57 | attr_reader :consequents 58 | 59 | # @!attribute [r] name 60 | # @return [String] Name of this rule 61 | attr_reader :name 62 | 63 | ## 64 | # @example 65 | # r = Rule.new("scm-spo") do 66 | # antecedent :p1, RDF::RDFS.subPropertyOf, :p2 67 | # antecedent :p2, RDF::RDFS.subPropertyOf, :p3 68 | # consequent :p1, RDF::RDFS.subPropertyOf, :p3, "t-box" 69 | # end 70 | # 71 | # r.execute(queryable) {|statement| puts statement.inspect} 72 | # 73 | # @param [String] name 74 | def initialize(name, **options, &block) 75 | @antecedents = [] 76 | @consequents = [] 77 | @options = options.dup 78 | @name = name 79 | 80 | if block_given? 
81 | case block.arity 82 | when 1 then block.call(self) 83 | else instance_eval(&block) 84 | end 85 | end 86 | end 87 | 88 | def antecedent(subject, prediate, object) 89 | antecedents << RDF::Query::Pattern.new(subject, prediate, object) 90 | end 91 | 92 | def consequent(subject, prediate, object) 93 | consequents << RDF::Query::Pattern.new(subject, prediate, object) 94 | end 95 | 96 | ## 97 | # Execute the rule against queryable, yielding each consequent with bindings 98 | # 99 | # @param [RDF::Queryable] queryable 100 | # @yield [statement] 101 | # @yieldparam [RDF::Statement] statement 102 | def execute(queryable) 103 | RDF::Query.new(antecedents).execute(queryable).each do |solution| 104 | nodes = {} 105 | consequents.each do |consequent| 106 | terms = {} 107 | [:subject, :predicate, :object].each do |r| 108 | terms[r] = case o = consequent.send(r) 109 | when RDF::Node then nodes[o] ||= RDF::Node.new 110 | when RDF::Query::Variable then solution[o] 111 | else o 112 | end 113 | end 114 | 115 | yield RDF::Statement.from(terms) 116 | end 117 | end 118 | end 119 | end 120 | 121 | private 122 | 123 | RULES = [ 124 | Rule.new("prp-spo1") do 125 | antecedent :p1, RDF::RDFS.subPropertyOf, :p2 126 | antecedent :x, :p1, :y 127 | consequent :x, :p2, :y 128 | end, 129 | Rule.new("prp-eqp1") do 130 | antecedent :p1, RDF::OWL.equivalentProperty, :p2 131 | antecedent :x, :p1, :y 132 | consequent :x, :p2, :y 133 | end, 134 | Rule.new("prp-eqp2") do 135 | antecedent :p1, RDF::OWL.equivalentProperty, :p2 136 | antecedent :x, :p2, :y 137 | consequent :x, :p1, :y 138 | end, 139 | ] 140 | 141 | ## 142 | # Perform OWL entailment rules on enumerable 143 | # @param [RDF::Enumerable] repo 144 | # @return [RDF::Enumerable] 145 | def owl_entailment(repo) 146 | old_count = 0 147 | 148 | while old_count < (count = repo.count) 149 | log_debug("entailment", "old: #{old_count} count: #{count}") 150 | old_count = count 151 | 152 | RULES.each do |rule| 153 | rule.execute(repo) do |statement| 
154 | log_debug("entailment(#{rule.name})") {statement.inspect} 155 | repo << statement 156 | end 157 | end 158 | end 159 | 160 | log_debug("entailment", "final count: #{count}") 161 | repo 162 | end 163 | end 164 | end 165 | -------------------------------------------------------------------------------- /spec/suite_helper.rb: -------------------------------------------------------------------------------- 1 | $:.unshift "." 2 | require 'spec_helper' 3 | require 'rdf/turtle' 4 | require 'open-uri' 5 | 6 | # For now, override RDF::Utils::File.open_file to look for the file locally before attempting to retrieve it 7 | module RDF::Util 8 | module File 9 | REMOTE_PATH = "http://w3c.github.io/microdata-rdf/tests/" 10 | LOCAL_PATH = ::File.expand_path("../spec-tests", __FILE__) + '/' 11 | 12 | class << self 13 | alias_method :original_open_file, :open_file 14 | end 15 | 16 | ## 17 | # Override to use Patron for http and https, Kernel.open otherwise. 18 | # 19 | # @param [String] filename_or_url to open 20 | # @param [Hash{Symbol => Object}] options 21 | # @option options [Array, String] :headers 22 | # HTTP Request headers. 
23 | # @return [IO] File stream 24 | # @yield [IO] File stream 25 | def self.open_file(filename_or_url, **options, &block) 26 | case 27 | when filename_or_url.to_s =~ /^file:/ 28 | path = filename_or_url[5..-1] 29 | Kernel.open(path.to_s, options, &block) 30 | when (filename_or_url.to_s =~ %r{^#{REMOTE_PATH}} && Dir.exist?(LOCAL_PATH)) 31 | #puts "attempt to open #{filename_or_url} locally" 32 | localpath = filename_or_url.to_s.sub(REMOTE_PATH, LOCAL_PATH) 33 | response = begin 34 | ::File.open(localpath) 35 | rescue Errno::ENOENT => e 36 | raise IOError, e.message 37 | end 38 | document_options = { 39 | base_uri: RDF::URI(filename_or_url), 40 | charset: Encoding::UTF_8, 41 | code: 200, 42 | headers: {} 43 | } 44 | #puts "use #{filename_or_url} locally" 45 | document_options[:headers][:content_type] = case filename_or_url.to_s 46 | when /\.html$/ then 'text/html' 47 | when /\.xhtml$/ then 'application/xhtml+xml' 48 | when /\.xml$/ then 'application/xml' 49 | when /\.svg$/ then 'image/svg+xml' 50 | when /\.ttl$/ then 'text/turtle' 51 | when /\.ttl$/ then 'text/turtle' 52 | when /\.jsonld$/ then 'application/ld+json' 53 | when /\.json$/ then 'application/json' 54 | else 'unknown' 55 | end 56 | 57 | document_options[:headers][:content_type] = response.content_type if response.respond_to?(:content_type) 58 | # For overriding content type from test data 59 | document_options[:headers][:content_type] = options[:contentType] if options[:contentType] 60 | 61 | remote_document = RDF::Util::File::RemoteDocument.new(response.read, **document_options) 62 | if block_given? 
63 | yield remote_document 64 | else 65 | remote_document 66 | end 67 | else 68 | original_open_file(filename_or_url, **options, &block) 69 | end 70 | end 71 | end 72 | end 73 | 74 | module JSON::LD 75 | # Simple Ruby reflector class to provide native 76 | # access to JSON-LD objects 77 | class Resource 78 | # Object representation of resource 79 | # @attr [Hash Object] attributes 80 | attr :attributes 81 | 82 | # ID of this resource 83 | # @attr [String] id 84 | attr :id 85 | 86 | # A new resource from the parsed graph 87 | # @param [Hash{String => Object}] node_definition 88 | def initialize(node_definition) 89 | @attributes = node_definition 90 | @attributes.delete('@context') # Don't store with object 91 | @id = @attributes['@id'] 92 | @anon = @id.nil? || @id.to_s[0,2] == '_:' 93 | end 94 | 95 | # Values of all properties other than id and type 96 | def property_values 97 | attributes.dup.delete_if {|k, v| %(id type).include?(k)}.values 98 | end 99 | 100 | # Access individual fields, from subject definition 101 | def property(prop_name); @attributes.fetch(prop_name, nil); end 102 | 103 | # Access individual fields, from subject definition 104 | def method_missing(method, *args) 105 | property(method.to_s) 106 | end 107 | 108 | def inspect 109 | "" 114 | end 115 | end 116 | end 117 | 118 | module Fixtures 119 | module SuiteTest 120 | BASE = RDF::URI("http://w3c.github.io/microdata-rdf/tests/") 121 | class Manifest < JSON::LD::Resource 122 | def self.open(file, &block) 123 | #puts "open: #{file}" 124 | RDF::Util::File.open_file(file) do |f| 125 | json = JSON.parse(f.read) 126 | block.call(self.from_jsonld(json['@graph'].first)) 127 | end 128 | end 129 | 130 | # @param [Hash] json framed JSON-LD 131 | # @return [Array] 132 | def self.from_jsonld(json) 133 | Manifest.new(json) 134 | end 135 | 136 | def entries 137 | # Map entries to resources 138 | attributes['entries'].map {|e| Entry.new(e)} 139 | end 140 | end 141 | 142 | class Entry < JSON::LD::Resource 143 | 
attr_accessor :logger 144 | 145 | # Alias data and query 146 | def action 147 | BASE.join(property('action')) 148 | end 149 | 150 | def input 151 | RDF::Util::File.open_file(action).read 152 | end 153 | 154 | def registry 155 | reg = property('registry') || 156 | BASE + "test-registry.json" 157 | BASE.join(reg) 158 | end 159 | 160 | def result 161 | BASE.join(property('result')) if property('result') 162 | end 163 | 164 | def expected 165 | RDF::Util::File.open_file(result).read 166 | end 167 | 168 | def positive_test? 169 | !Array(attributes['@type']).join(" ").match(/Negative/) 170 | end 171 | 172 | def negative_test? 173 | !positive_test? 174 | end 175 | 176 | def evaluate? 177 | Array(attributes['@type']).join(" ").include?("Eval") 178 | end 179 | 180 | def syntax? 181 | Array(attributes['@type']).join(" ").include?("Syntax") 182 | end 183 | 184 | def inspect 185 | super.sub('>', "\n" + 186 | " positive?: #{positive_test?.inspect}\n" + 187 | ">" 188 | ) 189 | end 190 | 191 | def trace; @debug.join("\n"); end 192 | end 193 | end 194 | end 195 | -------------------------------------------------------------------------------- /lib/rdf/microdata/reader/nokogiri.rb: -------------------------------------------------------------------------------- 1 | module RDF::Microdata 2 | class Reader < RDF::Reader 3 | ## 4 | # Nokogiri implementation of an HTML parser. 5 | # 6 | # @see https://nokogiri.org/ 7 | module Nokogiri 8 | ## 9 | # Returns the name of the underlying XML library. 
10 | # 11 | # @return [Symbol] 12 | def self.library 13 | :nokogiri 14 | end 15 | 16 | # Proxy class to implement uniform element accessors 17 | class NodeProxy 18 | attr_reader :node 19 | attr_reader :parent 20 | 21 | def initialize(node, parent = nil) 22 | @node = node 23 | @parent = parent 24 | end 25 | 26 | ## 27 | # Element language 28 | # 29 | # From HTML5 3.2.3.3 30 | # If both the lang attribute in no namespace and the lang attribute in the XML namespace are set 31 | # on an element, user agents must use the lang attribute in the XML namespace, and the lang 32 | # attribute in no namespace must be ignored for the purposes of determining the element's 33 | # language. 34 | # 35 | # @return [String] 36 | def language 37 | language = case 38 | when @node.document.is_a?(::Nokogiri::XML::Document) && @node.attributes["xml:lang"] 39 | @node.attributes["xml:lang"].to_s 40 | when @node.document.is_a?(::Nokogiri::XML::Document) && @node.attributes["lang"] 41 | @node.attributes["lang"].to_s 42 | when @node.attribute("lang") 43 | @node.attribute("lang").to_s 44 | else 45 | parent && parent.element? && parent.language 46 | end 47 | end 48 | 49 | ## 50 | # Get any xml:base in effect for this element 51 | def base 52 | if @base.nil? 53 | @base = attributes['xml:base'] || 54 | (parent && parent.element? && parent.base) || 55 | false 56 | end 57 | 58 | @base == false ? nil : @base 59 | end 60 | 61 | def display_path 62 | @display_path ||= begin 63 | path = [] 64 | path << parent.display_path if parent 65 | path << @node.name 66 | case @node 67 | when ::Nokogiri::XML::Element then path.join("/") 68 | when ::Nokogiri::XML::Attr then path.join("@") 69 | else path.join("?") 70 | end 71 | end 72 | end 73 | 74 | ## 75 | # Return true of all child elements are text 76 | # 77 | # @return [Array<:text, :element, :attribute>] 78 | def text_content? 79 | @node.children.all? 
{|c| c.text?} 80 | end 81 | 82 | ## 83 | # Retrieve XMLNS definitions for this element 84 | # 85 | # @return [Hash{String => String}] 86 | def namespaces 87 | @node.namespace_definitions.inject({}) {|memo, ns| memo[ns.prefix] = ns.href.to_s; memo } 88 | end 89 | 90 | ## 91 | # Children of this node 92 | # 93 | # @return [NodeSetProxy] 94 | def children 95 | NodeSetProxy.new(@node.children, self) 96 | end 97 | 98 | ## 99 | # Elements of this node 100 | # 101 | # @return [NodeSetProxy] 102 | def elements 103 | NodeSetProxy.new(@node.elements, self) 104 | end 105 | 106 | ## 107 | # Rational debug output 108 | def to_str 109 | @node.path 110 | end 111 | 112 | ## 113 | # Proxy for everything else to @node 114 | def method_missing(method, *args) 115 | @node.send(method, *args) 116 | end 117 | end 118 | 119 | ## 120 | # NodeSet proxy 121 | class NodeSetProxy 122 | attr_reader :node_set 123 | attr_reader :parent 124 | 125 | def initialize(node_set, parent) 126 | @node_set = node_set 127 | @parent = parent 128 | end 129 | 130 | ## 131 | # Return a proxy for each child 132 | # 133 | # @yield child 134 | # @yieldparam [NodeProxy] child 135 | def each 136 | @node_set.each do |c| 137 | yield NodeProxy.new(c, parent) 138 | end 139 | end 140 | 141 | ## 142 | # Return proxy for first element and remove it 143 | # @return [NodeProxy] 144 | def shift 145 | (e = node_set.shift) && NodeProxy.new(e, parent) 146 | end 147 | 148 | ## 149 | # Add NodeSetProxys 150 | # @param [NodeSetProxy, Nokogiri::XML::Node] other 151 | # @return [NodeSetProxy] 152 | def +(other) 153 | NodeSetProxy.new(self.node_set + other.node_set, parent) 154 | end 155 | 156 | ## 157 | # Add a NodeProxy 158 | # @param [NodeProxy, Nokogiri::XML::Node] elem 159 | # @return [NodeSetProxy] 160 | def <<(elem) 161 | node_set << (elem.is_a?(NodeProxy) ? 
elem.node : elem) 162 | self 163 | end 164 | 165 | def inspect 166 | @node_set.map {|c| NodeProxy.new(c, parent).display_path}.inspect 167 | end 168 | 169 | ## 170 | # Proxy for everything else to @node_set 171 | def method_missing(method, *args) 172 | @node_set.send(method, *args) 173 | end 174 | end 175 | 176 | ## 177 | # Initializes the underlying XML library. 178 | # 179 | # @param [Hash{Symbol => Object}] options 180 | # @return [void] 181 | def initialize_html(input, **options) 182 | require 'nokogiri' unless defined?(::Nokogiri) 183 | @doc = case input 184 | when ::Nokogiri::XML::Document 185 | input 186 | else 187 | # Try to detect charset from input 188 | options[:encoding] ||= input.charset if input.respond_to?(:charset) 189 | 190 | # Otherwise, default is utf-8 191 | options[:encoding] ||= 'utf-8' 192 | options[:encoding] = options[:encoding].to_s if options[:encoding] 193 | 194 | begin 195 | input = input.read if input.respond_to?(:read) 196 | ::Nokogiri::HTML5(input.force_encoding(options[:encoding]), max_parse_errors: 1000) 197 | rescue LoadError, NoMethodError 198 | ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding]) 199 | end 200 | end 201 | end 202 | 203 | # Accessor methods to mask native elements & attributes 204 | 205 | ## 206 | # Return proxy for document root 207 | def root 208 | @root ||= NodeProxy.new(@doc.root) if @doc && @doc.root 209 | end 210 | 211 | ## 212 | # Document errors 213 | def doc_errors 214 | @doc.errors.reject do |e| 215 | e.to_s =~ %r{(The doctype must be the first token in the document)|(Expected a doctype token)|(Unexpected '\?' 
where start tag name is expected)} 216 | end 217 | end 218 | 219 | ## 220 | # Find value of document base 221 | # 222 | # @param [String] base Existing base from URI or :base_uri 223 | # @return [String] 224 | def doc_base(base) 225 | # find if the document has a base element 226 | base_el = @doc.at_css("html>head>base") 227 | base = base_el.attribute("href").to_s.split("#").first if base_el 228 | base 229 | end 230 | 231 | ## 232 | # Based on Microdata element.getItems 233 | # 234 | # @see https://www.w3.org/TR/2011/WD-microdata-20110525/#top-level-microdata-items 235 | def getItems 236 | @doc.css('[itemscope]').select {|el| !el.has_attribute?('itemprop')}.map {|n| NodeProxy.new(n)} 237 | end 238 | 239 | ## 240 | # Look up an element in the document by id 241 | def find_element_by_id(id) 242 | (e = @doc.at_css("##{id}")) && NodeProxy.new(e) 243 | end 244 | end 245 | end 246 | end 247 | -------------------------------------------------------------------------------- /lib/rdf/microdata/reader.rb: -------------------------------------------------------------------------------- 1 | require 'nokogiri' 2 | require 'rdf/xsd' 3 | require 'json' 4 | 5 | module RDF::Microdata 6 | ## 7 | # An Microdata parser in Ruby 8 | # 9 | # Based on processing rules, amended with the following: 10 | # 11 | # @see https://dvcs.w3.org/hg/htmldata/raw-file/0d6b89f5befb/microdata-rdf/index.html 12 | # @author [Gregg Kellogg](https://greggkellogg.net/) 13 | class Reader < RDF::Reader 14 | format Format 15 | include Expansion 16 | include RDF::Util::Logger 17 | URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video) 18 | 19 | # @private 20 | class CrawlFailure < StandardError; end 21 | 22 | # @return [Module] Returns the HTML implementation module for this reader instance. 
attr_reader :implementation

    # @return [Hash{Object => RDF::Resource}] maps RDF elements (items) to resources
    attr_reader :memory

    ##
    # Returns the base URI determined by this reader.
    #
    # @example
    #   reader.base_uri  #=> RDF::URI('http://example.com/doc')
    #
    # @return [RDF::URI, #to_s, nil] the value stored under `:base_uri` in the reader options
    # @since  0.3.0
    def base_uri
      @options[:base_uri]
    end

    ##
    # Reader options
    # @see https://ruby-rdf.github.io/rdf/RDF/Reader#options-class_method
    def self.options
      super + [
        RDF::CLI::Option.new(
          symbol: :rdfa,
          datatype: TrueClass,
          on: ["--rdfa"],
          description: "Transform and parse as RDFa.") {true},
      ]
    end

    ##
    # Redirect for RDFa Reader given `:rdfa` option
    #
    # Allocates either RdfaReader (when `options[:rdfa]` is truthy) or the
    # receiver, then runs `initialize` on it by hand.
    #
    # @raise [RDF::ReaderError] when `:rdfa` is requested but `rdf/rdfa` cannot be loaded
    # @private
    def self.new(input = nil, **options, &block)
      klass = if options[:rdfa]
        # Requires rdf-rdfa gem to be loaded
        begin
          require 'rdf/rdfa'
        rescue LoadError
          # Fully qualified, as everywhere else in this class
          raise RDF::ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem"
        end
        RdfaReader
      else
        self
      end
      reader = klass.allocate
      reader.send(:initialize, input, **options, &block)
      reader
    end

    ##
    # Initializes the Microdata reader instance.
#
    # @param  [Nokogiri::HTML::Document, Nokogiri::XML::Document, IO, File, String]       input
    #   the input stream to read
    # @param  [Hash{Symbol => Object}] options
    #   any additional options
    # @option options [Encoding] :encoding     (Encoding::UTF_8)
    #   the encoding of the input stream (Ruby 1.9+)
    # @option options [Boolean]  :validate     (false)
    #   whether to validate the parsed statements and values
    # @option options [Boolean]  :canonicalize (false)
    #   whether to canonicalize parsed literals
    # @option options [Boolean]  :intern       (true)
    #   whether to intern all parsed URIs
    # @option options [#to_s]    :base_uri     (nil)
    #   the base URI to use when resolving relative URIs
    # @option options [#to_s]    :registry
    #   location of the vocabulary registry; RDF::Microdata::DEFAULT_REGISTRY when absent
    # @return [reader]
    # @yield  [reader] `self`
    # @yieldparam  [RDF::Reader] reader
    # @yieldreturn [void] ignored
    # @raise [Error] Raises `RDF::ReaderError` when validating
    def initialize(input = $stdin, **options, &block)
      super do
        @library = :nokogiri

        require "rdf/microdata/reader/#{@library}"
        @implementation = Nokogiri
        self.extend(@implementation)

        input.rewind if input.respond_to?(:rewind)
        initialize_html(input, **options) rescue log_fatal($!.message, exception: RDF::ReaderError)

        log_error("Empty document") if root.nil?
        log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty?

        log_debug('', "library = #{@library}")

        # Load registry
        begin
          registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
          log_debug('', "registry = #{registry_uri.inspect}")
          Registry.load_registry(registry_uri)
        rescue JSON::ParserError => e
          # NOTE(review): only reported when the document is empty AND validating;
          # otherwise a registry parse failure is silently ignored -- confirm intent.
          log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
        end

        # A zero-arity block runs in the reader's context; any other block is handed the reader
        if block_given?
          case block.arity
          when 0 then instance_eval(&block)
          else block.call(self)
          end
        end
      end
    end

    ##
    # Iterates the given block for each RDF statement in the input.
    #
    # Reads to graph and performs expansion if required.
    #
    # @yield  [statement]
    # @yieldparam [RDF::Statement] statement
    # @return [void]
    # @raise [RDF::ReaderError] when validating and errors were logged
    def each_statement(&block)
      if block_given?
        @callback = block

        # parse
        parse_whole_document(@doc, base_uri)

        if validate? && log_statistics[:error]
          raise RDF::ReaderError, "Errors found during processing"
        end
      end
      enum_for(:each_statement)
    end

    ##
    # Iterates the given block for each RDF triple in the input.
    #
    # @yield  [subject, predicate, object]
    # @yieldparam [RDF::Resource] subject
    # @yieldparam [RDF::URI]      predicate
    # @yieldparam [RDF::Value]    object
    # @return [void]
    def each_triple(&block)
      if block_given?
        each_statement do |statement|
          block.call(*statement.to_triple)
        end
      end
      enum_for(:each_triple)
    end

    private

    # Keep track of allocated BNodes
    # (cached by the String form of `value`, so equal labels share one node)
    def bnode(value = nil)
      @bnode_cache ||= {}
      @bnode_cache[value.to_s] ||= RDF::Node.new(value)
    end

    # Figure out the document path, if it is an Element or Attribute
    def node_path(node)
      "<#{base_uri}>#{node.respond_to?(:display_path) ? node.display_path : node}"
    end

    ##
    # add a statement, object can be literal or URI or bnode
    #
    # An invalid statement is logged as an error but still passed to the callback.
    #
    # @param [Nokogiri::XML::Node, any] node XML Node or string for showing context
    #
    # @param [URI, BNode] subject the subject of the statement
    # @param [URI] predicate the predicate of the statement
    # @param [URI, BNode, Literal] object the object of the statement
    # @return [Statement] Added statement
    # @raise [ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
    def add_triple(node, subject, predicate, object)
      statement = RDF::Statement.new(subject, predicate, object)
      log_error "#{statement.inspect} is invalid" if statement.invalid?
      log_debug(node) {"statement: #{RDF::NTriples.serialize(statement)}"}
      @callback.call(statement)
    end

    # Parsing a Microdata document (this is *not* the recursive method)
    #
    # Resets `memory`, stores the resolved base in `options[:base_uri]`, then
    # generates triples for every top-level item.
    def parse_whole_document(doc, base)
      base = doc_base(base)
      @memory = {}
      options[:base_uri] = if (base)
        # Strip any fragment from base
        base = base.to_s.split('#').first
        base = uri(base)
      else
        base = RDF::URI("")
      end

      log_info(nil) {"parse_whole_doc: base='#{base}'"}

      # 1) For each element that is also a top-level item, Generate the triples for that item using the evaluation context.
      getItems.each do |el|
        log_depth {generate_triples(el, Registry.new(nil))}
      end

      log_info(doc, "parse_whole_doc: traversal complete")
    end

    ##
    # Generate triples for an item
    #
    # Triples are emitted through {#add_triple} in document order; the order of
    # the steps below is therefore observable by the caller's callback.
    #
    # @param [RDF::Resource] item
    # @param [Registry] vocab
    # @option ec [Hash{Nokogiri::XML::Element} => RDF::Resource] memory
    # @option ec [RDF::Resource] :current_vocabulary
    # @return [RDF::Resource]
    def generate_triples(item, vocab)
      # 1) If there is an entry for item in memory, then let subject be the subject of that entry. Otherwise, if item has a global identifier and that global identifier is an absolute URL, let subject be that global identifier. Otherwise, let subject be a new blank node.
      subject = if memory.include?(item.node)
        memory[item.node][:subject]
      elsif item.has_attribute?('itemid')
        uri(item.attribute('itemid'), item.base || base_uri)
      end || RDF::Node.new
      memory[item.node] ||= {}

      log_debug(item) {"gentrips(2): subject=#{subject.inspect}, vocab: #{vocab.inspect}"}

      # 2) Add a mapping from item to subject in memory, if there isn't one already.
      memory[item.node][:subject] ||= subject

      # 3) For each type returned from element.itemType of the element defining the item.
      # 4) Set vocab to the first value returned from element.itemType of the element defining the item.
      type = nil
      item.attribute('itemtype').to_s.split(' ').map{|n| uri(n)}.select(&:absolute?).each do |t|
        #   3.1. If type is an absolute URL, generate the following triple:
        type ||= t
        add_triple(item, subject, RDF.type, t)
      end

      # 6) If the registry contains a URI prefix that is a character for character match of vocab up to the length of the URI prefix, set vocab as that URI prefix.
      if type || vocab.nil?
        vocab = Registry.find(type) || begin
          type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless type.nil?
          log_debug(item) {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
          Registry.new(type_vocab)
        end
      end

      # Otherwise, use vocab from evaluation context
      log_debug(item) {"gentrips(8): vocab: #{vocab.inspect}"}

      # 9. For each element _element_ that has one or more property names and is one of the properties of the item _item_, run the following substep:
      props = item_properties(item)
      # 9.1. For each name name in element's property names, run the following substeps:
      props.each do |element|
        element.attribute('itemprop').to_s.split(' ').compact.each do |name|
          log_debug(item) {"gentrips(9.1): name=#{name.inspect}, vocab=#{vocab.inspect}"}
          # 9.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
          predicate = vocab.predicateURI(name, base_uri)

          # 9.1.3) Let value be the property value of element.
          value = property_value(element)
          log_debug(item) {"gentrips(9.1.3) value=#{value.inspect}"}

          # 9.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
          if value.is_a?(Hash)
            value = generate_triples(element, vocab)
            log_debug(item) {"gentrips(9.1.4): value=#{value.inspect}"}
          end

          # 9.1.4) Generate the following triple:
          add_triple(item, subject, predicate, value)

          # 9.1.5) If an entry exists in the registry for name in the vocabulary associated with vocab having the key subPropertyOf or equivalentProperty
          vocab.expand(predicate) do |equiv|
            log_debug(item) {"gentrips(9.1.5): equiv=#{equiv.inspect}"}
            # for each such value equiv, generate the following triple
            add_triple(item, subject, equiv, value)
          end
        end
      end

      # 10. For each element element that has one or more reverse property names and is one of the reverse properties of the item item, run the following substep:
      props = item_properties(item, true)
      # 10.1. For each name name in element's reverse property names, run the following substeps:
      props.each do |element|
        element.attribute('itemprop-reverse').to_s.split(' ').compact.each do |name|
          log_debug(item) {"gentrips(10.1): name=#{name.inspect}"}

          # 10.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
          predicate = vocab.predicateURI(name, base_uri)

          # 10.1.3) Let value be the property value of element.
          value = property_value(element)
          log_debug(item) {"gentrips(10.1.3) value=#{value.inspect}"}

          # 10.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
          if value.is_a?(Hash)
            value = generate_triples(element, vocab)
            log_debug(item) {"gentrips(10.1.4): value=#{value.inspect}"}
          elsif value.is_a?(RDF::Literal)
            # 10.1.5) Otherwise, if value is a literal, ignore the value and continue to the next name; it is an error for the value of @itemprop-reverse to be a literal
            log_error(element, "Value of @itemprop-reverse may not be a literal: #{value.inspect}")
            next
          end

          # 10.1.6) Generate the following triple
          add_triple(item, value, predicate, subject)
        end
      end

      # 11) Return subject
      subject
    end

    ##
    # To find the properties of an item defined by the element root, the user agent must try to crawl the properties of the element root, with an empty list as the value of memory: if this fails, then the properties of the item defined by the element root is an empty list; otherwise, it is the returned list.
    #
    # A CrawlFailure is logged as an error and turned into an empty list.
    #
    # @param [Nokogiri::XML::Element] item
    # @param [Boolean] reverse (false) return reverse properties
    # @return [Array]
    #   List of property elements for an item
    def item_properties(item, reverse = false)
      log_debug(item, "item_properties (#{reverse.inspect})")
      crawl_properties(item, [], reverse)
    rescue CrawlFailure => e
      log_error(item, e.message)
      return []
    end

    ##
    # To crawl the properties of an element root with a list memory, the user agent must run the following steps. These steps either fail or return a list with a count of errors. The count of errors is used as part of the authoring conformance criteria below.
    #
    # The recursive calls in step 5 are made only so that a cycle raises; their
    # return values are discarded and `results` for `root` is returned.
    #
    # @param [Nokogiri::XML::Element] root
    # @param [Array] memo
    # @param [Boolean] reverse crawl reverse properties
    # @return [Array]
    #   Resultant elements
    # @raise [CrawlFailure] when `root` is already in `memo`
    def crawl_properties(root, memo, reverse)
      # 1. If root is in memo, then the algorithm fails; abort these steps.
      raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if memo.include?(root)

      # 2. Collect all the elements in the item root; let results be the resulting list of elements, and errors be the resulting count of errors.
      results = elements_in_item(root)
      log_debug(root) {"crawl_properties reverse=#{reverse.inspect} results=#{results.map {|e| node_path(e)}.inspect}"}

      # 3. Remove any elements from results that do not have an @itemprop (@itemprop-reverse) attribute specified.
      results = results.select {|e| e.has_attribute?(reverse ? 'itemprop-reverse' : 'itemprop')}

      # 4. Let new memo be a new list consisting of the old list memo with the addition of root.
      # (compares the wrapped nodes, since each proxy is a distinct object)
      raise CrawlFailure, "itemref recursion" if memo.detect {|n| root.node.object_id == n.node.object_id}
      new_memo = memo + [root]

      # 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new memo as the memo.
      results.select {|e| e.has_attribute?('itemscope')}.each do |element|
        log_depth {crawl_properties(element, new_memo, reverse)}
      end

      results
    end

    ##
    # To collect all the elements in the item root, the user agent must run these steps. They return a list of elements.
    #
    # @param [Nokogiri::XML::Element] root
    # @return [Array]
    #   Resultant elements and error count
    # @raise [CrawlFailure] on element recursion
    def elements_in_item(root)
      # Let results and pending be empty lists of elements.
      # Let errors be zero.
      results, memo, errors = [], [], 0

      # Add all the children elements of root to pending.
      pending = root.elements

      # If root has an itemref attribute, split the value of that itemref attribute on spaces.
391 | # For each resulting token ID, 392 | root.attribute('itemref').to_s.split(' ').each do |id| 393 | log_debug(root) {"elements_in_item itemref id #{id}"} 394 | # if there is an element in the home subtree of root with the ID ID, 395 | # then add the first such element to pending. 396 | id_elem = find_element_by_id(id) 397 | pending << id_elem if id_elem 398 | end 399 | log_debug(root) {"elements_in_item pending #{pending.inspect}"} 400 | 401 | # Loop: Remove an element from pending and let current be that element. 402 | while current = pending.shift 403 | if memo.include?(current) 404 | raise CrawlFailure, "elements_in_item: results already includes #{current.inspect}" 405 | elsif !current.has_attribute?('itemscope') 406 | # If current is not already in results and current does not have an itemscope attribute, then: add all the child elements of current to pending. 407 | pending += current.elements 408 | end 409 | memo << current 410 | 411 | # If current is not already in results, then: add current to results. 
412 | results << current unless results.include?(current) 413 | end 414 | 415 | results 416 | end 417 | 418 | ## 419 | # 420 | def property_value(element) 421 | base = element.base || base_uri 422 | log_debug(element) {"property_value(#{element.name}): base #{base.inspect}"} 423 | value = case 424 | when element.has_attribute?('itemscope') 425 | {} 426 | when element.has_attribute?('content') 427 | RDF::Literal.new(element.attribute('content').to_s, language: element.language) 428 | when %w(data meter).include?(element.name) && element.attribute('value') 429 | # Lexically scan value and assign appropriate type, otherwise, leave untyped 430 | v = element.attribute('value').to_s 431 | datatype = %w(Integer Float Double).map {|t| RDF::Literal.const_get(t)}.detect do |dt| 432 | v.match(dt::GRAMMAR) 433 | end || RDF::Literal 434 | datatype = RDF::Literal::Double if datatype == RDF::Literal::Float 435 | datatype.new(v) 436 | when %w(audio embed iframe img source track video).include?(element.name) 437 | uri(element.attribute('src'), base) 438 | when %w(a area link).include?(element.name) 439 | uri(element.attribute('href'), base) 440 | when %w(object).include?(element.name) 441 | uri(element.attribute('data'), base) 442 | when %w(time).include?(element.name) 443 | # Lexically scan value and assign appropriate type, otherwise, leave untyped 444 | v = (element.attribute('datetime') || element.text).to_s 445 | datatype = %w(Date Time DateTime Duration).map {|t| RDF::Literal.const_get(t)}.detect do |dt| 446 | v.match(dt::GRAMMAR) 447 | end || RDF::Literal 448 | datatype.new(v, language: element.language) 449 | else 450 | RDF::Literal.new(element.inner_text, language: element.language) 451 | end 452 | log_debug(element) {" #{value.inspect}"} 453 | value 454 | end 455 | 456 | # Fixme, what about xml:base relative to element? 
457 | def uri(value, base = nil) 458 | value = if base 459 | base = uri(base) unless base.is_a?(RDF::URI) 460 | base.join(value.to_s) 461 | else 462 | RDF::URI(value.to_s) 463 | end 464 | value.validate! if validate? 465 | value.canonicalize! if canonicalize? 466 | value = RDF::URI.intern(value) if intern? 467 | value 468 | end 469 | end 470 | end -------------------------------------------------------------------------------- /spec/reader_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift "." 3 | require 'spec_helper' 4 | require 'rdf/spec/reader' 5 | 6 | describe "RDF::Microdata::Reader" do 7 | let!(:doap) {File.expand_path("../../etc/doap.html", __FILE__)} 8 | let!(:doap_nt) {File.expand_path("../../etc/doap.nt", __FILE__)} 9 | let!(:doap_count) {File.open(doap_nt).each_line.to_a.length} 10 | let!(:registry_path) {File.expand_path("../test-files/test-registry.json", __FILE__)} 11 | 12 | it_behaves_like 'an RDF::Reader' do 13 | let(:reader_input) {File.read(doap)} 14 | let(:reader) {RDF::Microdata::Reader.new(reader_input)} 15 | let(:reader_count) {File.open(doap_nt).each_line.to_a.length} 16 | let(:reader_invalid_input) {""} 17 | end 18 | 19 | describe ".for" do 20 | [ 21 | :microdata, 22 | 'etc/doap.html', 23 | {:file_name => 'etc/doap.html'}, 24 | {file_extension: 'html'}, 25 | {:content_type => 'text/html'}, 26 | ].each do |arg| 27 | it "discovers with #{arg.inspect}" do 28 | expect([RDF::Microdata::Reader, RDF::RDFa::Reader]).to include RDF::Reader.for(arg) 29 | end 30 | end 31 | end 32 | 33 | describe :interface do 34 | before(:each) do 35 | @sampledoc = %( 36 |
37 |

My name is Elizabeth.

38 |
39 | ) 40 | end 41 | 42 | it "should yield reader" do 43 | inner = double("inner") 44 | expect(inner).to receive(:called).with(RDF::Microdata::Reader) 45 | RDF::Microdata::Reader.new(@sampledoc) do |reader| 46 | inner.called(reader.class) 47 | end 48 | end 49 | 50 | it "should return reader" do 51 | expect(RDF::Microdata::Reader.new(@sampledoc)).to be_a(RDF::Microdata::Reader) 52 | end 53 | 54 | it "should yield statements" do 55 | inner = double("inner") 56 | expect(inner).to receive(:called).at_least(1).times.with(RDF::Statement) 57 | RDF::Microdata::Reader.new(@sampledoc).each_statement do |statement| 58 | inner.called(statement.class) 59 | end 60 | end 61 | 62 | it "should yield triples" do 63 | inner = double("inner") 64 | expect(inner).to receive(:called).at_least(1).times 65 | RDF::Microdata::Reader.new(@sampledoc).each_triple do |subject, predicate, object| 66 | inner.called(subject.class, predicate.class, object.class) 67 | end 68 | end 69 | end 70 | 71 | context :parsing do 72 | before :each do 73 | @md_ctx = %q( 74 |
75 | %s 76 |
77 | ) 78 | @nt_ctx = %q( 79 | _:a . 80 | %s 81 | ) 82 | end 83 | 84 | it "parses a simple graph" do 85 | md = %q(

My name is Gregg Kellogg.

) 86 | nt = %q(_:a "Gregg Kellogg" .) 87 | expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) 88 | end 89 | 90 | context "values" do 91 | [ 92 | [ 93 | %q(

My name is Gregg Kellogg

), 94 | %q(_:a "Gregg Kellogg" .) 95 | ], 96 | [ 97 | %q( 98 |

My name is Gregg

99 |

My name is Kellogg

100 | ), 101 | %q(_:a "Gregg", "Kellogg" .) 102 | ], 103 | [ 104 | %q(

My name is Gregg Kellogg

), 105 | %q( 106 | _:a "Gregg Kellogg" . 107 | _:a "Gregg Kellogg" . 108 | ) 109 | ], 110 | [ 111 | %q(

My name is Gregg Kellogg

), 112 | %q(_:a "Gregg Kellogg" .) 113 | ], 114 | [ 115 | %q(), 116 | %q(_:a "foo" .) 117 | ], 118 | [ 119 | %q(Bar), 120 | %q(_:a "foo" .) 121 | ], 122 | [ 123 | %q(), 124 | %q(_:a .) 125 | ], 126 | [ 127 | %q(), 128 | %q(_:a .) 129 | ], 130 | [ 131 | %q(), 132 | %q(_:a .) 133 | ], 134 | [ 135 | %q(), 136 | %q(_:a .) 137 | ], 138 | [ 139 | %q(), 140 | %q(_:a .) 141 | ], 142 | [ 143 | %q(), 144 | %q(_:a .) 145 | ], 146 | [ 147 | %q(), 148 | %q(_:a .) 149 | ], 150 | [ 151 | %q(
), 152 | %q(_:a .) 153 | ], 154 | [ 155 | %q(), 156 | %q(_:a .) 157 | ], 158 | [ 159 | %q(), 160 | %q(_:a .) 161 | ], 162 | [ 163 | %q(), 164 | %q(_:a .) 165 | ], 166 | [ 167 | %q(), 168 | %q(_:a "2011-06-28Z"^^ .) 169 | ], 170 | [ 171 | %q(), 172 | %q(_:a "00:00:00Z"^^ .) 173 | ], 174 | [ 175 | %q(), 176 | %q(_:a "2011-06-28T00:00:00Z"^^ .) 177 | ], 178 | [ 179 | %q(), 180 | %q(_:a "P2011Y06M28DT00H00M00S"^^ .) 181 | ], 182 | [ 183 | %q(), 184 | %q(_:a "foo" .) 185 | ], 186 | [ 187 | %q(), 188 | %q(_:a _:b .) 189 | ], 190 | [ 191 | %q(), 192 | %q(_:a "1"^^ .) 193 | ], 194 | [ 195 | %q(), 196 | %q(_:a "1.1"^^ .) 197 | ], 198 | [ 199 | %q(), 200 | %q(_:a "1.1e1"^^ .) 201 | ], 202 | [ 203 | %q(), 204 | %q(_:a "foo" .) 205 | ], 206 | [ 207 | %q(), 208 | %q(_:a "foo" .) 209 | ], 210 | [ 211 | %q(), 212 | %q(_:a "1"^^ .) 213 | ], 214 | [ 215 | %q(), 216 | %q(_:a "1.1"^^ .) 217 | ], 218 | [ 219 | %q(), 220 | %q(_:a "1.1e1"^^ .) 221 | ], 222 | [ 223 | %q(), 224 | %q(_:a "foo" .) 225 | ], 226 | [ 227 | %q(), 228 | %q(_:a "foo" .) 229 | ], 230 | ].each do |(md, nt)| 231 | it "parses #{md}" do 232 | expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) 233 | end 234 | end 235 | end 236 | 237 | context "base_uri" do 238 | before :each do 239 | @nt_ctx = %q( 240 | _:a . 241 | %s 242 | ) 243 | end 244 | 245 | [ 246 | [ 247 | %q(), 248 | %q(_:a .) 249 | ], 250 | [ 251 | %q(), 252 | %q(_:a .) 253 | ], 254 | [ 255 | %q(), 256 | %q(_:a .) 257 | ], 258 | [ 259 | %q(), 260 | %q(_:a .) 261 | ], 262 | [ 263 | %q(), 264 | %q(_:a .) 265 | ], 266 | [ 267 | %q(), 268 | %q(_:a .) 269 | ], 270 | [ 271 | %q(), 272 | %q(_:a .) 273 | ], 274 | [ 275 | %q(), 276 | %q(_:a .) 277 | ], 278 | [ 279 | %q(), 280 | %q(_:a .) 281 | ], 282 | [ 283 | %q(), 284 | %q(_:a .) 285 | ], 286 | [ 287 | %q(Stéphane Corlosquet), 288 | %q(_:a .) 
289 | ], 290 | ].each do |(md, nt)| 291 | it "parses #{md}" do 292 | expect(parse(@md_ctx % md, base_uri: 'http://example.com/')).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) 293 | end 294 | end 295 | end 296 | 297 | context "itemid" do 298 | before :each do 299 | @md_ctx = %q( 300 |
301 | %s 302 |
303 | ) 304 | @nt_ctx = %q( 305 | . 306 | %s 307 | ) 308 | end 309 | 310 | [ 311 | [ 312 | %q(

My name is Gregg Kellogg

), 313 | %q( "Gregg Kellogg" .) 314 | ], 315 | [ 316 | %q(), 317 | %q( "foo" .) 318 | ], 319 | [ 320 | %q(), 321 | %q( .) 322 | ], 323 | [ 324 | %q(), 325 | %q( .) 326 | ], 327 | [ 328 | %q(), 329 | %q( .) 330 | ], 331 | [ 332 | %q(), 333 | %q( .) 334 | ], 335 | [ 336 | %q(), 337 | %q( .) 338 | ], 339 | [ 340 | %q(), 341 | %q( .) 342 | ], 343 | [ 344 | %q(), 345 | %q( .) 346 | ], 347 | [ 348 | %q(), 349 | %q( .) 350 | ], 351 | [ 352 | %q(), 353 | %q( .) 354 | ], 355 | [ 356 | %q(), 357 | %q( .) 358 | ], 359 | [ 360 | %q(), 361 | %q( .) 362 | ], 363 | [ 364 | %q(), 365 | %q( "2011-06-28T00:00:00Z"^^ .) 366 | ], 367 | [ 368 | %q(), 369 | %q( .) 370 | ], 371 | ].each do |(md, nt)| 372 | it "parses #{md}" do 373 | expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) 374 | end 375 | end 376 | end 377 | 378 | context "itemtype" do 379 | { 380 | "with no type and token property" => [ 381 | %q( 382 |
383 |
384 |

Name: Amanda

385 |
386 |
387 | ), 388 | %q( 389 | [ <#name> "Amanda" ] . 390 | ) 391 | ], 392 | "with empty type and token property" => [ 393 | %q( 394 |
395 |
396 |

Name: Amanda

397 |
398 |
399 | ), 400 | %q( 401 | [ <#name> "Amanda" ] . 402 | ) 403 | ], 404 | "with relative type and token property" => [ 405 | %q( 406 |
407 |
408 |

Name: Amanda

409 |
410 |
411 | ), 412 | %q( 413 | [ <#name> "Amanda" ] . 414 | ) 415 | ], 416 | "with single type and token property" => [ 417 | %q( 418 |
419 |
420 |

Name: Amanda

421 |
422 |
423 | ), 424 | %q( 425 | [ a ; 426 | "Amanda" ; 427 | ] . 428 | ) 429 | ], 430 | "with multipe types and token property" => [ 431 | %q( 432 |
433 |
434 |

Name: Amanda

435 |
436 |
437 | ), 438 | %q( 439 | [ a , ; 440 | "Amanda" ; 441 | ] . 442 | ) 443 | ], 444 | "with no type and URI property" => [ 445 | %q( 446 |
447 |
448 |

Name: Amanda

449 |
450 |
451 | ), 452 | %q( 453 | [ "Amanda" ] . 454 | ) 455 | ], 456 | "with empty type and URI property" => [ 457 | %q( 458 |
459 |
460 |

Name: Amanda

461 |
462 |
463 | ), 464 | %q( 465 | [ "Amanda" ] . 466 | ) 467 | ], 468 | "with relative type and URI property" => [ 469 | %q( 470 |
471 |
472 |

Name: Amanda

473 |
474 |
475 | ), 476 | %q( 477 | [ "Amanda" ] . 478 | ) 479 | ], 480 | "with single type and URI property" => [ 481 | %q( 482 |
483 |
484 |

Name: Amanda

485 |
486 |
487 | ), 488 | %q( 489 | [ a ; 490 | "Amanda" ; 491 | ] . 492 | ) 493 | ], 494 | "with multipe types and URI property" => [ 495 | %q( 496 |
497 |
498 |

Name: Amanda

499 |
500 |
501 | ), 502 | %q( 503 | [ a , ; 504 | "Amanda" ; 505 | ] . 506 | ) 507 | ], 508 | "with inherited type and token property" => [ 509 | %q( 510 |
511 |

Name: Gregg

512 |
513 |

Name: Jeni

514 |
515 |
516 | ), 517 | %q( 518 | @prefix md: . 519 | @prefix schema: . 520 | [ a schema:Person ; 521 | schema:name "Gregg" ; 522 | schema:knows [ schema:name "Jeni" ] 523 | ] . 524 | ) 525 | ] 526 | }.each do |name, (md, nt)| 527 | it "#{name}" do 528 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 529 | end 530 | end 531 | end 532 | 533 | context "itemref" do 534 | { 535 | "to single id" => 536 | [ 537 | %q( 538 |
539 |
540 |

Name: Amanda

541 |
542 | ), 543 | %q( 544 | [ a ; 545 | "Amanda" ; 546 | ] . 547 | ) 548 | ], 549 | "to generate listed property values" => 550 | [ 551 | %q( 552 |
553 |
554 |

My name is Gregg

555 |
556 |

My name is Kellogg

557 |
558 | ), 559 | %q( 560 | [ a ; 561 | "Gregg", "Kellogg" ; 562 | ] . 563 | ) 564 | ], 565 | "to single id with different types" => 566 | [ 567 | %q( 568 |
569 |
570 |
571 |

Name: Amanda

572 |
573 | ), 574 | %q( 575 | [ a ; 576 | "Amanda" ; 577 | ] . 578 | [ a ; 579 | "Amanda" ; 580 | ] . 581 | ) 582 | ], 583 | "to multiple ids" => 584 | [ 585 | %q( 586 |
587 |
588 |

Name: Amanda

589 |

Jazz Band

590 |
591 | ), 592 | %q( 593 | [ a ; 594 | "Amanda" ; 595 | "Jazz Band" ; 596 | ] . 597 | ) 598 | ], 599 | "with chaining" => 600 | [ 601 | %q( 602 |
603 |
604 |

Name: Amanda

605 |
606 |
607 |

Band: Jazz Band

608 |

Size: 12 players

609 |
610 |
611 | ), 612 | %q( 613 | [ a ; 614 | "Amanda" ; 615 | [ 616 | a ; 617 | "Jazz Band"; 618 | "12" 619 | ] 620 | ] . 621 | ) 622 | ], 623 | "shared" => 624 | [ 625 | %q( 626 |
627 |
628 |
629 |
630 | Amanda 631 |
632 |
633 | ), 634 | %q( 635 | [ <#refers-to> _:a ] . 636 | [ <#refers-to> _:a ] . 637 | _:a <#name> "Amanda" . 638 | ) 639 | 640 | ], 641 | }.each do |name, (md, nt)| 642 | it "parses #{name}" do 643 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 644 | end 645 | end 646 | 647 | it "catches infinite recursion" do 648 | md = %( 649 | 650 | 651 |
652 |
653 |
friend1
654 |
655 |
friend2
656 |
657 |
658 |
659 |
660 | 661 | ) 662 | expect {parse(md, validate: true)}.to raise_error(RDF::ReaderError) 663 | expect(@logger.to_s).to include("itemref recursion") 664 | end 665 | end 666 | 667 | context "propertyURI" do 668 | context "no expansion" do 669 | { 670 | "http://foo/bar + baz => http://foo/baz" => 671 | [ 672 | %q( 673 |
674 |

FooBar

675 |
676 | ), 677 | %q( 678 | [ a ; "FooBar" ] . 679 | ) 680 | ], 681 | "http://foo#bar + baz => http://foo#baz" => 682 | [ 683 | %q( 684 |
685 |

FooBar

686 |
687 | ), 688 | %q( 689 | [ a ; "FooBar" ] . 690 | ) 691 | ], 692 | "http://foo#Type + bar + baz => http://foo#baz" => 693 | [ 694 | %q( 695 |
696 |

Baz

697 |
698 | ), 699 | %q( 700 | [ a ; 701 | [ "Baz"]] . 702 | ) 703 | ], 704 | }.each do |name, (md, nt)| 705 | it "expands #{name}" do 706 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 707 | end 708 | end 709 | end 710 | 711 | context "default propertyURI generation" do 712 | { 713 | "http://foo/bar + baz => http://foo/baz" => 714 | [ 715 | %q( 716 |
717 |

FooBar

718 |
719 | ), 720 | %q( 721 | [ a ; "FooBar" ] . 722 | ) 723 | ], 724 | "http://foo#bar + baz => http://foo#baz" => 725 | [ 726 | %q( 727 |
728 |

FooBar

729 |
730 | ), 731 | %q( 732 | [ a ; "FooBar" ] . 733 | ) 734 | ], 735 | "http://foo#Type + bar + baz => http://foo#baz" => 736 | [ 737 | %q( 738 |
739 |

Baz

740 |
741 | ), 742 | %q( 743 | [ a ; 744 | [ "Baz"]] . 745 | ) 746 | ], 747 | }.each do |name, (md, nt)| 748 | it "expands #{name}" do 749 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 750 | end 751 | end 752 | end 753 | end 754 | 755 | context "itemprop-reverse" do 756 | { 757 | "link" => [ 758 | %q( 759 |
760 | William Shakespeare 761 | 762 |
763 | ), 764 | %q( 765 | [ 766 | a ; 767 | "William Shakespeare" 768 | ] . 769 | ) 770 | ], 771 | "itemscope" => [ 772 | %q( 773 |
774 | The ACME Shopping Mall on Structured Data Avenue 775 | The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data 776 |

Here is a list of shops inside:

777 |
778 | Dan Brickley's Data Restaurant 779 |
780 |
781 | Ramanathan Guha's Meta Content Framework Bakery 782 |
783 |
784 | ), 785 | %q( 786 | _:a a ; 787 | "The ACME Shopping Mall on Structured Data Avenue"; 788 | "The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data" . 789 | _:b a ; 790 | "Dan Brickley's Data Restaurant"; 791 | _:a . 792 | _:c a ; 793 | "Ramanathan Guha's Meta Content Framework Bakery"; 794 | _:a . 795 | ) 796 | ], 797 | "literal" => [ 798 | %q( 799 |
800 | William Shakespeare 801 | 802 |
803 | ), 804 | %q( 805 | _:a a ; 806 | "William Shakespeare" . 807 | ) 808 | ], 809 | "itemprop and itemprop-reverse" => [ 810 | %q( 811 |
812 | Cryptography Users 813 |
815 |
817 | Alice 818 |
819 | 1977 820 |
821 |
822 | ), 823 | %q( 824 | @prefix schema: . 825 | @prefix md: . 826 | 827 | _:a a schema:Organization; 828 | schema:name "Cryptography Users"; 829 | schema:member _:b . 830 | _:b a schema:OrganizationRole; 831 | schema:startDate "1977"; 832 | schema:member _:c; 833 | schema:memberOf _:a . 834 | _:c a schema:Person; 835 | schema:name "Alice"; 836 | schema:memberOf _:b . 837 | ) 838 | ], 839 | }.each do |name, (md, nt)| 840 | it "expands #{name}" do 841 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 842 | end 843 | end 844 | end 845 | 846 | context "vocabulary expansion" do 847 | it "always expands" do 848 | md = %q( 849 |
850 | 851 |
852 | ) 853 | ttl = %q( 854 | [ a , ; 855 | 856 | ] . 857 | ) 858 | 859 | expect(parse(md, vocab_expansion: true)).to be_equivalent_graph(ttl, logger: @logger) 860 | end 861 | 862 | it "always expands (schemas)" do 863 | md = %q( 864 |
865 | 866 |
867 | ) 868 | ttl = %q( 869 | [ a , ; 870 | 871 | ] . 872 | ) 873 | 874 | expect(parse(md, vocab_expansion: true)).to be_equivalent_graph(ttl, logger: @logger) 875 | end 876 | end 877 | 878 | context "test-files" do 879 | Dir.glob(File.join(File.expand_path(File.dirname(__FILE__)), "test-files", "*.html")).each do |md| 880 | it "parses #{md}" do 881 | test_file(md) 882 | end 883 | end 884 | end 885 | end 886 | 887 | def parse(input, options = {}) 888 | @logger = RDF::Spec.logger 889 | graph = options[:graph] || RDF::Graph.new 890 | RDF::Microdata::Reader.new(input, 891 | logger: @logger, 892 | validate: false, 893 | registry: registry_path, 894 | canonicalize: false, 895 | **options 896 | ).each do |statement| 897 | graph << statement 898 | end 899 | graph 900 | end 901 | 902 | def test_file(filepath, options = {}) 903 | graph = parse(File.open(filepath), **options) 904 | 905 | ttl_string = File.read(filepath.sub('.html', '.ttl')) 906 | expect(graph).to be_equivalent_graph(ttl_string, logger: @logger) 907 | end 908 | end 909 | -------------------------------------------------------------------------------- /spec/rdfa_reader_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift "." 3 | require 'spec_helper' 4 | require 'rdf/spec/reader' 5 | 6 | describe RDF::Microdata::RdfaReader do 7 | let!(:doap) {File.expand_path("../../etc/doap.html", __FILE__)} 8 | let!(:doap_nt) {File.expand_path("../../etc/doap.nt", __FILE__)} 9 | let!(:registry_path) {File.expand_path("../test-files/test-registry.json", __FILE__)} 10 | before :each do 11 | @reader = RDF::Microdata::RdfaReader.new(StringIO.new("")) 12 | end 13 | 14 | context :interface do 15 | subject {%( 16 |
17 |

My name is Elizabeth.

18 |
19 | )} 20 | 21 | it "should yield reader" do 22 | inner = double("inner") 23 | expect(inner).to receive(:called).with(RDF::Microdata::RdfaReader) 24 | RDF::Microdata::RdfaReader.new(subject, base_uri: 'http://example/') do |reader| 25 | inner.called(reader.class) 26 | end 27 | end 28 | 29 | it "should return reader" do 30 | expect(RDF::Microdata::RdfaReader.new(subject, base_uri: 'http://example/')).to be_a(RDF::Microdata::RdfaReader) 31 | end 32 | 33 | it "should not raise errors" do 34 | expect { 35 | RDF::Microdata::RdfaReader.new(subject, validate: true, base_uri: 'http://example/') 36 | }.not_to raise_error 37 | end 38 | 39 | it "should yield statements" do 40 | inner = double("inner") 41 | expect(inner).to receive(:called).with(RDF::Statement).at_least(2) 42 | RDF::Microdata::RdfaReader.new(subject, base_uri: 'http://example/').each_statement do |statement| 43 | inner.called(statement.class) 44 | end 45 | end 46 | 47 | it "should yield triples" do 48 | inner = double("inner") 49 | expect(inner).to receive(:called).at_least(2) 50 | RDF::Microdata::RdfaReader.new(subject, base_uri: 'http://example/').each_triple do |subject, predicate, object| 51 | inner.called(subject.class, predicate.class, object.class) 52 | end 53 | end 54 | 55 | context "Microdata Reader with :rdfa option" do 56 | it "returns a RdfaReader instance" do 57 | r = RDF::Microdata::Reader.new(StringIO.new(""), rdfa: true) 58 | expect(r).to be_a(RDF::Microdata::RdfaReader) 59 | end 60 | end 61 | end 62 | 63 | context :parsing do 64 | before :each do 65 | @md_ctx = %q( 66 |
67 | %s 68 |
69 | ) 70 | @nt_ctx = %q( 71 | _:a . 72 | %s 73 | ) 74 | end 75 | 76 | it "parses a simple graph" do 77 | md = %q(

My name is Gregg Kellogg.

) 78 | nt = %q(_:a "Gregg Kellogg" .) 79 | expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) 80 | end 81 | 82 | context "values" do 83 | [ 84 | [ 85 | %q(

My name is Gregg Kellogg

), 86 | %q(_:a "Gregg Kellogg" .) 87 | ], 88 | [ 89 | %q( 90 |

My name is Gregg

91 |

My name is Kellogg

92 | ), 93 | %q(_:a "Gregg", "Kellogg" .) 94 | ], 95 | [ 96 | %q(

My name is Gregg Kellogg

), 97 | %q( 98 | _:a "Gregg Kellogg" . 99 | _:a "Gregg Kellogg" . 100 | ) 101 | ], 102 | [ 103 | %q(

My name is Gregg Kellogg

), 104 | %q(_:a "Gregg Kellogg" .) 105 | ], 106 | [ 107 | %q(), 108 | %q(_:a "foo" .) 109 | ], 110 | [ 111 | %q(Bar), 112 | %q(_:a "foo" .) 113 | ], 114 | [ 115 | %q(), 116 | %q(_:a .) 117 | ], 118 | [ 119 | %q(), 120 | %q(_:a .) 121 | ], 122 | [ 123 | %q(), 124 | %q(_:a .) 125 | ], 126 | [ 127 | %q(), 128 | %q(_:a .) 129 | ], 130 | [ 131 | %q(), 132 | %q(_:a .) 133 | ], 134 | [ 135 | %q(), 136 | %q(_:a .) 137 | ], 138 | [ 139 | %q(), 140 | %q(_:a .) 141 | ], 142 | [ 143 | %q(), 144 | %q(_:a .) 145 | ], 146 | [ 147 | %q(), 148 | %q(_:a .) 149 | ], 150 | [ 151 | %q(), 152 | %q(_:a .) 153 | ], 154 | [ 155 | %q(), 156 | %q(_:a .) 157 | ], 158 | [ 159 | %q(), 160 | %q(_:a "2011-06-28Z"^^ .) 161 | ], 162 | [ 163 | %q(), 164 | %q(_:a "00:00:00Z"^^ .) 165 | ], 166 | [ 167 | %q(), 168 | %q(_:a "2011-06-28T00:00:00Z"^^ .) 169 | ], 170 | [ 171 | %q(), 172 | %q(_:a "P2011Y06M28DT00H00M00S"^^ .) 173 | ], 174 | [ 175 | %q(), 176 | %q(_:a "foo" .) 177 | ], 178 | [ 179 | %q(), 180 | %q(_:a _:b .) 181 | ], 182 | [ 183 | %q(), 184 | %q(_:a "1"^^ .) 185 | ], 186 | [ 187 | %q(), 188 | %q(_:a "1.1"^^ .) 189 | ], 190 | [ 191 | %q(), 192 | %q(_:a "1.1e1"^^ .) 193 | ], 194 | [ 195 | %q(), 196 | %q(_:a "foo" .) 197 | ], 198 | [ 199 | %q(), 200 | %q(_:a "foo" .) 201 | ], 202 | [ 203 | %q(), 204 | %q(_:a "1"^^ .) 205 | ], 206 | [ 207 | %q(), 208 | %q(_:a "1.1"^^ .) 209 | ], 210 | [ 211 | %q(), 212 | %q(_:a "1.1e1"^^ .) 213 | ], 214 | [ 215 | %q(), 216 | %q(_:a "foo" .) 217 | ], 218 | [ 219 | %q(), 220 | %q(_:a "foo" .) 221 | ], 222 | ].each do |(md, nt)| 223 | it "parses #{md}" do 224 | pending if [ 225 | '', 226 | '', 227 | ].include?(md) 228 | expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) 229 | end 230 | end 231 | end 232 | 233 | context "base_uri" do 234 | before :each do 235 | @nt_ctx = %q( 236 | _:a . 237 | %s 238 | ) 239 | end 240 | 241 | [ 242 | [ 243 | %q(), 244 | %q(_:a .) 245 | ], 246 | [ 247 | %q(), 248 | %q(_:a .) 
249 | ], 250 | [ 251 | %q(), 252 | %q(_:a .) 253 | ], 254 | [ 255 | %q(), 256 | %q(_:a .) 257 | ], 258 | [ 259 | %q(), 260 | %q(_:a .) 261 | ], 262 | [ 263 | %q(), 264 | %q(_:a .) 265 | ], 266 | [ 267 | %q(), 268 | %q(_:a .) 269 | ], 270 | [ 271 | %q(), 272 | %q(_:a .) 273 | ], 274 | [ 275 | %q(), 276 | %q(_:a .) 277 | ], 278 | [ 279 | %q(), 280 | %q(_:a .) 281 | ], 282 | [ 283 | %q(Stéphane Corlosquet), 284 | %q(_:a .) 285 | ], 286 | ].each do |(md, nt)| 287 | it "parses #{md}" do 288 | expect(parse(@md_ctx % md, base_uri: 'http://example.com/')).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) 289 | end 290 | end 291 | end 292 | 293 | context "itemid" do 294 | before :each do 295 | @md_ctx = %q( 296 |
297 | %s 298 |
299 | ) 300 | @nt_ctx = %q( 301 | . 302 | %s 303 | ) 304 | end 305 | 306 | [ 307 | [ 308 | %q(

My name is Gregg Kellogg

), 309 | %q( "Gregg Kellogg" .) 310 | ], 311 | [ 312 | %q(), 313 | %q( "foo" .) 314 | ], 315 | [ 316 | %q(), 317 | %q( .) 318 | ], 319 | [ 320 | %q(), 321 | %q( .) 322 | ], 323 | [ 324 | %q(), 325 | %q( .) 326 | ], 327 | [ 328 | %q(), 329 | %q( .) 330 | ], 331 | [ 332 | %q(), 333 | %q( .) 334 | ], 335 | [ 336 | %q(), 337 | %q( .) 338 | ], 339 | [ 340 | %q(), 341 | %q( .) 342 | ], 343 | [ 344 | %q(), 345 | %q( .) 346 | ], 347 | [ 348 | %q(), 349 | %q( .) 350 | ], 351 | [ 352 | %q(), 353 | %q( .) 354 | ], 355 | [ 356 | %q(), 357 | %q( .) 358 | ], 359 | [ 360 | %q(), 361 | %q( "2011-06-28T00:00:00Z"^^ .) 362 | ], 363 | [ 364 | %q(), 365 | %q( .) 366 | ], 367 | ].each do |(md, nt)| 368 | it "parses #{md}" do 369 | expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) 370 | end 371 | end 372 | end 373 | 374 | context "itemtype" do 375 | { 376 | "with no type and token property" => [ 377 | %q( 378 |
379 |
380 |

Name: Amanda

381 |
382 |
383 | ), 384 | %q() 385 | ], 386 | "with empty type and token property" => [ 387 | %q( 388 |
389 |
390 |

Name: Amanda

391 |
392 |
393 | ), 394 | %q() 395 | ], 396 | "with relative type and token property" => [ 397 | %q( 398 |
399 |
400 |

Name: Amanda

401 |
402 |
403 | ), 404 | %q() 405 | ], 406 | "with single type and token property" => [ 407 | %q( 408 |
409 |
410 |

Name: Amanda

411 |
412 |
413 | ), 414 | %q( 415 | [ a ; 416 | "Amanda" ; 417 | ] . 418 | ) 419 | ], 420 | "with multipe types and token property" => [ 421 | %q( 422 |
423 |
424 |

Name: Amanda

425 |
426 |
427 | ), 428 | %q( 429 | [ a , ; 430 | "Amanda" ; 431 | ] . 432 | ) 433 | ], 434 | #"with no type and URI property" => [ 435 | # %q( 436 | #
437 | #
438 | #

Name: Amanda

439 | #
440 | #
441 | # ), 442 | # %q( 443 | # [ "Amanda" ] . 444 | # ) 445 | #], 446 | #"with empty type and URI property" => [ 447 | # %q( 448 | #
449 | #
450 | #

Name: Amanda

451 | #
452 | #
453 | # ), 454 | # %q( 455 | # [ "Amanda" ] . 456 | # ) 457 | #], 458 | #"with relative type and URI property" => [ 459 | # %q( 460 | #
461 | #
462 | #

Name: Amanda

463 | #
464 | #
465 | # ), 466 | # %q( 467 | # [ "Amanda" ] . 468 | # ) 469 | #], 470 | "with single type and URI property" => [ 471 | %q( 472 |
473 |
474 |

Name: Amanda

475 |
476 |
477 | ), 478 | %q( 479 | [ a ; 480 | "Amanda" ; 481 | ] . 482 | ) 483 | ], 484 | "with multipe types and URI property" => [ 485 | %q( 486 |
487 |
488 |

Name: Amanda

489 |
490 |
491 | ), 492 | %q( 493 | [ a , ; 494 | "Amanda" ; 495 | ] . 496 | ) 497 | ], 498 | "with inherited type and token property" => [ 499 | %q( 500 |
501 |

Name: Gregg

502 |
503 |

Name: Jeni

504 |
505 |
506 | ), 507 | %q( 508 | @prefix md: . 509 | @prefix schema: . 510 | [ a schema:Person ; 511 | schema:name "Gregg" ; 512 | schema:knows [ schema:name "Jeni" ] 513 | ] . 514 | ) 515 | ] 516 | }.each do |name, (md, nt)| 517 | it "#{name}" do 518 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 519 | end 520 | end 521 | end 522 | 523 | context "itemref" do 524 | { 525 | "to single id" => 526 | [ 527 | %q( 528 |
529 |
530 |

Name: Amanda

531 |
532 | ), 533 | %q( 534 | [ a ; 535 | "Amanda" ; 536 | ] . 537 | ) 538 | ], 539 | "to generate listed property values" => 540 | [ 541 | %q( 542 |
543 |
544 |

My name is Gregg

545 |
546 |

My name is Kellogg

547 |
548 | ), 549 | %q( 550 | [ a ; 551 | "Gregg", "Kellogg" ; 552 | ] . 553 | ) 554 | ], 555 | #"to single id with different types" => 556 | #[ 557 | # %q( 558 | #
559 | #
560 | #
561 | #

Name: Amanda

562 | #
563 | # ), 564 | # %q( 565 | # [ a ; 566 | # "Amanda" ; 567 | # ] . 568 | # [ a ; 569 | # "Amanda" ; 570 | # ] . 571 | # ) 572 | #], 573 | "to multiple ids" => 574 | [ 575 | %q( 576 |
577 |
578 |

Name: Amanda

579 |

Jazz Band

580 |
581 | ), 582 | %q( 583 | [ a ; 584 | "Amanda" ; 585 | "Jazz Band" ; 586 | ] . 587 | ) 588 | ], 589 | "with chaining" => 590 | [ 591 | %q( 592 |
593 |
594 |

Name: Amanda

595 |
596 |
597 |

Band: Jazz Band

598 |

Size: 12 players

599 |
600 |
601 | ), 602 | %q( 603 | [ a ; 604 | "Amanda" ; 605 | [ 606 | a ; 607 | "Jazz Band"; 608 | "12" 609 | ] 610 | ] . 611 | ) 612 | ], 613 | "shared" => 614 | [ 615 | %q( 616 |
617 |
618 |
619 |
620 | Amanda 621 |
622 |
623 | ), 624 | %q( 625 | [ a ; _:a ] . 626 | [ a ; _:a ] . 627 | _:a "Amanda" . 628 | ) 629 | 630 | ], 631 | }.each do |name, (md, nt)| 632 | it "parses #{name}" do 633 | pending "Broke in Nokogiri 13.0" if RUBY_VERSION < "2.7" 634 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 635 | end 636 | end 637 | 638 | it "catches infinite recursion", pending: true do 639 | md = %( 640 | 641 | 642 |
643 |
644 |
friend1
645 |
646 |
friend2
647 |
648 |
649 |
650 |
651 | 652 | ) 653 | expect {parse(md, validate: true)}.to raise_error(RDF::ReaderError) 654 | expect(@logger.to_s).to include("itemref recursion") 655 | end 656 | end 657 | 658 | context "propertyURI" do 659 | context "no expansion" do 660 | { 661 | "http://foo/bar + baz => http://foo/baz" => 662 | [ 663 | %q( 664 |
665 |

FooBar

666 |
667 | ), 668 | %q( 669 | [ a ; "FooBar" ] . 670 | ) 671 | ], 672 | "http://foo#bar + baz => http://foo#baz" => 673 | [ 674 | %q( 675 |
676 |

FooBar

677 |
678 | ), 679 | %q( 680 | [ a ; "FooBar" ] . 681 | ) 682 | ], 683 | "http://foo#Type + bar + baz => http://foo#baz" => 684 | [ 685 | %q( 686 |
687 |

Baz

688 |
689 | ), 690 | %q( 691 | [ a ; 692 | [ "Baz"]] . 693 | ) 694 | ], 695 | }.each do |name, (md, nt)| 696 | it "expands #{name}" do 697 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 698 | end 699 | end 700 | end 701 | 702 | context "default propertyURI generation" do 703 | { 704 | "http://foo/bar + baz => http://foo/baz" => 705 | [ 706 | %q( 707 |
708 |

FooBar

709 |
710 | ), 711 | %q( 712 | [ a ; "FooBar" ] . 713 | ) 714 | ], 715 | "http://foo#bar + baz => http://foo#baz" => 716 | [ 717 | %q( 718 |
719 |

FooBar

720 |
721 | ), 722 | %q( 723 | [ a ; "FooBar" ] . 724 | ) 725 | ], 726 | "http://foo#Type + bar + baz => http://foo#baz" => 727 | [ 728 | %q( 729 |
730 |

Baz

731 |
732 | ), 733 | %q( 734 | [ a ; 735 | [ "Baz"]] . 736 | ) 737 | ], 738 | }.each do |name, (md, nt)| 739 | it "expands #{name}" do 740 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 741 | end 742 | end 743 | end 744 | end 745 | 746 | context "itemprop-reverse", skip: true do 747 | { 748 | "link" => [ 749 | %q( 750 |
751 | William Shakespeare 752 | 753 |
754 | ), 755 | %q( 756 | [ 757 | a ; 758 | "William Shakespeare" 759 | ] . 760 | ) 761 | ], 762 | "itemscope" => [ 763 | %q( 764 |
765 | The ACME Shopping Mall on Structured Data Avenue 766 | The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data 767 |

Here is a list of shops inside:

768 |
769 | Dan Brickley's Data Restaurant 770 |
771 |
772 | Ramanathan Guha's Meta Content Framework Bakery 773 |
774 |
775 | ), 776 | %q( 777 | _:a a ; 778 | "The ACME Shopping Mall on Structured Data Avenue"; 779 | "The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data" . 780 | _:b a ; 781 | "Dan Brickley's Data Restaurant"; 782 | _:a . 783 | _:c a ; 784 | "Ramanathan Guha's Meta Content Framework Bakery"; 785 | _:a . 786 | ) 787 | ], 788 | "literal" => [ 789 | %q( 790 |
791 | William Shakespeare 792 | 793 |
794 | ), 795 | %q( 796 | _:a a ; 797 | "William Shakespeare" . 798 | ) 799 | ], 800 | "itemprop and itemprop-reverse" => [ 801 | %q( 802 |
803 | Cryptography Users 804 |
806 |
808 | Alice 809 |
810 | 1977 811 |
812 |
813 | ), 814 | %q( 815 | @prefix schema: . 816 | @prefix md: . 817 | 818 | _:a a schema:Organization; 819 | schema:name "Cryptography Users"; 820 | schema:member _:b . 821 | _:b a schema:OrganizationRole; 822 | schema:startDate "1977"; 823 | schema:member _:c; 824 | schema:memberOf _:a . 825 | _:c a schema:Person; 826 | schema:name "Alice"; 827 | schema:memberOf _:b . 828 | ) 829 | ], 830 | }.each do |name, (md, nt)| 831 | it "expands #{name}" do 832 | expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) 833 | end 834 | end 835 | end 836 | 837 | context "vocabulary expansion", pending: true do 838 | it "always expands" do 839 | md = %q( 840 |
841 | 842 |
843 | ) 844 | ttl = %q( 845 | [ a , ; 846 | 847 | ] . 848 | ) 849 | 850 | expect(parse(md, vocab_expansion: true)).to be_equivalent_graph(ttl, logger: @logger) 851 | end 852 | 853 | it "always expands (schemas)" do 854 | md = %q( 855 |
856 | 857 |
858 | ) 859 | ttl = %q( 860 | [ a , ; 861 | 862 | ] . 863 | ) 864 | 865 | expect(parse(md, vocab_expansion: true)).to be_equivalent_graph(ttl, logger: @logger) 866 | end 867 | end 868 | 869 | context "test-files", skip: true do 870 | Dir.glob(File.join(File.expand_path(File.dirname(__FILE__)), "test-files", "*.html")).each do |md| 871 | it "parses #{md}" do 872 | test_file(md) 873 | end 874 | end 875 | end 876 | end 877 | 878 | def parse(input, options = {}) 879 | @logger = RDF::Spec.logger 880 | graph = options[:graph] || RDF::Graph.new 881 | RDF::Microdata::Reader.new(input, 882 | logger: @logger, 883 | rdfa: true, 884 | validate: false, 885 | base_uri: "http://example/", 886 | registry: registry_path, 887 | canonicalize: false, 888 | **options 889 | ).each do |statement| 890 | graph << statement 891 | end 892 | 893 | # Remove any rdfa:usesVocabulary statements 894 | graph.query({predicate: RDF::RDFA.usesVocabulary}).each do |stmt| 895 | graph.delete(stmt) 896 | end 897 | graph 898 | end 899 | 900 | def test_file(filepath, **options) 901 | graph = parse(File.open(filepath), **options) 902 | 903 | ttl_string = File.read(filepath.sub('.html', '.ttl')) 904 | expect(graph).to be_equivalent_graph(ttl_string, logger: @logger) 905 | end 906 | end 907 | --------------------------------------------------------------------------------