├── .rspec
├── .github
├── FUNDING.yml
└── workflows
│ └── ci.yml
├── lib
├── multi_xml
│ ├── version.rb
│ └── parsers
│ │ ├── libxml.rb
│ │ ├── nokogiri.rb
│ │ ├── oga.rb
│ │ ├── libxml2_parser.rb
│ │ ├── ox.rb
│ │ └── rexml.rb
└── multi_xml.rb
├── .yardopts
├── .gitignore
├── bin
├── setup
└── console
├── spec
├── helper.rb
├── speed.rb
├── multi_xml_spec.rb
└── parser_shared_example.rb
├── Gemfile
├── Rakefile
├── LICENSE.md
├── .rubocop.yml
├── multi_xml.gemspec
├── CONTRIBUTING.md
├── README.md
└── CHANGELOG.md
/.rspec:
--------------------------------------------------------------------------------
1 | --color
2 | --order random
3 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [sferik]
2 |
--------------------------------------------------------------------------------
/lib/multi_xml/version.rb:
--------------------------------------------------------------------------------
# Top-level namespace of the multi_xml gem.
module MultiXml
  # Release number of the gem, held as a Gem::Version object rather than a
  # plain String.
  VERSION = Gem::Version.new("0.7.2")
end
4 |
--------------------------------------------------------------------------------
/.yardopts:
--------------------------------------------------------------------------------
1 | --no-private
2 | --protected
3 | --markup markdown
4 | -
5 | CHANGELOG.md
6 | CONTRIBUTING.md
7 | LICENSE.md
8 | README.md
9 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.gem
2 | *~
3 | .bundle
4 | .rvmrc
5 | .yardoc
6 | Gemfile.lock
7 | coverage/*
8 | doc/*
9 | log/*
10 | measurement/*
11 | pkg/*
12 |
--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -euo pipefail
3 | IFS=$'\n\t'
4 | set -vx
5 |
6 | bundle install
7 |
8 | # Do any other automated setup that you need to do here
9 |
--------------------------------------------------------------------------------
/bin/console:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 |
3 | require "bundler/setup"
4 | require "multi_xml"
5 |
6 | # You can add fixtures and/or initialization code here to make experimenting
7 | # with your gem easier. You can also use a different console, if you like.
8 |
9 | require "irb"
10 | IRB.start(__FILE__)
11 |
--------------------------------------------------------------------------------
/spec/helper.rb:
--------------------------------------------------------------------------------
# True when RUBY_PLATFORM is "java", which this suite treats as running on JRuby.
def jruby? = RUBY_PLATFORM == "java"
4 |
5 | require "simplecov"
6 |
7 | SimpleCov.start do
8 | add_filter "/spec"
9 | percent = (jruby?) ? 91.38 : 93.16
10 | minimum_coverage(percent)
11 | end
12 |
13 | require "multi_xml"
14 | require "rspec"
15 |
16 | RSpec.configure do |config|
17 | config.expect_with :rspec do |c|
18 | c.syntax = :expect
19 | end
20 | end
21 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: ci
2 | on: [push, pull_request]
3 | jobs:
4 | build:
5 | strategy:
6 | matrix:
7 | ruby: ["3.2", "3.3", "3.4", "jruby-10"]
8 | runs-on: ubuntu-latest
9 | steps:
10 | - uses: actions/checkout@v4
11 | - uses: ruby/setup-ruby@v1
12 | with:
13 | ruby-version: ${{ matrix.ruby }}
14 | bundler-cache: true
15 | - run: bundle exec rake test
16 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 |
3 | gem "libxml-ruby", require: nil, platforms: :ruby
4 | gem "nokogiri", require: nil
5 | gem "oga", ">= 2.3", require: nil
6 | gem "ox", require: nil, platforms: :ruby
7 | gem "rexml", require: nil
8 |
9 | gem "rake", ">= 13.2.1"
10 | gem "rspec", ">= 3.12"
11 | gem "rubocop", ">= 1.62.1"
12 | gem "rubocop-performance", ">= 1.20.2"
13 | gem "rubocop-rake", ">= 0.6"
14 | gem "rubocop-rspec", ">= 2.24"
15 | gem "simplecov", ">= 0.22"
16 | gem "standard", ">= 1.35.1"
17 | gem "standard-performance", ">= 1.3.1"
18 | gem "yard", ">= 0.9.36"
19 | gem "yardstick", ">= 0.9.9"
20 |
21 | gemspec
22 |
--------------------------------------------------------------------------------
/lib/multi_xml/parsers/libxml.rb:
--------------------------------------------------------------------------------
1 | require "libxml" unless defined?(LibXML)
2 | require "multi_xml/parsers/libxml2_parser"
3 |
module MultiXml
  module Parsers
    # LibXML-backed parser. The hash conversion itself comes from
    # Libxml2Parser; this module only supplies the LibXML-specific node
    # accessors that conversion relies on.
    module Libxml # :nodoc:
      include Libxml2Parser
      extend self

      # Error class associated with this parser.
      def parse_error
        ::LibXML::XML::Error
      end

      # Parse an XML Document IO into a simple hash using LibXML
      #
      # xml::
      #   XML Document IO to parse
      def parse(xml)
        document = LibXML::XML::Parser.io(xml).parse
        node_to_hash(document.root)
      end

      private

      # Yields each child of +node+ to the given block.
      def each_child(node, &block)
        node.each_child(&block)
      end

      # Yields each attribute of +node+ to the given block.
      def each_attr(node, &block)
        node.each_attr(&block)
      end

      # Name under which +node+ is stored in the resulting hash.
      def node_name(node)
        node.name
      end
    end
  end
end
34 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler"
2 | Bundler::GemHelper.install_tasks
3 |
4 | require "rspec/core/rake_task"
5 | RSpec::Core::RakeTask.new(:spec)
6 |
7 | task test: :spec
8 |
9 | require "rubocop/rake_task"
10 | RuboCop::RakeTask.new
11 |
12 | require "yard"
13 | YARD::Rake::YardocTask.new do |task|
14 | task.files = ["lib/**/*.rb", "-", "LICENSE.md"]
15 | task.options = [
16 | "--no-private",
17 | "--protected",
18 | "--output-dir", "doc/yard",
19 | "--markup", "markdown"
20 | ]
21 | end
22 |
23 | require "yardstick/rake/measurement"
24 | Yardstick::Rake::Measurement.new do |measurement|
25 | measurement.output = "measurement/report.txt"
26 | end
27 |
28 | require "yardstick/rake/verify"
29 | Yardstick::Rake::Verify.new do |verify|
30 | verify.threshold = 48.8
31 | end
32 |
33 | task default: %i[spec rubocop verify_measurements]
34 |
--------------------------------------------------------------------------------
/lib/multi_xml/parsers/nokogiri.rb:
--------------------------------------------------------------------------------
1 | require "nokogiri" unless defined?(Nokogiri)
2 | require "multi_xml/parsers/libxml2_parser"
3 |
module MultiXml
  module Parsers
    # Nokogiri-backed parser. The hash conversion itself comes from
    # Libxml2Parser; this module only supplies the Nokogiri-specific node
    # accessors that conversion relies on.
    module Nokogiri # :nodoc:
      include Libxml2Parser
      extend self

      # Error class associated with this parser.
      def parse_error
        ::Nokogiri::XML::SyntaxError
      end

      # Parse an XML document into a simple hash using Nokogiri
      #
      # xml::
      #   XML document to parse
      def parse(xml)
        doc = ::Nokogiri::XML(xml)
        errors = doc.errors
        # The document's recorded errors are checked explicitly; the first
        # one is re-raised.
        raise(errors.first) unless errors.empty?

        node_to_hash(doc.root)
      end

      private

      # Yields each child of +node+ to the given block.
      def each_child(node, &block)
        node.children.each(&block)
      end

      # Yields each attribute node of +node+ to the given block.
      def each_attr(node, &block)
        node.attribute_nodes.each(&block)
      end

      # Name under which +node+ is stored in the resulting hash.
      def node_name(node)
        node.node_name
      end
    end
  end
end
37 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2010-2025 Erik Berlin
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
1 | require:
2 | - standard
3 |
4 | plugins:
5 | - rubocop-performance
6 | - rubocop-rake
7 | - rubocop-rspec
8 | - standard-performance
9 |
10 | AllCops:
11 | NewCops: enable
12 | TargetRubyVersion: 3.2
13 |
14 | Layout/ArgumentAlignment:
15 | EnforcedStyle: with_fixed_indentation
16 | IndentationWidth: 2
17 |
18 | Layout/CaseIndentation:
19 | EnforcedStyle: end
20 |
21 | Layout/EndAlignment:
22 | EnforcedStyleAlignWith: start_of_line
23 |
24 | Layout/LineLength:
25 | Max: 140
26 |
27 | Layout/ParameterAlignment:
28 | EnforcedStyle: with_fixed_indentation
29 | IndentationWidth: 2
30 |
31 | Layout/SpaceInsideHashLiteralBraces:
32 | EnforcedStyle: no_space
33 |
34 | Metrics/ParameterLists:
35 | CountKeywordArgs: false
36 |
37 | Style/Alias:
38 | EnforcedStyle: prefer_alias_method
39 |
40 | Style/Documentation:
41 | Enabled: false
42 |
43 | Style/FrozenStringLiteralComment:
44 | EnforcedStyle: never
45 |
46 | Style/OpenStructUse:
47 | Enabled: false
48 |
49 | Style/StringLiterals:
50 | EnforcedStyle: double_quotes
51 |
52 | Style/StringLiteralsInInterpolation:
53 | EnforcedStyle: double_quotes
54 |
55 | Style/TernaryParentheses:
56 | EnforcedStyle: require_parentheses
57 |
--------------------------------------------------------------------------------
/spec/speed.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby -wW1
2 |
3 | $LOAD_PATH << "."
4 | $LOAD_PATH << "../lib"
5 |
6 | if __FILE__ == $PROGRAM_NAME
7 | while (i = ARGV.index("-I"))
8 | _, path = ARGV.slice!(i, 2)
9 | $LOAD_PATH << path
10 | end
11 | end
12 |
13 | require "optparse"
14 | require "stringio"
15 | require "multi_xml"
16 |
17 | %w[libxml nokogiri ox].each do |library|
18 | require library
19 | rescue LoadError
20 | next
21 | end
22 |
23 | $verbose = 0
24 | $parsers = []
25 | $iterations = 10
26 |
27 | opts = OptionParser.new
28 | opts.on("-v", "increase verbosity") { $verbose += 1 }
29 | opts.on("-p", "--parser [String]", String, "parser to test") { |parsers| $parsers = [parsers] }
30 | opts.on("-i", "--iterations [Int]", Integer, "iterations") { |iterations| $iterations = iterations }
31 | opts.on("-h", "--help", "Show this display") do
32 | puts opts
33 | Process.exit!(0)
34 | end
35 | files = opts.parse(ARGV)
36 |
37 | if $parsers.empty?
38 | $parsers << "libxml" if defined?(LibXML)
39 | $parsers << "nokogiri" if defined?(Nokogiri)
40 | $parsers << "ox" if defined?(Ox)
41 | end
42 |
# Benchmark every selected parser against every file given on the command
# line and print one timing line per parser.
files.each do |filename|
  timings = {}
  xml = File.read(filename)
  $parsers.each do |parser_name|
    MultiXml.parser = parser_name
    # Use the monotonic clock: wall-clock Time.now can jump (NTP, manual
    # adjustment) in the middle of a run and distort the measurement.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    $iterations.times do
      # A fresh StringIO per iteration so each parse starts at position 0.
      MultiXml.parse(StringIO.new(xml))
    end
    timings[parser_name] = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  end
  timings.each do |parser_name, elapsed|
    puts format("%8s took %0.3f seconds to parse %s %d times.", parser_name, elapsed, filename, $iterations)
  end
end
59 |
--------------------------------------------------------------------------------
/spec/multi_xml_spec.rb:
--------------------------------------------------------------------------------
1 | require "helper"
2 | require "parser_shared_example"
3 |
# Minimal stand-in used to check that MultiXml.parser accepts an arbitrary
# class; its class-level parse takes no arguments and returns nil.
class MockDecoder
  class << self
    def parse
    end
  end
end
7 |
8 | describe "MultiXml" do
9 | context "Parsers" do
10 | it "picks a default parser" do
11 | expect(MultiXml.parser).to be_a(Module)
12 | expect(MultiXml.parser).to respond_to(:parse)
13 | end
14 |
15 | it "defaults to the best available gem" do
16 | # Clear cache variable possibly set by previous tests
17 | MultiXml.send(:remove_instance_variable, :@parser) if MultiXml.instance_variable_defined?(:@parser)
18 | if jruby?
19 | # Ox and Libxml are not currently available on JRuby, so Nokogiri is the best available gem
20 | expect(MultiXml.parser.name).to eq("MultiXml::Parsers::Nokogiri")
21 | else
22 | expect(MultiXml.parser.name).to eq("MultiXml::Parsers::Ox")
23 | end
24 | end
25 |
26 | it "is settable via a symbol" do
27 | MultiXml.parser = :rexml
28 | expect(MultiXml.parser.name).to eq("MultiXml::Parsers::Rexml")
29 | end
30 |
31 | it "is settable via a class" do
32 | MultiXml.parser = MockDecoder
33 | expect(MultiXml.parser.name).to eq("MockDecoder")
34 | end
35 | end
36 |
37 | [%w[LibXML libxml],
38 | %w[REXML rexml/document],
39 | %w[Nokogiri nokogiri],
40 | %w[Ox ox],
41 | %w[Oga oga]].each do |parser|
42 | require parser.last
43 | context "#{parser.first} parser" do
44 | it_behaves_like "a parser", parser.first
45 | end
46 | rescue LoadError
47 | puts "Tests not run for #{parser.first} due to a LoadError"
48 | end
49 | end
50 |
--------------------------------------------------------------------------------
/multi_xml.gemspec:
--------------------------------------------------------------------------------
1 | require_relative "lib/multi_xml/version"
2 |
# Gem specification for multi_xml.
Gem::Specification.new do |spec|
  spec.name = "multi_xml"
  spec.version = MultiXml::VERSION
  spec.authors = ["Erik Berlin"]
  spec.email = ["sferik@gmail.com"]

  spec.summary = "Provides swappable XML backends utilizing LibXML, Nokogiri, Ox, or REXML."
  spec.homepage = "https://github.com/sferik/multi_xml"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.2"

  # All metadata is set in this single assignment. Assigning individual keys
  # before it would be discarded, and repeating keys after it is redundant.
  spec.metadata = {
    "allowed_push_host" => "https://rubygems.org",
    "bug_tracker_uri" => "https://github.com/sferik/multi_xml/issues",
    "changelog_uri" => "https://github.com/sferik/multi_xml/blob/master/CHANGELOG.md",
    "documentation_uri" => "https://rubydoc.info/gems/multi_xml/",
    "funding_uri" => "https://github.com/sponsors/sferik",
    "homepage_uri" => spec.homepage,
    "rubygems_mfa_required" => "true",
    "source_code_uri" => "https://github.com/sferik/multi_xml"
  }

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) || f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor])
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # For more information and examples about making a new gem, check out our
  # guide at: https://bundler.io/guides/creating_gem.html

  spec.add_dependency("bigdecimal", "~> 3.1")
end
44 |
--------------------------------------------------------------------------------
/lib/multi_xml/parsers/oga.rb:
--------------------------------------------------------------------------------
1 | require "oga" unless defined?(Oga)
2 | require "multi_xml/parsers/libxml2_parser"
3 |
module MultiXml
  module Parsers
    # Oga-backed parser. It mixes in Libxml2Parser but defines its own
    # node_to_hash, because it classifies nodes by their Oga class instead of
    # calling element?/text?/cdata? on them.
    module Oga # :nodoc:
      include Libxml2Parser
      extend self

      # Error class associated with this parser.
      def parse_error
        LL::ParserError
      end

      # Parse an XML Document IO into a simple hash using Oga
      #
      # io::
      #   XML Document IO to parse
      #
      # Raises the class returned by parse_error when the document has no
      # element child to serve as root.
      def parse(io)
        document = ::Oga.parse_xml(io)
        # The first child is not necessarily the root element: a comment or
        # processing instruction placed before it is a child too. Pick the
        # first element explicitly.
        root = document.children.find { |child| child.is_a?(::Oga::XML::Element) }
        raise(parse_error, "The document does not have a valid root") unless root

        node_to_hash(root)
      end

      # Convert an Oga element (and, recursively, its element children) to a
      # hash.
      #
      # node::
      #   The Oga node to convert to a hash.
      #
      # hash::
      #   Hash to merge the converted element into; it is mutated and returned.
      def node_to_hash(node, hash = {}) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
        node_hash = {MultiXml::CONTENT_ROOT => ""}

        name = node_name(node)

        # Insert node hash into parent hash correctly: repeated element names
        # are collected into an Array.
        case hash[name]
        when Array
          hash[name] << node_hash
        when Hash
          hash[name] = [hash[name], node_hash]
        when NilClass
          hash[name] = node_hash
        end

        # Handle child elements; text and CDATA are concatenated into the
        # content entry, every other node type is ignored.
        each_child(node) do |c|
          if c.is_a?(::Oga::XML::Element)
            node_to_hash(c, node_hash)
          elsif c.is_a?(::Oga::XML::Text) || c.is_a?(::Oga::XML::Cdata)
            node_hash[MultiXml::CONTENT_ROOT] += c.text
          end
        end

        # Remove content node if it is empty
        node_hash.delete(MultiXml::CONTENT_ROOT) if node_hash[MultiXml::CONTENT_ROOT].strip.empty?

        # Handle attributes; when the key is already taken (e.g. by a child
        # element of the same name) both values are kept, attribute first.
        each_attr(node) do |a|
          key = node_name(a)
          v = node_hash[key]
          node_hash[key] = ((v) ? [a.value, v] : a.value)
        end

        hash
      end

      private

      # Yields each child of +node+ to the given block.
      def each_child(node, &block)
        node.children.each(&block)
      end

      # Yields each attribute of +node+ to the given block.
      def each_attr(node, &block)
        node.attributes.each(&block)
      end

      # Name under which +node+ is stored in the resulting hash.
      def node_name(node)
        node.name
      end
    end
  end
end
72 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Contributing
2 | In the spirit of [free software][free-sw], **everyone** is encouraged to help
3 | improve this project.
4 |
5 | [free-sw]: http://www.fsf.org/licensing/essays/free-sw.html
6 |
7 | Here are some ways *you* can contribute:
8 |
9 | * by using alpha, beta, and prerelease versions
10 | * by reporting bugs
11 | * by suggesting new features
12 | * by writing or editing documentation
13 | * by writing specifications
14 | * by writing code (**no patch is too small**: fix typos, add comments, clean up
15 | inconsistent whitespace)
16 | * by refactoring code
17 | * by resolving [issues][]
18 | * by reviewing patches
19 | * [financially][sponsors]
20 |
21 | [issues]: https://github.com/sferik/multi_xml/issues
22 | [sponsors]: https://github.com/sponsors/sferik
23 |
24 | ## Submitting an Issue
25 | We use the [GitHub issue tracker][issues] to track bugs and features. Before
26 | submitting a bug report or feature request, check to make sure it hasn't
27 | already been submitted. When submitting a bug report, please include a [Gist][]
28 | that includes a stack trace and any details that may be necessary to reproduce
29 | the bug, including your gem version, Ruby version, and operating system.
30 | Ideally, a bug report should include a pull request with failing specs.
31 |
32 | [gist]: https://gist.github.com/
33 |
34 | ## Submitting a Pull Request
35 | 1. [Fork the repository.][fork]
36 | 2. [Create a topic branch.][branch]
37 | 3. Add specs for your unimplemented feature or bug fix.
38 | 4. Run `bundle exec rake spec`. If your specs pass, return to step 3.
39 | 5. Implement your feature or bug fix.
40 | 6. Run `bundle exec rake`. If your specs fail, return to step 5.
41 | 7. Run `open coverage/index.html`. If your changes are not completely covered
42 | by your tests, return to step 3.
43 | 8. Add documentation for your feature or bug fix.
44 | 9. Run `bundle exec rake verify_measurements`. If your changes are not 100%
45 | documented, go back to step 8.
46 | 10. Add, commit, and push your changes.
47 | 11. [Submit a pull request.][pr]
48 |
49 | [fork]: http://help.github.com/fork-a-repo/
50 | [branch]: http://learn.github.com/p/branching.html
51 | [pr]: http://help.github.com/send-pull-requests/
52 |
--------------------------------------------------------------------------------
/lib/multi_xml/parsers/libxml2_parser.rb:
--------------------------------------------------------------------------------
module MultiXml
  module Parsers
    # Shared node-to-hash conversion. A module that mixes this in supplies
    # +each_child+, +each_attr+ and +node_name+ for its own node objects, plus
    # a public +parse+; the defaults here only raise NotImplementedError.
    module Libxml2Parser # :nodoc:
      # Convert an XML node (and, recursively, its element children) to a hash
      #
      # node::
      #   The XML node object to convert to a hash.
      #
      # hash::
      #   Hash to merge the converted element into; it is mutated and returned.
      def node_to_hash(node, hash = {}) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
        content_key = MultiXml::CONTENT_ROOT
        node_hash = {content_key => ""}
        name = node_name(node)

        # Attach this node under its name; repeated names collect into an Array.
        existing = hash[name]
        if existing.is_a?(Array)
          existing << node_hash
        elsif existing.is_a?(Hash)
          hash[name] = [existing, node_hash]
        elsif existing.nil?
          hash[name] = node_hash
        end

        # Recurse into element children; concatenate text and CDATA content.
        each_child(node) do |child|
          next node_to_hash(child, node_hash) if child.element?
          next unless child.text? || child.cdata?

          node_hash[content_key] += child.content
        end

        # Drop the content entry when it holds nothing but whitespace.
        node_hash.delete(content_key) if node_hash[content_key].strip.empty?

        # Copy attributes; when the key is already taken both values are
        # kept, attribute first.
        each_attr(node) do |attribute|
          key = node_name(attribute)
          previous = node_hash[key]
          node_hash[key] =
            if previous
              [attribute.value, previous]
            else
              attribute.value
            end
        end

        hash
      end

      # Parse an XML Document IO into a simple hash.
      #
      # xml::
      #   XML Document IO to parse
      def parse(_)
        raise NotImplementedError, "inheritor should define #{__method__}"
      end

      private

      # Iterate over the children of a node; must be supplied by the includer.
      def each_child(*)
        raise NotImplementedError, "inheritor should define #{__method__}"
      end

      # Iterate over the attributes of a node; must be supplied by the includer.
      def each_attr(*)
        raise NotImplementedError, "inheritor should define #{__method__}"
      end

      # Name of a node or attribute; must be supplied by the includer.
      def node_name(*)
        raise NotImplementedError, "inheritor should define #{__method__}"
      end
    end
  end
end
71 |
--------------------------------------------------------------------------------
/lib/multi_xml/parsers/ox.rb:
--------------------------------------------------------------------------------
1 | require "ox" unless defined?(Ox)
2 |
3 | # Each MultiXml parser is expected to parse an XML document into a Hash. The
4 | # conversion rules are:
5 | #
6 | # - Each document starts out as an empty Hash.
7 | #
8 | # - Reading an element creates an entry in the parent Hash that has a key of
9 | # the element name and a value of a Hash with attributes as key value
10 | # pairs. Children are added as described by this rule.
11 | #
12 | # - Text and CDATA are stored in the parent element Hash with a key of
13 | # MultiXml::CONTENT_ROOT and a value of the text itself.
14 | #
15 | # - If a key already exists in the Hash then the value associated with the key
16 | # is converted to an Array with the old and new value in it.
17 | #
18 | # - Other elements such as the xml prolog, doctype, and comments are ignored.
19 | #
20 |
module MultiXml
  module Parsers
    # Ox-backed parser. The document is fed through Ox's SAX interface and a
    # Handler instance assembles the resulting Hash as callbacks arrive.
    module Ox # :nodoc:
      module_function

      # Error class associated with this parser.
      #
      # This is StandardError rather than Exception so that rescuing it does
      # not also swallow Interrupt, SystemExit or NoMemoryError. It still
      # covers the StandardError raised by Handler#error.
      def parse_error
        StandardError
      end

      # Parse an XML Document IO into a simple hash using Ox
      #
      # io::
      #   XML Document IO to parse
      def parse(io)
        handler = Handler.new
        ::Ox.sax_parse(handler, io, convert_special: true, skip: :skip_return)
        handler.doc
      end

      # SAX handler that builds the document Hash. +stack+ holds the Hashes
      # of the currently open elements, with the document Hash at the bottom.
      class Handler
        attr_accessor :stack

        def initialize
          @stack = []
        end

        # The finished document Hash (nil if no element was ever opened).
        def doc
          @stack[0]
        end

        # Stores an attribute on the current element. Attributes seen before
        # any element is open are ignored.
        def attr(name, value)
          append(name, value) unless @stack.empty?
        end

        # Stores text under MultiXml::CONTENT_ROOT on the current element.
        def text(value)
          append(MultiXml::CONTENT_ROOT, value)
        end

        # Stores CDATA under MultiXml::CONTENT_ROOT on the current element.
        def cdata(value)
          append(MultiXml::CONTENT_ROOT, value)
        end

        # Opens an element: registers a fresh Hash under the element's name in
        # the parent and makes it the current element.
        def start_element(name)
          # The first element creates the document Hash it is stored in.
          @stack.push({}) if @stack.empty?
          element = {}
          append(name, element)
          @stack.push(element)
        end

        # Closes the current element.
        def end_element(_)
          @stack.pop
        end

        # Turns a reported parse problem into a raised StandardError whose
        # message carries the position.
        def error(message, line, column)
          raise(StandardError, "#{message} at #{line}:#{column}")
        end

        # Adds +value+ under +key+ (stringified) on the current element. A
        # second value for the same key turns the entry into an Array; later
        # values are appended to it.
        def append(key, value)
          key = key.to_s
          current = @stack.last
          if current.key?(key)
            existing = current[key]
            if existing.is_a?(Array)
              existing << value
            else
              current[key] = [existing, value]
            end
          else
            current[key] = value
          end
        end
      end
    end
  end
end
92 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MultiXML
2 |
3 | A generic swappable back-end for XML parsing
4 |
5 | ## Installation
6 | gem install multi_xml
7 |
8 | ## Documentation
9 | [http://rdoc.info/gems/multi_xml][documentation]
10 |
11 | [documentation]: http://rdoc.info/gems/multi_xml
12 |
13 | ## Usage Examples
14 | ```ruby
15 | require 'multi_xml'
16 |
17 | MultiXml.parser = :ox
18 | MultiXml.parser = MultiXml::Parsers::Ox # Same as above
19 | MultiXml.parse('<tag>This is the contents</tag>') # Parsed using Ox
20 |
21 | MultiXml.parser = :libxml
22 | MultiXml.parser = MultiXml::Parsers::Libxml # Same as above
23 | MultiXml.parse('<tag>This is the contents</tag>') # Parsed using LibXML
24 |
25 | MultiXml.parser = :nokogiri
26 | MultiXml.parser = MultiXml::Parsers::Nokogiri # Same as above
27 | MultiXml.parse('<tag>This is the contents</tag>') # Parsed using Nokogiri
28 |
29 | MultiXml.parser = :rexml
30 | MultiXml.parser = MultiXml::Parsers::Rexml # Same as above
31 | MultiXml.parse('<tag>This is the contents</tag>') # Parsed using REXML
32 |
33 | MultiXml.parser = :oga
34 | MultiXml.parser = MultiXml::Parsers::Oga # Same as above
35 | MultiXml.parse('<tag>This is the contents</tag>') # Parsed using Oga
36 | ```
37 | The `parser` setter takes either a symbol or a class (to allow for custom XML
38 | parsers) that responds to `.parse` at the class level.
39 |
40 | MultiXML tries to have intelligent defaulting. That is, if you have any of the
41 | supported parsers already loaded, it will use them before attempting to load
42 | a new one. When loading, libraries are ordered by speed: first Ox, then LibXML,
43 | then Nokogiri, and finally REXML.
44 |
45 | ## Supported Ruby Versions
46 | This library aims to support and is tested against the following Ruby
47 | implementations:
48 |
49 | * 3.2
50 | * 3.3
51 | * 3.4
52 | * JRuby 10
53 |
54 | If something doesn't work on one of these versions, it's a bug.
55 |
56 | This library may inadvertently work (or seem to work) on other Ruby
57 | implementations; however, support will only be provided for the versions listed
58 | above.
59 |
60 | If you would like this library to support another Ruby version, you may
61 | volunteer to be a maintainer. Being a maintainer entails making sure all tests
62 | run and pass on that implementation. When something breaks on your
63 | implementation, you will be responsible for providing patches in a timely
64 | fashion. If critical issues for a particular implementation exist at the time
65 | of a major release, support for that Ruby version may be dropped.
66 |
67 | ## Inspiration
68 | MultiXML was inspired by [MultiJSON][].
69 |
70 | [multijson]: https://github.com/intridea/multi_json/
71 |
72 | ## Copyright
73 | Copyright (c) 2010-2025 Erik Berlin. See [LICENSE][] for details.
74 |
75 | [license]: LICENSE.md
76 |
--------------------------------------------------------------------------------
/lib/multi_xml/parsers/rexml.rb:
--------------------------------------------------------------------------------
1 | require "rexml/document" unless defined?(REXML::Document)
2 |
module MultiXml
  module Parsers
    # REXML-backed parser: walks a REXML document and turns it into nested
    # Hashes.
    module Rexml # :nodoc:
      extend self

      # Error class associated with this parser.
      def parse_error
        ::REXML::ParseException
      end

      # Parse an XML Document IO into a simple hash using REXML
      #
      # xml::
      #   XML Document IO to parse
      #
      # Raises REXML::ParseException when the document has no root element.
      def parse(xml)
        document = REXML::Document.new(xml)
        root = document.root
        unless root
          raise(REXML::ParseException, "The document #{document.to_s.inspect} does not have a valid root")
        end

        merge_element!({}, root)
      end

      private

      # Convert an XML element and merge it into the hash under its name
      #
      # hash::
      #   Hash to merge the converted element into.
      # element::
      #   XML element to merge into hash
      def merge_element!(hash, element)
        collapsed = collapse(element)
        merge!(hash, element.name, collapsed)
      end

      # Actually converts an XML document element into a data structure:
      # attributes first, then child elements, then text.
      #
      # element::
      #   The document element to be collapsed.
      def collapse(element)
        result = get_attributes(element)
        return merge_texts!(result, element) unless element.has_elements?

        element.each_element { |child| merge_element!(result, child) }
        # Between child elements, whitespace-only text is not recorded.
        merge_texts!(result, element) unless empty_content?(element)
        result
      end

      # Merge all the texts of an element into the hash
      #
      # hash::
      #   Hash to add the converted element to.
      # element::
      #   XML element whose texts are to be merged into the hash
      def merge_texts!(hash, element)
        return hash unless element.has_text?

        # must use value to prevent double-escaping
        joined = element.texts.map(&:value).join
        merge!(hash, MultiXml::CONTENT_ROOT, joined)
      end

      # Adds a new key/value pair to an existing Hash. If the key to be added
      # already exists and the existing value associated with key is not
      # an Array, it will be wrapped in an Array. Then the new value is
      # appended to that Array. An Array value stored under a new key is
      # wrapped in an outer Array.
      #
      # hash::
      #   Hash to add key/value pair to.
      # key::
      #   Key to be added.
      # value::
      #   Value to be associated with key.
      def merge!(hash, key, value)
        if !hash.key?(key)
          hash[key] = value.instance_of?(Array) ? [value] : value
        elsif hash[key].instance_of?(Array)
          hash[key] << value
        else
          hash[key] = [hash[key], value]
        end
        hash
      end

      # Converts the attributes of an XML element into a hash.
      # Returns an empty Hash if node has no attributes.
      #
      # element::
      #   XML element to extract attributes from.
      def get_attributes(element)
        collected = {}
        element.attributes.each do |name, value|
          collected[name] = value
        end
        collected
      end

      # Determines if a document element has no text content other than
      # whitespace
      #
      # element::
      #   XML element to be checked.
      def empty_content?(element)
        combined = element.texts.join
        combined.strip.empty?
      end
    end
  end
end
114 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | 0.7.2
2 | -----
3 | * [Drop support for Ruby 3.1](https://github.com/sferik/multi_xml/commit/fab6288edd36c58a2b13e0206d8bed305fcb4a4b)
4 |
5 | 0.7.1
6 | -----
7 | * [Relax required Ruby version constraint to allow installation on Debian stable](https://github.com/sferik/multi_xml/commit/7d18711466a15e158dc71344ca6f6e18838ecc8d)
8 |
9 | 0.7.0
10 | -----
11 | * [Add support for Ruby 3.3](https://github.com/sferik/multi_xml/pull/67)
12 | * [Drop support for Ruby 3.0](https://github.com/sferik/multi_xml/commit/eec72c56307fede3a93f1a61553587cb278b0c8a) [and](https://github.com/sferik/multi_xml/commit/6a6dec80a36c30774a5525b45f71d346fb561e69) [earlier](https://github.com/sferik/multi_xml/commit/e7dad37a0a0be8383a26ffe515c575b5b4d04588)
13 | * [Don't mutate strings](https://github.com/sferik/multi_xml/commit/71be3fff4afb0277a7e1c47c5f1f4b6106a8eb45)
14 |
15 | 0.6.0
16 | -----
17 | * [Duplexed Streams](https://github.com/sferik/multi_xml/pull/45)
18 | * [Support for Oga](https://github.com/sferik/multi_xml/pull/47)
19 | * [Integer unification for Ruby 2.4](https://github.com/sferik/multi_xml/pull/54)
20 |
21 | 0.5.5
22 | -----
23 | * [Fix symbolize_keys function](https://github.com/sferik/multi_xml/commit/a4cae3aeb690999287cd30206399abaa5ce1ae81)
24 | * [Fix Nokogiri parser for the same attr and inner element name](https://github.com/sferik/multi_xml/commit/a28ed86e2d7826b2edeed98552736b4c7ca52726)
25 |
26 | 0.5.4
27 | -----
28 | * [Add option to not cast parsed values](https://github.com/sferik/multi_xml/commit/44fc05fbcfd60cc8b555b75212471fab29fa8cd0)
29 | * [Use message instead of to_s](https://github.com/sferik/multi_xml/commit/b06f0114434ffe1957dd7bc2712cb5b76c1b45fe)
30 |
31 | 0.5.3
32 | -----
33 | * [Add cryptographic signature](https://github.com/sferik/multi_xml/commit/f39f0c74308090737816c622dbb7d7aa28c646c0)
34 |
35 | 0.5.2
36 | -----
37 | * [Remove ability to parse symbols and YAML](https://github.com/sferik/multi_xml/pull/34)
38 |
39 | 0.5.1
40 | -----
41 | * [Revert "Reset @@parser in between specs"](https://github.com/sferik/multi_xml/issues/28)
42 |
43 | 0.5.0
44 | -----
45 | * [Reset @@parser in between specs](https://github.com/sferik/multi_xml/commit/b562bed265918b43ac1c4c638ae3a7ffe95ecd83)
46 | * [Add attributes being passed through on content nodes](https://github.com/sferik/multi_xml/commit/631a8bb3c2253db0024f77f47c16d5a53b8128fd)
47 |
48 | 0.4.4
49 | -----
50 | * [Fix regression in MultiXml.parse](https://github.com/sferik/multi_xml/commit/45ae597d9a35cbd89cc7f5518c85bac30199fc06)
51 |
52 | 0.4.3
53 | -----
54 | * [Make parser a class variable](https://github.com/sferik/multi_xml/commit/6804ffc8680ed6466c66f2472f5e016c412c2c24)
55 | * [Add TYPE_NAMES constant](https://github.com/sferik/multi_xml/commit/72a21f2e86c8e3ac9689cee5f3a62102cfb98028)
56 |
57 | 0.4.2
58 | -----
59 | * [Fix bug in dealing with xml element attributes for both REXML and Ox](https://github.com/sferik/multi_xml/commit/ba3c1ac427ff0268abaf8186fb4bd81100c99559)
60 | * [Make Ox the preferred XML parser](https://github.com/sferik/multi_xml/commit/0a718d740c30fba426f300a929cda9ee8250d238)
61 |
62 | 0.4.1
63 | -----
64 | * [Use the SAX like parser with Ox](https://github.com/sferik/multi_xml/commit/d289d42817a32e48483c00d5361c76fbea62a166)
65 |
66 | 0.4.0
67 | -----
68 | * [Add support for Ox](https://github.com/sferik/multi_xml/pull/14)
69 |
70 | 0.3.0
71 | -----
72 | * [Remove core class monkeypatches](https://github.com/sferik/multi_xml/commit/f7cc3ce4d2924c0e0adc6935d1fba5ec79282938)
73 | * [Sort out some class / singleton class issues](https://github.com/sferik/multi_xml/commit/a5dac06bcf658facaaf7afa295f1291c7be15a44)
74 | * [Have parsers refer to toplevel CONTENT_ROOT instead of defining it](https://github.com/sferik/multi_xml/commit/94e6fa49e69b2a2467a0e6d3558f7d9815cae47e)
75 | * [Move redundant input sanitizing to top-level](https://github.com/sferik/multi_xml/commit/4874148214dbbd2e5a4b877734e2519af42d6132)
76 | * [Refactor libxml and nokogiri parsers to inherit from a common ancestor](https://github.com/sferik/multi_xml/commit/e0fdffcbfe641b6aaa3952ffa0570a893de325c2)
77 |
78 | 0.2.2
79 | -----
80 | * [Respect the global load path](https://github.com/sferik/multi_xml/commit/68eb3011b37f0e0222bb842abd2a78e1285a97c1)
81 |
82 | 0.2.1
83 | -----
84 | * [Add BlueCloth gem as development dependency for Markdown formatting](https://github.com/sferik/multi_xml/commit/18195cd1789176709f68f0d7f8df7fc944fe4d24)
85 | * [Replace BlueCloth with Maruku for JRuby compatibility](https://github.com/sferik/multi_xml/commit/bad5516a5ec5e7ef7fc5a35c411721522357fa19)
86 |
87 | 0.2.0
88 | -----
89 | * [Do not automatically load all library files](https://github.com/sferik/multi_xml/commit/dbd0447e062e8930118573c5453150e9371e5955)
90 |
91 | 0.1.4
92 | -----
93 | * [Preserve backtrace when catching/throwing exceptions](https://github.com/sferik/multi_xml/commit/7475ee90201c2701fddd524082832d16ca62552d)
94 |
95 | 0.1.3
96 | -----
97 | * [Common error handling for all parsers](https://github.com/sferik/multi_xml/commit/5357c28eddc14e921fd1be1f445db602a8dddaf2)
98 |
99 | 0.1.2
100 | -----
101 | * [Make wrap an Array class method](https://github.com/sferik/multi_xml/commit/28307b69bd1d9460353c861466e425c2afadcf56)
102 |
103 | 0.1.1
104 | -----
105 | * [Fix parsing for strings that contain newlines](https://github.com/sferik/multi_xml/commit/68087a4ce50b5d63cfa60d6f1fcbc2f6d689e43f)
106 |
107 | 0.1.0
108 | -----
109 | * [Add support for LibXML and Nokogiri](https://github.com/sferik/multi_xml/commit/856bb17fce66601e0b3d3eb3b64dbeb25aed3bca)
110 |
111 | 0.0.1
112 | -----
113 | * [REXML support](https://github.com/sferik/multi_xml/commit/2a848384a7b90fb3e26b5a8d4dc3fa3e3f2db5fc)
114 |
--------------------------------------------------------------------------------
/lib/multi_xml.rb:
--------------------------------------------------------------------------------
1 | require "bigdecimal"
2 | require "date"
3 | require "stringio"
4 | require "time"
5 | require "yaml"
6 |
# MultiXml exposes one entry point, MultiXml.parse, on top of several
# interchangeable XML parser back ends. The parsed document is returned as
# plain Ruby hashes/arrays whose values are optionally typecast according to
# each element's `type` attribute.
module MultiXml # rubocop:disable Metrics/ModuleLength
  # Raised by MultiXml.parse when the active parser rejects the document.
  class ParseError < StandardError; end

  # Raised by MultiXml.default_parser when no supported XML library loads.
  class NoParserError < StandardError; end

  # Raised during typecasting when an element's `type` attribute is listed in
  # the :disallowed_types option.
  class DisallowedTypeError < StandardError
    def initialize(type)
      super("Disallowed type attribute: #{type.inspect}")
    end
  end

  unless defined?(REQUIREMENT_MAP)
    # [library to require, parser name] pairs, in the order default_parser
    # attempts to load them.
    REQUIREMENT_MAP = [
      ["ox", :ox],
      ["libxml", :libxml],
      ["nokogiri", :nokogiri],
      ["rexml/document", :rexml],
      ["oga", :oga]
    ].freeze
  end

  # Hash key under which an element's text content is looked up.
  CONTENT_ROOT = "__content__".freeze unless defined?(CONTENT_ROOT)

  unless defined?(PARSING)
    float_proc = proc { |float| float.to_f }

    # Time.parse rejects some strings that DateTime.parse accepts (for example
    # ISO week dates such as "2010-W01-1"). DateTime has no #utc in the
    # standard library, so the fallback converts to Time before normalizing.
    datetime_proc = proc do |time|
      Time.parse(time).utc
    rescue StandardError
      DateTime.parse(time).to_time.utc
    end

    # `type` attribute value => converter. Converters taking one argument
    # receive the element's content; converters taking two also receive the
    # element's hash. typecasting dispatches on Proc#arity, so every
    # single-argument converter must keep an arity of exactly 1.
    PARSING = {
      "symbol" => proc { |symbol| symbol.to_sym },
      "date" => proc { |date| Date.parse(date) },
      "datetime" => datetime_proc,
      "dateTime" => datetime_proc,
      "integer" => proc { |integer| integer.to_i },
      "float" => float_proc,
      "double" => float_proc,
      "decimal" => proc { |number| BigDecimal(number) },
      "boolean" => proc { |boolean| !%w[0 false].include?(boolean.strip) },
      "string" => proc { |string| string.to_s },
      # NOTE(security): YAML.load is applied to document content. The "yaml"
      # type is in DISALLOWED_XML_TYPES, so this only runs when a caller
      # explicitly allows it; do that for trusted input only.
      "yaml" => proc { |yaml| YAML.load(yaml) rescue yaml }, # rubocop:disable Style/RescueModifier
      "base64Binary" => proc { |binary| base64_decode(binary) },
      "binary" => proc { |binary, entity| parse_binary(binary, entity) },
      "file" => proc { |file, entity| parse_file(file, entity) }
    }.freeze
  end

  unless defined?(TYPE_NAMES)
    # Ruby class name => XML `type` attribute value.
    TYPE_NAMES = {
      "Symbol" => "symbol",
      "Integer" => "integer",
      "BigDecimal" => "decimal",
      "Float" => "float",
      "TrueClass" => "boolean",
      "FalseClass" => "boolean",
      "Date" => "date",
      "DateTime" => "datetime",
      "Time" => "datetime",
      "Array" => "array",
      "Hash" => "hash"
    }.freeze
  end

  # `type` attribute values that raise DisallowedTypeError unless the caller
  # overrides :disallowed_types.
  DISALLOWED_XML_TYPES = %w[symbol yaml].freeze

  # Options applied by MultiXml.parse when the caller does not override them.
  DEFAULT_OPTIONS = {
    typecast_xml_value: true,
    disallowed_types: DISALLOWED_XML_TYPES,
    symbolize_keys: false
  }.freeze

  class << self
    # Get the current parser class, selecting the default one on first use.
    def parser
      return @parser if defined?(@parser)

      self.parser = default_parser
      @parser
    end

    # The default parser based on what you currently
    # have loaded and installed. First checks to see
    # if any parsers are already loaded, then checks
    # to see which are installed if none are loaded.
    #
    # Raises NoParserError when none of the libraries in REQUIREMENT_MAP
    # can be required.
    def default_parser
      return :ox if defined?(::Ox)
      return :libxml if defined?(::LibXML)
      return :nokogiri if defined?(::Nokogiri)
      return :oga if defined?(::Oga)

      REQUIREMENT_MAP.each do |library, parser|
        require library
        return parser
      rescue LoadError
        next
      end
      raise(NoParserError,
        "No XML parser detected. If you're using Rubinius and Bundler, try adding an XML parser to your Gemfile (e.g. libxml-ruby, nokogiri, or rubysl-rexml). For more information, see https://github.com/sferik/multi_xml/issues/42.")
    end

    # Set the XML parser utilizing a symbol, string, or class.
    # Supported by default are:
    #
    # * :libxml
    # * :nokogiri
    # * :ox
    # * :rexml
    # * :oga
    #
    # A String or Symbol is required from multi_xml/parsers/<name> and then
    # resolved to the matching constant under MultiXml::Parsers; a Class or
    # Module is used as given. Anything else raises a RuntimeError.
    def parser=(new_parser)
      case new_parser
      when String, Symbol
        require "multi_xml/parsers/#{new_parser.to_s.downcase}"
        @parser = MultiXml::Parsers.const_get(new_parser.to_s.split("_").map(&:capitalize).join)
      when Class, Module
        @parser = new_parser
      else
        raise("Did not recognize your parser specification. Please specify either a symbol or a class.")
      end
    end

    # Parse an XML string or IO into Ruby.
    #
    # Returns an empty Hash for nil, blank or empty input. Errors raised by
    # the parser (instances of `parser.parse_error`) are re-raised as
    # MultiXml::ParseError; DisallowedTypeError is passed through unchanged.
    #
    # Options
    #
    # :symbolize_keys :: If true, will use symbols instead of strings for the keys.
    #
    # :disallowed_types :: Types to disallow from being typecasted. Defaults to `['symbol', 'yaml']`. Use `[]` to allow all types.
    #
    # :typecast_xml_value :: If true (the default), typecasts values according to their `type` attribute. If false, returns the parsed hash without typecasting.
    def parse(xml, options = {}) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
      xml ||= ""

      options = DEFAULT_OPTIONS.merge(options)

      xml = xml.strip if xml.respond_to?(:strip)
      begin
        xml = StringIO.new(xml) unless xml.respond_to?(:read)

        # Peek at the first character so empty input short-circuits, then
        # push it back so the parser sees the whole document.
        char = xml.getc
        return {} if char.nil?

        xml.ungetc(char)

        hash = undasherize_keys(parser.parse(xml) || {})
        hash = typecast_xml_value(hash, options[:disallowed_types]) if options[:typecast_xml_value]
      rescue DisallowedTypeError
        raise
      rescue parser.parse_error => e
        raise(ParseError, e.message, e.backtrace)
      end
      hash = symbolize_keys(hash) if options[:symbolize_keys]
      hash
    end

    # This module decorates files with the original_filename
    # and content_type methods.
    module FileLike # :nodoc:
      attr_writer :original_filename, :content_type

      def original_filename
        @original_filename || "untitled"
      end

      def content_type
        @content_type || "application/octet-stream"
      end
    end

    private

    # Decodes +binary+ when the element declares encoding="base64"; any other
    # encoding is returned untouched.
    # TODO: Add support for other encodings
    def parse_binary(binary, entity) # :nodoc:
      case entity["encoding"]
      when "base64"
        base64_decode(binary)
      else
        binary
      end
    end

    # Builds a StringIO from base64 content and tags it with the element's
    # "name" and "content_type" attributes (FileLike supplies the defaults).
    def parse_file(file, entity)
      f = StringIO.new(base64_decode(file))
      f.extend(FileLike)
      f.original_filename = entity["name"]
      f.content_type = entity["content_type"]
      f
    end

    # Decodes a base64 string.
    def base64_decode(input)
      input.unpack1("m")
    end

    # Returns a copy of +params+ with every Hash key, at any nesting depth,
    # converted to a Symbol. Non-collection values are returned as they are.
    def symbolize_keys(params)
      case params
      when Hash
        params.each_with_object({}) do |(key, value), result|
          result[key.to_sym] = symbolize_keys(value)
        end
      when Array
        params.map { |value| symbolize_keys(value) }
      else
        params
      end
    end

    # Returns a copy of +params+ with every Hash key, at any nesting depth,
    # stringified and its dashes replaced by underscores.
    def undasherize_keys(params)
      case params
      when Hash
        params.each_with_object({}) do |(key, value), hash|
          hash[key.to_s.tr("-", "_")] = undasherize_keys(value)
        end
      when Array
        params.map { |value| undasherize_keys(value) }
      else
        params
      end
    end

    # Recursively converts a parsed structure into typecast Ruby values.
    #
    # value            - Hash, Array or String produced by a parser.
    # disallowed_types - `type` attribute values that must raise
    #                    DisallowedTypeError (defaults to DISALLOWED_XML_TYPES).
    #
    # Raises RuntimeError for any other kind of value. The argument itself is
    # not modified.
    def typecast_xml_value(value, disallowed_types = nil)
      disallowed_types ||= DISALLOWED_XML_TYPES

      case value
      when Hash
        typecast_hash(value, disallowed_types)
      when Array
        items = value.map { |item| typecast_xml_value(item, disallowed_types) }
        # A one-element list collapses to that element.
        (items.length > 1) ? items : items.first
      when String
        value
      else
        raise("can't typecast #{value.class.name}: #{value.inspect}")
      end
    end

    # Typecasts a single element hash according to its "type" attribute.
    def typecast_hash(value, disallowed_types) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      type = value["type"]
      # A Hash here means <type> is a child element, not an attribute.
      type_is_attribute = !type.is_a?(Hash)

      if value.key?("type") && type_is_attribute && disallowed_types.include?(type)
        raise(DisallowedTypeError, type)
      end

      if type == "array"
        typecast_array_entries(value, disallowed_types)
      elsif value.key?(CONTENT_ROOT)
        typecast_content(value)
      elsif type == "string" && value["nil"] != "true"
        ""
      # blank or nil parsed values are represented by nil
      elsif value.empty? || value["nil"] == "true"
        nil
      # An element carrying nothing but a type attribute is nil as well.
      elsif type && value.size == 1 && type_is_attribute
        nil
      else
        children = value.transform_values { |child| typecast_xml_value(child, disallowed_types) }

        # Turn {:files => {:file => #} into {:files => #} so it is compatible with
        # how multipart uploaded files from HTML appear
        children["file"].is_a?(StringIO) ? children["file"] : children
      end
    end

    # Typecasts an element declared as type="array".
    #
    # Only the first non-"type" entry holding an Array or Hash is treated as
    # the list of items, so plain string attributes on the element are
    # ignored. Always returns an Array (empty when there are no items).
    def typecast_array_entries(value, disallowed_types)
      _, entries = value.find { |key, child| key != "type" && (child.is_a?(Array) || child.is_a?(Hash)) }

      case entries
      when Array
        entries.map { |entry| typecast_xml_value(entry, disallowed_types) }
      when Hash
        [typecast_xml_value(entries, disallowed_types)]
      else
        []
      end
    end

    # Typecasts an element that has text content (a CONTENT_ROOT key).
    #
    # With a recognized type the content is converted and the "type"
    # attribute dropped; if other attributes remain, a hash of those
    # attributes plus the converted content is returned instead of the bare
    # value. With an unrecognized or missing type the content is returned
    # as-is, or the untouched hash when it has other attributes.
    def typecast_content(value)
      content = value[CONTENT_ROOT]
      converter = PARSING[value["type"]]

      if converter.nil?
        (value.size > 1) ? value : content
      elsif converter.arity == 1
        attributes = value.reject { |key, _| key == "type" }
        converted = converter.call(content)
        (attributes.size > 1) ? attributes.merge(CONTENT_ROOT => converted) : converted
      else
        # Two-argument converters also read the element's attributes.
        converter.call(content, value)
      end
    end
  end
end
312 |
--------------------------------------------------------------------------------
/spec/parser_shared_example.rb:
--------------------------------------------------------------------------------
# Shared examples run against every parser back end. Include them with the
# parser's name (for example "Nokogiri"); each example goes through
# MultiXml.parse with that parser selected.
#
# NOTE(review): the XML fixtures below were reconstructed from what each
# example asserts — confirm them against the upstream spec file.
shared_examples_for "a parser" do |parser|
  before do
    MultiXml.parser = parser

    LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER) if parser == "LibXML"
  rescue LoadError
    pending "Parser #{parser} couldn't be loaded"
  end

  describe ".parse" do
    context "a blank string" do
      before do
        @xml = ""
      end

      it "returns an empty Hash" do
        expect(MultiXml.parse(@xml)).to eq({})
      end
    end

    context "a whitespace string" do
      before do
        @xml = " "
      end

      it "returns an empty Hash" do
        expect(MultiXml.parse(@xml)).to eq({})
      end
    end

    context "a frozen string" do
      before do
        # Frozen explicitly: parsing must not mutate its input.
        @xml = " ".freeze
      end

      it "returns an empty Hash" do
        expect(MultiXml.parse(@xml)).to eq({})
      end
    end

    unless parser == "Oga"
      context "an invalid XML document" do
        before do
          @xml = "<open></close>"
        end

        it "raises MultiXml::ParseError" do
          expect { MultiXml.parse(@xml) }.to raise_error(MultiXml::ParseError)
        end
      end
    end

    context "a valid XML document" do
      before do
        @xml = "<user/>"
      end

      it "parses correctly" do
        expect(MultiXml.parse(@xml)).to eq("user" => nil)
      end

      context "with CDATA" do
        before do
          @xml = "<user><![CDATA[Erik Berlin]]></user>"
        end

        it "returns the correct CDATA" do
          expect(MultiXml.parse(@xml)["user"]).to eq("Erik Berlin")
        end
      end

      context "element with the same inner element and attribute name" do
        before do
          @xml = "<user name='John'><name>Smith</name></user>"
        end

        it "returns names as Array" do
          expect(MultiXml.parse(@xml)["user"]["name"]).to eq %w[John Smith]
        end
      end

      context "with content" do
        before do
          @xml = "<user>Erik Berlin</user>"
        end

        it "returns the correct content" do
          expect(MultiXml.parse(@xml)["user"]).to eq("Erik Berlin")
        end
      end

      context "with an attribute" do
        before do
          @xml = '<user name="Erik Berlin"/>'
        end

        it "returns the correct attribute" do
          expect(MultiXml.parse(@xml)["user"]["name"]).to eq("Erik Berlin")
        end
      end

      context "with multiple attributes" do
        before do
          @xml = '<user name="Erik Berlin" screen_name="sferik"/>'
        end

        it "returns the correct attributes" do
          expect(MultiXml.parse(@xml)["user"]["name"]).to eq("Erik Berlin")
          expect(MultiXml.parse(@xml)["user"]["screen_name"]).to eq("sferik")
        end
      end

      context "typecast management" do
        before do
          @xml = %(
            <global-settings>
              <group>
                <name>Settings</name>
                <setting type="string">
                  <description>Test</description>
                </setting>
              </group>
            </global-settings>
          )
        end

        context "with :typecast_xml_value => true" do
          before do
            @setting = MultiXml.parse(@xml)["global_settings"]["group"]["setting"]
          end

          it { expect(@setting).to eq "" }
        end

        context "with :typecast_xml_value => false" do
          before do
            @setting = MultiXml.parse(@xml, typecast_xml_value: false)["global_settings"]["group"]["setting"]
          end

          it { expect(@setting).to eq("type" => "string", "description" => {"__content__" => "Test"}) }
        end
      end

      context "with :symbolize_keys => true" do
        before do
          @xml = '<users><user name="Erik Berlin"/><user><name>Wynn Netherland</name></user></users>'
        end

        it "symbolizes keys" do
          expected = {users: {user: [{name: "Erik Berlin"}, {name: "Wynn Netherland"}]}}
          expect(MultiXml.parse(@xml, symbolize_keys: true)).to eq(expected)
        end
      end

      context 'with an attribute type="boolean"' do
        %w[true false].each do |boolean|
          context "when #{boolean}" do
            it "returns #{boolean}" do
              xml = "<tag type=\"boolean\">#{boolean}</tag>"
              expect(MultiXml.parse(xml)["tag"]).to be instance_eval(boolean)
            end
          end
        end

        context "when 1" do
          before do
            @xml = '<tag type="boolean">1</tag>'
          end

          it "returns true" do
            expect(MultiXml.parse(@xml)["tag"]).to be true
          end
        end

        context "when 0" do
          before do
            @xml = '<tag type="boolean">0</tag>'
          end

          it "returns false" do
            expect(MultiXml.parse(@xml)["tag"]).to be false
          end
        end
      end

      context 'with an attribute type="integer"' do
        context "with a positive integer" do
          before do
            @xml = '<tag type="integer">1</tag>'
          end

          it "returns a Integer" do
            expect(MultiXml.parse(@xml)["tag"]).to be_a(Integer)
          end

          it "returns a positive number" do
            expect(MultiXml.parse(@xml)["tag"]).to be > 0
          end

          it "returns the correct number" do
            expect(MultiXml.parse(@xml)["tag"]).to eq(1)
          end
        end

        context "with a negative integer" do
          before do
            @xml = '<tag type="integer">-1</tag>'
          end

          it "returns a Integer" do
            expect(MultiXml.parse(@xml)["tag"]).to be_a(Integer)
          end

          it "returns a negative number" do
            expect(MultiXml.parse(@xml)["tag"]).to be < 0
          end

          it "returns the correct number" do
            expect(MultiXml.parse(@xml)["tag"]).to eq(-1)
          end
        end
      end

      context 'with an attribute type="string"' do
        before do
          @xml = '<tag type="string"></tag>'
        end

        it "returns a String" do
          expect(MultiXml.parse(@xml)["tag"]).to be_a(String)
        end

        it "returns the correct string" do
          expect(MultiXml.parse(@xml)["tag"]).to eq("")
        end
      end

      context 'with an attribute type="date"' do
        before do
          @xml = '<tag type="date">1970-01-01</tag>'
        end

        it "returns a Date" do
          expect(MultiXml.parse(@xml)["tag"]).to be_a(Date)
        end

        it "returns the correct date" do
          expect(MultiXml.parse(@xml)["tag"]).to eq(Date.parse("1970-01-01"))
        end
      end

      context 'with an attribute type="datetime"' do
        before do
          @xml = '<tag type="datetime">1970-01-01 00:00</tag>'
        end

        it "returns a Time" do
          expect(MultiXml.parse(@xml)["tag"]).to be_a(Time)
        end

        it "returns the correct time" do
          expect(MultiXml.parse(@xml)["tag"]).to eq(Time.parse("1970-01-01 00:00"))
        end
      end

      context 'with an attribute type="dateTime"' do
        before do
          @xml = '<tag type="dateTime">1970-01-01 00:00</tag>'
        end

        it "returns a Time" do
          expect(MultiXml.parse(@xml)["tag"]).to be_a(Time)
        end

        it "returns the correct time" do
          expect(MultiXml.parse(@xml)["tag"]).to eq(Time.parse("1970-01-01 00:00"))
        end
      end

      context 'with an attribute type="double"' do
        before do
          @xml = '<tag type="double">3.14159265358979</tag>'
        end

        it "returns a Float" do
          expect(MultiXml.parse(@xml)["tag"]).to be_a(Float)
        end

        it "returns the correct number" do
          expect(MultiXml.parse(@xml)["tag"]).to eq(3.14159265358979)
        end
      end

      context 'with an attribute type="decimal"' do
        before do
          @xml = '<tag type="decimal">3.14159265358979</tag>'
        end

        it "returns a BigDecimal" do
          expect(MultiXml.parse(@xml)["tag"]).to be_a(BigDecimal)
        end

        it "returns the correct number" do
          expect(MultiXml.parse(@xml)["tag"]).to eq(3.14159265358979)
        end
      end

      context 'with an attribute type="base64Binary"' do
        before do
          @xml = '<tag type="base64Binary">aW1hZ2UucG5n</tag>'
        end

        it "returns a String" do
          expect(MultiXml.parse(@xml)["tag"]).to be_a(String)
        end

        it "returns the correct string" do
          expect(MultiXml.parse(@xml)["tag"]).to eq("image.png")
        end
      end

      context 'with an attribute type="yaml"' do
        before do
          # The two-space indent keeps has_underscores inside the same
          # sequence item as has-dashes.
          @xml = "<tag type=\"yaml\">--- \n1: returns an integer\n:message: Have a nice day\narray: \n- has-dashes: true\n  has_underscores: true\n</tag>"
        end

        it "raises MultiXML::DisallowedTypeError by default" do
          expect { MultiXml.parse(@xml)["tag"] }.to raise_error(MultiXml::DisallowedTypeError)
        end

        it "returns the correctly parsed YAML when the type is allowed" do
          expected = {
            :message => "Have a nice day",
            1 => "returns an integer",
            "array" => [{"has-dashes" => true, "has_underscores" => true}]
          }
          expect(MultiXml.parse(@xml, disallowed_types: [])["tag"]).to eq(expected)
        end
      end

      context 'with an attribute type="symbol"' do
        before do
          @xml = '<tag type="symbol">my_symbol</tag>'
        end

        it "raises MultiXML::DisallowedTypeError" do
          expect { MultiXml.parse(@xml)["tag"] }.to raise_error(MultiXml::DisallowedTypeError)
        end

        it "returns the correctly parsed Symbol when the type is allowed" do
          expect(MultiXml.parse(@xml, disallowed_types: [])["tag"]).to eq(:my_symbol)
        end
      end

      context 'with an attribute type="file"' do
        before do
          @xml = '<tag type="file" name="data.txt" content_type="text/plain">ZGF0YQ==</tag>'
        end

        it "returns a StringIO" do
          expect(MultiXml.parse(@xml)["tag"]).to be_a(StringIO)
        end

        it "is decoded correctly" do
          expect(MultiXml.parse(@xml)["tag"].string).to eq("data")
        end

        it "has the correct file name" do
          expect(MultiXml.parse(@xml)["tag"].original_filename).to eq("data.txt")
        end

        it "has the correct content type" do
          expect(MultiXml.parse(@xml)["tag"].content_type).to eq("text/plain")
        end

        context "with missing name and content type" do
          before do
            @xml = '<tag type="file">ZGF0YQ==</tag>'
          end

          it "returns a StringIO" do
            expect(MultiXml.parse(@xml)["tag"]).to be_a(StringIO)
          end

          it "is decoded correctly" do
            expect(MultiXml.parse(@xml)["tag"].string).to eq("data")
          end

          it "has the default file name" do
            expect(MultiXml.parse(@xml)["tag"].original_filename).to eq("untitled")
          end

          it "has the default content type" do
            expect(MultiXml.parse(@xml)["tag"].content_type).to eq("application/octet-stream")
          end
        end
      end

      context 'with an attribute type="array"' do
        before do
          @xml = '<users type="array"><user>Erik Berlin</user><user>Wynn Netherland</user></users>'
        end

        it "returns an Array" do
          expect(MultiXml.parse(@xml)["users"]).to be_a(Array)
        end

        it "returns the correct array" do
          expect(MultiXml.parse(@xml)["users"]).to eq(["Erik Berlin", "Wynn Netherland"])
        end
      end

      context 'with an attribute type="array" in addition to other attributes' do
        before do
          @xml = '<users type="array" foo="bar"><user>Erik Berlin</user><user>Wynn Netherland</user></users>'
        end

        it "returns an Array" do
          expect(MultiXml.parse(@xml)["users"]).to be_a(Array)
        end

        it "returns the correct array" do
          expect(MultiXml.parse(@xml)["users"]).to eq(["Erik Berlin", "Wynn Netherland"])
        end
      end

      context 'with an attribute type="array" containing only one item' do
        before do
          @xml = '<users type="array"><user>Erik Berlin</user></users>'
        end

        it "returns an Array" do
          expect(MultiXml.parse(@xml)["users"]).to be_a(Array)
        end

        it "returns the correct array" do
          expect(MultiXml.parse(@xml)["users"]).to eq(["Erik Berlin"])
        end
      end

      %w[integer boolean date datetime file].each do |type|
        context "with an empty attribute type=\"#{type}\"" do
          before do
            @xml = "<tag type=\"#{type}\"/>"
          end

          it "returns nil" do
            expect(MultiXml.parse(@xml)["tag"]).to be_nil
          end
        end
      end

      %w[yaml symbol].each do |type|
        context "with an empty attribute type=\"#{type}\"" do
          before do
            @xml = "<tag type=\"#{type}\"/>"
          end

          it "raises MultiXml::DisallowedTypeError by default" do
            expect { MultiXml.parse(@xml)["tag"] }.to raise_error(MultiXml::DisallowedTypeError)
          end

          it "returns nil when the type is allowed" do
            expect(MultiXml.parse(@xml, disallowed_types: [])["tag"]).to be_nil
          end
        end
      end

      context 'with an empty attribute type="array"' do
        before do
          @xml = '<tag type="array"/>'
        end

        it "returns an empty Array" do
          expect(MultiXml.parse(@xml)["tag"]).to eq([])
        end

        context "with whitespace" do
          before do
            @xml = '<tag type="array"> </tag>'
          end

          it "returns an empty Array" do
            expect(MultiXml.parse(@xml)["tag"]).to eq([])
          end
        end
      end

      context "with XML entities" do
        before do
          # Expected character => escaped form embedded in the document.
          @xml_entities = {
            "<" => "&lt;",
            ">" => "&gt;",
            '"' => "&quot;",
            "'" => "&apos;",
            "&" => "&amp;"
          }
        end

        context "in content" do
          it "returns unescaped XML entities" do
            @xml_entities.each do |key, value|
              xml = "<tag>#{value}</tag>"
              expect(MultiXml.parse(xml)["tag"]).to eq(key)
            end
          end
        end

        context "in attribute" do
          it "returns unescaped XML entities" do
            @xml_entities.each do |key, value|
              xml = "<tag attribute=\"#{value}\"/>"
              expect(MultiXml.parse(xml)["tag"]["attribute"]).to eq(key)
            end
          end
        end
      end

      context "with dasherized tag" do
        before do
          @xml = "<tag-1/>"
        end

        it "returns undasherize tag" do
          expect(MultiXml.parse(@xml).keys).to include("tag_1")
        end
      end

      context "with dasherized attribute" do
        before do
          @xml = '<tag attribute-1="1"></tag>'
        end

        it "returns undasherize attribute" do
          expect(MultiXml.parse(@xml)["tag"].keys).to include("attribute_1")
        end
      end

      context "with children" do
        context "with attributes" do
          before do
            @xml = '<users><user name="Erik Berlin"/></users>'
          end

          it "returns the correct attributes" do
            expect(MultiXml.parse(@xml)["users"]["user"]["name"]).to eq("Erik Berlin")
          end
        end

        context "with text" do
          before do
            @xml = "<user><name>Erik Berlin</name></user>"
          end

          it "returns the correct text" do
            expect(MultiXml.parse(@xml)["user"]["name"]).to eq("Erik Berlin")
          end
        end

        context "with an unrecognized attribute type" do
          before do
            @xml = '<user type="admin"><name>Erik Berlin</name></user>'
          end

          it "passes through the type" do
            expect(MultiXml.parse(@xml)["user"]["type"]).to eq("admin")
          end
        end

        context "with attribute tags on content nodes" do
          context "non 'type' attributes" do
            before do
              @xml = <<-XML
                <options>
                  <value currency='USD'>123</value>
                  <value number='percent'>0.123</value>
                </options>
              XML
              @parsed_xml = MultiXml.parse(@xml)
            end

            it "adds the attributes to the value hash" do
              expect(@parsed_xml["options"]["value"][0]["__content__"]).to eq("123")
              expect(@parsed_xml["options"]["value"][0]["currency"]).to eq("USD")
              expect(@parsed_xml["options"]["value"][1]["__content__"]).to eq("0.123")
              expect(@parsed_xml["options"]["value"][1]["number"]).to eq("percent")
            end
          end

          context "unrecognized type attributes" do
            before do
              @xml = <<-XML
                <options>
                  <value type='USD'>123</value>
                  <value type='percent'>0.123</value>
                  <value currency='USD'>123</value>
                </options>
              XML
              @parsed_xml = MultiXml.parse(@xml)
            end

            it "adds the attributes to the value hash passing through the type" do
              expect(@parsed_xml["options"]["value"][0]["__content__"]).to eq("123")
              expect(@parsed_xml["options"]["value"][0]["type"]).to eq("USD")
              expect(@parsed_xml["options"]["value"][1]["__content__"]).to eq("0.123")
              expect(@parsed_xml["options"]["value"][1]["type"]).to eq("percent")
              expect(@parsed_xml["options"]["value"][2]["__content__"]).to eq("123")
              expect(@parsed_xml["options"]["value"][2]["currency"]).to eq("USD")
            end
          end

          context "mixing attributes and non-attributes content nodes type attributes" do
            before do
              @xml = <<-XML
                <options>
                  <value type='USD'>123</value>
                  <value type='percent'>0.123</value>
                  <value>123</value>
                </options>
              XML
              @parsed_xml = MultiXml.parse(@xml)
            end

            it "adds the attributes to the value hash passing through the type" do
              expect(@parsed_xml["options"]["value"][0]["__content__"]).to eq("123")
              expect(@parsed_xml["options"]["value"][0]["type"]).to eq("USD")
              expect(@parsed_xml["options"]["value"][1]["__content__"]).to eq("0.123")
              expect(@parsed_xml["options"]["value"][1]["type"]).to eq("percent")
              expect(@parsed_xml["options"]["value"][2]).to eq("123")
            end
          end

          context "mixing recognized type attribute and non-type attributes on content nodes" do
            before do
              @xml = <<-XML
                <options>
                  <value number='USD' type='integer'>123</value>
                </options>
              XML
              @parsed_xml = MultiXml.parse(@xml)
            end

            it "adds the the non-type attribute and remove the recognized type attribute and do the typecast" do
              expect(@parsed_xml["options"]["value"]["__content__"]).to eq(123)
              expect(@parsed_xml["options"]["value"]["number"]).to eq("USD")
            end
          end

          context "mixing unrecognized type attribute and non-type attributes on content nodes" do
            before do
              @xml = <<-XML
                <options>
                  <value number='USD' type='currency'>123</value>
                </options>
              XML
              @parsed_xml = MultiXml.parse(@xml)
            end

            it "adds the the non-type attributes and type attribute to the value hash" do
              expect(@parsed_xml["options"]["value"]["__content__"]).to eq("123")
              expect(@parsed_xml["options"]["value"]["number"]).to eq("USD")
              expect(@parsed_xml["options"]["value"]["type"]).to eq("currency")
            end
          end
        end

        context "with newlines and whitespace" do
          before do
            @xml = <<-XML
              <user>
                <name>Erik Berlin</name>
              </user>
            XML
          end

          it "parses correctly" do
            expect(MultiXml.parse(@xml)).to eq("user" => {"name" => "Erik Berlin"})
          end
        end

        # Babies having babies
        context "with children" do
          before do
            @xml = '<users><user name="Erik Berlin"><status text="Hello"/></user></users>'
          end

          it "parses correctly" do
            expected = {"users" => {"user" => {"name" => "Erik Berlin", "status" => {"text" => "Hello"}}}}
            expect(MultiXml.parse(@xml)).to eq(expected)
          end
        end
      end

      context "with sibling children" do
        before do
          @xml = "<users><user>Erik Berlin</user><user>Wynn Netherland</user></users>"
        end

        it "returns an Array" do
          expect(MultiXml.parse(@xml)["users"]["user"]).to be_a(Array)
        end

        it "parses correctly" do
          expect(MultiXml.parse(@xml)).to eq("users" => {"user" => ["Erik Berlin", "Wynn Netherland"]})
        end
      end
    end

    context "a duplexed stream" do
      before do
        @xml, wr = IO.pipe

        # Feed the document one character at a time from another thread so
        # the parser reads from a live pipe.
        Thread.new do
          "<user/>".each_char do |chunk|
            wr << chunk
          end
          wr.close
        end
      end

      it "parses correctly" do
        expect(MultiXml.parse(@xml)).to eq("user" => nil)
      end
    end
  end
end
725 |
--------------------------------------------------------------------------------