├── .rspec ├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── lib ├── multi_xml │ ├── version.rb │ └── parsers │ │ ├── libxml.rb │ │ ├── nokogiri.rb │ │ ├── oga.rb │ │ ├── libxml2_parser.rb │ │ ├── ox.rb │ │ └── rexml.rb └── multi_xml.rb ├── .yardopts ├── .gitignore ├── bin ├── setup └── console ├── spec ├── helper.rb ├── speed.rb ├── multi_xml_spec.rb └── parser_shared_example.rb ├── Gemfile ├── Rakefile ├── LICENSE.md ├── .rubocop.yml ├── multi_xml.gemspec ├── CONTRIBUTING.md ├── README.md └── CHANGELOG.md /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --order random 3 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [sferik] 2 | -------------------------------------------------------------------------------- /lib/multi_xml/version.rb: -------------------------------------------------------------------------------- 1 | module MultiXml 2 | VERSION = Gem::Version.create("0.7.2") 3 | end 4 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --no-private 2 | --protected 3 | --markup markdown 4 | - 5 | CHANGELOG.md 6 | CONTRIBUTING.md 7 | LICENSE.md 8 | README.md 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *~ 3 | .bundle 4 | .rvmrc 5 | .yardoc 6 | Gemfile.lock 7 | coverage/* 8 | doc/* 9 | log/* 10 | measurement/* 11 | pkg/* 12 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | 
bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "bundler/setup" 4 | require "multi_xml" 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 8 | 9 | require "irb" 10 | IRB.start(__FILE__) 11 | -------------------------------------------------------------------------------- /spec/helper.rb: -------------------------------------------------------------------------------- 1 | def jruby? 2 | RUBY_PLATFORM == "java" 3 | end 4 | 5 | require "simplecov" 6 | 7 | SimpleCov.start do 8 | add_filter "/spec" 9 | percent = (jruby?) ? 91.38 : 93.16 10 | minimum_coverage(percent) 11 | end 12 | 13 | require "multi_xml" 14 | require "rspec" 15 | 16 | RSpec.configure do |config| 17 | config.expect_with :rspec do |c| 18 | c.syntax = :expect 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | strategy: 6 | matrix: 7 | ruby: ["3.2", "3.3", "3.4", "jruby-10"] 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | - uses: ruby/setup-ruby@v1 12 | with: 13 | ruby-version: ${{ matrix.ruby }} 14 | bundler-cache: true 15 | - run: bundle exec rake test 16 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem "libxml-ruby", require: nil, platforms: :ruby 4 | gem "nokogiri", require: nil 5 | gem "oga", ">= 2.3", require: nil 6 | gem 
"ox", require: nil, platforms: :ruby 7 | gem "rexml", require: nil 8 | 9 | gem "rake", ">= 13.2.1" 10 | gem "rspec", ">= 3.12" 11 | gem "rubocop", ">= 1.62.1" 12 | gem "rubocop-performance", ">= 1.20.2" 13 | gem "rubocop-rake", ">= 0.6" 14 | gem "rubocop-rspec", ">= 2.24" 15 | gem "simplecov", ">= 0.22" 16 | gem "standard", ">= 1.35.1" 17 | gem "standard-performance", ">= 1.3.1" 18 | gem "yard", ">= 0.9.36" 19 | gem "yardstick", ">= 0.9.9" 20 | 21 | gemspec 22 | -------------------------------------------------------------------------------- /lib/multi_xml/parsers/libxml.rb: -------------------------------------------------------------------------------- 1 | require "libxml" unless defined?(LibXML) 2 | require "multi_xml/parsers/libxml2_parser" 3 | 4 | module MultiXml 5 | module Parsers 6 | module Libxml # :nodoc: 7 | include Libxml2Parser 8 | extend self 9 | 10 | def parse_error 11 | ::LibXML::XML::Error 12 | end 13 | 14 | def parse(xml) 15 | node_to_hash(LibXML::XML::Parser.io(xml).parse.root) 16 | end 17 | 18 | private 19 | 20 | def each_child(node, &) 21 | node.each_child(&) 22 | end 23 | 24 | def each_attr(node, &) 25 | node.each_attr(&) 26 | end 27 | 28 | def node_name(node) 29 | node.name 30 | end 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler" 2 | Bundler::GemHelper.install_tasks 3 | 4 | require "rspec/core/rake_task" 5 | RSpec::Core::RakeTask.new(:spec) 6 | 7 | task test: :spec 8 | 9 | require "rubocop/rake_task" 10 | RuboCop::RakeTask.new 11 | 12 | require "yard" 13 | YARD::Rake::YardocTask.new do |task| 14 | task.files = ["lib/**/*.rb", "-", "LICENSE.md"] 15 | task.options = [ 16 | "--no-private", 17 | "--protected", 18 | "--output-dir", "doc/yard", 19 | "--markup", "markdown" 20 | ] 21 | end 22 | 23 | require "yardstick/rake/measurement" 24 | Yardstick::Rake::Measurement.new do 
|measurement| 25 | measurement.output = "measurement/report.txt" 26 | end 27 | 28 | require "yardstick/rake/verify" 29 | Yardstick::Rake::Verify.new do |verify| 30 | verify.threshold = 48.8 31 | end 32 | 33 | task default: %i[spec rubocop verify_measurements] 34 | -------------------------------------------------------------------------------- /lib/multi_xml/parsers/nokogiri.rb: -------------------------------------------------------------------------------- 1 | require "nokogiri" unless defined?(Nokogiri) 2 | require "multi_xml/parsers/libxml2_parser" 3 | 4 | module MultiXml 5 | module Parsers 6 | module Nokogiri # :nodoc: 7 | include Libxml2Parser 8 | extend self 9 | 10 | def parse_error 11 | ::Nokogiri::XML::SyntaxError 12 | end 13 | 14 | def parse(xml) 15 | doc = ::Nokogiri::XML(xml) 16 | raise(doc.errors.first) unless doc.errors.empty? 17 | 18 | node_to_hash(doc.root) 19 | end 20 | 21 | private 22 | 23 | def each_child(node, &) 24 | node.children.each(&) 25 | end 26 | 27 | def each_attr(node, &) 28 | node.attribute_nodes.each(&) 29 | end 30 | 31 | def node_name(node) 32 | node.node_name 33 | end 34 | end 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2025 Erik Berlin 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | require: 2 | - standard 3 | 4 | plugins: 5 | - rubocop-performance 6 | - rubocop-rake 7 | - rubocop-rspec 8 | - standard-performance 9 | 10 | AllCops: 11 | NewCops: enable 12 | TargetRubyVersion: 3.2 13 | 14 | Layout/ArgumentAlignment: 15 | EnforcedStyle: with_fixed_indentation 16 | IndentationWidth: 2 17 | 18 | Layout/CaseIndentation: 19 | EnforcedStyle: end 20 | 21 | Layout/EndAlignment: 22 | EnforcedStyleAlignWith: start_of_line 23 | 24 | Layout/LineLength: 25 | Max: 140 26 | 27 | Layout/ParameterAlignment: 28 | EnforcedStyle: with_fixed_indentation 29 | IndentationWidth: 2 30 | 31 | Layout/SpaceInsideHashLiteralBraces: 32 | EnforcedStyle: no_space 33 | 34 | Metrics/ParameterLists: 35 | CountKeywordArgs: false 36 | 37 | Style/Alias: 38 | EnforcedStyle: prefer_alias_method 39 | 40 | Style/Documentation: 41 | Enabled: false 42 | 43 | Style/FrozenStringLiteralComment: 44 | EnforcedStyle: never 45 | 46 | Style/OpenStructUse: 47 | Enabled: false 48 | 49 | Style/StringLiterals: 50 | EnforcedStyle: double_quotes 51 | 52 | Style/StringLiteralsInInterpolation: 53 | EnforcedStyle: double_quotes 54 | 55 | Style/TernaryParentheses: 56 | EnforcedStyle: require_parentheses 57 | -------------------------------------------------------------------------------- /spec/speed.rb: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -wW1 2 | 3 | $LOAD_PATH << "." 4 | $LOAD_PATH << "../lib" 5 | 6 | if __FILE__ == $PROGRAM_NAME 7 | while (i = ARGV.index("-I")) 8 | _, path = ARGV.slice!(i, 2) 9 | $LOAD_PATH << path 10 | end 11 | end 12 | 13 | require "optparse" 14 | require "stringio" 15 | require "multi_xml" 16 | 17 | %w[libxml nokogiri ox].each do |library| 18 | require library 19 | rescue LoadError 20 | next 21 | end 22 | 23 | $verbose = 0 24 | $parsers = [] 25 | $iterations = 10 26 | 27 | opts = OptionParser.new 28 | opts.on("-v", "increase verbosity") { $verbose += 1 } 29 | opts.on("-p", "--parser [String]", String, "parser to test") { |parsers| $parsers = [parsers] } 30 | opts.on("-i", "--iterations [Int]", Integer, "iterations") { |iterations| $iterations = iterations } 31 | opts.on("-h", "--help", "Show this display") do 32 | puts opts 33 | Process.exit!(0) 34 | end 35 | files = opts.parse(ARGV) 36 | 37 | if $parsers.empty? 
38 | $parsers << "libxml" if defined?(LibXML) 39 | $parsers << "nokogiri" if defined?(Nokogiri) 40 | $parsers << "ox" if defined?(Ox) 41 | end 42 | 43 | files.each do |filename| 44 | times = {} 45 | xml = File.read(filename) 46 | $parsers.each do |p| 47 | MultiXml.parser = p 48 | start = Time.now 49 | $iterations.times do 50 | io = StringIO.new(xml) 51 | MultiXml.parse(io) 52 | end 53 | times[p] = Time.now - start 54 | end 55 | times.each do |p, t| 56 | puts format("%8s took %0.3f seconds to parse %s %d times.", p, t, filename, $iterations) 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /spec/multi_xml_spec.rb: -------------------------------------------------------------------------------- 1 | require "helper" 2 | require "parser_shared_example" 3 | 4 | class MockDecoder 5 | def self.parse; end 6 | end 7 | 8 | describe "MultiXml" do 9 | context "Parsers" do 10 | it "picks a default parser" do 11 | expect(MultiXml.parser).to be_a(Module) 12 | expect(MultiXml.parser).to respond_to(:parse) 13 | end 14 | 15 | it "defaults to the best available gem" do 16 | # Clear cache variable possibly set by previous tests 17 | MultiXml.send(:remove_instance_variable, :@parser) if MultiXml.instance_variable_defined?(:@parser) 18 | if jruby? 
19 | # Ox and Libxml are not currently available on JRuby, so Nokogiri is the best available gem 20 | expect(MultiXml.parser.name).to eq("MultiXml::Parsers::Nokogiri") 21 | else 22 | expect(MultiXml.parser.name).to eq("MultiXml::Parsers::Ox") 23 | end 24 | end 25 | 26 | it "is settable via a symbol" do 27 | MultiXml.parser = :rexml 28 | expect(MultiXml.parser.name).to eq("MultiXml::Parsers::Rexml") 29 | end 30 | 31 | it "is settable via a class" do 32 | MultiXml.parser = MockDecoder 33 | expect(MultiXml.parser.name).to eq("MockDecoder") 34 | end 35 | end 36 | 37 | [%w[LibXML libxml], 38 | %w[REXML rexml/document], 39 | %w[Nokogiri nokogiri], 40 | %w[Ox ox], 41 | %w[Oga oga]].each do |parser| 42 | require parser.last 43 | context "#{parser.first} parser" do 44 | it_behaves_like "a parser", parser.first 45 | end 46 | rescue LoadError 47 | puts "Tests not run for #{parser.first} due to a LoadError" 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /multi_xml.gemspec: -------------------------------------------------------------------------------- 1 | require_relative "lib/multi_xml/version" 2 | 3 | Gem::Specification.new do |spec| 4 | spec.name = "multi_xml" 5 | spec.version = MultiXml::VERSION 6 | spec.authors = ["Erik Berlin"] 7 | spec.email = ["sferik@gmail.com"] 8 | 9 | spec.summary = "Provides swappable XML backends utilizing LibXML, Nokogiri, Ox, or REXML." 
10 | spec.homepage = "https://github.com/sferik/multi_xml" 11 | spec.license = "MIT" 12 | spec.required_ruby_version = ">= 3.2" 13 | 14 | spec.metadata["allowed_push_host"] = "https://rubygems.org" 15 | 16 | spec.metadata = { 17 | "allowed_push_host" => "https://rubygems.org", 18 | "bug_tracker_uri" => "https://github.com/sferik/multi_xml/issues", 19 | "changelog_uri" => "https://github.com/sferik/multi_xml/blob/master/CHANGELOG.md", 20 | "documentation_uri" => "https://rubydoc.info/gems/multi_xml/", 21 | "funding_uri" => "https://github.com/sponsors/sferik", 22 | "homepage_uri" => spec.homepage, 23 | "rubygems_mfa_required" => "true", 24 | "source_code_uri" => "https://github.com/sferik/multi_xml" 25 | } 26 | 27 | # Specify which files should be added to the gem when it is released. 28 | # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 29 | spec.files = Dir.chdir(__dir__) do 30 | `git ls-files -z`.split("\x0").reject do |f| 31 | (File.expand_path(f) == __FILE__) || f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor]) 32 | end 33 | end 34 | spec.bindir = "exe" 35 | spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } 36 | spec.require_paths = ["lib"] 37 | 38 | # For more information and examples about making a new gem, check out our 39 | # guide at: https://bundler.io/guides/creating_gem.html 40 | spec.metadata["rubygems_mfa_required"] = "true" 41 | 42 | spec.add_dependency("bigdecimal", "~> 3.1") 43 | end 44 | -------------------------------------------------------------------------------- /lib/multi_xml/parsers/oga.rb: -------------------------------------------------------------------------------- 1 | require "oga" unless defined?(Oga) 2 | require "multi_xml/parsers/libxml2_parser" 3 | 4 | module MultiXml 5 | module Parsers 6 | module Oga # :nodoc: 7 | include Libxml2Parser 8 | extend self 9 | 10 | def parse_error 11 | LL::ParserError 12 | end 13 | 14 | def parse(io) 15 | document = 
::Oga.parse_xml(io) 16 | node_to_hash(document.children[0]) 17 | end 18 | 19 | def node_to_hash(node, hash = {}) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength 20 | node_hash = {MultiXml::CONTENT_ROOT => ""} 21 | 22 | name = node_name(node) 23 | 24 | # Insert node hash into parent hash correctly. 25 | case hash[name] 26 | when Array 27 | hash[name] << node_hash 28 | when Hash 29 | hash[name] = [hash[name], node_hash] 30 | when NilClass 31 | hash[name] = node_hash 32 | end 33 | 34 | # Handle child elements 35 | each_child(node) do |c| 36 | if c.is_a?(::Oga::XML::Element) 37 | node_to_hash(c, node_hash) 38 | elsif c.is_a?(::Oga::XML::Text) || c.is_a?(::Oga::XML::Cdata) 39 | node_hash[MultiXml::CONTENT_ROOT] += c.text 40 | end 41 | end 42 | 43 | # Remove content node if it is empty 44 | node_hash.delete(MultiXml::CONTENT_ROOT) if node_hash[MultiXml::CONTENT_ROOT].strip.empty? 45 | 46 | # Handle attributes 47 | each_attr(node) do |a| 48 | key = node_name(a) 49 | v = node_hash[key] 50 | node_hash[key] = ((v) ? [a.value, v] : a.value) 51 | end 52 | 53 | hash 54 | end 55 | 56 | private 57 | 58 | def each_child(node, &) 59 | node.children.each(&) 60 | end 61 | 62 | def each_attr(node, &) 63 | node.attributes.each(&) 64 | end 65 | 66 | def node_name(node) 67 | node.name 68 | end 69 | end 70 | end 71 | end 72 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | In the spirit of [free software][free-sw] , **everyone** is encouraged to help 3 | improve this project. 
4 | 5 | [free-sw]: http://www.fsf.org/licensing/essays/free-sw.html 6 | 7 | Here are some ways *you* can contribute: 8 | 9 | * by using alpha, beta, and prerelease versions 10 | * by reporting bugs 11 | * by suggesting new features 12 | * by writing or editing documentation 13 | * by writing specifications 14 | * by writing code (**no patch is too small**: fix typos, add comments, clean up 15 | inconsistent whitespace) 16 | * by refactoring code 17 | * by resolving [issues][] 18 | * by reviewing patches 19 | * [financially][gittip] 20 | 21 | [issues]: https://github.com/sferik/multi_xml/issues 22 | [gittip]: https://www.gittip.com/sferik/ 23 | 24 | ## Submitting an Issue 25 | We use the [GitHub issue tracker][issues] to track bugs and features. Before 26 | submitting a bug report or feature request, check to make sure it hasn't 27 | already been submitted. When submitting a bug report, please include a [Gist][] 28 | that includes a stack trace and any details that may be necessary to reproduce 29 | the bug, including your gem version, Ruby version, and operating system. 30 | Ideally, a bug report should include a pull request with failing specs. 31 | 32 | [gist]: https://gist.github.com/ 33 | 34 | ## Submitting a Pull Request 35 | 1. [Fork the repository.][fork] 36 | 2. [Create a topic branch.][branch] 37 | 3. Add specs for your unimplemented feature or bug fix. 38 | 4. Run `bundle exec rake spec`. If your specs pass, return to step 3. 39 | 5. Implement your feature or bug fix. 40 | 6. Run `bundle exec rake`. If your specs fail, return to step 5. 41 | 7. Run `open coverage/index.html`. If your changes are not completely covered 42 | by your tests, return to step 3. 43 | 8. Add documentation for your feature or bug fix. 44 | 9. Run `bundle exec rake verify_measurements`. If your changes are not 100% 45 | documented, go back to step 8. 46 | 10. Add, commit, and push your changes. 47 | 11. 
[Submit a pull request.][pr] 48 | 49 | [fork]: http://help.github.com/fork-a-repo/ 50 | [branch]: http://learn.github.com/p/branching.html 51 | [pr]: http://help.github.com/send-pull-requests/ 52 | -------------------------------------------------------------------------------- /lib/multi_xml/parsers/libxml2_parser.rb: -------------------------------------------------------------------------------- 1 | module MultiXml 2 | module Parsers 3 | module Libxml2Parser # :nodoc: 4 | # Convert XML document to hash 5 | # 6 | # node:: 7 | # The XML node object to convert to a hash. 8 | # 9 | # hash:: 10 | # Hash to merge the converted element into. 11 | def node_to_hash(node, hash = {}) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength 12 | node_hash = {MultiXml::CONTENT_ROOT => ""} 13 | 14 | name = node_name(node) 15 | 16 | # Insert node hash into parent hash correctly. 17 | case hash[name] 18 | when Array 19 | hash[name] << node_hash 20 | when Hash 21 | hash[name] = [hash[name], node_hash] 22 | when NilClass 23 | hash[name] = node_hash 24 | end 25 | 26 | # Handle child elements 27 | each_child(node) do |c| 28 | if c.element? 29 | node_to_hash(c, node_hash) 30 | elsif c.text? || c.cdata? 31 | node_hash[MultiXml::CONTENT_ROOT] += c.content 32 | end 33 | end 34 | 35 | # Remove content node if it is empty 36 | node_hash.delete(MultiXml::CONTENT_ROOT) if node_hash[MultiXml::CONTENT_ROOT].strip.empty? 37 | 38 | # Handle attributes 39 | each_attr(node) do |a| 40 | key = node_name(a) 41 | v = node_hash[key] 42 | node_hash[key] = ((v) ? [a.value, v] : a.value) 43 | end 44 | 45 | hash 46 | end 47 | 48 | # Parse an XML Document IO into a simple hash. 
49 | # xml:: 50 | # XML Document IO to parse 51 | def parse(_) 52 | raise(NotImplementedError, "inheritor should define #{__method__}") 53 | end 54 | 55 | private 56 | 57 | def each_child(*) 58 | raise(NotImplementedError, "inheritor should define #{__method__}") 59 | end 60 | 61 | def each_attr(*) 62 | raise(NotImplementedError, "inheritor should define #{__method__}") 63 | end 64 | 65 | def node_name(*) 66 | raise(NotImplementedError, "inheritor should define #{__method__}") 67 | end 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /lib/multi_xml/parsers/ox.rb: -------------------------------------------------------------------------------- 1 | require "ox" unless defined?(Ox) 2 | 3 | # Each MultiXml parser is expected to parse an XML document into a Hash. The 4 | # conversion rules are: 5 | # 6 | # - Each document starts out as an empty Hash. 7 | # 8 | # - Reading an element creates an entry in the parent Hash that has a key of 9 | # the element name and a value of a Hash with attributes as key value 10 | # pairs. Children are added as described by this rule. 11 | # 12 | # - Text and CDATA are stored in the parent element Hash with a key of 13 | # MultiXml::CONTENT_ROOT and a value of the text itself. 14 | # 15 | # - If a key already exists in the Hash then the value associated with the key 16 | # is converted to an Array with the old and new value in it. 17 | # 18 | # - Other elements such as the xml prolog, doctype, and comments are ignored. 
19 | # 20 | 21 | module MultiXml 22 | module Parsers 23 | module Ox # :nodoc: 24 | module_function 25 | 26 | def parse_error 27 | Exception 28 | end 29 | 30 | def parse(io) 31 | handler = Handler.new 32 | ::Ox.sax_parse(handler, io, convert_special: true, skip: :skip_return) 33 | handler.doc 34 | end 35 | 36 | class Handler 37 | attr_accessor :stack 38 | 39 | def initialize 40 | @stack = [] 41 | end 42 | 43 | def doc 44 | @stack[0] 45 | end 46 | 47 | def attr(name, value) 48 | append(name, value) unless @stack.empty? 49 | end 50 | 51 | def text(value) 52 | append(MultiXml::CONTENT_ROOT, value) 53 | end 54 | 55 | def cdata(value) 56 | append(MultiXml::CONTENT_ROOT, value) 57 | end 58 | 59 | def start_element(name) 60 | @stack.push({}) if @stack.empty? 61 | h = {} 62 | append(name, h) 63 | @stack.push(h) 64 | end 65 | 66 | def end_element(_) 67 | @stack.pop 68 | end 69 | 70 | def error(message, line, column) 71 | raise(StandardError, "#{message} at #{line}:#{column}") 72 | end 73 | 74 | def append(key, value) 75 | key = key.to_s 76 | h = @stack.last 77 | if h.key?(key) 78 | v = h[key] 79 | if v.is_a?(Array) 80 | v << value 81 | else 82 | h[key] = [v, value] 83 | end 84 | else 85 | h[key] = value 86 | end 87 | end 88 | end 89 | end 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MultiXML 2 | 3 | A generic swappable back-end for XML parsing 4 | 5 | ## Installation 6 | gem install multi_xml 7 | 8 | ## Documentation 9 | [http://rdoc.info/gems/multi_xml][documentation] 10 | 11 | [documentation]: http://rdoc.info/gems/multi_xml 12 | 13 | ## Usage Examples 14 | ```ruby 15 | require 'multi_xml' 16 | 17 | MultiXml.parser = :ox 18 | MultiXml.parser = MultiXml::Parsers::Ox # Same as above 19 | MultiXml.parse('This is the contents') # Parsed using Ox 20 | 21 | MultiXml.parser = :libxml 22 | MultiXml.parser = 
MultiXml::Parsers::Libxml # Same as above 23 | MultiXml.parse('This is the contents') # Parsed using LibXML 24 | 25 | MultiXml.parser = :nokogiri 26 | MultiXml.parser = MultiXml::Parsers::Nokogiri # Same as above 27 | MultiXml.parse('This is the contents') # Parsed using Nokogiri 28 | 29 | MultiXml.parser = :rexml 30 | MultiXml.parser = MultiXml::Parsers::Rexml # Same as above 31 | MultiXml.parse('This is the contents') # Parsed using REXML 32 | 33 | MultiXml.parser = :oga 34 | MultiXml.parser = MultiXml::Parsers::Oga # Same as above 35 | MultiXml.parse('This is the contents') # Parsed using Oga 36 | ``` 37 | The `parser` setter takes either a symbol or a class (to allow for custom XML 38 | parsers) that responds to `.parse` at the class level. 39 | 40 | MultiXML tries to have intelligent defaulting. That is, if you have any of the 41 | supported parsers already loaded, it will use them before attempting to load 42 | a new one. When loading, libraries are ordered by speed: first Ox, then LibXML, 43 | then Nokogiri, and finally REXML. 44 | 45 | ## Supported Ruby Versions 46 | This library aims to support and is tested against the following Ruby 47 | implementations: 48 | 49 | * 3.2 50 | * 3.3 51 | * 3.4 52 | * JRuby 10 53 | 54 | If something doesn't work on one of these versions, it's a bug. 55 | 56 | This library may inadvertently work (or seem to work) on other Ruby 57 | implementations, however support will only be provided for the versions listed 58 | above. 59 | 60 | If you would like this library to support another Ruby version, you may 61 | volunteer to be a maintainer. Being a maintainer entails making sure all tests 62 | run and pass on that implementation. When something breaks on your 63 | implementation, you will be responsible for providing patches in a timely 64 | fashion. If critical issues for a particular implementation exist at the time 65 | of a major release, support for that Ruby version may be dropped. 
66 | 67 | ## Inspiration 68 | MultiXML was inspired by [MultiJSON][]. 69 | 70 | [multijson]: https://github.com/intridea/multi_json/ 71 | 72 | ## Copyright 73 | Copyright (c) 2010-2025 Erik Berlin. See [LICENSE][] for details. 74 | 75 | [license]: LICENSE.md 76 | -------------------------------------------------------------------------------- /lib/multi_xml/parsers/rexml.rb: -------------------------------------------------------------------------------- 1 | require "rexml/document" unless defined?(REXML::Document) 2 | 3 | module MultiXml 4 | module Parsers 5 | module Rexml # :nodoc: 6 | extend self 7 | 8 | def parse_error 9 | ::REXML::ParseException 10 | end 11 | 12 | # Parse an XML Document IO into a simple hash using REXML 13 | # 14 | # xml:: 15 | # XML Document IO to parse 16 | def parse(xml) 17 | doc = REXML::Document.new(xml) 18 | raise(REXML::ParseException, "The document #{doc.to_s.inspect} does not have a valid root") unless doc.root 19 | 20 | merge_element!({}, doc.root) 21 | end 22 | 23 | private 24 | 25 | # Convert an XML element and merge into the hash 26 | # 27 | # hash:: 28 | # Hash to merge the converted element into. 29 | # element:: 30 | # XML element to merge into hash 31 | def merge_element!(hash, element) 32 | merge!(hash, element.name, collapse(element)) 33 | end 34 | 35 | # Actually converts an XML document element into a data structure. 36 | # 37 | # element:: 38 | # The document element to be collapsed. 39 | def collapse(element) 40 | hash = get_attributes(element) 41 | 42 | if element.has_elements? 43 | element.each_element { |child| merge_element!(hash, child) } 44 | merge_texts!(hash, element) unless empty_content?(element) 45 | hash 46 | else 47 | merge_texts!(hash, element) 48 | end 49 | end 50 | 51 | # Merge all the texts of an element into the hash 52 | # 53 | # hash:: 54 | # Hash to add the converted element to. 
55 | # element:: 56 | # XML element whose texts are to be merged into the hash 57 | def merge_texts!(hash, element) 58 | if element.has_text? 59 | # must use value to prevent double-escaping 60 | texts = element.texts.map(&:value).join 61 | merge!(hash, MultiXml::CONTENT_ROOT, texts) 62 | else 63 | hash 64 | end 65 | end 66 | 67 | # Adds a new key/value pair to an existing Hash. If the key to be added 68 | # already exists and the existing value associated with key is not 69 | # an Array, it will be wrapped in an Array. Then the new value is 70 | # appended to that Array. 71 | # 72 | # hash:: 73 | # Hash to add key/value pair to. 74 | # key:: 75 | # Key to be added. 76 | # value:: 77 | # Value to be associated with key. 78 | def merge!(hash, key, value) 79 | if hash.key?(key) 80 | if hash[key].instance_of?(Array) 81 | hash[key] << value 82 | else 83 | hash[key] = [hash[key], value] 84 | end 85 | elsif value.instance_of?(Array) 86 | hash[key] = [value] 87 | else 88 | hash[key] = value 89 | end 90 | hash 91 | end 92 | 93 | # Converts the attributes array of an XML element into a hash. 94 | # Returns an empty Hash if node has no attributes. 95 | # 96 | # element:: 97 | # XML element to extract attributes from. 98 | def get_attributes(element) 99 | attributes = {} 100 | element.attributes.each { |n, v| attributes[n] = v } 101 | attributes 102 | end 103 | 104 | # Determines if a document element has no text content (whitespace-only text counts as empty) 105 | # 106 | # element:: 107 | # XML element to be checked. 108 | def empty_content?(element) 109 | element.texts.join.strip.empty? 
110 | end 111 | end 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 0.7.2 2 | ----- 3 | * [Drop support for Ruby 3.1](https://github.com/sferik/multi_xml/commit/fab6288edd36c58a2b13e0206d8bed305fcb4a4b) 4 | 5 | 0.7.1 6 | ----- 7 | * [Relax required Ruby version constraint to allow installation on Debian stable](https://github.com/sferik/multi_xml/commit/7d18711466a15e158dc71344ca6f6e18838ecc8d) 8 | 9 | 0.7.0 10 | ----- 11 | * [Add support for Ruby 3.3](https://github.com/sferik/multi_xml/pull/67) 12 | * [Drop support for Ruby 3.0](https://github.com/sferik/multi_xml/commit/eec72c56307fede3a93f1a61553587cb278b0c8a) [and](https://github.com/sferik/multi_xml/commit/6a6dec80a36c30774a5525b45f71d346fb561e69) [earlier](https://github.com/sferik/multi_xml/commit/e7dad37a0a0be8383a26ffe515c575b5b4d04588) 13 | * [Don't mutate strings](https://github.com/sferik/multi_xml/commit/71be3fff4afb0277a7e1c47c5f1f4b6106a8eb45) 14 | 15 | 0.6.0 16 | ----- 17 | * [Duplexed Streams](https://github.com/sferik/multi_xml/pull/45) 18 | * [Support for Oga](https://github.com/sferik/multi_xml/pull/47) 19 | * [Integer unification for Ruby 2.4](https://github.com/sferik/multi_xml/pull/54) 20 | 21 | 0.5.5 22 | ----- 23 | * [Fix symbolize_keys function](https://github.com/sferik/multi_xml/commit/a4cae3aeb690999287cd30206399abaa5ce1ae81) 24 | * [Fix Nokogiri parser for the same attr and inner element name](https://github.com/sferik/multi_xml/commit/a28ed86e2d7826b2edeed98552736b4c7ca52726) 25 | 26 | 0.5.4 27 | ----- 28 | * [Add option to not cast parsed values](https://github.com/sferik/multi_xml/commit/44fc05fbcfd60cc8b555b75212471fab29fa8cd0) 29 | * [Use message instead of to_s](https://github.com/sferik/multi_xml/commit/b06f0114434ffe1957dd7bc2712cb5b76c1b45fe) 30 | 31 | 0.5.3 32 | ----- 33 | * [Add cryptographic 
signature](https://github.com/sferik/multi_xml/commit/f39f0c74308090737816c622dbb7d7aa28c646c0) 34 | 35 | 0.5.2 36 | ----- 37 | * [Remove ability to parse symbols and YAML](https://github.com/sferik/multi_xml/pull/34) 38 | 39 | 0.5.1 40 | ----- 41 | * [Revert "Reset @@parser in between specs"](https://github.com/sferik/multi_xml/issues/28) 42 | 43 | 0.5.0 44 | ----- 45 | * [Reset @@parser in between specs](https://github.com/sferik/multi_xml/commit/b562bed265918b43ac1c4c638ae3a7ffe95ecd83) 46 | * [Add attributes being passed through on content nodes](https://github.com/sferik/multi_xml/commit/631a8bb3c2253db0024f77f47c16d5a53b8128fd) 47 | 48 | 0.4.4 49 | ----- 50 | * [Fix regression in MultiXml.parse](https://github.com/sferik/multi_xml/commit/45ae597d9a35cbd89cc7f5518c85bac30199fc06) 51 | 52 | 0.4.3 53 | ----- 54 | * [Make parser a class variable](https://github.com/sferik/multi_xml/commit/6804ffc8680ed6466c66f2472f5e016c412c2c24) 55 | * [Add TYPE_NAMES constant](https://github.com/sferik/multi_xml/commit/72a21f2e86c8e3ac9689cee5f3a62102cfb98028) 56 | 57 | 0.4.2 58 | ----- 59 | * [Fix bug in dealing with xml element attributes for both REXML and Ox](https://github.com/sferik/multi_xml/commit/ba3c1ac427ff0268abaf8186fb4bd81100c99559) 60 | * [Make Ox the preferred XML parser](https://github.com/sferik/multi_xml/commit/0a718d740c30fba426f300a929cda9ee8250d238) 61 | 62 | 0.4.1 63 | ----- 64 | * [Use the SAX like parser with Ox](https://github.com/sferik/multi_xml/commit/d289d42817a32e48483c00d5361c76fbea62a166) 65 | 66 | 0.4.0 67 | ----- 68 | * [Add support for Ox](https://github.com/sferik/multi_xml/pull/14) 69 | 70 | 0.3.0 71 | ----- 72 | * [Remove core class monkeypatches](https://github.com/sferik/multi_xml/commit/f7cc3ce4d2924c0e0adc6935d1fba5ec79282938) 73 | * [Sort out some class / singleton class issues](https://github.com/sferik/multi_xml/commit/a5dac06bcf658facaaf7afa295f1291c7be15a44) 74 | * [Have parsers refer to toplevel CONTENT_ROOT instead of defining 
it](https://github.com/sferik/multi_xml/commit/94e6fa49e69b2a2467a0e6d3558f7d9815cae47e) 75 | * [Move redundant input sanitizing to top-level](https://github.com/sferik/multi_xml/commit/4874148214dbbd2e5a4b877734e2519af42d6132) 76 | * [Refactor libxml and nokogiri parsers to inherit from a common ancestor](https://github.com/sferik/multi_xml/commit/e0fdffcbfe641b6aaa3952ffa0570a893de325c2) 77 | 78 | 0.2.2 79 | ----- 80 | * [Respect the global load path](https://github.com/sferik/multi_xml/commit/68eb3011b37f0e0222bb842abd2a78e1285a97c1) 81 | 82 | 0.2.1 83 | ----- 84 | * [Add BlueCloth gem as development dependency for Markdown formatting](https://github.com/sferik/multi_xml/commit/18195cd1789176709f68f0d7f8df7fc944fe4d24) 85 | * [Replace BlueCloth with Maruku for JRuby compatibility](https://github.com/sferik/multi_xml/commit/bad5516a5ec5e7ef7fc5a35c411721522357fa19) 86 | 87 | 0.2.0 88 | ----- 89 | * [Do not automatically load all library files](https://github.com/sferik/multi_xml/commit/dbd0447e062e8930118573c5453150e9371e5955) 90 | 91 | 0.1.4 92 | ----- 93 | * [Preserve backtrace when catching/throwing exceptions](https://github.com/sferik/multi_xml/commit/7475ee90201c2701fddd524082832d16ca62552d) 94 | 95 | 0.1.3 96 | ----- 97 | * [Common error handling for all parsers](https://github.com/sferik/multi_xml/commit/5357c28eddc14e921fd1be1f445db602a8dddaf2) 98 | 99 | 0.1.2 100 | ----- 101 | * [Make wrap an Array class method](https://github.com/sferik/multi_xml/commit/28307b69bd1d9460353c861466e425c2afadcf56) 102 | 103 | 0.1.1 104 | ----- 105 | * [Fix parsing for strings that contain newlines](https://github.com/sferik/multi_xml/commit/68087a4ce50b5d63cfa60d6f1fcbc2f6d689e43f) 106 | 107 | 0.1.0 108 | ----- 109 | * [Add support for LibXML and Nokogiri](https://github.com/sferik/multi_xml/commit/856bb17fce66601e0b3d3eb3b64dbeb25aed3bca) 110 | 111 | 0.0.1 112 | ----- 113 | * [REXML 
require "bigdecimal"
require "date"
require "stringio"
require "time"
require "yaml"

# MultiXml exposes a single entry point, MultiXml.parse, on top of several
# interchangeable XML parser back ends. The back end turns the document into
# nested hashes/arrays of strings; this module then normalizes the keys and
# (optionally) typecasts values according to each element's "type" attribute.
module MultiXml # rubocop:disable Metrics/ModuleLength
  # Raised by MultiXml.parse when the active parser rejects the document.
  class ParseError < StandardError; end

  # Raised by MultiXml.default_parser when no supported XML library loads.
  class NoParserError < StandardError; end

  # Raised during typecasting when an element's "type" attribute is listed
  # in the :disallowed_types option.
  class DisallowedTypeError < StandardError
    def initialize(type)
      super("Disallowed type attribute: #{type.inspect}")
    end
  end

  # Libraries to try (in order of preference) paired with the parser symbol
  # each one enables.
  unless defined?(REQUIREMENT_MAP)
    REQUIREMENT_MAP = [
      ["ox", :ox],
      ["libxml", :libxml],
      ["nokogiri", :nokogiri],
      ["rexml/document", :rexml],
      ["oga", :oga]
    ].freeze
  end

  # Hash key under which parsers store an element's text content.
  CONTENT_ROOT = "__content__".freeze unless defined?(CONTENT_ROOT)

  unless defined?(PARSING)
    float_proc = proc { |float| float.to_f }

    # Time.parse is tried first. If it raises, fall back to DateTime.parse,
    # which accepts some inputs Time.parse rejects (for example ISO 8601 week
    # dates such as "2020-W10-1"). DateTime has no #utc in the standard
    # library, so the fallback result is converted with #to_time first.
    datetime_proc = proc do |time|
      Time.parse(time).utc
    rescue StandardError
      DateTime.parse(time).to_time.utc
    end

    # Converters keyed by the value of an element's "type" attribute.
    # One-argument procs receive the text content; two-argument procs also
    # receive the whole element hash (attributes included).
    PARSING = {
      "symbol" => proc { |symbol| symbol.to_sym },
      "date" => proc { |date| Date.parse(date) },
      "datetime" => datetime_proc,
      "dateTime" => datetime_proc,
      "integer" => proc { |integer| integer.to_i },
      "float" => float_proc,
      "double" => float_proc,
      "decimal" => proc { |number| BigDecimal(number) },
      "boolean" => proc { |boolean| !%w[0 false].include?(boolean.strip) },
      "string" => proc { |string| string.to_s },
      # NOTE(review): YAML.load runs on document content. "yaml" is in
      # DISALLOWED_XML_TYPES by default; only allow it for trusted input.
      "yaml" => proc { |yaml| YAML.load(yaml) rescue yaml }, # rubocop:disable Style/RescueModifier
      "base64Binary" => proc { |binary| base64_decode(binary) },
      "binary" => proc { |binary, entity| parse_binary(binary, entity) },
      "file" => proc { |file, entity| parse_file(file, entity) }
    }.freeze
  end

  # Maps Ruby class names to the XML "type" attribute value used for them.
  unless defined?(TYPE_NAMES)
    TYPE_NAMES = {
      "Symbol" => "symbol",
      "Integer" => "integer",
      "BigDecimal" => "decimal",
      "Float" => "float",
      "TrueClass" => "boolean",
      "FalseClass" => "boolean",
      "Date" => "date",
      "DateTime" => "datetime",
      "Time" => "datetime",
      "Array" => "array",
      "Hash" => "hash"
    }.freeze
  end

  # Types that raise DisallowedTypeError unless the caller overrides
  # :disallowed_types.
  DISALLOWED_XML_TYPES = %w[symbol yaml].freeze

  # Options applied by MultiXml.parse when the caller does not override them.
  DEFAULT_OPTIONS = {
    typecast_xml_value: true,
    disallowed_types: DISALLOWED_XML_TYPES,
    symbolize_keys: false
  }.freeze

  class << self
    # Get the current parser class, selecting the default parser on first use.
    def parser
      return @parser if defined?(@parser)

      self.parser = default_parser
      @parser
    end

    # The default parser based on what you currently
    # have loaded and installed. First checks to see
    # if any parsers are already loaded, then checks
    # to see which are installed if none are loaded.
    #
    # Raises NoParserError when none of the libraries can be required.
    def default_parser
      return :ox if defined?(::Ox)
      return :libxml if defined?(::LibXML)
      return :nokogiri if defined?(::Nokogiri)
      return :oga if defined?(::Oga)

      REQUIREMENT_MAP.each do |library, parser|
        require library
        return parser
      rescue LoadError
        next
      end
      raise(NoParserError,
            "No XML parser detected. If you're using Rubinius and Bundler, try adding an XML parser to your Gemfile (e.g. libxml-ruby, nokogiri, or rubysl-rexml). For more information, see https://github.com/sferik/multi_xml/issues/42.")
    end

    # Set the XML parser utilizing a symbol, string, or class.
    # Supported by default are:
    #
    # * :libxml
    # * :nokogiri
    # * :ox
    # * :rexml
    # * :oga
    #
    # A String or Symbol is required from multi_xml/parsers/<name> and
    # resolved to the matching constant under MultiXml::Parsers; a Class or
    # Module is used as given. Anything else raises a RuntimeError.
    def parser=(new_parser)
      case new_parser
      when String, Symbol
        require "multi_xml/parsers/#{new_parser.to_s.downcase}"
        # "libxml2_parser" -> "Libxml2Parser"
        @parser = MultiXml::Parsers.const_get(new_parser.to_s.split("_").map(&:capitalize).join)
      when Class, Module
        @parser = new_parser
      else
        raise("Did not recognize your parser specification. Please specify either a symbol or a class.")
      end
    end

    # Parse an XML string or IO into Ruby.
    #
    # Options
    #
    # :symbolize_keys :: If true, will use symbols instead of strings for the keys.
    #
    # :disallowed_types :: Types to disallow from being typecast. Defaults to `['yaml', 'symbol']`. Use `[]` to allow all types.
    #
    # :typecast_xml_value :: If true (the default), values are typecast according to their "type" attribute. If false, the parsed document is returned without typecasting.
    #
    # Returns an empty Hash for nil, empty, or whitespace-only input.
    # Raises ParseError when the parser raises its parse_error class, and
    # DisallowedTypeError when a disallowed type attribute is encountered.
    def parse(xml, options = {}) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
      xml ||= ""

      options = DEFAULT_OPTIONS.merge(options)

      xml = xml.strip if xml.respond_to?(:strip)
      begin
        xml = StringIO.new(xml) unless xml.respond_to?(:read)

        # Peek at the first character so an empty stream yields {} without
        # invoking the parser, then push it back for the parser to read.
        char = xml.getc
        return {} if char.nil?

        xml.ungetc(char)

        hash = undasherize_keys(parser.parse(xml) || {})
        hash = typecast_xml_value(hash, options[:disallowed_types]) if options[:typecast_xml_value]
      rescue DisallowedTypeError
        # Listed first so it is never converted into a ParseError below.
        raise
      rescue parser.parse_error => e
        raise(ParseError, e.message, e.backtrace)
      end
      hash = symbolize_keys(hash) if options[:symbolize_keys]
      hash
    end

    # This module decorates files with the original_filename
    # and content_type methods.
    module FileLike # :nodoc:
      attr_writer :original_filename, :content_type

      def original_filename
        @original_filename || "untitled"
      end

      def content_type
        @content_type || "application/octet-stream"
      end
    end

    private

    # Decodes binary content when the element declares encoding="base64";
    # any other encoding is returned unchanged.
    # TODO: Add support for other encodings
    def parse_binary(binary, entity) # :nodoc:
      case entity["encoding"]
      when "base64"
        base64_decode(binary)
      else
        binary
      end
    end

    # Builds a StringIO from base64 content and decorates it with FileLike,
    # taking the file name and content type from the element's attributes.
    def parse_file(file, entity)
      f = StringIO.new(base64_decode(file))
      f.extend(FileLike)
      f.original_filename = entity["name"]
      f.content_type = entity["content_type"]
      f
    end

    # Decodes a base64 string.
    def base64_decode(input)
      input.unpack1("m")
    end

    # Returns a copy of params with every Hash key converted to a Symbol,
    # recursing through nested hashes and arrays.
    def symbolize_keys(params)
      case params
      when Hash
        params.each_with_object({}) do |(key, value), result|
          result[key.to_sym] = symbolize_keys(value)
        end
      when Array
        params.map { |value| symbolize_keys(value) }
      else
        params
      end
    end

    # Returns a copy of params with every Hash key converted to a String in
    # which "-" is replaced by "_", recursing through hashes and arrays.
    def undasherize_keys(params)
      case params
      when Hash
        params.each_with_object({}) do |(key, value), hash|
          hash[key.to_s.tr("-", "_")] = undasherize_keys(value)
        end
      when Array
        params.map { |value| undasherize_keys(value) }
      else
        params
      end
    end

    # Recursively typecasts a parsed document.
    #
    # value            - Hash, Array or String produced by the parser.
    # disallowed_types - type names that raise DisallowedTypeError
    #                    (defaults to DISALLOWED_XML_TYPES).
    #
    # A one-element Array collapses to its single element. Any value that is
    # not a Hash, Array or String raises a RuntimeError.
    def typecast_xml_value(value, disallowed_types = nil)
      disallowed_types ||= DISALLOWED_XML_TYPES

      case value
      when Hash
        typecast_hash(value, disallowed_types)
      when Array
        items = value.map { |item| typecast_xml_value(item, disallowed_types) }
        (items.length > 1) ? items : items.first
      when String
        value
      else
        raise("can't typecast #{value.class.name}: #{value.inspect}")
      end
    end

    # Typecasts one element hash, dispatching on its "type" attribute and
    # on whether it carries text content.
    def typecast_hash(value, disallowed_types) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
      type = value["type"]
      if value.include?("type") && !type.is_a?(Hash) && disallowed_types.include?(type)
        raise(DisallowedTypeError, type)
      end

      if type == "array"
        typecast_array_node(value, disallowed_types)
      elsif value.key?(CONTENT_ROOT)
        typecast_content_node(value)
      elsif type == "string" && value["nil"] != "true"
        ""
      # blank or nil parsed values are represented by nil
      elsif value.empty? || value["nil"] == "true"
        nil
      # If the type is the only element which makes it then
      # this still makes the value nil, except if type is
      # a XML node (where "type" maps to a Hash)
      elsif type && value.size == 1 && !type.is_a?(Hash)
        nil
      else
        xml_value = value.each_with_object({}) do |(k, v), hash|
          hash[k] = typecast_xml_value(v, disallowed_types)
        end

        # Turn {:files => {:file => #} into {:files => #} so it is compatible with
        # how multipart uploaded files from HTML appear
        xml_value["file"].is_a?(StringIO) ? xml_value["file"] : xml_value
      end
    end

    # Typecasts an element declared type="array". The entries are taken from
    # the first non-"type" key whose value is an Array or a Hash, so plain
    # string attributes on the element are ignored. No such key yields [].
    def typecast_array_node(value, disallowed_types)
      _, entries = value.find { |k, v| k != "type" && (v.is_a?(Array) || v.is_a?(Hash)) }

      case entries
      when Array
        entries.map { |entry| typecast_xml_value(entry, disallowed_types) }
      when Hash
        [typecast_xml_value(entries, disallowed_types)]
      else
        []
      end
    end

    # Typecasts an element that has text content. A recognized type is
    # converted (and its "type" key removed for one-argument converters);
    # an unrecognized type is passed through untouched. When other
    # attributes remain, the element hash is returned with the converted
    # content stored under CONTENT_ROOT instead of the bare value.
    def typecast_content_node(value)
      content = value[CONTENT_ROOT]
      converter = PARSING[value["type"]]
      return (value.size > 1) ? value : content unless converter
      # Two-argument converters also need the element's attributes.
      return converter.call(content, value) unless converter.arity == 1

      value.delete("type")
      if value.size > 1
        value[CONTENT_ROOT] = converter.call(content)
        value
      else
        converter.call(content)
      end
    end
  end
end
parser == "Oga" 42 | context "an invalid XML document" do 43 | before do 44 | @xml = "" 45 | end 46 | 47 | it "raises MultiXml::ParseError" do 48 | expect { MultiXml.parse(@xml) }.to raise_error(MultiXml::ParseError) 49 | end 50 | end 51 | end 52 | 53 | context "a valid XML document" do 54 | before do 55 | @xml = "" 56 | end 57 | 58 | it "parses correctly" do 59 | expect(MultiXml.parse(@xml)).to eq("user" => nil) 60 | end 61 | 62 | context "with CDATA" do 63 | before do 64 | @xml = "" 65 | end 66 | 67 | it "returns the correct CDATA" do 68 | expect(MultiXml.parse(@xml)["user"]).to eq("Erik Berlin") 69 | end 70 | end 71 | 72 | context "element with the same inner element and attribute name" do 73 | before do 74 | @xml = "Smith" 75 | end 76 | 77 | it "returns names as Array" do 78 | expect(MultiXml.parse(@xml)["user"]["name"]).to eq %w[John Smith] 79 | end 80 | end 81 | 82 | context "with content" do 83 | before do 84 | @xml = "Erik Berlin" 85 | end 86 | 87 | it "returns the correct content" do 88 | expect(MultiXml.parse(@xml)["user"]).to eq("Erik Berlin") 89 | end 90 | end 91 | 92 | context "with an attribute" do 93 | before do 94 | @xml = '' 95 | end 96 | 97 | it "returns the correct attribute" do 98 | expect(MultiXml.parse(@xml)["user"]["name"]).to eq("Erik Berlin") 99 | end 100 | end 101 | 102 | context "with multiple attributes" do 103 | before do 104 | @xml = '' 105 | end 106 | 107 | it "returns the correct attributes" do 108 | expect(MultiXml.parse(@xml)["user"]["name"]).to eq("Erik Berlin") 109 | expect(MultiXml.parse(@xml)["user"]["screen_name"]).to eq("sferik") 110 | end 111 | end 112 | 113 | context "typecast management" do 114 | before do 115 | @xml = %( 116 | 117 | 118 | Settings 119 | 120 | Test 121 | 122 | 123 | 124 | ) 125 | end 126 | 127 | context "with :typecast_xml_value => true" do 128 | before do 129 | @setting = MultiXml.parse(@xml)["global_settings"]["group"]["setting"] 130 | end 131 | 132 | it { expect(@setting).to eq "" } 133 | end 134 | 135 
| context "with :typecast_xml_value => false" do 136 | before do 137 | @setting = MultiXml.parse(@xml, typecast_xml_value: false)["global_settings"]["group"]["setting"] 138 | end 139 | 140 | it { expect(@setting).to eq("type" => "string", "description" => {"__content__" => "Test"}) } 141 | end 142 | end 143 | 144 | context "with :symbolize_keys => true" do 145 | before do 146 | @xml = 'Wynn Netherland' 147 | end 148 | 149 | it "symbolizes keys" do 150 | expect(MultiXml.parse(@xml, 151 | symbolize_keys: true)).to eq(users: {user: [{name: "Erik Berlin"}, 152 | {name: "Wynn Netherland"}]}) 153 | end 154 | end 155 | 156 | context 'with an attribute type="boolean"' do 157 | %w[true false].each do |boolean| 158 | context "when #{boolean}" do 159 | it "returns #{boolean}" do 160 | xml = "#{boolean}" 161 | expect(MultiXml.parse(xml)["tag"]).to be instance_eval(boolean) 162 | end 163 | end 164 | end 165 | 166 | context "when 1" do 167 | before do 168 | @xml = '1' 169 | end 170 | 171 | it "returns true" do 172 | expect(MultiXml.parse(@xml)["tag"]).to be true 173 | end 174 | end 175 | 176 | context "when 0" do 177 | before do 178 | @xml = '0' 179 | end 180 | 181 | it "returns false" do 182 | expect(MultiXml.parse(@xml)["tag"]).to be false 183 | end 184 | end 185 | end 186 | 187 | context 'with an attribute type="integer"' do 188 | context "with a positive integer" do 189 | before do 190 | @xml = '1' 191 | end 192 | 193 | it "returns a Integer" do 194 | expect(MultiXml.parse(@xml)["tag"]).to be_a(Integer) 195 | end 196 | 197 | it "returns a positive number" do 198 | expect(MultiXml.parse(@xml)["tag"]).to be > 0 199 | end 200 | 201 | it "returns the correct number" do 202 | expect(MultiXml.parse(@xml)["tag"]).to eq(1) 203 | end 204 | end 205 | 206 | context "with a negative integer" do 207 | before do 208 | @xml = '-1' 209 | end 210 | 211 | it "returns a Integer" do 212 | expect(MultiXml.parse(@xml)["tag"]).to be_a(Integer) 213 | end 214 | 215 | it "returns a negative number" 
do 216 | expect(MultiXml.parse(@xml)["tag"]).to be < 0 217 | end 218 | 219 | it "returns the correct number" do 220 | expect(MultiXml.parse(@xml)["tag"]).to eq(-1) 221 | end 222 | end 223 | end 224 | 225 | context 'with an attribute type="string"' do 226 | before do 227 | @xml = '' 228 | end 229 | 230 | it "returns a String" do 231 | expect(MultiXml.parse(@xml)["tag"]).to be_a(String) 232 | end 233 | 234 | it "returns the correct string" do 235 | expect(MultiXml.parse(@xml)["tag"]).to eq("") 236 | end 237 | end 238 | 239 | context 'with an attribute type="date"' do 240 | before do 241 | @xml = '1970-01-01' 242 | end 243 | 244 | it "returns a Date" do 245 | expect(MultiXml.parse(@xml)["tag"]).to be_a(Date) 246 | end 247 | 248 | it "returns the correct date" do 249 | expect(MultiXml.parse(@xml)["tag"]).to eq(Date.parse("1970-01-01")) 250 | end 251 | end 252 | 253 | context 'with an attribute type="datetime"' do 254 | before do 255 | @xml = '1970-01-01 00:00' 256 | end 257 | 258 | it "returns a Time" do 259 | expect(MultiXml.parse(@xml)["tag"]).to be_a(Time) 260 | end 261 | 262 | it "returns the correct time" do 263 | expect(MultiXml.parse(@xml)["tag"]).to eq(Time.parse("1970-01-01 00:00")) 264 | end 265 | end 266 | 267 | context 'with an attribute type="dateTime"' do 268 | before do 269 | @xml = '1970-01-01 00:00' 270 | end 271 | 272 | it "returns a Time" do 273 | expect(MultiXml.parse(@xml)["tag"]).to be_a(Time) 274 | end 275 | 276 | it "returns the correct time" do 277 | expect(MultiXml.parse(@xml)["tag"]).to eq(Time.parse("1970-01-01 00:00")) 278 | end 279 | end 280 | 281 | context 'with an attribute type="double"' do 282 | before do 283 | @xml = '3.14159265358979' 284 | end 285 | 286 | it "returns a Float" do 287 | expect(MultiXml.parse(@xml)["tag"]).to be_a(Float) 288 | end 289 | 290 | it "returns the correct number" do 291 | expect(MultiXml.parse(@xml)["tag"]).to eq(3.14159265358979) 292 | end 293 | end 294 | 295 | context 'with an attribute type="decimal"' do 
296 | before do 297 | @xml = '3.14159265358979' 298 | end 299 | 300 | it "returns a BigDecimal" do 301 | expect(MultiXml.parse(@xml)["tag"]).to be_a(BigDecimal) 302 | end 303 | 304 | it "returns the correct number" do 305 | expect(MultiXml.parse(@xml)["tag"]).to eq(3.14159265358979) 306 | end 307 | end 308 | 309 | context 'with an attribute type="base64Binary"' do 310 | before do 311 | @xml = 'aW1hZ2UucG5n' 312 | end 313 | 314 | it "returns a String" do 315 | expect(MultiXml.parse(@xml)["tag"]).to be_a(String) 316 | end 317 | 318 | it "returns the correct string" do 319 | expect(MultiXml.parse(@xml)["tag"]).to eq("image.png") 320 | end 321 | end 322 | 323 | context 'with an attribute type="yaml"' do 324 | before do 325 | @xml = "--- \n1: returns an integer\n:message: Have a nice day\narray: \n- has-dashes: true\n has_underscores: true\n" 326 | end 327 | 328 | it "raises MultiXML::DisallowedTypeError by default" do 329 | expect { MultiXml.parse(@xml)["tag"] }.to raise_error(MultiXml::DisallowedTypeError) 330 | end 331 | 332 | it "returns the correctly parsed YAML when the type is allowed" do 333 | expect(MultiXml.parse(@xml, 334 | disallowed_types: [])["tag"]).to eq(:message => "Have a nice day", 1 => "returns an integer", 335 | "array" => [{"has-dashes" => true, "has_underscores" => true}]) 336 | end 337 | end 338 | 339 | context 'with an attribute type="symbol"' do 340 | before do 341 | @xml = 'my_symbol' 342 | end 343 | 344 | it "raises MultiXML::DisallowedTypeError" do 345 | expect { MultiXml.parse(@xml)["tag"] }.to raise_error(MultiXml::DisallowedTypeError) 346 | end 347 | 348 | it "returns the correctly parsed Symbol when the type is allowed" do 349 | expect(MultiXml.parse(@xml, disallowed_types: [])["tag"]).to eq(:my_symbol) 350 | end 351 | end 352 | 353 | context 'with an attribute type="file"' do 354 | before do 355 | @xml = 'ZGF0YQ==' 356 | end 357 | 358 | it "returns a StringIO" do 359 | expect(MultiXml.parse(@xml)["tag"]).to be_a(StringIO) 360 | end 361 
| 362 | it "is decoded correctly" do 363 | expect(MultiXml.parse(@xml)["tag"].string).to eq("data") 364 | end 365 | 366 | it "has the correct file name" do 367 | expect(MultiXml.parse(@xml)["tag"].original_filename).to eq("data.txt") 368 | end 369 | 370 | it "has the correct content type" do 371 | expect(MultiXml.parse(@xml)["tag"].content_type).to eq("text/plain") 372 | end 373 | 374 | context "with missing name and content type" do 375 | before do 376 | @xml = 'ZGF0YQ==' 377 | end 378 | 379 | it "returns a StringIO" do 380 | expect(MultiXml.parse(@xml)["tag"]).to be_a(StringIO) 381 | end 382 | 383 | it "is decoded correctly" do 384 | expect(MultiXml.parse(@xml)["tag"].string).to eq("data") 385 | end 386 | 387 | it "has the default file name" do 388 | expect(MultiXml.parse(@xml)["tag"].original_filename).to eq("untitled") 389 | end 390 | 391 | it "has the default content type" do 392 | expect(MultiXml.parse(@xml)["tag"].content_type).to eq("application/octet-stream") 393 | end 394 | end 395 | end 396 | 397 | context 'with an attribute type="array"' do 398 | before do 399 | @xml = 'Erik BerlinWynn Netherland' 400 | end 401 | 402 | it "returns an Array" do 403 | expect(MultiXml.parse(@xml)["users"]).to be_a(Array) 404 | end 405 | 406 | it "returns the correct array" do 407 | expect(MultiXml.parse(@xml)["users"]).to eq(["Erik Berlin", "Wynn Netherland"]) 408 | end 409 | end 410 | 411 | context 'with an attribute type="array" in addition to other attributes' do 412 | before do 413 | @xml = 'Erik BerlinWynn Netherland' 414 | end 415 | 416 | it "returns an Array" do 417 | expect(MultiXml.parse(@xml)["users"]).to be_a(Array) 418 | end 419 | 420 | it "returns the correct array" do 421 | expect(MultiXml.parse(@xml)["users"]).to eq(["Erik Berlin", "Wynn Netherland"]) 422 | end 423 | end 424 | 425 | context 'with an attribute type="array" containing only one item' do 426 | before do 427 | @xml = 'Erik Berlin' 428 | end 429 | 430 | it "returns an Array" do 431 | 
expect(MultiXml.parse(@xml)["users"]).to be_a(Array) 432 | end 433 | 434 | it "returns the correct array" do 435 | expect(MultiXml.parse(@xml)["users"]).to eq(["Erik Berlin"]) 436 | end 437 | end 438 | 439 | %w[integer boolean date datetime file].each do |type| 440 | context "with an empty attribute type=\"#{type}\"" do 441 | before do 442 | @xml = "" 443 | end 444 | 445 | it "returns nil" do 446 | expect(MultiXml.parse(@xml)["tag"]).to be_nil 447 | end 448 | end 449 | end 450 | 451 | %w[yaml symbol].each do |type| 452 | context "with an empty attribute type=\"#{type}\"" do 453 | before do 454 | @xml = "" 455 | end 456 | 457 | it "raises MultiXml::DisallowedTypeError by default" do 458 | expect { MultiXml.parse(@xml)["tag"] }.to raise_error(MultiXml::DisallowedTypeError) 459 | end 460 | 461 | it "returns nil when the type is allowed" do 462 | expect(MultiXml.parse(@xml, disallowed_types: [])["tag"]).to be_nil 463 | end 464 | end 465 | end 466 | 467 | context 'with an empty attribute type="array"' do 468 | before do 469 | @xml = '' 470 | end 471 | 472 | it "returns an empty Array" do 473 | expect(MultiXml.parse(@xml)["tag"]).to eq([]) 474 | end 475 | 476 | context "with whitespace" do 477 | before do 478 | @xml = ' ' 479 | end 480 | 481 | it "returns an empty Array" do 482 | expect(MultiXml.parse(@xml)["tag"]).to eq([]) 483 | end 484 | end 485 | end 486 | 487 | context "with XML entities" do 488 | before do 489 | @xml_entities = { 490 | "<" => "<", 491 | ">" => ">", 492 | '"' => """, 493 | "'" => "'", 494 | "&" => "&" 495 | } 496 | end 497 | 498 | context "in content" do 499 | it "returns unescaped XML entities" do 500 | @xml_entities.each do |key, value| 501 | xml = "#{value}" 502 | expect(MultiXml.parse(xml)["tag"]).to eq(key) 503 | end 504 | end 505 | end 506 | 507 | context "in attribute" do 508 | it "returns unescaped XML entities" do 509 | @xml_entities.each do |key, value| 510 | xml = "" 511 | expect(MultiXml.parse(xml)["tag"]["attribute"]).to eq(key) 512 | 
end 513 | end 514 | end 515 | end 516 | 517 | context "with dasherized tag" do 518 | before do 519 | @xml = "" 520 | end 521 | 522 | it "returns undasherize tag" do 523 | expect(MultiXml.parse(@xml).keys).to include("tag_1") 524 | end 525 | end 526 | 527 | context "with dasherized attribute" do 528 | before do 529 | @xml = '' 530 | end 531 | 532 | it "returns undasherize attribute" do 533 | expect(MultiXml.parse(@xml)["tag"].keys).to include("attribute_1") 534 | end 535 | end 536 | 537 | context "with children" do 538 | context "with attributes" do 539 | before do 540 | @xml = '' 541 | end 542 | 543 | it "returns the correct attributes" do 544 | expect(MultiXml.parse(@xml)["users"]["user"]["name"]).to eq("Erik Berlin") 545 | end 546 | end 547 | 548 | context "with text" do 549 | before do 550 | @xml = "Erik Berlin" 551 | end 552 | 553 | it "returns the correct text" do 554 | expect(MultiXml.parse(@xml)["user"]["name"]).to eq("Erik Berlin") 555 | end 556 | end 557 | 558 | context "with an unrecognized attribute type" do 559 | before do 560 | @xml = 'Erik Berlin' 561 | end 562 | 563 | it "passes through the type" do 564 | expect(MultiXml.parse(@xml)["user"]["type"]).to eq("admin") 565 | end 566 | end 567 | 568 | context "with attribute tags on content nodes" do 569 | context "non 'type' attributes" do 570 | before do 571 | @xml = <<-XML 572 | 573 | 123 574 | 0.123 575 | 576 | XML 577 | @parsed_xml = MultiXml.parse(@xml) 578 | end 579 | 580 | it "adds the attributes to the value hash" do 581 | expect(@parsed_xml["options"]["value"][0]["__content__"]).to eq("123") 582 | expect(@parsed_xml["options"]["value"][0]["currency"]).to eq("USD") 583 | expect(@parsed_xml["options"]["value"][1]["__content__"]).to eq("0.123") 584 | expect(@parsed_xml["options"]["value"][1]["number"]).to eq("percent") 585 | end 586 | end 587 | 588 | context "unrecognized type attributes" do 589 | before do 590 | @xml = <<-XML 591 | 592 | 123 593 | 0.123 594 | 123 595 | 596 | XML 597 | @parsed_xml = 
MultiXml.parse(@xml) 598 | end 599 | 600 | it "adds the attributes to the value hash passing through the type" do 601 | expect(@parsed_xml["options"]["value"][0]["__content__"]).to eq("123") 602 | expect(@parsed_xml["options"]["value"][0]["type"]).to eq("USD") 603 | expect(@parsed_xml["options"]["value"][1]["__content__"]).to eq("0.123") 604 | expect(@parsed_xml["options"]["value"][1]["type"]).to eq("percent") 605 | expect(@parsed_xml["options"]["value"][2]["__content__"]).to eq("123") 606 | expect(@parsed_xml["options"]["value"][2]["currency"]).to eq("USD") 607 | end 608 | end 609 | 610 | context "mixing attributes and non-attributes content nodes type attributes" do 611 | before do 612 | @xml = <<-XML 613 | 614 | 123 615 | 0.123 616 | 123 617 | 618 | XML 619 | @parsed_xml = MultiXml.parse(@xml) 620 | end 621 | 622 | it "adds the attributes to the value hash passing through the type" do 623 | expect(@parsed_xml["options"]["value"][0]["__content__"]).to eq("123") 624 | expect(@parsed_xml["options"]["value"][0]["type"]).to eq("USD") 625 | expect(@parsed_xml["options"]["value"][1]["__content__"]).to eq("0.123") 626 | expect(@parsed_xml["options"]["value"][1]["type"]).to eq("percent") 627 | expect(@parsed_xml["options"]["value"][2]).to eq("123") 628 | end 629 | end 630 | 631 | context "mixing recognized type attribute and non-type attributes on content nodes" do 632 | before do 633 | @xml = <<-XML 634 | 635 | 123 636 | 637 | XML 638 | @parsed_xml = MultiXml.parse(@xml) 639 | end 640 | 641 | it "adds the the non-type attribute and remove the recognized type attribute and do the typecast" do 642 | expect(@parsed_xml["options"]["value"]["__content__"]).to eq(123) 643 | expect(@parsed_xml["options"]["value"]["number"]).to eq("USD") 644 | end 645 | end 646 | 647 | context "mixing unrecognized type attribute and non-type attributes on content nodes" do 648 | before do 649 | @xml = <<-XML 650 | 651 | 123 652 | 653 | XML 654 | @parsed_xml = MultiXml.parse(@xml) 655 | end 656 
| 657 | it "adds the the non-type attributes and type attribute to the value hash" do 658 | expect(@parsed_xml["options"]["value"]["__content__"]).to eq("123") 659 | expect(@parsed_xml["options"]["value"]["number"]).to eq("USD") 660 | expect(@parsed_xml["options"]["value"]["type"]).to eq("currency") 661 | end 662 | end 663 | end 664 | 665 | context "with newlines and whitespace" do 666 | before do 667 | @xml = <<-XML 668 | 669 | Erik Berlin 670 | 671 | XML 672 | end 673 | 674 | it "parses correctly" do 675 | expect(MultiXml.parse(@xml)).to eq("user" => {"name" => "Erik Berlin"}) 676 | end 677 | end 678 | 679 | # Babies having babies 680 | context "with children" do 681 | before do 682 | @xml = '' 683 | end 684 | 685 | it "parses correctly" do 686 | expect(MultiXml.parse(@xml)).to eq("users" => {"user" => {"name" => "Erik Berlin", 687 | "status" => {"text" => "Hello"}}}) 688 | end 689 | end 690 | end 691 | 692 | context "with sibling children" do 693 | before do 694 | @xml = "Erik BerlinWynn Netherland" 695 | end 696 | 697 | it "returns an Array" do 698 | expect(MultiXml.parse(@xml)["users"]["user"]).to be_a(Array) 699 | end 700 | 701 | it "parses correctly" do 702 | expect(MultiXml.parse(@xml)).to eq("users" => {"user" => ["Erik Berlin", "Wynn Netherland"]}) 703 | end 704 | end 705 | end 706 | 707 | context "a duplexed stream" do 708 | before do 709 | @xml, wr = IO.pipe 710 | 711 | Thread.new do 712 | "".each_char do |chunk| 713 | wr << chunk 714 | end 715 | wr.close 716 | end 717 | end 718 | 719 | it "parses correctly" do 720 | expect(MultiXml.parse(@xml)).to eq("user" => nil) 721 | end 722 | end 723 | end 724 | end 725 | --------------------------------------------------------------------------------