├── .rspec ├── lib ├── sax-machine │ ├── version.rb │ ├── config │ │ ├── sax_ancestor.rb │ │ ├── sax_attribute.rb │ │ ├── sax_element_value.rb │ │ ├── sax_collection.rb │ │ └── sax_element.rb │ ├── handlers │ │ ├── sax_nokogiri_handler.rb │ │ ├── sax_oga_handler.rb │ │ ├── sax_ox_handler.rb │ │ └── sax_abstract_handler.rb │ ├── sax_configure.rb │ ├── sax_config.rb │ └── sax_document.rb └── sax-machine.rb ├── .gitignore ├── Rakefile ├── Guardfile ├── Gemfile ├── .travis.yml ├── spec ├── spec_helper.rb ├── sax-machine │ ├── sax_activerecord_spec.rb │ ├── sax_include_spec.rb │ ├── sax_configure_spec.rb │ └── sax_document_spec.rb └── fixtures │ ├── atom-content.html │ └── atom.xml ├── sax-machine.gemspec ├── HISTORY.md └── README.md /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format progress 3 | -------------------------------------------------------------------------------- /lib/sax-machine/version.rb: -------------------------------------------------------------------------------- 1 | module SAXMachine 2 | VERSION = "1.3.2" 3 | end 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .bundle 3 | *.gem 4 | Gemfile.lock 5 | .rvmrc 6 | .DS_STORE 7 | pkg/ 8 | coverage/ 9 | .ruby-version 10 | .ruby-gemset 11 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | require 'bundler/gem_tasks' 3 | require 'rspec/core/rake_task' 4 | 5 | RSpec::Core::RakeTask.new(:spec) 6 | task test: :spec 7 | task default: :test 8 | -------------------------------------------------------------------------------- /Guardfile: -------------------------------------------------------------------------------- 1 | guard "rspec", version: 2 do 2 | 
watch(%r{^spec/.+_spec\.rb$}) 3 | watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" } 4 | watch("spec/spec_helper.rb") { "spec" } 5 | end 6 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "http://rubygems.org" 2 | 3 | gemspec 4 | 5 | group :development, :test do 6 | gem 'rake' 7 | gem 'guard-rspec' 8 | gem 'simplecov', require: false, platforms: [:mri] 9 | gem 'coveralls', require: false, platforms: [:mri] 10 | 11 | gem 'activerecord', '~> 4.1' 12 | gem 'nokogiri', '~> 1.6' 13 | gem 'ox', '>= 2.1.2', platforms: [:mri, :rbx] 14 | gem 'oga', '>= 0.3.4' 15 | end 16 | -------------------------------------------------------------------------------- /lib/sax-machine/config/sax_ancestor.rb: -------------------------------------------------------------------------------- 1 | module SAXMachine 2 | class SAXConfig 3 | class AncestorConfig 4 | attr_reader :name, :setter 5 | 6 | def initialize(name, options) 7 | @name = name.to_s 8 | @as = options[:as] 9 | @setter = "#{@as}=" 10 | end 11 | 12 | def column 13 | @as || @name.to_sym 14 | end 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/sax-machine/config/sax_attribute.rb: -------------------------------------------------------------------------------- 1 | module SAXMachine 2 | class SAXConfig 3 | class AttributeConfig < ElementValueConfig 4 | def value_from_attrs(attrs) 5 | attrs.fetch(@name, nil) 6 | end 7 | 8 | def attrs_match?(attrs) 9 | attrs.key?(@name) || attrs.value?(@name) 10 | end 11 | alias_method :has_value_and_attrs_match?, :attrs_match? 12 | 13 | def collection? 
14 | false 15 | end 16 | end 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | 3 | rvm: 4 | - 1.9.3 5 | - 2.0 6 | - 2.1 7 | - 2.2 8 | - jruby-1.7 9 | - rbx-2 10 | - ruby-head 11 | - jruby-head 12 | 13 | sudo: false 14 | 15 | env: 16 | matrix: 17 | - HANDLER="nokogiri" 18 | - HANDLER="ox" 19 | - HANDLER="oga" 20 | 21 | matrix: 22 | exclude: 23 | - env: HANDLER="ox" 24 | rvm: jruby-1.7 25 | - env: HANDLER="ox" 26 | rvm: jruby-head 27 | allow_failures: 28 | - env: HANDLER="oga" 29 | rvm: jruby-1.7 30 | - rvm: rbx-2 31 | - rvm: ruby-head 32 | - rvm: jruby-head 33 | -------------------------------------------------------------------------------- /lib/sax-machine/config/sax_element_value.rb: -------------------------------------------------------------------------------- 1 | module SAXMachine 2 | class SAXConfig 3 | class ElementValueConfig 4 | attr_reader :name, :setter, :data_class 5 | 6 | def initialize(name, options) 7 | @name = name.to_s 8 | @as = options[:as] 9 | @setter = "#{@as}=" 10 | @required = options[:required] 11 | @data_class = options[:class] 12 | end 13 | 14 | def column 15 | @as || @name.to_sym 16 | end 17 | 18 | def required? 
19 | !!@required 20 | end 21 | end 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | begin 2 | require 'simplecov' 3 | require 'coveralls' 4 | 5 | SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[ 6 | SimpleCov::Formatter::HTMLFormatter, 7 | Coveralls::SimpleCov::Formatter 8 | ] 9 | 10 | SimpleCov.start do 11 | add_filter '/spec/' 12 | end 13 | rescue LoadError 14 | end 15 | 16 | require File.expand_path(File.dirname(__FILE__) + '/../lib/sax-machine') 17 | SAXMachine.handler = ENV['HANDLER'].to_sym if ENV['HANDLER'] 18 | 19 | RSpec.configure do |config| 20 | config.run_all_when_everything_filtered = true 21 | config.filter_run :focus 22 | end 23 | -------------------------------------------------------------------------------- /spec/sax-machine/sax_activerecord_spec.rb: -------------------------------------------------------------------------------- 1 | require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') 2 | require 'active_record' 3 | 4 | describe "SAXMachine ActiveRecord integration" do 5 | before do 6 | class MySaxModel < ActiveRecord::Base 7 | SAXMachine.configure(MySaxModel) do |c| 8 | c.element :title 9 | end 10 | end 11 | end 12 | 13 | after do 14 | Object.send(:remove_const, :MySaxModel) 15 | end 16 | 17 | it "parses document" do 18 | document = MySaxModel.parse("My Title") 19 | expect(document.title).to eq("My Title") 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/sax-machine.rb: -------------------------------------------------------------------------------- 1 | require "sax-machine/version" 2 | require "sax-machine/sax_document" 3 | require "sax-machine/sax_configure" 4 | require "sax-machine/sax_config" 5 | 6 | module SAXMachine 7 | def self.handler 8 | @@handler ||= nil 9 | end 10 | 11 | def 
self.handler=(handler) 12 | if handler 13 | require "sax-machine/handlers/sax_#{handler}_handler" 14 | @@handler = handler 15 | end 16 | end 17 | end 18 | 19 | # Try handlers 20 | [:ox, :oga].each do |handler| 21 | begin 22 | SAXMachine.handler = handler 23 | break 24 | rescue LoadError 25 | end 26 | end 27 | 28 | # Still no handler, use Nokogiri 29 | if SAXMachine.handler.nil? 30 | SAXMachine.handler = :nokogiri 31 | end 32 | -------------------------------------------------------------------------------- /lib/sax-machine/config/sax_collection.rb: -------------------------------------------------------------------------------- 1 | module SAXMachine 2 | class SAXConfig 3 | class CollectionConfig 4 | attr_reader :name 5 | 6 | def initialize(name, options) 7 | @name = name.to_s 8 | @class = options[:class] 9 | @as = options[:as].to_s 10 | @with = options.fetch(:with, {}) 11 | end 12 | 13 | def accessor 14 | as 15 | end 16 | 17 | def attrs_match?(attrs) 18 | @with.all? do |key, value| 19 | value === attrs[key.to_s] 20 | end 21 | end 22 | 23 | def data_class 24 | @class || @name 25 | end 26 | 27 | protected 28 | def as 29 | @as 30 | end 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/sax-machine/handlers/sax_nokogiri_handler.rb: -------------------------------------------------------------------------------- 1 | require 'sax-machine/handlers/sax_abstract_handler' 2 | require 'nokogiri' 3 | 4 | module SAXMachine 5 | class SAXNokogiriHandler < Nokogiri::XML::SAX::Document 6 | include SAXAbstractHandler 7 | 8 | def sax_parse(xml_input) 9 | parser = Nokogiri::XML::SAX::Parser.new(self) 10 | parser.parse(xml_input) do |ctx| 11 | ctx.replace_entities = true 12 | end 13 | end 14 | 15 | alias_method :initialize, :_initialize 16 | alias_method :characters, :_characters 17 | alias_method :cdata_block, :_characters 18 | alias_method :start_element, :_start_element 19 | alias_method :end_element, :_end_element 
20 | alias_method :error, :_error 21 | alias_method :warning, :_warning 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /sax-machine.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | require File.expand_path("../lib/sax-machine/version", __FILE__) 3 | 4 | Gem::Specification.new do |s| 5 | s.name = "sax-machine" 6 | s.version = SAXMachine::VERSION 7 | 8 | s.authors = ["Paul Dix", "Julien Kirch", "Ezekiel Templin", "Dmitry Krasnoukhov"] 9 | s.email = %q{paul@pauldix.net} 10 | s.homepage = %q{http://github.com/pauldix/sax-machine} 11 | s.summary = %q{Declarative SAX Parsing with Nokogiri, Ox or Oga} 12 | s.license = %q{MIT} 13 | 14 | s.files = `git ls-files`.split("\n") 15 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 16 | s.require_paths = ["lib"] 17 | s.platform = Gem::Platform::RUBY 18 | 19 | s.add_development_dependency "rspec", "~> 3.0" 20 | end 21 | -------------------------------------------------------------------------------- /lib/sax-machine/handlers/sax_oga_handler.rb: -------------------------------------------------------------------------------- 1 | require 'sax-machine/handlers/sax_abstract_handler' 2 | require 'oga' 3 | 4 | module SAXMachine 5 | class SAXOgaHandler 6 | include SAXAbstractHandler 7 | 8 | def initialize(*args) 9 | _initialize(*args) 10 | end 11 | 12 | def sax_parse(xml_input) 13 | Oga.sax_parse_xml(self, xml_input) 14 | rescue LL::ParserError => e 15 | on_error(e.message) 16 | end 17 | 18 | def on_element(namespace, name, attrs) 19 | _start_element(node_name(namespace, name), attrs) 20 | end 21 | 22 | def after_element(namespace, name) 23 | _end_element(node_name(namespace, name)) 24 | end 25 | 26 | def on_error(*args) 27 | _error(args.join(" ")) 28 | end 29 | 30 | alias_method :on_text, :_characters 31 | alias_method :on_cdata, :_characters 32 | 33 | private 34 | 35 | def 
node_name(namespace, name) 36 | namespace ? "#{namespace}:#{name}" : name 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/sax-machine/sax_configure.rb: -------------------------------------------------------------------------------- 1 | module SAXMachine 2 | def self.configure(clazz) 3 | extended_clazz = Class.new(clazz) 4 | extended_clazz.send(:include, SAXMachine) 5 | 6 | # override create_attr to create attributes on the original class 7 | def extended_clazz.create_attr real_name 8 | superclass.send(:attr_reader, real_name) unless superclass.method_defined?(real_name) 9 | superclass.send(:attr_writer, real_name) unless superclass.method_defined?("#{real_name}=") 10 | end 11 | 12 | yield(extended_clazz) 13 | 14 | clazz.extend LightWeightSaxMachine 15 | clazz.sax_config = extended_clazz.sax_config 16 | 17 | (class << clazz;self;end).send(:define_method, :parse) do |xml_input| 18 | extended_clazz.parse(xml_input) 19 | end 20 | end 21 | 22 | module LightWeightSaxMachine 23 | attr_writer :sax_config 24 | 25 | def sax_config 26 | @sax_config ||= SAXConfig.new 27 | end 28 | 29 | def inherited(subclass) 30 | subclass.sax_config.send(:initialize_copy, self.sax_config) 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/sax-machine/handlers/sax_ox_handler.rb: -------------------------------------------------------------------------------- 1 | require 'sax-machine/handlers/sax_abstract_handler' 2 | require 'ox' 3 | 4 | module SAXMachine 5 | class SAXOxHandler < Ox::Sax 6 | include SAXAbstractHandler 7 | 8 | def initialize(*args) 9 | _initialize(*args) 10 | _reset_element 11 | end 12 | 13 | def sax_parse(xml_input) 14 | # Ox requires input to be streamable 15 | xml_input = StringIO.new(xml_input) if xml_input.is_a?(String) 16 | 17 | Ox.sax_parse(self, xml_input, 18 | symbolize: false, 19 | convert_special: true, 20 | skip: :skip_return, 21 
| ) 22 | end 23 | 24 | def attr(name, str) 25 | @attrs[name] = str 26 | end 27 | 28 | def attrs_done 29 | _start_element(@element, @attrs) 30 | _reset_element 31 | end 32 | 33 | def start_element(name) 34 | @element = name 35 | end 36 | 37 | def text(value) 38 | _characters(value) if value && !value.empty? 39 | end 40 | 41 | alias_method :cdata, :text 42 | 43 | def error(message, line, column) 44 | _error("#{message} on line #{line} column #{column}") 45 | end 46 | 47 | alias_method :end_element, :_end_element 48 | 49 | private 50 | 51 | def _reset_element 52 | @attrs = {} 53 | @element = "" 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /spec/sax-machine/sax_include_spec.rb: -------------------------------------------------------------------------------- 1 | require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') 2 | 3 | describe "SAXMachine inheritance" do 4 | before do 5 | class A 6 | include SAXMachine 7 | element :title 8 | end 9 | 10 | class B < A 11 | element :b 12 | end 13 | 14 | class C < B 15 | element :c 16 | end 17 | 18 | xml = "TestMatched!And Again" 19 | @a = A.new 20 | @a.parse xml 21 | @b = B.new 22 | @b.parse xml 23 | @c = C.new 24 | @c.parse xml 25 | end 26 | 27 | after do 28 | Object.send(:remove_const, :A) 29 | Object.send(:remove_const, :B) 30 | Object.send(:remove_const, :C) 31 | end 32 | 33 | it { expect(@a).to be_a(A) } 34 | it { expect(@a).not_to be_a(B) } 35 | it { expect(@a).to be_a(SAXMachine) } 36 | it { expect(@a.title).to eq("Test") } 37 | it { expect(@b).to be_a(A) } 38 | it { expect(@b).to be_a(B) } 39 | it { expect(@b).to be_a(SAXMachine) } 40 | it { expect(@b.title).to eq("Test") } 41 | it { expect(@b.b).to eq("Matched!") } 42 | it { expect(@c).to be_a(A) } 43 | it { expect(@c).to be_a(B) } 44 | it { expect(@c).to be_a(C) } 45 | it { expect(@c).to be_a(SAXMachine) } 46 | it { expect(@c.title).to eq("Test") } 47 | it { expect(@c.b).to eq("Matched!") } 48 
| it { expect(@c.c).to eq("And Again") } 49 | end 50 | -------------------------------------------------------------------------------- /spec/sax-machine/sax_configure_spec.rb: -------------------------------------------------------------------------------- 1 | require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') 2 | 3 | describe "SAXMachine configure" do 4 | before do 5 | class A 6 | SAXMachine.configure(A) do |c| 7 | c.element :title 8 | end 9 | end 10 | 11 | class B < A 12 | SAXMachine.configure(B) do |c| 13 | c.element :b 14 | end 15 | end 16 | 17 | class C < B 18 | SAXMachine.configure(C) do |c| 19 | c.element :c 20 | end 21 | end 22 | 23 | xml = "TestMatched!And Again" 24 | @a = A.parse xml 25 | @b = B.parse xml 26 | @c = C.parse xml 27 | end 28 | 29 | after do 30 | Object.send(:remove_const, :A) 31 | Object.send(:remove_const, :B) 32 | Object.send(:remove_const, :C) 33 | end 34 | 35 | it { expect(@a).to be_a(A) } 36 | it { expect(@a).not_to be_a(B) } 37 | it { expect(@a).to be_a(SAXMachine) } 38 | it { expect(@a.title).to eq("Test") } 39 | it { expect(@b).to be_a(A) } 40 | it { expect(@b).to be_a(B) } 41 | it { expect(@b).to be_a(SAXMachine) } 42 | it { expect(@b.title).to eq("Test") } 43 | it { expect(@b.b).to eq("Matched!") } 44 | it { expect(@c).to be_a(A) } 45 | it { expect(@c).to be_a(B) } 46 | it { expect(@c).to be_a(C) } 47 | it { expect(@c).to be_a(SAXMachine) } 48 | it { expect(@c.title).to eq("Test") } 49 | it { expect(@c.b).to eq("Matched!") } 50 | it { expect(@c.c).to eq("And Again") } 51 | end 52 | -------------------------------------------------------------------------------- /lib/sax-machine/config/sax_element.rb: -------------------------------------------------------------------------------- 1 | module SAXMachine 2 | class SAXConfig 3 | class ElementConfig 4 | attr_reader :name, :as, :setter, :data_class, :collection, :default 5 | 6 | def initialize(name, options) 7 | @name = name.to_s 8 | @with = options.fetch(:with, {}) 9 
| 10 | @value = if options.has_key?(:value) 11 | options[:value].to_s 12 | else 13 | nil 14 | end 15 | 16 | @as = options[:as] 17 | @collection = options[:collection] 18 | @default = options[:default] 19 | 20 | @setter = if @collection 21 | "add_#{options[:as]}" 22 | else 23 | "#{@as}=" 24 | end 25 | 26 | @data_class = options[:class] 27 | @required = options[:required] 28 | end 29 | 30 | def value_configured? 31 | !@value.nil? 32 | end 33 | 34 | def to_s 35 | "name: #{@name} dataclass: #{@data_class} setter: #{@setter} required: #{@required} value: #{@value} as:#{@as} collection: #{@collection} with: #{@with}" 36 | end 37 | 38 | def column 39 | @as || @name.to_sym 40 | end 41 | 42 | def required? 43 | !!@required 44 | end 45 | 46 | def value_from_attrs(attrs) 47 | attrs.fetch(@value, nil) 48 | end 49 | 50 | def attrs_match?(attrs) 51 | @with.all? do |key, value| 52 | value === attrs[key.to_s] 53 | end 54 | end 55 | 56 | def has_value_and_attrs_match?(attrs) 57 | !@value.nil? && attrs_match?(attrs) 58 | end 59 | 60 | def collection? 
61 | !!@collection 62 | end 63 | end 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /lib/sax-machine/sax_config.rb: -------------------------------------------------------------------------------- 1 | require "sax-machine/config/sax_element_value" 2 | require "sax-machine/config/sax_attribute" 3 | require "sax-machine/config/sax_element" 4 | require "sax-machine/config/sax_collection" 5 | require "sax-machine/config/sax_ancestor" 6 | 7 | module SAXMachine 8 | class SAXConfig 9 | attr_accessor :top_level_elements, :top_level_attributes, :top_level_element_value, :collection_elements, :ancestors 10 | 11 | def initialize 12 | # Default value is an empty array 13 | @top_level_elements = Hash.new { |hash, key| hash[key] = [] } 14 | @top_level_attributes = [] 15 | @top_level_element_value = [] 16 | @collection_elements = Hash.new { |hash, key| hash[key] = [] } 17 | @ancestors = [] 18 | end 19 | 20 | def columns 21 | @top_level_elements.map { |_, ecs| ecs }.flatten 22 | end 23 | 24 | def initialize_copy(sax_config) 25 | super 26 | 27 | @top_level_elements = sax_config.top_level_elements.clone 28 | @top_level_attributes = sax_config.top_level_attributes.clone 29 | @top_level_element_value = sax_config.top_level_element_value.clone 30 | @collection_elements = sax_config.collection_elements.clone 31 | @ancestors = sax_config.ancestors.clone 32 | end 33 | 34 | def add_top_level_element(name, options) 35 | @top_level_elements[name.to_s] << ElementConfig.new(name, options) 36 | end 37 | 38 | def add_top_level_attribute(name, options) 39 | @top_level_attributes << AttributeConfig.new(options.delete(:name), options) 40 | end 41 | 42 | def add_top_level_element_value(name, options) 43 | @top_level_element_value << ElementValueConfig.new(options.delete(:name), options) 44 | end 45 | 46 | def add_collection_element(name, options) 47 | @collection_elements[name.to_s] << CollectionConfig.new(name, options) 48 | end 49 | 50 | def 
add_ancestor(name, options) 51 | @ancestors << AncestorConfig.new(name, options) 52 | end 53 | 54 | def collection_config(name, attrs) 55 | @collection_elements[name.to_s].detect { |cc| cc.attrs_match?(attrs) } 56 | end 57 | 58 | def attribute_configs_for_element(attrs) 59 | @top_level_attributes.select { |aa| aa.attrs_match?(attrs) } 60 | end 61 | 62 | def element_values_for_element 63 | @top_level_element_value 64 | end 65 | 66 | def element_configs_for_attribute(name, attrs) 67 | return [] unless @top_level_elements.has_key?(name.to_s) 68 | 69 | @top_level_elements[name.to_s].select { |ec| ec.has_value_and_attrs_match?(attrs) } 70 | end 71 | 72 | def element_config_for_tag(name, attrs) 73 | return unless @top_level_elements.has_key?(name.to_s) 74 | 75 | @top_level_elements[name.to_s].detect { |ec| ec.attrs_match?(attrs) } 76 | end 77 | end 78 | end 79 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | # HEAD 2 | 3 | # 1.3.2 4 | 5 | * Compatibility with Oga 0.3 6 | 7 | # 1.3.1 8 | 9 | * Allow default value to be `false` [[#66](https://github.com/pauldix/sax-machine/pull/66)] 10 | * Support adding class to an attribute [[#68](https://github.com/pauldix/sax-machine/pull/68)] 11 | * Adjust Ox handler to skip empty text/cdata values 12 | 13 | # 1.3.0 14 | 15 | * Improve block modifiers to support all config options 16 | * Make block modifiers run in instance context 17 | * Make all handlers support IO as an input 18 | 19 | # 1.2.0 20 | 21 | * Add support for blocks as value modifiers [[#61](https://github.com/pauldix/sax-machine/pull/61)] 22 | 23 | # 1.1.1 24 | 25 | * Fix Nokogiri autoloading [[#60](https://github.com/pauldix/sax-machine/pull/60)] 26 | 27 | # 1.1.0 28 | 29 | * Option to use Oga as a SAX handler 30 | 31 | # 1.0.3 32 | 33 | * Remove missed `nokogiri` reference [[#54](https://github.com/pauldix/sax-machine/pull/54)] 34 | * Add 
support for `Symbol` data type conversion [[#57](https://github.com/pauldix/sax-machine/pull/57)] 35 | * Add specs for multiple elements with the same alias [[#53](https://github.com/pauldix/sax-machine/pull/53)] 36 | * Various code and documentation enhancements 37 | 38 | # 1.0.2 39 | 40 | * Make sure SAXConfig getters do not modify internal vars. Prevent race conditions 41 | 42 | # 1.0.1 43 | 44 | * Improve normalize_name performance 45 | 46 | # 1.0.0 47 | 48 | * Make `nokogiri` dependency optional 49 | * Add :default argument for elements [[#51](https://github.com/pauldix/sax-machine/pull/51)] 50 | 51 | # 0.3.0 52 | 53 | * Option to use Ox as a SAX handler instead of Nokogiri [[#49](https://github.com/pauldix/sax-machine/pull/49)] 54 | * Bump RSpec to 3.0, convert existing specs 55 | 56 | # 0.2.1 57 | 58 | * Turn on replace_entities on Nokogiri parser [[#40](https://github.com/pauldix/sax-machine/pull/40)] 59 | * Provide mass assignment through initialize method [[#38](https://github.com/pauldix/sax-machine/pull/38)] 60 | * Bump nokogiri (~> 1.6) and rspec, drop growl dependency 61 | * Update 'with' option to allow pattern matching in addition to string matching 62 | 63 | # 0.2.0.rc1 64 | 65 | * Try to reduce the number of instances of respond_to? in the code by 66 | pulling common uses of it out to methods. [[#32](https://github.com/pauldix/sax-machine/pull/32)] 67 | * The parse stack is now composed of simple objects instead of it being 68 | an array of arrays. [[#32](https://github.com/pauldix/sax-machine/pull/32)] 69 | * Now using an identifier for an empty buffer instead of empty string. [[#32](https://github.com/pauldix/sax-machine/pull/32)] 70 | * Clean up several variables that were not being used. [[#32](https://github.com/pauldix/sax-machine/pull/32)] 71 | * Encapsulate stack so it's not being exposed as part of the API. [[#32](https://github.com/pauldix/sax-machine/pull/32)] 72 | * `cdata_block` is now an alias instead of delegating to characters. 
[[#32](https://github.com/pauldix/sax-machine/pull/32)] 73 | 74 | # 0.1.0 75 | 76 | * Rename parent to ancestor 77 | * Add SAXMachine.configure 78 | -------------------------------------------------------------------------------- /spec/fixtures/atom-content.html: -------------------------------------------------------------------------------- 1 | 2 |

In my previous post about the speed of serializing data, I concluded that Marshal was the quickest way to get things done. So I set about using Marshal to store some data in an ActiveRecord object. Things worked great at first, but on some test data I got this error: marshal data too short. Luckily, Bryan Helmkamp had helpfully pointed out that there were sometimes problems with storing marshaled data in the database. He said it was best to base64 encode the marshal dump before storing.

3 | 4 |

I was curious why it was working on some things and not others. It turns out that some types of data being marshaled were causing the error to pop up. Here's the test data I used in my specs:

5 |
{ :foo => 3, :bar => 2 } # hash with symbols for keys and integer values
[3, 2.1, 4, 8]           # array with integer and float values
6 |

Everything worked when I switched the array values to all integers so it seems that floats were causing the problem. However, in the interest of keeping everything working regardless of data types, I base64 encoded before going into the database and decoded on the way out.

7 | 8 |

I also ran the benchmarks again to determine what impact this would have on speed. Here are the results for 100 iterations on a 10k element array and a 10k element hash with and without base64 encode/decode:

9 |
                user       system     total       real
array marshal  0.200000   0.010000   0.210000 (  0.214018) (without Base64)
array marshal  0.220000   0.010000   0.230000 (  0.250260)

hash marshal   1.830000   0.040000   1.870000 (  1.892874) (without Base64)
hash marshal   2.040000   0.100000   2.140000 (  2.170405)
10 |

As you can see the difference in speed is pretty negligible. I assume that the error has to do with AR cleaning the stuff that gets inserted into the database, but I'm not really sure. In the end it's just easier to use Base64.encode64 when serializing data into a text field in ActiveRecord using Marshal.

11 | 12 |

I've also read people posting about this error when using the database session store. I can only assume that it's because they were trying to store either way too much data in their session (too much for a regular text field) or they were storing float values or some other data type that would cause this to pop up. Hopefully this helps.

13 |
14 | 15 |
16 | -------------------------------------------------------------------------------- /lib/sax-machine/sax_document.rb: -------------------------------------------------------------------------------- 1 | module SAXMachine 2 | def self.included(base) 3 | base.send(:include, InstanceMethods) 4 | base.extend(ClassMethods) 5 | end 6 | 7 | def parse(xml_input, on_error = nil, on_warning = nil) 8 | handler_klass = SAXMachine.const_get("SAX#{SAXMachine.handler.capitalize}Handler") 9 | 10 | handler = handler_klass.new(self, on_error, on_warning) 11 | handler.sax_parse(xml_input) 12 | 13 | self 14 | end 15 | 16 | module InstanceMethods 17 | def initialize(attributes = {}) 18 | attributes.each do |name, value| 19 | send("#{name}=", value) 20 | end 21 | 22 | self.class.sax_config.top_level_elements.each do |_, configs| 23 | configs.each do |config| 24 | next if config.default.nil? 25 | next unless send(config.as).nil? 26 | 27 | send(config.setter, config.default) 28 | end 29 | end 30 | end 31 | end 32 | 33 | module ClassMethods 34 | def inherited(subclass) 35 | subclass.sax_config.send(:initialize_copy, self.sax_config) 36 | end 37 | 38 | def parse(*args) 39 | new.parse(*args) 40 | end 41 | 42 | def element(name, options = {}, &block) 43 | real_name = (options[:as] ||= name).to_s 44 | sax_config.add_top_level_element(name, options) 45 | create_attr(real_name, &block) 46 | end 47 | 48 | def attribute(name, options = {}, &block) 49 | real_name = (options[:as] ||= name).to_s 50 | sax_config.add_top_level_attribute(self.class.to_s, options.merge(name: name)) 51 | create_attr(real_name, &block) 52 | end 53 | 54 | def value(name, options = {}, &block) 55 | real_name = (options[:as] ||= name).to_s 56 | sax_config.add_top_level_element_value(self.class.to_s, options.merge(name: name)) 57 | create_attr(real_name, &block) 58 | end 59 | 60 | def ancestor(name, options = {}, &block) 61 | real_name = (options[:as] ||= name).to_s 62 | sax_config.add_ancestor(name, options) 63 | 
create_attr(real_name, &block) 64 | end 65 | 66 | def elements(name, options = {}, &block) 67 | real_name = (options[:as] ||= name).to_s 68 | 69 | if options[:class] 70 | sax_config.add_collection_element(name, options) 71 | else 72 | if block_given? 73 | define_method("add_#{real_name}") do |value| 74 | send(real_name).send(:<<, instance_exec(value, &block)) 75 | end 76 | else 77 | define_method("add_#{real_name}") do |value| 78 | send(real_name).send(:<<, value) 79 | end 80 | end 81 | 82 | sax_config.add_top_level_element(name, options.merge(collection: true)) 83 | end 84 | 85 | if !method_defined?(real_name) 86 | class_eval <<-SRC 87 | def #{real_name} 88 | @#{real_name} ||= [] 89 | end 90 | SRC 91 | end 92 | 93 | attr_writer(options[:as]) unless method_defined?("#{options[:as]}=") 94 | end 95 | 96 | def columns 97 | sax_config.columns 98 | end 99 | 100 | def column(sym) 101 | columns.select { |c| c.column == sym }[0] 102 | end 103 | 104 | def data_class(sym) 105 | column(sym).data_class 106 | end 107 | 108 | def required?(sym) 109 | column(sym).required? 110 | end 111 | 112 | def column_names 113 | columns.map { |e| e.column } 114 | end 115 | 116 | def sax_config 117 | @sax_config ||= SAXConfig.new 118 | end 119 | 120 | # we only want to insert the getter and setter if they haven't defined it from elsewhere. 121 | # this is how we allow custom parsing behavior. So you could define the setter 122 | # and have it parse the string into a date or whatever. 123 | def create_attr(real_name, &block) 124 | attr_reader(real_name) unless method_defined?(real_name) 125 | 126 | if !method_defined?("#{real_name}=") 127 | if block_given? 
128 | define_method("#{real_name}=") do |value| 129 | instance_variable_set("@#{real_name}", instance_exec(value, &block)) 130 | end 131 | else 132 | attr_writer(real_name) 133 | end 134 | end 135 | end 136 | end 137 | end 138 | -------------------------------------------------------------------------------- /lib/sax-machine/handlers/sax_abstract_handler.rb: -------------------------------------------------------------------------------- 1 | require 'time' 2 | 3 | module SAXMachine 4 | module SAXAbstractHandler 5 | NO_BUFFER = :no_buffer 6 | 7 | class StackNode < Struct.new(:object, :config, :buffer) 8 | def initialize(object, config = nil, buffer = NO_BUFFER) 9 | self.object = object 10 | self.config = config 11 | self.buffer = buffer 12 | end 13 | end 14 | 15 | def sax_parse(xml_input) 16 | raise NotImplementedError 17 | end 18 | 19 | def _initialize(object, on_error = nil, on_warning = nil) 20 | @stack = [ StackNode.new(object) ] 21 | @parsed_configs = {} 22 | @on_error = on_error 23 | @on_warning = on_warning 24 | end 25 | 26 | def _characters(data) 27 | node = stack.last 28 | 29 | if node.buffer == NO_BUFFER 30 | node.buffer = data.dup 31 | else 32 | node.buffer << data 33 | end 34 | end 35 | 36 | def _start_element(name, attrs = []) 37 | name = normalize_name(name) 38 | node = stack.last 39 | object = node.object 40 | 41 | sax_config = sax_config_for(object) 42 | 43 | if sax_config 44 | attrs = Hash[attrs] 45 | 46 | if collection_config = sax_config.collection_config(name, attrs) 47 | object = collection_config.data_class.new 48 | sax_config = sax_config_for(object) 49 | 50 | stack.push(StackNode.new(object, collection_config)) 51 | 52 | set_attributes_on(object, attrs) 53 | end 54 | 55 | sax_config.element_configs_for_attribute(name, attrs).each do |ec| 56 | unless parsed_config?(object, ec) 57 | value = data_class_value(ec.data_class, ec.value_from_attrs(attrs)) 58 | object.send(ec.setter, value) 59 | mark_as_parsed(object, ec) 60 | end 61 | end 62 | 63 | 
if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs) 64 | new_object = 65 | case element_config.data_class.to_s 66 | when "Integer" then 0 67 | when "Float" then 0.0 68 | when "Symbol" then nil 69 | when "Time" then Time.at(0) 70 | when "" then object 71 | else 72 | element_config.data_class.new 73 | end 74 | 75 | stack.push(StackNode.new(new_object, element_config)) 76 | 77 | set_attributes_on(new_object, attrs) 78 | end 79 | end 80 | end 81 | 82 | def _end_element(name) 83 | name = normalize_name(name) 84 | 85 | start_tag = stack[-2] 86 | close_tag = stack[-1] 87 | 88 | return unless start_tag && close_tag 89 | 90 | object = start_tag.object 91 | element = close_tag.object 92 | config = close_tag.config 93 | value = close_tag.buffer 94 | 95 | return unless config.name == name 96 | 97 | unless parsed_config?(object, config) 98 | if (element_value_config = element_values_for(config)) 99 | element_value_config.each { |evc| element.send(evc.setter, value) } 100 | end 101 | 102 | if config.respond_to?(:accessor) 103 | subconfig = sax_config_for(element) 104 | 105 | if econf = subconfig.element_config_for_tag(name, []) 106 | element.send(econf.setter, value) unless econf.value_configured? 
107 | end 108 | 109 | object.send(config.accessor) << element 110 | else 111 | value = data_class_value(config.data_class, value) || element 112 | object.send(config.setter, value) if value != NO_BUFFER 113 | mark_as_parsed(object, config) 114 | end 115 | 116 | # try to set the ancestor 117 | if (sax_config = sax_config_for(element)) 118 | sax_config.ancestors.each do |ancestor| 119 | element.send(ancestor.setter, object) 120 | end 121 | end 122 | end 123 | 124 | stack.pop 125 | end 126 | 127 | def _error(string) 128 | if @on_error 129 | @on_error.call(string) 130 | end 131 | end 132 | 133 | def _warning(string) 134 | if @on_warning 135 | @on_warning.call(string) 136 | end 137 | end 138 | 139 | private 140 | 141 | def mark_as_parsed(object, element_config) 142 | unless element_config.collection? 143 | @parsed_configs[[object.object_id, element_config.object_id]] = true 144 | end 145 | end 146 | 147 | def parsed_config?(object, element_config) 148 | @parsed_configs[[object.object_id, element_config.object_id]] 149 | end 150 | 151 | def sax_config_for(object) 152 | if object.class.respond_to?(:sax_config) 153 | object.class.sax_config 154 | end 155 | end 156 | 157 | def element_values_for(config) 158 | if config.data_class.respond_to?(:sax_config) 159 | config.data_class.sax_config.element_values_for_element 160 | end 161 | end 162 | 163 | def normalize_name(name) 164 | name.to_s.tr("-", "_") 165 | end 166 | 167 | def set_attributes_on(object, attributes) 168 | config = sax_config_for(object) 169 | 170 | if config 171 | config.attribute_configs_for_element(attributes).each do |ac| 172 | value = data_class_value(ac.data_class, ac.value_from_attrs(attributes)) 173 | object.send(ac.setter, value) 174 | end 175 | end 176 | end 177 | 178 | def data_class_value(data_class, value) 179 | case data_class.to_s 180 | when "String" then value != NO_BUFFER ? value.to_s : value 181 | when "Integer" then value != NO_BUFFER ? 
value.to_i : value 182 | when "Float" then value != NO_BUFFER ? value.to_s.gsub(",",".").to_f : value 183 | when "Symbol" then 184 | if value != NO_BUFFER 185 | value.to_s.empty? ? nil : value.to_s.downcase.to_sym 186 | else 187 | value 188 | end 189 | # Assumes that time elements will be string-based and are not 190 | # something else, e.g. seconds since epoch 191 | when "Time" then value != NO_BUFFER ? Time.parse(value.to_s) : value 192 | when "" then value 193 | end 194 | end 195 | 196 | def stack 197 | @stack 198 | end 199 | end 200 | end 201 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SAX Machine 2 | 3 | ## Status 4 | 5 | [![Gem Version](https://badge.fury.io/rb/sax-machine.svg)](http://badge.fury.io/rb/sax-machine) 6 | [![Build Status](https://secure.travis-ci.org/pauldix/sax-machine.svg?branch=master)](http://travis-ci.org/pauldix/sax-machine?branch=master) 7 | [![Coverage Status](https://img.shields.io/coveralls/pauldix/sax-machine.svg)](https://coveralls.io/r/pauldix/sax-machine?branch=master) 8 | [![Code Climate](https://img.shields.io/codeclimate/github/pauldix/sax-machine.svg)](https://codeclimate.com/github/pauldix/sax-machine) 9 | [![Dependencies](https://gemnasium.com/pauldix/sax-machine.svg)](https://gemnasium.com/pauldix/sax-machine) 10 | 11 | ## Description 12 | 13 | A declarative SAX parsing library backed by Nokogiri, Ox or Oga. 14 | 15 | ## Installation 16 | 17 | Add this line to your application's Gemfile: 18 | 19 | ```ruby 20 | gem 'sax-machine' 21 | ``` 22 | 23 | And then execute: 24 | 25 | ```bash 26 | $ bundle 27 | ``` 28 | 29 | ## Usage 30 | 31 | SAX Machine can use either `nokogiri`, `ox` or `oga` as XML SAX handler. 
32 | 33 | To use **Nokogiri** add this line to your Gemfile: 34 | 35 | ```ruby 36 | gem 'nokogiri', '~> 1.6' 37 | ``` 38 | 39 | To use **Ox** add this line to your Gemfile: 40 | 41 | ```ruby 42 | gem 'ox', '>= 2.1.2' 43 | ``` 44 | 45 | To use **Oga** add this line to your Gemfile: 46 | 47 | ```ruby 48 | gem 'oga', '>= 0.2.0' 49 | ``` 50 | 51 | You can also specify which handler to use manually, like this: 52 | 53 | ```ruby 54 | SAXMachine.handler = :nokogiri 55 | ``` 56 | 57 | ## Examples 58 | 59 | Include `SAXMachine` in any class and define properties to parse: 60 | 61 | ```ruby 62 | class AtomContent 63 | include SAXMachine 64 | attribute :type 65 | value :text 66 | end 67 | 68 | class AtomEntry 69 | include SAXMachine 70 | element :title 71 | # The :as argument makes this available through entry.author instead of .name 72 | element :name, as: :author 73 | element "feedburner:origLink", as: :url 74 | # The :default argument specifies default value for element when it's missing 75 | element :summary, class: String, default: "No summary available" 76 | element :content, class: AtomContent 77 | element :published 78 | ancestor :ancestor 79 | end 80 | 81 | class Atom 82 | include SAXMachine 83 | # Use block to modify the returned value 84 | # Blocks are working with pretty much everything, 85 | # except for `elements` with `class` attribute 86 | element :title do |title| 87 | title.strip 88 | end 89 | # The :with argument means that you only match a link tag 90 | # that has an attribute of type: "text/html" 91 | element :link, value: :href, as: :url, with: { 92 | type: "text/html" 93 | } 94 | # The :value argument means that instead of setting the value 95 | # to the text between the tag, it sets it to the attribute value of :href 96 | element :link, value: :href, as: :feed_url, with: { 97 | type: "application/atom+xml" 98 | } 99 | elements :entry, as: :entries, class: AtomEntry 100 | end 101 | ``` 102 | 103 | Then parse any XML with your class: 104 | 105 | ```ruby 
106 | feed = Atom.parse(xml_text) 107 | 108 | feed.title # Whatever the title of the blog is 109 | feed.url # The main URL of the blog 110 | feed.feed_url # The URL of the blog feed 111 | 112 | feed.entries.first.title # Title of the first entry 113 | feed.entries.first.author # The author of the first entry 114 | feed.entries.first.url # Permalink on the blog for this entry 115 | feed.entries.first.summary # Returns "No summary available" if summary is missing 116 | feed.entries.first.ancestor # The Atom ancestor 117 | feed.entries.first.content # Instance of AtomContent 118 | feed.entries.first.content.text # Entry content text 119 | ``` 120 | 121 | You can also use the elements method without specifying a class: 122 | 123 | ```ruby 124 | class ServiceResponse 125 | include SAXMachine 126 | elements :message, as: :messages 127 | end 128 | 129 | response = ServiceResponse.parse(" 130 | <response> 131 | <message>hi</message> 132 | <message>world</message> 133 | </response> 134 | ") 135 | response.messages.first # hi 136 | response.messages.last # world 137 | ``` 138 | 139 | To limit conflicts in the class used for mapping, you can use the alternate 140 | `SAXMachine.configure` syntax: 141 | 142 | ```ruby 143 | class X < ActiveRecord::Base 144 | # This way no element, elements or ancestor method will be added to X 145 | SAXMachine.configure(X) do |c| 146 | c.element :title 147 | end 148 | end 149 | ``` 150 | 151 | Multiple elements can be mapped to the same alias: 152 | 153 | ```ruby 154 | class RSSEntry 155 | include SAXMachine 156 | # ... 157 | element :pubDate, as: :published 158 | element :pubdate, as: :published 159 | element :"dc:date", as: :published 160 | element :"dc:Date", as: :published 161 | element :"dcterms:created", as: :published 162 | end 163 | ``` 164 | 165 | If more than one of these elements exists in the source, the value from the *last one* is used. The order of 166 | the `element` declarations in the code is unimportant. 
The order in which they are encountered while parsing the 167 | document determines the value assigned to the alias. 168 | 169 | If an element is defined in the source but is blank (e.g., `<pubDate></pubDate>`), it is ignored, and the non-empty one is picked. 170 | 171 | ## Contributing 172 | 173 | 1. Fork it 174 | 2. Create your feature branch (`git checkout -b my-new-feature`) 175 | 3. Commit your changes (`git commit -am 'Add some feature'`) 176 | 4. Push to the branch (`git push origin my-new-feature`) 177 | 5. Create new Pull Request 178 | 179 | ## LICENSE 180 | 181 | The MIT License 182 | 183 | Copyright (c) 2009-2014: 184 | 185 | * [Paul Dix](http://www.pauldix.net) 186 | * [Julien Kirch](http://www.archiloque.net) 187 | * [Ezekiel Templin](http://zeke.templ.in) 188 | * [Dmitry Krasnoukhov](http://krasnoukhov.com) 189 | 190 | Permission is hereby granted, free of charge, to any person obtaining 191 | a copy of this software and associated documentation files (the 192 | 'Software'), to deal in the Software without restriction, including 193 | without limitation the rights to use, copy, modify, merge, publish, 194 | distribute, sublicense, and/or sell copies of the Software, and to 195 | permit persons to whom the Software is furnished to do so, subject to 196 | the following conditions: 197 | 198 | The above copyright notice and this permission notice shall be 199 | included in all copies or substantial portions of the Software. 200 | 201 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 202 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 203 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 204 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 205 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 206 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 207 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
208 | -------------------------------------------------------------------------------- /spec/fixtures/atom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Paul Dix Explains Nothing 4 | 5 | 6 | tag:typepad.com,2003:weblog-108605 7 | 2008-09-04T16:07:19-04:00 8 | Entrepreneurship, programming, software development, politics, NYC, and random thoughts. 9 | TypePad 10 | 11 | Marshal data too short error with ActiveRecord 12 | 13 | 14 | tag:typepad.com,2003:post-55147740 15 | 2008-09-04T16:07:19-04:00 16 | 2008-11-17T14:40:06-05:00 17 | In my previous post about the speed of serializing data, I concluded that Marshal was the quickest way to get things done. So I set about using Marshal to store some data in an ActiveRecord object. Things worked great at... 18 | 19 | Paul Dix 20 | 21 | 22 | 23 | 24 | 25 | <div xmlns="http://www.w3.org/1999/xhtml"><p>In my previous <a href="http://www.pauldix.net/2008/08/serializing-dat.html">post about the speed of serializing data</a>, I concluded that Marshal was the quickest way to get things done. So I set about using Marshal to store some data in an ActiveRecord object. Things worked great at first, but on some test data I got this error: marshal data too short. Luckily, <a href="http://www.brynary.com/">Bryan Helmkamp</a> had helpfully pointed out that there were sometimes problems with storing marshaled data in the database. He said it was best to base64 encode the marshal dump before storing.</p> 26 | 27 | <p>I was curious why it was working on some things and not others. It turns out that some types of data being marshaled were causing the error to pop up. 
Here's the test data I used in my specs:</p> 28 | <pre>{ :foo =&gt; 3, :bar =&gt; 2 } # hash with symbols for keys and integer values<br />[3, 2.1, 4, 8]&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; # array with integer and float values</pre> 29 | <p>Everything worked when I switched the array values to all integers so it seems that floats were causing the problem. However, in the interest of keeping everything working regardless of data types, I base64 encoded before going into the database and decoded on the way out.</p> 30 | 31 | <p>I also ran the benchmarks again to determine what impact this would have on speed. Here are the results for 100 iterations on a 10k element array and a 10k element hash with and without base64 encode/decode:</p> 32 | <pre>&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp; user&nbsp; &nbsp;&nbsp; &nbsp; system&nbsp; &nbsp;&nbsp; total&nbsp; &nbsp;&nbsp; &nbsp; real<br />array marshal&nbsp; 0.200000&nbsp; &nbsp;0.010000&nbsp; &nbsp;0.210000 (&nbsp; 0.214018) (without Base64)<br />array marshal&nbsp; 0.220000&nbsp; &nbsp;0.010000&nbsp; &nbsp;0.230000 (&nbsp; 0.250260)<br /><br />hash marshal&nbsp; &nbsp;1.830000&nbsp; &nbsp;0.040000&nbsp; &nbsp;1.870000 (&nbsp; 1.892874) (without Base64)<br />hash marshal&nbsp; &nbsp;2.040000&nbsp; &nbsp;0.100000&nbsp; &nbsp;2.140000 (&nbsp; 2.170405)</pre> 33 | <p>As you can see the difference in speed is pretty negligible. I assume that the error has to do with AR cleaning the stuff that gets inserted into the database, but I'm not really sure. In the end it's just easier to use Base64.encode64 when serializing data into a text field in ActiveRecord using Marshal.</p> 34 | 35 | <p>I've also read people posting about this error when using the database session store. 
I can only assume that it's because they were trying to store either way too much data in their session (too much for a regular text field) or they were storing float values or some other data type that would cause this to pop up. Hopefully this helps.</p></div> 36 | <div class="feedflare"> 37 | <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=rWfWO"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=rWfWO" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=RaCqo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=RaCqo" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=1CBLo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=1CBLo" border="0"></img></a> 38 | </div><img src="http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~4/383536354" height="1" width="1"/> 39 | 40 | 41 | http://www.pauldix.net/2008/09/marshal-data-to.html?param1=1&param2=2 42 | 43 | Serializing data speed comparison: Marshal vs. JSON vs. Eval vs. YAML 44 | 45 | 46 | tag:typepad.com,2003:post-54766774 47 | 2008-08-27T14:31:41-04:00 48 | 2008-10-14T01:26:31-04:00 49 | Last night at the NYC Ruby hackfest, I got into a discussion about serializing data. Brian mentioned the Marshal library to me, which for some reason had completely escaped my attention until last night. He said it was wicked fast... 50 | 51 | Paul Dix 52 | 53 | 54 | 55 | 56 | 57 | <div xmlns="http://www.w3.org/1999/xhtml"><p>Last night at the <a href="http://nycruby.org">NYC Ruby hackfest</a>, I got into a discussion about serializing data. Brian mentioned the Marshal library to me, which for some reason had completely escaped my attention until last night. 
He said it was wicked fast so we decided to run a quick benchmark comparison.</p> 58 | <p>The test data is designed to roughly approximate what my <a href="http://www.pauldix.net/2008/08/storing-many-cl.html">stored classifier data</a> will look like. The different methods we decided to benchmark were Marshal, json, eval, and yaml. With each one we took the in-memory object and serialized it and then read it back in. With eval we had to convert the object to ruby code to serialize it then run eval against that. Here are the results for 100 iterations on a 10k element array and a hash with 10k key/value pairs run on my Macbook Pro 2.4 GHz Core 2 Duo:</p> 59 | <pre>&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; user&nbsp; &nbsp;&nbsp; &nbsp;system&nbsp; &nbsp;&nbsp; total&nbsp; &nbsp;&nbsp; &nbsp; real<br />array marshal&nbsp; 0.210000&nbsp; &nbsp;0.010000&nbsp; &nbsp;0.220000 (&nbsp; 0.220701)<br />array json&nbsp; &nbsp;&nbsp; 2.180000&nbsp; &nbsp;0.050000&nbsp; &nbsp;2.230000 (&nbsp; 2.288489)<br />array eval&nbsp; &nbsp;&nbsp; 2.090000&nbsp; &nbsp;0.060000&nbsp; &nbsp;2.150000 (&nbsp; 2.240443)<br />array yaml&nbsp; &nbsp; 26.650000&nbsp; &nbsp;0.350000&nbsp; 27.000000 ( 27.810609)<br /><br />hash marshal&nbsp; &nbsp;2.000000&nbsp; &nbsp;0.050000&nbsp; &nbsp;2.050000 (&nbsp; 2.114950)<br />hash json&nbsp; &nbsp;&nbsp; &nbsp;3.700000&nbsp; &nbsp;0.060000&nbsp; &nbsp;3.760000 (&nbsp; 3.881716)<br />hash eval&nbsp; &nbsp;&nbsp; &nbsp;5.370000&nbsp; &nbsp;0.140000&nbsp; &nbsp;5.510000 (&nbsp; 6.117947)<br />hash yaml&nbsp; &nbsp;&nbsp; 68.220000&nbsp; &nbsp;0.870000&nbsp; 69.090000 ( 72.370784)</pre> 60 | <p>The order in which I tested them is pretty much the order in which they ranked for speed. Marshal was amazingly fast. JSON and eval came out roughly equal on the array with eval trailing quite a bit for the hash. Yaml was just slow as all hell. A note on the json: I used the 1.1.3 library which uses c to parse. 
I assume it would be quite a bit slower if I used the pure ruby implementation. Here's <a href="http://gist.github.com/7549">a gist of the benchmark code</a> if you're curious and want to run it yourself.</p> 61 | 62 | 63 | 64 | <p>If you're serializing user data, be super careful about using eval. It's probably best to avoid it completely. Finally, just for fun I took yaml out (it was too slow) and ran the benchmark again with 1k iterations:</p> 65 | <pre>&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; user&nbsp; &nbsp;&nbsp; &nbsp;system&nbsp; &nbsp;&nbsp; total&nbsp; &nbsp;&nbsp; &nbsp; real<br />array marshal&nbsp; 2.080000&nbsp; &nbsp;0.110000&nbsp; &nbsp;2.190000 (&nbsp; 2.242235)<br />array json&nbsp; &nbsp; 21.860000&nbsp; &nbsp;0.500000&nbsp; 22.360000 ( 23.052403)<br />array eval&nbsp; &nbsp; 20.730000&nbsp; &nbsp;0.570000&nbsp; 21.300000 ( 21.992454)<br /><br />hash marshal&nbsp; 19.510000&nbsp; &nbsp;0.500000&nbsp; 20.010000 ( 20.794111)<br />hash json&nbsp; &nbsp;&nbsp; 39.770000&nbsp; &nbsp;0.670000&nbsp; 40.440000 ( 41.689297)<br />hash eval&nbsp; &nbsp;&nbsp; 51.410000&nbsp; &nbsp;1.290000&nbsp; 52.700000 ( 54.155711)</pre></div> 66 | <div class="feedflare"> 67 | <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=zombO"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=zombO" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=T3kqo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=T3kqo" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=aI6Oo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=aI6Oo" border="0"></img></a> 68 | </div><img src="http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~4/376401099" height="1" width="1"/> 69 | 70 | 71 | http://www.pauldix.net/2008/08/serializing-dat.html 72 | 73 | Gotcha with cache_fu and permalinks 74 | 75 | 76 | tag:typepad.com,2003:post-54411628 
77 | 2008-08-19T14:26:24-04:00 78 | 2008-11-20T13:58:38-05:00 79 | This is an issue I had recently in a project with cache_fu. Models that I found and cached based on permalinks weren't expiring the cache correctly when getting updated. Here's an example scenario. Say you have a blog with posts.... 80 | 81 | Paul Dix 82 | 83 | 84 | 85 | 86 | 87 | <div xmlns="http://www.w3.org/1999/xhtml"><p>This is an issue I had recently in a project with <a href="http://errtheblog.com/posts/57-kickin-ass-w-cachefu">cache_fu</a>. Models that I found and cached based on permalinks weren't expiring the cache correctly when getting updated. Here's an example scenario.</p> 88 | 89 | <p>Say you have a blog with posts. However, instead of using a url like http://paulscoolblog.com/posts/23 you want something that's more search engine friendly and readable for the user. So you use a permalink (maybe using the <a href="http://github.com/github/permalink_fu/tree/master">permalink_fu plugin</a>) that's auto-generated based on the title of the post. This post would have a url that looks something like http://paulscoolblog.com/posts/gotcha-with-cache_fu-and-permalinks.</p> 90 | 91 | <p>In your controller's show method you'd probably find the post like this:</p> 92 | <pre>@post = Post.find_by_permalink(params[:permalink])</pre> 93 | <p>However, you'd want to do the caching thing so you'd actually do this:</p> 94 | <pre>@post = Post.cached(:find_by_permalink, :with =&gt; params[:permalink])</pre> 95 | <p>The problem that I ran into, which is probably obvious to anyone familiar with cache_fu, was that when updating the post, it wouldn't expire the cache. That part of the post model looks like this:</p> 96 | <pre>class Post &lt; ActiveRecord::Base<br />&nbsp; before_save :expire_cache<br />&nbsp; ...<br />end</pre> 97 | <p>Do you see it? The issue is that when expire_cache gets called on the object, it expires the key <strong>Post:23</strong> from the cache (assuming 23 was the id of the post). 
However, when the post was cached using the cached(:find_by_permalink ...) method, it put the post object into the cache with a key of <strong>Post:find_by_permalink:gotcha-with-cache_fu-and-permalinks</strong>.</p> 98 | <p>Luckily, it's a fairly simple fix. If you have a model that is commonly accessed through permalinks, just write your own cache expiry method that looks for both keys and expires them.</p></div> 99 | <div class="feedflare"> 100 | <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=V1ojO"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=V1ojO" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=eu6Zo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=eu6Zo" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=ddUho"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=ddUho" border="0"></img></a> 101 | </div><img src="http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~4/369250462" height="1" width="1"/> 102 | 103 | 104 | http://www.pauldix.net/2008/08/gotcha-with-cac.html 105 | 106 | Non-greedy mode in regex 107 | 108 | 109 | tag:typepad.com,2003:post-54227244 110 | 2008-08-15T09:32:11-04:00 111 | 2008-08-27T09:33:15-04:00 112 | I was writing a regular expression yesterday and this popped up. It's just a quick note about greedy vs. non-greedy mode in regular expression matching. Say I have a regular expression that looks something like this: /(\[.*\])/ In English that... 113 | 114 | Paul Dix 115 | 116 | 117 | 118 | 119 | <p>I was writing a regular expression yesterday and this popped up. It's just a quick note about greedy vs. non-greedy mode in regular expression matching. Say I have a regular expression that looks something like this:</p> 120 | <pre>/(\[.*\])/</pre> 121 | <p>In English that says something roughly like: find an opening bracket [ with 0 or more of any character followed by a closing bracket. 
The backslashes are to escape the brackets and the parenthesis specify grouping so we can later access that matched text.</p> 122 | 123 | <p>The greedy mode comes up with the 0 or more characters part of the match (the .* part of the expression). The default mode of greedy means that the parser will gobble up as many characters as it can and match the very last closing bracket. So if you have text like this:</p> 124 | 125 | <pre>a = [:foo, :bar]<br>b = [:hello, :world]</pre> 126 | <p>The resulting grouped match would be this:</p> 127 | <pre>[:foo, :bar]<br>b = [:hello, :world]</pre> 128 | <p>If you just wanted the [:foo, :bar] part, the solution is to parse in non-greedy mode. This means that it will match on the first closing bracket it sees. The modified regular expression looks like this:</p> 129 | <pre>/(\[.*?\])/</pre> 130 | <p>I love the regular expression engine in Ruby. It's one of the best things it ripped off from Perl. The one thing I don't like is the magic global variable that it places matched groups into. You can access that first match through the $1 variable. If you're unfamiliar with regular expressions, a good place to start is the <a href="http://www.amazon.com/Programming-Perl-3rd-Larry-Wall/dp/0596000278/ref=pd_bbs_sr_1?ie=UTF8&amp;s=books&amp;qid=1218806755&amp;sr=8-1">Camel book</a>. It's about Perl, but the way they work is very similar. 
I actually haven't seen good coverage of regexes in a Ruby book.</p><div class="feedflare"> 131 | <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=OkVmO"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=OkVmO" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=iRpWo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=iRpWo" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=pjRCo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=pjRCo" border="0"></img></a> 132 | </div><img src="http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~4/365673983" height="1" width="1"/> 133 | 134 | 135 | http://www.pauldix.net/2008/08/non-greedy-mode.html 136 | 137 | Storing many classification models 138 | 139 | 140 | tag:typepad.com,2003:post-53888232 141 | 2008-08-07T12:01:38-04:00 142 | 2008-08-27T16:58:18-04:00 143 | One of the things I need to do in Filterly is keep many trained classifiers. These are the machine learning models that determine if a blog post is on topic (Filterly separates information by topic). At the very least I... 144 | 145 | Paul Dix 146 | 147 | 148 | 149 | 150 | <p>One of the things I need to do in <a href="http://filterly.com/">Filterly</a> is keep many trained <a href="http://en.wikipedia.org/wiki/Statistical_classification">classifiers</a>. These are the machine learning models that determine if a blog post is on topic (Filterly separates information by topic). At the very least I need one per topic in the system. If I want to do something like <a href="http://en.wikipedia.org/wiki/Boosting">boosting</a> then I need even more. The issue I'm wrestling with is how to store this data. I'll outline a specific approach and what the storage needs are.</p> 151 | 152 | <p>Let's say I go with boosting and 10 <a href="http://en.wikipedia.org/wiki/Perceptron">perceptrons</a>. 
I'll also limit my feature space to the 10,000 most statistically significant features. So the storage for each perceptron is a 10k element array. However, I'll also have to keep another data structure to store what the 10k features are and their position in the array. In code I use a hash for this where the feature name is the key and the value is its position. I just need to store one of these hashes per topic.</p> 153 | 154 | <p>That's not really a huge amount of data. I'm more concerned about the best way to store it. I don't think this kind of thing maps well to a relational database. I don't need to store the features individually. Generally when I'm running the thing I'll want the whole perceptron and feature set in memory for quick access. For now I'm just using a big text field and serializing each using JSON.</p> 155 | 156 | <p>I don't really like this approach. The whole serializing into the database seems really inelegant. Combined with the time that it takes to parse these things. Each time I want to see if a new post is on topic I'd need to load up the classifier and parse the 10 10k arrays and the 10k key hash. I could keep each classifier running as a service, but then I've got a pretty heavy process running for each topic.</p> 157 | 158 | <p>I guess I'll just use the stupid easy solution for the time being and worry about performance later. 
Anyone have thoughts on the best approach?</p><div class="feedflare"> 159 | <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=DUT8O"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=DUT8O" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=ZGjFo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=ZGjFo" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=pH3Vo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=pH3Vo" border="0"></img></a> 160 | </div><img src="http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~4/358530158" height="1" width="1"/> 161 | 162 | 163 | http://www.pauldix.net/2008/08/storing-many-cl.html 164 | 165 | 166 | -------------------------------------------------------------------------------- /spec/sax-machine/sax_document_spec.rb: -------------------------------------------------------------------------------- 1 | require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') 2 | 3 | describe "SAXMachine" do 4 | describe "element" do 5 | describe "when parsing a single element" do 6 | before do 7 | @klass = Class.new do 8 | include SAXMachine 9 | element :title 10 | ancestor :body 11 | value :something, required: false 12 | attribute :anything, required: true 13 | end 14 | end 15 | 16 | it "provides mass assignment through initialize method" do 17 | document = @klass.new(title: "Title") 18 | expect(document.title).to eq("Title") 19 | end 20 | 21 | it "provides an accessor" do 22 | document = @klass.new 23 | document.title = "Title" 24 | expect(document.title).to eq("Title") 25 | end 26 | 27 | it "does not overwrites the getter is there is already one present" do 28 | @klass = Class.new do 29 | def title 30 | "#{@title} ***" 31 | end 32 | 33 | include SAXMachine 34 | element :title 35 | end 36 | 37 | document = @klass.new 38 | document.title = "Title" 39 | expect(document.title).to eq("Title ***") 40 | 
end 41 | 42 | it "does not overwrites the setter if there is already one present" do 43 | @klass = Class.new do 44 | def title=(val) 45 | @title = "#{val} **" 46 | end 47 | 48 | include SAXMachine 49 | element :title 50 | end 51 | 52 | document = @klass.new 53 | document.title = "Title" 54 | expect(document.title).to eq("Title **") 55 | end 56 | 57 | it "does not overwrites the accessor when the element is not present" do 58 | document = @klass.new 59 | document.title = "Title" 60 | document.parse("") 61 | expect(document.title).to eq("Title") 62 | end 63 | 64 | it "overwrites the value when the element is present" do 65 | document = @klass.new 66 | document.title = "Old title" 67 | document.parse("New title") 68 | expect(document.title).to eq("New title") 69 | end 70 | 71 | it "saves the element text into an accessor" do 72 | document = @klass.parse("My Title") 73 | expect(document.title).to eq("My Title") 74 | end 75 | 76 | it "keeps the document encoding for elements" do 77 | data = "My Title" 78 | data.encode!("utf-8") 79 | 80 | document = @klass.parse(data) 81 | expect(document.title.encoding).to eq(data.encoding) 82 | end 83 | 84 | it "saves cdata into an accessor" do 85 | document = @klass.parse("<![CDATA[A Title]]>") 86 | expect(document.title).to eq("A Title") 87 | end 88 | 89 | it "saves the element text into an accessor when there are multiple elements" do 90 | document = @klass.parse("My Titlebar") 91 | expect(document.title).to eq("My Title") 92 | end 93 | 94 | it "saves the first element text when there are multiple of the same element" do 95 | document = @klass.parse("My Titlebar") 96 | expect(document.title).to eq("My Title") 97 | end 98 | 99 | describe "the introspection" do 100 | it "allows to get column names" do 101 | expect(@klass.column_names).to match_array([:title]) 102 | end 103 | 104 | it "allows to get elements" do 105 | expect(@klass.sax_config.top_level_elements.values.flatten.map(&:to_s)).to \ 106 | match_array(["name: title dataclass: 
setter: title= required: value: as:title collection: with: {}"]) 107 | end 108 | 109 | it "allows to get ancestors" do 110 | expect(@klass.sax_config.ancestors.map(&:column)).to \ 111 | match_array([:body]) 112 | end 113 | 114 | it "allows to get values" do 115 | expect(@klass.sax_config.top_level_element_value.map(&:column)).to \ 116 | match_array([:something]) 117 | expect(@klass.sax_config.top_level_element_value.map(&:required?)).to \ 118 | match_array([false]) 119 | end 120 | 121 | it "allows to get attributes" do 122 | expect(@klass.sax_config.top_level_attributes.map(&:column)).to \ 123 | match_array([:anything]) 124 | expect(@klass.sax_config.top_level_attributes.map(&:required?)).to \ 125 | match_array([true]) 126 | expect(@klass.sax_config.top_level_attributes.map(&:collection?)).to \ 127 | match_array([false]) 128 | end 129 | end 130 | 131 | describe "the class attribute" do 132 | before(:each) do 133 | @klass = Class.new do 134 | include SAXMachine 135 | element :date, class: DateTime 136 | end 137 | 138 | @document = @klass.new 139 | @document.date = Time.now.iso8601 140 | end 141 | 142 | it "is available" do 143 | expect(@klass.data_class(:date)).to eq(DateTime) 144 | end 145 | 146 | describe "string" do 147 | before do 148 | class TestString 149 | include SAXMachine 150 | element :number, class: String 151 | end 152 | 153 | class TestStringAttribute 154 | include SAXMachine 155 | attribute :sub_number, class: String 156 | end 157 | 158 | class TestStringWithAttribute 159 | include SAXMachine 160 | element :number, class: TestStringAttribute 161 | end 162 | end 163 | 164 | it "is handled in an element" do 165 | document = TestString.parse("5.5") 166 | expect(document.number).to eq("5.5") 167 | end 168 | 169 | it "is handled in an attribute" do 170 | document = TestStringWithAttribute.parse("") 171 | expect(document.number.sub_number).to eq("5.5") 172 | end 173 | end 174 | 175 | describe "integer" do 176 | before do 177 | class TestInteger 178 | 
include SAXMachine 179 | element :number, class: Integer 180 | end 181 | 182 | class TestIntegerAttribute 183 | include SAXMachine 184 | attribute :sub_number, class: Integer 185 | end 186 | 187 | class TestIntegerWithAttribute 188 | include SAXMachine 189 | element :number, class: TestIntegerAttribute 190 | end 191 | 192 | class IntegerInsideAttribute 193 | include SAXMachine 194 | element :number, value: :int_attr, as: :int_attr, class: Integer 195 | end 196 | end 197 | 198 | it "is handled in an element" do 199 | document = TestInteger.parse("5") 200 | expect(document.number).to eq(5) 201 | end 202 | 203 | it "is handled in an attribute" do 204 | document = TestIntegerWithAttribute.parse("") 205 | expect(document.number.sub_number).to eq(5) 206 | end 207 | 208 | it "is handled in an attribute with value option" do 209 | document = IntegerInsideAttribute.parse("") 210 | expect(document.int_attr).to eq(2) 211 | end 212 | end 213 | 214 | describe "float" do 215 | before do 216 | class TestFloat 217 | include SAXMachine 218 | element :number, class: Float 219 | end 220 | 221 | class TestFloatAttribute 222 | include SAXMachine 223 | attribute :sub_number, class: Float 224 | end 225 | 226 | class TestFloatWithAttribute 227 | include SAXMachine 228 | element :number, class: TestFloatAttribute 229 | end 230 | end 231 | 232 | it "is handled in an element with '.' 
delimiter" do 233 | document = TestFloat.parse("5.5") 234 | expect(document.number).to eq(5.5) 235 | end 236 | 237 | it "is handled in an element with ',' delimiter" do 238 | document = TestFloat.parse("5,5") 239 | expect(document.number).to eq(5.5) 240 | end 241 | 242 | it "is handled in an attribute" do 243 | document = TestFloatWithAttribute.parse("5.5") 244 | expect(document.number.sub_number).to eq(5.5) 245 | end 246 | end 247 | 248 | describe "symbol" do 249 | before do 250 | class TestSymbol 251 | include SAXMachine 252 | element :symbol, class: Symbol 253 | end 254 | 255 | class TestSymbolAttribute 256 | include SAXMachine 257 | attribute :sub_symbol, class: Symbol 258 | end 259 | 260 | class TestSymbolWithAttribute 261 | include SAXMachine 262 | element :symbol, class: TestSymbolAttribute 263 | end 264 | end 265 | 266 | it "is handled in an element" do 267 | document = TestSymbol.parse("MY_SYMBOL_VALUE") 268 | expect(document.symbol).to eq(:my_symbol_value) 269 | end 270 | 271 | it "is handled in an attribute" do 272 | document = TestSymbolWithAttribute.parse("") 273 | expect(document.symbol.sub_symbol).to eq(:my_symbol_value) 274 | end 275 | end 276 | 277 | describe "time" do 278 | before do 279 | class TestTime 280 | include SAXMachine 281 | element :time, class: Time 282 | end 283 | 284 | class TestTimeAttribute 285 | include SAXMachine 286 | attribute :sub_time, class: Time 287 | end 288 | 289 | class TestTimeWithAttribute 290 | include SAXMachine 291 | element :time, class: TestTimeAttribute 292 | end 293 | end 294 | 295 | it "is handled in an element" do 296 | document = TestTime.parse("") 297 | expect(document.time).to eq(Time.utc(1994, 2, 4, 6, 20, 0, 0)) 298 | end 299 | 300 | it "is handled in an attribute" do 301 | document = TestTimeWithAttribute.parse("") 302 | expect(document.time.sub_time).to eq(Time.utc(1994, 2, 4, 6, 20, 0, 0)) 303 | end 304 | end 305 | end 306 | 307 | describe "the default attribute" do 308 | it "is available" do 309 | 
@klass = Class.new do 310 | include SAXMachine 311 | element :number, class: Integer, default: 0 312 | end 313 | 314 | document = @klass.parse("number") 315 | expect(document.number).to eq(0) 316 | 317 | document = @klass.parse("") 318 | expect(document.number).to eq(0) 319 | end 320 | 321 | it "can be a Boolean" do 322 | @klass = Class.new do 323 | include SAXMachine 324 | element(:bool, default: false) { |v| !!v } 325 | end 326 | 327 | document = @klass.parse("bool") 328 | expect(document.bool).to be false 329 | 330 | document = @klass.parse("") 331 | expect(document.bool).to be false 332 | 333 | document = @klass.parse("1") 334 | expect(document.bool).to be true 335 | end 336 | end 337 | 338 | describe "the required attribute" do 339 | it "is available" do 340 | @klass = Class.new do 341 | include SAXMachine 342 | element :date, required: true 343 | end 344 | expect(@klass.required?(:date)).to be_truthy 345 | end 346 | end 347 | 348 | describe "the block" do 349 | before do 350 | class ElementBlockParser 351 | include SAXMachine 352 | 353 | ancestor :parent do |parent| 354 | parent.class.to_s 355 | end 356 | 357 | value :text do |text| 358 | text.downcase 359 | end 360 | end 361 | 362 | class BlockParser 363 | include SAXMachine 364 | 365 | element :title do |title| 366 | "#{title}!!!" 367 | end 368 | 369 | element :scope do |scope| 370 | "#{title} #{scope}" 371 | end 372 | 373 | attribute :id do |id| 374 | id.to_i 375 | end 376 | 377 | element :nested, class: ElementBlockParser 378 | elements :message, as: :messages do |message| 379 | "#{message}!" 380 | end 381 | end 382 | end 383 | 384 | it "has instance as a block context" do 385 | document = BlockParser.parse("SAXsomething") 386 | expect(document.scope).to eq("SAX!!! 
something") 387 | end 388 | 389 | it "uses block for element" do 390 | document = BlockParser.parse("SAX") 391 | expect(document.title).to eq("SAX!!!") 392 | end 393 | 394 | it 'uses block for attribute' do 395 | document = BlockParser.parse("SAX") 396 | expect(document.id).to eq(345) 397 | end 398 | 399 | it "uses block for value" do 400 | document = BlockParser.parse("<nested>tEst</nested>") 401 | expect(document.nested.text).to eq("test") 402 | end 403 | 404 | it "uses block for ancestor" do 405 | document = BlockParser.parse("<nested>SAX</nested>") 406 | expect(document.nested.parent).to eq("BlockParser") 407 | end 408 | 409 | it "uses block for elements" do 410 | document = BlockParser.parse("<message>hi</message><message>world</message>") 411 | expect(document.messages).to eq(["hi!", "world!"]) 412 | end 413 | end 414 | end 415 | 416 | describe "when parsing multiple elements" do 417 | before do 418 | @klass = Class.new do 419 | include SAXMachine 420 | element :title 421 | element :name 422 | end 423 | end 424 | 425 | it "saves the element text for a second tag" do 426 | document = @klass.parse("My TitlePaul") 427 | expect(document.name).to eq("Paul") 428 | expect(document.title).to eq("My Title") 429 | end 430 | 431 | it "does not overwrites the getter is there is already one present" do 432 | @klass = Class.new do 433 | def items 434 | [] 435 | end 436 | 437 | include SAXMachine 438 | elements :items 439 | end 440 | 441 | document = @klass.new 442 | document.items = [1, 2, 3, 4] 443 | expect(document.items).to eq([]) 444 | end 445 | 446 | it "does not overwrites the setter if there is already one present" do 447 | @klass = Class.new do 448 | def items=(val) 449 | @items = [1, *val] 450 | end 451 | 452 | include SAXMachine 453 | elements :items 454 | end 455 | 456 | document = @klass.new 457 | document.items = [2, 3] 458 | expect(document.items).to eq([1, 2, 3]) 459 | end 460 | end 461 | 462 | describe "when using options for parsing elements" do 463 | 
describe "using the 'as' option" do 464 | before do 465 | @klass = Class.new do 466 | include SAXMachine 467 | element :description, as: :summary 468 | end 469 | end 470 | 471 | it "provides an accessor using the 'as' name" do 472 | document = @klass.new 473 | document.summary = "a small summary" 474 | expect(document.summary).to eq("a small summary") 475 | end 476 | 477 | it "saves the element text into the 'as' accessor" do 478 | document = @klass.parse("here is a description") 479 | expect(document.summary).to eq("here is a description") 480 | end 481 | end 482 | 483 | describe "using the :with option" do 484 | describe "and the :value option" do 485 | before do 486 | @klass = Class.new do 487 | include SAXMachine 488 | element :link, value: :href, with: { foo: "bar" } 489 | end 490 | end 491 | 492 | it "saves the value of a matching element" do 493 | document = @klass.parse("asdf") 494 | expect(document.link).to eq("test") 495 | end 496 | 497 | it "saves the value of the first matching element" do 498 | document = @klass.parse("") 499 | expect(document.link).to eq("first") 500 | end 501 | 502 | describe "and the :as option" do 503 | before do 504 | @klass = Class.new do 505 | include SAXMachine 506 | element :link, value: :href, as: :url, with: { foo: "bar" } 507 | element :link, value: :href, as: :second_url, with: { asdf: "jkl" } 508 | end 509 | end 510 | 511 | it "saves the value of the first matching element" do 512 | document = @klass.parse("") 513 | expect(document.url).to eq("first") 514 | expect(document.second_url).to eq("second") 515 | end 516 | end 517 | end 518 | 519 | describe "with only one element" do 520 | before do 521 | @klass = Class.new do 522 | include SAXMachine 523 | element :link, with: { foo: "bar" } 524 | end 525 | end 526 | 527 | it "saves the text of an element that has matching attributes" do 528 | document = @klass.parse("match") 529 | expect(document.link).to eq("match") 530 | end 531 | 532 | it "does not saves the text of an 
element that doesn't have matching attributes" do 533 | document = @klass.parse("no match") 534 | expect(document.link).to be_nil 535 | end 536 | 537 | it "saves the text of an element that has matching attributes when it is the second of that type" do 538 | document = @klass.parse("no matchmatch") 539 | expect(document.link).to eq("match") 540 | end 541 | 542 | it "saves the text of an element that has matching attributes plus a few more" do 543 | document = @klass.parse("no matchmatch") 544 | expect(document.link).to eq("match") 545 | end 546 | end 547 | 548 | describe "with multiple elements of same tag" do 549 | before do 550 | @klass = Class.new do 551 | include SAXMachine 552 | element :link, as: :first, with: { foo: "bar" } 553 | element :link, as: :second, with: { asdf: "jkl" } 554 | end 555 | end 556 | 557 | it "matches the first element" do 558 | document = @klass.parse("no matchfirst matchno match") 559 | expect(document.first).to eq("first match") 560 | end 561 | 562 | it "matches the second element" do 563 | document = @klass.parse("no matchfirst matchsecond matchhi") 564 | expect(document.second).to eq("second match") 565 | end 566 | end 567 | 568 | describe "with only one element as a regular expression" do 569 | before do 570 | @klass = Class.new do 571 | include SAXMachine 572 | element :link, with: { foo: /ar$/ } 573 | end 574 | end 575 | 576 | it "saves the text of an element that has matching attributes" do 577 | document = @klass.parse("match") 578 | expect(document.link).to eq("match") 579 | end 580 | 581 | it "does not saves the text of an element that doesn't have matching attributes" do 582 | document = @klass.parse("no match") 583 | expect(document.link).to be_nil 584 | end 585 | 586 | it "saves the text of an element that has matching attributes when it is the second of that type" do 587 | document = @klass.parse("no matchmatch") 588 | expect(document.link).to eq("match") 589 | end 590 | 591 | it "saves the text of an element that has 
matching attributes plus a few more" do 592 | document = @klass.parse("no matchmatch") 593 | expect(document.link).to eq("match") 594 | end 595 | end 596 | end 597 | 598 | describe "using the 'value' option" do 599 | before do 600 | @klass = Class.new do 601 | include SAXMachine 602 | element :link, value: :foo 603 | end 604 | end 605 | 606 | it "saves the attribute value" do 607 | document = @klass.parse("hello") 608 | expect(document.link).to eq("test") 609 | end 610 | 611 | it "saves the attribute value when there is no text enclosed by the tag" do 612 | document = @klass.parse("") 613 | expect(document.link).to eq("test") 614 | end 615 | 616 | it "saves the attribute value when the tag close is in the open" do 617 | document = @klass.parse("") 618 | expect(document.link).to eq("test") 619 | end 620 | 621 | it "saves two different attribute values on a single tag" do 622 | @klass = Class.new do 623 | include SAXMachine 624 | element :link, value: :foo, as: :first 625 | element :link, value: :bar, as: :second 626 | end 627 | 628 | document = @klass.parse("") 629 | expect(document.first).to eq("foo value") 630 | expect(document.second).to eq("bar value") 631 | end 632 | 633 | it "does not fail if one of the attribute hasn't been defined" do 634 | @klass = Class.new do 635 | include SAXMachine 636 | element :link, value: :foo, as: :first 637 | element :link, value: :bar, as: :second 638 | end 639 | 640 | document = @klass.parse("") 641 | expect(document.first).to eq("foo value") 642 | expect(document.second).to be_nil 643 | end 644 | end 645 | 646 | describe "when desiring both the content and attributes of an element" do 647 | before do 648 | @klass = Class.new do 649 | include SAXMachine 650 | element :link 651 | element :link, value: :foo, as: :link_foo 652 | element :link, value: :bar, as: :link_bar 653 | end 654 | end 655 | 656 | it "parses the element and attribute values" do 657 | document = @klass.parse("hello") 658 | expect(document.link).to eq("hello") 
659 | expect(document.link_foo).to eq("test1") 660 | expect(document.link_bar).to eq("test2") 661 | end 662 | end 663 | end 664 | end 665 | 666 | describe "elements" do 667 | describe "when parsing multiple elements" do 668 | before do 669 | @klass = Class.new do 670 | include SAXMachine 671 | elements :entry, as: :entries 672 | end 673 | end 674 | 675 | it "provides a collection accessor" do 676 | document = @klass.new 677 | document.entries << :foo 678 | expect(document.entries).to eq([:foo]) 679 | end 680 | 681 | it "parses a single element" do 682 | document = @klass.parse("hello") 683 | expect(document.entries).to eq(["hello"]) 684 | end 685 | 686 | it "parses multiple elements" do 687 | document = @klass.parse("helloworld") 688 | expect(document.entries).to eq(["hello", "world"]) 689 | end 690 | 691 | it "parses multiple elements when taking an attribute value" do 692 | attribute_klass = Class.new do 693 | include SAXMachine 694 | elements :entry, as: :entries, value: :foo 695 | end 696 | 697 | doc = attribute_klass.parse("") 698 | expect(doc.entries).to eq(["asdf", "jkl"]) 699 | end 700 | end 701 | 702 | describe "when using the with and class options" do 703 | before do 704 | class Bar 705 | include SAXMachine 706 | element :title 707 | end 708 | 709 | class Foo 710 | include SAXMachine 711 | element :title 712 | end 713 | 714 | class Item 715 | include SAXMachine 716 | end 717 | 718 | @klass = Class.new do 719 | include SAXMachine 720 | elements :item, as: :items, with: { type: "Bar" }, class: Bar 721 | elements :item, as: :items, with: { type: /Foo/ }, class: Foo 722 | end 723 | end 724 | 725 | it "casts into the correct class" do 726 | document = @klass.parse("Bar titleFoo title") 727 | expect(document.items.size).to eq(2) 728 | expect(document.items.first).to be_a(Bar) 729 | expect(document.items.first.title).to eq("Bar title") 730 | expect(document.items.last).to be_a(Foo) 731 | expect(document.items.last.title).to eq("Foo title") 732 | end 733 | end 
# `elements ... class: Foo` builds one Foo per matching tag and parses the tag's
# children with Foo's own configuration.
describe "when using the class option" do
  before do
    class Foo
      include SAXMachine
      element :title
    end

    @klass = Class.new do
      include SAXMachine
      elements :entry, as: :entries, class: Foo
    end
  end

  it "parses a single element with children" do
    document = @klass.parse("<entry><title>a title</title></entry>")
    expect(document.entries.size).to eq(1)
    expect(document.entries.first.title).to eq("a title")
  end

  it "parses multiple elements with children" do
    document = @klass.parse("<xml><entry><title>title 1</title></entry><entry><title>title 2</title></entry></xml>")
    expect(document.entries.size).to eq(2)
    expect(document.entries.first.title).to eq("title 1")
    expect(document.entries.last.title).to eq("title 2")
  end

  # The <title> outside <entry> is only mapped on Foo, so the parent must ignore it.
  it "does not parse a top level element that is specified only in a child" do
    document = @klass.parse("<xml><title>no parse</title><entry><title>correct title</title></entry></xml>")
    expect(document.entries.size).to eq(1)
    expect(document.entries.first.title).to eq("correct title")
  end

  it "parses elements, and make attributes and inner text available" do
    class Related
      include SAXMachine
      element "related", as: :item
      element "related", as: :attr, value: "attr"
    end

    # Reopens the Foo constant defined in `before`; the added rule stays on Foo
    # for every example that runs afterwards.
    class Foo
      elements "related", as: "items", class: Related
    end

    doc = Foo.parse(%{<xml><collection><related attr='baz'>something</related><related>somethingelse</related></collection></xml>})
    expect(doc.items.first).not_to be_nil
    expect(doc.items.size).to eq(2)
    expect(doc.items.first.item).to eq("something")
    expect(doc.items.last.item).to eq("somethingelse")
  end

  it "parses out an attribute value from the tag that starts the collection" do
    # Reopens Foo so the collection item can read an attribute of its own opening tag.
    class Foo
      element :entry, value: :href, as: :url
    end

    document = @klass.parse("<xml><entry href='http://pauldix.net'><title>paul</title></entry></xml>")
    expect(document.entries.size).to eq(1)
    expect(document.entries.first.title).to eq("paul")
    expect(document.entries.first.url).to eq("http://pauldix.net")
  end
end
end 797 | 798 | describe "when dealing with element names containing dashes" do 799 | it "converts dashes to underscores" do 800 | class Dashes 801 | include SAXMachine 802 | element :dashed_element 803 | end 804 | 805 | parsed = Dashes.parse("Text") 806 | expect(parsed.dashed_element).to eq "Text" 807 | end 808 | end 809 | 810 | describe "full example" do 811 | before do 812 | @xml = File.read("spec/fixtures/atom.xml") 813 | 814 | class AtomEntry 815 | include SAXMachine 816 | element :title 817 | element :name, as: :author 818 | element "feedburner:origLink", as: :url 819 | element :link, as: :alternate, value: :href, with: { type: "text/html", rel: "alternate" } 820 | element :summary 821 | element :content 822 | element :published 823 | end 824 | 825 | class Atom 826 | include SAXMachine 827 | element :title 828 | element :link, value: :href, as: :url, with: { type: "text/html" } 829 | element :link, value: :href, as: :feed_url, with: { type: "application/atom+xml" } 830 | elements :entry, as: :entries, class: AtomEntry 831 | end 832 | 833 | @feed = Atom.parse(@xml) 834 | end 835 | 836 | it "parses the url" do 837 | expect(@feed.url).to eq("http://www.pauldix.net/") 838 | end 839 | 840 | it "parses entry url" do 841 | expect(@feed.entries.first.url).to eq("http://www.pauldix.net/2008/09/marshal-data-to.html?param1=1¶m2=2") 842 | expect(@feed.entries.first.alternate).to eq("http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/383536354/marshal-data-to.html?param1=1¶m2=2") 843 | end 844 | 845 | it "parses content" do 846 | expect(@feed.entries.first.content.strip).to eq(File.read("spec/fixtures/atom-content.html").strip) 847 | end 848 | end 849 | 850 | describe "parsing a tree" do 851 | before do 852 | @xml = %[ 853 | 854 | 855 | First 856 | 857 | 858 | Second 859 | 860 | 861 | 862 | 863 | ] 864 | 865 | class CategoryCollection; end 866 | 867 | class Category 868 | include SAXMachine 869 | attr_accessor :id 870 | element :category, value: :id, as: :id 871 | 
element :title 872 | element :categories, as: :collection, class: CategoryCollection 873 | ancestor :ancestor 874 | end 875 | 876 | class CategoryCollection 877 | include SAXMachine 878 | elements :category, as: :categories, class: Category 879 | end 880 | 881 | @collection = CategoryCollection.parse(@xml) 882 | end 883 | 884 | it "parses the first category" do 885 | expect(@collection.categories.first.id).to eq("1") 886 | expect(@collection.categories.first.title).to eq("First") 887 | expect(@collection.categories.first.ancestor).to eq(@collection) 888 | end 889 | 890 | it "parses the nested category" do 891 | expect(@collection.categories.first.collection.categories.first.id).to eq("2") 892 | expect(@collection.categories.first.collection.categories.first.title).to eq("Second") 893 | end 894 | end 895 | 896 | describe "parsing a tree without a collection class" do 897 | before do 898 | @xml = %[ 899 | 900 | 901 | First 902 | 903 | 904 | Second 905 | 906 | 907 | 908 | 909 | ] 910 | 911 | class CategoryTree 912 | include SAXMachine 913 | attr_accessor :id 914 | element :category, value: :id, as: :id 915 | element :title 916 | elements :category, as: :categories, class: CategoryTree 917 | end 918 | 919 | @collection = CategoryTree.parse(@xml) 920 | end 921 | 922 | it "parses the first category" do 923 | expect(@collection.categories.first.id).to eq("1") 924 | expect(@collection.categories.first.title).to eq("First") 925 | end 926 | 927 | it "parses the nested category" do 928 | expect(@collection.categories.first.categories.first.id).to eq("2") 929 | expect(@collection.categories.first.categories.first.title).to eq("Second") 930 | end 931 | end 932 | 933 | describe "with element deeper inside the xml structure" do 934 | before do 935 | @xml = %[ 936 | 937 | 938 | Hello 939 | 940 | 941 | ] 942 | 943 | @klass = Class.new do 944 | include SAXMachine 945 | attr_accessor :id 946 | element :item, value: "id", as: :id 947 | element :title 948 | end 949 | 950 | @item = 
@klass.parse(@xml) 951 | end 952 | 953 | it "has an id" do 954 | expect(@item.id).to eq("1") 955 | end 956 | 957 | it "has a title" do 958 | expect(@item.title).to eq("Hello") 959 | end 960 | end 961 | 962 | describe "with config to pull multiple attributes" do 963 | before do 964 | @xml = %[ 965 | 966 | 967 | 968 | ] 969 | 970 | class AuthorElement 971 | include SAXMachine 972 | attribute :name 973 | attribute :role 974 | end 975 | 976 | class ItemElement 977 | include SAXMachine 978 | element :author, class: AuthorElement 979 | end 980 | 981 | @item = ItemElement.parse(@xml) 982 | end 983 | 984 | it "has the child element" do 985 | expect(@item.author).not_to be_nil 986 | end 987 | 988 | it "has the author name" do 989 | expect(@item.author.name).to eq("John Doe") 990 | end 991 | 992 | it "has the author role" do 993 | expect(@item.author.role).to eq("writer") 994 | end 995 | end 996 | 997 | describe "with multiple elements and multiple attributes" do 998 | before do 999 | @xml = %[ 1000 | 1001 | 1002 | 1003 | 1004 | ] 1005 | 1006 | class AuthorElement2 1007 | include SAXMachine 1008 | attribute :name 1009 | attribute :role 1010 | end 1011 | 1012 | class ItemElement2 1013 | include SAXMachine 1014 | elements :author, as: :authors, class: AuthorElement2 1015 | end 1016 | 1017 | @item = ItemElement2.parse(@xml) 1018 | end 1019 | 1020 | it "has the child elements" do 1021 | expect(@item.authors).not_to be_nil 1022 | expect(@item.authors.count).to eq(2) 1023 | end 1024 | 1025 | it "has the author names" do 1026 | expect(@item.authors.first.name).to eq("John Doe") 1027 | expect(@item.authors.last.name).to eq("Jane Doe") 1028 | end 1029 | 1030 | it "has the author roles" do 1031 | expect(@item.authors.first.role).to eq("writer") 1032 | expect(@item.authors.last.role).to eq("artist") 1033 | end 1034 | end 1035 | 1036 | describe "with mixed attributes and element values" do 1037 | before do 1038 | @xml = %[ 1039 | 1040 | John Doe 1041 | 1042 | ] 1043 | 1044 | class 
AuthorElement3 1045 | include SAXMachine 1046 | value :name 1047 | attribute :role 1048 | end 1049 | 1050 | class ItemElement3 1051 | include SAXMachine 1052 | element :author, class: AuthorElement3 1053 | end 1054 | 1055 | @item = ItemElement3.parse(@xml) 1056 | end 1057 | 1058 | it "has the child elements" do 1059 | expect(@item.author).not_to be_nil 1060 | end 1061 | 1062 | it "has the author names" do 1063 | expect(@item.author.name).to eq("John Doe") 1064 | end 1065 | 1066 | it "has the author roles" do 1067 | expect(@item.author.role).to eq("writer") 1068 | end 1069 | end 1070 | 1071 | describe "with multiple mixed attributes and element values" do 1072 | before do 1073 | @xml = %[ 1074 | 1075 | sweet 1076 | John Doe 1077 | Jane Doe 1078 | 1079 | ] 1080 | 1081 | class AuthorElement4 1082 | include SAXMachine 1083 | value :name 1084 | attribute :role 1085 | end 1086 | 1087 | class ItemElement4 1088 | include SAXMachine 1089 | element :title 1090 | elements :author, as: :authors, class: AuthorElement4 1091 | 1092 | def title=(blah) 1093 | @title = blah 1094 | end 1095 | end 1096 | 1097 | @item = ItemElement4.parse(@xml) 1098 | end 1099 | 1100 | it "has the title" do 1101 | expect(@item.title).to eq("sweet") 1102 | end 1103 | 1104 | it "has the child elements" do 1105 | expect(@item.authors).not_to be_nil 1106 | expect(@item.authors.count).to eq(2) 1107 | end 1108 | 1109 | it "has the author names" do 1110 | expect(@item.authors.first.name).to eq("John Doe") 1111 | expect(@item.authors.last.name).to eq("Jane Doe") 1112 | end 1113 | 1114 | it "has the author roles" do 1115 | expect(@item.authors.first.role).to eq("writer") 1116 | expect(@item.authors.last.role).to eq("artist") 1117 | end 1118 | end 1119 | 1120 | describe "with multiple elements with the same alias" do 1121 | let(:item) { ItemElement5.parse(xml) } 1122 | 1123 | before do 1124 | class ItemElement5 1125 | include SAXMachine 1126 | element :pubDate, as: :published 1127 | element :"dc:date", as: 
:published 1128 | end 1129 | end 1130 | 1131 | describe "only first defined" do 1132 | let(:xml) { "first value" } 1133 | 1134 | it "has first value" do 1135 | expect(item.published).to eq("first value") 1136 | end 1137 | end 1138 | 1139 | describe "only last defined" do 1140 | let(:xml) { "last value" } 1141 | 1142 | it "has last value" do 1143 | expect(item.published).to eq("last value") 1144 | end 1145 | end 1146 | 1147 | describe "both defined" do 1148 | let(:xml) { "first valuelast value" } 1149 | 1150 | it "has last value" do 1151 | expect(item.published).to eq("last value") 1152 | end 1153 | end 1154 | 1155 | describe "both defined but order is reversed" do 1156 | let(:xml) { "last valuefirst value" } 1157 | 1158 | it "has first value" do 1159 | expect(item.published).to eq("first value") 1160 | end 1161 | end 1162 | 1163 | describe "both defined but last is empty" do 1164 | let(:xml) { "first value" } 1165 | 1166 | it "has first value" do 1167 | expect(item.published).to eq("first value") 1168 | end 1169 | end 1170 | end 1171 | 1172 | describe "with error handling" do 1173 | before do 1174 | @xml = %[ 1175 | 1176 | sweet 1177 | ] 1178 | 1179 | class ItemElement5 1180 | include SAXMachine 1181 | element :title 1182 | end 1183 | 1184 | @errors = [] 1185 | @warnings = [] 1186 | @item = ItemElement5.parse( 1187 | @xml, 1188 | ->(x) { @errors << x }, 1189 | ->(x) { @warnings << x }, 1190 | ) 1191 | end 1192 | 1193 | it "has error" do 1194 | expect(@errors.uniq.size).to eq(1) 1195 | end 1196 | 1197 | it "has no warning" do 1198 | expect(@warnings.uniq.size).to eq(0) 1199 | end 1200 | end 1201 | 1202 | describe "with io as a input" do 1203 | before do 1204 | @io = StringIO.new('sweet') 1205 | 1206 | class IoParser 1207 | include SAXMachine 1208 | element :title 1209 | end 1210 | 1211 | @item = ItemElement5.parse(@io) 1212 | end 1213 | 1214 | it "parses" do 1215 | expect(@item.title).to eq("sweet") 1216 | end 1217 | end 1218 | end 1219 | 
--------------------------------------------------------------------------------