├── .gitignore ├── CHANGELOG ├── Gemfile ├── LICENSE ├── README.markdown ├── Rakefile ├── errata.gemspec ├── lib ├── errata.rb └── errata │ ├── erratum.rb │ ├── erratum │ ├── delete.rb │ ├── reject.rb │ ├── replace.rb │ ├── simplify.rb │ ├── transform.rb │ └── truncate.rb │ └── version.rb ├── rfc_editor.png └── test ├── helper.rb ├── models.rb ├── test_errata.rb └── test_old_style.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw? 2 | .DS_Store 3 | coverage 4 | doc 5 | pkg 6 | Gemfile.lock 7 | *.gem 8 | .yardoc -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 1.1.1 / 2012-05-11 2 | 3 | * Enhancements 4 | 5 | * Don't require a :responder if there are no conditions 6 | 7 | 1.1.0 / 2012-05-03 8 | 9 | * Breaking changes 10 | 11 | * Errata#options is no longer available for introspection. Use Errata#lazy_load_table_options and Errata#responder if that helps. 12 | 13 | * Enhancements 14 | 15 | * Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7+ 16 | * Modernize Rakefile and test helper 17 | * Got rid of autoload and made thread-safe in general 18 | * Convert to minitest 19 | * Strictly correct lazy-loading 20 | * Added warnings for deprecated usage styles. 
21 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source :rubygems 2 | 3 | gemspec 4 | 5 | # development dependencies 6 | gem 'minitest' 7 | gem 'minitest-reporters' 8 | gem 'rake' 9 | gem 'yard' 10 | unless RUBY_VERSION >= '1.9' 11 | gem 'fastercsv' 12 | end 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Brighter Planet 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # errata 2 | 3 | Define an errata in table format (CSV) and then apply it to an arbitrary source. Inspired by RFC Errata, lets you keep your own errata in a transparent way. 4 | 5 | Tested in MRI 1.8.7+, MRI 1.9.2+, and JRuby 1.6.7+. Thread safe. 6 | 7 | ## Inspiration 8 | 9 | There's a process for reporting errata on RFC: 10 | 11 | * [RFC Errata](http://www.rfc-editor.org/errata.php) 12 | * [Status and Type Descriptions for RFC Errata](http://www.rfc-editor.org/status_type_desc.html) 13 | * [How to report errata](http://www.rfc-editor.org/how_to_report.html) 14 | 15 |

![screenshot of the RFC Editor](rfc_editor.png)

16 | 17 | ## Example 18 | 19 | Every errata has a table structure based on the [IETF RFC Editor's "How to Report Errata"](http://www.rfc-editor.org/how_to_report.html). 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 |
| date | name | email | type | section | action | x | y | condition | notes |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | meta | Intended use | | http://example.com/original-data-with-errors.xls | | | A hypothetical document that uses non-ISO country names |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/ANTIGUA & BARBUDA/` | ANTIGUA AND BARBUDA | | |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/BOLIVIA/` | BOLIVIA, PLURINATIONAL STATE OF | | |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/BOSNIA & HERZEGOVINA/` | BOSNIA AND HERZEGOVINA | | |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/BRITISH VIRGIN ISLANDS/` | VIRGIN ISLANDS, BRITISH | | |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/COTE D'IVOIRE/` | CÔTE D'IVOIRE | | |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/DEM\. PEOPLE'S REP\. OF KOREA/` | KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF | | |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/DEM\. REP\. OF THE CONGO/` | CONGO, THE DEMOCRATIC REPUBLIC OF THE | | |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/HONG KONG SAR/` | HONG KONG | | |
| 2011-03-22 | Ian Hough | ian@brighterplanet.com | technical | Country Name | replace | `/IRAN \(ISLAMIC REPUBLIC OF\)/` | IRAN, ISLAMIC REPUBLIC OF | | |
155 | 156 | Which would be saved as a CSV: 157 | 158 | date,name,email,type,section,action,x,y,condition,notes 159 | 2011-03-22,Ian Hough,ian@brighterplanet.com,meta,Intended use,,http://example.com/original-data-with-errors.xls,,,A hypothetical document that uses non-ISO country names 160 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/ANTIGUA & BARBUDA/,ANTIGUA AND BARBUDA,, 161 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BOLIVIA/,"BOLIVIA, PLURINATIONAL STATE OF",, 162 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BOSNIA & HERZEGOVINA/,BOSNIA AND HERZEGOVINA,, 163 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BRITISH VIRGIN ISLANDS/,"VIRGIN ISLANDS, BRITISH",, 164 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/COTE D'IVOIRE/,CÔTE D'IVOIRE,, 165 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/DEM\. PEOPLE'S REP\. OF KOREA/,"KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF",, 166 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/DEM\. REP\. OF THE CONGO/,"CONGO, THE DEMOCRATIC REPUBLIC OF THE",, 167 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/HONG KONG SAR/,HONG KONG,, 168 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/IRAN \(ISLAMIC REPUBLIC OF\)/,"IRAN, ISLAMIC REPUBLIC OF",, 169 | 170 | And then used like this: 171 | 172 | errata = Errata.new(:url => 'http://example.com/errata.csv') 173 | original = RemoteTable.new(:url => 'http://example.com/original-data-with-errors.xls') 174 | original.each do |row| 175 | errata.correct! row # destructively correct each row 176 | end 177 | 178 | ## UTF-8 179 | 180 | Assumes all input strings are UTF-8. Otherwise there can be problems with Ruby 1.9 and Regexp::FIXEDENCODING. 
Specifically, ASCII-8BIT regexps might be applied to UTF-8 strings (or vice-versa), resulting in Encoding::CompatibilityError. 181 | 182 | ## More advanced usage 183 | 184 | The [`earth` library](https://github.com/brighterplanet/earth) has dozens of real-life examples showing errata in action: 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 |
| Model | Reference | Errata file |
| --- | --- | --- |
| Country | data_miner.rb | wri_errata.csv |
| Aircraft | data_miner.rb | faa_errata.csv |
| Airports | data_miner.rb | openflights_errata.csv |
| Automobile model variants | data_miner.rb | feg_errata.csv |
213 | 214 | ## Real-world usage 215 | 216 |

Brighter Planet logo

217 | 218 | We use `errata` for [data science at Brighter Planet](http://brighterplanet.com/research) and in production at 219 | 220 | * [Brighter Planet's reference data web service](http://data.brighterplanet.com) 221 | * [Brighter Planet's impact estimate web service](http://impact.brighterplanet.com) 222 | 223 | The killer combination: 224 | 225 | 1. [`active_record_inline_schema`](https://github.com/seamusabshere/active_record_inline_schema) - define table structure 226 | 2. [`remote_table`](https://github.com/seamusabshere/remote_table) - download data and parse it 227 | 3. [`errata`](https://github.com/seamusabshere/errata) (this library!) - apply corrections in a transparent way 228 | 4. [`data_miner`](https://github.com/seamusabshere/data_miner) - import data idempotently 229 | 230 | ## Authors 231 | 232 | * Seamus Abshere 233 | * Andy Rossmeissl 234 | * Ian Hough 235 | 236 | ## Copyright 237 | 238 | Copyright (c) 2012 Brighter Planet. See LICENSE for details. 239 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | require "bundler/gem_tasks" 3 | 4 | require 'rake' 5 | require 'rake/testtask' 6 | Rake::TestTask.new(:test) do |test| 7 | test.libs << 'test' 8 | test.pattern = 'test/**/test_*.rb' 9 | test.verbose = true 10 | end 11 | 12 | require 'yard' 13 | YARD::Rake::YardocTask.new do |y| 14 | y.options << '--no-private' 15 | end 16 | 17 | task :default => :test 18 | -------------------------------------------------------------------------------- /errata.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | require File.expand_path("../lib/errata/version", __FILE__) 3 | 4 | Gem::Specification.new do |s| 5 | s.name = "errata" 6 | s.version = Errata::VERSION 7 | s.authors = ["Seamus Abshere", "Andy Rossmeissl"] 8 | s.email = 
["seamus@abshere.net"] 9 | s.homepage = "https://github.com/seamusabshere/errata" 10 | s.summary = "Correct strings based on remote errata files" 11 | s.description = %q{Correct strings based on remote errata files.} 12 | 13 | s.rubyforge_project = "errata" 14 | 15 | s.files = `git ls-files`.split("\n") 16 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 17 | s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } 18 | s.require_paths = ["lib"] 19 | 20 | s.add_dependency 'activesupport', '>=2.3.4' 21 | s.add_dependency 'remote_table', '>=1.1.7' 22 | s.add_dependency 'to_regexp', '>= 0.0.2' 23 | end 24 | -------------------------------------------------------------------------------- /lib/errata.rb: -------------------------------------------------------------------------------- 1 | require 'thread' 2 | 3 | require 'active_support' 4 | require 'active_support/version' 5 | if ::ActiveSupport::VERSION::MAJOR >= 3 6 | require 'active_support/core_ext' 7 | end 8 | require 'remote_table' 9 | 10 | require 'errata/erratum' 11 | 12 | class Errata 13 | CORRECTIONS = %w{delete replace simplify transform truncate reject} 14 | 15 | attr_reader :lazy_load_table_options 16 | attr_reader :lazy_load_responder_class_name 17 | 18 | # Arguments 19 | # * :responder (required only if the errata uses conditions) - normally you pass a class name as a string; it will be lazily constantized and a new object initialized from it (for example, 'Guru' will lead to 'Guru'.constantize.new), and that object should respond to questions like #is_a_bentley?. Passing an already-built object like Guru.new still works but is deprecated. 20 | # * :table (deprecated) - takes something that acts like a RemoteTable 21 | # If and only if you don't pass :table, all other options will be passed to a new RemoteTable (for example, :url, etc.) 
22 | def initialize(options = {}) 23 | options = options.symbolize_keys 24 | 25 | responder = options.delete :responder 26 | if responder.is_a?(::String) 27 | @lazy_load_responder_mutex = ::Mutex.new 28 | @lazy_load_responder_class_name = responder 29 | elsif responder 30 | ::Kernel.warn %{[errata] Passing an object as :responder is deprecated. It's recommended to pass a class name instead, which will be constantized and instantiated with no arguments.} 31 | @responder = responder 32 | else 33 | @no_responder = true 34 | end 35 | 36 | if table = options.delete(:table) 37 | ::Kernel.warn %{[errata] Passing :table is deprecated. It's recommended to pass table options instead.} 38 | @table = table 39 | else 40 | @lazy_load_table_options = options 41 | end 42 | 43 | @set_rejections_and_corrections_mutex = ::Mutex.new 44 | end 45 | 46 | def rejects?(row) 47 | rejections.any? { |erratum| erratum.targets?(row) } 48 | end 49 | 50 | def correct!(row) 51 | corrections.each { |erratum| erratum.correct!(row) } 52 | nil 53 | end 54 | 55 | def responder 56 | return if @no_responder == true 57 | @responder || @lazy_load_responder_mutex.synchronize do 58 | @responder ||= lazy_load_responder_class_name.constantize.new 59 | end 60 | end 61 | 62 | private 63 | 64 | def set_rejections_and_corrections! 
65 | return if @set_rejections_and_corrections == true 66 | @set_rejections_and_corrections_mutex.synchronize do 67 | return if @set_rejections_and_corrections == true 68 | 69 | if @table 70 | table = @table 71 | @table = nil # won't need this again 72 | else 73 | table = ::RemoteTable.new lazy_load_table_options 74 | end 75 | 76 | rejections = [] 77 | corrections = [] 78 | 79 | table.each do |erratum_initializer| 80 | erratum_initializer = erratum_initializer.symbolize_keys 81 | action = erratum_initializer[:action].downcase 82 | if action == 'reject' 83 | rejections << Erratum::Reject.new(responder, erratum_initializer) 84 | elsif CORRECTIONS.include?(action) 85 | corrections << Erratum.const_get(action.camelcase).new(responder, erratum_initializer) 86 | end 87 | end 88 | 89 | @rejections = rejections 90 | @corrections = corrections 91 | @set_rejections_and_corrections = true 92 | end 93 | end 94 | 95 | def rejections 96 | set_rejections_and_corrections! 97 | @rejections 98 | end 99 | 100 | def corrections 101 | set_rejections_and_corrections! 
102 | @corrections 103 | end 104 | end 105 | -------------------------------------------------------------------------------- /lib/errata/erratum.rb: -------------------------------------------------------------------------------- 1 | require 'to_regexp' 2 | 3 | require 'errata/erratum/delete' 4 | require 'errata/erratum/reject' 5 | require 'errata/erratum/replace' 6 | require 'errata/erratum/simplify' 7 | require 'errata/erratum/transform' 8 | require 'errata/erratum/truncate' 9 | 10 | class Errata 11 | class Erratum 12 | SEMICOLON_DELIMITER = /\s*;\s*/ 13 | SPECIAL_ABBR = /\Aabbr\((.*)\)\z/ 14 | REJECT_ACTIONS = %w{reject truncate} 15 | 16 | attr_reader :responder 17 | attr_reader :section 18 | attr_reader :matching_methods 19 | attr_reader :matching_expression 20 | 21 | def initialize(responder, options = {}) 22 | @responder = responder 23 | @section = options[:section] 24 | @matching_methods = options[:condition].split(SEMICOLON_DELIMITER).map do |method_id| 25 | method_id.strip.gsub(/\W/, '_').downcase + '?' 26 | end 27 | if @matching_methods.any? and @responder.nil? 28 | raise ::ArgumentError, %{[errata] Conditions like #{@matching_methods.first.inspect} used, but no :responder defined} 29 | end 30 | @matching_expression = if options[:x].blank? 31 | nil 32 | elsif (options[:x].start_with?('/') or options[:x].start_with?('%r{')) and as_regexp = options[:x].as_regexp 33 | ::Regexp.new(*as_regexp) 34 | elsif SPECIAL_ABBR.match options[:x] 35 | @abbr_query = true 36 | abbr = $1.split(/(\w\??)/).reject { |a| a == '' }.join('\.?\s?') + '\.?([^\w\.]|\z)' 37 | expr = '(\A|\s)' + abbr 38 | ::Regexp.new expr, true 39 | elsif REJECT_ACTIONS.include? options[:action] 40 | expr = '\A\s*' + ::Regexp.escape(options[:x]) 41 | ::Regexp.new expr, true 42 | else 43 | options[:x] 44 | end 45 | end 46 | 47 | def abbr? 
48 | @abbr_query == true 49 | end 50 | 51 | def targets?(row) 52 | !!(conditions_match?(row) and expression_matches?(row)) 53 | end 54 | 55 | def expression_matches?(row) 56 | return true if matching_expression.blank? or section.blank? 57 | case matching_expression 58 | when ::Regexp 59 | matching_expression.match row[section].to_s 60 | when ::String 61 | row[section].to_s.include? matching_expression 62 | end 63 | end 64 | 65 | def conditions_match?(row) 66 | matching_methods.all? { |method_id| responder.send method_id, row } 67 | end 68 | end 69 | end 70 | -------------------------------------------------------------------------------- /lib/errata/erratum/delete.rb: -------------------------------------------------------------------------------- 1 | class Errata 2 | class Erratum 3 | class Delete < Erratum 4 | attr_reader :backfill 5 | 6 | def initialize(responder, options = {}) 7 | super 8 | # otherwise abbr(X) will kill the characters before and after the match 9 | @backfill = if abbr? 10 | '\1\2' 11 | else 12 | '' 13 | end 14 | end 15 | 16 | def correct!(row) 17 | if targets? row 18 | row[section].gsub! matching_expression, backfill 19 | end 20 | end 21 | end 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/errata/erratum/reject.rb: -------------------------------------------------------------------------------- 1 | class Errata 2 | class Erratum 3 | class Reject < Erratum 4 | end 5 | end 6 | end 7 | -------------------------------------------------------------------------------- /lib/errata/erratum/replace.rb: -------------------------------------------------------------------------------- 1 | class Errata 2 | class Erratum 3 | class Replace < Erratum 4 | attr_reader :correction 5 | 6 | def initialize(responder, options = {}) 7 | super 8 | @correction = if abbr? 9 | '\1' + options[:y].to_s + '\2' 10 | else 11 | options[:y].to_s 12 | end 13 | end 14 | 15 | def correct!(row) 16 | if targets? 
row 17 | if matching_expression.blank? 18 | row[section] = correction.dup 19 | else 20 | row[section].gsub! matching_expression, correction 21 | end 22 | end 23 | end 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/errata/erratum/simplify.rb: -------------------------------------------------------------------------------- 1 | class Errata 2 | class Erratum 3 | class Simplify < Erratum 4 | attr_reader :second_section 5 | 6 | def initialize(responder, options = {}) 7 | super 8 | @second_section = options[:x] 9 | end 10 | 11 | def targets?(row) 12 | !row[section].blank? and !row[second_section].blank? and conditions_match?(row) and special_matcher(row).match(row[section]) 13 | end 14 | 15 | def correct!(row) 16 | if targets? row 17 | row[section].gsub! special_matcher(row), '' 18 | end 19 | end 20 | 21 | private 22 | 23 | def special_matcher(row) 24 | /[\s\(\[\'\"]*#{::Regexp.escape(row[second_section])}[\s\)\]\'\"]*/ 25 | end 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/errata/erratum/transform.rb: -------------------------------------------------------------------------------- 1 | class Errata 2 | class Erratum 3 | class Transform < Erratum 4 | ALLOWED_METHODS = %w{upcase downcase} 5 | 6 | attr_reader :string_method 7 | 8 | def initialize(responder, options = {}) 9 | super 10 | @string_method = options[:y] 11 | raise %{[errata] Method "#{@string_method}" not allowed} unless ALLOWED_METHODS.include? @string_method 12 | end 13 | 14 | def correct!(row) 15 | if targets? 
row 16 | row[section].gsub!(matching_expression) { |match| match.send string_method } 17 | end 18 | end 19 | end 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/errata/erratum/truncate.rb: -------------------------------------------------------------------------------- 1 | class Errata 2 | class Erratum 3 | class Truncate < Erratum 4 | attr_reader :necessary_and_sufficient_prefix 5 | 6 | def initialize(responder, options = {}) 7 | super 8 | @necessary_and_sufficient_prefix = options[:x] 9 | end 10 | 11 | def correct!(row) 12 | if targets? row 13 | row[section] = necessary_and_sufficient_prefix.dup 14 | end 15 | end 16 | end 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /lib/errata/version.rb: -------------------------------------------------------------------------------- 1 | class Errata 2 | VERSION = '1.1.1' 3 | end 4 | -------------------------------------------------------------------------------- /rfc_editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seamusabshere/errata/1666873d1c75fecdbaa86d090f386d81f3831c28/rfc_editor.png -------------------------------------------------------------------------------- /test/helper.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'bundler/setup' 3 | 4 | if Bundler.definition.specs['ruby-debug19'].first or Bundler.definition.specs['ruby-debug'].first 5 | require 'ruby-debug' 6 | end 7 | 8 | require 'minitest/spec' 9 | require 'minitest/autorun' 10 | require 'minitest/reporters' 11 | MiniTest::Unit.runner = MiniTest::SuiteRunner.new 12 | MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new 13 | 14 | unless RUBY_VERSION >= '1.9' 15 | require 'fastercsv' 16 | end 17 | 18 | require 'errata' 19 | 
-------------------------------------------------------------------------------- /test/models.rb: -------------------------------------------------------------------------------- 1 | class AutomobileVariantGuru 2 | def transmission_is_blank?(row) 3 | row['transmission'].blank? 4 | end 5 | 6 | def is_a_2007_gmc_or_chevrolet?(row) 7 | row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase 8 | end 9 | 10 | def is_a_porsche?(row) 11 | row.slice('MFR', 'Manufacturer', 'carline_mfr_name').any? { |k, v| v.upcase == 'PORSCHE' } 12 | end 13 | 14 | def is_not_a_porsche?(row) 15 | !is_a_porsche? row 16 | end 17 | 18 | def is_a_mercedes_benz?(row) 19 | row.slice('MFR', 'Manufacturer', 'carline_mfr_name').any? { |k, v| v =~ /MERCEDES/i } 20 | end 21 | 22 | def is_a_lexus?(row) 23 | row.slice('MFR', 'Manufacturer', 'carline_mfr_name').any? { |k, v| v.upcase == 'LEXUS' } 24 | end 25 | 26 | def is_a_bmw?(row) 27 | row.slice('MFR', 'Manufacturer', 'carline_mfr_name').any? { |k, v| v.upcase == 'BMW' } 28 | end 29 | 30 | def is_a_ford?(row) 31 | row.slice('MFR', 'Manufacturer', 'carline_mfr_name').any? { |k, v| v.upcase == 'FORD' } 32 | end 33 | 34 | def is_a_bentley?(row) 35 | row.slice('MFR', 'Manufacturer', 'carline_mfr_name').any? { |k, v| v.upcase == 'BENTLEY' } 36 | end 37 | 38 | def is_a_rolls_royce?(row) 39 | row.slice('MFR', 'Manufacturer', 'carline_mfr_name').any? { |k, v| v =~ /ROLLS/i } 40 | end 41 | 42 | def is_a_turbo_brooklands?(row) 43 | row.slice('CAR LINE', 'carline name', 'carline_name').any? { |k, v| v =~ /TURBO R\/RL BKLDS/i } 44 | end 45 | 46 | def model_contains_maybach?(row) 47 | row.slice('CAR LINE', 'carline name', 'carline_name').any? { |k, v| v =~ /MAYBACH/i } 48 | end 49 | 50 | def model_contains_bentley?(row) 51 | row.slice('CAR LINE', 'carline name', 'carline_name').any? 
{ |k, v| v =~ /BENTLEY/i } 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /test/test_errata.rb: -------------------------------------------------------------------------------- 1 | require 'helper' 2 | require 'models' 3 | 4 | describe Errata do 5 | describe 'without responder' do 6 | it "doesn't require a responder" do 7 | e = Errata.new :url => 'https://docs.google.com/spreadsheet/pub?key=0AkCJNpm9Ks6JdHEtemF2YTZzdGRYbE1MTHFMRXpRUHc&single=true&gid=0&output=csv' 8 | row = { 'name' => 'denver intl airport' } 9 | e.correct! row 10 | row['name'].must_equal 'denver International airport' 11 | end 12 | end 13 | 14 | describe 'with conditions' do 15 | it "uses a responder to answer conditions" do 16 | eval %{ 17 | class ColoradoGuru 18 | def is_denver_airport?(record) 19 | record['name'].to_s.downcase.include? 'denver' 20 | end 21 | end 22 | } 23 | e = Errata.new( 24 | :url => 'https://docs.google.com/spreadsheet/pub?key=0AkCJNpm9Ks6JdG9PcFBjVnE4SGpLVXNTakVhSFY2VFE&single=true&gid=0&output=csv', 25 | :responder => 'ColoradoGuru' 26 | ) 27 | row = { 'name' => 'denver intl airport' } 28 | e.correct! row 29 | row['name'].must_equal 'denver International airport' # matched condition 30 | row = { 'name' => 'madison intl airport' } 31 | e.correct! row 32 | row['name'].must_equal 'madison intl airport' # didn't match 33 | end 34 | 35 | it "blows up if you have conditions but no responder" do 36 | e = Errata.new :url => 'https://docs.google.com/spreadsheet/pub?key=0AkCJNpm9Ks6JdG9PcFBjVnE4SGpLVXNTakVhSFY2VFE&single=true&gid=0&output=csv' 37 | row = { 'name' => 'denver intl airport' } 38 | lambda do 39 | e.correct! 
row 40 | end.must_raise ArgumentError, /conditions.*used/i 41 | end 42 | end 43 | 44 | describe 'to correct automobile model details' do 45 | before do 46 | @e = Errata.new :url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg', 47 | :responder => 'AutomobileVariantGuru' 48 | end 49 | 50 | it "corrects rows" do 51 | alfa = { "carline_mfr_name"=>"ALFA ROMEO" } 52 | @e.correct!(alfa) 53 | alfa['carline_mfr_name'].must_equal 'Alfa Romeo' 54 | end 55 | 56 | it "rejects rows" do 57 | @e.rejects?('carline_mfr_name' => 'AURORA CARS').must_equal true 58 | end 59 | 60 | it "tries multiple conditions" do 61 | bentley = { 'carline_mfr_name' => 'ROLLS-ROYCE BENTLEY', "carline name" => 'Super Bentley' } 62 | @e.correct!(bentley) 63 | bentley['carline_mfr_name'].must_equal 'Bentley' 64 | end 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /test/test_old_style.rb: -------------------------------------------------------------------------------- 1 | require 'helper' 2 | require 'models' 3 | 4 | describe 'old-style Errata usage' do 5 | before do 6 | @e = Errata.new :table => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg'), 7 | :responder => AutomobileVariantGuru.new 8 | end 9 | 10 | it "corrects rows" do 11 | alfa = { "carline_mfr_name"=>"ALFA ROMEO" } 12 | @e.correct!(alfa) 13 | alfa['carline_mfr_name'].must_equal 'Alfa Romeo' 14 | end 15 | 16 | it "rejects rows" do 17 | @e.rejects?('carline_mfr_name' => 'AURORA CARS').must_equal true 18 | end 19 | 20 | it "lazily constantizes and initializes responder" do 21 | e = Errata.new :table => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg'), 22 | :responder => 'AutomobileVariantGuru' 23 | alfa = { "carline_mfr_name"=>"ALFA ROMEO" } 24 | e.correct!(alfa) 25 | alfa['carline_mfr_name'].must_equal 'Alfa Romeo' 26 | end 27 | 28 | it "passes options to RemoteTable if no :table is specified" 
do 29 | e = Errata.new :url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg', 30 | :responder => AutomobileVariantGuru.new 31 | alfa = { "carline_mfr_name"=>"ALFA ROMEO" } 32 | e.correct!(alfa) 33 | alfa['carline_mfr_name'].must_equal 'Alfa Romeo' 34 | end 35 | 36 | it "tries multiple conditions" do 37 | bentley = { 'carline_mfr_name' => 'ROLLS-ROYCE BENTLEY', "carline name" => 'Super Bentley' } 38 | @e.correct!(bentley) 39 | bentley['carline_mfr_name'].must_equal 'Bentley' 40 | end 41 | end 42 | --------------------------------------------------------------------------------