├── .gitignore
├── CHANGELOG
├── Gemfile
├── LICENSE
├── README.markdown
├── Rakefile
├── errata.gemspec
├── lib
├── errata.rb
└── errata
│ ├── erratum.rb
│ ├── erratum
│ ├── delete.rb
│ ├── reject.rb
│ ├── replace.rb
│ ├── simplify.rb
│ ├── transform.rb
│ └── truncate.rb
│ └── version.rb
├── rfc_editor.png
└── test
├── helper.rb
├── models.rb
├── test_errata.rb
└── test_old_style.rb
/.gitignore:
--------------------------------------------------------------------------------
1 | *.sw?
2 | .DS_Store
3 | coverage
4 | doc
5 | pkg
6 | Gemfile.lock
7 | *.gem
8 | .yardoc
--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
1 | 1.1.1 / 2012-05-11
2 |
3 | * Enhancements
4 |
5 | * Don't require a :responder if there are no conditions
6 |
7 | 1.1.0 / 2012-05-03
8 |
9 | * Breaking changes
10 |
11 | * Errata#options is no longer available for introspection. Use Errata#lazy_load_table_options and Errata#responder if that helps.
12 |
13 | * Enhancements
14 |
15 | * Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7+
16 | * Modernize Rakefile and test helper
17 | * Got rid of autoload and made thread-safe in general
18 | * Convert to minitest
19 | * Strictly correct lazy-loading
20 | * Added warnings for deprecated usage styles.
21 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
# Use the explicit HTTPS URL: Bundler deprecates the :rubygems shorthand
# because it resolves to an insecure HTTP source.
source 'https://rubygems.org'

# Runtime dependencies are declared in errata.gemspec.
gemspec

# development dependencies
gem 'minitest'
gem 'minitest-reporters'
gem 'rake'
gem 'yard'
# fastercsv is only bundled on Rubies older than 1.9.
if RUBY_VERSION < '1.9'
  gem 'fastercsv'
end
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011 Brighter Planet
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
1 | # errata
2 |
3 | Define an errata in table format (CSV) and then apply it to an arbitrary source. Inspired by RFC Errata, lets you keep your own errata in a transparent way.
4 |
5 | Tested in MRI 1.8.7+, MRI 1.9.2+, and JRuby 1.6.7+. Thread safe.
6 |
7 | ## Inspiration
8 |
9 | There's a process for reporting errata on RFCs:
10 |
11 | * [RFC Errata](http://www.rfc-editor.org/errata.php)
12 | * [Status and Type Descriptions for RFC Errata](http://www.rfc-editor.org/status_type_desc.html)
13 | * [How to report errata](http://www.rfc-editor.org/how_to_report.html)
14 |
15 |

16 |
17 | ## Example
18 |
19 | Every errata has a table structure based on the [IETF RFC Editor's "How to Report Errata"](http://www.rfc-editor.org/how_to_report.html).
20 |
21 |
22 |
23 | date |
24 | name |
25 | email |
26 | type |
27 | section |
28 | action |
29 | x |
30 | y |
31 | condition |
32 | notes |
33 |
34 |
35 | 2011-03-22 |
36 | Ian Hough |
37 | ian@brighterplanet.com |
38 | meta |
39 | Intended use |
40 | |
41 | http://example.com/original-data-with-errors.xls |
42 | |
43 | |
44 | A hypothetical document that uses non-ISO country names |
45 |
46 |
47 | 2011-03-22 |
48 | Ian Hough |
49 | ian@brighterplanet.com |
50 | technical |
51 | Country Name |
52 | replace |
53 | /ANTIGUA & BARBUDA/ |
54 | ANTIGUA AND BARBUDA |
55 | |
56 | |
57 |
58 |
59 | 2011-03-22 |
60 | Ian Hough |
61 | ian@brighterplanet.com |
62 | technical |
63 | Country Name |
64 | replace |
65 | /BOLIVIA/ |
66 | BOLIVIA, PLURINATIONAL STATE OF |
67 | |
68 | |
69 |
70 |
71 | 2011-03-22 |
72 | Ian Hough |
73 | ian@brighterplanet.com |
74 | technical |
75 | Country Name |
76 | replace |
77 | /BOSNIA & HERZEGOVINA/ |
78 | BOSNIA AND HERZEGOVINA |
79 | |
80 | |
81 |
82 |
83 | 2011-03-22 |
84 | Ian Hough |
85 | ian@brighterplanet.com |
86 | technical |
87 | Country Name |
88 | replace |
89 | /BRITISH VIRGIN ISLANDS/ |
90 | VIRGIN ISLANDS, BRITISH |
91 | |
92 | |
93 |
94 |
95 | 2011-03-22 |
96 | Ian Hough |
97 | ian@brighterplanet.com |
98 | technical |
99 | Country Name |
100 | replace |
101 | /COTE D'IVOIRE/ |
102 | CÔTE D'IVOIRE |
103 | |
104 | |
105 |
106 |
107 | 2011-03-22 |
108 | Ian Hough |
109 | ian@brighterplanet.com |
110 | technical |
111 | Country Name |
112 | replace |
113 | /DEM\. PEOPLE'S REP\. OF KOREA/ |
114 | KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF |
115 | |
116 | |
117 |
118 |
119 | 2011-03-22 |
120 | Ian Hough |
121 | ian@brighterplanet.com |
122 | technical |
123 | Country Name |
124 | replace |
125 | /DEM\. REP\. OF THE CONGO/ |
126 | CONGO, THE DEMOCRATIC REPUBLIC OF THE |
127 | |
128 | |
129 |
130 |
131 | 2011-03-22 |
132 | Ian Hough |
133 | ian@brighterplanet.com |
134 | technical |
135 | Country Name |
136 | replace |
137 | /HONG KONG SAR/ |
138 | HONG KONG |
139 | |
140 | |
141 |
142 |
143 | 2011-03-22 |
144 | Ian Hough |
145 | ian@brighterplanet.com |
146 | technical |
147 | Country Name |
148 | replace |
149 | /IRAN \(ISLAMIC REPUBLIC OF\)/ |
150 | IRAN, ISLAMIC REPUBLIC OF |
151 | |
152 | |
153 |
154 |
155 |
156 | Which would be saved as a CSV:
157 |
158 | date,name,email,type,section,action,x,y,condition,notes
159 | 2011-03-22,Ian Hough,ian@brighterplanet.com,meta,Intended use,,http://example.com/original-data-with-errors.xls,,,A hypothetical document that uses non-ISO country names
160 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/ANTIGUA & BARBUDA/,ANTIGUA AND BARBUDA,,
161 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BOLIVIA/,"BOLIVIA, PLURINATIONAL STATE OF",,
162 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BOSNIA & HERZEGOVINA/,BOSNIA AND HERZEGOVINA,,
163 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BRITISH VIRGIN ISLANDS/,"VIRGIN ISLANDS, BRITISH",,
164 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/COTE D'IVOIRE/,CÔTE D'IVOIRE,,
165 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/DEM\. PEOPLE'S REP\. OF KOREA/,"KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF",,
166 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/DEM\. REP\. OF THE CONGO/,"CONGO, THE DEMOCRATIC REPUBLIC OF THE",,
167 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/HONG KONG SAR/,HONG KONG,,
168 | 2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/IRAN \(ISLAMIC REPUBLIC OF\)/,"IRAN, ISLAMIC REPUBLIC OF",,
169 |
170 | And then used:
171 |
172 | errata = Errata.new(:url => 'http://example.com/errata.csv')
173 | original = RemoteTable.new(:url => 'http://example.com/original-data-with-errors.xls')
174 | original.each do |row|
175 | errata.correct! row # destructively correct each row
176 | end
177 |
178 | ## UTF-8
179 |
180 | Assumes all input strings are UTF-8. Otherwise there can be problems with Ruby 1.9 and Regexp::FIXEDENCODING. Specifically, ASCII-8BIT regexps might be applied to UTF-8 strings (or vice-versa), resulting in Encoding::CompatibilityError.
181 |
182 | ## More advanced usage
183 |
184 | The [`earth` library](https://github.com/brighterplanet/earth) has dozens of real-life examples showing errata in action:
185 |
186 |
213 |
214 | ## Real-world usage
215 |
216 | 
217 |
218 | We use `errata` for [data science at Brighter Planet](http://brighterplanet.com/research) and in production at
219 |
220 | * [Brighter Planet's reference data web service](http://data.brighterplanet.com)
221 | * [Brighter Planet's impact estimate web service](http://impact.brighterplanet.com)
222 |
223 | The killer combination:
224 |
225 | 1. [`active_record_inline_schema`](https://github.com/seamusabshere/active_record_inline_schema) - define table structure
226 | 2. [`remote_table`](https://github.com/seamusabshere/remote_table) - download data and parse it
227 | 3. [`errata`](https://github.com/seamusabshere/errata) (this library!) - apply corrections in a transparent way
228 | 4. [`data_miner`](https://github.com/seamusabshere/data_miner) - import data idempotently
229 |
230 | ## Authors
231 |
232 | * Seamus Abshere
233 | * Andy Rossmeissl
234 | * Ian Hough
235 |
236 | ## Copyright
237 |
238 | Copyright (c) 2012 Brighter Planet. See LICENSE for details.
239 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env rake
2 | require "bundler/gem_tasks"
3 |
4 | require 'rake'
5 | require 'rake/testtask'
# `rake test` runs every test/**/test_*.rb file with test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push 'test'
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

require 'yard'
# YARD documentation task, invoked with --no-private.
YARD::Rake::YardocTask.new do |yardoc|
  yardoc.options.push '--no-private'
end

# Running plain `rake` runs the test suite.
task :default => :test
18 |
--------------------------------------------------------------------------------
/errata.gemspec:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | require File.expand_path("../lib/errata/version", __FILE__)
3 |
# Packaging metadata for the errata gem. The version comes from
# lib/errata/version.rb, which is required just above this block.
Gem::Specification.new do |s|
  s.name        = "errata"
  s.version     = Errata::VERSION
  s.authors     = ["Seamus Abshere", "Andy Rossmeissl"]
  s.email       = ["seamus@abshere.net"]
  s.homepage    = "https://github.com/seamusabshere/errata"
  s.summary     = "Correct strings based on remote errata files"
  s.description = %q{Correct strings based on remote errata files.}
  # The bundled LICENSE file is the MIT license text.
  s.license     = "MIT"

  s.rubyforge_project = "errata"

  # File lists are taken from git, so the gem must be built from a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency 'activesupport', '>=2.3.4'
  s.add_dependency 'remote_table', '>=1.1.7'
  s.add_dependency 'to_regexp', '>= 0.0.2'
end
24 |
--------------------------------------------------------------------------------
/lib/errata.rb:
--------------------------------------------------------------------------------
1 | require 'thread'
2 |
3 | require 'active_support'
4 | require 'active_support/version'
5 | if ::ActiveSupport::VERSION::MAJOR >= 3
6 | require 'active_support/core_ext'
7 | end
8 | require 'remote_table'
9 |
10 | require 'errata/erratum'
11 |
# Loads a table of errata and applies it to rows.
#
# #rejects? reports whether any "reject" erratum targets a row; #correct!
# runs every correction erratum against a row, modifying it in place.
# The table (and the responder, when given as a class name) is loaded lazily
# on first use, guarded by mutexes.
class Errata
  # Recognized values of the :action column. Rows with any other action
  # (including a blank one) are ignored.
  CORRECTIONS = %w{delete replace simplify transform truncate reject}

  attr_reader :lazy_load_table_options
  attr_reader :lazy_load_responder_class_name

  # Arguments
  # * :responder (optional; only needed when errata use conditions) - normally you pass this something like Guru.new, which should respond to questions like #is_a_bentley?. If you pass a string, it will be lazily constantized and a new object initialized from it; for example, 'Guru' will lead to 'Guru'.constantize.new.
  # * :table - takes something that acts like a RemoteTable
  # If and only if you don't pass :table, all other options will be passed to a new RemoteTable (for example, :url, etc.)
  def initialize(options = {})
    options = options.symbolize_keys

    responder = options.delete :responder
    if responder.is_a?(::String)
      @lazy_load_responder_mutex = ::Mutex.new
      @lazy_load_responder_class_name = responder
    elsif responder
      ::Kernel.warn %{[errata] Passing an object as :responder is deprecated. It's recommended to pass a class name instead, which will be constantized and instantiated with no arguments.}
      @responder = responder
    else
      @no_responder = true
    end

    if (table = options.delete(:table))
      ::Kernel.warn %{[errata] Passing :table is deprecated. It's recommended to pass table options instead.}
      @table = table
    else
      @lazy_load_table_options = options
    end

    @set_rejections_and_corrections_mutex = ::Mutex.new
  end

  # Returns true if any "reject" erratum targets the row.
  def rejects?(row)
    rejections.any? { |erratum| erratum.targets?(row) }
  end

  # Destructively applies every correction erratum to the row. Always returns nil.
  def correct!(row)
    corrections.each { |erratum| erratum.correct!(row) }
    nil
  end

  # The object that answers condition questions, or nil when none was given.
  # A responder passed as a class name is instantiated on first access.
  def responder
    return if @no_responder == true
    @responder || @lazy_load_responder_mutex.synchronize do
      @responder ||= lazy_load_responder_class_name.constantize.new
    end
  end

  private

  # Reads the errata table once and sorts its rows into rejections and
  # corrections. The flag is checked again inside the mutex so that only
  # one thread performs the load.
  def set_rejections_and_corrections!
    return if @set_rejections_and_corrections == true
    @set_rejections_and_corrections_mutex.synchronize do
      return if @set_rejections_and_corrections == true

      if @table
        table = @table
        @table = nil # won't need this again
      else
        table = ::RemoteTable.new lazy_load_table_options
      end

      rejections = []
      corrections = []

      table.each do |erratum_initializer|
        erratum_initializer = erratum_initializer.symbolize_keys
        # A blank action cell (e.g. a "meta" row) may arrive as nil rather
        # than ''; #to_s keeps such rows from raising NoMethodError.
        action = erratum_initializer[:action].to_s.downcase
        if action == 'reject'
          rejections << Erratum::Reject.new(responder, erratum_initializer)
        elsif CORRECTIONS.include?(action)
          corrections << Erratum.const_get(action.camelcase).new(responder, erratum_initializer)
        end
      end

      @rejections = rejections
      @corrections = corrections
      @set_rejections_and_corrections = true
    end
  end

  def rejections
    set_rejections_and_corrections!
    @rejections
  end

  def corrections
    set_rejections_and_corrections!
    @corrections
  end
end
105 |
--------------------------------------------------------------------------------
/lib/errata/erratum.rb:
--------------------------------------------------------------------------------
1 | require 'to_regexp'
2 |
3 | require 'errata/erratum/delete'
4 | require 'errata/erratum/reject'
5 | require 'errata/erratum/replace'
6 | require 'errata/erratum/simplify'
7 | require 'errata/erratum/transform'
8 | require 'errata/erratum/truncate'
9 |
class Errata
  # One row of an errata table: which section (column) of a data row it
  # applies to, which responder conditions must hold, and which expression
  # must match. Subclasses implement the actual correction.
  class Erratum
    SEMICOLON_DELIMITER = /\s*;\s*/
    SPECIAL_ABBR = /\Aabbr\((.*)\)\z/
    REJECT_ACTIONS = %w{reject truncate}

    attr_reader :responder
    attr_reader :section
    attr_reader :matching_methods
    attr_reader :matching_expression

    # responder - object that answers the condition methods (may be nil when
    #             the erratum has no conditions)
    # options   - erratum row with symbol keys; :section, :condition, :x and
    #             :action are read here
    #
    # Raises ArgumentError if conditions are present but responder is nil.
    def initialize(responder, options = {})
      @responder = responder
      @section = options[:section]
      @matching_methods = build_matching_methods options[:condition]
      if @matching_methods.any? && @responder.nil?
        raise ::ArgumentError, %{[errata] Conditions like #{@matching_methods.first.inspect} used, but no :responder defined}
      end
      @matching_expression = build_matching_expression options[:x], options[:action]
    end

    # True when the :x value used the abbr(...) form.
    def abbr?
      @abbr_query == true
    end

    # True when every condition holds and the expression matches the row.
    def targets?(row)
      !!(conditions_match?(row) && expression_matches?(row))
    end

    # Truthy when there is nothing to match against (no expression or no
    # section) or when the row's section value matches the expression.
    def expression_matches?(row)
      return true if matching_expression.blank? || section.blank?
      case matching_expression
      when ::Regexp
        matching_expression.match row[section].to_s
      when ::String
        row[section].to_s.include? matching_expression
      end
    end

    # Asks the responder each condition question about the row.
    # NOTE(review): the method names come from the errata table and are
    # dispatched with #send, so the table must be trusted.
    def conditions_match?(row)
      matching_methods.all? { |method_id| responder.send method_id, row }
    end

    private

    # Turns "Is a Bentley; model contains maybach" into
    # ["is_a_bentley?", "model_contains_maybach?"]. A nil condition and
    # blank fragments (e.g. from a leading ";") yield no methods.
    def build_matching_methods(condition)
      fragments = condition.to_s.split(SEMICOLON_DELIMITER).map { |fragment| fragment.strip }
      fragments.reject { |fragment| fragment.empty? }.map do |fragment|
        fragment.gsub(/\W/, '_').downcase + '?'
      end
    end

    # Builds the matcher for the :x value:
    # * blank            -> nil
    # * /.../ or %r{...} -> that regexp (if it parses)
    # * abbr(XYZ)        -> case-insensitive regexp tolerating dots/spaces between letters
    # * reject/truncate  -> case-insensitive prefix regexp
    # * anything else    -> the literal value
    def build_matching_expression(x, action)
      if x.blank?
        nil
      elsif (x.start_with?('/') || x.start_with?('%r{')) && (as_regexp = x.as_regexp)
        ::Regexp.new(*as_regexp)
      elsif (abbr_match = SPECIAL_ABBR.match(x))
        @abbr_query = true
        abbr = abbr_match[1].split(/(\w\??)/).reject { |a| a == '' }.join('\.?\s?') + '\.?([^\w\.]|\z)'
        ::Regexp.new '(\A|\s)' + abbr, true
      elsif REJECT_ACTIONS.include?(action.to_s.downcase)
        # Errata picks the erratum class case-insensitively, so compare the
        # action the same way here.
        ::Regexp.new '\A\s*' + ::Regexp.escape(x), true
      else
        x
      end
    end
  end
end
70 |
--------------------------------------------------------------------------------
/lib/errata/erratum/delete.rb:
--------------------------------------------------------------------------------
class Errata
  class Erratum
    # Erratum that removes whatever the matching expression matches from the
    # row's section value.
    class Delete < Erratum
      attr_reader :backfill

      def initialize(responder, options = {})
        super
        # otherwise abbr(X) will kill the characters before and after the match
        @backfill = abbr? ? '\1\2' : ''
      end

      # Deletes every match in place when the erratum targets the row.
      def correct!(row)
        return unless targets? row
        row[section].gsub! matching_expression, backfill
      end
    end
  end
end
24 |
--------------------------------------------------------------------------------
/lib/errata/erratum/reject.rb:
--------------------------------------------------------------------------------
class Errata
  class Erratum
    # Erratum for the "reject" action. It adds no behavior of its own;
    # Errata#rejects? calls the inherited #targets? on it.
    class Reject < Erratum
    end
  end
end
7 |
--------------------------------------------------------------------------------
/lib/errata/erratum/replace.rb:
--------------------------------------------------------------------------------
class Errata
  class Erratum
    # Erratum that substitutes the :y value for whatever the matching
    # expression matches, or for the whole section value when there is no
    # matching expression.
    class Replace < Erratum
      attr_reader :correction

      def initialize(responder, options = {})
        super
        replacement = options[:y].to_s
        # abbr(X) captures the characters around the match; put them back.
        @correction = abbr? ? '\1' + replacement + '\2' : replacement
      end

      # Applies the replacement in place when the erratum targets the row.
      def correct!(row)
        return unless targets? row
        # No pattern to match: overwrite the whole value with a fresh copy.
        return row[section] = correction.dup if matching_expression.blank?
        row[section].gsub! matching_expression, correction
      end
    end
  end
end
27 |
--------------------------------------------------------------------------------
/lib/errata/erratum/simplify.rb:
--------------------------------------------------------------------------------
class Errata
  class Erratum
    # Erratum that strips the value of a second section (named by :x) out of
    # the primary section, together with any surrounding whitespace, quotes,
    # parentheses or square brackets.
    class Simplify < Erratum
      attr_reader :second_section

      def initialize(responder, options = {})
        super
        @second_section = options[:x]
      end

      # True when both sections are present, the conditions hold, and the
      # second section's value occurs in the primary section. Returns a
      # strict boolean, like Erratum#targets?.
      def targets?(row)
        return false if row[section].blank? || row[second_section].blank?
        return false unless conditions_match?(row)
        !!special_matcher(row).match(row[section])
      end

      # Removes the second section's value from the primary section in place.
      def correct!(row)
        return unless targets? row
        row[section].gsub! special_matcher(row), ''
      end

      private

      # Regexp for the second section's value plus surrounding punctuation.
      # The value is stringified first because Regexp.escape raises
      # TypeError for non-String values such as an Integer year.
      def special_matcher(row)
        /[\s\(\[\'\"]*#{::Regexp.escape(row[second_section].to_s)}[\s\)\]\'\"]*/
      end
    end
  end
end
29 |
--------------------------------------------------------------------------------
/lib/errata/erratum/transform.rb:
--------------------------------------------------------------------------------
class Errata
  class Erratum
    # Erratum that runs a whitelisted String method (named by :y) over
    # whatever the matching expression matches, or over the whole section
    # value when there is no matching expression.
    class Transform < Erratum
      ALLOWED_METHODS = %w{upcase downcase}

      attr_reader :string_method

      # Raises RuntimeError if :y is not one of ALLOWED_METHODS.
      def initialize(responder, options = {})
        super
        @string_method = options[:y]
        raise %{[errata] Method "#{@string_method}" not allowed} unless ALLOWED_METHODS.include? @string_method
      end

      # Transforms the section value when the erratum targets the row.
      def correct!(row)
        return unless targets? row
        if matching_expression.blank?
          # Without a pattern, gsub! would raise TypeError on nil; transform
          # the whole value instead, as Replace does for a blank pattern.
          row[section] = row[section].send(string_method) unless row[section].nil?
        else
          row[section].gsub!(matching_expression) { |match| match.send string_method }
        end
      end
    end
  end
end
22 |
--------------------------------------------------------------------------------
/lib/errata/erratum/truncate.rb:
--------------------------------------------------------------------------------
class Errata
  class Erratum
    # Erratum that replaces a targeted section value with the :x value
    # itself (the "necessary and sufficient prefix").
    class Truncate < Erratum
      attr_reader :necessary_and_sufficient_prefix

      def initialize(responder, options = {})
        super
        @necessary_and_sufficient_prefix = options[:x]
      end

      # Overwrites the section with a copy of the prefix when the erratum
      # targets the row.
      def correct!(row)
        row[section] = necessary_and_sufficient_prefix.dup if targets? row
      end
    end
  end
end
19 |
--------------------------------------------------------------------------------
/lib/errata/version.rb:
--------------------------------------------------------------------------------
# Reopens Errata only to declare the gem version; errata.gemspec requires
# this file to read it.
class Errata
  # Release number; matches the newest CHANGELOG entry.
  VERSION = '1.1.1'
end
4 |
--------------------------------------------------------------------------------
/rfc_editor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seamusabshere/errata/1666873d1c75fecdbaa86d090f386d81f3831c28/rfc_editor.png
--------------------------------------------------------------------------------
/test/helper.rb:
--------------------------------------------------------------------------------
1 | require 'rubygems'
2 | require 'bundler/setup'
3 |
# Load the debugger only when one of the ruby-debug gems is in the bundle.
debugger_bundled = %w{ruby-debug19 ruby-debug}.any? do |gem_name|
  Bundler.definition.specs[gem_name].first
end
require 'ruby-debug' if debugger_bundled

require 'minitest/spec'
require 'minitest/autorun'
require 'minitest/reporters'
# Route test output through minitest-reporters' spec-style reporter.
MiniTest::Unit.runner = MiniTest::SuiteRunner.new
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new

# fastercsv is only loaded on Rubies older than 1.9.
require 'fastercsv' if RUBY_VERSION < '1.9'
17 |
18 | require 'errata'
19 |
--------------------------------------------------------------------------------
/test/models.rb:
--------------------------------------------------------------------------------
# Test responder: answers the condition questions used by the automobile
# errata. Every public method takes a row (a Hash keyed by column name) and
# returns true or false.
class AutomobileVariantGuru
  # Columns that may hold the manufacturer name.
  MANUFACTURER_COLUMNS = ['MFR', 'Manufacturer', 'carline_mfr_name']
  # Columns that may hold the model ("car line") name.
  MODEL_COLUMNS = ['CAR LINE', 'carline name', 'carline_name']

  def transmission_is_blank?(row)
    row['transmission'].blank?
  end

  def is_a_2007_gmc_or_chevrolet?(row)
    row['year'] == 2007 && %w(GMC CHEVROLET).include?(row['MFR'].to_s.upcase)
  end

  def is_a_porsche?(row)
    manufacturer_is? row, 'PORSCHE'
  end

  def is_not_a_porsche?(row)
    !is_a_porsche? row
  end

  def is_a_mercedes_benz?(row)
    any_column_matches? row, MANUFACTURER_COLUMNS, /MERCEDES/i
  end

  def is_a_lexus?(row)
    manufacturer_is? row, 'LEXUS'
  end

  def is_a_bmw?(row)
    manufacturer_is? row, 'BMW'
  end

  def is_a_ford?(row)
    manufacturer_is? row, 'FORD'
  end

  def is_a_bentley?(row)
    manufacturer_is? row, 'BENTLEY'
  end

  def is_a_rolls_royce?(row)
    any_column_matches? row, MANUFACTURER_COLUMNS, /ROLLS/i
  end

  def is_a_turbo_brooklands?(row)
    any_column_matches? row, MODEL_COLUMNS, /TURBO R\/RL BKLDS/i
  end

  def model_contains_maybach?(row)
    any_column_matches? row, MODEL_COLUMNS, /MAYBACH/i
  end

  def model_contains_bentley?(row)
    any_column_matches? row, MODEL_COLUMNS, /BENTLEY/i
  end

  private

  # True when any manufacturer column equals name, ignoring case.
  # Values are stringified so a nil cell cannot raise NoMethodError.
  def manufacturer_is?(row, name)
    MANUFACTURER_COLUMNS.any? { |column| row[column].to_s.upcase == name }
  end

  # True when any of the given columns matches the pattern.
  def any_column_matches?(row, columns, pattern)
    columns.any? { |column| row[column].to_s =~ pattern }
  end
end
54 |
--------------------------------------------------------------------------------
/test/test_errata.rb:
--------------------------------------------------------------------------------
1 | require 'helper'
2 | require 'models'
3 |
# Integration specs for Errata. They download published errata spreadsheets,
# so they need network access.
describe Errata do
  describe 'without responder' do
    it "doesn't require a responder" do
      e = Errata.new :url => 'https://docs.google.com/spreadsheet/pub?key=0AkCJNpm9Ks6JdHEtemF2YTZzdGRYbE1MTHFMRXpRUHc&single=true&gid=0&output=csv'
      row = { 'name' => 'denver intl airport' }
      e.correct! row
      row['name'].must_equal 'denver International airport'
    end
  end

  describe 'with conditions' do
    it "uses a responder to answer conditions" do
      # The responder class is defined at runtime so it only exists once
      # this example runs; the evaluated source is a fixed literal.
      eval %{
        class ColoradoGuru
          def is_denver_airport?(record)
            record['name'].to_s.downcase.include? 'denver'
          end
        end
      }
      e = Errata.new(
        :url => 'https://docs.google.com/spreadsheet/pub?key=0AkCJNpm9Ks6JdG9PcFBjVnE4SGpLVXNTakVhSFY2VFE&single=true&gid=0&output=csv',
        :responder => 'ColoradoGuru'
      )
      row = { 'name' => 'denver intl airport' }
      e.correct! row
      row['name'].must_equal 'denver International airport' # matched condition
      row = { 'name' => 'madison intl airport' }
      e.correct! row
      row['name'].must_equal 'madison intl airport' # didn't match
    end

    it "blows up if you have conditions but no responder" do
      e = Errata.new :url => 'https://docs.google.com/spreadsheet/pub?key=0AkCJNpm9Ks6JdG9PcFBjVnE4SGpLVXNTakVhSFY2VFE&single=true&gid=0&output=csv'
      row = { 'name' => 'denver intl airport' }
      # must_raise only checks the exception class; a Regexp passed as an
      # extra argument is not matched against the message. Check the message
      # on the exception it returns.
      error = lambda do
        e.correct! row
      end.must_raise ArgumentError
      error.message.must_match(/conditions.*used/i)
    end
  end

  describe 'to correct automobile model details' do
    before do
      @e = Errata.new :url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg',
                      :responder => 'AutomobileVariantGuru'
    end

    it "corrects rows" do
      alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
      @e.correct!(alfa)
      alfa['carline_mfr_name'].must_equal 'Alfa Romeo'
    end

    it "rejects rows" do
      @e.rejects?('carline_mfr_name' => 'AURORA CARS').must_equal true
    end

    it "tries multiple conditions" do
      bentley = { 'carline_mfr_name' => 'ROLLS-ROYCE BENTLEY', "carline name" => 'Super Bentley' }
      @e.correct!(bentley)
      bentley['carline_mfr_name'].must_equal 'Bentley'
    end
  end
end
67 |
--------------------------------------------------------------------------------
/test/test_old_style.rb:
--------------------------------------------------------------------------------
1 | require 'helper'
2 | require 'models'
3 |
# Specs for the deprecated calling conventions: passing a ready-made :table
# and/or a responder instance. They download the errata spreadsheet, so they
# need network access.
describe 'old-style Errata usage' do
  errata_url = 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg'

  before do
    remote_table = RemoteTable.new(:url => errata_url)
    guru = AutomobileVariantGuru.new
    @e = Errata.new :table => remote_table, :responder => guru
  end

  it "corrects rows" do
    alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
    @e.correct!(alfa)
    alfa['carline_mfr_name'].must_equal 'Alfa Romeo'
  end

  it "rejects rows" do
    aurora = { 'carline_mfr_name' => 'AURORA CARS' }
    @e.rejects?(aurora).must_equal true
  end

  it "lazily constantizes and initializes responder" do
    remote_table = RemoteTable.new(:url => errata_url)
    e = Errata.new :table => remote_table, :responder => 'AutomobileVariantGuru'
    alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
    e.correct!(alfa)
    alfa['carline_mfr_name'].must_equal 'Alfa Romeo'
  end

  it "passes options to RemoteTable if no :table is specified" do
    guru = AutomobileVariantGuru.new
    e = Errata.new :url => errata_url, :responder => guru
    alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
    e.correct!(alfa)
    alfa['carline_mfr_name'].must_equal 'Alfa Romeo'
  end

  it "tries multiple conditions" do
    bentley = { 'carline_mfr_name' => 'ROLLS-ROYCE BENTLEY', "carline name" => 'Super Bentley' }
    @e.correct!(bentley)
    bentley['carline_mfr_name'].must_equal 'Bentley'
  end
end
42 |
--------------------------------------------------------------------------------