├── lib ├── data_filter │ ├── version.rb │ ├── prefix_filter.rb │ ├── keyword_filter.rb │ ├── truthy_filter.rb │ ├── range_filter.rb │ ├── like_filter.rb │ ├── range_overlap_filter.rb │ └── filter_set.rb └── data_filter.rb ├── .travis.yml ├── Gemfile ├── .gitignore ├── Rakefile ├── test ├── test_helper.rb ├── keyword_filter_test.rb ├── truthy_filter_test.rb ├── prefix_filter_test.rb ├── range_filter_test.rb ├── range_overlap_filter_test.rb └── like_filter_test.rb ├── data_filter.gemspec ├── LICENSE.txt ├── CODE_OF_CONDUCT.md └── README.md /lib/data_filter/version.rb: -------------------------------------------------------------------------------- 1 | module DataFilter 2 | VERSION = '0.4.1' 3 | end 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 2.1.3 4 | before_install: gem install bundler -v 1.10.6 5 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in data_filter.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | /.idea/ 11 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | Rake::TestTask.new(:test) do |t| 5 | t.libs << "test" 6 | t.libs << "lib" 7 | t.test_files = FileList['test/**/*_test.rb'] 8 | end 9 | 10 | task :default => :test 11 | 
-------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require 'coveralls' 2 | Coveralls.wear! 3 | 4 | $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) 5 | require 'data_filter' 6 | 7 | require 'minitest/autorun' 8 | require 'minitest/spec' 9 | require 'minitest/pride' 10 | -------------------------------------------------------------------------------- /lib/data_filter.rb: -------------------------------------------------------------------------------- 1 | require 'data_filter/version' 2 | 3 | require 'data_filter/filter_set' 4 | 5 | require 'data_filter/keyword_filter' 6 | require 'data_filter/range_filter' 7 | require 'data_filter/like_filter' 8 | require 'data_filter/range_overlap_filter' 9 | require 'data_filter/truthy_filter' 10 | require 'data_filter/prefix_filter' 11 | 12 | module DataFilter; end 13 | -------------------------------------------------------------------------------- /test/keyword_filter_test.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper' 2 | 3 | module DataFilter 4 | class KeywordFilterTest < Minitest::Spec 5 | it "gracefully handles items that don't respond to the filter sym" do 6 | assert_equal nil, DataFilter::KeywordFilter.new([:hello, :goodbye], 'search').call(nil) 7 | end 8 | 9 | it 'is case insensitive' do 10 | el = OpenStruct.new(name: 'John Snow') 11 | assert_equal el, DataFilter::KeywordFilter.new([:name], 'john').call(el) 12 | end 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /data_filter.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'data_filter/version' 5 | 6 | Gem::Specification.new do 
|spec| 7 | spec.name = "data_filter" 8 | spec.version = DataFilter::VERSION 9 | spec.authors = ["Josh Bodah"] 10 | spec.email = ["jb3689@yahoo.com"] 11 | 12 | spec.summary = %q{an extensible DSL for filtering data sets} 13 | spec.homepage = "https://github.com/backupify/data_filter" 14 | spec.license = "MIT" 15 | 16 | spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } 17 | spec.bindir = "exe" 18 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 19 | spec.require_paths = ["lib"] 20 | 21 | spec.add_development_dependency "bundler", "~> 1.10" 22 | spec.add_development_dependency "rake", "~> 10.0" 23 | spec.add_development_dependency "minitest" 24 | spec.add_development_dependency 'coveralls' 25 | end 26 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Josh Bodah 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
module DataFilter
  # Used to filter a data item by a prefix by seeing if
  # the data field value starts with the prefix.
  #
  # The comparison is case insensitive and the prefix is always treated as
  # literal text (regular expression metacharacters have no special meaning).
  #
  # @example
  #   object = MyModel.new(text: 'hello world!')
  #   filter = DataFilter::PrefixFilter.new(:text, 'hello')
  #   filter.call(object)
  #   # => object
  class PrefixFilter
    # @param field_sym [Symbol] name of the data method we want
    #   to filter
    # @param prefix [String] the value we want to use when
    #   filtering the data item
    def initialize(field_sym, prefix)
      @field_sym = field_sym
      @prefix = prefix
    end

    # Filters the item
    #
    # @param item [Object] the item we want to filter
    # @return [Object, nil] the original data item when its field value
    #   starts with the prefix; nil when it does not, when the field value
    #   is nil, or when the item does not respond to the field
    def call(item)
      return unless item.respond_to?(@field_sym)

      item if starts_with?(item.public_send(@field_sym), @prefix)
    end

    private

    # @param actual [Object, nil] the field value read from the item
    # @param prefix [String] the literal prefix to look for
    # @return [Boolean] whether the value starts with the prefix
    def starts_with?(actual, prefix)
      # A missing value can never start with anything; previously this
      # raised NoMethodError because nil does not respond to #match.
      return false if actual.nil?

      # Escape the prefix so input such as "a.c" or "(draft" is matched
      # literally instead of being compiled as a pattern (which could match
      # the wrong data or raise RegexpError).
      pattern = /\A#{Regexp.escape(prefix.to_s)}/i
      !(actual.to_s =~ pattern).nil?
    end
  end
end
module DataFilter
  # Filters a data item depending on whether one of its fields holds a
  # truthy or a falsey value. The values false, 'false', 0 and nil count as
  # falsey; everything else counts as truthy.
  #
  # @example
  #   object = MyModel.new(is_alive: 'false')
  #   filter = DataFilter::TruthyFilter.new(:is_alive)
  #   filter.call(object)
  #   # => nil
  class TruthyFilter
    # @param field_sym [Symbol] the name of the field to filter by
    # @param invert [Boolean] (default: false) pass true to keep items
    #   whose field is falsey instead of items whose field is truthy
    def initialize(field_sym, invert: false)
      @field_sym = field_sym
      @invert = invert
    end

    # Filters the item
    #
    # @param item [Object] the item we want to filter
    # @return [Object, nil] the original data item when it matches, nil
    #   when it does not match or does not respond to the field
    def call(item)
      return unless item.respond_to?(@field_sym)

      falsey = falsey?(item.public_send(@field_sym))
      # Non-inverted filters drop falsey values; inverted ones drop the rest.
      dropped = @invert ? !falsey : falsey
      item unless dropped
    end

    private

    # @private
    def falsey?(value)
      [false, 'false', 0, nil].include?(value)
    end
  end
end
requests or patches, and other activities. 4 | 5 | We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 6 | 7 | Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct. 8 | 9 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team. 10 | 11 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers. 
12 | 13 | This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/) 14 | -------------------------------------------------------------------------------- /test/prefix_filter_test.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper' 2 | 3 | module DataFilter 4 | class PrefixFilterTest < Minitest::Spec 5 | it "gracefully handles items that don't respond to the filter sym" do 6 | assert_equal nil, DataFilter::PrefixFilter.new(:hello, 'world').call(nil) 7 | end 8 | 9 | it 'is case insensitive' do 10 | filter = DataFilter::PrefixFilter.new(:name, 'johN s') 11 | el = OpenStruct.new(name: 'John Snow') 12 | assert_equal el, filter.call(el) 13 | end 14 | 15 | it 'does not match if the search term is more specific than the data' do 16 | filter = DataFilter::PrefixFilter.new(:name, 'josh1') 17 | el = OpenStruct.new(name: 'josh') 18 | assert_equal nil, filter.call(el) 19 | end 20 | 21 | it 'does not match if the search term is not at the start of the data' do 22 | filter = DataFilter::PrefixFilter.new(:name, 'josh') 23 | el = OpenStruct.new(name: ' josh') 24 | assert_equal nil, filter.call(el) 25 | end 26 | 27 | it 'matches if the search term is less specific than the data' do 28 | filter = DataFilter::PrefixFilter.new(:name, 'jo') 29 | el = OpenStruct.new(name: 'joshua') 30 | assert_equal el, filter.call(el) 31 | end 32 | 33 | it 'matches if the search term is identical to the data' do 34 | filter = DataFilter::PrefixFilter.new(:name, 'one two') 35 | el = OpenStruct.new(name: 'one two') 36 | assert_equal el, filter.call(el) 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/data_filter/range_filter.rb: -------------------------------------------------------------------------------- 1 | 
module DataFilter
  # Filters a data item by checking whether one of its field values lies
  # between an optional floor and an optional ceiling (both inclusive).
  #
  # @example with a ceiling
  #   object = MyModel.new(created_at: Date.parse('2001-01-13'))
  #   filter = DataFilter::RangeFilter.new(:created_at, ceiling: Date.parse('2003-01-01'))
  #   filter.call(object)
  #   # => object
  #
  # @example with a floor
  #   object = MyModel.new(file_count: 300)
  #   filter = DataFilter::RangeFilter.new(:file_count, floor: 1)
  #   filter_return = filter.call(object)
  #   # => object
  #   has_file = filter_return.present?
  #   # => true
  class RangeFilter
    # @param field_sym [Symbol] the field to filter on
    # @param floor [Comparable] the inclusive lower bound, or nil for none
    # @param ceiling [Comparable] the inclusive upper bound, or nil for none
    # @param nil_default [Comparable] the value to use in place of a nil
    #   field value
    def initialize(field_sym, floor: nil, ceiling: nil, nil_default: nil)
      @field_sym = field_sym
      @floor = floor
      @ceiling = ceiling
      @nil_default = nil_default
    end

    # Filters the item
    #
    # @param item [Object] the item we want to filter
    # @return [Object, nil] the original data item when its field value is
    #   within the bounds; nil otherwise, or when the item does not respond
    #   to the field
    def call(item)
      return unless item.respond_to?(@field_sym)

      value = item.public_send(@field_sym)
      value = @nil_default if value.nil?
      in_range?(value) ? item : nil
    end

    private

    # :nodoc:
    def in_range?(actual)
      # A value that is still nil after applying the default never matches.
      return false if actual.nil?
      return false unless @floor.nil? || actual >= @floor

      @ceiling.nil? || actual <= @ceiling
    end
  end
end
module DataFilter
  # Used to filter a data item by a set of ranges by seeing if
  # the range described by two of its fields intersects the filter range
  #
  # @example with a floor and ceiling
  #   event = MyModel.new(start_time: Date.parse('2001-01-13'), end_time: Date.parse('2002-01-13'))
  #   filter = DataFilter::RangeOverlapFilter
  #     .new(:start_time, :end_time, floor: Date.parse('2000-01-13'), ceiling: Date.parse('2003-01-13'))
  #   filter.call(event)
  #   # => event
  class RangeOverlapFilter
    # @param start_sym [Symbol] the range start to filter on
    # @param end_sym [Symbol] the range end to filter on
    # @param floor [Comparable] the range beginning we want to filter the data
    #   item by
    # @param ceiling [Comparable] the range end we want to filter the data item
    #   by
    # @param nil_default [Comparable] the value to use if the data item has no
    #   field value
    def initialize(start_sym, end_sym, floor: nil, ceiling: nil, nil_default: nil)
      @start_sym = start_sym
      @end_sym = end_sym
      @floor = floor
      @ceiling = ceiling
      @nil_default = nil_default
    end

    # Filters the item
    #
    # @param item [Object] the item we want to filter
    # @return [Object, nil] the original data item when its range overlaps
    #   the filter range; nil otherwise, or when the item does not respond
    #   to both fields
    def call(item)
      return unless item.respond_to?(@start_sym) && item.respond_to?(@end_sym)

      actual_start = item.public_send(@start_sym)
      actual_start = @nil_default if actual_start.nil?

      actual_end = item.public_send(@end_sym)
      actual_end = @nil_default if actual_end.nil?

      item if in_range?(actual_start, actual_end)
    end

    private

    # :nodoc:
    def in_range?(actual_start, actual_end)
      return true if @floor.nil? && @ceiling.nil?
      return false if actual_start.nil? || actual_end.nil?

      if @floor.nil?
        # Only a ceiling: the item's range must begin at or below it.
        ordered?(actual_start, actual_end) && actual_start <= @ceiling
      elsif @ceiling.nil?
        # Only a floor: the item's range must finish at or above it.
        ordered?(actual_start, actual_end) && actual_end >= @floor
      else
        overlaps?((@floor..@ceiling), (actual_start..actual_end))
      end
    end

    # A range whose start is past its end (e.g. 2..0) contains no elements.
    # With a single bound such an item is filtered out; the previous
    # implementation raised NoMethodError here because Range#min/#max
    # return nil for such ranges.
    def ordered?(actual_start, actual_end)
      actual_start <= actual_end
    end

    # Snipped from ActiveSupport. Uses #cover? (pure comparison) rather than
    # #include?, which iterates non-numeric ranges and raises TypeError for
    # Comparable values that do not implement #succ.
    def overlaps?(range_a, range_b)
      range_a.cover?(range_b.first) || range_b.cover?(range_a.first)
    end
  end
end
| 24 | it 'always matches fields' do 25 | assert_match 3 26 | end 27 | end 28 | 29 | describe 'floor but no ceiling' do 30 | before do 31 | @f = DataFilter::RangeFilter.new(:item_count, floor: 2) 32 | end 33 | 34 | it 'matches things above the floor' do 35 | assert_match 3 36 | end 37 | 38 | it 'matches things equal to the floor' do 39 | assert_match 2 40 | end 41 | 42 | it 'filters things under the floor' do 43 | assert_filter 1 44 | end 45 | end 46 | 47 | describe 'ceiling but no floor' do 48 | before do 49 | @f = DataFilter::RangeFilter.new(:item_count, ceiling: 2) 50 | end 51 | 52 | it 'matches things under the ceiling' do 53 | assert_match 1 54 | end 55 | 56 | it 'matches things equal to the ceiling' do 57 | assert_match 2 58 | end 59 | 60 | it 'filters things above the ceiling' do 61 | assert_filter 3 62 | end 63 | end 64 | 65 | describe 'both ceiling and floor' do 66 | before do 67 | @f = DataFilter::RangeFilter.new(:item_count, floor: 1, ceiling: 3) 68 | end 69 | 70 | it 'matches things between the floor and ceiling' do 71 | assert_match 2 72 | end 73 | 74 | it 'matches things equal to the floor' do 75 | assert_match 1 76 | end 77 | 78 | it 'matches things equal to the ceiling' do 79 | assert_match 3 80 | end 81 | 82 | it 'filters things below the floor' do 83 | assert_filter 0 84 | end 85 | 86 | it 'filters things above the ceiling' do 87 | assert_filter 4 88 | end 89 | end 90 | 91 | describe 'nil default' do 92 | it 'treats nil like the nil default' do 93 | @f = DataFilter::RangeFilter.new(:item_count, floor: 1, nil_default: 0) 94 | assert_filter nil 95 | 96 | @f = DataFilter::RangeFilter.new(:item_count, floor: 1, nil_default: 2) 97 | assert_match nil 98 | end 99 | end 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataFilter 2 | 3 | [![Gem 
Version](https://badge.fury.io/rb/data_filter.svg)](http://badge.fury.io/rb/data_filter) 4 | [![Build Status](https://travis-ci.org/backupify/data_filter.svg)](https://travis-ci.org/backupify/data_filter) 5 | [![Coverage Status](https://coveralls.io/repos/backupify/data_filter/badge.svg?branch=master&service=github)](https://coveralls.io/github/backupify/data_filter?branch=master) 6 | [![Code Climate](https://codeclimate.com/github/backupify/data_filter/badges/gpa.svg)](https://codeclimate.com/github/backupify/data_filter) 7 | 8 | an extensible DSL for filtering data sets 9 | 10 | ## Summary 11 | 12 | `DataFilter` is a library for creating filters that are consistent, reusable, and easy to read. A filter is simply something that decides whether or not an element should be removed from a set. For example, we could create a `DataFilter::FilterSet` that is comprised of various filters and then pass an array into the filter set. The filter set will then remove elements that do not pass each of the filters. 13 | 14 | ## Installation 15 | 16 | ``` 17 | gem install data_filter 18 | ``` 19 | 20 | ## Usage 21 | 22 | `DataFilter::FilterSet::create` provides a DSL for creating a collection 23 | of filters which can be applied to your data. The DSL is designed to be 24 | controller friendly and will only apply filters if a parameter is specified. 25 | If a filter doesn't do what you need then you can pass any object that responds 26 | to `#call` (e.g. a lambda) to `add_filter`. 
27 | 28 | ```rb 29 | filter_set = DataFilter::FilterSet.create do 30 | # Fuzzy comparison 31 | like_filter :name, by: params[:name] 32 | 33 | # Fuzzy comparison with custom normalization 34 | like_filter :name, by: params[:name], normalize_regex: /[^\w\s^@^.^+^-]/ 35 | 36 | # Keyword search 37 | keyword_filter [:gender], by: params[:gender] 38 | 39 | # Match truthy/falsey values 40 | truthy_filter :student, match: params[:is_student] 41 | 42 | # Check if within range 43 | range_filter :age, ceiling: params[:max_age] 44 | 45 | # Check if ranges overlap 46 | range_filter :start, :end, floor: Date.parse('2015-01-01') 47 | 48 | # Add a custom filter 49 | add_filter -> (user) { user if user.student || user.age > 25 } 50 | end 51 | 52 | data = [ 53 | User.create(name: 'Josh', age: 26, student: false, gender: :male, start: Date.parse('2007-01-01'), end: Date.parse('2013-01-01')), 54 | User.create(name: 'Lauren', age: 25, student: true, gender: :female, start: Date.parse('2008-01-01'), end: Date.parse('2016-01-01')) 55 | ] 56 | 57 | # By default data which doesn't match all of the filters will be filtered out 58 | filter_set.call(data) 59 | ``` 60 | 61 | ## Changelog 62 | 63 | ``` 64 | * v0.4.0 65 | 66 | - Added :normalize_regex option to LikeFilter 67 | 68 | * v0.3.1 69 | 70 | - Extend DSL with PrefixFilter 71 | 72 | * v0.3.0 73 | 74 | - Added PrefixFilter 75 | 76 | * v0.2.0 77 | 78 | - Fix RangeOverlapFilter edge cases 79 | ``` 80 | 81 | ## License 82 | 83 | The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT). 
84 | 85 | -------------------------------------------------------------------------------- /test/range_overlap_filter_test.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper' 2 | 3 | class RangeOverlapFilterTest < Minitest::Spec 4 | def assert_match(start, _end) 5 | data = OpenStruct.new(start: start, end: _end) 6 | assert_equal data, @f.call(data) 7 | end 8 | 9 | def assert_filter(start, _end) 10 | data = OpenStruct.new(start: start, end: _end) 11 | assert_equal nil, @f.call(data) 12 | end 13 | 14 | describe 'no floor or ceiling' do 15 | before do 16 | @f = DataFilter::RangeOverlapFilter.new(:start, :end) 17 | end 18 | 19 | it 'always matches' do 20 | assert_match 1, 3 21 | end 22 | end 23 | 24 | describe 'floor but no ceiling' do 25 | before do 26 | @f = DataFilter::RangeOverlapFilter.new(:start, :end, floor: 2) 27 | end 28 | 29 | it 'matches if the range straddles the floor' do 30 | assert_match 1, 3 31 | end 32 | 33 | it 'matches if the range peak matches the floor' do 34 | assert_match 1, 2 35 | end 36 | 37 | it 'matches if the range valley matches the floor' do 38 | assert_match 2, 3 39 | end 40 | 41 | it 'filters if the range is entirely below the floor' do 42 | assert_filter 0, 1 43 | end 44 | 45 | it 'matches if the range is entirely above the floor' do 46 | assert_match 3, 4 47 | end 48 | end 49 | 50 | describe 'ceiling but no floor' do 51 | before do 52 | @f = DataFilter::RangeOverlapFilter.new(:start, :end, ceiling: 3) 53 | end 54 | 55 | it 'matches if the range straddles the ceiling' do 56 | assert_match 2, 4 57 | end 58 | 59 | it 'matches if the range peak matches the ceiling' do 60 | assert_match 1, 3 61 | end 62 | 63 | it 'matches if the range valley matches the ceiling' do 64 | assert_match 3, 5 65 | end 66 | 67 | it 'matches if the range is entirely below the ceiling' do 68 | assert_match 0, 1 69 | end 70 | 71 | it 'filters if the range is entirely above the ceiling' do 72 | assert_filter 
4, 5 73 | end 74 | end 75 | 76 | describe 'floor and ceiling' do 77 | before do 78 | @f = DataFilter::RangeOverlapFilter.new(:start, :end, floor: 2, ceiling: 5) 79 | end 80 | 81 | it 'matches if the range straddles the ceiling' do 82 | assert_match 3, 6 83 | end 84 | 85 | it 'matches if the range is straddling the floor' do 86 | assert_match 1, 3 87 | end 88 | 89 | it 'matches if the range extends past both the ceiling and the floor' do 90 | assert_match 1, 6 91 | end 92 | 93 | it 'matches if the range is inside the ceiling and the floor' do 94 | assert_match 3, 4 95 | end 96 | 97 | it 'matches if the range is equal to the ceiling and floor' do 98 | assert_match 2, 5 99 | end 100 | 101 | it 'matches if the range peak is equal to the floor' do 102 | assert_match 0, 2 103 | end 104 | 105 | it 'matches if the range valley is equal to the ceiling' do 106 | assert_match 5, 6 107 | end 108 | 109 | it 'filters if the range is entirely below the floor' do 110 | assert_filter 0, 1 111 | end 112 | 113 | it 'filters if the range is entirely above the ceiling' do 114 | assert_filter 6, 7 115 | end 116 | end 117 | 118 | describe 'nil default' do 119 | it 'treats nils as the nil default' do 120 | @f = DataFilter::RangeOverlapFilter.new(:start, :end, floor: 1, nil_default: 0) 121 | assert_filter nil, nil 122 | assert_match nil, 1 123 | 124 | @f = DataFilter::RangeOverlapFilter.new(:start, :end, floor: 1, nil_default: 1) 125 | assert_match nil, nil 126 | end 127 | end 128 | end 129 | -------------------------------------------------------------------------------- /test/like_filter_test.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper' 2 | 3 | module DataFilter 4 | class LikeFilterTest < Minitest::Spec 5 | it "gracefully handles items that don't respond to the filter sym" do 6 | assert_equal nil, DataFilter::LikeFilter.new(:hello, 'world').call(nil) 7 | end 8 | 9 | it 'is case insensitive' do 10 | filter = 
DataFilter::LikeFilter.new(:name, 'john') 11 | el = OpenStruct.new(name: 'John Snow') 12 | assert_equal el, filter.call(el) 13 | end 14 | 15 | it 'strips regexp' do 16 | # Regexp in both filter and data 17 | # Both get stripped and search is identical to data 18 | filter = DataFilter::LikeFilter.new(:name, 'b*lbo') 19 | el = OpenStruct.new(name: 'b*lbo') 20 | assert_equal el, filter.call(el) 21 | 22 | # Regexp in filter only: it passes due to special char strip 23 | # The split parts then fuzzy match properly 24 | filter = DataFilter::LikeFilter.new(:name, 'b*lbo') 25 | el = OpenStruct.new(name: 'bilbo') 26 | refute_equal nil, filter.call(el) 27 | 28 | # Regexp in data only: it fails due to special char strip 29 | # The search term does not fuzzy match the split parts 30 | filter = DataFilter::LikeFilter.new(:name, 'bilbo') 31 | el = OpenStruct.new(name: 'b*lbo') 32 | assert_equal nil, filter.call(el) 33 | end 34 | 35 | it 'ignores extra whitespace' do 36 | # Extra space in both filter and data 37 | filter = DataFilter::LikeFilter.new(:name, 'hello world') 38 | el = OpenStruct.new(name: 'hello world') 39 | assert_equal el, filter.call(el) 40 | 41 | # Extra space in filter 42 | filter = DataFilter::LikeFilter.new(:name, ' fern bush') 43 | el = OpenStruct.new(name: 'fern bush') 44 | assert_equal el, filter.call(el) 45 | 46 | # Extra space in data 47 | filter = DataFilter::LikeFilter.new(:name, 'yo dawg') 48 | el = OpenStruct.new(name: ' yo dawg ') 49 | assert_equal el, filter.call(el) 50 | end 51 | 52 | it 'ignores non-word characters' do 53 | # Special characters in both filter and data 54 | filter = DataFilter::LikeFilter.new(:name, 'mr.bean') 55 | el = OpenStruct.new(name: 'mr,bean') 56 | assert_equal el, filter.call(el) 57 | 58 | # Special characters in filter 59 | filter = DataFilter::LikeFilter.new(:name, 'mr.bean') 60 | el = OpenStruct.new(name: 'mr bean') 61 | assert_equal el, filter.call(el) 62 | 63 | # Special characters in data only 64 | filter = 
DataFilter::LikeFilter.new(:name, 'mr bean') 65 | el = OpenStruct.new(name: 'mr.bean') 66 | assert_equal el, filter.call(el) 67 | end 68 | 69 | it 'does not match if the search term is more specific than the data' do 70 | filter = DataFilter::LikeFilter.new(:name, 'joshua') 71 | el = OpenStruct.new(name: 'josh') 72 | assert_equal nil, filter.call(el) 73 | end 74 | 75 | it 'matches if the search term is less specific than the data' do 76 | filter = DataFilter::LikeFilter.new(:name, 'josh') 77 | el = OpenStruct.new(name: 'joshua') 78 | assert_equal el, filter.call(el) 79 | end 80 | 81 | it 'matches if the search term is identical to the data' do 82 | filter = DataFilter::LikeFilter.new(:name, 'josh') 83 | el = OpenStruct.new(name: 'josh') 84 | assert_equal el, filter.call(el) 85 | end 86 | 87 | it 'handles arrays' do 88 | filter = DataFilter::LikeFilter.new(:name, 'Array Man') 89 | el = OpenStruct.new(name: ['Array Man']) 90 | assert_equal el, filter.call(el) 91 | end 92 | 93 | it 'handles hashes' do 94 | filter = DataFilter::LikeFilter.new(:name, 'Super Hash') 95 | el = OpenStruct.new(name: {:alias => 'Super Hash'}) 96 | assert_equal el, filter.call(el) 97 | end 98 | 99 | it 'allows custom regex' do 100 | # email characters in both filter and data 101 | filter = DataFilter::LikeFilter.new(:from, 'test-user+person@datto.com', /[^\w\s^.^@^-]/) 102 | el = OpenStruct.new(from: 'test-user+person@datto.com') 103 | assert_equal el, filter.call(el) 104 | end 105 | 106 | it 'allows custom regex to be nil' do 107 | # plain words in both filter and data; nil is passed as the regex argument 108 | filter = DataFilter::LikeFilter.new(:from, 'Bobs Burgers', nil) 109 | el = OpenStruct.new(from: 'Bobs Burgers') 110 | assert_equal el, filter.call(el) 111 | end 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /lib/data_filter/filter_set.rb: -------------------------------------------------------------------------------- 1 | module DataFilter 2 | # Represents
module DataFilter
  # Represents a collection of data filters that can be called on
  # data. Provides a DSL for creating a filter set and only adding
  # the filters that you need.
  class FilterSet
    # @return [Array<#call>] the filters, in the order they were added
    attr_reader :filters

    def initialize
      @filters = []
    end

    # Add a filter to the filter set
    #
    # @param filter [#call]
    #   a callable filter. Can be a proc, lambda, or any object
    #   that responds to #call
    # @return [FilterSet] the amended filter set
    def add_filter(filter)
      @filters << filter
      self
    end

    # Run the filter set on a single data item
    #
    # Filters are evaluated in the order they were added and evaluation
    # stops at the first filter that returns a falsey value, so a filter
    # is never invoked for an item that an earlier filter already rejected.
    # An empty filter set lets every item through.
    #
    # @param item [Object] some item that we want to pass through all
    #   of the filters in the filter set
    # @return [Object, nil] the original item or nil
    def filter(item)
      item if @filters.all? { |callable| callable.call(item) }
    end

    # Run the filter set on a collection of data items
    #
    # @param items [Enumerable] collection of items that we want to
    #   pass through all of the filters in the filter set
    # @return [Enumerable] the filtered results
    def batch(items)
      items.select { |item| filter(item) }
    end

    # A DSL for creating a series of filters that can be called
    #
    # Provides a cleaner way to define a {DataFilter::FilterSet}
    # with a bunch of different filters
    #
    # Conditionally adds filters to the set based on whether or not
    # any valid search terms are provided (useful for Controller params)
    #
    # @example Office365::Mail::MessagesController
    #   filter_set = DataFilter::FilterSet.create do
    #     like_filter :to,      by: params[:to]
    #     like_filter :from,    by: params[:from]
    #     like_filter :cc,      by: params[:cc]
    #     like_filter :bcc,     by: params[:bcc]
    #     like_filter :subject, by: params[:subject]
    #
    #     keyword_filter [:to, :from, :cc, :bcc, :subject], by: params[:keyword]
    #
    #     range_filter :date, floor: start_date, ceiling: end_date
    #
    #     if params[:has_attachment] === true
    #       range_filter :attachment_count, floor: 1
    #     elsif params[:has_attachment] === false
    #       range_filter :attachment_count, ceiling: 0, nil_default: 0
    #     end
    #   end
    module DSL
      def self.included(base)
        base.extend(ClassMethods)
      end

      module ClassMethods
        # Initializes a new {DataFilter::FilterSet} using a block.
        # The block conforms to the DSL defined in this module and is
        # evaluated with the new filter set as +self+.
        # Delegates undefined messages to the caller's scope.
        #
        # When no block is given an empty filter set is returned.
        #
        # @yield the DSL block
        # @return [DataFilter::FilterSet] the filter set evaluated
        #   with the DSL
        def create(&block)
          instance = new
          return instance unless block

          # Evaluates the fixed literal 'self' (never caller-supplied text)
          # in the block's binding to capture the object the block was
          # written in, so unknown messages can be forwarded to it.
          original_self = eval('self', block.binding)
          instance.instance_variable_set(:@original_self, original_self)
          instance.instance_eval(&block)
          instance
        end
      end

      # Adds a {DataFilter::LikeFilter} to the filter set.
      # Nothing is added when +:by+ is nil or false.
      #
      # @param field_sym [Symbol] name of the data method we want
      #   to filter
      # @option opts [Object] :by the value we want to use when
      #   filtering the data item
      # @option opts [Regexp, nil] :normalize_regex the regular
      #   expression used to normalize the string
      def like_filter(field_sym, opts = {})
        if opts[:by]
          @filters << LikeFilter.new(field_sym, opts[:by], opts[:normalize_regex])
        end
      end

      # Adds a {DataFilter::PrefixFilter} to the filter set.
      # Nothing is added when +:by+ is nil or false.
      #
      # @param field_sym [Symbol] name of the data method we want
      #   to filter
      # @option opts [Object] :by the value we want to use when
      #   filtering the data item
      def prefix_filter(field_sym, opts = {})
        if opts[:by]
          @filters << PrefixFilter.new(field_sym, opts[:by])
        end
      end

      # Adds a {DataFilter::KeywordFilter} to the filter set.
      # Nothing is added when +:by+ is nil or false.
      #
      # @param field_syms [Array<Symbol>] a collection of all of the data
      #   methods we want to inspect when filtering
      # @option opts [Object] :by the value we want to use when filtering
      #   the data item
      def keyword_filter(field_syms, opts = {})
        if opts[:by]
          @filters << KeywordFilter.new(field_syms, opts[:by])
        end
      end

      # Adds a {DataFilter::RangeFilter} to the filter set.
      # Nothing is added unless +:floor+ or +:ceiling+ is given.
      #
      # @param field_sym [Symbol] name of the data method we want to
      #   filter
      # @option opts [Comparable] :floor the range beginning we want to
      #   filter the data item by
      # @option opts [Comparable] :ceiling the range end we want to filter
      #   the data item by
      # @option opts [Comparable] :nil_default the value to use if the
      #   data item has no field value
      def range_filter(field_sym, opts = {})
        if opts[:floor] || opts[:ceiling]
          @filters << RangeFilter.new(field_sym, **opts)
        end
      end

      # Adds a {DataFilter::RangeOverlapFilter} to the filter set.
      # Nothing is added unless +:floor+ or +:ceiling+ is given.
      #
      # @param start_sym [Symbol] name of the start field we want to
      #   filter
      # @param end_sym [Symbol] name of the end field we want to
      #   filter
      # @option opts [Comparable] :floor the range beginning we want to
      #   filter the data item by
      # @option opts [Comparable] :ceiling the range end we want to filter
      #   the data item by
      # @option opts [Comparable] :nil_default the value to use if the
      #   data item has no field value
      def range_overlap_filter(start_sym, end_sym, opts = {})
        if opts[:floor] || opts[:ceiling]
          @filters << RangeOverlapFilter.new(start_sym, end_sym, **opts)
        end
      end

      # Adds a {DataFilter::TruthyFilter} to the filter set
      #
      # @param field_sym [Symbol] name of the field to match on
      # @param match [Object] truthy/falsey value to use to determine whether
      #   the filter should match/filter truthy fields or falsey fields.
      #   +nil+ skips the filter entirely; +false+ or the string 'false'
      #   adds the filter with +invert: true+; any other value adds it
      #   without inversion.
      def truthy_filter(field_sym, match: nil)
        # Skip filter if match is not specified
        return if match.nil?

        if is_falsey?(match)
          @filters << TruthyFilter.new(field_sym, invert: true)
        else
          @filters << TruthyFilter.new(field_sym)
        end
      end

      # Used to support the DSL. Calls out to the parent scope if
      # we receive a message we can't respond to.
      #
      # A filter set that was not built through +create+ has no parent
      # scope; in that case the message is handled by the default
      # implementation, which raises NoMethodError for this object.
      def method_missing(sym, *args, &block)
        return super unless instance_variable_defined?(:@original_self)

        @original_self.send(sym, *args, &block)
      end

      # Used to support the DSL. Reports the methods of the parent scope
      # as our own, mirroring what #method_missing forwards. Without a
      # parent scope nothing extra is reported.
      def respond_to_missing?(sym, include_all = false)
        return super unless instance_variable_defined?(:@original_self)

        @original_self.respond_to?(sym, include_all)
      end

      private

      # TODO DRY up
      # True for +false+ and for the string 'false'.
      def is_falsey?(val)
        [false, 'false'].include?(val)
      end
    end

    include DSL
  end
end