├── .gitignore ├── .rspec ├── .travis.yml ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── bench ├── bench.rb └── words.txt ├── lib ├── redis-asm.rb ├── redis │ ├── asm.rb │ └── asm │ │ └── version.rb └── redis_asm.lua ├── redis-asm.gemspec └── spec ├── redis └── asm │ ├── asm_spec.rb │ └── test_data.txt └── spec_helper.rb /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | *.bundle 11 | *.so 12 | *.o 13 | *.a 14 | mkmf.log 15 | sample.rb -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format documentation 2 | --color 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 2.1.5 4 | services: 5 | - redis-server -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in redis-asm.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Masato Yamaguchi 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the 
Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Redis::Asm 2 | [![Build Status](https://travis-ci.org/krt/redis-asm.svg?branch=master)](https://travis-ci.org/krt/redis-asm) 3 | [![Coverage Status](https://img.shields.io/coveralls/krt/redis-asm.svg)](https://coveralls.io/r/krt/redis-asm) 4 | 5 | ##### Fast fuzzy string search on Redis using Lua. UTF-8 ready. 6 | 7 | ## Description 8 | Fast ASM (Approximate String Matching) by calculating edit distance within the collections such as ZSET, HASH, LIST, SET on Redis using Lua script. 9 | `Redis::Asm` provides you to search multi-byte characters correctly, because it recognizes lead-byte of UTF-8 strings. 10 | 11 | ## Prerequisites 12 | This library requires a Redis server with Lua scripting support (EVAL and EVALSHA commands). This support was added in Redis 2.6. 
13 | 14 | ## Installation 15 | 16 | Add this line to your application's Gemfile: 17 | 18 | ```ruby 19 | gem 'redis-asm' 20 | ``` 21 | 22 | And then execute: 23 | 24 | $ bundle 25 | 26 | Or install it yourself as: 27 | 28 | $ gem install redis-asm 29 | 30 | ## Usage 31 | 32 | To initialize `Redis::Asm`: 33 | ```ruby 34 | require 'redis' 35 | require 'redis-asm' 36 | 37 | # Use Redis.current: 38 | redis = Redis.current 39 | 40 | # Initialize Redis with host and port: 41 | redis = Redis.new(:host => REDIS_HOST, :port => REDIS_PORT) 42 | 43 | asm = Redis::Asm.new(redis) 44 | ``` 45 | 46 | 47 | First, prepare test data: 48 | ```ruby 49 | data = %w(example samples abampere zzi 東京都 京都府) 50 | 51 | # key names 52 | keys = {} 53 | types = ['set', 'zset', 'hash', 'list'] 54 | types.each{|t| keys[t] = "testdata:#{t}"} 55 | 56 | # reset Redis 57 | keys.values.each{|v| redis.del v } 58 | 59 | # set data to Redis 60 | redis.sadd keys['set'], data 61 | redis.zadd keys['zset'], data.map.with_index{|d, i| [i+1, d]} 62 | redis.mapped_hmset keys['hash'], ({}).tap{|h| data.each_with_index{|x,i| h[i+1] = x}} 63 | data.each{|d| redis.rpush keys['list'], d } 64 | ``` 65 | 66 | To execute fuzzy search from Redis collections: 67 | ```ruby 68 | require 'json' 69 | require 'yaml' 70 | 71 | # asm.search(KEY, NEEDLE, MAX_RESULTS=10) 72 | 73 | # To search from SET 74 | result = asm.search(keys['set'], 'example') 75 | # To search from LIST 76 | result = asm.search(keys['list'], 'example') 77 | 78 | puts JSON.parse(result).to_yaml 79 | # --- 80 | # - haystack: example 81 | # match: 1 82 | # - haystack: samples 83 | # match: 0.57142857142857 84 | # - haystack: abampere 85 | # match: 0.5 86 | 87 | # To search from HASH 88 | 89 | # Redis::Asm matches HASH values 90 | # each item has 'field' property 91 | 92 | result = asm.search(HASH_KEY, 'example') 93 | puts JSON.parse(result).to_yaml 94 | # --- 95 | # - haystack: example 96 | # field: '1' 97 | # match: 1 98 | # - haystack: samples 99 | # field: '2' 
100 | # match: 0.57142857142857 101 | # - haystack: abampere 102 | # field: '3' 103 | # match: 0.5 104 | 105 | # To search from ZSET 106 | # each item has 'score' property 107 | 108 | result = asm.search(ZSET_KEY, 'example') 109 | puts JSON.parse(result).to_yaml 110 | # --- 111 | # - haystack: example 112 | # score: '1' 113 | # match: 1 114 | # - haystack: samples 115 | # score: '2' 116 | # match: 0.57142857142857 117 | # - haystack: abampere 118 | # score: '3' 119 | # match: 0.5 120 | ``` 121 | You can use UTF-8 multibyte chars: 122 | ```ruby 123 | result = asm.search(ZSET_KEY, '東京都') 124 | puts JSON.parse(result).to_yaml 125 | # --- 126 | # - haystack: "東京都" 127 | # match: 1 128 | # - haystack: "京都府" 129 | # match: 0.33333333333333 130 | ``` 131 | ## Performance 132 | 133 | - PC: MBP 2.6 GHz Intel Core i5 16 GB DDR3 RAM 134 | - OS: Mac OSX 10.9.5 135 | - Ruby 2.1.5p273 [x86_64-darwin13.0] 136 | - Redis server v=2.6.17 bits=64 137 | 138 | You can try benchmarking `Redis::Asm` by running `rake bench` in console. 139 | That's the result I've got on my machine. 
140 | ```sh 141 | krt@mbp% ruby bench/bench.rb 142 | user system total real 143 | a : 1000 wd 0.000000 0.000000 0.000000 ( 0.016898) 144 | a : 10000 wd 0.000000 0.000000 0.000000 ( 0.165706) 145 | a : 100000 wd 0.000000 0.000000 0.000000 ( 1.468973) 146 | 147 | baz : 1000 wd 0.000000 0.000000 0.000000 ( 0.014015) 148 | baz : 10000 wd 0.000000 0.000000 0.000000 ( 0.091153) 149 | baz : 100000 wd 0.000000 0.000000 0.000000 ( 0.651317) 150 | 151 | rifmino : 1000 wd 0.000000 0.000000 0.000000 ( 0.017831) 152 | rifmino : 10000 wd 0.000000 0.000000 0.000000 ( 0.108233) 153 | rifmino : 100000 wd 0.000000 0.000000 0.000000 ( 0.772444) 154 | 155 | mskelngesol : 1000 wd 0.000000 0.000000 0.000000 ( 0.015920) 156 | mskelngesol : 10000 wd 0.000000 0.000000 0.000000 ( 0.092513) 157 | mskelngesol : 100000 wd 0.000000 0.000000 0.000000 ( 0.701796) 158 | 159 | 元気です : 1000 wd 0.000000 0.000000 0.000000 ( 0.002177) 160 | 元気です : 10000 wd 0.000000 0.000000 0.000000 ( 0.028857) 161 | 元気です : 100000 wd 0.000000 0.000000 0.000000 ( 0.279001) 162 | ``` 163 | *NOTE:* To be fair, it's suitable for collections of about 10,000 words or fewer, because Redis blocks all other requests while it is executing a Lua script. 164 | 165 | ## Acknowledgment 166 | 167 | - Words in test data from @atebits 168 | https://github.com/atebits/Words 169 | - Some Japanese multibyte words from @gkovacs 170 | https://github.com/gkovacs/japanese-morphology 171 | - Levenshtein algorithm from @wooorm 172 | https://github.com/wooorm/levenshtein-edit-distance 173 | 174 | ## Contributing 175 | 176 | 1. Fork it ( https://github.com/krt/redis-asm/fork ) 177 | 2. Create your feature branch (`git checkout -b my-new-feature`) 178 | 3. Commit your changes (`git commit -am 'Add some feature'`) 179 | 4. Push to the branch (`git push origin my-new-feature`) 180 | 5. 
Create a new Pull Request 181 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rspec/core/rake_task" 3 | 4 | RSpec::Core::RakeTask.new(:spec) 5 | 6 | task :default => :spec 7 | 8 | desc 'Run benchmark' 9 | task :bench do 10 | sh 'bundle', 'exec', 'ruby', 'bench/bench.rb' 11 | end 12 | -------------------------------------------------------------------------------- /bench/bench.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # -*- encoding: utf-8 -*- 3 | $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) 4 | require 'benchmark' 5 | require 'redis' 6 | require 'redis-asm' 7 | require 'json' 8 | 9 | dic = 10 | File.read(File.expand_path('../words.txt', __FILE__)). 11 | split("\n"). 12 | map(&:chomp) 13 | 3.times{dic.shift} 14 | 15 | SKEY = 'redis:asm:bench' 16 | 17 | def setup_redis key, dic, haystack_size 18 | diviser = dic.size / haystack_size 19 | r = Redis.current 20 | r.del key 21 | r.sadd key, dic.select.with_index {|w, i| i % diviser == 0 } 22 | end 23 | 24 | r = Redis.current 25 | asm = Redis::Asm.new(r) 26 | 27 | needles = %w(a baz rifmino mskelngesol 元気です) 28 | 29 | Benchmark.bm(22) do |x| 30 | needles.each do |needle| 31 | [1000, 10000, 100000].each do |s| 32 | setup_redis SKEY, dic, s 33 | x.report("%11s : %6d wd"%[needle, s]) { asm.search(SKEY, needle) } 34 | end 35 | puts "" 36 | end 37 | end 38 | 39 | # output results 40 | # puts "results from 100000\n" 41 | # setup_redis SKEY, dic, 100000 42 | # needles.each do |needle| 43 | # puts "#{needle} :" 44 | # p JSON.parse(asm.search(SKEY, needle)) 45 | # puts "" 46 | # end 47 | -------------------------------------------------------------------------------- /lib/redis-asm.rb: -------------------------------------------------------------------------------- 1 | require 
require 'redis'
require "redis/asm/version"
require "digest/sha1"

class Redis
  # Approximate string matching against a Redis collection.
  #
  # The matching itself is done server-side by the bundled Lua script
  # (redis_asm.lua); this class only ships the script to Redis and invokes it.
  class Asm

    # Directory two levels above this file, where redis_asm.lua is looked up.
    SCRIPT_DIR = File.expand_path('../../', __FILE__)
    # Source of the Lua script, read once when the class is loaded.
    SCRIPT = File.read File.join(SCRIPT_DIR, "redis_asm.lua")
    # SHA1 digest of the script source, used to address it through EVALSHA.
    SHA1 = Digest::SHA1.hexdigest SCRIPT

    # @param redis [Redis] client used to run the script
    def initialize(redis)
      @redis = redis
    end

    # Runs the fuzzy-search script against +key+.
    #
    # The script is invoked by digest (EVALSHA). When the server answers with a
    # NOSCRIPT error the script is loaded with SCRIPT LOAD and the call is
    # repeated exactly once; a second failure is raised to the caller so a
    # server that keeps rejecting the script cannot cause an endless loop.
    #
    # @param key [String] name of the collection key to search
    # @param needle [String] search word
    # @param max_results [Integer] maximum number of results, defaults to 10
    # @return [String, nil] whatever the script returns (a JSON string, or nil)
    # @raise [Redis::CommandError] for any server error other than the first NOSCRIPT
    def search(key, needle, max_results=10)
      script_loaded = false
      begin
        @redis.evalsha(SHA1, :keys => [key], :argv => [needle, max_results])
      rescue Redis::CommandError => e
        # Only a missing script is recoverable, and only once.
        raise if script_loaded || e.message !~ /NOSCRIPT/
        @redis.script(:load, SCRIPT)
        script_loaded = true
        retry
      end
    end
  end
end
25 | 26 | local key_type = redis.call('TYPE', KEYS[1])["ok"] 27 | 28 | if not key_type then return nil end 29 | if key_type == 'zset' then 30 | local zset = redis.call('ZRANGE', KEYS[1], 0, -1, 'WITHSCORES') 31 | local is_value = true 32 | for i = 1, #zset do 33 | if is_value then haystacks[#haystacks + 1] = zset[i] end 34 | if not is_value then opt_data[#opt_data + 1] = zset[i] end 35 | is_value = not is_value 36 | end 37 | elseif key_type == 'list' then 38 | haystacks = redis.call('LRANGE', KEYS[1], 0, -1) 39 | elseif key_type == 'set' then 40 | haystacks = redis.call('SMEMBERS', KEYS[1]) 41 | elseif key_type == 'hash' then 42 | local hash = redis.call('HGETALL', KEYS[1]) 43 | local is_field = true 44 | for i = 1, #hash do 45 | if is_field then opt_data[#opt_data + 1] = hash[i] end 46 | if not is_field then haystacks[#haystacks + 1] = hash[i] end 47 | is_field = not is_field 48 | end 49 | else 50 | return nil 51 | end 52 | 53 | local needle = ARGV[1] 54 | if not needle then return nil end 55 | 56 | local max_results = tonumber(ARGV[2]) or 10 57 | 58 | local cjson = cjson 59 | local s_byte = string.byte 60 | local s_sub = string.sub 61 | local s_find = string.find 62 | local m_min = math.min 63 | local m_max = math.max 64 | local m_floor = math.floor 65 | local m_ceil = math.ceil 66 | local t_sort = table.sort 67 | 68 | 69 | -- mapping utf-8 leading-byte to byte offset 70 | local byte_offsets = { 71 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 80 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 81 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 82 | 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 83 | 3, 3, 3, 3, 3, 3, 3} 84 | 85 | --[[ 86 | * Split utf-8 string into multi-byte chunks according to its leading-byte. 87 | * @param {string} 88 | * @return {Array.} Array of multi-byte strings. 89 | --]] 90 | local function split_into_utf8_bytes(str) 91 | local codes = {} 92 | local i 93 | local offset = 0 94 | 95 | local mb_str, byte, offset_pos 96 | 97 | for i = 1, #str do 98 | offset_pos = i + offset 99 | if offset_pos > #str then 100 | break 101 | end 102 | 103 | byte = byte_offsets[s_byte(str, offset_pos, offset_pos)] or 0 104 | 105 | mb_str = s_sub(str, offset_pos, offset_pos + byte) 106 | codes[#codes + 1] = mb_str 107 | offset = offset + byte 108 | end 109 | return codes 110 | end 111 | 112 | --[[ 113 | * Check if haystack includes any character in needle. 114 | * @param {string} 115 | * @param {Array.} 116 | * @return {boolean} true if haystack includes utf_needle 117 | --]] 118 | local function haystack_includes_needle_char(haystack, utf_needle) 119 | for i = 1, #utf_needle do 120 | if s_find(haystack, utf_needle[i], 1, true) then return true end 121 | end 122 | return false 123 | end 124 | 125 | local cache = {} 126 | 127 | --[[ 128 | * Calculate match score using levenshtein distance. 
--]]
-- levenshtein_score(str, needle, should_cutoff, lowest_score)
--
-- Calculates a match score from the Levenshtein (edit) distance between two
-- character arrays, using a single-row dynamic-programming cache.
--
-- @param {table}      str            haystack as an array of characters
-- @param {table}      needle         needle as an array of characters
-- @param {boolean}    should_cutoff  when true (and lowest_score is given),
--                                    give up as soon as the score can no
--                                    longer beat lowest_score
-- @param {number|nil} lowest_score   score a candidate has to beat
-- @return {number|nil} match score in 0..1 (1 = identical), or nil when the
--                      calculation was cut off
local function levenshtein_score(str, needle, should_cutoff, lowest_score)
  local length = #str
  local length_needle = #needle
  local longer_length = m_max(length, length_needle)
  local code, result, distance, distance_needle, row_min, cutoff_distance

  -- Degenerate inputs: two empty strings are identical; if only one side is
  -- empty every character of the other side has to be inserted.
  if longer_length == 0 then return 1 end
  if length == 0 or length_needle == 0 then return 0 end

  if should_cutoff and lowest_score then
    cutoff_distance = m_ceil((1 - lowest_score) * longer_length) + 1
  end

  -- First row of the matrix: turning an empty needle prefix into the first
  -- `index` haystack characters costs `index` insertions. (Lua arrays are
  -- 1-based, so the value equals the index itself.)
  for index = 1, length do
    cache[index] = index
  end

  for index_needle = 1, length_needle do
    code = needle[index_needle]
    result = index_needle - 1
    distance = index_needle - 1
    -- Smallest value in this row, including the implicit first column.
    row_min = index_needle

    for index = 1, length do
      -- distance_needle: diagonal cell plus substitution cost.
      distance_needle = (code == str[index]) and distance or distance + 1
      -- distance: cell directly above; result: cell to the left.
      distance = cache[index]
      -- Minimum of (above + 1, left + 1, diagonal + cost).
      result = (distance > result) and
        ((distance_needle > result) and result + 1 or distance_needle)
        or
        ((distance_needle > distance) and distance + 1 or distance_needle)
      cache[index] = result

      if result < row_min then row_min = result end
    end

    -- The final distance can never be smaller than the minimum of any row, so
    -- only the row minimum (not an individual cell) may trigger the cutoff.
    if cutoff_distance and row_min > cutoff_distance then
      return nil
    end
  end
  return 1 - (result / longer_length)
end

local scores = {}
local utf_needle = split_into_utf8_bytes(needle)
local lowest_score, utf_word, longer_length, score
local should_cutoff = false

-- main loop.
179 | for i = 1, #haystacks do 180 | if haystack_includes_needle_char(haystacks[i], utf_needle) then 181 | utf_word = split_into_utf8_bytes(haystacks[i]) 182 | 183 | if #utf_word >= #utf_needle then 184 | longer_length = #utf_word 185 | 186 | if s_find(haystacks[i], needle, 1, true) then 187 | score = #utf_needle * (1 / longer_length) 188 | else 189 | score = levenshtein_score(utf_word, utf_needle, should_cutoff, lowest_score) 190 | end 191 | 192 | if score and not(score == 0) then 193 | if #scores > max_results then 194 | should_cutoff = true 195 | t_sort( 196 | scores, 197 | function(a,b) 198 | return a.score > b.score 199 | end 200 | ) 201 | lowest_score = scores[max_results].score 202 | if score > lowest_score then 203 | scores[#scores + 1] = {score = score, idx = i} 204 | end 205 | else 206 | scores[#scores + 1] = {score = score, idx = i} 207 | end 208 | end 209 | end 210 | end 211 | 212 | end 213 | 214 | t_sort( 215 | scores, 216 | function(a,b) 217 | return a.score > b.score 218 | end 219 | ) 220 | 221 | local result = {} 222 | local output_length = m_min(#scores, max_results) 223 | 224 | for i = 1, output_length do 225 | local item = {} 226 | item['match'] = scores[i].score 227 | item['haystack'] = haystacks[scores[i].idx] 228 | if key_type == 'zset' then 229 | item['score'] = opt_data[scores[i].idx] 230 | elseif key_type == 'hash' then 231 | item['field'] = opt_data[scores[i].idx] 232 | end 233 | result[#result + 1] = item 234 | end 235 | 236 | local text = cjson.encode(result) 237 | 238 | return(text) 239 | -------------------------------------------------------------------------------- /redis-asm.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'redis/asm/version' 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "redis-asm" 8 | spec.version = Redis::Asm::VERSION 9 | 
spec.authors = ["Masato Yamaguchi"] 10 | spec.email = ["karateka2000@gmail.com"] 11 | spec.summary = "Fast fuzzy string search on Redis using Lua. UTF-8 ready." 12 | spec.description = "Fast ASM (Approximate String Matching) by calucuating edit distance within the collecitons such as ZSET, HASH, LIST, SET on Redis using Lua script. It provides you to search multi-byte characters correctly, because it recognizes lead-byte of UTF-8 strings." 13 | spec.homepage = "http://github.com/krt/redis-asm" 14 | spec.license = "MIT" 15 | 16 | spec.files = `git ls-files -z`.split("\x0") 17 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 18 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 19 | spec.require_paths = ["lib"] 20 | 21 | spec.add_development_dependency "bundler", "~> 1.7" 22 | spec.add_development_dependency "rake", "~> 10.0" 23 | spec.add_development_dependency "rspec" 24 | spec.add_development_dependency "coveralls" 25 | spec.add_development_dependency "simplecov" 26 | spec.add_dependency 'redis', '~> 3.0' 27 | spec.add_dependency 'digest' 28 | end 29 | -------------------------------------------------------------------------------- /spec/redis/asm/asm_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'json' 3 | require 'yaml' 4 | 5 | REDIS_PORT = ENV['REDIS_PORT'] || 6379 6 | REDIS_HOST = ENV['REDIS_HOST'] || 'localhost' 7 | 8 | redis = Redis.new(:host => REDIS_HOST, :port => REDIS_PORT) 9 | asm = Redis::Asm.new(redis) 10 | 11 | SKEY = 'redis:asm:testing:set' 12 | ZKEY = 'redis:asm:testing:zset' 13 | HKEY = 'redis:asm:testing:hash' 14 | LKEY = 'redis:asm:testing:list' 15 | 16 | describe Redis::Asm do 17 | 18 | before :all do 19 | test_data = File.read(File.expand_path('../test_data.txt', __FILE__)) 20 | .split("\n") 21 | i = 0 22 | zdata = test_data.map{|item| i += 1; [i, item]} 23 | i = 0 24 | hdata = test_data.inject({}){|ha, k| i += 1; 
ha.merge(i=>k)} 25 | 26 | redis.pipelined do |r| 27 | r.script :flush 28 | r.sadd SKEY, test_data 29 | r.zadd ZKEY, zdata 30 | r.mapped_hmset HKEY, hdata 31 | test_data.each{|item| r.rpush LKEY,item} 32 | end 33 | end 34 | 35 | after :all do 36 | redis.del ZKEY 37 | redis.del HKEY 38 | redis.del SKEY 39 | redis.del LKEY 40 | end 41 | 42 | it 'has a version number' do 43 | expect(Redis::Asm::VERSION).not_to be nil 44 | end 45 | 46 | it 'responds to search method' do 47 | expect(asm.respond_to?(:search)).to eq(true) 48 | end 49 | 50 | context 'execute fuzzy searching on Redis SET or LIST' do 51 | let(:result_set) {JSON.parse(asm.search(SKEY, 'example'))} 52 | let(:result_list) {JSON.parse(asm.search(LKEY, 'example'))} 53 | 54 | it "result has exactly matched string" do 55 | expect(result_set.first).to eq({"haystack"=>"example", "match"=>1}) 56 | expect(result_list.first).to eq({"haystack"=>"example", "match"=>1}) 57 | end 58 | 59 | it "result has fuzzy matched string" do 60 | expect(result_set[1]).to eq({"haystack"=>"samples", "match"=>0.57142857142857}) 61 | expect(result_list[1]).to eq({"haystack"=>"samples", "match"=>0.57142857142857}) 62 | end 63 | 64 | it "result size must be default limit(10)" do 65 | expect(result_set.size).to eq 10 66 | expect(result_list.size).to eq 10 67 | end 68 | end 69 | 70 | context 'execute fuzzy searching on Redis Set with bracket char' do 71 | let(:result_set) {JSON.parse(asm.search(SKEY, '(ample'))} 72 | it "result has fuzzy matched string" do 73 | expect(result_set[1]).to eq({"haystack"=>"samples", "match"=>0.71428571428571}) 74 | end 75 | end 76 | 77 | context 'execute fuzzy searching on Redis SET or LIST using multi-byte string' do 78 | let(:result_set) {JSON.parse(asm.search(SKEY, '東京都'))} 79 | let(:result_list) {JSON.parse(asm.search(LKEY, '東京都'))} 80 | 81 | it "result has exactly matched string" do 82 | expect(result_set.first).to eq({"haystack"=>"東京都", "match"=>1}) 83 | expect(result_list.first).to eq({"haystack"=>"東京都", 
"match"=>1}) 84 | end 85 | 86 | it "result has fuzzy matched string" do 87 | expect(result_set[1]).to eq({"haystack"=>"京都府", "match"=>0.33333333333333}) 88 | expect(result_list[1]).to eq({"haystack"=>"京都府", "match"=>0.33333333333333}) 89 | end 90 | 91 | it "result size must be matched item count" do 92 | expect(result_set.size).to eq 2 93 | expect(result_list.size).to eq 2 94 | end 95 | end 96 | 97 | context 'execute fuzzy searching on Redis ZSET or HASH' do 98 | let(:result_zset) {JSON.parse(asm.search(ZKEY, 'example'))} 99 | let(:result_hash) {JSON.parse(asm.search(HKEY, 'example'))} 100 | 101 | it "result has exactly matched string, zset has 'score' and hash has 'field'" do 102 | expect(result_zset.first).to eq({"haystack"=>"example", "score"=>"114", "match"=>1}) 103 | expect(result_hash.first).to eq({"haystack"=>"example", "field"=>"114", "match"=>1}) 104 | end 105 | 106 | it "result has fuzzy matched string, zset has 'score' and hash has 'field'" do 107 | expect(result_zset[1]).to eq({"haystack"=>"samples", "score"=>"119", "match"=>0.57142857142857}) 108 | expect(result_hash[1]).to eq({"haystack"=>"samples", "field"=>"119", "match"=>0.57142857142857}) 109 | end 110 | 111 | it "result size must be default limit(10)" do 112 | expect(result_zset.size).to eq 10 113 | expect(result_hash.size).to eq 10 114 | end 115 | end 116 | 117 | context 'execute fuzzy searching on Redis ZSET or HASH using multi-byte string' do 118 | let(:result_zset) {JSON.parse(asm.search(ZKEY, '東京都'))} 119 | let(:result_hash) {JSON.parse(asm.search(HKEY, '東京都'))} 120 | 121 | it "result has exactly matched string, zset has 'score' and hash has 'field'" do 122 | expect(result_zset.first).to eq({"haystack"=>"東京都", "score"=>"126", "match"=>1}) 123 | expect(result_hash.first).to eq({"haystack"=>"東京都", "field"=>"126", "match"=>1}) 124 | end 125 | 126 | it "result has fuzzy matched string, zset has 'score' and hash has 'field'" do 127 | expect(result_zset[1]).to eq({"haystack"=>"京都府", 
"score"=>"125", "match"=>0.33333333333333}) 128 | expect(result_hash[1]).to eq({"haystack"=>"京都府", "field"=>"125", "match"=>0.33333333333333}) 129 | end 130 | 131 | it "result size must be matched item count" do 132 | expect(result_zset.size).to eq 2 133 | expect(result_hash.size).to eq 2 134 | end 135 | end 136 | 137 | end 138 | -------------------------------------------------------------------------------- /spec/redis/asm/test_data.txt: -------------------------------------------------------------------------------- 1 | 1ab2cd34ef5g6 2 | a 3 | aa 4 | aah 5 | aahed 6 | aahing 7 | aahs 8 | aal 9 | aalii 10 | aaliis 11 | aals 12 | aardvark 13 | aardvarks 14 | aardwolf 15 | aardwolves 16 | aargh 17 | aarrgh 18 | aarrghh 19 | aarti 20 | aartis 21 | aas 22 | aasvogel 23 | aasvogels 24 | ab 25 | aba 26 | abac 27 | abaca 28 | abacas 29 | abaci 30 | aback 31 | abacs 32 | abacterial 33 | abactinal 34 | abactinally 35 | abactor 36 | abactors 37 | abacus 38 | abacuses 39 | abaft 40 | abaka 41 | abakas 42 | abalone 43 | abalones 44 | abamp 45 | abampere 46 | abamperes 47 | abamps 48 | aband 49 | abanded 50 | abanding 51 | abandon 52 | abandoned 53 | abandonedly 54 | abandonee 55 | abandonees 56 | abandoner 57 | abandoners 58 | abandoning 59 | abandonment 60 | abandonments 61 | abandons 62 | abandonware 63 | abandonwares 64 | abands 65 | abapical 66 | abas 67 | abase 68 | abased 69 | abasedly 70 | abasement 71 | abasements 72 | abaser 73 | abasers 74 | abases 75 | abash 76 | abashed 77 | abashedly 78 | abashes 79 | abashing 80 | abashless 81 | abashment 82 | abashments 83 | abasia 84 | abasias 85 | abasing 86 | abask 87 | abatable 88 | abate 89 | abated 90 | abatement 91 | abatements 92 | abater 93 | abaters 94 | abates 95 | abating 96 | abatis 97 | abatises 98 | abator 99 | abators 100 | abattis 101 | abattises 102 | abattoir 103 | abattoirs 104 | abc 105 | abcdefg 106 | ac 107 | axc 108 | b 109 | bc 110 | cat 111 | cow 112 | difference 113 | distance 114 | example 115 | 
frankenstein 116 | javawasneat 117 | kitten 118 | levenshtein 119 | samples 120 | scalaisgreat 121 | sitting 122 | sturgeon 123 | urgently 124 | xabxcdxxefxgx 125 | 京都府 126 | 東京都 127 | 弊社佐藤 128 | 弊社と致しましては 129 | 貴社におかれましては 130 | 因為我是中國人所以我會說中文 131 | 因為我是英國人所以我會說英文 -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'simplecov' 2 | require 'coveralls' 3 | Coveralls.wear! 4 | 5 | $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) 6 | require 'redis' 7 | require 'redis/asm' 8 | 9 | SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[ 10 | SimpleCov::Formatter::HTMLFormatter, 11 | Coveralls::SimpleCov::Formatter 12 | ] 13 | SimpleCov.start --------------------------------------------------------------------------------