├── VERSION ├── lib ├── levenshtein-ffi.rb └── levenshtein.rb ├── ext └── levenshtein │ ├── .gitignore │ ├── extconf.rb │ ├── levenshtein.h │ └── levenshtein.c ├── .gitignore ├── spec ├── spec_helper.rb └── levenshtein_spec.rb ├── Gemfile ├── CHANGELOG.markdown ├── Gemfile.lock ├── Rakefile ├── levenshtein-ffi.gemspec └── README.markdown /VERSION: -------------------------------------------------------------------------------- 1 | 1.0.3 -------------------------------------------------------------------------------- /lib/levenshtein-ffi.rb: -------------------------------------------------------------------------------- 1 | require 'levenshtein' 2 | -------------------------------------------------------------------------------- /ext/levenshtein/.gitignore: -------------------------------------------------------------------------------- 1 | *.bundle 2 | *.o 3 | Makefile 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | README.markdown.html 2 | *.rbc 3 | pkg 4 | .*.sw? 5 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require File.dirname(__FILE__) + "/../lib/levenshtein" 2 | -------------------------------------------------------------------------------- /ext/levenshtein/extconf.rb: -------------------------------------------------------------------------------- 1 | require 'mkmf' 2 | create_makefile('levenshtein') 3 | -------------------------------------------------------------------------------- /ext/levenshtein/levenshtein.h: -------------------------------------------------------------------------------- 1 | unsigned levenshtein(const char *, const char *); 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source :rubygems 2 | 3 | gem 'ffi', '~> 1.1.5' 4 | 5 | group :test do 6 | gem 'rspec', '2.7.0' 7 | gem 'jeweler' 8 | end 9 | -------------------------------------------------------------------------------- /CHANGELOG.markdown: -------------------------------------------------------------------------------- 1 | 1.0.3 2 | ----- 3 | * Added nil type validation to levenshtein [https://github.com/dbalatero/levenshtein-ffi/pull/3] 4 | 5 | 1.0.2 6 | ----- 7 | * spec updates 8 | * autoloading fix 9 | 10 | 1.0.1 11 | ----- 12 | * Support loading .so files on Linux, whoops! 13 | 14 | 1.0.0 15 | ----- 16 | * Birthday 17 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | diff-lcs (1.1.3) 5 | ffi (1.1.5) 6 | git (1.2.5) 7 | jeweler (1.6.4) 8 | bundler (~> 1.0) 9 | git (>= 1.2.5) 10 | rake 11 | rake (0.9.2) 12 | rspec (2.7.0) 13 | rspec-core (~> 2.7.0) 14 | rspec-expectations (~> 2.7.0) 15 | rspec-mocks (~> 2.7.0) 16 | rspec-core (2.7.1) 17 | rspec-expectations (2.7.0) 18 | diff-lcs (~> 1.1.2) 19 | rspec-mocks (2.7.0) 20 | 21 | PLATFORMS 22 | ruby 23 | 24 | DEPENDENCIES 25 | ffi (~> 1.1.5) 26 | jeweler 27 | rspec (= 2.7.0) 28 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | begin 2 | require 'jeweler' 3 | Jeweler::Tasks.new do |gemspec| 4 | gemspec.name = "levenshtein-ffi" 5 | gemspec.summary = "An FFI version of the levenshtein gem." 6 | gemspec.description = "Provides a fast, cross-Ruby implementation of the levenshtein distance algorithm." 7 | gemspec.email = "dbalatero@gmail.com" 8 | gemspec.homepage = "http://github.com/dbalatero/levenshtein-ffi" 9 | gemspec.authors = ["David Balatero"] 10 | gemspec.add_dependency "ffi" 11 | gemspec.add_development_dependency "rspec" 12 | gemspec.add_development_dependency "jeweler" 13 | end 14 | 15 | Jeweler::GemcutterTasks.new 16 | rescue LoadError 17 | puts "Jeweler not available. Install it with: gem install jeweler" 18 | end 19 | 20 | -------------------------------------------------------------------------------- /lib/levenshtein.rb: -------------------------------------------------------------------------------- 1 | require 'ffi' 2 | 3 | module Levenshtein 4 | class << self 5 | extend FFI::Library 6 | 7 | # Try loading in order. 8 | library = File.dirname(__FILE__) + "/../ext/levenshtein/levenshtein" 9 | candidates = ['.bundle', '.so', '.dylib', ''].map { |ext| library + ext } 10 | ffi_lib(candidates) 11 | 12 | # Safe version of distance, checks that arguments are really strings. 13 | def distance(str1, str2) 14 | validate(str1) 15 | validate(str2) 16 | ffi_distance(str1, str2) 17 | end 18 | 19 | # Unsafe version. Results in a segmentation fault if passed nils! 20 | attach_function :ffi_distance, :levenshtein, [:string, :string], :int 21 | 22 | private 23 | def validate(arg) 24 | unless arg.kind_of?(String) 25 | raise TypeError, "wrong argument type #{arg.class} (expected String)" 26 | end 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /spec/levenshtein_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe Levenshtein do 4 | fixtures = [ 5 | ["hello", "hello", 0], 6 | ["hello", "helo", 1], 7 | ["hello", "jello", 1], 8 | ["hello", "helol", 1], 9 | ["hello", "hellol", 1], 10 | ["hello", "heloll", 2], 11 | ["hello", "cheese", 4], 12 | ["hello", "saint", 5], 13 | ["hello", "", 5], 14 | ] 15 | 16 | fixtures.each do |w1, w2, d| 17 | it "should calculate a distance of #{d} between #{w1} and #{w2}" do 18 | Levenshtein.distance(w1, w2).should == d 19 | Levenshtein.distance(w2, w1).should == d 20 | end 21 | end 22 | 23 | it "should raise an error if either argument is nil" do 24 | expect { Levenshtein.distance("", nil) }.to raise_error TypeError 25 | expect { Levenshtein.distance(nil, "") }.to raise_error TypeError 26 | end 27 | 28 | it "should raise an error if either argument is something else than a string" do 29 | expect { Levenshtein.distance("woah", /woah/) }.to raise_error TypeError 30 | expect { Levenshtein.distance(5.3, "5.3") }.to raise_error TypeError 31 | expect { Levenshtein.distance(Object.new, "Hello") }.to raise_error TypeError 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /levenshtein-ffi.gemspec: -------------------------------------------------------------------------------- 1 | # Generated by jeweler 2 | # DO NOT EDIT THIS FILE DIRECTLY 3 | # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec' 4 | # -*- encoding: utf-8 -*- 5 | 6 | Gem::Specification.new do |s| 7 | s.name = "levenshtein-ffi" 8 | s.version = "1.0.3" 9 | 10 | s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= 11 | s.authors = ["David Balatero"] 12 | s.date = "2012-08-09" 13 | s.description = "Provides a fast, cross-Ruby implementation of the levenshtein distance algorithm." 14 | s.email = "dbalatero@gmail.com" 15 | s.extensions = ["ext/levenshtein/extconf.rb"] 16 | s.extra_rdoc_files = [ 17 | "README.markdown", 18 | "README.markdown.html" 19 | ] 20 | s.files = [ 21 | "CHANGELOG.markdown", 22 | "Gemfile", 23 | "Gemfile.lock", 24 | "README.markdown", 25 | "Rakefile", 26 | "VERSION", 27 | "ext/levenshtein/.gitignore", 28 | "ext/levenshtein/extconf.rb", 29 | "ext/levenshtein/levenshtein.c", 30 | "ext/levenshtein/levenshtein.h", 31 | "levenshtein-ffi.gemspec", 32 | "lib/levenshtein-ffi.rb", 33 | "lib/levenshtein.rb", 34 | "spec/levenshtein_spec.rb", 35 | "spec/spec_helper.rb" 36 | ] 37 | s.homepage = "http://github.com/dbalatero/levenshtein-ffi" 38 | s.require_paths = ["lib"] 39 | s.rubygems_version = "1.8.17" 40 | s.summary = "An FFI version of the levenshtein gem." 41 | 42 | if s.respond_to? :specification_version then 43 | s.specification_version = 3 44 | 45 | if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then 46 | s.add_runtime_dependency(%q, ["~> 1.1.5"]) 47 | s.add_runtime_dependency(%q, [">= 0"]) 48 | s.add_development_dependency(%q, [">= 0"]) 49 | s.add_development_dependency(%q, [">= 0"]) 50 | else 51 | s.add_dependency(%q, ["~> 1.1.5"]) 52 | s.add_dependency(%q, [">= 0"]) 53 | s.add_dependency(%q, [">= 0"]) 54 | s.add_dependency(%q, [">= 0"]) 55 | end 56 | else 57 | s.add_dependency(%q, ["~> 1.1.5"]) 58 | s.add_dependency(%q, [">= 0"]) 59 | s.add_dependency(%q, [">= 0"]) 60 | s.add_dependency(%q, [">= 0"]) 61 | end 62 | end 63 | 64 | -------------------------------------------------------------------------------- /ext/levenshtein/levenshtein.c: -------------------------------------------------------------------------------- 1 | # include 2 | # include 3 | 4 | # ifdef LEV_CASE_INSENSITIVE 5 | # include 6 | # define eq(x, y) (tolower(x) == tolower(y)) 7 | # else 8 | # define eq(x, y) ((x) == (y)) 9 | # endif 10 | 11 | # define min(x, y) ((x) < (y) ? (x) : (y)) 12 | 13 | unsigned int levenshtein (const char *word1, const char *word2) { 14 | size_t len1 = strlen(word1), 15 | len2 = strlen(word2); 16 | unsigned int *v = calloc(len2 + 1, sizeof(unsigned int)); 17 | unsigned int i, j, current, next, cost; 18 | 19 | /* strip common prefixes */ 20 | while (len1 > 0 && len2 > 0 && eq(word1[0], word2[0])) 21 | word1++, word2++, len1--, len2--; 22 | 23 | /* handle degenerate cases */ 24 | if (!len1) return len2; 25 | if (!len2) return len1; 26 | 27 | /* initialize the column vector */ 28 | for (j = 0; j < len2 + 1; j++) 29 | v[j] = j; 30 | 31 | for (i = 0; i < len1; i++) { 32 | /* set the value of the first row */ 33 | current = i + 1; 34 | /* for each row in the column, compute the cost */ 35 | for (j = 0; j < len2; j++) { 36 | /* 37 | * cost of replacement is 0 if the two chars are the same, or have 38 | * been transposed with the chars immediately before. otherwise 1. 39 | */ 40 | cost = !(eq(word1[i], word2[j]) || (i && j && 41 | eq(word1[i-1], word2[j]) && eq(word1[i],word2[j-1]))); 42 | /* find the least cost of insertion, deletion, or replacement */ 43 | next = min(min( v[j+1] + 1, 44 | current + 1 ), 45 | v[j] + cost ); 46 | /* stash the previous row's cost in the column vector */ 47 | v[j] = current; 48 | /* make the cost of the next transition current */ 49 | current = next; 50 | } 51 | /* keep the final cost at the bottom of the column */ 52 | v[len2] = next; 53 | } 54 | free(v); 55 | return next; 56 | } 57 | 58 | # ifdef TEST 59 | # include 60 | # include "levenshtein.h" 61 | 62 | int main (int argc, char **argv) { 63 | unsigned int distance; 64 | if (argc < 3) return -1; 65 | distance = levenshtein(argv[1], argv[2]); 66 | printf("%s vs %s: %u\n", argv[1], argv[2],distance); 67 | } 68 | # endif 69 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | levenshtein-ffi 2 | =============== 3 | 4 | Converted to FFI by David Balatero for Ruby portability. 5 | 6 | This gem originally based on levenshtein. 7 | 8 | Tested on: 9 | 10 | * MRI 1.8.6 11 | * MRI 1.8.7 12 | * MRI 1.9.1 13 | * MRI 1.9.2 14 | * Rubinius 1.1.0 15 | 16 | Known Issues 17 | ============ 18 | * The C extension uses `char*` strings, and so Unicode strings will give incorrect distances. 19 | 20 | Including in Gemfile 21 | ==================== 22 | 23 | gem 'levenshtein-ffi', :require => 'levenshtein' 24 | 25 | Original README 26 | =============== 27 | 28 | The levenshtein module implements fast Damerau-Levenshtein edit distance 29 | computation in O(n) memory and O(n^2) time, using a C wrapper. The module has a 30 | single function: 31 | 32 | require 'levenshtein' 33 | Levenshtein.distance("string1", "string2") == 1 # returns true 34 | 35 | This function can be used as a drop-in replacement for 36 | Text::Levenshtein.levenshtein, which is pure Ruby and rather slow. That's it! 37 | 38 | The code is made available under the following BSD license: 39 | 40 | Copyright (c) 2009, Schuyler Erle. 41 | All rights reserved. 42 | 43 | Redistribution and use in source and binary forms, with or without 44 | modification, are permitted provided that the following conditions are met: 45 | 46 | * Redistributions of source code must retain the above copyright notice, 47 | this list of conditions and the following disclaimer. 48 | 49 | * Redistributions in binary form must reproduce the above copyright notice, 50 | this list of conditions and the following disclaimer in the documentation 51 | and/or other materials provided with the distribution. 52 | 53 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 54 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 55 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 56 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 57 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 59 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 60 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 61 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 62 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 63 | 64 | - end - 65 | --------------------------------------------------------------------------------