├── .rvm.gems ├── .hgignore ├── spec ├── lib │ ├── constants.rb │ └── helpers.rb ├── linguistics │ ├── inflector_spec.rb │ ├── monkeypatches_spec.rb │ ├── en │ │ ├── participles_spec.rb │ │ ├── linkparser_spec.rb │ │ ├── stemmer_spec.rb │ │ ├── wordnet_spec.rb │ │ ├── conjunctions_spec.rb │ │ ├── infinitives_spec.rb │ │ └── articles_spec.rb │ ├── en_spec.rb │ └── iso639_spec.rb └── linguistics_spec.rb ├── experiments ├── wn-proglang.rb ├── farmobjs.rb ├── TEMPLATE.rb.tpl ├── randobjlist.rb ├── generalize.rb ├── conjunct-with-block.rb ├── lafcadio_plural.rb ├── lprintf.rb ├── allobjlist.rb ├── api.rb └── gen_numwords_specs.rb ├── .hgtags ├── examples ├── klingon.rb ├── generalize_sentence.rb └── endocs.rb ├── .pryrc ├── .tm_properties ├── History.rdoc ├── .hgsigs ├── Gemfile ├── lib ├── linguistics │ ├── languagebehavior.rb │ ├── en │ │ ├── participles.rb │ │ ├── stemmer.rb │ │ ├── linkparser.rb │ │ ├── articles.rb │ │ ├── titlecase.rb │ │ ├── conjunctions.rb │ │ ├── wordnet.rb │ │ └── numbers.rb │ ├── monkeypatches.rb │ ├── inflector.rb │ ├── en.rb │ └── iso639.rb └── linguistics.rb ├── .rvmrc ├── Manifest.txt ├── LICENSE ├── Rakefile ├── .irbrc └── README.rdoc /.rvm.gems: -------------------------------------------------------------------------------- 1 | hoe-deveiate -v0.3.0 2 | hoe-bundler -v1.2.0 3 | linkparser -v1.1.4 4 | simplecov -v0.6.4 5 | wordnet -v1.0.0 6 | wordnet-defaultdb -v1.0.1 7 | ruby-stemmer -v0.9.3 8 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | ^commit\-msg\.txt$ 2 | docs/manual/output 3 | docs/api 4 | ^pkg$ 5 | ChangeLog$ 6 | coverage/ 7 | coverage\\.info 8 | ^coverage\.info$ 9 | \.DS_Store 10 | ~$ 11 | \.orig$ 12 | ^\.yardoc/ 13 | docs/.*\.dump$ 14 | ^release\.notes$ 15 | ^doc/ 16 | Gemfile.lock 17 | -------------------------------------------------------------------------------- /spec/lib/constants.rb: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'linguistics' 4 | 5 | 6 | ### A collection of constants used in testing 7 | module Linguistics::TestConstants # :nodoc:all 8 | 9 | TEST_ARRAY = %w{stone stick hammer stone lantern} 10 | TEST_STRING = "banner" 11 | TEST_NUMBER = 5 12 | 13 | end 14 | 15 | 16 | -------------------------------------------------------------------------------- /experiments/wn-proglang.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | BEGIN { 4 | $LOAD_PATH.unshift File::dirname(File::dirname( __FILE__ )) + "/lib" 5 | require 'linguistics' 6 | } 7 | 8 | Linguistics::use( :en ) 9 | unless Linguistics::EN::haveWordnet? 10 | 11 | # Demo of WordNet integration. 12 | 13 | "programming language".en.gloss 14 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 1e029bfd9ead84151b6ddf888c74dca2b13272cf 1.0.7 2 | 1e029bfd9ead84151b6ddf888c74dca2b13272cf 1.0.7 3 | 0000000000000000000000000000000000000000 1.0.7 4 | 0000000000000000000000000000000000000000 1.0.7 5 | 5f4fa2c136c7ad28ece8c1bcbfad0982532fd9eb 1.0.7 6 | da353c888ad408857b9c5cca1ec60675f3121e60 1.0.8 7 | 1359338b7128798679095466a2a96903832b48d4 v2.0.0 8 | d8d00bf937f2dd8ca42abd6631453da16a9a263e v2.0.1 9 | a516c984a9a169bad58921a39d7d72437b51c9b5 v2.0.2 10 | -------------------------------------------------------------------------------- /experiments/farmobjs.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | BEGIN { 4 | $LOAD_PATH.unshift File::dirname(File::dirname( __FILE__ )) + "/lib" 5 | require 'linguistics' 6 | } 7 | 8 | Linguistics::use( :en ) 9 | 10 | # Just a(nother) fun little demo of the conjunction (junction, what's 11 | # your) function. 
12 | animals = %w{dog cow ox chicken goose goat cow dog rooster llama 13 | pig goat dog cat cat dog cow goat goose goose ox alpaca} 14 | puts "The farm has: " + 15 | animals.en.conjunction 16 | -------------------------------------------------------------------------------- /experiments/TEMPLATE.rb.tpl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # 3 | # (>>>description<<<) 4 | # 5 | # Time-stamp: <24-Aug-2003 16:11:13 deveiant> 6 | # 7 | 8 | BEGIN { 9 | base = File::dirname( File::dirname(File::expand_path(__FILE__)) ) 10 | $LOAD_PATH.unshift "#{base}/lib" 11 | 12 | require "#{base}/utils.rb" 13 | include UtilityFunctions 14 | } 15 | 16 | try( "(>>>FILE_SANS<<<)" ) { 17 | (>>>POINT<<<) 18 | } 19 | 20 | 21 | >>>TEMPLATE-DEFINITION-SECTION<<< 22 | ("description" "Experiment description: ") 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/klingon.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -w 2 | 3 | require 'linguistics' 4 | 5 | # An example of how you'd start writing a language module that provides 6 | # Klingon-language inflection. It's obviously not really a useful 7 | # implementation. 
#!/usr/bin/env ruby
# vim: set nosta noet ts=4 sw=4:
#
# Pry startup file for hacking on Linguistics from a checkout: it puts the
# checkout's lib/ directory at the front of the load path, then loads the
# library with debug-level, colorized logging.

BEGIN {
	require 'pathname'
	$LOAD_PATH.unshift( Pathname.new( __FILE__ ).dirname + 'lib' )
}

begin
	require 'loggability'
	require 'linguistics'

	Loggability.level = :debug
	Loggability.format_with( :color )

	# Linguistics.use( :en )
	# Linguistics.use( :en, monkeypatch: true )
rescue ScriptError, StandardError => err
	# A failed load must not keep the console from starting, so report it and
	# carry on. LoadError and SyntaxError are ScriptErrors rather than
	# StandardErrors, which is why both classes are listed; rescuing Exception
	# itself would also swallow Interrupt and SystemExit.
	$stderr.puts "Linguistics failed to load: %p: %s" % [ err.class, err.message ]
	$stderr.puts( err.backtrace )
end
### Build a random-length Array of words drawn (with repetition) from the
### Objects list. The result holds between MinObjects and MaxObjects entries,
### inclusive.
###
### The previous implementation called Array#nitems, which no longer exists on
### Ruby 1.9 and later, so it raised NoMethodError. It also picked entries via
### `rand(n) - 1` and counted with `0.upto`, which made the list one longer
### than the drawn number.
def randobjlist
	count = rand( MinObjects..MaxObjects )
	return Array.new( count ) { Objects.sample }
end
words. This'll work much better when 13 | # integration with LinkParser is added, as this doesn't know about parts of 14 | # speech or anything. 15 | 16 | if ARGV.empty? 17 | print "Sentence: " 18 | sentence = $stdin.gets 19 | else 20 | sentence = ARGV.join(" ") 21 | end 22 | 23 | newSentence = sentence.split.collect {|word| 24 | word.hypernyms ? word.hypernyms[0].words[0] : word 25 | }.join(" ") 26 | 27 | 28 | puts "Converted:\n %s\nto\n %s\n\n" % [ sentence, newSentence ] 29 | 30 | -------------------------------------------------------------------------------- /.hgsigs: -------------------------------------------------------------------------------- 1 | 401a04c4cf43f4a88093a3013003c3d4baff7a61 0 iEYEABECAAYFAkr1Ap0ACgkQ+zlz4UKpE6SmtwCfVkDwdziUnU66cKKgnU4ETNsa8UsAnRUz1k+e+m4aZTwieDU9jhJJTHbT 2 | bebbaa868974c3298865e23f1e21aeae67fb354b 0 iEYEABECAAYFAkr1CEgACgkQ+zlz4UKpE6QcYQCgycc21E8FelXeiEUXnCNg/IUQcWwAn3rChiQ41MKMX7B9EhqOb1CpycrD 3 | 8029de2f9c60345ddb5cc6bfacee3132a47c6fea 0 iEYEABECAAYFAksC148ACgkQ+zlz4UKpE6TWOACfTmF4+MNXij9OBD0ZVkduuDAlbQIAoKD2KlZYe+vWzGHc4hm+nP9jTW+X 4 | a7cda4b8747c6d34688ec97e2d721a26aab06bae 0 iEYEABECAAYFAlB1mUEACgkQ+zlz4UKpE6RXMwCcC1rJErdthKKiK0SgqNl+rF+aLywAoNmSPnI9ZVij7a/rrNvGHlMooHxL 5 | 5240c28c80bd748fee9041cdd5a00bad63c33fe0 0 iEYEABECAAYFAlEsIfsACgkQ+zlz4UKpE6RKlQCg2eD5eUiyzx6yyWriWstVGXC4kXUAoLndZG0LHKwRfvs60L3/1JdjNZvt 6 | d2eeec0b78321a8298e23599bcc05839686a1ac0 0 iEYEABECAAYFAlEuQ4sACgkQ+zlz4UKpE6QjbQCfThFNbhBvcC06wgfAoKJXA41wClYAoM4NguP0W73oMqQ4TRbufsJWpzoj 7 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # -*- ruby -*- 2 | 3 | # DO NOT EDIT THIS FILE. Instead, edit Rakefile, and run `rake bundler:gemfile`. 
#!/usr/bin/ruby
#
# Written to find a minimal testcase for the #conjunction-with-block bug.
#
# Time-stamp: <04-Nov-2005 07:43:36 ged>
#

BEGIN {
	base = File::dirname( File::dirname(File::expand_path(__FILE__)) )
	$LOAD_PATH.unshift "#{base}/lib"

	require "#{base}/utils.rb"
	include UtilityFunctions
}

require 'linguistics'

Linguistics::use( :en, :installProxy => true )
array = %w{sheep shrew goose bear penguin barnacle sheep goose goose}

# Output goes to $stdout: the old $defout global was removed in Ruby 1.9, so
# it is nil there and calling #puts on it raises NoMethodError.

# Each call passes a block that maps a word to "<first letter>-word".
$stdout.puts "Called via language proxy: ",
	array.en.conjunction {|word| "%s-word" % [word[0,1]]}

$stdout.puts "Called via delegator proxy: ",
	array.conjunction {|word| "%s-word" % [word[0,1]]}

# Same call as the first one, repeated after the delegator call -- presumably
# to see whether going through the delegator changes later results.
$stdout.puts "Called via language proxy: ",
	array.en.conjunction {|word| "%s-word" % [word[0,1]]}
# This is an RSpec 2 shared behavior for language plugins. You can use this to be
# sure that your language plugin conforms to the API expected by Linguistics. You'll
# probably want to use it something like this:
#
#   require 'linguistics/languagebehavior'
#
#   describe Linguistics::KL do
#
#     it_should_behave_like "a Linguistics language module"
#
#     # ... any other specs for your module
#
#   end
#
# The name passed to it_should_behave_like has to match the name given to
# shared_examples_for below, including the lowercase "a".

shared_examples_for "a Linguistics language module" do

	# The module under test is whatever the including spec handed to `describe`.
	let( :language_module ) do
		described_class
	end


	# The plugin must show up among the values of Linguistics.languages.
	it "registers itself with the Linguistics module when required" do
		Linguistics.languages.values.should include( language_module )
	end

end
# Naive pluralizer used as the comparison baseline in this experiment:
#
# * a word ending in a non-vowel followed by "y" (case-insensitive) swaps
#   the "y" for "ies"
# * a word ending in "x" or "s" gains "es"
# * anything else gains "s"
def plural(singular)
	consonant_y = /([^aeiou])y$/i
	return singular.gsub( consonant_y, '\1ies' ) if singular =~ consonant_y

	suffix = ( singular =~ /[xs]$/ ) ? "es" : "s"
	singular + suffix
end
15 | def present_participle 16 | plural = self.to_s.en.plural_verb 17 | 18 | plural.sub!( /ie$/, 'y' ) or 19 | plural.sub!( /ue$/, 'u' ) or 20 | plural.sub!( /([auy])e$/, '$1' ) or 21 | plural.sub!( /i$/, '' ) or 22 | plural.sub!( /([^e])e$/, "\\1" ) or 23 | /er$/.match( plural ) or 24 | plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" ) 25 | 26 | return "#{plural}ing" 27 | end 28 | alias_method :part_pres, :present_participle 29 | Linguistics::EN.register_lprintf_formatter :PART_PRES, :present_participle 30 | 31 | 32 | end # module Linguistics::EN::Participles 33 | 34 | -------------------------------------------------------------------------------- /experiments/lprintf.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # 3 | # Experiment to work out the implementation of sprintf-like formatting. 4 | # 5 | # Conclusion: re-opening the module doesn't work well, but I've added it to 6 | # en.rb, and it seems to work quite well for most cases. Still having trouble 7 | # with 'CONJUNCT'. 
module Linguistics

	### Helper methods that are mixed into Array.
	module ArrayExtensions

		### Return a copy of the receiver with a separator placed between each
		### pair of neighbouring elements. The separator is the first argument,
		### or, when a block is given, the block's return value; the block is
		### called with a two-element Array holding the neighbours.
		def separate( *args, &block )
			return self.dup.separate!( *args, &block )
		end

		### In-place variant of #separate; returns the receiver. Raises a
		### LocalJumpError if called with neither a separator nor a block.
		def separate!( *args )
			if args.empty? && !block_given?
				raise LocalJumpError, "no block given for no-arg #separate!"
			end
			separator = args.first

			# An Array of n elements has n-1 gaps. Each insertion shifts the
			# following elements right by one, so gap number g sits at 2g+1.
			( self.length - 1 ).times do |gap|
				pos = ( gap * 2 ) + 1
				filler = block_given? ? yield( self[pos - 1, 2] ) : separator
				self.insert( pos, filler )
			end

			return self
		end

	end # module ArrayExtensions

end # module Linguistics
# Return a more general term for +word+ by walking its hypernyms.
#
# +word+ must respond to #synset. When that returns nil or false, or when
# traversing the synset's :hypernyms yields nothing, +word+ itself is
# returned. Otherwise the result is the first word of the synset found a
# quarter of the way along the traversal.
#
# Progress messages go to $stderr: the old $deferr global was removed in
# Ruby 1.9, so it is nil there and calling #puts on it raised NoMethodError.
def generalized_word( word )
	$stderr.puts " Traversing hypernyms for #{word}"
	syn = word.synset or return word
	nyms = syn.traverse( :hypernyms )
	return word if nyms.empty?

	# Pick an entry a quarter of the way into the list of traversed synsets.
	general_subj = nyms[ nyms.length / 4 ]
	$stderr.puts " %d synsets returned. Picking %d (%s)" % [
		nyms.length,
		nyms.length / 4,
		general_subj.words.first,
	]
	return general_subj.words.first
end
spec/linguistics/en/linkparser_spec.rb 34 | spec/linguistics/en/numbers_spec.rb 35 | spec/linguistics/en/participles_spec.rb 36 | spec/linguistics/en/pluralization_spec.rb 37 | spec/linguistics/en/stemmer_spec.rb 38 | spec/linguistics/en/titlecase_spec.rb 39 | spec/linguistics/en/wordnet_spec.rb 40 | spec/linguistics/en_spec.rb 41 | spec/linguistics/inflector_spec.rb 42 | spec/linguistics/iso639_spec.rb 43 | spec/linguistics/monkeypatches_spec.rb 44 | spec/linguistics_spec.rb 45 | -------------------------------------------------------------------------------- /spec/linguistics/en/participles_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rspec -cfd 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | require 'linguistics/en' 18 | require 'linguistics/en/participles' 19 | 20 | 21 | describe Linguistics::EN::Participles do 22 | 23 | before( :all ) do 24 | setup_logging( :fatal ) 25 | Linguistics.use( :en ) 26 | end 27 | 28 | after( :all ) do 29 | reset_logging() 30 | end 31 | 32 | 33 | it "returns 'seeing' as the present participle for 'sees'" do 34 | "sees".en.present_participle.should == 'seeing' 35 | end 36 | 37 | it "returns 'eating' as the present participle for 'eats'" do 38 | "eats".en.present_participle.should == 'eating' 39 | end 40 | 41 | it "returns 'batting' as the present participle for 'bats'" do 42 | "bats".en.present_participle.should == 'batting' 43 | end 44 | 45 | it "returns 'hating' as the present participle for 'hates'" do 46 | "hates".en.present_participle.should == 'hating' 47 | end 48 | 49 | it "returns 'spying' as the present 
participle for 'spies'" do 50 | "spies".en.present_participle.should == 'spying' 51 | end 52 | 53 | 54 | end 55 | 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2003-20011, Michael Granger 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the author/s, nor the names of the project's 15 | contributors may be used to endorse or promote products derived from this 16 | software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /spec/linguistics/en/linkparser_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env spec -cfs 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | require 'linguistics/en/linkparser' 18 | 19 | 20 | describe Linguistics::EN::LinkParser do 21 | 22 | before( :all ) do 23 | setup_logging( :fatal ) 24 | Linguistics.use( :en ) 25 | end 26 | 27 | after( :all ) do 28 | reset_logging() 29 | end 30 | 31 | 32 | it "adds EN::LinkParser to the list of English language modules" do 33 | Linguistics::EN::MODULES.include?( Linguistics::EN::LinkParser ) 34 | end 35 | 36 | 37 | describe "on a system that has the 'linkparser' library installed" do 38 | 39 | it "can create a LinkParser::Sentence from a sentence in a string" do 40 | pending "installation of the linkparser library" unless 41 | Linguistics::EN.has_linkparser? 42 | "This is a sentence.".en.sentence.should be_a( LinkParser::Sentence ) 43 | end 44 | 45 | end 46 | 47 | 48 | describe "on a system that doesn't have the 'linkparser' library" do 49 | it "raises an NotImplementedError when you try to use linkparser functionality" do 50 | 51 | # If the system *does* have linkparser support, pretend it doesn't. 52 | if Linguistics::EN.has_linkparser? 53 | Linguistics::EN::LinkParser.stub( :has_linkparser? 
).and_return( false ) 54 | exception = stub( "linkparser load error", :message => 'no such file to load' ) 55 | Linguistics::EN::LinkParser.stub( :lp_error ).and_return( exception ) 56 | end 57 | 58 | expect { 59 | "This is a sentence.".en.sentence 60 | }.to raise_error( NotImplementedError, /not loaded/i ) 61 | end 62 | 63 | end 64 | 65 | end 66 | 67 | -------------------------------------------------------------------------------- /spec/linguistics/en/stemmer_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env spec -cfs 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | require 'linguistics/en' 18 | require 'linguistics/en/stemmer' 19 | 20 | 21 | describe Linguistics::EN::Stemmer do 22 | 23 | before( :all ) do 24 | setup_logging() 25 | Linguistics.use( :en ) 26 | end 27 | 28 | after( :all ) do 29 | reset_logging() 30 | end 31 | 32 | 33 | it "adds EN::Stemmer to the list of English language modules" do 34 | Linguistics::EN::MODULES.include?( Linguistics::EN::Stemmer ) 35 | end 36 | 37 | 38 | describe "on a system that has the 'ruby-stemmer' library installed" do 39 | 40 | before( :each ) do 41 | pending "installation of the ruby-stemmer library" unless 42 | Linguistics::EN.has_stemmer? 43 | end 44 | 45 | it "can fetch the stem of a word" do 46 | "communication".en.stem.should == 'communic' 47 | end 48 | 49 | end 50 | 51 | 52 | describe "on a system that doesn't have the 'ruby-stemmer' library" do 53 | 54 | before( :all ) do 55 | # If the system *does* have stemmer support, pretend it doesn't. 56 | if Linguistics::EN.has_stemmer? 
57 | error = LoadError.new( "simulated exception: no such file to load -- lingua/stemmer" ) 58 | Linguistics::EN::Stemmer.instance_variable_set( :@has_stemmer, false ) 59 | Linguistics::EN::Stemmer.instance_variable_set( :@stemmer_error, error ) 60 | end 61 | end 62 | 63 | it "raises an NotImplementedError when you try to use stemmer functionality" do 64 | expect { 65 | "communication".en.stem 66 | }.to raise_error( LoadError, %r{lingua/stemmer}i ) 67 | end 68 | 69 | end 70 | 71 | end 72 | 73 | -------------------------------------------------------------------------------- /spec/linguistics_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env spec -cfs 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | 18 | 19 | describe Linguistics do 20 | 21 | before( :all ) do 22 | setup_logging() 23 | end 24 | 25 | after( :each ) do 26 | reset_logging() 27 | end 28 | 29 | 30 | describe "version methods" do 31 | 32 | it "returns a version string if asked" do 33 | Linguistics.version_string.should =~ /\w+ [\d.]+/ 34 | end 35 | 36 | it "returns a version string with a build number if asked" do 37 | Linguistics.version_string(true).should =~ /\w+ [\d.]+ \(build [[:xdigit:]]+\)/ 38 | end 39 | end 40 | 41 | 42 | describe "language-loading functions" do 43 | 44 | it "load a language's linguistic functions via variants of its ISO639 code" do 45 | testclass = Class.new 46 | Linguistics.use( :eng, :classes => testclass ).should == [ testclass ] 47 | testclass.new.should respond_to( :eng ) 48 | testclass.new.should respond_to( :en ) 49 | end 50 | 51 | it "load a language's linguistic functions 
via the 2-letter variant of its ISO639 code" do 52 | testclass = Class.new 53 | Linguistics.use( :en, :classes => testclass ).should == [ testclass ] 54 | testclass.new.should respond_to( :eng ) 55 | testclass.new.should respond_to( :en ) 56 | end 57 | 58 | it "default to extending a default set of classes" do 59 | Linguistics.use( :eng ).should == Linguistics::DEFAULT_EXT_CLASSES 60 | [].should respond_to( :eng ) 61 | end 62 | 63 | it "raise an error when a language that doesn't exist is requested" do 64 | expect { 65 | Linguistics.use( :zz ) 66 | }.to raise_error( RuntimeError, /unknown ISO639-2 language code/i ) 67 | end 68 | 69 | it "raise an error for valid languages that don't have any linguistic functions to load" do 70 | expect { 71 | Linguistics.use( :ja ) 72 | }.to raise_error( LoadError, /failed to load a language extension/i ) 73 | end 74 | 75 | end 76 | 77 | end 78 | -------------------------------------------------------------------------------- /spec/linguistics/en/wordnet_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env spec -cfs 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | require 'linguistics/en' 18 | require 'linguistics/en/wordnet' 19 | 20 | 21 | describe Linguistics::EN::WordNet do 22 | 23 | before( :all ) do 24 | setup_logging() 25 | Linguistics.use( :en ) 26 | end 27 | 28 | after( :all ) do 29 | reset_logging() 30 | end 31 | 32 | 33 | it "adds EN::WordNet to the list of English language modules" do 34 | Linguistics::EN::MODULES.include?( Linguistics::EN::WordNet ) 35 | end 36 | 37 | 38 | describe "on a system that has the 
'wordnet' library installed" do 39 | 40 | before( :each ) do 41 | pending "installation of the wordnet library" unless 42 | Linguistics::EN.has_wordnet? 43 | end 44 | 45 | it "can create a WordNet::Synset from a word" do 46 | "jackal".en.synset.should be_a( WordNet::Synset ) 47 | end 48 | 49 | it "can load all synsets for a word" do 50 | result = "appear".en.synsets 51 | result.should have( 7 ).members 52 | result.should include( WordNet::Synset[200422090] ) 53 | end 54 | 55 | end 56 | 57 | 58 | describe "on a system that doesn't have the 'wordnet' library" do 59 | before( :all ) do 60 | # If the system *does* have wordnet support, pretend it doesn't. 61 | if Linguistics::EN.has_wordnet? 62 | @had_wordnet = true 63 | error = LoadError.new( "no such file to load -- wordnet" ) 64 | Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, false ) 65 | Linguistics::EN::WordNet.instance_variable_set( :@wn_error, error ) 66 | end 67 | end 68 | 69 | after( :all ) do 70 | if @had_wordnet 71 | Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, true ) 72 | Linguistics::EN::WordNet.instance_variable_set( :@wn_error, nil ) 73 | end 74 | end 75 | 76 | it "raises the appropriate LoadError when you try to use wordnet functionality" do 77 | expect { 78 | "persimmon".en.synset 79 | }.to raise_error( LoadError, %r{wordnet}i ) 80 | end 81 | 82 | end 83 | 84 | end 85 | 86 | -------------------------------------------------------------------------------- /lib/linguistics/en/stemmer.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'linguistics' unless defined?( Linguistics ) 4 | require 'linguistics/en' unless defined?( Linguistics::EN ) 5 | 6 | # Ruby-Stemmer support for the English-language Linguistics module. It 7 | # requires the Ruby-Stemmer gem to be installed; if it is not 8 | # installed, calling the functions defined by this file will re-raise 9 | # the LoadError saved when the gem failed to load. 
10 | # 11 | # # Test to be sure the Stemmer gem loaded okay. 12 | # Linguistics::EN.has_stemmer? 13 | # # => true 14 | # 15 | module Linguistics::EN::Stemmer 16 | 17 | # Module instance variables -- copied over to the EN module when registered 18 | @has_stemmer = false 19 | @stemmer_error = nil 20 | @stemmer = nil 21 | 22 | # Load Ruby-Stemmer if possible, saving the error that occurs if anything goes wrong. 23 | begin 24 | require 'lingua/stemmer' 25 | @has_stemmer = true 26 | rescue LoadError => err 27 | @stemmer_error = err 28 | end 29 | 30 | 31 | # Container for methods intended to extend the EN module as singleton methods. 32 | module SingletonMethods 33 | 34 | ### Returns +true+ if Ruby-Stemmer was loaded okay 35 | def has_stemmer? ; @has_stemmer; end 36 | 37 | ### If #has_stemmer? returns +false+, this can be called to fetch the 38 | ### exception which was raised when Ruby-Stemmer was loaded. 39 | def stemmer_error ; @stemmer_error; end 40 | 41 | end # module SingletonMethods 42 | extend SingletonMethods 43 | 44 | 45 | # Register this module to the list of modules to include 46 | Linguistics::EN.register_extension( self ) 47 | 48 | ################################################################# 49 | ### M O D U L E M E T H O D S 50 | ################################################################# 51 | 52 | ### The instance of the Lingua::Stemmer used for all Linguistics Stemmer 53 | ### functions. 54 | def self::stemmer 55 | raise self.stemmer_error unless self.has_stemmer? 56 | @stemmer ||= Lingua::Stemmer.new 57 | end 58 | 59 | 60 | ################################################################# 61 | ### S T E M M E R I N T E R F A C E 62 | ################################################################# 63 | 64 | ###### 65 | public 66 | ###### 67 | 68 | 69 | ### Return the stem of the receiving word. 
70 | def stem 71 | return Linguistics::EN::Stemmer.stemmer.stem( self.obj.to_s ) 72 | end 73 | 74 | end # module Linguistics::EN::Stemmer 75 | 76 | -------------------------------------------------------------------------------- /lib/linguistics/inflector.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # coding: utf-8 3 | 4 | require 'loggability' 5 | require 'linguistics' unless defined?( Linguistics ) 6 | 7 | # A facade object that acts as the extension point for linguistic modules 8 | # for a single language. A single instance of an inflector is generated 9 | # for an object that has been extended with a Linguistics language 10 | # the first time the language is used. 11 | class Linguistics::Inflector 12 | extend Loggability 13 | 14 | 15 | # Loggability API -- log to the linguistics logger 16 | log_to :linguistics 17 | 18 | 19 | ### Create a new inflector for +obj+. 20 | def initialize( language_code, obj ) 21 | raise TypeError, "can't inflect for another inflector!" if 22 | obj.is_a?( Linguistics::Inflector ) 23 | @language_code = language_code 24 | @obj = obj 25 | super() 26 | end 27 | 28 | 29 | ###### 30 | public 31 | ###### 32 | 33 | # The object the inflector is delegating for 34 | attr_reader :obj 35 | 36 | # The inflector's language code 37 | attr_reader :language_code 38 | 39 | 40 | ### Return the english-language name of the language the inflector is delegating 41 | ### for. 42 | def language 43 | ::Linguistics::ISO639::LANGUAGE_CODES[ self.language_code.to_sym ][:eng_name] 44 | end 45 | 46 | 47 | ### Returns +true+ if either the inflector or the object it's wrapping respond to 48 | ### the specified +message+. 49 | def respond_to_missing?( message, include_priv=false ) 50 | return self.obj.respond_to?( message, include_priv ) 51 | end 52 | 53 | 54 | ### Return the target object as a String. 
55 | def to_s 56 | return self.obj.to_s 57 | end 58 | 59 | 60 | ### Return the target object as an Integer 61 | def to_i 62 | return self.obj.to_i 63 | end 64 | 65 | 66 | ### Output a programmer-readable representation of the object suitable for debugging. 67 | def inspect 68 | return "#<(%s-language inflector) for <%s:0x%0x> >" % [ 69 | self.language, 70 | @obj.class, 71 | @obj.object_id / 2 72 | ] 73 | end 74 | 75 | 76 | ######### 77 | protected 78 | ######### 79 | 80 | ### Delegate missing methods to the target object. 81 | def method_missing( sym, *args, &block ) 82 | return super unless self.obj.respond_to?( sym ) 83 | meth = self.obj.method( sym ) 84 | self.singleton_class.send( :define_method, sym, &meth ) 85 | return self.method( sym ).call( *args, &block ) 86 | end 87 | 88 | end # class Linguistics::Inflector 89 | 90 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | 3 | begin 4 | require 'rspec' 5 | require 'rspec/core/rake_task' 6 | rescue LoadError 7 | abort "This Rakefile requires RSpec. Try again after doing 'gem install rspec'" 8 | end 9 | 10 | begin 11 | require 'hoe' 12 | rescue LoadError 13 | abort "This Rakefile requires Hoe. 
Try again after doing 'gem install hoe'" 14 | end 15 | 16 | # The path to the generated .gemspec file 17 | GEMSPEC = '.gemspec' 18 | 19 | Hoe.plugin :mercurial 20 | Hoe.plugin :bundler 21 | Hoe.plugin :publish 22 | Hoe.plugin :signing 23 | 24 | Hoe.plugins.delete :rubyforge 25 | 26 | hoespec = Hoe.spec 'linguistics' do |spec| 27 | spec.name = 'linguistics' 28 | spec.readme_file = 'README.rdoc' 29 | spec.history_file = 'History.rdoc' 30 | spec.extra_rdoc_files = FileList[ '*.rdoc' ] 31 | spec.license 'BSD' 32 | 33 | spec.developer 'Michael Granger', 'ged@FaerieMUD.org' 34 | 35 | spec.dependency 'loggability', '~> 0.7' 36 | 37 | spec.dependency 'hoe-deveiate', '~> 0.3', :development 38 | spec.dependency 'hoe-bundler', '~> 1.2', :development 39 | spec.dependency 'linkparser', '~> 1.1', :development 40 | spec.dependency 'wordnet', '~> 1.0', :development 41 | spec.dependency 'wordnet-defaultdb', '~> 1.0', :development 42 | spec.dependency 'ruby-stemmer', '~> 0.9', :development 43 | 44 | spec.spec_extras[:rdoc_options] = ['-f', 'fivefish', '-t', 'Ruby Linguistics Toolkit'] 45 | spec.spec_extras[:post_install_message] = [ 46 | "This library also presents tie-ins for the 'linkparser' and", 47 | "'wordnet' libraries, which you can enable by installing the", 48 | "gems of the same name." 
49 | ].join( "\n" ) 50 | 51 | spec.require_ruby_version( '>=1.9.3' ) 52 | spec.hg_sign_tags = true if spec.respond_to?( :hg_sign_tags= ) 53 | spec.check_history_on_release = true if spec.respond_to?( :check_history_on_release= ) 54 | 55 | spec.rdoc_locations << "deveiate:/usr/local/www/public/code/#{remote_rdoc_dir}" 56 | end 57 | 58 | ENV['VERSION'] ||= hoespec.spec.version.to_s 59 | 60 | task 'hg:precheckin' => [ :check_history, :check_manifest, :spec ] 61 | 62 | desc "Build a coverage report" 63 | task :coverage do 64 | ENV["COVERAGE"] = 'yes' 65 | Rake::Task[:spec].invoke 66 | end 67 | 68 | 69 | desc "generate a gemspec from your Hoe.spec" 70 | file GEMSPEC => 'Rakefile' do |task| 71 | spec = hoespec.spec.dup 72 | spec.files.delete( '.gemtest' ) 73 | spec.version = "#{spec.version}.pre.#{Time.now.strftime("%Y%m%d%H%M%S")}" 74 | File.open( task.name, 'w' ) do |fh| 75 | fh.write( spec.to_ruby ) 76 | end 77 | end 78 | 79 | -------------------------------------------------------------------------------- /spec/linguistics/en_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env spec -cfs 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | require 'linguistics/en' 18 | require 'linguistics/languagebehavior' 19 | 20 | 21 | describe Linguistics::EN do 22 | 23 | before( :all ) do 24 | setup_logging( :fatal ) 25 | Linguistics.use( :en, :proxy => true ) 26 | include Linguistics::EN 27 | end 28 | 29 | after( :all ) do 30 | reset_logging() 31 | end 32 | 33 | 34 | it_behaves_like "a Linguistics language module" 35 | 36 | 37 | it "provides a predicate for testing for the 
presence of modules by name" do 38 | Linguistics::EN.should_not have_extension( 'nonexistant' ) 39 | Linguistics::EN.should have_extension( 'articles' ) 40 | end 41 | 42 | it "knows that it's not in 'classical' mode by default" do 43 | Linguistics::EN.should_not be_classical() 44 | end 45 | 46 | it "can run a single block in classical mode" do 47 | Linguistics::EN.in_classical_mode do 48 | Linguistics::EN.should be_classical() 49 | end 50 | end 51 | 52 | it "handles nested classical blocks correctly" do 53 | Linguistics::EN.in_classical_mode do 54 | Linguistics::EN.in_classical_mode do 55 | Linguistics::EN.should be_classical() 56 | end 57 | Linguistics::EN.should be_classical() 58 | end 59 | Linguistics::EN.should_not be_classical() 60 | end 61 | 62 | 63 | it "provides a sprintf-like function for interpolating variables into a String" do 64 | "I have %CONJUNCT.".en.lprintf( ["cat", "cat", "dog"] ). 65 | should == "I have two cats and a dog." 66 | end 67 | 68 | 69 | context "lprintf formatters" do 70 | 71 | before( :all ) do 72 | @real_formatters = Linguistics::EN.lprintf_formatters 73 | end 74 | 75 | before( :each ) do 76 | Linguistics::EN.lprintf_formatters.clear 77 | end 78 | 79 | after( :all ) do 80 | Linguistics::EN.lprintf_formatters.replace( @real_formatters ) 81 | end 82 | 83 | 84 | it "provides a way to register new lprintf formatters with a Symbol" do 85 | Linguistics::EN.register_lprintf_formatter :TEST, :plural 86 | Linguistics::EN.lprintf_formatters.should have( 1 ).member 87 | Linguistics::EN.lprintf_formatters.should include( :TEST ) 88 | Linguistics::EN.lprintf_formatters[ :TEST ].should be_a( Proc ) 89 | end 90 | 91 | end 92 | end 93 | 94 | -------------------------------------------------------------------------------- /spec/linguistics/iso639_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env spec -cfs 2 | #encoding: utf-8 3 | 4 | BEGIN { 5 | require 'pathname' 6 | basedir = Pathname.new( 
__FILE__ ).dirname.parent.parent 7 | 8 | libdir = basedir + "lib" 9 | 10 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 11 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 12 | } 13 | 14 | require 'rspec' 15 | require 'spec/lib/helpers' 16 | 17 | require 'linguistics' 18 | require 'linguistics/iso639' 19 | 20 | 21 | describe Linguistics::ISO639 do 22 | 23 | # eng||en|English|anglais 24 | it "loads simple language codes from its __DATA__ section" do 25 | Linguistics::LANGUAGE_CODES.should have_key( :en ) 26 | Linguistics::LANGUAGE_CODES[ :en ].should have(3).members 27 | 28 | Linguistics::LANGUAGE_CODES[ :en ].should have_key( :codes ) 29 | Linguistics::LANGUAGE_CODES[ :en ][:codes].should have(2).members 30 | Linguistics::LANGUAGE_CODES[ :en ][:codes].should include("en", "eng") 31 | 32 | Linguistics::LANGUAGE_CODES[ :en ].should have_key( :eng_name ) 33 | Linguistics::LANGUAGE_CODES[ :en ][:eng_name].should == 'English' 34 | Linguistics::LANGUAGE_CODES[ :en ].should have_key( :fre_name ) 35 | Linguistics::LANGUAGE_CODES[ :en ][:fre_name].should == 'anglais' 36 | end 37 | 38 | it "loads language codes with variants from its __DATA__ section" do 39 | 40 | # cze|ces|cs|Czech|tchèque 41 | Linguistics::LANGUAGE_CODES.should have_key( :cs ) 42 | Linguistics::LANGUAGE_CODES[ :cs ].should have(3).members 43 | 44 | Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :codes ) 45 | Linguistics::LANGUAGE_CODES[ :cs ][:codes].should have(3).members 46 | Linguistics::LANGUAGE_CODES[ :cs ][:codes].should include("cs", "ces", "cze") 47 | 48 | Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :eng_name ) 49 | Linguistics::LANGUAGE_CODES[ :cs ][:eng_name].should == 'Czech' 50 | Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :fre_name ) 51 | Linguistics::LANGUAGE_CODES[ :cs ][:fre_name].should == 'tchèque' 52 | 53 | # mac|mkd|mk|Macedonian|macédonien 54 | Linguistics::LANGUAGE_CODES.should have_key( :mk ) 55 | 
Linguistics::LANGUAGE_CODES[ :mk ].should have( 3 ).members 56 | 57 | Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :codes ) 58 | Linguistics::LANGUAGE_CODES[ :mk ][:codes].should have(3).members 59 | Linguistics::LANGUAGE_CODES[ :mk ][:codes].should include("mk", "mac", "mkd") 60 | 61 | Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :eng_name ) 62 | Linguistics::LANGUAGE_CODES[ :mk ][:eng_name].should == 'Macedonian' 63 | Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :fre_name ) 64 | Linguistics::LANGUAGE_CODES[ :mk ][:fre_name].should == 'macédonien' 65 | 66 | end 67 | 68 | end 69 | -------------------------------------------------------------------------------- /lib/linguistics/en/linkparser.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | require 'linguistics/en' unless defined?( Linguistics::EN ) 4 | 5 | # LinkParser support for the English-language Linguistics module. 6 | # LinkParser enables grammatical queries of English language sentences. 7 | # 8 | # # Test to see whether or not the link parser is loaded. 9 | # Linguistics::EN.has_linkparser? 10 | # # => true 11 | # 12 | # # Diagram the first linkage for a test sentence 13 | # puts "he is a big dog".en.sentence.linkages.first.to_s 14 | # +---O*---+ 15 | # | +--Ds--+ 16 | # +Ss+ | +-A-+ 17 | # | | | | | 18 | # he is a big dog 19 | # 20 | # # Find the verb in the sentence 21 | # "he is a big dog".en.sentence.verb.to_s 22 | # # => "is" 23 | # 24 | # # Combined infinitive + LinkParser: Find the infinitive form of the verb of the 25 | # given sentence. 
26 | # "he is a big dog".en.sentence.verb.infinitive 27 | # # => "be" 28 | # 29 | # # Find the direct object of the sentence 30 | # "he is a big dog".en.sentence.object.to_s 31 | # # => "dog" 32 | # 33 | # # Combine WordNet + LinkParser to find the definition of the direct object of 34 | # # the sentence 35 | # "he is a big dog".en.sentence.object.gloss 36 | # # => "a member of the genus Canis (probably descended from the common wolf) that 37 | # has been domesticated by man since prehistoric times; occurs in many breeds; 38 | # \"the dog barked all night\"" 39 | # 40 | module Linguistics::EN::LinkParser 41 | 42 | @has_linkparser = false 43 | @lp_dict = nil 44 | @lp_error = nil 45 | 46 | begin 47 | require "linkparser" 48 | @has_linkparser = true 49 | rescue LoadError => err 50 | @lp_error = err 51 | end 52 | 53 | 54 | # Container for methods intended to extend the EN module as singleton methods. 55 | module SingletonMethods 56 | 57 | ### Returns +true+ if WordNet was loaded okay 58 | def has_linkparser? ; @has_linkparser; end 59 | 60 | ### If #has_linkparser? returns +false+, this can be called to fetch the 61 | ### exception which was raised when WordNet was loaded. 62 | def linkparser_error ; @lp_error; end 63 | 64 | end # module SingletonMethods 65 | extend SingletonMethods 66 | 67 | 68 | # Register this module to the list of modules to include 69 | Linguistics::EN.register_extension( self ) 70 | 71 | ################################################################# 72 | ### M O D U L E M E T H O D S 73 | ################################################################# 74 | 75 | ### The instance of LinkParser used for all Linguistics LinkParser 76 | ### functions. 77 | def self::lp_dict 78 | if !self.has_linkparser? 
79 | raise NotImplementedError, 80 | "LinkParser functions are not loaded: %s" % 81 | self.lp_error.message 82 | end 83 | 84 | return @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 ) 85 | end 86 | 87 | 88 | ################################################################# 89 | ### L I N K P A R S E R I N T E R F A C E 90 | ################################################################# 91 | 92 | ###### 93 | public 94 | ###### 95 | 96 | ### Return a LinkParser::Sentence for the stringified +obj+. 97 | def sentence 98 | return Linguistics::EN::LinkParser.lp_dict.parse( self.to_s ) 99 | end 100 | 101 | end # class Linguistics::EN::LinkParser 102 | -------------------------------------------------------------------------------- /.irbrc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby -*- ruby -*- 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.expand_path 6 | libdir = basedir + "lib" 7 | 8 | puts ">>> Adding #{libdir} to load path..." 9 | $LOAD_PATH.unshift( libdir.to_s ) 10 | } 11 | 12 | require 'English' 13 | 14 | # Set some ANSI escape code constants (Shamelessly stolen from Perl's 15 | # Term::ANSIColor by Russ Allbery and Zenin 16 | ANSI_ATTRIBUTES = { 17 | 'clear' => 0, 18 | 'reset' => 0, 19 | 'bold' => 1, 20 | 'dark' => 2, 21 | 'underline' => 4, 22 | 'underscore' => 4, 23 | 'blink' => 5, 24 | 'reverse' => 7, 25 | 'concealed' => 8, 26 | 27 | 'black' => 30, 'on_black' => 40, 28 | 'red' => 31, 'on_red' => 41, 29 | 'green' => 32, 'on_green' => 42, 30 | 'yellow' => 33, 'on_yellow' => 43, 31 | 'blue' => 34, 'on_blue' => 44, 32 | 'magenta' => 35, 'on_magenta' => 45, 33 | 'cyan' => 36, 'on_cyan' => 46, 34 | 'white' => 37, 'on_white' => 47 35 | } 36 | 37 | ### Create a string that contains the ANSI codes specified and return it 38 | def ansi_code( *attributes ) 39 | attributes.flatten! 40 | attributes.collect! 
{|at| at.to_s } 41 | # $stderr.puts "Returning ansicode for TERM = %p: %p" % 42 | # [ ENV['TERM'], attributes ] 43 | return '' unless /(?:vt10[03]|xterm(?:-color)?|linux|screen)/i =~ ENV['TERM'] 44 | attributes = ANSI_ATTRIBUTES.values_at( *attributes ).compact.join(';') 45 | 46 | # $stderr.puts " attr is: %p" % [attributes] 47 | if attributes.empty? 48 | return '' 49 | else 50 | return "\e[%sm" % attributes 51 | end 52 | end 53 | 54 | 55 | ### Colorize the given +string+ with the specified +attributes+ and return it, handling 56 | ### line-endings, color reset, etc. 57 | def colorize( *args ) 58 | string = '' 59 | 60 | if block_given? 61 | string = yield 62 | else 63 | string = args.shift 64 | end 65 | 66 | ending = string[/(\s)$/] || '' 67 | string = string.rstrip 68 | 69 | return ansi_code( args.flatten ) + string + ansi_code( 'reset' ) + ending 70 | end 71 | 72 | 73 | ### Try to match the specified +str+ with the given +re+, printing out the result. 74 | def try_regexp( str, re ) 75 | if str =~ re 76 | puts " #$PREMATCH", 77 | " " + colorize( 'bold', 'green' ) { $MATCH }, 78 | " #$POSTMATCH" 79 | else 80 | puts colorize( "Nope.", 'red' ) 81 | end 82 | end 83 | 84 | IRB.conf[:PROMPT][:manual] = { 85 | :PROMPT_I => "irb> ", 86 | :PROMPT_S => "... ", 87 | :PROMPT_C => "* ", 88 | :RETURN => "# => %s\n" # format to return value 89 | } 90 | IRB.conf[:PROMPT_MODE] = :manual 91 | 92 | # class FilteringOutputMethod < IRB::OutputMethod 93 | # 94 | # REPLACEMENTS = { 95 | # /\blaika\b/i => 'acme', 96 | # /\bljc\b/i => 'sales', 97 | # /\badtech2\b/i => 'marketing', 98 | # } 99 | # 100 | # def print( *opts ) 101 | # opts.each do |opt| 102 | # REPLACEMENTS.each do |pat, repl| 103 | # opt.gsub!( pat, repl ) 104 | # end 105 | # $stdout.print( opt ) 106 | # end 107 | # end 108 | # end 109 | # IRB.conf[:OUTPUT_MODE] 110 | 111 | 112 | begin 113 | $stderr.puts "Loading Linguistics..." 
114 | require 'linguistics' 115 | Linguistics.use( :en ) 116 | rescue => e 117 | $stderr.puts "Ack! Linguistics library failed to load: #{e.message}\n\t" + 118 | e.backtrace.join( "\n\t" ) 119 | end 120 | 121 | -------------------------------------------------------------------------------- /lib/linguistics/en/articles.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | require 'linguistics/en' unless defined?( Linguistics::EN ) 4 | 5 | # Indefinite article methods for the English-language Linguistics module. 6 | module Linguistics::EN::Articles 7 | 8 | # Register this module to the list of modules to include 9 | Linguistics::EN.register_extension( self ) 10 | 11 | 12 | # This pattern matches strings of capitals starting with a "vowel-sound" 13 | # consonant followed by another consonant, and which are not likely 14 | # to be real words (oh, all right then, it's just magic!) 15 | A_abbrev = %r{ 16 | ^( 17 | (?! 18 | FJO | 19 | [HLMNS]Y. | 20 | RY[EO] | 21 | SQU | 22 | ( 23 | F[LR]? | 24 | [HL] | 25 | MN? | 26 | N | 27 | RH? | 28 | S[CHKLMNPTVW]? | 29 | X(YL)? 30 | ) [AEIOU] 31 | ) 32 | [FHLMNRSX][A-Z] 33 | ) 34 | }x 35 | 36 | # This pattern codes the beginnings of all english words begining with a 37 | # 'y' followed by a consonant. Any other y-consonant prefix therefore 38 | # implies an abbreviation. 39 | A_y_cons = %r{^(y(?:b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt))}i 40 | 41 | # Exceptions to exceptions 42 | A_explicit_an = Regexp.union( /euler/i, /hour(?!i)/i, /heir/i, /honest/i, /hono/i ) 43 | 44 | # Words which always indicate zero quantity 45 | PL_count_zero = Regexp.union( "0", "no", "zero", "nil" ) 46 | 47 | 48 | ### Returns the given word with a prepended indefinite article, unless 49 | ### +count+ is non-nil and not singular. 
50 | def indef_article( count=nil ) 51 | word = self.to_s 52 | 53 | self.log.debug "Fetching the indefinite article for %p (count = %p)" % [ word, count ] 54 | return "#{count} #{word}" if 55 | count && /^(#{PL_count_one})$/i !~ count.to_s 56 | 57 | # Handle user-defined variants 58 | # return value if value = ud_match( word, A_a_user_defined ) 59 | 60 | self.log.debug " count wasn't a definite singular countword" 61 | case word 62 | 63 | # Handle special cases 64 | when /^(#{A_explicit_an})/i 65 | return "an #{word}" 66 | 67 | # Handle abbreviations 68 | when A_abbrev 69 | return "an #{word}" 70 | when /^[aefhilmnorsx][.-]/i 71 | return "an #{word}" 72 | when /^[a-z][.-]/i 73 | return "a #{word}" 74 | 75 | # Handle consonants 76 | when /^[^aeiouy]/i 77 | return "a #{word}" 78 | 79 | # Handle special vowel-forms 80 | when /^e[uw]/i 81 | return "a #{word}" 82 | when /^onc?e\b/i 83 | return "a #{word}" 84 | when /^uni([^nmd]|mo)/i 85 | return "a #{word}" 86 | when /^u[bcfhjkqrst][aeiou]/i 87 | return "a #{word}" 88 | 89 | # Handle vowels 90 | when /^[aeiou]/i 91 | return "an #{word}" 92 | 93 | # Handle y... (before certain consonants implies (unnaturalized) "i.." sound) 94 | when A_y_cons 95 | return "an #{word}" 96 | 97 | # Otherwise, guess "a" 98 | else 99 | return "a #{word}" 100 | end 101 | end 102 | 103 | 104 | ### Return the inflected phrase with the appropriate indefinite article ("a" or 105 | ### "an") prepended. 106 | def a( count=nil ) 107 | count ||= 1 108 | phrase = self.to_s 109 | 110 | md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase ) 111 | pre, word, post = md.to_a[1,3] 112 | return phrase if word.nil? or word.empty? 
113 | 114 | result = word.en.indef_article 115 | return pre + result + post 116 | end 117 | alias_method :an, :a 118 | Linguistics::EN.register_lprintf_formatter :A, :a 119 | Linguistics::EN.register_lprintf_formatter :AN, :a 120 | 121 | 122 | ### Translate zero-quantified +phrase+ to "no +phrase.plural+" 123 | def no( count=nil ) 124 | phrase = self.to_s 125 | md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase ) 126 | pre, word, post = md.to_a[1,3] 127 | count ||= 0 128 | 129 | unless /^#{PL_count_zero}$/ =~ count.to_s 130 | return "#{pre}#{count} " + plural( word, count ) + post 131 | else 132 | return "#{pre}no " + word.en.plural( 0 ) + post 133 | end 134 | end 135 | Linguistics::EN.register_lprintf_formatter :NO, :no 136 | 137 | end # module Linguistics::EN::Articles 138 | 139 | -------------------------------------------------------------------------------- /lib/linguistics/en/titlecase.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | require 'linguistics/en' unless defined?( Linguistics::EN ) 4 | 5 | # Methods for capitalizing a sentence as a title, nouns as proper 6 | # nouns, and for turning a sentence into its equivalent CamelCaseSentence 7 | # and vice-versa. It's part of the English-language Linguistics module. 
8 | module Linguistics::EN::TitleCase 9 | 10 | # Register this module to the list of modules to include 11 | Linguistics::EN.register_extension( self ) 12 | 13 | 14 | # Exceptions: Indefinite articles 15 | ARTICLES = %w[a and the] 16 | 17 | # Exceptions: Prepositions shorter than five letters 18 | SHORT_PREPOSITIONS = ["amid", "at", "but", "by", "down", "for", "from", "in", 19 | "into", "like", "near", "of", "off", "on", "onto", "out", "over", 20 | "past", "save", "with", "till", "to", "unto", "up", "upon", "with"] 21 | 22 | # Exceptions: Coordinating conjunctions 23 | COORD_CONJUNCTIONS = %w[and but as] 24 | 25 | # Titlecase exceptions: "In titles, capitalize the first word, the 26 | # last word, and all words in between except articles (a, an, and 27 | # the), prepositions under five letters (in, of, to), and coordinating 28 | # conjunctions (and, but). These rules apply to titles of long, short, 29 | # and partial works as well as your own papers" (Anson, Schwegler, 30 | # and Muth. The Longman Writer's Companion 240). 31 | TITLE_CASE_EXCEPTIONS = ARTICLES | SHORT_PREPOSITIONS | COORD_CONJUNCTIONS 32 | 33 | # The words which don't get capitalized in a compound proper noun 34 | PROPER_NOUN_EXCEPTIONS = %w{and the of} 35 | 36 | 37 | 38 | ### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English 39 | ### ("camel case to english"). Each word is decapitalized. 40 | def un_camel_case 41 | self.to_s. 42 | gsub( /([A-Z])([A-Z])/ ) { "#$1 #$2" }. 43 | gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase 44 | end 45 | 46 | 47 | ### Turns an English language +string+ into a CamelCase word. 48 | def to_camel_case 49 | self.to_s.gsub( /\s+([a-z])/i ) { $1.upcase } 50 | end 51 | 52 | 53 | ### Returns the inflected object as a title-cased String. 
54 | ### 55 | ### Some examples: 56 | ### 57 | ### "a portrait of the artist as a young man".en.titlecase 58 | ### # => "A Portrait of the Artist as a Young Man" 59 | ### 60 | ### "a seven-sided romance".en.titlecase 61 | ### # => "A Seven-Sided Romance" 62 | ### 63 | ### "the curious incident of the dog in the night-time".en.titlecase 64 | ### # => "The Curious Incident of the Dog in the Night-Time" 65 | ### 66 | ### "the rats of n.i.m.h.".en.titlecase 67 | ### # => "The Rats of N.I.M.H." 68 | def titlecase 69 | 70 | # Split on word-boundaries 71 | words = self.to_s.split( /\b/ ) 72 | 73 | # Always capitalize the first and last words 74 | words.first.capitalize! 75 | words.last.capitalize! 76 | 77 | # Now scan the rest of the tokens, skipping non-words and capitalization 78 | # exceptions. 79 | words.each_with_index do |word, i| 80 | 81 | # Non-words 82 | next unless /^\w+$/.match( word ) 83 | 84 | # Skip exception-words 85 | next if TITLE_CASE_EXCEPTIONS.include?( word ) 86 | 87 | # Skip second parts of contractions 88 | next if words[i - 1] == "'" && /\w/.match( words[i - 2] ) 89 | 90 | # Have to do it this way instead of capitalize! because that method 91 | # also downcases all other letters. 92 | word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 } 93 | end 94 | 95 | return words.join 96 | end 97 | 98 | 99 | ### Returns the proper noun form of the inflected object by capitalizing most of the 100 | ### words. 101 | ### 102 | ### Some examples: 103 | ### 104 | ### "bosnia and herzegovina".en.proper_noun 105 | ### # => "Bosnia and Herzegovina" 106 | ### "macedonia, the former yugoslav republic of".en.proper_noun 107 | ### # => "Macedonia, the Former Yugoslav Republic of" 108 | ### "virgin islands, u.s.".en.proper_noun 109 | ### # => "Virgin Islands, U.S." 110 | def proper_noun 111 | return self.to_s.split(/([ .]+)/).collect do |word| 112 | next word unless 113 | /^[a-z]/.match( word ) && 114 | ! 
(PROPER_NOUN_EXCEPTIONS.include?( word )) 115 | word.capitalize 116 | end.join 117 | end 118 | 119 | 120 | end # module Linguistics::EN::TitleCase 121 | 122 | -------------------------------------------------------------------------------- /spec/linguistics/en/conjunctions_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env spec -cfs 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | require 'linguistics/en' 18 | require 'linguistics/en/conjunctions' 19 | 20 | 21 | describe Linguistics::EN::Conjunctions do 22 | 23 | before( :all ) do 24 | Linguistics.use( :en ) 25 | setup_logging( :fatal ) 26 | end 27 | 28 | 29 | TEST_ITEMS = %w[cow chicken dog goat dog dog duck duck goose goose goose dog goat] 30 | 31 | it "don't use a penultimate separator if it's turned off" do 32 | TEST_ITEMS.en.conjunction( :penultimate => false ).should == 33 | "four dogs, three geese, two goats, two ducks, a cow and a chicken" 34 | end 35 | 36 | it "honors the penultimate setting even if there are only three items (bugfix)" do 37 | %w[duck cow dog].en.conjunction( :penultimate => false ).should == 38 | "a duck, a cow and a dog" 39 | end 40 | 41 | it "uses the supplied block for transformation before building the conjunction" do 42 | TEST_ITEMS.en.conjunction {|item| "'%s' animal" % [item[0]] }.should == 43 | "six 'd' animals, five 'g' animals, and two 'c' animals" 44 | end 45 | 46 | it "uses the alternative separator if one or more phrases include the primary one" do 47 | scene_items = [ 48 | "desk with stamps, paper, and envelopes on it", 49 | "basket containing milk, eggs, and 
broccoli", 50 | "chair", "chair", "chair", 51 | "wooden chest", 52 | "hat rack", 53 | ] 54 | 55 | scene_items.en.conjunction.should == 56 | "three chairs; a desk with stamps, paper, and envelopes on it; " + 57 | "a basket containing milk, eggs, and broccoli; " + 58 | "a wooden chest; and a hat rack" 59 | end 60 | 61 | 62 | describe "with an Array of a single element" do 63 | 64 | before( :each ) do 65 | @array = ['cat'] 66 | end 67 | 68 | it "results in a phrase with indefinite article" do 69 | @array.en.conjunction.should == "a cat" 70 | end 71 | 72 | end 73 | 74 | 75 | describe "with an Array of two different words" do 76 | 77 | before( :each ) do 78 | @array = ['cat', 'dog'] 79 | end 80 | 81 | it "results in a phrase joined with 'and' with default options" do 82 | @array.en.conjunction.should == "a cat and a dog" 83 | end 84 | 85 | it "results in a phrase joined with 'plus' if 'plus' is set as the conjunctive" do 86 | @array.en.conjunction(:conjunctive => 'plus').should == "a cat plus a dog" 87 | end 88 | 89 | it "results in a phrase joined with a space if an empty string is set as the conjunctive" do 90 | @array.en.conjunction(:conjunctive => '').should == "a cat a dog" 91 | end 92 | 93 | end 94 | 95 | 96 | describe "with an Array of two words that differ only in case" do 97 | 98 | before( :each ) do 99 | @array = ['cat', 'Cat'] 100 | end 101 | 102 | it "combines them into their downcased equivalents with default options" do 103 | @array.en.conjunction.should == "two cats" 104 | end 105 | 106 | it "lists them separately if :combine is set to false" do 107 | @array.en.conjunction(:combine => false).should == "a cat and a Cat" 108 | end 109 | 110 | it "doesn't combine them if :casefold is turned off" do 111 | @array.en.conjunction(:casefold => false).should == "a cat and a Cat" 112 | end 113 | 114 | it "combines and lists them with a non-specific count if :generalize is set" do 115 | @array.en.conjunction(:generalize => true).should == "several cats" 116 | end 
117 | 118 | end 119 | 120 | 121 | describe "with an Array of many (more than two) words of varying cases" do 122 | 123 | before( :each ) do 124 | @array = %w{cat dog fox dog chicken chicken Fox chicken goose Dog goose} 125 | end 126 | 127 | it "combines them into their downcased equivalents and lists them in order of amount " + 128 | "with default options" do 129 | @array.en.conjunction.should == 130 | 'three dogs, three chickens, two foxes, two geese, and a cat' 131 | end 132 | 133 | it "lists them separately if :combine is set to false" do 134 | @array.en.conjunction(:combine => false).should == 135 | 'a cat, a dog, a fox, a dog, a chicken, a chicken, a Fox, a '\ 136 | 'chicken, a goose, a Dog, and a goose' 137 | end 138 | 139 | it "doesn't combine the differently-cased ones if :casefold is turned off" do 140 | @array.en.conjunction(:casefold => false).should == 141 | 'three chickens, two dogs, two geese, a cat, a fox, a Fox, '\ 142 | 'and a Dog' 143 | end 144 | 145 | it "combines and lists them with a non-specific count if :generalize is set" do 146 | @array.en.conjunction(:generalize => true).should == 147 | 'several dogs, several chickens, several foxes, several '\ 148 | 'geese, and a cat' 149 | end 150 | 151 | end 152 | 153 | 154 | end 155 | -------------------------------------------------------------------------------- /experiments/api.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # 3 | # This is a little pseudo-program to work out how to best generalize the 4 | # interface to the grammar tools. 5 | # 6 | # == Authors 7 | # 8 | # * Michael Granger 9 | # 10 | # == Copyright 11 | # 12 | # Copyright (c) 2003, 2005 The FaerieMUD Consortium. All rights reserved. 13 | # 14 | # This module is free software. You may use, modify, and/or redistribute this 15 | # software under the terms of the Perl Artistic License. 
(See 16 | # http://language.perl.com/misc/Artistic.html) 17 | # 18 | # == Version 19 | # 20 | # $Id$ 21 | # 22 | 23 | require 'linguistics' 24 | Linguistics::use( :en, 'de' ) 25 | 26 | 27 | ##################################################################### 28 | ### C O N J U N C T I O N S 29 | ##################################################################### 30 | 31 | things = [ 32 | 'a stick', 33 | 'a stone', 34 | 'a stick', 35 | 'a silver hammer', 36 | 'an old hammer', 37 | ] 38 | 39 | print things.en.conjunction 40 | # => "two sticks, a stone, a silver hammer, and an old hammer" 41 | 42 | things = [ 43 | "ein Stein", 44 | "ein Reisig", 45 | "ein Stein", 46 | "ein silber Hammer", 47 | "ein alten Hammer", 48 | ] 49 | print things.de.conjunction 50 | # => "zwei Steinen, ein Reisig, ein silber Hammer, und ein alten Hammer" 51 | 52 | 53 | 54 | ##################################################################### 55 | ### I N F L E C T I O N S 56 | ##################################################################### 57 | 58 | ### Unconditional plurals 59 | 60 | "duck".en.plural 61 | # => "ducks" 62 | 63 | "goose".en.plural 64 | # => "geese" 65 | 66 | 67 | ### Conditional plurals 68 | 69 | "trivet".en.plural( 1 ) 70 | # => "trivet" 71 | 72 | "trivet".en.plural( 4 ) 73 | # => "trivets" 74 | 75 | 76 | ### POS plurals 77 | 78 | "paint".en.plural_noun 79 | # => "paints" 80 | 81 | "paint".en.plural_verb 82 | # => "paint" 83 | 84 | 85 | ### Ordinals 86 | 87 | 5.ord 88 | # => 5th 89 | 90 | ### Plural/singular 91 | 92 | # "0/1/N" -> "no/1/N" translation 93 | [3, 1, 0].each {|errors| 94 | puts "There " + 'were'.en.plural_verb(errors) + " error".en.no(errors) 95 | } 96 | # => There were 3 errors 97 | # => There was 1 error 98 | # => There were no errors 99 | 100 | # Compare two words "number-insensitively": 101 | puts "same" if word1.en === word2.en 102 | puts "same noun" if word1.en.noun === word2.en.noun 103 | puts "same verb" if word1.en.verb === word2.en.verb 104 | 
puts "same adjective" if word1.en.adj === word2.en.adj 105 | 106 | # Add correct "a" or "an" for a given word: 107 | %{Did you want #{"thing".en.a} or #{"idea".en.a}?} 108 | 109 | 110 | ### Convert numerals to words (i.e. 1->"one", 101->"one hundred and one", etc.) 111 | ### In a scalar context: get back a single string... 112 | 113 | # Add methods to Numeric, singleton methods to returned strings for #ord and 114 | # #words that will allow .words.to_a, etc.) 115 | 116 | 1234.words 117 | # => "one thousand, two hundred and thirty-four" 118 | 1234.ord.words 119 | # => "one thousand, two hundred and thirty-fourth" 120 | 1234.words.to_a 121 | # => ["one thousand","two hundred and thirty-four"] 122 | 123 | 124 | # Optional parameters change translation: 125 | 12345.words( :group => 1 ) 126 | # "one, two, three, four, five" 127 | 12345.words( :group => 2 ) 128 | # "twelve, thirty-four, five" 129 | 12345.words( :group => 3 ) 130 | # "one twenty-three, forty-five" 131 | 1234.words( :and => '' ) 132 | # "one thousand, two hundred thirty-four" 133 | 1234.words( :and => ', plus' ) 134 | # "one thousand, two hundred, plus thirty-four" 135 | 555_1202.words( :group => 1, :zero => 'oh' ) 136 | # "five, five, five, one, two, oh, two" 137 | 123.456.words( :group => 1, :decimal => 'mark' ) 138 | # "one two three mark four five six" 139 | 140 | "duck".en.quantify( 0 ) 141 | # => "no ducks" 142 | "duck".en.quantify( 1 ) 143 | # => "a duck" 144 | "duck".en.quantify( 2 ) 145 | # => "a few ducks" 146 | "duck".en.quantify( 5 ) 147 | # => "several ducks" 148 | "duck".en.quantify( 50 ) 149 | # => "many ducks" 150 | "duck".en.quantify( 504 ) 151 | # => "hundreds of ducks" 152 | "duck".en.quantify( 5046 ) 153 | # => "thousands of ducks" 154 | "duck".en.quantify( 50_461 ) 155 | # => "tens of thousands of ducks" 156 | "duck".en.quantify( 504_614 ) 157 | # => "hundreds of thousands of ducks" 158 | "duck".en.quantify( 5_046_140 ) 159 | # => "millions of ducks" 160 | 161 | 162 | ### "Classical" 
plurals (eg: "focus"->"foci", "cherub"->"cherubim") 163 | # Use classical plurals 164 | Linguistics::use( :en, :classical => true ) 165 | 166 | # Interpolate "PL()", "PL_N()", "PL_V()", "PL_ADJ()", A()", "AN()" 167 | # "NUM()" AND "ORD()" WITHIN STRINGS: 168 | 169 | "The plural of #{word} is PL(word)\n".inflect 170 | 171 | 172 | # print inflect("The plural of $word is PL($word)\n") 173 | # print inflect("I saw $cat_count PL("cat",$cat_count)\n") 174 | # print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)") 175 | # print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)") 176 | # print inflect("I saw NUM($cat_count) PL("cat")\nNUM()") 177 | # print inflect("There PL_V(was,$errors) NO(error,$errors)\n") 178 | # print inflect("There NUM($errors,) PL_V(was) NO(error)\n" 179 | # print inflect("Did you want A($thing) or AN($idea)\n") 180 | # print inflect("It was ORD($position) from the left\n") 181 | 182 | # # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES): 183 | 184 | # def_noun "VAX" => "VAXen"; # SINGULAR => PLURAL 185 | 186 | # def_verb "will" => "shall", # 1ST PERSON SINGULAR => PLURAL 187 | # "will" => "will", # 2ND PERSON SINGULAR => PLURAL 188 | # "will" => "will", # 3RD PERSON SINGULAR => PLURAL 189 | 190 | # def_adj "hir" => "their", # SINGULAR => PLURAL 191 | 192 | # def_a "h" # "AY HALWAYS SEZ 'HAITCH'!" 193 | 194 | # def_an "horrendous.*" # "AN HORRENDOUS AFFECTATION" 195 | 196 | -------------------------------------------------------------------------------- /lib/linguistics.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # coding: utf-8 3 | 4 | require 'loggability' 5 | 6 | # An interface for extending core Ruby classes with natural-language methods. 
7 | module Linguistics 8 | extend Loggability 9 | 10 | # Loggability API -- set up a logger for Linguistics objects 11 | log_as :linguistics 12 | 13 | 14 | # Release version 15 | VERSION = '2.0.2' 16 | 17 | # VCS version 18 | REVISION = %q$Revision$ 19 | 20 | # The list of Classes to add linguistic behaviours to. 21 | DEFAULT_EXT_CLASSES = [ String, Numeric, Array ] 22 | 23 | 24 | vvec = lambda {|version| version.split('.').collect {|v| v.to_i }.pack('N*') } 25 | abort "This version of Linguistics requires Ruby 1.9.2 or greater." unless 26 | vvec[RUBY_VERSION] >= vvec['1.9.2'] 27 | 28 | 29 | require 'linguistics/monkeypatches' 30 | require 'linguistics/iso639' 31 | require 'linguistics/inflector' 32 | 33 | include Linguistics::ISO639 34 | 35 | 36 | ### Language modules and the inflector classes that act as their interfaces 37 | @languages = {} 38 | @inflector_mixins = {} 39 | 40 | class << self 41 | # The Hash of loaded languages keyed by 3-letter bibliographic ISO639-2 code 42 | attr_reader :languages 43 | 44 | # The Hash of anonymous inflector modules that act as the mixin interface to 45 | # a language module's inflector, keyed by the language module they belong to 46 | attr_reader :inflector_mixins 47 | end 48 | 49 | 50 | ### Return the library's version string 51 | def self::version_string( include_buildnum=false ) 52 | vstring = "%s %s" % [ self.name, VERSION ] 53 | vstring << " (build %s)" % [ REVISION[/: ([[:xdigit:]]+)/, 1] || '0' ] if include_buildnum 54 | return vstring 55 | end 56 | 57 | 58 | ### Register a module as providing linguistic functions for the specified +language+ (a two- 59 | ### or three-letter ISO639-2 language codes as a Symbol) 60 | def self::register_language( language, mod ) 61 | language_entry = LANGUAGE_CODES[ language.to_sym ] or 62 | raise "Unknown ISO639-2 language code '#{language}'" 63 | self.log.info "Registering %s for language %p" % [ mod, language_entry ] 64 | 65 | language_entry[:codes].each do |lang| 66 | self.languages[ 
lang.to_sym ] = mod 67 | end 68 | 69 | # Load in plugins for the language 70 | Gem.find_files( "linguistics/#{language}/*.rb" ).each do |extension| 71 | next if extension.include?( '/spec/' ) # Skip specs 72 | extension.sub!( %r{.*/linguistics/}, 'linguistics/' ) 73 | self.log.debug " trying to load #{language_entry[:eng_name]} extension %p" % [ extension ] 74 | begin 75 | require extension 76 | rescue LoadError => err 77 | self.log.debug " failed (%s): %s %s" % 78 | [ err.class.name, err.message, err.backtrace.first ] 79 | else 80 | self.log.debug " success." 81 | end 82 | end 83 | 84 | end 85 | 86 | 87 | ### Try to load the module that implements the given language, returning 88 | ### the Module object if successful. 89 | def self::load_language( lang ) 90 | unless mod = self.languages[ lang.to_sym ] 91 | 92 | self.log.debug "Trying to load language %p" % [ lang ] 93 | language = LANGUAGE_CODES[ lang.to_sym ] or 94 | raise "Unknown ISO639-2 language code '#{lang}'" 95 | self.log.debug " got language code %p" % [ language ] 96 | 97 | # Sort all the codes for the specified language, trying the 2-letter 98 | # versions first in alphabetical order, then the 3-letter ones 99 | msgs = [] 100 | mod = nil 101 | 102 | language[:codes].sort.each do |code| 103 | next if code == '' 104 | 105 | begin 106 | require "linguistics/#{code}" 107 | self.log.debug " loaded linguistics/#{code}!" 108 | mod = self.languages[ lang.to_sym ] 109 | self.log.debug " set mod to %p" % [ mod ] 110 | break 111 | rescue LoadError => err 112 | self.log.error " require of linguistics/#{code} failed: #{err.message}" 113 | msgs << "Tried 'linguistics/#{code}': #{err.message}\n" 114 | end 115 | end 116 | 117 | if mod.is_a?( Array ) 118 | raise LoadError, 119 | "Failed to load language extension %s:\n%s" % 120 | [ lang, msgs.join ] 121 | end 122 | 123 | end 124 | 125 | return mod 126 | end 127 | 128 | 129 | ### Add linguistics functions for the specified languages to Ruby's core 130 | ### classes. 
The interface to all linguistic functions for a given language 131 | ### is through a method which is the same the language's international 2- or 132 | ### 3-letter code (ISO 639). You can also specify a Hash of configuration 133 | ### options which control which classes are extended: 134 | ### 135 | ### [:classes] 136 | ### Specify the classes which are to be extended. If this is not specified, 137 | ### the Class objects in Linguistics::DEFAULT_EXT_CLASSES (an Array) are 138 | ### extended. 139 | ### [:monkeypatch] 140 | ### Monkeypatch directly (albeit responsibly, via a mixin) the specified 141 | ### +classes+ instead of adding a single language-code method. 142 | def self::use( *languages ) 143 | config = languages.pop if languages.last.is_a?( Hash ) 144 | config ||= {} 145 | 146 | classes = Array(config[:classes]) if config[:classes] 147 | classes ||= DEFAULT_EXT_CLASSES 148 | 149 | self.log.debug "Extending %d classes with %d language modules." % 150 | [ classes.length, languages.length ] 151 | 152 | # Mix the language module for each requested language into each 153 | # specified class 154 | classes.each do |klass| 155 | self.log.debug " extending %p" % [ klass ] 156 | languages.each do |lang| 157 | mod = load_language( lang ) or 158 | raise LoadError, "failed to load a language extension for %p" % [ lang ] 159 | self.log.debug " using %s language module: %p" % [ lang, mod ] 160 | 161 | if config[:monkeypatch] 162 | klass.send( :include, mod ) 163 | else 164 | inflector = make_inflector_mixin( lang, mod ) 165 | self.log.debug " made an inflector mixin: %p" % [ inflector ] 166 | klass.send( :include, inflector ) 167 | end 168 | end 169 | end 170 | 171 | return classes 172 | end 173 | 174 | 175 | ### Create a mixin module/class pair that act as the per-object interface to 176 | ### the given language +mod+'s inflector. 
177 | def self::make_inflector_mixin( lang, mod ) 178 | language = LANGUAGE_CODES[ lang.to_sym ] or 179 | raise "Unknown ISO639-2 language code '#{lang}'" 180 | 181 | unless mixin = self.inflector_mixins[ mod ] 182 | self.log.debug "Making an inflector mixin for %p" % [ mod ] 183 | 184 | bibcode, alpha2code, termcode = *language[:codes] 185 | inflector = Class.new( Linguistics::Inflector ) { include(mod) } 186 | self.log.debug " created inflector class %p for [%p, %p, %p]" % 187 | [ inflector, bibcode, termcode, alpha2code ] 188 | 189 | mixin = Module.new do 190 | define_method( bibcode ) do 191 | inflector.new( bibcode, self ) 192 | end 193 | alias_method termcode, bibcode unless termcode.nil? || termcode.empty? 194 | alias_method alpha2code, bibcode unless alpha2code.nil? || alpha2code.empty? 195 | end 196 | self.inflector_mixins[ mod ] = mixin 197 | end 198 | 199 | return mixin 200 | end 201 | 202 | 203 | end # module Linguistics 204 | 205 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | = Linguistics 2 | 3 | docs :: http://deveiate.org/code/linguistics 4 | project :: https://bitbucket.org/ged/linguistics 5 | github :: https://github.com/ged/linguistics 6 | 7 | 8 | == Description 9 | 10 | Linguistics is a framework for building linguistic utilities for Ruby 11 | objects in any language. It includes a generic language-independent 12 | front end, a module for mapping language codes into language names, and 13 | a module which contains various English-language utilities. 14 | 15 | 16 | == Usage 17 | 18 | The Linguistics module comes with a language-independent mechanism for 19 | extending core Ruby classes with linguistic methods. 
20 | 21 | It consists of three parts: a core linguistics module which contains the 22 | class-extension framework for languages, a generic inflector class that 23 | serves as an extension point for linguistic methods on Ruby objects, and 24 | one or more language-specific modules which contain the actual 25 | linguistic functions. 26 | 27 | The module works by adding a single instance method for each language 28 | named after the language's two-letter code (or three-letter code, if no 29 | two-letter code is defined by ISO639) to various Ruby classes. This 30 | allows many language-specific methods to be added to objects without 31 | cluttering up the interface or risking collision between them, albeit at 32 | the cost of three or four more characters per method invocation. For 33 | example: 34 | 35 | Linguistics.use( :en ) 36 | "goose".en.plural 37 | # => "geese" 38 | 39 | If you prefer monkeypatching (around 70) linguistics methods directly onto core 40 | classes, you can do that by adding a 'monkeypatch' option to ::use: 41 | 42 | Linguistics.use( :en, monkeypatch: true ) 43 | "goose".plural 44 | # => "geese" 45 | 46 | === Controlling Which Classes Get Extended 47 | 48 | If you should wish to extend classes other than the ones in 49 | Linguistics::DEFAULT_EXT_CLASSES, you have a few options. 50 | 51 | You can modify the DEFAULT_EXT_CLASSES array directly (before you call 52 | ::use, of course): 53 | 54 | Linguistics::DEFAULT_EXT_CLASSES << MyClass 55 | 56 | You can also pass an Array of classes to .use: 57 | 58 | Linguistics.use( :en, classes: [MyClass] ) 59 | 60 | Or you can add language methods to classes via mixin: 61 | 62 | class MyClass 63 | include Linguistics::EN 64 | end 65 | 66 | All Linguistics methods use Ruby's casting mechanism, so at a minimum, 67 | your classes should provide an implementation of #to_s that returns 68 | words or phrases. 
69 | 70 | 71 | === Adding Language Modules 72 | 73 | To add a new language to the framework, define a module that will act as 74 | the top-level namespace for all your linguistic functions, and then 75 | register it as being available, like so: 76 | 77 | module Linguistics::TLH 78 | 79 | # Add Klingon to the list of default languages 80 | Linguistics.register_language( :tlh, self ) 81 | 82 | end 83 | 84 | The first argument is either the two- or three-letter [ISO 639.2] 85 | (http://www.loc.gov/standards/iso639-2/php/code_list.php) language code 86 | for the language you're registering. 87 | 88 | The second is the container module itself. 89 | 90 | After you register your language, each class that Linguistics is told to 91 | extend will have a method for your language code/s: 92 | 93 | irb> Linguistics.use( :tlh, :classes => Object ) 94 | # => [Object] 95 | irb> Object.new.tlh 96 | # => #<(Klingon; tlhIngan-Hol-language inflector) for > 97 | 98 | If you use RSpec 2, you can test out any API requirements of the module 99 | by requiring 'linguistics/languagebehavior' and adding a shared 100 | behavior to your spec: 101 | 102 | require 'rspec' 103 | require 'linguistics/languagebehavior' 104 | 105 | describe Linguistics::TLH do 106 | 107 | it_should_behave_like "a Linguistics language module" 108 | 109 | # ... 
any other specs for your module 110 | 111 | end 112 | 113 | If you wish to use the logging subsystem set up by Linguistics, you can 114 | do so in one of two ways: by logging to the logger directly: 115 | 116 | Linguistics.log.debug "Registering Klingon language extension" 117 | 118 | or by mixing the `Linguistics::Loggable' module into your class/module, 119 | which will give you a 'log' method that prepends the object class on 120 | each log message so it's easy to filter out the ones you want: 121 | 122 | require 'linguistics/mixins' 123 | class Linguistics::TLH::Generator 124 | include Linguistics::Loggable 125 | 126 | def generate_it 127 | self.log.debug "starting generation..." 128 | end 129 | end 130 | 131 | 132 | 133 | == English Language Module 134 | 135 | Linguistics comes with an English-language module; see the API 136 | documentation for Linguistics::EN for more information about it. 137 | 138 | 139 | == Authors 140 | 141 | * Michael Granger 142 | * Martin Chase 143 | 144 | 145 | == Contributors 146 | 147 | * Robert Berry (bdigital on github) - English conjugation ported from 148 | MorphAdorner 149 | 150 | 151 | == Requirements 152 | 153 | * Ruby >= 1.9.3 154 | 155 | It may work under earlier versions, but I'll only be testing it on 1.9.3 156 | or later. 157 | 158 | 159 | == Optional 160 | 161 | The English-language module for Linguistics has support for a few other 162 | optional natural-language libraries: 163 | 164 | linkparser[http://deveiate.org/projects/Ruby-LinkParser] :: 165 | Ruby high-level interface to the CMU Link Grammar library 166 | 167 | wordnet[http://deveiate.org/projects/Ruby-WordNet] :: 168 | Adds integration for the Ruby binding for the WordNet® 169 | lexical reference system. 170 | 171 | 172 | == Contributing 173 | 174 | You can check out the current development source with Mercurial via its 175 | {project page}[http://deveiate.org/projects/Linguistics]. 
Or if you prefer 176 | Git, via {its Github mirror}[https://github.com/ged/linguistics]. 177 | 178 | After checking out the source, run: 179 | 180 | $ rake newb 181 | 182 | This task will install any missing dependencies, run the tests/specs, and 183 | generate the API documentation. 184 | 185 | 186 | == License 187 | 188 | Copyright (c) 2003-2012, Michael Granger 189 | All rights reserved. 190 | 191 | Redistribution and use in source and binary forms, with or without 192 | modification, are permitted provided that the following conditions are met: 193 | 194 | * Redistributions of source code must retain the above copyright notice, 195 | this list of conditions and the following disclaimer. 196 | 197 | * Redistributions in binary form must reproduce the above copyright notice, 198 | this list of conditions and the following disclaimer in the documentation 199 | and/or other materials provided with the distribution. 200 | 201 | * Neither the name of the author/s, nor the names of the project's 202 | contributors may be used to endorse or promote products derived from this 203 | software without specific prior written permission. 204 | 205 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 206 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 207 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 208 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 209 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 210 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 211 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 212 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 213 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 214 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /lib/linguistics/en/conjunctions.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | require 'linguistics/en' unless defined?( Linguistics::EN ) 4 | 5 | # Conjunction methods for the English-language Linguistics module. 6 | module Linguistics::EN::Conjunctions 7 | 8 | # Register this module to the list of modules to include 9 | Linguistics::EN.register_extension( self ) 10 | 11 | # :stopdoc: 12 | 13 | # Default configuration arguments for the #conjunction (junction, what's 14 | # your) function. 15 | CONJUNCTION_DEFAULTS = { 16 | :separator => ', ', 17 | :altsep => '; ', 18 | :penultimate => true, 19 | :conjunctive => 'and', 20 | :combine => true, 21 | :casefold => true, 22 | :generalize => false, 23 | :quantsort => true, 24 | } 25 | 26 | 27 | # :TODO: Needs refactoring 28 | 29 | ### Return the specified +obj+ (which must support the #collect 30 | ### method) as a conjunction. Each item is converted to a String if it is 31 | ### not already (using #to_s) unless a block is given, in which case it is 32 | ### called once for each object in the array, and the stringified return 33 | ### value from the block is used instead. Returning +nil+ causes that 34 | ### particular element to be omitted from the resulting conjunction. The 35 | ### following options can be used to control the makeup of the returned 36 | ### conjunction String: 37 | ### 38 | ### [:separator] 39 | ### Specify one or more characters to separate items in the resulting 40 | ### list. Defaults to ', '. 41 | ### [:altsep] 42 | ### An alternate separator to use if any of the resulting conjunction's 43 | ### clauses contain the :separator character/s. Defaults to '; '. 44 | ### [:penultimate] 45 | ### Flag that indicates whether or not to join the last clause onto the 46 | ### rest of the conjunction using a penultimate :separator. 
E.g., 47 | ### %w{duck, cow, dog}.en.conjunction 48 | ### # => "a duck, a cow, and a dog" 49 | ### %w{duck cow dog}.en.conjunction( :penultimate => false ) 50 | ### "a duck, a cow and a dog" 51 | ### Default to true. 52 | ### [:conjunctive] 53 | ### Sets the word used as the conjunctive (separating word) of the 54 | ### resulting string. Default to 'and'. 55 | ### [:combine] 56 | ### If set to true (the default), items which are indentical (after 57 | ### surrounding spaces are stripped) will be combined in the resulting 58 | ### conjunction. E.g., 59 | ### %w{goose cow goose dog}.en.conjunction 60 | ### # => "two geese, a cow, and a dog" 61 | ### %w{goose cow goose dog}.en.conjunction( :combine => false ) 62 | ### # => "a goose, a cow, a goose, and a dog" 63 | ### [:casefold] 64 | ### If set to true (the default), then items are compared 65 | ### case-insensitively when combining them. This has no effect if 66 | ### :combine is false. 67 | ### [:generalize] 68 | ### If set to true, then quantities of combined items are turned into 69 | ### general descriptions instead of exact amounts. 70 | ### ary = %w{goose pig dog horse goose reindeer goose dog horse} 71 | ### ary.en.conjunction 72 | ### # => "three geese, two dogs, two horses, a pig, and a reindeer" 73 | ### ary.en.conjunction( :generalize => true ) 74 | ### # => "several geese, several dogs, several horses, a pig, and a reindeer" 75 | ### See the #quantify method for specifics on how quantities are 76 | ### generalized. Generalization defaults to false, and has no effect if 77 | ### :combine is false. 78 | ### [:quantsort] 79 | ### If set to true (the default), items which are combined in the 80 | ### resulting conjunction will be listed in order of amount, with greater 81 | ### quantities sorted first. If :quantsort is false, combined items 82 | ### will appear where the first instance of them occurred in the 83 | ### list. 
This sort is also the fallback for indentical quantities (ie., 84 | ### items of the same quantity will be listed in the order they appeared 85 | ### in the source list). 86 | ### 87 | def conjunction( args={} ) 88 | config = CONJUNCTION_DEFAULTS.merge( args ) 89 | 90 | # Transform items in the obj to phrases 91 | phrases = if block_given? 92 | self.log.debug " collecting with a block" 93 | self.collect {|item| yield(item) }.compact 94 | else 95 | self.log.debug " collecting without a block" 96 | rval = self.collect( &:to_s ) 97 | self.log.debug " collected: %p" % [ rval ] 98 | rval 99 | end 100 | 101 | self.log.debug " phrases is: %p" % [ phrases ] 102 | 103 | # No need for a conjunction if there's only one thing 104 | return phrases[0].en.a if phrases.length < 2 105 | 106 | # Set up a Proc to derive a collector key from a phrase depending on the 107 | # configuration 108 | keyfunc = 109 | if config[:casefold] 110 | proc {|key| key.downcase.strip} 111 | else 112 | proc {|key| key.strip} 113 | end 114 | 115 | # Count and delete phrases that hash the same when the keyfunc munges 116 | # them into the same thing if we're combining (:combine => true). 117 | collector = {} 118 | if config[:combine] 119 | 120 | phrases.each_index do |i| 121 | # Stop when reaching the end of a truncated list 122 | break if phrases[i].nil? 123 | 124 | # Make the key using the configured key function 125 | phrase = keyfunc[ phrases[i] ] 126 | 127 | # If the collector already has this key, increment its count, 128 | # eliminate the duplicate from the phrase list, and redo the loop. 129 | if collector.key?( phrase ) 130 | collector[ phrase ] += 1 131 | phrases.delete_at( i ) 132 | redo 133 | end 134 | 135 | collector[ phrase ] = 1 136 | end 137 | else 138 | # If we're not combining, just make everything have a count of 1. 
139 | phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1} 140 | end 141 | 142 | # If sort-by-quantity is turned on, sort the phrases first by how many 143 | # there are (most-first), and then by the order they were specified in. 144 | if config[:quantsort] && config[:combine] 145 | origorder = {} 146 | phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i } 147 | phrases.sort! {|a,b| 148 | (collector[ keyfunc[b] ] <=> collector[ keyfunc[a] ]).nonzero? || 149 | (origorder[ keyfunc[a] ] <=> origorder[ keyfunc[b] ]) 150 | } 151 | end 152 | 153 | # Set up a filtering function that adds either an indefinite article, an 154 | # indefinite quantifier, or a definite quantifier to each phrase 155 | # depending on the configuration and the count of phrases in the 156 | # collector. 157 | filter = 158 | if config[:generalize] 159 | proc {|phrase, count| phrase.en.quantify(count) } 160 | else 161 | proc do |phrase, count| 162 | if count > 1 163 | "%s %s" % [ 164 | # :TODO: Make this threshold settable 165 | count < 10 ? count.en.numwords : count.to_s, 166 | phrase.en.plural( count ) 167 | ] 168 | else 169 | phrase.en.a 170 | end 171 | end 172 | end 173 | 174 | # Now use the configured filter to turn each phrase into its final 175 | # form. Hmmm... square-bracket Lisp? 176 | phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] } 177 | 178 | # Prepend the conjunctive to the last element unless it's empty or 179 | # there's only one element 180 | phrases[-1].insert( 0, config[:conjunctive] + " " ) unless 181 | config[:conjunctive].strip.empty? or 182 | phrases.length < 2 183 | 184 | # Concatenate the last two elements if there's no penultimate separator, 185 | # and pick a separator based on how many phrases there are and whether 186 | # or not there's already an instance of it in the phrases. 
187 | phrase_count = phrases.length 188 | phrases[-2] << " " << phrases.pop unless config[:penultimate] 189 | sep = config[:separator] 190 | if phrase_count <= 2 191 | sep = ' ' 192 | elsif phrases.find {|str| str.include?(config[:separator]) } 193 | sep = config[:altsep] 194 | end 195 | 196 | return phrases.join( sep ) 197 | end 198 | Linguistics::EN.register_lprintf_formatter :CONJUNCT, :conjunction 199 | 200 | 201 | end # module Linguistics::EN::Conjunctions 202 | 203 | -------------------------------------------------------------------------------- /lib/linguistics/en/wordnet.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | require 'linguistics/en' unless defined?( Linguistics::EN ) 4 | 5 | # WordNet support for the English-language Linguistics module. It 6 | # requires the Ruby-WordNet module to be installed; if it is not 7 | # installed, calling the functions defined by this file will raise 8 | # NotImplementedErrors. 9 | # 10 | # # Test to be sure the WordNet module loaded okay. 11 | # Linguistics::EN.has_wordnet? 12 | # # => true 13 | # 14 | # # Fetch the default synset for the word "balance" 15 | # "balance".en.synset 16 | # # => # 18 | # 19 | # # Fetch the synset for the first verb sense of "balance" 20 | # "balance".en.synset( :verb ) 21 | # # => # 25 | # 26 | # # Fetch the second noun sense 27 | # "balance".en.synset( 2, :noun ) 28 | # # => # 30 | # 31 | # # Fetch the second noun sense's hypernyms (more-general words, like a superclass) 32 | # "balance".en.synset( 2, :noun ).hypernyms 33 | # # => [#] 36 | # 37 | # # A simpler way of doing the same thing: 38 | # "balance".en.hypernyms( 2, :noun ) 39 | # # => [#] 42 | # 43 | # # Fetch the first hypernym's hypernyms 44 | # "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms 45 | # # => [#] 48 | # 49 | # # Find the synset to which both the second noun sense of "balance" and the 50 | # # default sense of "shovel" belong. 
#   ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
#   # => #
#
#   # Fetch just the words for the other kinds of "instruments"
#   "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
#   # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
#     "extractor", "instrument of execution", "instrument of punishment", "measuring
#     instrument", "measuring system", "measuring device", "medical instrument",
#     "navigational instrument", "optical instrument", "plotter", "scientific
#     instrument", "sonograph", "surveying instrument", "surveyor's instrument",
#     "tracer", "weapon", "arm", "weapon system", "whip"]
#
module Linguistics::EN::WordNet

  @has_wordnet = false
  @wn_error    = nil
  @lexicon     = nil

  # Try to load the 'wordnet' library. On failure the LoadError is kept so it
  # can be reported (and re-raised by ::lexicon) later.
  begin
    require 'wordnet'
    @has_wordnet = true
  rescue LoadError => err
    @wn_error = err
  end


  # Container for methods intended to extend the EN module as singleton methods.
  module SingletonMethods

    ### Returns +true+ if WordNet was loaded okay
    def has_wordnet?
      @has_wordnet
    end

    ### If #has_wordnet? returns +false+, this can be called to fetch the
    ### exception which was raised when WordNet was loaded.
    def wordnet_error
      @wn_error
    end

  end # module SingletonMethods
  extend SingletonMethods


  # Register this module to the list of modules to include
  Linguistics::EN.register_extension( self )


  #################################################################
  ###  M O D U L E   M E T H O D S
  #################################################################

  ### The instance of the WordNet::Lexicon used for all Linguistics WordNet
  ### functions. It is created on first use. Raises the saved load error if
  ### the 'wordnet' library could not be required.
  def self.lexicon
    raise wordnet_error unless has_wordnet?
    @lexicon ||= WordNet::Lexicon.new
  end


  ### Set the WordNet::Lexicon used by the linguistic functions.
  def self.lexicon=( newlex )
    @lexicon = newlex
  end


  ### Define an instance method called +name+ that looks up the synset for
  ### the receiver (passing along any lookup criteria) and calls the method
  ### of the same +name+ on it. The generated method returns +nil+ if no
  ### synset was found.
  def self.def_synset_function( name )
    define_method( name ) do |*criteria|
      syn = self.synset( *criteria )
      syn ? syn.send( name ) : nil
    end
  end



  #################################################################
  ###  W O R D N E T   I N T E R F A C E
  #################################################################

  ######
  public
  ######

  ### Look up the synset associated with the given word or collocation in the
  ### WordNet lexicon and return a WordNet::Synset object. Any +args+ are
  ### passed through to the lexicon's index operator after the word itself.
  def synset( *args )
    Linguistics::EN::WordNet.lexicon[ self.to_s, *args ]
  end


  ### Look up all the synsets associated with the given word or collocation in
  ### the WordNet lexicon and return an Array of WordNet::Synset objects. Any
  ### +args+ are passed through to the lexicon's #lookup_synsets after the
  ### word itself.
  def synsets( *args )
    Linguistics::EN::WordNet.lexicon.lookup_synsets( self.to_s, *args )
  end


  # Returns definitions and/or example sentences as a String.
  def_synset_function :definition

  # Return nouns or verbs that have the same hypernym as the receiver.
  def_synset_function :coordinates

  # Returns the Array of synonyms contained in the synset for the receiver.
  def_synset_function :words
  def_synset_function :synonyms

  # Returns the name of the lexicographer file that contains the raw data for
  # the receiver.
  def_synset_function :lex_info

  # :TODO: Finish these comments, and figure out how the hell to get the
  # methods to show up in RDoc.
  def_synset_function :frames


  # Returns the synsets for the receiver's antonyms, if any. Ex:
  #   'opaque'.en.synset.antonyms
  #   ==> [#]
  def_synset_function :antonyms

  # General semantic relations
  [
    :hypernyms, :instance_hypernyms, :entailment, :hyponyms,
    :instance_hyponyms, :causes, :verbgroups, :similar_to, :participles,
    :pertainyms, :attributes, :derived_from, :see_also, :functions,
  ].each {|relation| def_synset_function( relation ) }

  # Part-of relations (meronyms)
  [
    :meronyms, :member_meronyms, :stuff_meronyms, :portion_meronyms,
    :component_meronyms, :feature_meronyms, :phase_meronyms, :place_meronyms,
  ].each {|relation| def_synset_function( relation ) }

  # Whole-of relations (holonyms)
  [
    :holonyms, :member_holonyms, :stuff_holonyms, :portion_holonyms,
    :component_holonyms, :feature_holonyms, :phase_holonyms, :place_holonyms,
  ].each {|relation| def_synset_function( relation ) }

  # Domain relations
  [
    :domains, :category_domains, :region_domains, :usage_domains,
  ].each {|relation| def_synset_function( relation ) }

  # Domain-member relations
  [
    :members, :category_members, :region_members, :usage_members,
  ].each {|relation| def_synset_function( relation ) }


end # module Linguistics::EN::WordNet

-------------------------------------------------------------------------------- /examples/endocs.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | $LOAD_PATH.unshift( 'lib' ) 4 | $stdout.sync = $stderr.sync = true 5 | 6 | require 'loggability' 7 | require 'linguistics' 8 | require 'pry' 9 | 10 | lines = File.readlines( __FILE__ ).slice_before( /^__END__/ ).to_a 11 | header = lines.shift 12 | source = lines.shift 13 | source.shift 14 | 15 | header_lines = header.length + 1 16 | code = '' 17 | result = nil 18 | 19 | Loggability.level = $VERBOSE ? :debug : :warn 20 | Loggability.format_with( :color ) 21 | 22 | source.each_with_index do |line, i| 23 | case line 24 | 25 | # Eval any accumulated code on a blank line 26 | when /^\s*$/ 27 | puts 28 | next if code.empty? 29 | puts( code ) 30 | eval( code, binding(), __FILE__, header_lines + i ) 31 | code = '' 32 | 33 | # Eval the code on a result marker, but also render the result after the marker 34 | when /^#\s+=>/ 35 | puts( code ) 36 | $stdout.flush 37 | result = eval( code, binding(), __FILE__, header_lines + i ) 38 | print '# => ' 39 | pp( result ) 40 | code = '' 41 | 42 | # Output comment lines as-is 43 | when /^#/ 44 | puts( line ) 45 | 46 | # Anything else gets appended to the code accumulator 47 | else 48 | code << line 49 | end 50 | end 51 | 52 | __END__ 53 | 54 | # This module is a container for various English-language linguistic 55 | # functions for the Linguistics library. It can be either loaded 56 | # directly, or by passing some variant of +:en+ or +:eng+ to the 57 | # Linguistics.use method. 
58 | 59 | require 'linguistics' 60 | Linguistics.use( :en ) # extends Array, String, and Numeric 61 | 62 | # == Pluralization 63 | 64 | "box".en.plural 65 | # => "boxes" 66 | 67 | "mouse".en.plural 68 | # => "mice" 69 | 70 | "ruby".en.plural 71 | # => "rubies" 72 | 73 | 74 | # == Indefinite Articles 75 | 76 | "book".en.a 77 | # => "a book" 78 | 79 | "article".en.a 80 | # => "an article" 81 | 82 | 83 | # == Present Participles 84 | 85 | "runs".en.present_participle 86 | # => "running" 87 | 88 | "eats".en.present_participle 89 | # => "eating" 90 | 91 | "spies".en.present_participle 92 | # => "spying" 93 | 94 | 95 | # == Ordinal Numbers 96 | 97 | 5.en.ordinal 98 | # => "5th" 99 | 100 | 2004.en.ordinal 101 | # => "2004th" 102 | 103 | 104 | # == Numbers to Words 105 | 106 | 5.en.numwords 107 | # => "five" 108 | 109 | 2004.en.numwords 110 | # => "two thousand and four" 111 | 112 | 2385762345876.en.numwords 113 | # => "two trillion, three hundred and eighty-five billion, seven hundred and sixty-two million, three hundred and forty-five thousand, eight hundred and seventy-six" 114 | 115 | 116 | # == Quantification 117 | 118 | "cow".en.quantify( 5 ) 119 | # => "several cows" 120 | 121 | "cow".en.quantify( 1005 ) 122 | # => "thousands of cows" 123 | 124 | "cow".en.quantify( 20_432_123_000_000 ) 125 | # => "tens of trillions of cows" 126 | 127 | 128 | # == Conjunctions 129 | 130 | animals = %w{dog cow ox chicken goose goat cow dog rooster llama pig goat dog cat cat dog cow goat goose goose ox alpaca} 131 | "The farm has: " + animals.en.conjunction 132 | # => The farm has: four dogs, three cows, three geese, three goats, two oxen, two cats, a chicken, a rooster, a llama, a pig, and an alpaca 133 | 134 | # Note that 'goose' and 'ox' are both correctly pluralized, and the correct 135 | # indefinite article 'an' has been used for 'alpaca'. 
136 | # 137 | # You can also use the generalization function of the #quantify method to give 138 | # general descriptions of object lists instead of literal counts: 139 | 140 | allobjs = [] 141 | ObjectSpace::each_object {|obj| allobjs << obj.class.name } 142 | puts "The current Ruby objectspace contains: " + allobjs.en.conjunction( :generalize => true ) 143 | # => 144 | 145 | 146 | # == Infinitives 147 | 148 | "leaving".en.infinitive 149 | # => "leave" 150 | 151 | "left".en.infinitive 152 | # => "leave" 153 | 154 | "leaving".en.infinitive.suffix 155 | # => "ing" 156 | 157 | 158 | # == Conjugation 159 | 160 | #Conjugate a verb given an infinitive: 161 | 162 | "run".en.past_tense 163 | # => "ran" 164 | 165 | "run".en.past_participle 166 | # => "run" 167 | 168 | "run".en.present_tense 169 | # => "run" 170 | 171 | "run".en.present_participle 172 | # => "running" 173 | 174 | # Conjugate an infinitive with an explicit tense and grammatical person: 175 | 176 | "be".en.conjugate( :present, :third_person_singular ) 177 | # => "is" 178 | 179 | "be".en.conjugate( :present, :first_person_singular ) 180 | # => "am" 181 | 182 | "be".en.conjugate( :past, :first_person_singular ) 183 | # => "was" 184 | 185 | # The functionality is a port of the verb conjugation portion of Morph 186 | # Adorner (http://morphadorner.northwestern.edu/). 187 | # 188 | # It includes a good number of irregular verbs, but it's not going to be 189 | # 100% correct everytime. 190 | 191 | 192 | # == WordNet® Integration 193 | 194 | # If you have the 'wordnet' gem installed, you can look up WordNet synsets using 195 | # the Linguistics interface: 196 | 197 | # Test to be sure the WordNet module loaded okay. 198 | Linguistics::EN.has_wordnet? 
199 | # => true 200 | 201 | # Fetch the default synset for the word "balance" 202 | "balance".en.synset 203 | # => # 204 | 205 | # Fetch the synset for the first verb sense of "balance" 206 | "balance".en.synset( :verb ) 207 | # => # 208 | 209 | # Fetch the second noun sense 210 | "balance".en.synset( 2, :noun ) 211 | # => # 212 | 213 | # Fetch the second noun sense's hypernyms (more-general words, like a superclass) 214 | "balance".en.synset( 2, :noun ).hypernyms 215 | # => [#] 216 | 217 | # A simpler way of doing the same thing: 218 | "balance".en.hypernyms( 2, :noun ) 219 | # => [#] 220 | 221 | # Fetch the first hypernym's hypernyms 222 | "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms 223 | # => [#] 224 | 225 | # Find the synset to which both the second noun sense of "balance" and the 226 | # default sense of "shovel" belong. 227 | ("balance".en.synset( 2, :noun ) | "shovel".en.synset) 228 | # => # 229 | 230 | # Fetch words for the specific kinds of (device-ish) "instruments" 231 | "instrument".en.hyponyms( "device" ).collect( &:words ).flatten.join(', ') 232 | # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument", "extractor", "instrument of execution", "instrument of punishment", "measuring instrument", "measuring system", "measuring device", "medical instrument", "navigational instrument", "optical instrument", "plotter", "scientific instrument", "sonograph", "surveying instrument", "surveyor's instrument", "tracer", "weapon", "arm", "weapon system", "whip"] 233 | 234 | # ...or musical instruments 235 | "instrument".en.hyponyms( "musical" ).collect( &:words ).flatten.join(', ') 236 | # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument", "extractor", "instrument of execution", "instrument of punishment", "measuring instrument", "measuring system", "measuring device", "medical instrument", "navigational instrument", "optical instrument", "plotter", "scientific instrument", "sonograph", "surveying 
instrument", "surveyor's instrument", "tracer", "weapon", "arm", "weapon system", "whip"] 237 | 238 | # There are many more WordNet methods supported--too many to list here. See the 239 | # documentation for the complete list. 240 | 241 | 242 | # == LinkParser Integration 243 | 244 | # If you have the 'linkparser' gem installed, you can create linkages 245 | # from English sentences that let you query for parts of speech: 246 | 247 | # Test to see whether or not the link parser is loaded. 248 | Linguistics::EN.has_linkparser? 249 | # => true 250 | 251 | # Diagram the first linkage for a test sentence 252 | puts "he is a big dog".en.sentence.linkages.first.diagram 253 | 254 | # Find the verb in the sentence 255 | "he is a big dog".en.sentence.verb 256 | # => "is" 257 | 258 | # Combined infinitive + LinkParser: Find the infinitive form of the verb of the 259 | # given sentence. 260 | "he is a big dog".en.sentence.verb.en.infinitive 261 | # => "be" 262 | 263 | # Find the direct object of the sentence 264 | "he is a big dog".en.sentence.object 265 | # => "dog" 266 | 267 | # Combine WordNet + LinkParser to find the definition of the direct object of 268 | # the sentence 269 | "he is a big dog".en.sentence.object.en.definition 270 | # => 271 | 272 | 273 | -------------------------------------------------------------------------------- /experiments/gen_numwords_specs.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | NumberTests = [ 4 | ["0", "zero", "zero", "zero", "zero", "zeroth", ], 5 | ["1", "one", "one", "one", "one", "first", ], 6 | ["2", "two", "two", "two", "two", "second", ], 7 | ["3", "three", "three", "three", "three", "third", ], 8 | ["4", "four", "four", "four", "four", "fourth", ], 9 | ["5", "five", "five", "five", "five", "fifth", ], 10 | ["6", "six", "six", "six", "six", "sixth", ], 11 | ["7", "seven", "seven", "seven", "seven", "seventh", ], 12 | ["8", "eight", "eight", "eight", "eight", 
"eighth", ], 13 | ["9", "nine", "nine", "nine", "nine", "ninth", ], 14 | ["10", "ten", "one, zero", "ten", "ten", "tenth", ], 15 | ["11", "eleven", "one, one", "eleven", "eleven", "eleventh", ], 16 | ["12", "twelve", "one, two", "twelve", "twelve", "twelfth", ], 17 | ["13", "thirteen", "one, three", "thirteen", "thirteen", "thirteenth", ], 18 | ["14", "fourteen", "one, four", "fourteen", "fourteen", "fourteenth", ], 19 | ["15", "fifteen", "one, five", "fifteen", "fifteen", "fifteenth", ], 20 | ["16", "sixteen", "one, six", "sixteen", "sixteen", "sixteenth", ], 21 | ["17", "seventeen", "one, seven", "seventeen", "seventeen", "seventeenth", ], 22 | ["18", "eighteen", "one, eight", "eighteen", "eighteen", "eighteenth", ], 23 | ["19", "nineteen", "one, nine", "nineteen", "nineteen", "nineteenth", ], 24 | ["20", "twenty", "two, zero", "twenty", "twenty", "twentieth", ], 25 | ["21", "twenty-one", "two, one", "twenty-one", "twenty-one", "twenty-first", ], 26 | ["29", "twenty-nine", "two, nine", "twenty-nine", "twenty-nine", "twenty-ninth", ], 27 | ["99", "ninety-nine", "nine, nine", "ninety-nine", "ninety-nine", "ninety-ninth", ], 28 | 29 | ["100", "one hundred", "one, zero, zero", "ten, zero", "one zero zero", 30 | "one hundredth", ], 31 | ["101", "one hundred and one", "one, zero, one", "ten, one", "one zero one", 32 | "one hundred and first", ], 33 | ["110", "one hundred and ten", "one, one, zero", "eleven, zero", "one ten", 34 | "one hundred and tenth", ], 35 | ["111", "one hundred and eleven", "one, one, one", "eleven, one", "one eleven", 36 | "one hundred and eleventh", ], 37 | ["900", "nine hundred", "nine, zero, zero", "ninety, zero", "nine zero zero", 38 | "nine hundredth", ], 39 | ["999", "nine hundred and ninety-nine", "nine, nine, nine", "ninety-nine, nine", 40 | "nine ninety-nine", "nine hundred and ninety-ninth", ], 41 | 42 | ["1000", "one thousand", "one, zero, zero, zero", "ten, zero zero", 43 | "one zero zero, zero", "one thousandth", ], 44 | ["1001", 
"one thousand and one", "one, zero, zero, one", "ten, zero one", 45 | "one zero zero, one", "one thousand and first", ], 46 | ["1010", "one thousand and ten", "one, zero, one, zero", "ten, ten", 47 | "one zero one, zero", "one thousand and tenth", ], 48 | ["1100", "one thousand, one hundred", "one, one, zero, zero", 49 | "eleven, zero zero", "one ten, zero", "one thousand, one hundredth", ], 50 | ["2000", "two thousand", "two, zero, zero, zero", "twenty, zero zero", 51 | "two zero zero, zero", "two thousandth", ], 52 | ["10000", "ten thousand", "one, zero, zero, zero, zero", "ten, zero zero, zero", 53 | "one zero zero, zero zero", "ten thousandth", ], 54 | 55 | ["100000", "one hundred thousand", "one, zero, zero, zero, zero, zero", 56 | "ten, zero zero, zero zero", "one zero zero, zero zero zero", 57 | "one hundred thousandth", ], 58 | ["100001", "one hundred thousand and one", "one, zero, zero, zero, zero, one", 59 | "ten, zero zero, zero one", "one zero zero, zero zero one", 60 | "one hundred thousand and first", ], 61 | ["123456", "one hundred and twenty-three thousand, four hundred and fifty-six", 62 | "one, two, three, four, five, six", "twelve, thirty-four, fifty-six", 63 | "one twenty-three, four fifty-six", 64 | "one hundred and twenty-three thousand, four hundred and fifty-sixth", ], 65 | ["0123456", "one hundred and twenty-three thousand, four hundred and fifty-six", 66 | "zero, one, two, three, four, five, six", 67 | "zero one, twenty-three, forty-five, six", 68 | "zero twelve, three forty-five, six", 69 | "one hundred and twenty-three thousand, four hundred and fifty-sixth", ], 70 | 71 | ["1234567", 72 | "one million, two hundred and thirty-four thousand, five hundred and sixty-seven", 73 | "one, two, three, four, five, six, seven", "twelve, thirty-four, fifty-six, seven", 74 | "one twenty-three, four fifty-six, seven", 75 | "one million, two hundred and thirty-four thousand, five hundred and sixty-seventh", ], 76 | ["12345678", 77 | "twelve million, 
three hundred and forty-five thousand, six hundred and seventy-eight", 78 | "one, two, three, four, five, six, seven, eight", 79 | "twelve, thirty-four, fifty-six, seventy-eight", 80 | "one twenty-three, four fifty-six, seventy-eight", 81 | "twelve million, three hundred and forty-five thousand, six hundred and seventy-eighth", ], 82 | ["12_345_678", 83 | "twelve million, three hundred and forty-five thousand, six hundred and seventy-eight", 84 | "one, two, three, four, five, six, seven, eight", 85 | "twelve, thirty-four, fifty-six, seventy-eight", 86 | "one twenty-three, four fifty-six, seventy-eight", ], 87 | ["1234,5678", 88 | "twelve million, three hundred and forty-five thousand, six hundred and seventy-eight", 89 | "one, two, three, four, five, six, seven, eight", 90 | "twelve, thirty-four, fifty-six, seventy-eight", 91 | "one twenty-three, four fifty-six, seventy-eight", ], 92 | ["1234567890", 93 | "one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninety", 94 | "one, two, three, four, five, six, seven, eight, nine, zero", 95 | "twelve, thirty-four, fifty-six, seventy-eight, ninety", 96 | "one twenty-three, four fifty-six, seven eighty-nine, zero", 97 | "one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninetieth", ], 98 | ["123456789012345", 99 | "one hundred and twenty-three trillion, four hundred and fifty-six billion, seven hundred and eighty-nine million, twelve thousand, three hundred and forty-five", 100 | "one, two, three, four, five, six, seven, eight, nine, zero, one, two, three, four, five", 101 | "twelve, thirty-four, fifty-six, seventy-eight, ninety, twelve, thirty-four, five", 102 | "one twenty-three, four fifty-six, seven eighty-nine, zero twelve, three forty-five", 103 | "one hundred and twenty-three trillion, four hundred and fifty-six billion, seven hundred and eighty-nine million, twelve thousand, three hundred and 
forty-fifth", ], 104 | ["12345678901234567890", 105 | "twelve quintillion, three hundred and forty-five quadrillion, six hundred and seventy-eight trillion, nine hundred and one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninety", 106 | "one, two, three, four, five, six, seven, eight, nine, zero, one, two, three, four, five, six, seven, eight, nine, zero", 107 | "twelve, thirty-four, fifty-six, seventy-eight, ninety, twelve, thirty-four, fifty-six, seventy-eight, ninety", 108 | "one twenty-three, four fifty-six, seven eighty-nine, zero twelve, three forty-five, six seventy-eight, ninety", 109 | "twelve quintillion, three hundred and forty-five quadrillion, six hundred and seventy-eight trillion, nine hundred and one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninetieth", ], 110 | 111 | ["0.987654", "zero point nine eight seven six five four", 112 | "zero, point, nine, eight, seven, six, five, four", 113 | "zero, point, ninety-eight, seventy-six, fifty-four", 114 | "zero, point, nine eighty-seven, six fifty-four", 115 | "zero point nine eight seven six five fourth", ], 116 | [".987654", "point nine eight seven six five four", 117 | "point, nine, eight, seven, six, five, four", 118 | "point, ninety-eight, seventy-six, fifty-four", 119 | "point, nine eighty-seven, six fifty-four", 120 | "point nine eight seven six five fourth", ], 121 | ["9.87654", "nine point eight seven six five four", 122 | "nine, point, eight, seven, six, five, four", 123 | "nine, point, eighty-seven, sixty-five, four", 124 | "nine, point, eight seventy-six, fifty-four", 125 | "nine point eight seven six five fourth", ], 126 | ["98.7654", "ninety-eight point seven six five four", 127 | "nine, eight, point, seven, six, five, four", 128 | "ninety-eight, point, seventy-six, fifty-four", 129 | "ninety-eight, point, seven sixty-five, four", 130 | "ninety-eight point seven six five 
fourth", ], 131 | ["987.654", "nine hundred and eighty-seven point six five four", 132 | "nine, eight, seven, point, six, five, four", 133 | "ninety-eight, seven, point, sixty-five, four", 134 | "nine eighty-seven, point, six fifty-four", 135 | "nine hundred and eighty-seven point six five fourth", ], 136 | ["9876.54", "nine thousand, eight hundred and seventy-six point five four", 137 | "nine, eight, seven, six, point, five, four", 138 | "ninety-eight, seventy-six, point, fifty-four", 139 | "nine eighty-seven, six, point, fifty-four", 140 | "nine thousand, eight hundred and seventy-six point five fourth", ], 141 | ["98765.4", "ninety-eight thousand, seven hundred and sixty-five point four", 142 | "nine, eight, seven, six, five, point, four", 143 | "ninety-eight, seventy-six, five, point, four", 144 | "nine eighty-seven, sixty-five, point, four", 145 | "ninety-eight thousand, seven hundred and sixty-five point fourth", ], 146 | ["101.202.303", "one hundred and one point two zero two three zero three", 147 | "one, zero, one, point, two, zero, two, point, three, zero, three", 148 | "ten, one, point, twenty, two, point, thirty, three", 149 | "one zero one, point, two zero two, point, three zero three", 150 | ] 151 | ] 152 | 153 | NumberTests.each do 154 | |origin, regular, group1, group2, group3, numord, ordnum| 155 | 156 | puts %{ 157 | it "can transform #{origin} into english words" do 158 | #{origin.dump}.en.numwords == #{regular.dump} 159 | end 160 | 161 | it "can transform #{origin} into english words in single-digit groups" do 162 | #{origin.dump}.en.numwords( :group => 1 ) == #{group1.dump} 163 | end 164 | 165 | it "can transform #{origin} into english words in double-digit groups" do 166 | #{origin.dump}.en.numwords( :group => 2 ) == #{group2.dump} 167 | end 168 | 169 | it "can transform #{origin} into english words in triple-digit groups" do 170 | #{origin.dump}.en.numwords( :group => 3 ) == #{group3.dump} 171 | end 172 | } 173 | 174 | puts %{ 175 | it "can 
transform the english words for #{origin} into an ordinal" do 176 | #{origin.dump}.en.numwords.en.ordinal.should == #{numord.dump} 177 | end 178 | } if numord 179 | 180 | end 181 | -------------------------------------------------------------------------------- /lib/linguistics/en.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | require 'rubygems' # For Gem.find_files 4 | require 'pathname' 5 | 6 | require 'linguistics' unless defined?( Linguistics ) 7 | 8 | 9 | # This module is a container for various English-language linguistic 10 | # functions for the Linguistics library. It can be either loaded 11 | # directly, or by passing some variant of +:en+ or +:eng+ to the 12 | # Linguistics.use method. 13 | # 14 | # == Pluralization 15 | # 16 | # "box".en.plural 17 | # # => "boxes" 18 | # 19 | # "mouse".en.plural 20 | # # => "mice" 21 | # 22 | # "ruby".en.plural 23 | # # => "rubies" 24 | # 25 | # 26 | # == Indefinite Articles 27 | # 28 | # "book".en.a 29 | # # => "a book" 30 | # 31 | # "article".en.a 32 | # # => "an article" 33 | # 34 | # 35 | # == Present Participles 36 | # 37 | # "runs".en.present_participle 38 | # # => "running" 39 | # 40 | # "eats".en.present_participle 41 | # # => "eating" 42 | # 43 | # "spies".en.present_participle 44 | # # => "spying" 45 | # 46 | # 47 | # == Ordinal Numbers 48 | # 49 | # 5.en.ordinal 50 | # # => "5th" 51 | # 52 | # 2004.en.ordinal 53 | # # => "2004th" 54 | # 55 | # 56 | # == Numbers to Words 57 | # 58 | # 5.en.numwords 59 | # # => "five" 60 | # 61 | # 2004.en.numwords 62 | # # => "two thousand and four" 63 | # 64 | # 2385762345876.en.numwords 65 | # # => "two trillion, three hundred and eighty-five billion, seven hundred and 66 | # # sixty-two million, three hundred and forty-five thousand, eight hundred 67 | # # and seventy-six" 68 | # 69 | # 70 | # == Quantification 71 | # 72 | # "cow".en.quantify( 5 ) 73 | # # => "several cows" 74 | # 75 | # "cow".en.quantify( 
1005 ) 76 | # # => "thousands of cows" 77 | # 78 | # "cow".en.quantify( 20_432_123_000_000 ) 79 | # # => "tens of trillions of cows" 80 | # 81 | # 82 | # == Conjunctions 83 | # 84 | # animals = %w{dog cow ox chicken goose goat cow dog rooster llama pig goat 85 | # dog cat cat dog cow goat goose goose ox alpaca} 86 | # "The farm has: " + animals.en.conjunction 87 | # # => "The farm has: four dogs, three cows, three geese, three goats, two 88 | # # oxen, two cats, a chicken, a rooster, a llama, a pig, and an alpaca" 89 | # 90 | # Note that 'goose' and 'ox' are both correctly pluralized, and the correct 91 | # indefinite article 'an' has been used for 'alpaca'. 92 | # 93 | # You can also use the generalization function of the #quantify method to give 94 | # general descriptions of object lists instead of literal counts: 95 | # 96 | # allobjs = [] 97 | # ObjectSpace::each_object {|obj| allobjs << obj.class.name } 98 | # puts "The current Ruby objectspace contains: " + 99 | # allobjs.en.conjunction( :generalize => true ) 100 | # 101 | # Outputs: 102 | # 103 | # The current Ruby objectspace contains: hundreds of thousands of Strings, 104 | # thousands of RubyVM::InstructionSequences, thousands of Arrays, thousands 105 | # of Hashes, hundreds of Procs, hundreds of Regexps, [...], a 106 | # SystemStackError, a Random, an ARGF.class, a Data, a fatal, an 107 | # OptionParser::List, a YAML::EngineManager, a URI::Parser, a Rational, and 108 | # a Gem::Platform 109 | # 110 | # 111 | # == Infinitives 112 | # 113 | # "leaving".en.infinitive 114 | # # => "leave" 115 | # 116 | # "left".en.infinitive 117 | # # => "leave" 118 | # 119 | # "leaving".en.infinitive.suffix 120 | # # => "ing" 121 | # 122 | # 123 | # == Conjugation 124 | # 125 | # Conjugate a verb given an infinitive: 126 | # 127 | # "run".en.past_tense 128 | # # => "ran" 129 | # 130 | # "run".en.past_participle 131 | # # => "run" 132 | # 133 | # "run".en.present_tense 134 | # # => "run" 135 | # 136 | # 
"run".en.present_participle 137 | # # => "running" 138 | # 139 | # Conjugate an infinitive with an explicit tense and grammatical person: 140 | # 141 | # "be".en.conjugate( :present, :third_person_singular ) 142 | # # => "is" 143 | # 144 | # "be".en.conjugate( :present, :first_person_singular ) 145 | # # => "am" 146 | # 147 | # "be".en.conjugate( :past, :first_person_singular ) 148 | # # => "was" 149 | # 150 | # The functionality is a port of the verb conjugation portion of Morph 151 | # Adorner (http://morphadorner.northwestern.edu/). 152 | # 153 | # It includes a good number of irregular verbs, but it's not going to be 154 | # 100% correct everytime. 155 | # 156 | # 157 | # == WordNet® Integration 158 | # 159 | # If you have the 'wordnet' gem installed, you can look up WordNet synsets using 160 | # the Linguistics interface: 161 | # 162 | # Test to be sure the WordNet module loaded okay. 163 | # 164 | # Linguistics::EN.has_wordnet? 165 | # # => true 166 | # 167 | # Fetch the default synset for the word "balance" 168 | # 169 | # "balance".en.synset 170 | # # => # 172 | # 173 | # Fetch the synset for the first verb sense of "balance" 174 | # 175 | # "balance".en.synset( :verb ) 176 | # # => # 178 | # 179 | # Fetch the second noun sense 180 | # 181 | # "balance".en.synset( 2, :noun ) 182 | # # => # 186 | # 187 | # Fetch the second noun sense's hypernyms (more-general words, like a 188 | # superclass) 189 | # 190 | # "balance".en.synset( 2, :noun ).hypernyms 191 | # # => [#] 195 | # 196 | # A simpler way of doing the same thing: 197 | # 198 | # "balance".en.hypernyms( 2, :noun ) 199 | # # => [#] 203 | # 204 | # Fetch the first hypernym's hypernyms 205 | # 206 | # "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms 207 | # # => [#] 210 | # 211 | # Find the synset to which both the second noun sense of "balance" and the 212 | # default sense of "shovel" belong. 
213 | # 214 | # ("balance".en.synset( 2, :noun ) | "shovel".en.synset) 215 | # # => # 217 | # 218 | # Fetch words for the specific kinds of (device-ish) "instruments" 219 | # 220 | # "instrument".en.hyponyms( "device" ).collect( &:words ).flatten.join(', ') 221 | # # => "analyser, analyzer, cauterant, cautery, drafting instrument, engine, 222 | # # extractor, instrument of execution, instrument of punishment, measuring 223 | # # device, measuring instrument, measuring system, medical instrument, 224 | # # navigational instrument, optical instrument, plotter, scientific 225 | # # instrument, sonograph, surveying instrument, surveyor's instrument, 226 | # # tracer, arm, weapon, weapon system, whip" 227 | # 228 | # ...or musical instruments 229 | # 230 | # "instrument".en.hyponyms( "musical" ).collect( &:words ).flatten.join(', ') 231 | # # => "barrel organ, grind organ, hand organ, hurdy-gurdy, hurdy gurdy, 232 | # # street organ, bass, calliope, steam organ, electronic instrument, 233 | # # electronic musical instrument, jew's harp, jews' harp, mouth bow, keyboard 234 | # # instrument, music box, musical box, percussion instrument, percussive 235 | # # instrument, stringed instrument, wind, wind instrument" 236 | # 237 | # There are many more WordNet methods supported--too many to list here. See the 238 | # WordNet::Synset API documentation for the complete list. 239 | # 240 | # 241 | # == LinkParser Integration 242 | # 243 | # If you have the 'linkparser' gem installed, you can create linkages 244 | # from English sentences that let you query for parts of speech: 245 | # 246 | # Test to see whether or not the link parser is loaded. 247 | # 248 | # Linguistics::EN.has_linkparser? 
249 | # # => true 250 | # 251 | # Diagram the first linkage for a test sentence 252 | # 253 | # puts "he is a big dog".en.sentence.linkages.first.diagram 254 | # 255 | # Outputs: 256 | # 257 | # +-----Ost----+ 258 | # | +----Ds---+ 259 | # +-Ss+ | +--A--+ 260 | # | | | | | 261 | # he is.v a big.a dog.n 262 | # 263 | # Find the verb in the sentence 264 | # 265 | # "he is a big dog".en.sentence.verb.to_s 266 | # # => "is" 267 | # 268 | # Combined infinitive + LinkParser: Find the infinitive form of the verb of the 269 | # given sentence. 270 | # 271 | # "he is a big dog".en.sentence.verb.en.infinitive 272 | # # => "be" 273 | # 274 | # Find the direct object of the sentence 275 | # 276 | # "he is a big dog".en.sentence.object.to_s 277 | # # => "dog" 278 | # 279 | # Combine WordNet + LinkParser to find the definition of the direct object of 280 | # the sentence 281 | # 282 | # "he is a big dog".en.sentence.object.en.definition 283 | # # => "a member of the genus Canis (probably descended from the common wolf) 284 | # # that has been domesticated by man since prehistoric times; occurs in many 285 | # # breeds" 286 | # 287 | # 288 | module Linguistics::EN 289 | extend Loggability 290 | 291 | # Loggability API -- log to the Linguistics logger 292 | log_to :linguistics 293 | 294 | # The list of loaded modules 295 | MODULES = [] 296 | 297 | # The key to set in the thread-hash to indicate it's running in 'classical' mode 298 | THREAD_CLASSICAL_KEY = :english_classical_mode 299 | 300 | 301 | # A Hash of 'lprintf' formatters keyed by name 302 | @@lprintf_formatters = {} 303 | 304 | 305 | ################################################################# 306 | ### U T I L I T Y F U N C T I O N S 307 | ################################################################# 308 | 309 | ### A Hash of formatters for the lprintf function. 310 | def self::lprintf_formatters 311 | return @@lprintf_formatters 312 | end 313 | 314 | 315 | ### Register an English-language extension. 
### Registration appends +mod+ to MODULES, mixes it into this module, and
### points its Loggability output at the :linguistics logger. If +mod+ has a
### SingletonMethods constant, this module is also extended with it, and
### every instance variable set on +mod+ is copied onto this module.
def self::register_extension( mod )
  MODULES.push( mod )
  self.log.debug "Registered English extension %p" % [ mod ]

  include( mod )
  mod.extend( Loggability )
  mod.log_to( :linguistics )

  return unless mod.const_defined?( :SingletonMethods )

  smod = mod.const_get( :SingletonMethods )
  self.log.debug " and its singleton methods %p" % [ smod ]
  extend( smod )

  ivars = mod.instance_variables
  self.log.debug " and instance variables %p" % [ ivars ]
  ivars.each do |ivar|
    instance_variable_set( ivar, mod.instance_variable_get(ivar) )
  end
end


### Returns +true+ if the English-language module with the given +name+ was
### successfully registered. The comparison is case-insensitive and only
### looks at the last segment of each registered module's name.
def self::has_extension?( name )
  wanted = name.to_s.downcase

  # Module#name is nil for anonymous modules, so stringify it before
  # stripping the namespace; such modules simply never match.
  return MODULES.any? {|mod| mod.name.to_s.sub( /.*::/, '' ).downcase == wanted }
end


### Debugging output: writes the space-joined +msgs+ to $stderr, but only
### when $DEBUG is set.
def self::debug_msg( *msgs ) # :nodoc:
  $stderr.puts msgs.join(" ") if $DEBUG
end


### Add an lprintf formatter named +name+ that will use the specified +callback+ method.
### The name of the formatter is the placeholder that will be used in the
### format string, and the +callback+ is the method to call on the english-language
### inflector for the lprintf argument, and can either be an object that responds to
### #call, or the name of a method to call as a Symbol.
###
### Using a Symbol:
###
###   def plural( count=2 )
###     # return the plural of the inflected object
###   end
###   Linguistics::EN.register_lprintf_formatter :PL, :plural
###
### Using a method:
###
###   Linguistics::EN.register_lprintf_formatter :PL, method( :plural )
###
### Using a block:
###
###   Linguistics::EN.register_lprintf_formatter :PL do |obj|
###     obj.en.plural
###   end
###
### The callback is stored as a Proc (via #to_proc) under +name+. Raises a
### LocalJumpError if neither a +callback+ nor a block is given.
def self::register_lprintf_formatter( name, callback=nil, &block )
  # Capture the block explicitly: a bare Proc.new can no longer pick up the
  # calling method's block on Ruby 3.0 and later (it raises ArgumentError).
  callback ||= block
  raise LocalJumpError, "no callback or block given" unless callback

  # Goes through the public accessor, which returns the formatter Hash.
  self.lprintf_formatters[ name ] = callback.to_proc
end


### Return +true+ if the current thread is running in 'classical' mode,
### i.e., its THREAD_CLASSICAL_KEY thread-local is set to a true value.
def self::classical?
  return Thread.current[ THREAD_CLASSICAL_KEY ] ? true : false
end


### Set classical mode for the current thread inside the block, then
### put back whatever the setting was before, even if the block raises.
def self::in_classical_mode
  old_setting = Thread.current[ THREAD_CLASSICAL_KEY ]
  Thread.current[ THREAD_CLASSICAL_KEY ] = true

  yield
ensure
  Thread.current[ THREAD_CLASSICAL_KEY ] = old_setting
end


#################################################################
### P U B L I C F U N C T I O N S
#################################################################

### Format the given +fmt+ string by replacing %-escaped sequences with the
### result of performing a specified operation on the corresponding
### argument, ala Kernel.sprintf.
### %PL::
###   Plural.
### %A, %AN::
###   Prepend indefinite article.
### %NO::
###   Zero-quantified phrase.
### %NUMWORDS::
###   Convert a number into the corresponding words.
### %CONJUNCT::
###   Conjunction.
419 | def lprintf( *args ) 420 | return self.to_s.gsub( /%([A-Z_]+)/ ) do |match| 421 | op = $1.to_s.upcase.to_sym 422 | if (( callback = Linguistics::EN.lprintf_formatters[op] )) 423 | arg = args.shift 424 | callback.call( arg.en ) 425 | else 426 | raise "no such formatter %p" % [ op ] 427 | end 428 | end 429 | end 430 | 431 | 432 | # Add 'english' to the list of default languages 433 | Linguistics.register_language( :en, self ) 434 | 435 | 436 | end # module Linguistics::EN 437 | 438 | -------------------------------------------------------------------------------- /lib/linguistics/en/numbers.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | require 'linguistics/en' unless defined?( Linguistics::EN ) 4 | 5 | # Numeric methods for the English-language Linguistics module. 6 | module Linguistics::EN::Numbers 7 | 8 | # Register this module to the list of modules to include 9 | Linguistics::EN.register_extension( self ) 10 | 11 | # 12 | # Numerals, ordinals, and numbers-to-words 13 | # 14 | 15 | # Default configuration arguments for the #numwords function 16 | NUMWORD_DEFAULTS = { 17 | :group => 0, 18 | :comma => ', ', 19 | :and => ' and ', 20 | :zero => 'zero', 21 | :decimal => 'point', 22 | :asArray => false, 23 | } 24 | 25 | # Default configuration arguments for the #quantify function 26 | QUANTIFY_DEFAULTS = { 27 | :joinword => " of ", 28 | } 29 | 30 | # Default ranges for #quantify 31 | SEVERAL_RANGE = 2..5 32 | NUMBER_RANGE = 6..19 33 | NUMEROUS_RANGE = 20..45 34 | MANY_RANGE = 46..99 35 | 36 | # Numerical inflections 37 | NTH = { 38 | 0 => 'th', 39 | 1 => 'st', 40 | 2 => 'nd', 41 | 3 => 'rd', 42 | 4 => 'th', 43 | 5 => 'th', 44 | 6 => 'th', 45 | 7 => 'th', 46 | 8 => 'th', 47 | 9 => 'th', 48 | 11 => 'th', 49 | 12 => 'th', 50 | 13 => 'th', 51 | } 52 | 53 | # Ordinal word parts 54 | ORDINALS = { 55 | 'ty' => 'tieth', 56 | 'one' => 'first', 57 | 'two' => 'second', 58 | 'three' => 'third', 59 | 'five' => 
'fifth', 60 | 'eight' => 'eighth', 61 | 'nine' => 'ninth', 62 | 'twelve' => 'twelfth', 63 | } 64 | ORDINAL_SUFFIXES = ORDINALS.keys.join("|") + "|" 65 | ORDINALS[""] = 'th' 66 | 67 | # Numeral names 68 | UNITS = [''] + %w[one two three four five six seven eight nine] 69 | TEENS = %w[ten eleven twelve thirteen fourteen 70 | fifteen sixteen seventeen eighteen nineteen] 71 | TENS = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety] 72 | THOUSANDS = [' ', ' thousand'] + %w[ 73 | m b tr quadr quint sext sept oct non dec undec duodec tredec 74 | quattuordec quindec sexdec septemdec octodec novemdec vigint 75 | ].collect {|prefix| ' ' + prefix + 'illion'} 76 | 77 | 78 | # A collection of functions for transforming digits into word 79 | # phrases. Indexed by the number of digits being transformed; e.g., 80 | # NUMBER_TO_WORDS_FUNCTIONS[2] is the function for transforming 81 | # double-digit numbers. 82 | NUMBER_TO_WORDS_FUNCTIONS = [ 83 | proc {|*args| raise "No digits (#{args.inspect})"}, 84 | 85 | # Single-digits 86 | proc {|zero,x| 87 | (x.nonzero? ? to_units(x) : "#{zero} ") 88 | }, 89 | 90 | # Double-digits 91 | proc {|zero,x,y| 92 | if x.nonzero? 93 | to_tens( x, y ) 94 | elsif y.nonzero? 95 | "#{zero} " + NUMBER_TO_WORDS_FUNCTIONS[1].call( zero, y ) 96 | else 97 | ([zero] * 2).join(" ") 98 | end 99 | }, 100 | 101 | # Triple-digits 102 | proc {|zero,x,y,z| 103 | NUMBER_TO_WORDS_FUNCTIONS[1].call(zero,x) + 104 | NUMBER_TO_WORDS_FUNCTIONS[2].call(zero,y,z) 105 | } 106 | ] 107 | 108 | 109 | ### Return the specified number as english words. One or more configuration 110 | ### values may be passed to control the returned String: 111 | ### 112 | ### [:group] 113 | ### Controls how many numbers at a time are grouped together. 
###   Valid values
###   are 0 (normal grouping), 1 (single-digit
###   grouping, e.g., "one, two, three, four"), 2
###   (double-digit grouping, e.g., "twelve, thirty-four", or 3
###   (triple-digit grouping, e.g., "one twenty-three, four").
### [:comma]
###   Set the character/s used to separate word groups. Defaults to
###   ", ".
### [:and]
###   Set the word and/or characters used where ' and ' (the
###   default) is normally used. Setting :and to
###   ' ', for example, will cause 2556 to be
###   returned as "two-thousand, five hundred fifty-six" instead of
###   "two-thousand, five hundred and fifty-six".
### [:zero]
###   Set the word used to represent the numeral 0 in the
###   result. 'zero' is the default.
### [:decimal]
###   Set the translation of any decimal points in the number; the default
###   is 'point'.
### [:as_array]
###   If set to a true value, the number will be returned as an array of
###   word groups instead of a String. The older :asArray spelling is
###   honored as well.
###
### Raises a RuntimeError if :group is not between 0 and 3.
def numwords( hashargs={} )
  # Work on a copy: the ordinal suffix is stripped in place below, and
  # #to_s may hand back the caller's own String -- NOTE(review): depends on
  # what the inflected object's #to_s returns; confirm.
  num = self.to_s.dup
  self.log.debug "Turning %p into number words..." % [ num ]
  config = NUMWORD_DEFAULTS.merge( hashargs )
  raise "Bad chunking option: #{config[:group]}" unless
    config[:group].between?( 0, 3 )

  # Array of number parts: first is everything to the left of the first
  # decimal, followed by any groups of decimal-delimited numbers after that
  parts = []

  # Wordify any sign prefix
  sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

  # Strip any ordinal suffixes
  ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

  # Split the number into chunks delimited by '.'. In normal grouping only
  # the first '.' splits; with custom grouping every '.' does.
  chunks =
    if config[:decimal].empty?
      [ num ]
    elsif config[:group].nonzero?
      num.split( /\./ )
    else
      num.split( /\./, 2 )
    end

  # Wordify each chunk, pushing arrays into the parts array
  chunks.each_with_index do |chunk,section|
    chunk.gsub!( /\D+/, '' )
    self.log.debug " working on chunk %p (section %d)" % [ chunk, section ]

    # If there's nothing in this chunk of the number, set it to zero
    # unless it's the whole-number part, in which case just push an
    # empty array.
    if chunk.empty?
      self.log.debug " chunk is empty..."
      if section.zero?
        self.log.debug " skipping the empty whole-number part"
        parts.push []
        next
      end
    end

    # The second and succeeding parts of a non-grouped number are read
    # out digit by digit; everything else uses the configured grouping.
    chunk_config =
      if config[:group].zero? && section.nonzero?
        config.merge( :group => 1 )
      else
        config
      end
    parts.push number_to_words( chunk, chunk_config )
    self.log.debug " added %p" % [ parts.last ]
  end

  self.log.debug "Parts => %p" % [ parts ]

  # Turn the last word of the whole-number part back into an ordinal if
  # the original number came in that way.
  if ord && !parts[0].empty?
    self.log.debug " turning the last whole-number part back into an ordinal, since it " +
      "came in that way"
    # #ordinal takes no arguments and works on the inflected object, so
    # inflect the word itself (the same route #ordinate takes).
    parts[0][-1] = parts[0].last.en.ordinal
  end

  # If the caller's expecting an Array return, just flatten and return the
  # parts array. NUMWORD_DEFAULTS spells the option :asArray, so accept
  # both spellings.
  if config[:as_array] || config[:asArray]
    self.log.debug " returning the number parts as an Array"
    parts[0].unshift( sign ) unless sign.empty?
    return parts.flatten
  end

  # Catenate each sub-parts array into a whole number part and one or more
  # post-decimal parts. If grouping is turned on, all sub-parts get joined
  # with commas, otherwise just the whole-number part is.
  if config[:group].zero?
    self.log.debug " no custom grouping"
    if parts[0].length > 1
      self.log.debug " whole and decimal part; working on the whole number first"

      # Join all but the last part together with commas
      wholenum = parts[0][0...-1].join( config[:comma] )

      # If the last part is just a single word, append it to the
      # wholenum part with an 'and'. This is to get things like 'three
      # thousand and three' instead of 'three thousand, three'.
      if /^\s*(\S+)\s*$/ =~ parts[0].last
        self.log.debug "last word is a single word; using the 'and' separator: %p" %
          [ config[:and] ]
        wholenum += config[:and] + parts[0].last
      else
        self.log.debug "last word has multiple words; using the comma separator: %p" %
          [ config[:comma] ]
        wholenum += config[:comma] + parts[0].last
      end
    else
      self.log.debug " non-decimal."
      wholenum = parts[0][0]
    end

    decimals = parts[1..-1].collect {|part| part.join(" ")}
    self.log.debug " wholenum: %p; decimals: %p" % [ wholenum, decimals ]

    # Join with the configured decimal; if it's empty, just join with
    # spaces.
    if config[:decimal].empty?
      self.log.debug " joining with the spaces since no decimal is configured"
      separator = " "
    else
      self.log.debug " joining with the configured decimal: %p" % [ config[:decimal] ]
      separator = " #{config[:decimal]} "
    end

    # Keep the sign word apart from the number: "minus five", not
    # "minusfive".
    prefix = sign.empty? ? '' : "#{sign} "
    return prefix + ([ wholenum ] + decimals).join( separator ).strip

  else
    self.log.debug " grouping with decimal %p and comma %p" %
      config.values_at( :decimal, :comma )
    return parts.compact.
      separate( config[:decimal] ).
      delete_if {|el| el.empty?}.
      join( config[:comma] ).
      strip
  end
end
Linguistics::EN.register_lprintf_formatter :NUMWORDS, :numwords


### Return the inflected object as an ordinal. Objects that respond to
### #to_int come back as a numeral with a suffix (e.g., "5th"); anything
### else is stringified and has its trailing number word replaced with
### the ordinal form from ORDINALS.
def ordinal
  if self.respond_to?( :to_int )
    number = self.to_int

    # Pick the suffix from the magnitude: Ruby's % never goes negative for
    # a positive divisor, so -1 % 100 is 99 and would yield "-1th".
    magnitude = number.abs
    return "%d%s" % [ number, (NTH[ magnitude % 100 ] || NTH[ magnitude % 10 ]) ]

  else
    number = self.to_s
    self.log.debug "Making an ordinal out of a non-Integer (%p)" % [ number ]
    return number.sub( /(#{ORDINAL_SUFFIXES})\Z/ ) { ORDINALS[$1] }
  end
end
Linguistics::EN.register_lprintf_formatter :ORD, :ordinal


### Transform the inflected number into an ordinate word by turning it into
### number words and then making those ordinal.
def ordinate
  return self.numwords.en.ordinal
end


### Return a phrase describing the specified +number+ of objects in the
### inflected object in general terms. The following options can be used to
### control the makeup of the returned quantity String:
###
### [:joinword]
###   Sets the word (and any surrounding spaces) used as the word separating the
###   quantity from the noun in the resulting string. Defaults to ' of
###   '.
def quantify( number=0, args={} )
  phrase = self.to_s
  self.log.debug "Quantifying %d instances of %p" % [ number, phrase ]

  num = number.to_i
  config = QUANTIFY_DEFAULTS.merge( args )

  case num
  when 0
    phrase.en.no
  when 1
    phrase.en.a
  when SEVERAL_RANGE
    "several " + phrase.en.plural( num )
  when NUMBER_RANGE
    "a number of " + phrase.en.plural( num )
  when NUMEROUS_RANGE
    "numerous " + phrase.en.plural( num )
  when MANY_RANGE
    "many " + phrase.en.plural( num )
  else

    # Anything bigger than the MANY_RANGE gets described like
    # "hundreds of thousands of..." or "millions of..."
    # depending, of course, on how many there are.
    # NOTE(review): a negative count also lands here, and Math::log10
    # raises Math::DomainError for it.
    thousands, subthousands = Math::log10( num ).to_i.divmod( 3 )
    self.log.debug "thousands = %p, subthousands = %p" % [ thousands, subthousands ]

    stword =
      case subthousands
      when 2
        "hundreds"
      when 1
        "tens"
      else
        nil
      end

    unless thousands.zero?
      thword = to_thousands( thousands ).strip.en.plural
    end

    [ # Hundreds (of)...
      stword,

      # thousands (of)
      thword,

      # stars.
      phrase.en.plural(number)
    ].compact.join( config[:joinword] )
  end
end
Linguistics::EN.register_lprintf_formatter :QUANT, :quantify


###############
module_function
###############

### Transform the specified number of units-place numerals into a
### word-phrase at the given number of +thousands+ places.
def to_units( units, thousands=0 )
  return UNITS[ units ] + to_thousands( thousands )
end


### Transform the specified number of tens- and units-place numerals into a
### word-phrase at the given number of +thousands+ places. Raises an
### ArgumentError if +tens+ or +units+ is nil.
def to_tens( tens, units, thousands=0 )
  raise ArgumentError, "tens: no implicit conversion from nil" unless tens
  raise ArgumentError, "units: no implicit conversion from nil" unless units

  # Ten through nineteen have their own names
  return TEENS[ units ] + to_thousands( thousands ) if tens == 1

  hyphen = ( tens.nonzero? && units.nonzero? ) ? '-' : ''
  return TENS[ tens ] + hyphen + to_units( units, thousands )
end


### Transform the specified number of hundreds-, tens-, and units-place
### numerals into a word phrase. If the number of thousands (+thousands+) is
### greater than 0, it will be used to determine where the decimal point is
### in relation to the hundreds-place number.
384 | def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " ) 385 | joinword = ' ' if joinword.empty? 386 | if hundreds.nonzero? 387 | return to_units( hundreds ) + " hundred" + 388 | (tens.nonzero? || units.nonzero? ? joinword : '') + 389 | to_tens( tens, units ) + 390 | to_thousands( thousands ) 391 | elsif tens.nonzero? || units.nonzero? 392 | return to_tens( tens, units ) + to_thousands( thousands ) 393 | else 394 | return nil 395 | end 396 | end 397 | 398 | ### Transform the specified number into one or more words like 'thousand', 399 | ### 'million', etc. Uses the thousands (American) system. 400 | def to_thousands( thousands=0 ) 401 | parts = [] 402 | (0..thousands).step( THOUSANDS.length - 1 ) {|i| 403 | if i.zero? 404 | parts.push THOUSANDS[ thousands % (THOUSANDS.length - 1) ] 405 | else 406 | parts.push THOUSANDS.last 407 | end 408 | } 409 | 410 | return parts.join(" ") 411 | end 412 | 413 | 414 | ### Return the specified number +number+ as an array of number phrases. 415 | def number_to_words( number, config ) 416 | return [config[:zero]] if number.to_i.zero? 417 | 418 | if config[:group].nonzero? then 419 | return number_to_custom_word_groups( number, config[:group], config[:zero] ) 420 | else 421 | return number_to_standard_word_groups( number, config[:and] ) 422 | end 423 | end 424 | 425 | 426 | ### Split the given +number+ up into groups of +groupsize+ and return 427 | ### them as an Array of words. Use +zeroword+ for any occurences of '0'. 428 | def number_to_custom_word_groups( number, groupsize, zeroword="zero" ) 429 | self.log.debug "Making custom word groups of %d digits out of %p" % [ groupsize, number ] 430 | 431 | # Build a Regexp with number of digits. Any past 432 | # the first are optional. 433 | re = Regexp.new( "(\\d)" + ("(\\d)?" 
* (groupsize - 1)) ) 434 | self.log.debug " regex for matching groups of %d digits is %p" % [ groupsize, re ] 435 | 436 | # Scan the string, and call the word-chunk function that deals with 437 | # chunks of the found number of digits. 438 | return number.to_s.scan( re ).collect do |digits| 439 | self.log.debug " digits = %p" % [ digits ] 440 | numerals = digits.flatten.compact.collect {|i| i.to_i} 441 | self.log.debug " numerals = %p" % [ numerals ] 442 | 443 | fn = NUMBER_TO_WORDS_FUNCTIONS[ numerals.length ] 444 | self.log.debug " number to word function is #%d: %p" % [ numerals.length, fn ] 445 | fn.call( zeroword, *numerals ).strip 446 | end 447 | end 448 | 449 | 450 | ### Split the given +number+ up into groups of three and return 451 | ### the Array of words describing each group in the standard style. 452 | def number_to_standard_word_groups( number, andword="and" ) 453 | phrase = number.to_s 454 | phrase.sub!( /\A\s*0+/, '' ) 455 | chunks = [] 456 | mill = 0 457 | self.log.debug "Making standard word groups out of %p" % [ phrase ] 458 | 459 | # Match backward from the end of the digits in the string, turning 460 | # chunks of three, of two, and of one into words. 461 | mill += 1 while 462 | phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) do 463 | words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill, andword ) 464 | chunks.unshift words.strip.squeeze(' ') unless words.nil? 
465 | '' 466 | end 467 | 468 | phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) do 469 | chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ') 470 | '' 471 | end 472 | 473 | phrase.sub!( /(\d)(?=\D*\Z)/ ) do 474 | chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ') 475 | '' 476 | end 477 | 478 | return chunks 479 | end 480 | 481 | 482 | end # module Linguistics::EN::Numbers 483 | 484 | -------------------------------------------------------------------------------- /spec/linguistics/en/infinitives_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env spec -cfs 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | require 'linguistics/en/infinitives' 18 | 19 | 20 | describe Linguistics::EN::Infinitives do 21 | 22 | before( :all ) do 23 | setup_logging( :fatal ) 24 | Linguistics.use( :en, :proxy => true ) 25 | include Linguistics::EN 26 | end 27 | 28 | after( :all ) do 29 | reset_logging() 30 | end 31 | 32 | 33 | describe "Infinitive object class" do 34 | it "compares as equal if its primary word is equal" do 35 | Linguistics::EN::Infinitives::Infinitive.new( 'basse', 'bass', 's', '2' ).should == 36 | 'basse' 37 | end 38 | 39 | it "compares as equal if its secondary word is equal" do 40 | Linguistics::EN::Infinitives::Infinitive.new( 'basse', 'bass', 's', '2' ).should == 41 | 'bass' 42 | end 43 | 44 | end 45 | 46 | 47 | it "uses rule 1 when calculating the infinitive of 'aches'" do 48 | "aches".en.infinitive.should == 'ache' 49 | "aches".en.infinitive.rule.should == '1' 50 | end 51 | 52 | it "uses rule 2 when calculating the infinitive of 'vases'" do 53 | 
"vases".en.infinitive.should == 'vase' 54 | "vases".en.infinitive.rule.should == '2' 55 | end 56 | 57 | it "uses rule 2 when calculating the infinitive of 'basses'" do 58 | "basses".en.infinitive.should == 'bass' 59 | "basses".en.infinitive.rule.should == '2' 60 | end 61 | 62 | it "uses rule 3 when calculating the infinitive of 'axes'" do 63 | "axes".en.infinitive.should == 'axe' 64 | "axes".en.infinitive.rule.should == '3' 65 | end 66 | 67 | it "uses rule 3 when calculating the infinitive of 'fixes'" do 68 | "fixes".en.infinitive.should == 'fix' 69 | "fixes".en.infinitive.rule.should == '3' 70 | end 71 | 72 | it "uses rule 4 when calculating the infinitive of 'hazes'" do 73 | "hazes".en.infinitive.should == 'haze' 74 | "hazes".en.infinitive.rule.should == '4' 75 | end 76 | 77 | it "uses rule 4 when calculating the infinitive of 'buzzes'" do 78 | "buzzes".en.infinitive.should == 'buzz' 79 | "buzzes".en.infinitive.rule.should == '4' 80 | end 81 | 82 | it "uses rule 6a when calculating the infinitive of 'caress'" do 83 | "caress".en.infinitive.should == 'caress' 84 | "caress".en.infinitive.rule.should == '6a' 85 | end 86 | 87 | it "uses rule 6b when calculating the infinitive of 'bans'" do 88 | "bans".en.infinitive.should == 'ban' 89 | "bans".en.infinitive.rule.should == '6b' 90 | end 91 | 92 | it "uses rule 7 when calculating the infinitive of 'Jones's'" do 93 | "Jones's".en.infinitive.should == 'Jones' 94 | "Jones's".en.infinitive.rule.should == '7' 95 | end 96 | 97 | it "uses rule 8 when calculating the infinitive of 'creater'" do 98 | "creater".en.infinitive.should == 'creater' 99 | "creater".en.infinitive.rule.should == '8' 100 | end 101 | 102 | it "uses rule 9 when calculating the infinitive of 'reacter'" do 103 | "reacter".en.infinitive.should == 'reacter' 104 | "reacter".en.infinitive.rule.should == '9' 105 | end 106 | 107 | it "uses rule 10 when calculating the infinitive of 'copier'" do 108 | "copier".en.infinitive.should == 'copy' 109 | 
"copier".en.infinitive.rule.should == '10' 110 | end 111 | 112 | it "uses rule 11 when calculating the infinitive of 'baker'" do 113 | "baker".en.infinitive.should == 'bake' 114 | "baker".en.infinitive.rule.should == '11' 115 | end 116 | 117 | it "uses rule 11 when calculating the infinitive of 'smaller'" do 118 | "smaller".en.infinitive.should == 'small' 119 | "smaller".en.infinitive.rule.should == '11' 120 | end 121 | 122 | it "uses rule 12a when calculating the infinitive of 'curried'" do 123 | "curried".en.infinitive.should == 'curry' 124 | "curried".en.infinitive.rule.should == '12a' 125 | end 126 | 127 | it "uses rule 12b when calculating the infinitive of 'bored'" do 128 | "bored".en.infinitive.should == 'bore' 129 | "bored".en.infinitive.rule.should == '12b' 130 | end 131 | 132 | it "uses rule 12b when calculating the infinitive of 'seated'" do 133 | "seated".en.infinitive.should == 'seat' 134 | "seated".en.infinitive.rule.should == '12b' 135 | end 136 | 137 | it "uses rule 12b when calculating the infinitive of 'tipped'" do 138 | "tipped".en.infinitive.should == 'tip' 139 | "tipped".en.infinitive.rule.should == '12b' 140 | end 141 | 142 | it "uses rule 12b when calculating the infinitive of 'kitted'" do 143 | "kitted".en.infinitive.should == 'kit' 144 | "kitted".en.infinitive.rule.should == '12b' 145 | end 146 | 147 | it "uses rule 12b when calculating the infinitive of 'capped'" do 148 | "capped".en.infinitive.should == 'cap' 149 | "capped".en.infinitive.rule.should == '12b' 150 | end 151 | 152 | it "uses rule 12b when calculating the infinitive of 'chopped'" do 153 | "chopped".en.infinitive.should == 'chop' 154 | "chopped".en.infinitive.rule.should == '12b' 155 | end 156 | 157 | it "uses rule 13a when calculating the infinitive of 'flies'" do 158 | "flies".en.infinitive.should == 'fly' 159 | "flies".en.infinitive.rule.should == '13a' 160 | end 161 | 162 | it "uses rule 13b when calculating the infinitive of 'palates'" do 163 | 
"palates".en.infinitive.should == 'palate' 164 | "palates".en.infinitive.rule.should == '13b' 165 | end 166 | 167 | it "uses rule 14a when calculating the infinitive of 'liveliest'" do 168 | "liveliest".en.infinitive.should == 'lively' 169 | "liveliest".en.infinitive.rule.should == '14a' 170 | end 171 | 172 | it "uses rule 14b when calculating the infinitive of 'wisest'" do 173 | "wisest".en.infinitive.should == 'wise' 174 | "wisest".en.infinitive.rule.should == '14b' 175 | end 176 | 177 | it "uses rule 14b when calculating the infinitive of 'strongest'" do 178 | "strongest".en.infinitive.should == 'strong' 179 | "strongest".en.infinitive.rule.should == '14b' 180 | end 181 | 182 | it "uses rule 15 when calculating the infinitive of 'living'" do 183 | "living".en.infinitive.should == 'live' 184 | "living".en.infinitive.rule.should == '15' 185 | end 186 | 187 | it "uses rule 15 when calculating the infinitive of 'laughing'" do 188 | "laughing".en.infinitive.should == 'laugh' 189 | "laughing".en.infinitive.rule.should == '15' 190 | end 191 | 192 | it "uses rule 15 when calculating the infinitive of 'swaying'" do 193 | "swaying".en.infinitive.should == 'sway' 194 | "swaying".en.infinitive.rule.should == '15' 195 | end 196 | 197 | it "uses rule 15 when calculating the infinitive of 'catching'" do 198 | "catching".en.infinitive.should == 'catch' 199 | "catching".en.infinitive.rule.should == '15' 200 | end 201 | 202 | it "uses rule 15 when calculating the infinitive of 'smiling'" do 203 | "smiling".en.infinitive.should == 'smile' 204 | "smiling".en.infinitive.rule.should == '15' 205 | end 206 | 207 | it "uses rule 15 when calculating the infinitive of 'swimming'" do 208 | "swimming".en.infinitive.should == 'swim' 209 | "swimming".en.infinitive.rule.should == '15' 210 | end 211 | 212 | it "uses rule 15 when calculating the infinitive of 'running'" do 213 | "running".en.infinitive.should == 'run' 214 | "running".en.infinitive.rule.should == '15' 215 | end 216 | 217 | it 
"uses rule 15 when calculating the infinitive of 'floating'" do 218 | "floating".en.infinitive.should == 'float' 219 | "floating".en.infinitive.rule.should == '15' 220 | end 221 | 222 | it "uses rule 15 when calculating the infinitive of 'keyboarding'" do 223 | "keyboarding".en.infinitive.should == 'keyboard' 224 | "keyboarding".en.infinitive.rule.should == '15' 225 | end 226 | 227 | it "uses rule 15 when calculating the infinitive of 'wrestling'" do 228 | "wrestling".en.infinitive.should == 'wrestle' 229 | "wrestling".en.infinitive.rule.should == '15' 230 | end 231 | 232 | it "uses rule 15 when calculating the infinitive of 'traveling'" do 233 | "traveling".en.infinitive.should == 'travel' 234 | "traveling".en.infinitive.rule.should == '15' 235 | end 236 | 237 | it "uses rule 15 when calculating the infinitive of 'traipsing'" do 238 | "traipsing".en.infinitive.should == 'traipse' 239 | "traipsing".en.infinitive.rule.should == '15' 240 | end 241 | 242 | it "uses rule 16 when calculating the infinitive of 'stylist'" do 243 | "stylist".en.infinitive.should == 'style' 244 | "stylist".en.infinitive.rule.should == '16' 245 | end 246 | 247 | it "uses rule 16 when calculating the infinitive of 'dentist'" do 248 | "dentist".en.infinitive.should == 'dent' 249 | "dentist".en.infinitive.rule.should == '16' 250 | end 251 | 252 | it "uses rule 17 when calculating the infinitive of 'cubism'" do 253 | "cubism".en.infinitive.should == 'cube' 254 | "cubism".en.infinitive.rule.should == '17' 255 | end 256 | 257 | it "uses rule 17 when calculating the infinitive of 'socialism'" do 258 | "socialism".en.infinitive.should == 'social' 259 | "socialism".en.infinitive.rule.should == '17' 260 | end 261 | 262 | it "uses rule 18 when calculating the infinitive of 'scarcity'" do 263 | "scarcity".en.infinitive.should == 'scarce' 264 | "scarcity".en.infinitive.rule.should == '18' 265 | end 266 | 267 | it "uses rule 18 when calculating the infinitive of 'rapidity'" do 268 | 
"rapidity".en.infinitive.should == 'rapid' 269 | "rapidity".en.infinitive.rule.should == '18' 270 | end 271 | 272 | it "uses rule 19 when calculating the infinitive of 'immunize'" do 273 | "immunize".en.infinitive.should == 'immune' 274 | "immunize".en.infinitive.rule.should == '19' 275 | end 276 | 277 | it "uses rule 19 when calculating the infinitive of 'lionize'" do 278 | "lionize".en.infinitive.should == 'lion' 279 | "lionize".en.infinitive.rule.should == '19' 280 | end 281 | 282 | it "uses rule 20c when calculating the infinitive of 'livable'" do 283 | "livable".en.infinitive.should == 'live' 284 | "livable".en.infinitive.rule.should == '20c' 285 | end 286 | 287 | it "uses rule 20c when calculating the infinitive of 'portable'" do 288 | "portable".en.infinitive.should == 'port' 289 | "portable".en.infinitive.rule.should == '20c' 290 | end 291 | 292 | it "uses rule 22 when calculating the infinitive of 'nobility'" do 293 | "nobility".en.infinitive.should == 'noble' 294 | "nobility".en.infinitive.rule.should == '22' 295 | end 296 | 297 | it "uses rule 23 when calculating the infinitive of 'identifiable'" do 298 | "identifiable".en.infinitive.should == 'identify' 299 | "identifiable".en.infinitive.rule.should == '23' 300 | end 301 | 302 | it "uses rule 24 when calculating the infinitive of 'psychologist'" do 303 | "psychologist".en.infinitive.should == 'psychology' 304 | "psychologist".en.infinitive.rule.should == '24' 305 | end 306 | 307 | it "uses rule 25 when calculating the infinitive of 'photographic'" do 308 | "photographic".en.infinitive.should == 'photography' 309 | "photographic".en.infinitive.rule.should == '25' 310 | end 311 | 312 | it "uses rule 26 when calculating the infinitive of 'stylistic'" do 313 | "stylistic".en.infinitive.should == 'stylist' 314 | "stylistic".en.infinitive.rule.should == '26' 315 | end 316 | 317 | it "uses rule 27 when calculating the infinitive of 'martensitic'" do 318 | "martensitic".en.infinitive.should == 'martensite' 319 
| "martensitic".en.infinitive.rule.should == '27' 320 | end 321 | 322 | it "uses rule 27 when calculating the infinitive of 'politic'" do 323 | "politic".en.infinitive.should == 'polite' 324 | "politic".en.infinitive.rule.should == '27' 325 | end 326 | 327 | it "uses rule 28 when calculating the infinitive of 'ladylike'" do 328 | "ladylike".en.infinitive.should == 'lady' 329 | "ladylike".en.infinitive.rule.should == '28' 330 | end 331 | 332 | it "uses rule 29 when calculating the infinitive of 'biologic'" do 333 | "biologic".en.infinitive.should == 'biology' 334 | "biologic".en.infinitive.rule.should == '29' 335 | end 336 | 337 | it "uses rule 30 when calculating the infinitive of 'battlement'" do 338 | "battlement".en.infinitive.should == 'battle' 339 | "battlement".en.infinitive.rule.should == '30' 340 | end 341 | 342 | it "uses rule 31 when calculating the infinitive of 'supplemental'" do 343 | "supplemental".en.infinitive.should == 'supplement' 344 | "supplemental".en.infinitive.rule.should == '31' 345 | end 346 | 347 | it "uses rule 32 when calculating the infinitive of 'thermometry'" do 348 | "thermometry".en.infinitive.should == 'thermometer' 349 | "thermometry".en.infinitive.rule.should == '32' 350 | end 351 | 352 | it "uses rule 33 when calculating the infinitive of 'inadvertence'" do 353 | "inadvertence".en.infinitive.should == 'inadvertent' 354 | "inadvertence".en.infinitive.rule.should == '33' 355 | end 356 | 357 | it "uses rule 34 when calculating the infinitive of 'potency'" do 358 | "potency".en.infinitive.should == 'potent' 359 | "potency".en.infinitive.rule.should == '34' 360 | end 361 | 362 | it "uses rule 35 when calculating the infinitive of 'discipleship'" do 363 | "discipleship".en.infinitive.should == 'disciple' 364 | "discipleship".en.infinitive.rule.should == '35' 365 | end 366 | 367 | it "uses rule 36 when calculating the infinitive of 'mystical'" do 368 | "mystical".en.infinitive.should == 'mystic' 369 | 
"mystical".en.infinitive.rule.should == '36' 370 | end 371 | 372 | it "uses rule 37 when calculating the infinitive of 'regional'" do 373 | "regional".en.infinitive.should == 'region' 374 | "regional".en.infinitive.rule.should == '37' 375 | end 376 | 377 | it "uses rule 37 when calculating the infinitive of 'national'" do 378 | "national".en.infinitive.should == 'nation' 379 | "national".en.infinitive.rule.should == '37' 380 | end 381 | 382 | it "uses rule 38 when calculating the infinitive of 'horribly'" do 383 | "horribly".en.infinitive.should == 'horrible' 384 | "horribly".en.infinitive.rule.should == '38' 385 | end 386 | 387 | it "uses rule 39 when calculating the infinitive of 'scantily'" do 388 | "scantily".en.infinitive.should == 'scanty' 389 | "scantily".en.infinitive.rule.should == '39' 390 | end 391 | 392 | it "uses rule 40 when calculating the infinitive of 'partly'" do 393 | "partly".en.infinitive.should == 'part' 394 | "partly".en.infinitive.rule.should == '40' 395 | end 396 | 397 | it "uses rule 41a when calculating the infinitive of 'dutiful'" do 398 | "dutiful".en.infinitive.should == 'duty' 399 | "dutiful".en.infinitive.rule.should == '41a' 400 | end 401 | 402 | it "uses rule 41b when calculating the infinitive of 'harmful'" do 403 | "harmful".en.infinitive.should == 'harm' 404 | "harmful".en.infinitive.rule.should == '41b' 405 | end 406 | 407 | it "uses rule 42a when calculating the infinitive of 'likelihood'" do 408 | "likelihood".en.infinitive.should == 'likely' 409 | "likelihood".en.infinitive.rule.should == '42a' 410 | end 411 | 412 | it "uses rule 42b when calculating the infinitive of 'neighborhood'" do 413 | "neighborhood".en.infinitive.should == 'neighbor' 414 | "neighborhood".en.infinitive.rule.should == '42b' 415 | end 416 | 417 | it "uses rule 42b when calculating the infinitive of 'neighbourhood'" do 418 | "neighbourhood".en.infinitive.should == 'neighbour' 419 | "neighbourhood".en.infinitive.rule.should == '42b' 420 | end 421 | 422 | 
it "uses rule 43a when calculating the infinitive of 'penniless'" do 423 | "penniless".en.infinitive.should == 'penny' 424 | "penniless".en.infinitive.rule.should == '43a' 425 | end 426 | 427 | it "uses rule 43b when calculating the infinitive of 'listless'" do 428 | "listless".en.infinitive.should == 'list' 429 | "listless".en.infinitive.rule.should == '43b' 430 | end 431 | 432 | it "uses rule 44a when calculating the infinitive of 'heartiness'" do 433 | "heartiness".en.infinitive.should == 'hearty' 434 | "heartiness".en.infinitive.rule.should == '44a' 435 | end 436 | 437 | it "uses rule 44b when calculating the infinitive of 'coolness'" do 438 | "coolness".en.infinitive.should == 'cool' 439 | "coolness".en.infinitive.rule.should == '44b' 440 | end 441 | 442 | it "uses rule 45 when calculating the infinitive of 'specification'" do 443 | "specification".en.infinitive.should == 'specify' 444 | "specification".en.infinitive.rule.should == '45' 445 | end 446 | 447 | it "uses rule 46 when calculating the infinitive of 'rationalization'" do 448 | "rationalization".en.infinitive.should == 'rationalize' 449 | "rationalization".en.infinitive.rule.should == '46' 450 | end 451 | 452 | it "uses rule 47 when calculating the infinitive of 'detection'" do 453 | "detection".en.infinitive.should == 'detect' 454 | "detection".en.infinitive.rule.should == '47' 455 | end 456 | 457 | it "uses rule 48 when calculating the infinitive of 'exertion'" do 458 | "exertion".en.infinitive.should == 'exert' 459 | "exertion".en.infinitive.rule.should == '48' 460 | end 461 | 462 | it "uses rule 49 when calculating the infinitive of 'creation'" do 463 | "creation".en.infinitive.should == 'create' 464 | "creation".en.infinitive.rule.should == '49' 465 | end 466 | 467 | it "uses rule 50 when calculating the infinitive of 'creator'" do 468 | "creator".en.infinitive.should == 'create' 469 | "creator".en.infinitive.rule.should == '50' 470 | end 471 | 472 | it "uses rule 51 when calculating the 
infinitive of 'detector'" do 473 | "detector".en.infinitive.should == 'detect' 474 | "detector".en.infinitive.rule.should == '51' 475 | end 476 | 477 | it "uses rule 52 when calculating the infinitive of 'creative'" do 478 | "creative".en.infinitive.should == 'creation' 479 | "creative".en.infinitive.rule.should == '52' 480 | end 481 | 482 | it "uses rule 52 when calculating the infinitive of 'decisive'" do 483 | "decisive".en.infinitive.should == 'decision' 484 | "decisive".en.infinitive.rule.should == '52' 485 | end 486 | 487 | it "uses rule 53 when calculating the infinitive of 'Australian'" do 488 | "Australian".en.infinitive.should == 'Australia' 489 | "Australian".en.infinitive.rule.should == '53' 490 | end 491 | 492 | it "uses rule 54 when calculating the infinitive of 'Jeffersonian'" do 493 | "Jeffersonian".en.infinitive.should == 'Jefferson' 494 | "Jeffersonian".en.infinitive.rule.should == '54' 495 | end 496 | 497 | it "uses irregular rule when calculating the infinitive of 'rove'" do 498 | "rove".en.infinitive.should == 'reeve' 499 | "rove".en.infinitive.rule.should == 'irregular' 500 | end 501 | 502 | it "uses irregular rule when calculating the infinitive of 'dove'" do 503 | "dove".en.infinitive.should == 'dive' 504 | "dove".en.infinitive.rule.should == 'irregular' 505 | end 506 | 507 | it "uses irregular rule when calculating the infinitive of 'snuck'" do 508 | "snuck".en.infinitive.should == 'sneak' 509 | "snuck".en.infinitive.rule.should == 'irregular' 510 | end 511 | 512 | it "uses irregular rule when calculating the infinitive of 'wot'" do 513 | "wot".en.infinitive.should == 'wit' 514 | "wot".en.infinitive.rule.should == 'irregular' 515 | end 516 | 517 | end 518 | 519 | -------------------------------------------------------------------------------- /lib/linguistics/iso639.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # coding: utf-8 3 | 4 | require 'linguistics' unless defined?( 
Linguistics ) 5 | 6 | # A hash of International 2- and 3-letter ISO639-1 and ISO639-2 7 | # language codes information. Each entry is keyed by all of its 8 | # language codes as Symbols, and the entry itself has three keys: 9 | # 10 | # [:codes] 11 | # All of the codes known for this language as Strings 12 | # [:eng_name] 13 | # The English-language name of the language. 14 | # [:fre_name] 15 | # The French-language name of the language. 16 | # 17 | # Entries for 'ja' and 'en': 18 | # 19 | # irb > Linguistics::ISO639::LANGUAGE_CODES[:en] 20 | # => {:eng_name=>"English", :fre_name=>"anglais", :codes=>["en", "eng"]} 21 | # irb > Linguistics::ISO639::LANGUAGE_CODES[:eng] 22 | # => {:eng_name=>"English", :fre_name=>"anglais", :codes=>["en", "eng"]} 23 | # irb > Linguistics::ISO639::LANGUAGE_CODES[:ja] 24 | # => {:eng_name=>"Japanese", :fre_name=>"japonais", :codes=>["ja", "jpn"]} 25 | 26 | module Linguistics::ISO639 27 | 28 | # Hash of ISO639 2- and 3-letter language codes 29 | LANGUAGE_CODES = {} 30 | 31 | # Read everything after the __END__ 32 | _, data = File.read( __FILE__, :encoding => 'utf-8' ).split( /^__END__$/, 2 ) 33 | 34 | # To read the files, please note that one line of text contains one 35 | # entry. An alpha-3 (bibliographic) code, an alpha-3 (terminologic) 36 | # code (when given), an alpha-2 code (when given), an English name, 37 | # and a French name of a language are all separated by pipe (|) 38 | # characters. If one of these elements is not applicable to the entry, 39 | # the field is left empty, i.e., a pipe (|) character immediately 40 | # follows the preceding entry. The Line terminator is the LF character. 
41 | 42 | # bib_alpha3|term_alpha3|alpha2|eng_name|fre_name 43 | # E.g., "eng||en|English|anglais" 44 | data.lines do |line| 45 | next unless line =~ /\|/ # Skip non-language lines 46 | bib_alpha3, term_alpha3, alpha2, eng_name, fre_name = line.chomp.split( '|', 5 ) 47 | entry = { 48 | :eng_name => eng_name, 49 | :fre_name => fre_name, 50 | :codes => [ bib_alpha3, alpha2, term_alpha3 ].reject {|item| item.empty? } 51 | } 52 | $stderr.puts " adding language code entry %p from line: %p" % 53 | [ entry, line ] if $DEBUG 54 | 55 | LANGUAGE_CODES[ bib_alpha3.to_sym ] = entry 56 | LANGUAGE_CODES[ alpha2.to_sym ] = entry if alpha2 57 | end 58 | 59 | end # module Linguistics::ISO639 60 | 61 | # Data from: http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt 62 | __END__ 63 | aar||aa|Afar|afar 64 | abk||ab|Abkhazian|abkhaze 65 | ace|||Achinese|aceh 66 | ach|||Acoli|acoli 67 | ada|||Adangme|adangme 68 | ady|||Adyghe; Adygei|adyghé 69 | afa|||Afro-Asiatic languages|afro-asiatiques, langues 70 | afh|||Afrihili|afrihili 71 | afr||af|Afrikaans|afrikaans 72 | ain|||Ainu|aïnou 73 | aka||ak|Akan|akan 74 | akk|||Akkadian|akkadien 75 | alb|sqi|sq|Albanian|albanais 76 | ale|||Aleut|aléoute 77 | alg|||Algonquian languages|algonquines, langues 78 | alt|||Southern Altai|altai du Sud 79 | amh||am|Amharic|amharique 80 | ang|||English, Old (ca.450-1100)|anglo-saxon (ca.450-1100) 81 | anp|||Angika|angika 82 | apa|||Apache languages|apaches, langues 83 | ara||ar|Arabic|arabe 84 | arc|||Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)|araméen d'empire (700-300 BCE) 85 | arg||an|Aragonese|aragonais 86 | arm|hye|hy|Armenian|arménien 87 | arn|||Mapudungun; Mapuche|mapudungun; mapuche; mapuce 88 | arp|||Arapaho|arapaho 89 | art|||Artificial languages|artificielles, langues 90 | arw|||Arawak|arawak 91 | asm||as|Assamese|assamais 92 | ast|||Asturian; Bable; Leonese; Asturleonese|asturien; bable; léonais; asturoléonais 93 | ath|||Athapascan languages|athapascanes, langues 94 | 
aus|||Australian languages|australiennes, langues 95 | ava||av|Avaric|avar 96 | ave||ae|Avestan|avestique 97 | awa|||Awadhi|awadhi 98 | aym||ay|Aymara|aymara 99 | aze||az|Azerbaijani|azéri 100 | bad|||Banda languages|banda, langues 101 | bai|||Bamileke languages|bamiléké, langues 102 | bak||ba|Bashkir|bachkir 103 | bal|||Baluchi|baloutchi 104 | bam||bm|Bambara|bambara 105 | ban|||Balinese|balinais 106 | baq|eus|eu|Basque|basque 107 | bas|||Basa|basa 108 | bat|||Baltic languages|baltes, langues 109 | bej|||Beja; Bedawiyet|bedja 110 | bel||be|Belarusian|biélorusse 111 | bem|||Bemba|bemba 112 | ben||bn|Bengali|bengali 113 | ber|||Berber languages|berbères, langues 114 | bho|||Bhojpuri|bhojpuri 115 | bih||bh|Bihari languages|langues biharis 116 | bik|||Bikol|bikol 117 | bin|||Bini; Edo|bini; edo 118 | bis||bi|Bislama|bichlamar 119 | bla|||Siksika|blackfoot 120 | bnt|||Bantu (Other)|bantoues, autres langues 121 | bos||bs|Bosnian|bosniaque 122 | bra|||Braj|braj 123 | bre||br|Breton|breton 124 | btk|||Batak languages|batak, langues 125 | bua|||Buriat|bouriate 126 | bug|||Buginese|bugi 127 | bul||bg|Bulgarian|bulgare 128 | bur|mya|my|Burmese|birman 129 | byn|||Blin; Bilin|blin; bilen 130 | cad|||Caddo|caddo 131 | cai|||Central American Indian languages|amérindiennes de L'Amérique centrale, langues 132 | car|||Galibi Carib|karib; galibi; carib 133 | cat||ca|Catalan; Valencian|catalan; valencien 134 | cau|||Caucasian languages|caucasiennes, langues 135 | ceb|||Cebuano|cebuano 136 | cel|||Celtic languages|celtiques, langues; celtes, langues 137 | cha||ch|Chamorro|chamorro 138 | chb|||Chibcha|chibcha 139 | che||ce|Chechen|tchétchène 140 | chg|||Chagatai|djaghataï 141 | chi|zho|zh|Chinese|chinois 142 | chk|||Chuukese|chuuk 143 | chm|||Mari|mari 144 | chn|||Chinook jargon|chinook, jargon 145 | cho|||Choctaw|choctaw 146 | chp|||Chipewyan; Dene Suline|chipewyan 147 | chr|||Cherokee|cherokee 148 | chu||cu|Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church 
Slavonic|slavon d'église; vieux slave; slavon liturgique; vieux bulgare 149 | chv||cv|Chuvash|tchouvache 150 | chy|||Cheyenne|cheyenne 151 | cmc|||Chamic languages|chames, langues 152 | cop|||Coptic|copte 153 | cor||kw|Cornish|cornique 154 | cos||co|Corsican|corse 155 | cpe|||Creoles and pidgins, English based|créoles et pidgins basés sur l'anglais 156 | cpf|||Creoles and pidgins, French-based |créoles et pidgins basés sur le français 157 | cpp|||Creoles and pidgins, Portuguese-based |créoles et pidgins basés sur le portugais 158 | cre||cr|Cree|cree 159 | crh|||Crimean Tatar; Crimean Turkish|tatar de Crimé 160 | crp|||Creoles and pidgins |créoles et pidgins 161 | csb|||Kashubian|kachoube 162 | cus|||Cushitic languages|couchitiques, langues 163 | cze|ces|cs|Czech|tchèque 164 | dak|||Dakota|dakota 165 | dan||da|Danish|danois 166 | dar|||Dargwa|dargwa 167 | day|||Land Dayak languages|dayak, langues 168 | del|||Delaware|delaware 169 | den|||Slave (Athapascan)|esclave (athapascan) 170 | dgr|||Dogrib|dogrib 171 | din|||Dinka|dinka 172 | div||dv|Divehi; Dhivehi; Maldivian|maldivien 173 | doi|||Dogri|dogri 174 | dra|||Dravidian languages|dravidiennes, langues 175 | dsb|||Lower Sorbian|bas-sorabe 176 | dua|||Duala|douala 177 | dum|||Dutch, Middle (ca.1050-1350)|néerlandais moyen (ca. 
1050-1350) 178 | dut|nld|nl|Dutch; Flemish|néerlandais; flamand 179 | dyu|||Dyula|dioula 180 | dzo||dz|Dzongkha|dzongkha 181 | efi|||Efik|efik 182 | egy|||Egyptian (Ancient)|égyptien 183 | eka|||Ekajuk|ekajuk 184 | elx|||Elamite|élamite 185 | eng||en|English|anglais 186 | enm|||English, Middle (1100-1500)|anglais moyen (1100-1500) 187 | epo||eo|Esperanto|espéranto 188 | est||et|Estonian|estonien 189 | ewe||ee|Ewe|éwé 190 | ewo|||Ewondo|éwondo 191 | fan|||Fang|fang 192 | fao||fo|Faroese|féroïen 193 | fat|||Fanti|fanti 194 | fij||fj|Fijian|fidjien 195 | fil|||Filipino; Pilipino|filipino; pilipino 196 | fin||fi|Finnish|finnois 197 | fiu|||Finno-Ugrian languages|finno-ougriennes, langues 198 | fon|||Fon|fon 199 | fre|fra|fr|French|français 200 | frm|||French, Middle (ca.1400-1600)|français moyen (1400-1600) 201 | fro|||French, Old (842-ca.1400)|français ancien (842-ca.1400) 202 | frr|||Northern Frisian|frison septentrional 203 | frs|||Eastern Frisian|frison oriental 204 | fry||fy|Western Frisian|frison occidental 205 | ful||ff|Fulah|peul 206 | fur|||Friulian|frioulan 207 | gaa|||Ga|ga 208 | gay|||Gayo|gayo 209 | gba|||Gbaya|gbaya 210 | gem|||Germanic languages|germaniques, langues 211 | geo|kat|ka|Georgian|géorgien 212 | ger|deu|de|German|allemand 213 | gez|||Geez|guèze 214 | gil|||Gilbertese|kiribati 215 | gla||gd|Gaelic; Scottish Gaelic|gaélique; gaélique écossais 216 | gle||ga|Irish|irlandais 217 | glg||gl|Galician|galicien 218 | glv||gv|Manx|manx; mannois 219 | gmh|||German, Middle High (ca.1050-1500)|allemand, moyen haut (ca. 1050-1500) 220 | goh|||German, Old High (ca.750-1050)|allemand, vieux haut (ca. 
750-1050) 221 | gon|||Gondi|gond 222 | gor|||Gorontalo|gorontalo 223 | got|||Gothic|gothique 224 | grb|||Grebo|grebo 225 | grc|||Greek, Ancient (to 1453)|grec ancien (jusqu'à 1453) 226 | gre|ell|el|Greek, Modern (1453-)|grec moderne (après 1453) 227 | grn||gn|Guarani|guarani 228 | gsw|||Swiss German; Alemannic; Alsatian|suisse alémanique; alémanique; alsacien 229 | guj||gu|Gujarati|goudjrati 230 | gwi|||Gwich'in|gwich'in 231 | hai|||Haida|haida 232 | hat||ht|Haitian; Haitian Creole|haïtien; créole haïtien 233 | hau||ha|Hausa|haoussa 234 | haw|||Hawaiian|hawaïen 235 | heb||he|Hebrew|hébreu 236 | her||hz|Herero|herero 237 | hil|||Hiligaynon|hiligaynon 238 | him|||Himachali languages; Western Pahari languages|langues himachalis; langues paharis occidentales 239 | hin||hi|Hindi|hindi 240 | hit|||Hittite|hittite 241 | hmn|||Hmong|hmong 242 | hmo||ho|Hiri Motu|hiri motu 243 | hrv||hr|Croatian|croate 244 | hsb|||Upper Sorbian|haut-sorabe 245 | hun||hu|Hungarian|hongrois 246 | hup|||Hupa|hupa 247 | iba|||Iban|iban 248 | ibo||ig|Igbo|igbo 249 | ice|isl|is|Icelandic|islandais 250 | ido||io|Ido|ido 251 | iii||ii|Sichuan Yi; Nuosu|yi de Sichuan 252 | ijo|||Ijo languages|ijo, langues 253 | iku||iu|Inuktitut|inuktitut 254 | ile||ie|Interlingue; Occidental|interlingue 255 | ilo|||Iloko|ilocano 256 | ina||ia|Interlingua (International Auxiliary Language Association)|interlingua (langue auxiliaire internationale) 257 | inc|||Indic languages|indo-aryennes, langues 258 | ind||id|Indonesian|indonésien 259 | ine|||Indo-European languages|indo-européennes, langues 260 | inh|||Ingush|ingouche 261 | ipk||ik|Inupiaq|inupiaq 262 | ira|||Iranian languages|iraniennes, langues 263 | iro|||Iroquoian languages|iroquoises, langues 264 | ita||it|Italian|italien 265 | jav||jv|Javanese|javanais 266 | jbo|||Lojban|lojban 267 | jpn||ja|Japanese|japonais 268 | jpr|||Judeo-Persian|judéo-persan 269 | jrb|||Judeo-Arabic|judéo-arabe 270 | kaa|||Kara-Kalpak|karakalpak 271 | kab|||Kabyle|kabyle 272 | 
kac|||Kachin; Jingpho|kachin; jingpho 273 | kal||kl|Kalaallisut; Greenlandic|groenlandais 274 | kam|||Kamba|kamba 275 | kan||kn|Kannada|kannada 276 | kar|||Karen languages|karen, langues 277 | kas||ks|Kashmiri|kashmiri 278 | kau||kr|Kanuri|kanouri 279 | kaw|||Kawi|kawi 280 | kaz||kk|Kazakh|kazakh 281 | kbd|||Kabardian|kabardien 282 | kha|||Khasi|khasi 283 | khi|||Khoisan languages|khoïsan, langues 284 | khm||km|Central Khmer|khmer central 285 | kho|||Khotanese; Sakan|khotanais; sakan 286 | kik||ki|Kikuyu; Gikuyu|kikuyu 287 | kin||rw|Kinyarwanda|rwanda 288 | kir||ky|Kirghiz; Kyrgyz|kirghiz 289 | kmb|||Kimbundu|kimbundu 290 | kok|||Konkani|konkani 291 | kom||kv|Komi|kom 292 | kon||kg|Kongo|kongo 293 | kor||ko|Korean|coréen 294 | kos|||Kosraean|kosrae 295 | kpe|||Kpelle|kpellé 296 | krc|||Karachay-Balkar|karatchai balkar 297 | krl|||Karelian|carélien 298 | kro|||Kru languages|krou, langues 299 | kru|||Kurukh|kurukh 300 | kua||kj|Kuanyama; Kwanyama|kuanyama; kwanyama 301 | kum|||Kumyk|koumyk 302 | kur||ku|Kurdish|kurde 303 | kut|||Kutenai|kutenai 304 | lad|||Ladino|judéo-espagnol 305 | lah|||Lahnda|lahnda 306 | lam|||Lamba|lamba 307 | lao||lo|Lao|lao 308 | lat||la|Latin|latin 309 | lav||lv|Latvian|letton 310 | lez|||Lezghian|lezghien 311 | lim||li|Limburgan; Limburger; Limburgish|limbourgeois 312 | lin||ln|Lingala|lingala 313 | lit||lt|Lithuanian|lituanien 314 | lol|||Mongo|mongo 315 | loz|||Lozi|lozi 316 | ltz||lb|Luxembourgish; Letzeburgesch|luxembourgeois 317 | lua|||Luba-Lulua|luba-lulua 318 | lub||lu|Luba-Katanga|luba-katanga 319 | lug||lg|Ganda|ganda 320 | lui|||Luiseno|luiseno 321 | lun|||Lunda|lunda 322 | luo|||Luo (Kenya and Tanzania)|luo (Kenya et Tanzanie) 323 | lus|||Lushai|lushai 324 | mac|mkd|mk|Macedonian|macédonien 325 | mad|||Madurese|madourais 326 | mag|||Magahi|magahi 327 | mah||mh|Marshallese|marshall 328 | mai|||Maithili|maithili 329 | mak|||Makasar|makassar 330 | mal||ml|Malayalam|malayalam 331 | man|||Mandingo|mandingue 332 | 
mao|mri|mi|Maori|maori 333 | map|||Austronesian languages|austronésiennes, langues 334 | mar||mr|Marathi|marathe 335 | mas|||Masai|massaï 336 | may|msa|ms|Malay|malais 337 | mdf|||Moksha|moksa 338 | mdr|||Mandar|mandar 339 | men|||Mende|mendé 340 | mga|||Irish, Middle (900-1200)|irlandais moyen (900-1200) 341 | mic|||Mi'kmaq; Micmac|mi'kmaq; micmac 342 | min|||Minangkabau|minangkabau 343 | mis|||Uncoded languages|langues non codées 344 | mkh|||Mon-Khmer languages|môn-khmer, langues 345 | mlg||mg|Malagasy|malgache 346 | mlt||mt|Maltese|maltais 347 | mnc|||Manchu|mandchou 348 | mni|||Manipuri|manipuri 349 | mno|||Manobo languages|manobo, langues 350 | moh|||Mohawk|mohawk 351 | mon||mn|Mongolian|mongol 352 | mos|||Mossi|moré 353 | mul|||Multiple languages|multilingue 354 | mun|||Munda languages|mounda, langues 355 | mus|||Creek|muskogee 356 | mwl|||Mirandese|mirandais 357 | mwr|||Marwari|marvari 358 | myn|||Mayan languages|maya, langues 359 | myv|||Erzya|erza 360 | nah|||Nahuatl languages|nahuatl, langues 361 | nai|||North American Indian languages|nord-amérindiennes, langues 362 | nap|||Neapolitan|napolitain 363 | nau||na|Nauru|nauruan 364 | nav||nv|Navajo; Navaho|navaho 365 | nbl||nr|Ndebele, South; South Ndebele|ndébélé du Sud 366 | nde||nd|Ndebele, North; North Ndebele|ndébélé du Nord 367 | ndo||ng|Ndonga|ndonga 368 | nds|||Low German; Low Saxon; German, Low; Saxon, Low|bas allemand; bas saxon; allemand, bas; saxon, bas 369 | nep||ne|Nepali|népalais 370 | new|||Nepal Bhasa; Newari|nepal bhasa; newari 371 | nia|||Nias|nias 372 | nic|||Niger-Kordofanian languages|nigéro-kordofaniennes, langues 373 | niu|||Niuean|niué 374 | nno||nn|Norwegian Nynorsk; Nynorsk, Norwegian|norvégien nynorsk; nynorsk, norvégien 375 | nob||nb|Bokmål, Norwegian; Norwegian Bokmål|norvégien bokmål 376 | nog|||Nogai|nogaï; nogay 377 | non|||Norse, Old|norrois, vieux 378 | nor||no|Norwegian|norvégien 379 | nqo|||N'Ko|n'ko 380 | nso|||Pedi; Sepedi; Northern Sotho|pedi; sepedi; sotho du Nord 381 
| nub|||Nubian languages|nubiennes, langues 382 | nwc|||Classical Newari; Old Newari; Classical Nepal Bhasa|newari classique 383 | nya||ny|Chichewa; Chewa; Nyanja|chichewa; chewa; nyanja 384 | nym|||Nyamwezi|nyamwezi 385 | nyn|||Nyankole|nyankolé 386 | nyo|||Nyoro|nyoro 387 | nzi|||Nzima|nzema 388 | oci||oc|Occitan (post 1500); Provençal|occitan (après 1500); provençal 389 | oji||oj|Ojibwa|ojibwa 390 | ori||or|Oriya|oriya 391 | orm||om|Oromo|galla 392 | osa|||Osage|osage 393 | oss||os|Ossetian; Ossetic|ossète 394 | ota|||Turkish, Ottoman (1500-1928)|turc ottoman (1500-1928) 395 | oto|||Otomian languages|otomi, langues 396 | paa|||Papuan languages|papoues, langues 397 | pag|||Pangasinan|pangasinan 398 | pal|||Pahlavi|pahlavi 399 | pam|||Pampanga; Kapampangan|pampangan 400 | pan||pa|Panjabi; Punjabi|pendjabi 401 | pap|||Papiamento|papiamento 402 | pau|||Palauan|palau 403 | peo|||Persian, Old (ca.600-400 B.C.)|perse, vieux (ca. 600-400 av. J.-C.) 404 | per|fas|fa|Persian|persan 405 | phi|||Philippine languages|philippines, langues 406 | phn|||Phoenician|phénicien 407 | pli||pi|Pali|pali 408 | pol||pl|Polish|polonais 409 | pon|||Pohnpeian|pohnpei 410 | por||pt|Portuguese|portugais 411 | pra|||Prakrit languages|prâkrit, langues 412 | pro|||Provençal, Old (to 1500)|provençal ancien (jusqu'à 1500) 413 | pus||ps|Pushto; Pashto|pachto 414 | qaa-qtz|||Reserved for local use|réservée à l'usage local 415 | que||qu|Quechua|quechua 416 | raj|||Rajasthani|rajasthani 417 | rap|||Rapanui|rapanui 418 | rar|||Rarotongan; Cook Islands Maori|rarotonga; maori des îles Cook 419 | roa|||Romance languages|romanes, langues 420 | roh||rm|Romansh|romanche 421 | rom|||Romany|tsigane 422 | rum|ron|ro|Romanian; Moldavian; Moldovan|roumain; moldave 423 | run||rn|Rundi|rundi 424 | rup|||Aromanian; Arumanian; Macedo-Romanian|aroumain; macédo-roumain 425 | rus||ru|Russian|russe 426 | sad|||Sandawe|sandawe 427 | sag||sg|Sango|sango 428 | sah|||Yakut|iakoute 429 | sai|||South American Indian 
(Other)|indiennes d'Amérique du Sud, autres langues 430 | sal|||Salishan languages|salishennes, langues 431 | sam|||Samaritan Aramaic|samaritain 432 | san||sa|Sanskrit|sanskrit 433 | sas|||Sasak|sasak 434 | sat|||Santali|santal 435 | scn|||Sicilian|sicilien 436 | sco|||Scots|écossais 437 | sel|||Selkup|selkoupe 438 | sem|||Semitic languages|sémitiques, langues 439 | sga|||Irish, Old (to 900)|irlandais ancien (jusqu'à 900) 440 | sgn|||Sign Languages|langues des signes 441 | shn|||Shan|chan 442 | sid|||Sidamo|sidamo 443 | sin||si|Sinhala; Sinhalese|singhalais 444 | sio|||Siouan languages|sioux, langues 445 | sit|||Sino-Tibetan languages|sino-tibétaines, langues 446 | sla|||Slavic languages|slaves, langues 447 | slo|slk|sk|Slovak|slovaque 448 | slv||sl|Slovenian|slovène 449 | sma|||Southern Sami|sami du Sud 450 | sme||se|Northern Sami|sami du Nord 451 | smi|||Sami languages|sames, langues 452 | smj|||Lule Sami|sami de Lule 453 | smn|||Inari Sami|sami d'Inari 454 | smo||sm|Samoan|samoan 455 | sms|||Skolt Sami|sami skolt 456 | sna||sn|Shona|shona 457 | snd||sd|Sindhi|sindhi 458 | snk|||Soninke|soninké 459 | sog|||Sogdian|sogdien 460 | som||so|Somali|somali 461 | son|||Songhai languages|songhai, langues 462 | sot||st|Sotho, Southern|sotho du Sud 463 | spa||es|Spanish; Castilian|espagnol; castillan 464 | srd||sc|Sardinian|sarde 465 | srn|||Sranan Tongo|sranan tongo 466 | srp||sr|Serbian|serbe 467 | srr|||Serer|sérère 468 | ssa|||Nilo-Saharan languages|nilo-sahariennes, langues 469 | ssw||ss|Swati|swati 470 | suk|||Sukuma|sukuma 471 | sun||su|Sundanese|soundanais 472 | sus|||Susu|soussou 473 | sux|||Sumerian|sumérien 474 | swa||sw|Swahili|swahili 475 | swe||sv|Swedish|suédois 476 | syc|||Classical Syriac|syriaque classique 477 | syr|||Syriac|syriaque 478 | tah||ty|Tahitian|tahitien 479 | tai|||Tai languages|tai, langues 480 | tam||ta|Tamil|tamoul 481 | tat||tt|Tatar|tatar 482 | tel||te|Telugu|télougou 483 | tem|||Timne|temne 484 | ter|||Tereno|tereno 485 | 
tet|||Tetum|tetum 486 | tgk||tg|Tajik|tadjik 487 | tgl||tl|Tagalog|tagalog 488 | tha||th|Thai|thaï 489 | tib|bod|bo|Tibetan|tibétain 490 | tig|||Tigre|tigré 491 | tir||ti|Tigrinya|tigrigna 492 | tiv|||Tiv|tiv 493 | tkl|||Tokelau|tokelau 494 | tlh|||Klingon; tlhIngan-Hol|klingon 495 | tli|||Tlingit|tlingit 496 | tmh|||Tamashek|tamacheq 497 | tog|||Tonga (Nyasa)|tonga (Nyasa) 498 | ton||to|Tonga (Tonga Islands)|tongan (Îles Tonga) 499 | tpi|||Tok Pisin|tok pisin 500 | tsi|||Tsimshian|tsimshian 501 | tsn||tn|Tswana|tswana 502 | tso||ts|Tsonga|tsonga 503 | tuk||tk|Turkmen|turkmène 504 | tum|||Tumbuka|tumbuka 505 | tup|||Tupi languages|tupi, langues 506 | tur||tr|Turkish|turc 507 | tut|||Altaic languages|altaïques, langues 508 | tvl|||Tuvalu|tuvalu 509 | twi||tw|Twi|twi 510 | tyv|||Tuvinian|touva 511 | udm|||Udmurt|oudmourte 512 | uga|||Ugaritic|ougaritique 513 | uig||ug|Uighur; Uyghur|ouïgour 514 | ukr||uk|Ukrainian|ukrainien 515 | umb|||Umbundu|umbundu 516 | und|||Undetermined|indéterminée 517 | urd||ur|Urdu|ourdou 518 | uzb||uz|Uzbek|ouszbek 519 | vai|||Vai|vaï 520 | ven||ve|Venda|venda 521 | vie||vi|Vietnamese|vietnamien 522 | vol||vo|Volapük|volapük 523 | vot|||Votic|vote 524 | wak|||Wakashan languages|wakashanes, langues 525 | wal|||Walamo|walamo 526 | war|||Waray|waray 527 | was|||Washo|washo 528 | wel|cym|cy|Welsh|gallois 529 | wen|||Sorbian languages|sorabes, langues 530 | wln||wa|Walloon|wallon 531 | wol||wo|Wolof|wolof 532 | xal|||Kalmyk; Oirat|kalmouk; oïrat 533 | xho||xh|Xhosa|xhosa 534 | yao|||Yao|yao 535 | yap|||Yapese|yapois 536 | yid||yi|Yiddish|yiddish 537 | yor||yo|Yoruba|yoruba 538 | ypk|||Yupik languages|yupik, langues 539 | zap|||Zapotec|zapotèque 540 | zbl|||Blissymbols; Blissymbolics; Bliss|symboles Bliss; Bliss 541 | zen|||Zenaga|zenaga 542 | zha||za|Zhuang; Chuang|zhuang; chuang 543 | znd|||Zande languages|zandé, langues 544 | zul||zu|Zulu|zoulou 545 | zun|||Zuni|zuni 546 | zxx|||No linguistic content; Not applicable|pas de contenu 
linguistique; non applicable 547 | zza|||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki 548 | -------------------------------------------------------------------------------- /spec/linguistics/en/articles_spec.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rspec -cfd 2 | 3 | BEGIN { 4 | require 'pathname' 5 | basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent 6 | 7 | libdir = basedir + "lib" 8 | 9 | $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s ) 10 | $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 11 | } 12 | 13 | require 'rspec' 14 | require 'spec/lib/helpers' 15 | 16 | require 'linguistics' 17 | require 'linguistics/en' 18 | require 'linguistics/en/articles' 19 | 20 | 21 | describe Linguistics::EN::Articles do 22 | 23 | before( :all ) do 24 | setup_logging() 25 | Linguistics.use( :en ) 26 | end 27 | 28 | after( :all ) do 29 | reset_logging() 30 | end 31 | 32 | it "adds EN::Articles to the list of English language modules" do 33 | Linguistics::EN.should have_extension( :articles ) 34 | end 35 | 36 | 37 | describe "in monkeypatch mode" do 38 | 39 | let( :monkeypatched_class ) do 40 | Class.new do 41 | def to_s; "antelope"; end 42 | end 43 | end 44 | let( :monkeypatched_object ) do 45 | Linguistics.use( :en, classes: monkeypatched_class, monkeypatch: true ) 46 | monkeypatched_class.new 47 | end 48 | 49 | 50 | it "uses the stringified receiver as the object which should get the article" do 51 | monkeypatched_object.a.should == 'an antelope' 52 | end 53 | 54 | it "uses correct pluralization to form the negative article" do 55 | monkeypatched_object.no.should == 'no antelopes' 56 | end 57 | 58 | end 59 | 60 | 61 | it "uses 'an' as the indefinite article for 'A.B.C'" do 62 | "A.B.C".en.a.should == "an A.B.C" 63 | end 64 | 65 | it "uses 'an' as the indefinite article for 'AI'" do 66 | 
"AI".en.a.should == "an AI" 67 | end 68 | 69 | it "uses 'an' as the indefinite article for 'AGE'" do 70 | "AGE".en.a.should == "an AGE" 71 | end 72 | 73 | it "uses 'an' as the indefinite article for 'agendum'" do 74 | "agendum".en.a.should == "an agendum" 75 | end 76 | 77 | it "uses 'an' as the indefinite article for 'aide-de-camp'" do 78 | "aide-de-camp".en.a.should == "an aide-de-camp" 79 | end 80 | 81 | it "uses 'an' as the indefinite article for 'albino'" do 82 | "albino".en.a.should == "an albino" 83 | end 84 | 85 | it "uses 'a' as the indefinite article for 'B.L.T. sandwich'" do 86 | "B.L.T. sandwich".en.a.should == "a B.L.T. sandwich" 87 | end 88 | 89 | it "uses 'a' as the indefinite article for 'BMW'" do 90 | "BMW".en.a.should == "a BMW" 91 | end 92 | 93 | it "uses 'a' as the indefinite article for 'BLANK'" do 94 | "BLANK".en.a.should == "a BLANK" 95 | end 96 | 97 | it "uses 'a' as the indefinite article for 'bacterium'" do 98 | "bacterium".en.a.should == "a bacterium" 99 | end 100 | 101 | it "uses 'a' as the indefinite article for 'Burmese restaurant'" do 102 | "Burmese restaurant".en.a.should == "a Burmese restaurant" 103 | end 104 | 105 | it "uses 'a' as the indefinite article for 'C.O.'" do 106 | "C.O.".en.a.should == "a C.O." 107 | end 108 | 109 | it "uses 'a' as the indefinite article for 'CCD'" do 110 | "CCD".en.a.should == "a CCD" 111 | end 112 | 113 | it "uses 'a' as the indefinite article for 'COLON'" do 114 | "COLON".en.a.should == "a COLON" 115 | end 116 | 117 | it "uses 'a' as the indefinite article for 'cameo'" do 118 | "cameo".en.a.should == "a cameo" 119 | end 120 | 121 | it "uses 'a' as the indefinite article for 'CAPITAL'" do 122 | "CAPITAL".en.a.should == "a CAPITAL" 123 | end 124 | 125 | it "uses 'a' as the indefinite article for 'D.S.M.'" do 126 | "D.S.M.".en.a.should == "a D.S.M." 
127 | end 128 | 129 | it "uses 'a' as the indefinite article for 'DNR'" do 130 | "DNR".en.a.should == "a DNR" 131 | end 132 | 133 | it "uses 'a' as the indefinite article for 'DINNER'" do 134 | "DINNER".en.a.should == "a DINNER" 135 | end 136 | 137 | it "uses 'a' as the indefinite article for 'dynamo'" do 138 | "dynamo".en.a.should == "a dynamo" 139 | end 140 | 141 | it "uses 'an' as the indefinite article for 'E.K.G.'" do 142 | "E.K.G.".en.a.should == "an E.K.G." 143 | end 144 | 145 | it "uses 'an' as the indefinite article for 'ECG'" do 146 | "ECG".en.a.should == "an ECG" 147 | end 148 | 149 | it "uses 'an' as the indefinite article for 'EGG'" do 150 | "EGG".en.a.should == "an EGG" 151 | end 152 | 153 | it "uses 'an' as the indefinite article for 'embryo'" do 154 | "embryo".en.a.should == "an embryo" 155 | end 156 | 157 | it "uses 'an' as the indefinite article for 'erratum'" do 158 | "erratum".en.a.should == "an erratum" 159 | end 160 | 161 | it "uses 'a' as the indefinite article for 'eucalyptus'" do 162 | "eucalyptus".en.a.should == "a eucalyptus" 163 | end 164 | 165 | it "uses 'an' as the indefinite article for 'Euler number'" do 166 | "Euler number".en.a.should == "an Euler number" 167 | end 168 | 169 | it "uses 'a' as the indefinite article for 'eulogy'" do 170 | "eulogy".en.a.should == "a eulogy" 171 | end 172 | 173 | it "uses 'a' as the indefinite article for 'euphemism'" do 174 | "euphemism".en.a.should == "a euphemism" 175 | end 176 | 177 | it "uses 'a' as the indefinite article for 'euphoria'" do 178 | "euphoria".en.a.should == "a euphoria" 179 | end 180 | 181 | it "uses 'a' as the indefinite article for 'ewe'" do 182 | "ewe".en.a.should == "a ewe" 183 | end 184 | 185 | it "uses 'a' as the indefinite article for 'ewer'" do 186 | "ewer".en.a.should == "a ewer" 187 | end 188 | 189 | it "uses 'an' as the indefinite article for 'extremum'" do 190 | "extremum".en.a.should == "an extremum" 191 | end 192 | 193 | it "uses 'an' as the indefinite article for 
'eye'" do 194 | "eye".en.a.should == "an eye" 195 | end 196 | 197 | it "uses 'an' as the indefinite article for 'F.B.I. agent'" do 198 | "F.B.I. agent".en.a.should == "an F.B.I. agent" 199 | end 200 | 201 | it "uses 'an' as the indefinite article for 'FSM'" do 202 | "FSM".en.a.should == "an FSM" 203 | end 204 | 205 | it "uses 'a' as the indefinite article for 'FACT'" do 206 | "FACT".en.a.should == "a FACT" 207 | end 208 | 209 | it "uses 'a' as the indefinite article for 'FAQ'" do 210 | "FAQ".en.a.should == "a FAQ" 211 | end 212 | 213 | it "uses 'an' as the indefinite article for 'F.A.Q.'" do 214 | "F.A.Q.".en.a.should == "an F.A.Q." 215 | end 216 | 217 | it "uses 'a' as the indefinite article for 'fish'" do 218 | "fish".en.a.should == "a fish" 219 | end 220 | 221 | it "uses 'a' as the indefinite article for 'G-string'" do 222 | "G-string".en.a.should == "a G-string" 223 | end 224 | 225 | it "uses 'a' as the indefinite article for 'GSM phone'" do 226 | "GSM phone".en.a.should == "a GSM phone" 227 | end 228 | 229 | it "uses 'a' as the indefinite article for 'GOD'" do 230 | "GOD".en.a.should == "a GOD" 231 | end 232 | 233 | it "uses 'a' as the indefinite article for 'genus'" do 234 | "genus".en.a.should == "a genus" 235 | end 236 | 237 | it "uses 'a' as the indefinite article for 'Governor General'" do 238 | "Governor General".en.a.should == "a Governor General" 239 | end 240 | 241 | it "uses 'an' as the indefinite article for 'H-Bomb'" do 242 | "H-Bomb".en.a.should == "an H-Bomb" 243 | end 244 | 245 | it "uses 'an' as the indefinite article for 'H.M.S Ark Royal'" do 246 | "H.M.S Ark Royal".en.a.should == "an H.M.S Ark Royal" 247 | end 248 | 249 | it "uses 'an' as the indefinite article for 'HSL colour space'" do 250 | "HSL colour space".en.a.should == "an HSL colour space" 251 | end 252 | 253 | it "uses 'a' as the indefinite article for 'HAL 9000'" do 254 | "HAL 9000".en.a.should == "a HAL 9000" 255 | end 256 | 257 | it "uses 'an' as the indefinite article for 
'H.A.L. 9000'" do 258 | "H.A.L. 9000".en.a.should == "an H.A.L. 9000" 259 | end 260 | 261 | it "uses 'a' as the indefinite article for 'has-been'" do 262 | "has-been".en.a.should == "a has-been" 263 | end 264 | 265 | it "uses 'a' as the indefinite article for 'height'" do 266 | "height".en.a.should == "a height" 267 | end 268 | 269 | it "uses 'an' as the indefinite article for 'heir'" do 270 | "heir".en.a.should == "an heir" 271 | end 272 | 273 | it "uses 'a' as the indefinite article for 'honed blade'" do 274 | "honed blade".en.a.should == "a honed blade" 275 | end 276 | 277 | it "uses 'an' as the indefinite article for 'honest man'" do 278 | "honest man".en.a.should == "an honest man" 279 | end 280 | 281 | it "uses 'a' as the indefinite article for 'honeymoon'" do 282 | "honeymoon".en.a.should == "a honeymoon" 283 | end 284 | 285 | it "uses 'an' as the indefinite article for 'honorarium'" do 286 | "honorarium".en.a.should == "an honorarium" 287 | end 288 | 289 | it "uses 'an' as the indefinite article for 'honorary degree'" do 290 | "honorary degree".en.a.should == "an honorary degree" 291 | end 292 | 293 | it "uses 'an' as the indefinite article for 'honoree'" do 294 | "honoree".en.a.should == "an honoree" 295 | end 296 | 297 | it "uses 'an' as the indefinite article for 'honorific'" do 298 | "honorific".en.a.should == "an honorific" 299 | end 300 | 301 | it "uses 'a' as the indefinite article for 'Hough transform'" do 302 | "Hough transform".en.a.should == "a Hough transform" 303 | end 304 | 305 | it "uses 'a' as the indefinite article for 'hound'" do 306 | "hound".en.a.should == "a hound" 307 | end 308 | 309 | it "uses 'an' as the indefinite article for 'hour'" do 310 | "hour".en.a.should == "an hour" 311 | end 312 | 313 | it "uses 'an' as the indefinite article for 'hourglass'" do 314 | "hourglass".en.a.should == "an hourglass" 315 | end 316 | 317 | it "uses 'a' as the indefinite article for 'houri'" do 318 | "houri".en.a.should == "a houri" 319 | end 320 | 
321 | it "uses 'a' as the indefinite article for 'house'" do 322 | "house".en.a.should == "a house" 323 | end 324 | 325 | it "uses 'an' as the indefinite article for 'I.O.U.'" do 326 | "I.O.U.".en.a.should == "an I.O.U." 327 | end 328 | 329 | it "uses 'an' as the indefinite article for 'IQ'" do 330 | "IQ".en.a.should == "an IQ" 331 | end 332 | 333 | it "uses 'an' as the indefinite article for 'IDEA'" do 334 | "IDEA".en.a.should == "an IDEA" 335 | end 336 | 337 | it "uses 'an' as the indefinite article for 'inferno'" do 338 | "inferno".en.a.should == "an inferno" 339 | end 340 | 341 | it "uses 'an' as the indefinite article for 'Inspector General'" do 342 | "Inspector General".en.a.should == "an Inspector General" 343 | end 344 | 345 | it "uses 'a' as the indefinite article for 'jumbo'" do 346 | "jumbo".en.a.should == "a jumbo" 347 | end 348 | 349 | it "uses 'a' as the indefinite article for 'knife'" do 350 | "knife".en.a.should == "a knife" 351 | end 352 | 353 | it "uses 'an' as the indefinite article for 'L.E.D.'" do 354 | "L.E.D.".en.a.should == "an L.E.D." 355 | end 356 | 357 | it "uses 'a' as the indefinite article for 'LED'" do 358 | "LED".en.a.should == "a LED" 359 | end 360 | 361 | it "uses 'an' as the indefinite article for 'LCD'" do 362 | "LCD".en.a.should == "an LCD" 363 | end 364 | 365 | it "uses 'a' as the indefinite article for 'lady in waiting'" do 366 | "lady in waiting".en.a.should == "a lady in waiting" 367 | end 368 | 369 | it "uses 'a' as the indefinite article for 'leaf'" do 370 | "leaf".en.a.should == "a leaf" 371 | end 372 | 373 | it "uses 'an' as the indefinite article for 'M.I.A.'" do 374 | "M.I.A.".en.a.should == "an M.I.A." 
375 | end 376 | 377 | it "uses 'a' as the indefinite article for 'MIASMA'" do 378 | "MIASMA".en.a.should == "a MIASMA" 379 | end 380 | 381 | it "uses 'an' as the indefinite article for 'MTV channel'" do 382 | "MTV channel".en.a.should == "an MTV channel" 383 | end 384 | 385 | it "uses 'a' as the indefinite article for 'Major General'" do 386 | "Major General".en.a.should == "a Major General" 387 | end 388 | 389 | it "uses 'an' as the indefinite article for 'N.C.O.'" do 390 | "N.C.O.".en.a.should == "an N.C.O." 391 | end 392 | 393 | it "uses 'an' as the indefinite article for 'NCO'" do 394 | "NCO".en.a.should == "an NCO" 395 | end 396 | 397 | it "uses 'a' as the indefinite article for 'NATO country'" do 398 | "NATO country".en.a.should == "a NATO country" 399 | end 400 | 401 | it "uses 'a' as the indefinite article for 'note'" do 402 | "note".en.a.should == "a note" 403 | end 404 | 405 | it "uses 'an' as the indefinite article for 'O.K.'" do 406 | "O.K.".en.a.should == "an O.K." 407 | end 408 | 409 | it "uses 'an' as the indefinite article for 'OK'" do 410 | "OK".en.a.should == "an OK" 411 | end 412 | 413 | it "uses 'an' as the indefinite article for 'OLE'" do 414 | "OLE".en.a.should == "an OLE" 415 | end 416 | 417 | it "uses 'an' as the indefinite article for 'octavo'" do 418 | "octavo".en.a.should == "an octavo" 419 | end 420 | 421 | it "uses 'an' as the indefinite article for 'octopus'" do 422 | "octopus".en.a.should == "an octopus" 423 | end 424 | 425 | it "uses 'an' as the indefinite article for 'okay'" do 426 | "okay".en.a.should == "an okay" 427 | end 428 | 429 | it "uses 'a' as the indefinite article for 'once-and-future-king'" do 430 | "once-and-future-king".en.a.should == "a once-and-future-king" 431 | end 432 | 433 | it "uses 'an' as the indefinite article for 'oncologist'" do 434 | "oncologist".en.a.should == "an oncologist" 435 | end 436 | 437 | it "uses 'a' as the indefinite article for 'one night stand'" do 438 | "one night stand".en.a.should == "a 
one night stand" 439 | end 440 | 441 | it "uses 'an' as the indefinite article for 'onerous task'" do 442 | "onerous task".en.a.should == "an onerous task" 443 | end 444 | 445 | it "uses 'an' as the indefinite article for 'opera'" do 446 | "opera".en.a.should == "an opera" 447 | end 448 | 449 | it "uses 'an' as the indefinite article for 'optimum'" do 450 | "optimum".en.a.should == "an optimum" 451 | end 452 | 453 | it "uses 'an' as the indefinite article for 'opus'" do 454 | "opus".en.a.should == "an opus" 455 | end 456 | 457 | it "uses 'an' as the indefinite article for 'ox'" do 458 | "ox".en.a.should == "an ox" 459 | end 460 | 461 | it "uses 'a' as the indefinite article for 'Ph.D.'" do 462 | "Ph.D.".en.a.should == "a Ph.D." 463 | end 464 | 465 | it "uses 'a' as the indefinite article for 'PET'" do 466 | "PET".en.a.should == "a PET" 467 | end 468 | 469 | it "uses 'a' as the indefinite article for 'P.E.T. scan'" do 470 | "P.E.T. scan".en.a.should == "a P.E.T. scan" 471 | end 472 | 473 | it "uses 'a' as the indefinite article for 'plateau'" do 474 | "plateau".en.a.should == "a plateau" 475 | end 476 | 477 | it "uses 'a' as the indefinite article for 'quantum'" do 478 | "quantum".en.a.should == "a quantum" 479 | end 480 | 481 | it "uses 'an' as the indefinite article for 'R.S.V.P.'" do 482 | "R.S.V.P.".en.a.should == "an R.S.V.P." 483 | end 484 | 485 | it "uses 'an' as the indefinite article for 'RSVP'" do 486 | "RSVP".en.a.should == "an RSVP" 487 | end 488 | 489 | it "uses 'a' as the indefinite article for 'REST'" do 490 | "REST".en.a.should == "a REST" 491 | end 492 | 493 | it "uses 'a' as the indefinite article for 'reindeer'" do 494 | "reindeer".en.a.should == "a reindeer" 495 | end 496 | 497 | it "uses 'an' as the indefinite article for 'S.O.S.'" do 498 | "S.O.S.".en.a.should == "an S.O.S." 
499 | end 500 | 501 | it "uses 'a' as the indefinite article for 'SUM'" do 502 | "SUM".en.a.should == "a SUM" 503 | end 504 | 505 | it "uses 'an' as the indefinite article for 'SST'" do 506 | "SST".en.a.should == "an SST" 507 | end 508 | 509 | it "uses 'a' as the indefinite article for 'salmon'" do 510 | "salmon".en.a.should == "a salmon" 511 | end 512 | 513 | it "uses 'a' as the indefinite article for 'T.N.T. bomb'" do 514 | "T.N.T. bomb".en.a.should == "a T.N.T. bomb" 515 | end 516 | 517 | it "uses 'a' as the indefinite article for 'TNT bomb'" do 518 | "TNT bomb".en.a.should == "a TNT bomb" 519 | end 520 | 521 | it "uses 'a' as the indefinite article for 'TENT'" do 522 | "TENT".en.a.should == "a TENT" 523 | end 524 | 525 | it "uses 'a' as the indefinite article for 'thought'" do 526 | "thought".en.a.should == "a thought" 527 | end 528 | 529 | it "uses 'a' as the indefinite article for 'tomato'" do 530 | "tomato".en.a.should == "a tomato" 531 | end 532 | 533 | it "uses 'a' as the indefinite article for 'U-boat'" do 534 | "U-boat".en.a.should == "a U-boat" 535 | end 536 | 537 | it "uses 'a' as the indefinite article for 'U.F.O.'" do 538 | "U.F.O.".en.a.should == "a U.F.O." 
539 | end 540 | 541 | it "uses 'a' as the indefinite article for 'UFO'" do 542 | "UFO".en.a.should == "a UFO" 543 | end 544 | 545 | it "uses 'a' as the indefinite article for 'ubiquity'" do 546 | "ubiquity".en.a.should == "a ubiquity" 547 | end 548 | 549 | it "uses 'a' as the indefinite article for 'unicorn'" do 550 | "unicorn".en.a.should == "a unicorn" 551 | end 552 | 553 | it "uses 'an' as the indefinite article for 'unidentified flying object'" do 554 | "unidentified flying object".en.a.should == "an unidentified flying object" 555 | end 556 | 557 | it "uses 'a' as the indefinite article for 'uniform'" do 558 | "uniform".en.a.should == "a uniform" 559 | end 560 | 561 | it "uses 'a' as the indefinite article for 'unimodal system'" do 562 | "unimodal system".en.a.should == "a unimodal system" 563 | end 564 | 565 | it "uses 'an' as the indefinite article for 'unimpressive record'" do 566 | "unimpressive record".en.a.should == "an unimpressive record" 567 | end 568 | 569 | it "uses 'an' as the indefinite article for 'uninformed opinion'" do 570 | "uninformed opinion".en.a.should == "an uninformed opinion" 571 | end 572 | 573 | it "uses 'an' as the indefinite article for 'uninvited guest'" do 574 | "uninvited guest".en.a.should == "an uninvited guest" 575 | end 576 | 577 | it "uses 'a' as the indefinite article for 'union'" do 578 | "union".en.a.should == "a union" 579 | end 580 | 581 | it "uses 'a' as the indefinite article for 'uniplex'" do 582 | "uniplex".en.a.should == "a uniplex" 583 | end 584 | 585 | it "uses 'a' as the indefinite article for 'uniprocessor'" do 586 | "uniprocessor".en.a.should == "a uniprocessor" 587 | end 588 | 589 | it "uses 'a' as the indefinite article for 'unique opportunity'" do 590 | "unique opportunity".en.a.should == "a unique opportunity" 591 | end 592 | 593 | it "uses 'a' as the indefinite article for 'unisex hairdresser'" do 594 | "unisex hairdresser".en.a.should == "a unisex hairdresser" 595 | end 596 | 597 | it "uses 'a' as the 
indefinite article for 'unison'" do 598 | "unison".en.a.should == "a unison" 599 | end 600 | 601 | it "uses 'a' as the indefinite article for 'unit'" do 602 | "unit".en.a.should == "a unit" 603 | end 604 | 605 | it "uses 'a' as the indefinite article for 'unitarian'" do 606 | "unitarian".en.a.should == "a unitarian" 607 | end 608 | 609 | it "uses 'a' as the indefinite article for 'united front'" do 610 | "united front".en.a.should == "a united front" 611 | end 612 | 613 | it "uses 'a' as the indefinite article for 'unity'" do 614 | "unity".en.a.should == "a unity" 615 | end 616 | 617 | it "uses 'a' as the indefinite article for 'univalent bond'" do 618 | "univalent bond".en.a.should == "a univalent bond" 619 | end 620 | 621 | it "uses 'a' as the indefinite article for 'univariate statistic'" do 622 | "univariate statistic".en.a.should == "a univariate statistic" 623 | end 624 | 625 | it "uses 'a' as the indefinite article for 'universe'" do 626 | "universe".en.a.should == "a universe" 627 | end 628 | 629 | it "uses 'an' as the indefinite article for 'unordered meal'" do 630 | "unordered meal".en.a.should == "an unordered meal" 631 | end 632 | 633 | it "uses 'a' as the indefinite article for 'uranium atom'" do 634 | "uranium atom".en.a.should == "a uranium atom" 635 | end 636 | 637 | it "uses 'an' as the indefinite article for 'urban myth'" do 638 | "urban myth".en.a.should == "an urban myth" 639 | end 640 | 641 | it "uses 'an' as the indefinite article for 'urbane miss'" do 642 | "urbane miss".en.a.should == "an urbane miss" 643 | end 644 | 645 | it "uses 'an' as the indefinite article for 'urchin'" do 646 | "urchin".en.a.should == "an urchin" 647 | end 648 | 649 | it "uses 'a' as the indefinite article for 'urea detector'" do 650 | "urea detector".en.a.should == "a urea detector" 651 | end 652 | 653 | it "uses 'a' as the indefinite article for 'urethane monomer'" do 654 | "urethane monomer".en.a.should == "a urethane monomer" 655 | end 656 | 657 | it "uses 'an' as 
the indefinite article for 'urge'" do 658 | "urge".en.a.should == "an urge" 659 | end 660 | 661 | it "uses 'an' as the indefinite article for 'urgency'" do 662 | "urgency".en.a.should == "an urgency" 663 | end 664 | 665 | it "uses 'a' as the indefinite article for 'urinal'" do 666 | "urinal".en.a.should == "a urinal" 667 | end 668 | 669 | it "uses 'an' as the indefinite article for 'urn'" do 670 | "urn".en.a.should == "an urn" 671 | end 672 | 673 | it "uses 'a' as the indefinite article for 'usage'" do 674 | "usage".en.a.should == "a usage" 675 | end 676 | 677 | it "uses 'a' as the indefinite article for 'use'" do 678 | "use".en.a.should == "a use" 679 | end 680 | 681 | it "uses 'an' as the indefinite article for 'usher'" do 682 | "usher".en.a.should == "an usher" 683 | end 684 | 685 | it "uses 'a' as the indefinite article for 'usual suspect'" do 686 | "usual suspect".en.a.should == "a usual suspect" 687 | end 688 | 689 | it "uses 'a' as the indefinite article for 'usurer'" do 690 | "usurer".en.a.should == "a usurer" 691 | end 692 | 693 | it "uses 'a' as the indefinite article for 'usurper'" do 694 | "usurper".en.a.should == "a usurper" 695 | end 696 | 697 | it "uses 'a' as the indefinite article for 'utensil'" do 698 | "utensil".en.a.should == "a utensil" 699 | end 700 | 701 | it "uses 'a' as the indefinite article for 'utility'" do 702 | "utility".en.a.should == "a utility" 703 | end 704 | 705 | it "uses 'an' as the indefinite article for 'utmost urgency'" do 706 | "utmost urgency".en.a.should == "an utmost urgency" 707 | end 708 | 709 | it "uses 'a' as the indefinite article for 'utopia'" do 710 | "utopia".en.a.should == "a utopia" 711 | end 712 | 713 | it "uses 'an' as the indefinite article for 'utterance'" do 714 | "utterance".en.a.should == "an utterance" 715 | end 716 | 717 | it "uses 'a' as the indefinite article for 'V.I.P.'" do 718 | "V.I.P.".en.a.should == "a V.I.P." 
719 | end 720 | 721 | it "uses 'a' as the indefinite article for 'VIPER'" do 722 | "VIPER".en.a.should == "a VIPER" 723 | end 724 | 725 | it "uses 'a' as the indefinite article for 'viper'" do 726 | "viper".en.a.should == "a viper" 727 | end 728 | 729 | it "uses 'an' as the indefinite article for 'X-ray'" do 730 | "X-ray".en.a.should == "an X-ray" 731 | end 732 | 733 | it "uses 'an' as the indefinite article for 'X.O.'" do 734 | "X.O.".en.a.should == "an X.O." 735 | end 736 | 737 | it "uses 'a' as the indefinite article for 'XYLAPHONE'" do 738 | "XYLAPHONE".en.a.should == "a XYLAPHONE" 739 | end 740 | 741 | it "uses 'an' as the indefinite article for 'XY chromosome'" do 742 | "XY chromosome".en.a.should == "an XY chromosome" 743 | end 744 | 745 | it "uses 'a' as the indefinite article for 'xenophobe'" do 746 | "xenophobe".en.a.should == "a xenophobe" 747 | end 748 | 749 | it "uses 'a' as the indefinite article for 'Y-shaped pipe'" do 750 | "Y-shaped pipe".en.a.should == "a Y-shaped pipe" 751 | end 752 | 753 | it "uses 'a' as the indefinite article for 'Y.Z. plane'" do 754 | "Y.Z. plane".en.a.should == "a Y.Z. 
plane" 755 | end 756 | 757 | it "uses 'a' as the indefinite article for 'YMCA'" do 758 | "YMCA".en.a.should == "a YMCA" 759 | end 760 | 761 | it "uses 'an' as the indefinite article for 'YBLENT eye'" do 762 | "YBLENT eye".en.a.should == "an YBLENT eye" 763 | end 764 | 765 | it "uses 'an' as the indefinite article for 'yblent eye'" do 766 | "yblent eye".en.a.should == "an yblent eye" 767 | end 768 | 769 | it "uses 'an' as the indefinite article for 'yclad body'" do 770 | "yclad body".en.a.should == "an yclad body" 771 | end 772 | 773 | it "uses 'a' as the indefinite article for 'yellowing'" do 774 | "yellowing".en.a.should == "a yellowing" 775 | end 776 | 777 | it "uses 'a' as the indefinite article for 'yield'" do 778 | "yield".en.a.should == "a yield" 779 | end 780 | 781 | it "uses 'a' as the indefinite article for 'youth'" do 782 | "youth".en.a.should == "a youth" 783 | end 784 | 785 | it "uses 'a' as the indefinite article for 'youth'" do 786 | "youth".en.a.should == "a youth" 787 | end 788 | 789 | it "uses 'an' as the indefinite article for 'ypsiliform junction'" do 790 | "ypsiliform junction".en.a.should == "an ypsiliform junction" 791 | end 792 | 793 | it "uses 'an' as the indefinite article for 'yttrium atom'" do 794 | "yttrium atom".en.a.should == "an yttrium atom" 795 | end 796 | 797 | it "uses 'a' as the indefinite article for 'zoo'" do 798 | "zoo".en.a.should == "a zoo" 799 | end 800 | 801 | 802 | it "uses correct pluralization to form the negative article" do 803 | "mouse".en.no.should == "no mice" 804 | end 805 | 806 | it "uses currect pluralization for noun phrases to form the negative article" do 807 | "univariate statistic".en.no.should == "no univariate statistics" 808 | end 809 | 810 | it "uses the correct pluralization for 'Secretary of State' to form the negative article" do 811 | "Secretary of State".en.no.should == "no Secretaries of State" 812 | end 813 | 814 | 815 | context "lprintf formatters" do 816 | 817 | it "registers the :A lprintf 
formatter" do 818 | Linguistics::EN.lprintf_formatters.should include( :A ) 819 | end 820 | 821 | it "registers the :AN lprintf formatter" do 822 | Linguistics::EN.lprintf_formatters.should include( :AN ) 823 | end 824 | 825 | it "adds an indefinite article to the argument to %A" do 826 | "You pick up %A.".en.lprintf( "umbrella" ). 827 | should == "You pick up an umbrella." 828 | end 829 | 830 | it "adds an indefinite article to the argument to %AN" do 831 | "You pick up %AN.".en.lprintf( "chocolate bar" ). 832 | should == "You pick up a chocolate bar." 833 | end 834 | 835 | end 836 | 837 | end 838 | 839 | --------------------------------------------------------------------------------