├── .rvm.gems
├── .hgignore
├── spec
    ├── lib
    │   ├── constants.rb
    │   └── helpers.rb
    ├── linguistics
    │   ├── inflector_spec.rb
    │   ├── monkeypatches_spec.rb
    │   ├── en
    │   │   ├── participles_spec.rb
    │   │   ├── linkparser_spec.rb
    │   │   ├── stemmer_spec.rb
    │   │   ├── wordnet_spec.rb
    │   │   ├── conjunctions_spec.rb
    │   │   ├── infinitives_spec.rb
    │   │   └── articles_spec.rb
    │   ├── en_spec.rb
    │   └── iso639_spec.rb
    └── linguistics_spec.rb
├── experiments
    ├── wn-proglang.rb
    ├── farmobjs.rb
    ├── TEMPLATE.rb.tpl
    ├── randobjlist.rb
    ├── generalize.rb
    ├── conjunct-with-block.rb
    ├── lafcadio_plural.rb
    ├── lprintf.rb
    ├── allobjlist.rb
    ├── api.rb
    └── gen_numwords_specs.rb
├── .hgtags
├── examples
    ├── klingon.rb
    ├── generalize_sentence.rb
    └── endocs.rb
├── .pryrc
├── .tm_properties
├── History.rdoc
├── .hgsigs
├── Gemfile
├── lib
    ├── linguistics
    │   ├── languagebehavior.rb
    │   ├── en
    │   │   ├── participles.rb
    │   │   ├── stemmer.rb
    │   │   ├── linkparser.rb
    │   │   ├── articles.rb
    │   │   ├── titlecase.rb
    │   │   ├── conjunctions.rb
    │   │   ├── wordnet.rb
    │   │   └── numbers.rb
    │   ├── monkeypatches.rb
    │   ├── inflector.rb
    │   ├── en.rb
    │   └── iso639.rb
    └── linguistics.rb
├── .rvmrc
├── Manifest.txt
├── LICENSE
├── Rakefile
├── .irbrc
└── README.rdoc


/.rvm.gems:
--------------------------------------------------------------------------------
1 | hoe-deveiate -v0.3.0
2 | hoe-bundler -v1.2.0
3 | linkparser -v1.1.4
4 | simplecov -v0.6.4
5 | wordnet -v1.0.0
6 | wordnet-defaultdb -v1.0.1
7 | ruby-stemmer -v0.9.3
8 | 


--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
 1 | ^commit\-msg\.txt$
 2 | docs/manual/output
 3 | docs/api
 4 | ^pkg$
 5 | ChangeLog$
 6 | coverage/
 7 | coverage\\.info
 8 | ^coverage\.info$
 9 | \.DS_Store
10 | ~$
11 | \.orig$
12 | ^\.yardoc/
13 | docs/.*\.dump$
14 | ^release\.notes$
15 | ^doc/
16 | Gemfile.lock
17 | 


--------------------------------------------------------------------------------
/spec/lib/constants.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | require 'linguistics'
 4 | 
 5 | 
 6 | ### A collection of constants used in testing
 7 | module Linguistics::TestConstants # :nodoc:all
 8 | 
 9 | 	TEST_ARRAY = %w{stone stick hammer stone lantern}
10 | 	TEST_STRING = "banner"
11 | 	TEST_NUMBER = 5
12 | 
13 | end
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/experiments/wn-proglang.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | 
 3 | BEGIN {
 4 | 	$LOAD_PATH.unshift File::dirname(File::dirname( __FILE__ )) + "/lib"
 5 | 	require 'linguistics'
 6 | }
 7 | 
 8 | Linguistics::use( :en )
 9 | unless Linguistics::EN::haveWordnet?
10 | 
11 | # Demo of WordNet integration.
12 | 
13 | "programming language".en.gloss
14 | 


--------------------------------------------------------------------------------
/.hgtags:
--------------------------------------------------------------------------------
 1 | 1e029bfd9ead84151b6ddf888c74dca2b13272cf 1.0.7
 2 | 1e029bfd9ead84151b6ddf888c74dca2b13272cf 1.0.7
 3 | 0000000000000000000000000000000000000000 1.0.7
 4 | 0000000000000000000000000000000000000000 1.0.7
 5 | 5f4fa2c136c7ad28ece8c1bcbfad0982532fd9eb 1.0.7
 6 | da353c888ad408857b9c5cca1ec60675f3121e60 1.0.8
 7 | 1359338b7128798679095466a2a96903832b48d4 v2.0.0
 8 | d8d00bf937f2dd8ca42abd6631453da16a9a263e v2.0.1
 9 | a516c984a9a169bad58921a39d7d72437b51c9b5 v2.0.2
10 | 


--------------------------------------------------------------------------------
/experiments/farmobjs.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | 
 3 | BEGIN {
 4 | 	$LOAD_PATH.unshift File::dirname(File::dirname( __FILE__ )) + "/lib"
 5 | 	require 'linguistics'
 6 | }
 7 | 
 8 | Linguistics::use( :en )
 9 | 
10 | # Just a(nother) fun little demo of the conjunction (junction, what's
11 | # your) function.
12 | animals = %w{dog cow ox chicken goose goat cow dog rooster llama 
13 | 	pig goat dog cat cat dog cow goat goose goose ox alpaca}
14 | puts "The farm has: " +
15 | 	animals.en.conjunction
16 | 


--------------------------------------------------------------------------------
/experiments/TEMPLATE.rb.tpl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | #
 3 | # (>>>description<<<)
 4 | # 
 5 | # Time-stamp: <24-Aug-2003 16:11:13 deveiant>
 6 | #
 7 | 
 8 | BEGIN {
 9 | 	base = File::dirname( File::dirname(File::expand_path(__FILE__)) )
10 | 	$LOAD_PATH.unshift "#{base}/lib"
11 | 
12 | 	require "#{base}/utils.rb"
13 | 	include UtilityFunctions
14 | }
15 | 
16 | try( "(>>>FILE_SANS<<<)" ) {
17 | 	(>>>POINT<<<)
18 | }
19 | 
20 | 
21 | >>>TEMPLATE-DEFINITION-SECTION<<<
22 | ("description" "Experiment description: ")
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/examples/klingon.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby -w
 2 | 
 3 | require 'linguistics'
 4 | 
 5 | # An example of how you'd start writing a language module that provides
 6 | # Klingon-language inflection.  It's obviously not really a useful
 7 | # implementation.
 8 | 
 9 | module Linguistics::TLH
10 | 
11 | 	# Register the module with the framework
12 | 	Linguistics.register_language( :tlh, self )
13 | 
14 | end
15 | 
16 | 
17 | if __FILE__ == $0
18 | 	require 'pp'
19 | 	Linguistics.use( :tlh, :classes => [Object] )
20 | 	pp Object.new.tlh
21 | end
22 | 
23 | 


--------------------------------------------------------------------------------
/.pryrc:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | # vim: set nosta noet ts=4 sw=4:
 3 | 
 4 | BEGIN {
 5 |     require 'pathname'
 6 | 	$LOAD_PATH.unshift( Pathname.new( __FILE__ ).dirname + 'lib' )
 7 | }
 8 | 
 9 | begin
10 | 	require 'loggability'
11 | 	require 'linguistics'
12 | 
13 | 	Loggability.level = :debug
14 | 	Loggability.format_with( :color )
15 | 
16 | 	# Linguistics.use( :en )
17 | 	# Linguistics.use( :en, monkeypatch: true )
18 | rescue Exception => err
19 | 	$stderr.puts "Linguistics failed to load: %p: %s" % [ err.class, err.message ]
20 | 	$stderr.puts( err.backtrace )
21 | end
22 | 
23 | 


--------------------------------------------------------------------------------
/.tm_properties:
--------------------------------------------------------------------------------
 1 | # Settings
 2 | projectDirectory     = "$CWD"
 3 | windowTitle          = "${CWD/^.*\///} «$TM_DISPLAYNAME»"
 4 | excludeInFileChooser = "{$exclude,.hg,pkg}"
 5 | 
 6 | TM_RUBY                  = "/Users/mgranger/.rvm/bin/rvm-auto-ruby"
 7 | 
 8 | TM_RSPEC_OPTS            = '-rrspec/core/formatters/webkit -Ilib:../Mongrel2/lib'
 9 | TM_RSPEC_FORMATTER       = 'RSpec::Core::Formatters::WebKit'
10 | 
11 | [ source.ruby ]
12 | disableIndentCorrections = true
13 | tabSize = 4
14 | softTabs = false
15 | 
16 | [ source.ruby.rspec ]
17 | tabSize = 4
18 | softTabs = false
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/History.rdoc:
--------------------------------------------------------------------------------
 1 | == v2.0.2 [2013-02-27] Michael Granger <ged@FaerieMUD.org>
 2 | 
 3 | - Fix for Ruby 2: don't memoize the inflector.
 4 | 
 5 | 
 6 | == v2.0.1 [2013-02-25] Michael Granger <ged@FaerieMUD.org>
 7 | 
 8 | - Add missing loggability dependency to the gem (fixes #3).
 9 | - Adding some monkeypatch specs (refs #1), fixing some edge-case
10 |   pluralizations.
11 | - Documentation fixes.
12 | 
13 | 
14 | == v2.0.0 [2012-10-10] Michael Granger  <ged@FaerieMUD.org>
15 | 
16 | Rewritten to be more modular, easier to extend and maintain, and to work under
17 | 1.9.
18 | 
19 | 
20 | === v1.0.9 [2011-09-01] Michael Granger  <ged@FaerieMUD.org>
21 | 
22 | - Bugfix for Linguistics::EN.ordinate.
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/experiments/randobjlist.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | 
 3 | BEGIN {
 4 | 	$LOAD_PATH.unshift File::dirname(File::dirname( __FILE__ )) + "/lib"
 5 | 	require 'linguistics'
 6 | }
 7 | 
 8 | Linguistics::use( :en )
 9 | 
10 | # Just a fun little demo of the conjunction (junction, what's your) function.
11 | 
12 | MinObjects = 5
13 | MaxObjects = 35
14 | Objects = %w[
15 | 	butcher baker candlestick-maker
16 | 	mouse clock
17 | 	cat fiddle cow moon dog sport dish spoon
18 | 	tisket tasket
19 | 	jack jill hill pail crown
20 | ]
21 | 
22 | def randobjlist
23 | 	objs = []
24 | 	0.upto( rand(MaxObjects - MinObjects) + MinObjects ) do
25 | 		objs << Objects[ rand(Objects.nitems) - 1 ]
26 | 	end
27 | 
28 | 	return objs
29 | end
30 | 
31 | 
32 | puts "Random object list:\n\t" +
33 | 	randobjlist().en.conjunction
34 | 
35 | 


--------------------------------------------------------------------------------
/experiments/generalize.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | 
 3 | 
 4 | BEGIN {
 5 | 	$LOAD_PATH.unshift File::dirname(File::dirname( __FILE__ )) + "/lib"
 6 | 	require 'linguistics'
 7 | }
 8 | 
 9 | Linguistics::use( :en, :installProxy => :en )
10 | 
11 | # Just a bit of fun -- takes a sentence, and tries to generalize it by looking
12 | # up the hypernyms of each of the words. This'll work much better when
13 | # integration with LinkParser is added, as this doesn't know about parts of
14 | # speech or anything.
15 | 
16 | if ARGV.empty?
17 | 	print "Sentence: "
18 | 	sentence = $stdin.gets
19 | else
20 | 	sentence = ARGV.join(" ")
21 | end
22 | 
23 | newSentence = sentence.split.collect {|word|
24 | 	word.hypernyms ? word.hypernyms[0].words[0] : word
25 | }.join(" ")
26 | 
27 | 
28 | puts "Converted:\n  %s\nto\n  %s\n\n" % [ sentence, newSentence ]
29 | 
30 | 


--------------------------------------------------------------------------------
/.hgsigs:
--------------------------------------------------------------------------------
1 | 401a04c4cf43f4a88093a3013003c3d4baff7a61 0 iEYEABECAAYFAkr1Ap0ACgkQ+zlz4UKpE6SmtwCfVkDwdziUnU66cKKgnU4ETNsa8UsAnRUz1k+e+m4aZTwieDU9jhJJTHbT
2 | bebbaa868974c3298865e23f1e21aeae67fb354b 0 iEYEABECAAYFAkr1CEgACgkQ+zlz4UKpE6QcYQCgycc21E8FelXeiEUXnCNg/IUQcWwAn3rChiQ41MKMX7B9EhqOb1CpycrD
3 | 8029de2f9c60345ddb5cc6bfacee3132a47c6fea 0 iEYEABECAAYFAksC148ACgkQ+zlz4UKpE6TWOACfTmF4+MNXij9OBD0ZVkduuDAlbQIAoKD2KlZYe+vWzGHc4hm+nP9jTW+X
4 | a7cda4b8747c6d34688ec97e2d721a26aab06bae 0 iEYEABECAAYFAlB1mUEACgkQ+zlz4UKpE6RXMwCcC1rJErdthKKiK0SgqNl+rF+aLywAoNmSPnI9ZVij7a/rrNvGHlMooHxL
5 | 5240c28c80bd748fee9041cdd5a00bad63c33fe0 0 iEYEABECAAYFAlEsIfsACgkQ+zlz4UKpE6RKlQCg2eD5eUiyzx6yyWriWstVGXC4kXUAoLndZG0LHKwRfvs60L3/1JdjNZvt
6 | d2eeec0b78321a8298e23599bcc05839686a1ac0 0 iEYEABECAAYFAlEuQ4sACgkQ+zlz4UKpE6QjbQCfThFNbhBvcC06wgfAoKJXA41wClYAoM4NguP0W73oMqQ4TRbufsJWpzoj
7 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | # -*- ruby -*-
 2 | 
 3 | # DO NOT EDIT THIS FILE. Instead, edit Rakefile, and run `rake bundler:gemfile`.
 4 | 
 5 | source "https://rubygems.org/"
 6 | 
 7 | gem "loggability", "~>0.7"
 8 | 
 9 | gem "hoe-mercurial", "~>1.4.0", :group => [:development, :test]
10 | gem "hoe-highline", "~>0.1.0", :group => [:development, :test]
11 | gem "rdoc", "~>4.0", :group => [:development, :test]
12 | gem "hoe-deveiate", "~>0.3", :group => [:development, :test]
13 | gem "hoe-bundler", "~>1.2", :group => [:development, :test]
14 | gem "linkparser", "~>1.1", :group => [:development, :test]
15 | gem "wordnet", "~>1.0", :group => [:development, :test]
16 | gem "wordnet-defaultdb", "~>1.0", :group => [:development, :test]
17 | gem "ruby-stemmer", "~>0.9", :group => [:development, :test]
18 | gem "hoe", "~>3.7", :group => [:development, :test]
19 | 
20 | # vim: syntax=ruby
21 | 


--------------------------------------------------------------------------------
/experiments/conjunct-with-block.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | #
 3 | # Written to find a minimal testcase for the #conjunction-with-block bug.
 4 | # 
 5 | # Time-stamp: <04-Nov-2005 07:43:36 ged>
 6 | #
 7 | 
 8 | BEGIN {
 9 | 	base = File::dirname( File::dirname(File::expand_path(__FILE__)) )
10 | 	$LOAD_PATH.unshift "#{base}/lib"
11 | 
12 | 	require "#{base}/utils.rb"
13 | 	include UtilityFunctions
14 | }
15 | 
16 | require 'linguistics'
17 | 
18 | Linguistics::use( :en, :installProxy => true )
19 | array = %w{sheep shrew goose bear penguin barnacle sheep goose goose}
20 | 
21 | $defout.puts "Called via language proxy: ",
22 |   array.en.conjunction {|word| "%s-word" % [word[0,1]]}
23 | 
24 | $defout.puts "Called via delegator proxy: ",
25 |   array.conjunction {|word| "%s-word" % [word[0,1]]}
26 | 
27 | $defout.puts "Called via language proxy: ",
28 |   array.en.conjunction {|word| "%s-word" % [word[0,1]]}
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/spec/lib/helpers.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | # coding: utf-8
 3 | 
 4 | BEGIN {
 5 | 	require 'pathname'
 6 | 	basedir = Pathname.new( __FILE__ ).dirname.parent
 7 | 
 8 | 	libdir = basedir + "lib"
 9 | 
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | # SimpleCov test coverage reporting; enable this using the :coverage rake task
14 | if ENV['COVERAGE']
15 | 	$stderr.puts "\n\n>>> Enabling coverage report.\n\n"
16 | 	require 'simplecov'
17 | 	SimpleCov.start do
18 | 		add_filter 'spec'
19 | 		add_group "Needing tests" do |file|
20 | 			file.covered_percent < 90
21 | 		end
22 | 	end
23 | end
24 | 
25 | require 'linguistics'
26 | 
27 | require 'rspec'
28 | require 'spec/lib/constants'
29 | require 'loggability/spechelpers'
30 | 
31 | ### Mock with RSpec
32 | RSpec.configure do |c|
33 | 	c.mock_with( :rspec )
34 | 	c.include( Loggability::SpecHelpers )
35 | end
36 | 
37 | # vim: set nosta noet ts=4 sw=4:
38 | 
39 | 


--------------------------------------------------------------------------------
/lib/linguistics/languagebehavior.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | require 'rspec'
 4 | 
 5 | require 'linguistics'
 6 | require 'linguistics/iso639'
 7 | 
 8 | 
 9 | # This is an RSpec 2 shared behavior for language plugins. You can use this to be
10 | # sure that your language plugin conforms to the API expected by Linguistics. You'll
11 | # probably want to use it something like this:
12 | # 
13 | #    require 'linguistics/languagebehavior'
14 | #
15 | #    describe Linguistics::KL do
16 | #
17 | #      it_should_behave_like "a Linguistics language module"
18 | #
19 | #      # ... any other specs for your module
20 | #
21 | #    end
22 | 
23 | shared_examples_for "a Linguistics language module" do
24 | 
25 | 	let( :language_module ) do
26 | 		described_class
27 | 	end
28 | 
29 | 
30 | 	it "registers itself with the Linguistics module when required" do
31 | 		Linguistics.languages.values.should include( language_module )
32 | 	end
33 | 
34 | end
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/.rvmrc:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This is an RVM Project .rvmrc file, used to automatically load the ruby
 4 | # development environment upon cd'ing into the directory
 5 | 
 6 | environment_id="ruby-2.0.0@linguistics"
 7 | rvmdir=${rvm_path:-$HOME/.rvm}
 8 | gemset_file=".rvm.gems"
 9 | 
10 | if [[ -d "${rvmdir}/environments" && -s "${rvmdir}/environments/$environment_id" ]]; then
11 | 	echo "Using ${environment_id}"
12 | 	. "${rvmdir}/environments/$environment_id"
13 | 
14 | 	if [[ -s "${rvmdir}/hooks/after_use" ]]; then
15 | 		. "${rvmdir}/hooks/after_use"
16 | 	fi
17 | else
18 | 	# If the environment file has not yet been created, use the RVM CLI to select.
19 | 	if ! rvm --create use  "$environment_id"; then
20 | 		echo "Failed to create RVM environment '${environment_id}'."
21 | 		exit 1
22 | 	fi
23 | fi
24 | 
25 | if [[ -s "$gemset_file" ]]; then
26 | 	rvm gemset import "$gemset_file"
27 | fi
28 | 
29 | echo "ObjectSpace.each_object.map( &:class ).en.conjunction"
30 | echo
31 | 


--------------------------------------------------------------------------------
/spec/linguistics/inflector_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env spec -cfs
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent
 6 | 
 7 | 	libdir = basedir + "lib"
 8 | 
 9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | require 'rspec'
14 | require 'spec/lib/helpers'
15 | 
16 | require 'linguistics'
17 | require 'linguistics/inflector'
18 | 
19 | 
20 | describe Linguistics::Inflector do
21 | 
22 | 	before( :all ) do
23 | 		setup_logging( :fatal )
24 | 	end
25 | 
26 | 	after( :all ) do
27 | 		reset_logging()
28 | 	end
29 | 
30 | 
31 | 	it "provides a human-readable representation of the object suitable for debugging" do
32 | 		obj = Object.new
33 | 		result = Linguistics::Inflector.new( :en, obj ).inspect
34 | 
35 | 		result.should include( (obj.object_id / 2).to_s(16) )
36 | 		result.should =~ /english-language/i
37 | 	end
38 | 
39 | end
40 | 
41 | 


--------------------------------------------------------------------------------
/experiments/lafcadio_plural.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | #
 3 | # Experimenting with Lafcadio's pluralization algorithm
 4 | # 
 5 | # Time-stamp: <13-Jul-2005 08:37:00 ged>
 6 | #
 7 | 
 8 | BEGIN {
 9 | 	base = File::dirname( File::dirname(File::expand_path(__FILE__)) )
10 | 	$LOAD_PATH.unshift "#{base}/lib"
11 | 
12 | 	require "#{base}/utils.rb"
13 | 	include UtilityFunctions
14 | 
15 | 	require 'linguistics'
16 | }
17 | 
18 | $yaml = false
19 | Linguistics::use( :en )
20 | 
21 | def plural(singular)
22 | 	consonantYPattern = Regexp.new("([^aeiou])y$", Regexp::IGNORECASE)
23 | 	if singular =~ consonantYPattern
24 | 		singular.gsub consonantYPattern, '\1ies'
25 | 	elsif singular =~ /[xs]$/
26 | 		singular + "es"
27 | 	else
28 | 		singular + "s"
29 | 	end
30 | end
31 | 
32 | Words = %w[tree fairy address opus mythos child persona datum nucleus
33 |            phenomenon commando radix seraph nexus series dais trellis ]
34 | 
35 | Words.each do |word|
36 | 	puts "%s vs. %s" % [plural(word), word.en.plural]
37 | end
38 | 
39 | 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/lib/linguistics/en/participles.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | 
 3 | require 'linguistics/en' unless defined?( Linguistics::EN )
 4 | 
 5 | # Methods for deriving present participles for the English-language
 6 | # Linguistics module.
 7 | module Linguistics::EN::Participles
 8 | 
 9 | 	# Register this module to the list of modules to include
10 | 	Linguistics::EN.register_extension( self )
11 | 
12 | 
13 | 	### Attempt to return the inflected string in its present participle 
14 | 	### form (e.g., talked -> talking).
15 | 	def present_participle
16 |         plural = self.to_s.en.plural_verb
17 | 
18 | 		plural.sub!( /ie$/, 'y' ) or
19 | 			plural.sub!( /ue$/, 'u' ) or
20 | 			plural.sub!( /([auy])e$/, '$1' ) or
21 | 			plural.sub!( /i$/, '' ) or
22 | 			plural.sub!( /([^e])e$/, "\\1" ) or
23 | 			/er$/.match( plural ) or
24 | 			plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )
25 | 
26 |         return "#{plural}ing"
27 | 	end
28 | 	alias_method :part_pres, :present_participle
29 | 	Linguistics::EN.register_lprintf_formatter :PART_PRES, :present_participle
30 | 
31 | 
32 | end # module Linguistics::EN::Participles
33 | 
34 | 


--------------------------------------------------------------------------------
/experiments/lprintf.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | #
 3 | # Experiment to work out the implementation of sprintf-like formatting.
 4 | #
 5 | # Conclusion: re-opening the module doesn't work well, but I've added it to
 6 | # en.rb, and it seems to work quite well for most cases. Still having trouble
 7 | # with 'CONJUNCT'.
 8 | # 
 9 | # Time-stamp: <31-Oct-2005 06:11:43 ged>
10 | #
11 | 
12 | BEGIN {
13 | 	base = File::dirname( File::dirname(File::expand_path(__FILE__)) )
14 | 	$LOAD_PATH.unshift "#{base}/lib"
15 | 
16 | 	require "#{base}/utils.rb"
17 | 	include UtilityFunctions
18 | }
19 | 
20 | require 'linguistics'
21 | 
22 | Linguistics::use( :en, :classes => [String,Array] )
23 | 
24 | module Linguistics::EN
25 | 
26 | 	module_function
27 | 	def lprintf( fmt, *args )
28 | 		fmt.to_s.gsub( /%([A-Z_]+)/ ) do |match|
29 | 			op = $1
30 | 			case op
31 | 			when 'PL'
32 | 				args.shift.en.plural
33 | 			when 'A', 'AN'
34 | 				args.shift.en.a
35 | 			when 'NO'
36 | 				args.shift.en.no
37 | 			when 'CONJUNCT'
38 | 				args.shift.en.conjunction
39 | 			else
40 | 				raise "no such formatter %p" % op
41 | 			end
42 | 		end
43 | 	end
44 | 
45 | end
46 | 
47 | try( '"How many %PL do you want?".en.lprintf("monkey")' )
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/lib/linguistics/monkeypatches.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby -w
 2 | 
 3 | module Linguistics
 4 | 
 5 | 	### A collection of extensions that get added to Array.
 6 | 	module ArrayExtensions
 7 | 
 8 | 		### Returns a new Array that has had a new member inserted between all of
 9 | 		### the current ones. The value used is the given +value+ argument unless a
10 | 		### block is given, in which case the block is called once for each pair of
11 | 		### the Array, and the return value is used as the separator.
12 | 		def separate( *args, &block )
13 | 			ary = self.dup
14 | 			ary.separate!( *args, &block )
15 | 			return ary
16 | 		end
17 | 
18 | 		### The same as #separate, but modifies the Array in place and returns it.
19 | 		def separate!( *args )
20 | 			raise LocalJumpError, "no block given for no-arg #separate!" if
21 | 				args.empty? && !block_given?
22 | 			value = args.first # only the first argument is used as the separator
23 | 			# Walk the odd indexes (1, 3, 5, ...) where the separators belong.
24 | 			(1..( (self.length * 2) - 2 )).step(2) do |i|
25 | 				if block_given?
26 | 					self.insert( i, yield(self[i-1,2]) ) # block sees [left, right]
27 | 				else
28 | 					self.insert( i, value )
29 | 				end
30 | 			end
31 | 			self
32 | 		end
33 | 
34 | 	end # module ArrayExtensions
35 | 
36 | end # module Linguistics
37 | 
38 | ### Extend Array
39 | class Array
40 | 	include Linguistics::ArrayExtensions
41 | end
42 | 
43 | 


--------------------------------------------------------------------------------
/experiments/allobjlist.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | 
 3 | BEGIN {
 4 | 	$LOAD_PATH.unshift File::dirname(File::dirname( __FILE__ )) + "/lib"
 5 | 	require 'linguistics'
 6 | }
 7 | 
 8 | Linguistics::use( :en )
 9 | 
10 | # Just a(nother) fun little demo of the conjunction (junction, what's
11 | # your) function.
12 | 
13 | allobjs = []
14 | ObjectSpace::each_object {|obj| allobjs << obj.class.name}
15 | 
16 | puts "The current Ruby objectspace contains: " +
17 | 	allobjs.en.conjunction( :generalize => true )
18 | 
19 | 
20 | # Prints:
21 | #  "The current Ruby objectspace contains: thousands of Strings, thousands of
22 | #  Arrays, hundreds of Hashes, hundreds of Classes, many Regexps, a number of
23 | #  Ranges, a number of Modules, several Files, several Floats, several Procs,
24 | #  several MatchDatas, several Objects, several IOS, a Binding, a NoMemoryError,
25 | #  a SystemStackError, a fatal, a Thread, and a ThreadGroup"
26 | #
27 | 
28 | # If :generalize is set to 'false', it prints:
29 | #  "The current Ruby objectspace contains: 8744 Strings, 1025 Arrays, 425
30 | #  Hashes, 184 Classes, 74 Regexps, 18 Ranges, 18 Modules, five Files, five
31 | #  Floats, four Procs, three MatchDatas, three Objects, three IOS, a Binding, a
32 | #  NoMemoryError, a SystemStackError, a fatal, a Thread, and a ThreadGroup"
33 | #
34 | 
35 | 


--------------------------------------------------------------------------------
/spec/linguistics/monkeypatches_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env spec -cfs
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent
 6 | 
 7 | 	libdir = basedir + "lib"
 8 | 
 9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | require 'rspec'
14 | require 'spec/lib/helpers'
15 | 
16 | require 'linguistics'
17 | require 'linguistics/monkeypatches'
18 | 
19 | 
20 | describe Array, "extended with Linguistics::ArrayExtensions" do
21 | 
22 | 	it "can return a copy of itself with a separator between each element" do
23 | 		ary = %w[one two three]
24 | 		ary.separate( 'and' ).should == [ 'one', 'and', 'two', 'and', 'three' ]
25 | 	end
26 | 
27 | 	it "can return a copy of itself with each element separated by the return value of a block" do
28 | 		ary = %w[thumpy lippy barky tiger]
29 | 		result = ary.separate {|left, right| (left > right) ? '>' : '<' }
30 | 		result.should == [ 'thumpy', '>', 'lippy', '>', 'barky', '<', 'tiger' ]
31 | 	end
32 | 
33 | 	it "provides a mutator variant of #separate" do
34 | 		ary = %w[one two three]
35 | 		result = ary.separate!( nil )
36 | 		result.should equal( ary )
37 | 		result.should == [ 'one', nil, 'two', nil, 'three' ]
38 | 	end
39 | 
40 | end


--------------------------------------------------------------------------------
/examples/generalize_sentence.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	
 6 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.expand_path
 7 | 	libdir = basedir + "lib"
 8 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
 9 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
10 | }
11 | 
12 | require 'linguistics'
13 | require 'readline'
14 | 
15 | Linguistics.use( :en, :installProxy => true )
16 | 
17 | def generalized_word( word )
18 | 	$deferr.puts "    Traversing hypernyms for #{word}"
19 | 	syn = word.synset or return word
20 | 	nyms = syn.traverse( :hypernyms )
21 | 	return word if nyms.empty?
22 | 
23 | 	general_subj = nyms[ nyms.length / 4 ]
24 | 	$deferr.puts "      %d synsets returned. Picking %d (%s)" % [
25 | 		nyms.length,
26 | 		nyms.length / 4,
27 | 		general_subj.words.first,
28 | 	]
29 | 	return general_subj.words.first
30 | end
31 | 
32 | while input = Readline.readline( "Sentence to generalize: " )
33 | 	sent = input.sentence
34 | 
35 | 	subj = sent.subject
36 | 	obj = sent.object
37 | 	verb = sent.verb
38 | 	
39 | 	input.sub!( /\b#{subj}\b/, generalized_word(subj) ) if subj
40 | 	input.sub!( /\b#{obj}\b/, generalized_word(obj) ) if obj
41 | 	input.sub!( /\b#{verb}\b/, generalized_word(verb) ) if verb
42 | 
43 | 	puts input
44 | end
45 | 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/Manifest.txt:
--------------------------------------------------------------------------------
 1 | ChangeLog
 2 | History.rdoc
 3 | LICENSE
 4 | Manifest.txt
 5 | README.rdoc
 6 | Rakefile
 7 | examples/endocs.rb
 8 | examples/generalize_sentence.rb
 9 | examples/klingon.rb
10 | lib/linguistics.rb
11 | lib/linguistics/en.rb
12 | lib/linguistics/en/articles.rb
13 | lib/linguistics/en/conjugation.rb
14 | lib/linguistics/en/conjunctions.rb
15 | lib/linguistics/en/infinitives.rb
16 | lib/linguistics/en/linkparser.rb
17 | lib/linguistics/en/numbers.rb
18 | lib/linguistics/en/participles.rb
19 | lib/linguistics/en/pluralization.rb
20 | lib/linguistics/en/stemmer.rb
21 | lib/linguistics/en/titlecase.rb
22 | lib/linguistics/en/wordnet.rb
23 | lib/linguistics/inflector.rb
24 | lib/linguistics/iso639.rb
25 | lib/linguistics/languagebehavior.rb
26 | lib/linguistics/monkeypatches.rb
27 | spec/lib/constants.rb
28 | spec/lib/helpers.rb
29 | spec/linguistics/en/articles_spec.rb
30 | spec/linguistics/en/conjugation_spec.rb
31 | spec/linguistics/en/conjunctions_spec.rb
32 | spec/linguistics/en/infinitives_spec.rb
33 | spec/linguistics/en/linkparser_spec.rb
34 | spec/linguistics/en/numbers_spec.rb
35 | spec/linguistics/en/participles_spec.rb
36 | spec/linguistics/en/pluralization_spec.rb
37 | spec/linguistics/en/stemmer_spec.rb
38 | spec/linguistics/en/titlecase_spec.rb
39 | spec/linguistics/en/wordnet_spec.rb
40 | spec/linguistics/en_spec.rb
41 | spec/linguistics/inflector_spec.rb
42 | spec/linguistics/iso639_spec.rb
43 | spec/linguistics/monkeypatches_spec.rb
44 | spec/linguistics_spec.rb
45 | 


--------------------------------------------------------------------------------
/spec/linguistics/en/participles_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env rspec -cfd
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
 6 | 
 7 | 	libdir = basedir + "lib"
 8 | 
 9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | require 'rspec'
14 | require 'spec/lib/helpers'
15 | 
16 | require 'linguistics'
17 | require 'linguistics/en'
18 | require 'linguistics/en/participles'
19 | 
20 | 
21 | describe Linguistics::EN::Participles do
22 | 
23 | 	before( :all ) do
24 | 		setup_logging( :fatal )
25 | 		Linguistics.use( :en )
26 | 	end
27 | 
28 | 	after( :all ) do
29 | 		reset_logging()
30 | 	end
31 | 
32 | 
33 | 	it "returns 'seeing' as the present participle for 'sees'" do
34 | 		"sees".en.present_participle.should == 'seeing'
35 | 	end
36 | 
37 | 	it "returns 'eating' as the present participle for 'eats'" do
38 | 		"eats".en.present_participle.should == 'eating'
39 | 	end
40 | 
41 | 	it "returns 'batting' as the present participle for 'bats'" do
42 | 		"bats".en.present_participle.should == 'batting'
43 | 	end
44 | 
45 | 	it "returns 'hating' as the present participle for 'hates'" do
46 | 		"hates".en.present_participle.should == 'hating'
47 | 	end
48 | 
49 | 	it "returns 'spying' as the present participle for 'spies'" do
50 | 		"spies".en.present_participle.should == 'spying'
51 | 	end
52 | 
53 | 
54 | end
55 | 
56 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2003-2011, Michael Granger
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice,
 8 |   this list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of the author/s, nor the names of the project's
15 |   contributors may be used to endorse or promote products derived from this
16 |   software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/spec/linguistics/en/linkparser_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env spec -cfs
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
 6 | 
 7 | 	libdir = basedir + "lib"
 8 | 
 9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | require 'rspec'
14 | require 'spec/lib/helpers'
15 | 
16 | require 'linguistics'
17 | require 'linguistics/en/linkparser'
18 | 
19 | 
describe Linguistics::EN::LinkParser do

	before( :all ) do
		setup_logging( :fatal )
		Linguistics.use( :en )
	end

	after( :all ) do
		reset_logging()
	end


	it "adds EN::LinkParser to the list of English language modules" do
		# Assert on the membership test; a bare #include? call discards its
		# result and so could never fail the example.
		Linguistics::EN::MODULES.should include( Linguistics::EN::LinkParser )
	end


	describe "on a system that has the 'linkparser' library installed" do

		it "can create a LinkParser::Sentence from a sentence in a string" do
			pending "installation of the linkparser library" unless
				Linguistics::EN.has_linkparser?
			"This is a sentence.".en.sentence.should be_a( LinkParser::Sentence )
		end

	end


	describe "on a system that doesn't have the 'linkparser' library" do
		it "raises an NotImplementedError when you try to use linkparser functionality" do

			# If the system *does* have linkparser support, pretend it doesn't.
			if Linguistics::EN.has_linkparser?
				Linguistics::EN::LinkParser.stub( :has_linkparser? ).and_return( false )
				exception = stub( "linkparser load error", :message => 'no such file to load' )
				Linguistics::EN::LinkParser.stub( :lp_error ).and_return( exception )
			end

			expect {
				"This is a sentence.".en.sentence
			}.to raise_error( NotImplementedError, /not loaded/i )
		end

	end

end
66 | 
67 | 


--------------------------------------------------------------------------------
/spec/linguistics/en/stemmer_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env spec -cfs
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
 6 | 
 7 | 	libdir = basedir + "lib"
 8 | 
 9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | require 'rspec'
14 | require 'spec/lib/helpers'
15 | 
16 | require 'linguistics'
17 | require 'linguistics/en'
18 | require 'linguistics/en/stemmer'
19 | 
20 | 
describe Linguistics::EN::Stemmer do

	before( :all ) do
		setup_logging()
		Linguistics.use( :en )
	end

	after( :all ) do
		reset_logging()
	end


	it "adds EN::Stemmer to the list of English language modules" do
		# Assert on the membership test; a bare #include? call discards its
		# result and so could never fail the example.
		Linguistics::EN::MODULES.should include( Linguistics::EN::Stemmer )
	end


	describe "on a system that has the 'ruby-stemmer' library installed" do

		before( :each ) do
			pending "installation of the ruby-stemmer library" unless
				Linguistics::EN.has_stemmer?
		end

		it "can fetch the stem of a word" do
			"communication".en.stem.should == 'communic'
		end

	end


	describe "on a system that doesn't have the 'ruby-stemmer' library" do

		before( :all ) do
			# If the system *does* have stemmer support, pretend it doesn't.
			if Linguistics::EN.has_stemmer?
				@had_stemmer = true
				error = LoadError.new( "simulated exception: no such file to load -- lingua/stemmer" )
				Linguistics::EN::Stemmer.instance_variable_set( :@has_stemmer, false )
				Linguistics::EN::Stemmer.instance_variable_set( :@stemmer_error, error )
			end
		end

		# Undo the simulated failure so the faked "not installed" state doesn't
		# leak into whatever runs after this group.
		after( :all ) do
			if @had_stemmer
				Linguistics::EN::Stemmer.instance_variable_set( :@has_stemmer, true )
				Linguistics::EN::Stemmer.instance_variable_set( :@stemmer_error, nil )
			end
		end

		it "raises the appropriate LoadError when you try to use stemmer functionality" do
			expect {
				"communication".en.stem
			}.to raise_error( LoadError, %r{lingua/stemmer}i )
		end

	end

end
72 | 
73 | 


--------------------------------------------------------------------------------
/spec/linguistics_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env spec -cfs
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent
 6 | 
 7 | 	libdir = basedir + "lib"
 8 | 
 9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | require 'rspec'
14 | require 'spec/lib/helpers'
15 | 
16 | require 'linguistics'
17 | 
18 | 
describe Linguistics do

	before( :all ) do
		setup_logging()
	end

	# Paired with the before( :all ) above: resetting after *each* example would
	# leave every example but the first running without the logging setup.
	after( :all ) do
		reset_logging()
	end


	describe "version methods" do

		it "returns a version string if asked" do
			Linguistics.version_string.should =~ /\w+ [\d.]+/
		end

		it "returns a version string with a build number if asked" do
			Linguistics.version_string(true).should =~ /\w+ [\d.]+ \(build [[:xdigit:]]+\)/
		end
	end


	describe "language-loading functions" do

		it "load a language's linguistic functions via variants of its ISO639 code" do
			testclass = Class.new
			Linguistics.use( :eng, :classes => testclass ).should == [ testclass ]
			testclass.new.should respond_to( :eng )
			testclass.new.should respond_to( :en )
		end

		it "load a language's linguistic functions via the 2-letter variant of its ISO639 code" do
			testclass = Class.new
			Linguistics.use( :en, :classes => testclass ).should == [ testclass ]
			testclass.new.should respond_to( :eng )
			testclass.new.should respond_to( :en )
		end

		it "default to extending a default set of classes" do
			Linguistics.use( :eng ).should == Linguistics::DEFAULT_EXT_CLASSES
			[].should respond_to( :eng )
		end

		it "raise an error when a language that doesn't exist is requested" do
			expect {
				Linguistics.use( :zz )
			}.to raise_error( RuntimeError, /unknown ISO639-2 language code/i )
		end

		it "raise an error for valid languages that don't have any linguistic functions to load" do
			expect {
				Linguistics.use( :ja )
			}.to raise_error( LoadError, /failed to load a language extension/i )
		end

	end

end
78 | 


--------------------------------------------------------------------------------
/spec/linguistics/en/wordnet_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env spec -cfs
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
 6 | 
 7 | 	libdir = basedir + "lib"
 8 | 
 9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | require 'rspec'
14 | require 'spec/lib/helpers'
15 | 
16 | require 'linguistics'
17 | require 'linguistics/en'
18 | require 'linguistics/en/wordnet'
19 | 
20 | 
describe Linguistics::EN::WordNet do

	before( :all ) do
		setup_logging()
		Linguistics.use( :en )
	end

	after( :all ) do
		reset_logging()
	end


	it "adds EN::WordNet to the list of English language modules" do
		# Assert on the membership test; a bare #include? call discards its
		# result and so could never fail the example.
		Linguistics::EN::MODULES.should include( Linguistics::EN::WordNet )
	end


	describe "on a system that has the 'wordnet' library installed" do

		before( :each ) do
			pending "installation of the wordnet library" unless
				Linguistics::EN.has_wordnet?
		end

		it "can create a WordNet::Synset from a word" do
			"jackal".en.synset.should be_a( WordNet::Synset )
		end

		it "can load all synsets for a word" do
			result = "appear".en.synsets
			result.should have( 7 ).members
			result.should include( WordNet::Synset[200422090] )
		end

	end


	describe "on a system that doesn't have the 'wordnet' library" do
		before( :all ) do
			# If the system *does* have wordnet support, pretend it doesn't.
			if Linguistics::EN.has_wordnet?
				@had_wordnet = true
				error = LoadError.new( "no such file to load -- wordnet" )
				Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, false )
				Linguistics::EN::WordNet.instance_variable_set( :@wn_error, error )
			end
		end

		# Put the real state back once the simulated failure is no longer needed.
		after( :all ) do
			if @had_wordnet
				Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, true )
				Linguistics::EN::WordNet.instance_variable_set( :@wn_error, nil )
			end
		end

		it "raises the appropriate LoadError when you try to use wordnet functionality" do
			expect {
				"persimmon".en.synset
			}.to raise_error( LoadError, %r{wordnet}i )
		end

	end

end
85 | 
86 | 


--------------------------------------------------------------------------------
/lib/linguistics/en/stemmer.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | require 'linguistics' unless defined?( Linguistics )
 4 | require 'linguistics/en' unless defined?( Linguistics::EN )
 5 | 
 6 | # Ruby-Stemmer support for the English-language Linguistics module. It
 7 | # requires the Ruby-Stemmer gem to be installed; if it is not
 8 | # installed, calling the functions defined by this file will raise
 9 | # NotImplementedErrors.
10 | #
11 | #   # Test to be sure the Stemmer gem loaded okay.
12 | #   Linguistics::EN.has_stemmer?
13 | #   # => true
14 | #
15 | module Linguistics::EN::Stemmer
16 | 
17 | 	# Module instance variables -- copied over to the EN module when registered
18 | 	@has_stemmer   = false
19 | 	@stemmer_error = nil
20 | 	@stemmer       = nil
21 | 
22 | 	# Load Ruby-Stemmer if possible, saving the error that occurs if anything goes wrong.
23 | 	begin
24 | 		require 'lingua/stemmer'
25 | 		@has_stemmer = true
26 | 	rescue LoadError => err
27 | 		@stemmer_error = err
28 | 	end
29 | 
30 | 
31 | 	# Container for methods intended to extend the EN module as singleton methods.
32 | 	module SingletonMethods
33 | 
34 | 		### Returns +true+ if Ruby-Stemmer was loaded okay
35 | 		def has_stemmer? ; @has_stemmer; end
36 | 
37 | 		### If #has_stemmer? returns +false+, this can be called to fetch the
38 | 		### exception which was raised when Ruby-Stemmer was loaded.
39 | 		def stemmer_error ; @stemmer_error; end
40 | 
41 | 	end # module SingletonMethods
42 | 	extend SingletonMethods
43 | 
44 | 
45 | 	# Register this module to the list of modules to include
46 | 	Linguistics::EN.register_extension( self )
47 | 
48 | 	#################################################################
49 | 	###	M O D U L E   M E T H O D S
50 | 	#################################################################
51 | 
52 | 	### The instance of the Lingua::Stemmer used for all Linguistics Stemmer
53 | 	### functions.
54 | 	def self::stemmer
55 | 		raise self.stemmer_error unless self.has_stemmer?
56 | 		@stemmer ||= Lingua::Stemmer.new
57 | 	end
58 | 
59 | 
60 | 	#################################################################
61 | 	###	S T E M M E R   I N T E R F A C E
62 | 	#################################################################
63 | 
64 | 	######
65 | 	public
66 | 	######
67 | 
68 | 
69 | 	### Return the stem of the receiving word.
70 | 	def stem
71 | 		return Linguistics::EN::Stemmer.stemmer.stem( self.obj.to_s )
72 | 	end
73 | 
74 | end # module Linguistics::EN::Stemmer
75 | 
76 | 


--------------------------------------------------------------------------------
/lib/linguistics/inflector.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby
 2 | # coding: utf-8
 3 | 
 4 | require 'loggability'
 5 | require 'linguistics' unless defined?( Linguistics )
 6 | 
 7 | # A facade object that acts as the extension point for linguistic modules
 8 | # for a single language. A single instance of an inflector is generated
 9 | # for an object that has been extended with a Linguistics language
10 | # the first time the language is used.
class Linguistics::Inflector
	extend Loggability


	# Loggability API -- send log output to the linguistics logger
	log_to :linguistics


	### Set up an inflector that applies the language identified by
	### +language_code+ to +obj+. Raises a TypeError if +obj+ is itself an
	### inflector.
	def initialize( language_code, obj )
		if obj.is_a?( Linguistics::Inflector )
			raise TypeError, "can't inflect for another inflector!"
		end

		@language_code = language_code
		@obj = obj
		super()
	end


	######
	public
	######

	# The inflector's language code
	attr_reader :language_code

	# The object the inflector is delegating for
	attr_reader :obj


	### Look up the inflector's language code in the ISO639 table and return the
	### English name recorded for it.
	def language
		code = language_code.to_sym
		::Linguistics::ISO639::LANGUAGE_CODES[ code ][ :eng_name ]
	end


	### Report whether the wrapped object responds to +message+, so the inflector
	### advertises the methods it is able to delegate.
	def respond_to_missing?( message, include_priv=false )
		obj.respond_to?( message, include_priv )
	end


	### Stringify the wrapped object.
	def to_s
		obj.to_s
	end


	### Convert the wrapped object to an Integer.
	def to_i
		obj.to_i
	end


	### Build a human-readable description of the inflector for debugging.
	def inspect
		format( "#<(%s-language inflector) for <%s:0x%0x> >",
			language, @obj.class, @obj.object_id / 2 )
	end


	#########
	protected
	#########

	### Forward messages the inflector doesn't understand to the wrapped object.
	### The first time a message is forwarded, a singleton method bound to the
	### target's implementation is installed so later calls skip this hook.
	def method_missing( sym, *args, &block )
		target = obj
		return super unless target.respond_to?( sym )

		define_singleton_method( sym, &target.method(sym) )
		__send__( sym, *args, &block )
	end

end # class Linguistics::Inflector
89 | 
90 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env rake
 2 | 
 3 | begin
 4 | 	require 'rspec'
 5 | 	require 'rspec/core/rake_task'
 6 | rescue LoadError
 7 | 	abort "This Rakefile requires RSpec. Try again after doing 'gem install rspec'"
 8 | end
 9 | 
10 | begin
11 | 	require 'hoe'
12 | rescue LoadError
13 | 	abort "This Rakefile requires Hoe. Try again after doing 'gem install hoe'"
14 | end
15 | 
16 | # The path to the generated .gemspec file
17 | GEMSPEC = '.gemspec'
18 | 
19 | Hoe.plugin :mercurial
20 | Hoe.plugin :bundler
21 | Hoe.plugin :publish
22 | Hoe.plugin :signing
23 | 
24 | Hoe.plugins.delete :rubyforge
25 | 
26 | hoespec = Hoe.spec 'linguistics' do |spec|
27 | 	spec.name = 'linguistics'
28 | 	spec.readme_file = 'README.rdoc'
29 | 	spec.history_file = 'History.rdoc'
30 | 	spec.extra_rdoc_files = FileList[ '*.rdoc' ]
31 | 	spec.license 'BSD'
32 | 
33 | 	spec.developer 'Michael Granger', 'ged@FaerieMUD.org'
34 | 
35 | 	spec.dependency 'loggability', '~> 0.7'
36 | 
37 | 	spec.dependency 'hoe-deveiate', '~> 0.3', :development
38 | 	spec.dependency 'hoe-bundler', '~> 1.2', :development
39 | 	spec.dependency 'linkparser', '~> 1.1', :development
40 | 	spec.dependency 'wordnet', '~> 1.0', :development
41 | 	spec.dependency 'wordnet-defaultdb', '~> 1.0', :development
42 | 	spec.dependency 'ruby-stemmer', '~> 0.9', :development
43 | 
44 | 	spec.spec_extras[:rdoc_options] = ['-f', 'fivefish', '-t', 'Ruby Linguistics Toolkit']
45 | 	spec.spec_extras[:post_install_message] = [
46 | 			"This library also presents tie-ins for the 'linkparser' and",
47 | 			"'wordnet' libraries, which you can enable by installing the",
48 | 			"gems of the same name."
49 | 		  ].join( "\n" )
50 | 
51 | 	spec.require_ruby_version( '>=1.9.3' )
52 | 	spec.hg_sign_tags = true if spec.respond_to?( :hg_sign_tags= )
53 | 	spec.check_history_on_release = true if spec.respond_to?( :check_history_on_release= )
54 | 
55 | 	spec.rdoc_locations << "deveiate:/usr/local/www/public/code/#{remote_rdoc_dir}"
56 | end
57 | 
58 | ENV['VERSION'] ||= hoespec.spec.version.to_s
59 | 
60 | task 'hg:precheckin' => [ :check_history, :check_manifest, :spec ]
61 | 
62 | desc "Build a coverage report"
63 | task :coverage do
64 | 	ENV["COVERAGE"] = 'yes'
65 | 	Rake::Task[:spec].invoke
66 | end
67 | 
68 | 
69 | desc "generate a gemspec from your Hoe.spec"
70 | file GEMSPEC => 'Rakefile' do |task|
71 | 	spec = hoespec.spec.dup
72 | 	spec.files.delete( '.gemtest' )
73 | 	spec.version = "#{spec.version}.pre.#{Time.now.strftime("%Y%m%d%H%M%S")}"
74 | 	File.open( task.name, 'w' ) do |fh|
75 | 		fh.write( spec.to_ruby )
76 | 	end
77 | end
78 | 
79 | 


--------------------------------------------------------------------------------
/spec/linguistics/en_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env spec -cfs
 2 | 
 3 | BEGIN {
 4 | 	require 'pathname'
 5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent
 6 | 
 7 | 	libdir = basedir + "lib"
 8 | 
 9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11 | }
12 | 
13 | require 'rspec'
14 | require 'spec/lib/helpers'
15 | 
16 | require 'linguistics'
17 | require 'linguistics/en'
18 | require 'linguistics/languagebehavior'
19 | 
20 | 
describe Linguistics::EN do

	before( :all ) do
		setup_logging( :fatal )
		Linguistics.use( :en, :proxy => true )
		# NOTE(review): inside a hook this resolves to RSpec's `include` matcher
		# rather than Module#include, so it probably has no effect -- confirm.
		include Linguistics::EN
	end

	after( :all ) do
		reset_logging()
	end


	it_behaves_like "a Linguistics language module"


	it "provides a predicate for testing for the presence of modules by name" do
		Linguistics::EN.should_not have_extension( 'nonexistant' )
		Linguistics::EN.should have_extension( 'articles' )
	end

	it "knows that it's not in 'classical' mode by default" do
		Linguistics::EN.should_not be_classical()
	end

	it "can run a single block in classical mode" do
		Linguistics::EN.in_classical_mode do
			Linguistics::EN.should be_classical()
		end
	end

	it "handles nested classical blocks correctly" do
		Linguistics::EN.in_classical_mode do
			Linguistics::EN.in_classical_mode do
				Linguistics::EN.should be_classical()
			end
			Linguistics::EN.should be_classical()
		end
		Linguistics::EN.should_not be_classical()
	end


	it "provides a sprintf-like function for interpolating variables into a String" do
		"I have %CONJUNCT.".en.lprintf( ["cat", "cat", "dog"] ).
			should == "I have two cats and a dog."
	end


	context "lprintf formatters" do

		before( :all ) do
			# Keep a copy: the live table is emptied before each example, so
			# holding a reference to it would leave nothing to restore afterwards.
			@real_formatters = Linguistics::EN.lprintf_formatters.dup
		end

		before( :each ) do
			Linguistics::EN.lprintf_formatters.clear
		end

		after( :all ) do
			Linguistics::EN.lprintf_formatters.replace( @real_formatters )
		end


		it "provides a way to register new lprintf formatters with a Symbol" do
			Linguistics::EN.register_lprintf_formatter :TEST, :plural
			Linguistics::EN.lprintf_formatters.should have( 1 ).member
			Linguistics::EN.lprintf_formatters.should include( :TEST )
			Linguistics::EN.lprintf_formatters[ :TEST ].should be_a( Proc )
		end

	end
end
93 | 
94 | 


--------------------------------------------------------------------------------
/spec/linguistics/iso639_spec.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env spec -cfs
 2 | #encoding: utf-8
 3 | 
 4 | BEGIN {
 5 | 	require 'pathname'
 6 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent
 7 | 
 8 | 	libdir = basedir + "lib"
 9 | 
10 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
11 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
12 | }
13 | 
14 | require 'rspec'
15 | require 'spec/lib/helpers'
16 | 
17 | require 'linguistics'
18 | require 'linguistics/iso639'
19 | 
20 | 
describe Linguistics::ISO639 do

	# eng||en|English|anglais
	it "loads simple language codes from its __DATA__ section" do
		Linguistics::LANGUAGE_CODES.should have_key( :en )

		english = Linguistics::LANGUAGE_CODES[ :en ]
		english.should have(3).members

		english.should have_key( :codes )
		english[:codes].should have(2).members
		english[:codes].should include("en", "eng")

		english.should have_key( :eng_name )
		english[:eng_name].should == 'English'
		english.should have_key( :fre_name )
		english[:fre_name].should == 'anglais'
	end

	it "loads language codes with variants from its __DATA__ section" do

		# Each row: lookup key, every code for the language, English name,
		# French name. Source records:
		#   cze|ces|cs|Czech|tchèque
		#   mac|mkd|mk|Macedonian|macédonien
		variants = [
			[ :cs, %w[cs ces cze], 'Czech',      'tchèque'    ],
			[ :mk, %w[mk mac mkd], 'Macedonian', 'macédonien' ],
		]

		variants.each do |key, codes, eng_name, fre_name|
			Linguistics::LANGUAGE_CODES.should have_key( key )

			entry = Linguistics::LANGUAGE_CODES[ key ]
			entry.should have( 3 ).members

			entry.should have_key( :codes )
			entry[:codes].should have( codes.length ).members
			entry[:codes].should include( *codes )

			entry.should have_key( :eng_name )
			entry[:eng_name].should == eng_name
			entry.should have_key( :fre_name )
			entry[:fre_name].should == fre_name
		end

	end

end
69 | 


--------------------------------------------------------------------------------
/lib/linguistics/en/linkparser.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | 
  3 | require 'linguistics/en' unless defined?( Linguistics::EN )
  4 | 
  5 | # LinkParser support for the English-language Linguistics module.
  6 | # LinkParser enables grammatic queries of English language sentences.
  7 | #
  8 | #   # Test to see whether or not the link parser is loaded.
  9 | #   Linguistics::EN.has_link_parser?
 10 | #   # => true
 11 | # 
 12 | #   # Diagram the first linkage for a test sentence
 13 | #   puts "he is a big dog".en.sentence.linkages.first.to_s
 14 | # 	  +---O*---+ 
 15 | # 	  | +--Ds--+ 
 16 | #    +Ss+ |  +-A-+ 
 17 | #    |  | |  |   | 
 18 | #   he is a big dog
 19 | # 
 20 | #   # Find the verb in the sentence
 21 | #   "he is a big dog".en.sentence.verb.to_s      
 22 | #   # => "is"
 23 | # 
 24 | #   # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
 25 | #   given sentence.
 26 | #   "he is a big dog".en.sentence.verb.infinitive
 27 | #   # => "be"
 28 | # 
 29 | #   # Find the direct object of the sentence
 30 | #   "he is a big dog".en.sentence.object.to_s
 31 | #   # => "dog"
 32 | # 
 33 | #   # Combine WordNet + LinkParser to find the definition of the direct object of
 34 | #   # the sentence
 35 | #   "he is a big dog".en.sentence.object.gloss
 36 | #   # => "a member of the genus Canis (probably descended from the common wolf) that
 37 | #   has been domesticated by man since prehistoric times; occurs in many breeds;
 38 | #   \"the dog barked all night\""
 39 | # 
 40 | module Linguistics::EN::LinkParser
 41 | 
 42 | 	@has_linkparser = false
 43 | 	@lp_dict        = nil
 44 | 	@lp_error       = nil
 45 | 
 46 | 	begin
 47 | 		require "linkparser"
 48 | 		@has_linkparser = true
 49 | 	rescue LoadError => err
 50 | 		@lp_error = err
 51 | 	end
 52 | 
 53 | 
 54 | 	# Container for methods intended to extend the EN module as singleton methods.
 55 | 	module SingletonMethods
 56 | 
 57 | 		### Returns +true+ if WordNet was loaded okay
 58 | 		def has_linkparser? ; @has_linkparser; end
 59 | 
 60 | 		### If #has_linkparser? returns +false+, this can be called to fetch the
 61 | 		### exception which was raised when WordNet was loaded.
 62 | 		def linkparser_error ; @lp_error; end
 63 | 
 64 | 	end # module SingletonMethods
 65 | 	extend SingletonMethods
 66 | 
 67 | 
 68 | 	# Register this module to the list of modules to include
 69 | 	Linguistics::EN.register_extension( self )
 70 | 
 71 | 	#################################################################
 72 | 	###	M O D U L E   M E T H O D S
 73 | 	#################################################################
 74 | 
 75 | 	### The instance of LinkParser used for all Linguistics LinkParser
 76 | 	### functions.
 77 | 	def self::lp_dict
 78 | 		if !self.has_linkparser?
 79 | 			raise NotImplementedError,
 80 | 				"LinkParser functions are not loaded: %s" %
 81 | 				self.lp_error.message
 82 | 		end
 83 | 
 84 | 		return @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
 85 | 	end
 86 | 
 87 | 
 88 | 	#################################################################
 89 | 	###	L I N K P A R S E R   I N T E R F A C E
 90 | 	#################################################################
 91 | 
 92 | 	######
 93 | 	public
 94 | 	######
 95 | 
 96 | 	### Return a LinkParser::Sentence for the stringified +obj+.
 97 | 	def sentence
 98 | 		return Linguistics::EN::LinkParser.lp_dict.parse( self.to_s )
 99 | 	end
100 | 
101 | end # class Linguistics::EN::LinkParser
102 | 


--------------------------------------------------------------------------------
/.irbrc:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby -*- ruby -*-
  2 | 
  3 | BEGIN {
  4 | 	require 'pathname'
  5 | 	basedir = Pathname.new( __FILE__ ).dirname.expand_path
  6 | 	libdir = basedir + "lib"
  7 | 
  8 | 	puts ">>> Adding #{libdir} to load path..."
  9 | 	$LOAD_PATH.unshift( libdir.to_s )
 10 | }
 11 | 
 12 | require 'English'
 13 | 
 14 | # Set some ANSI escape code constants (Shamelessly stolen from Perl's
 15 | # Term::ANSIColor by Russ Allbery <rra@stanford.edu> and Zenin <zenin@best.com>
 16 | ANSI_ATTRIBUTES = {
 17 | 	'clear'      => 0,
 18 | 	'reset'      => 0,
 19 | 	'bold'       => 1,
 20 | 	'dark'       => 2,
 21 | 	'underline'  => 4,
 22 | 	'underscore' => 4,
 23 | 	'blink'      => 5,
 24 | 	'reverse'    => 7,
 25 | 	'concealed'  => 8,
 26 | 
 27 | 	'black'      => 30,   'on_black'   => 40, 
 28 | 	'red'        => 31,   'on_red'     => 41, 
 29 | 	'green'      => 32,   'on_green'   => 42, 
 30 | 	'yellow'     => 33,   'on_yellow'  => 43, 
 31 | 	'blue'       => 34,   'on_blue'    => 44, 
 32 | 	'magenta'    => 35,   'on_magenta' => 45, 
 33 | 	'cyan'       => 36,   'on_cyan'    => 46, 
 34 | 	'white'      => 37,   'on_white'   => 47
 35 | }
 36 | 
 37 | ### Create a string that contains the ANSI codes specified and return it
 38 | def ansi_code( *attributes )
 39 | 	attributes.flatten!
 40 | 	attributes.collect! {|at| at.to_s }
 41 | 	# $stderr.puts "Returning ansicode for TERM = %p: %p" %
 42 | 	# 	[ ENV['TERM'], attributes ]
 43 | 	return '' unless /(?:vt10[03]|xterm(?:-color)?|linux|screen)/i =~ ENV['TERM']
 44 | 	attributes = ANSI_ATTRIBUTES.values_at( *attributes ).compact.join(';')
 45 | 
 46 | 	# $stderr.puts "  attr is: %p" % [attributes]
 47 | 	if attributes.empty? 
 48 | 		return ''
 49 | 	else
 50 | 		return "\e[%sm" % attributes
 51 | 	end
 52 | end
 53 | 
 54 | 
 55 | ### Colorize the given +string+ with the specified +attributes+ and return it, handling 
 56 | ### line-endings, color reset, etc.
 57 | def colorize( *args )
 58 | 	string = ''
 59 | 
 60 | 	if block_given?
 61 | 		string = yield
 62 | 	else
 63 | 		string = args.shift
 64 | 	end
 65 | 
 66 | 	ending = string[/(\s)$/] || ''
 67 | 	string = string.rstrip
 68 | 
 69 | 	return ansi_code( args.flatten ) + string + ansi_code( 'reset' ) + ending
 70 | end
 71 | 
 72 | 
 73 | ### Try to match the specified +str+ with the given +re+, printing out the result.
 74 | def try_regexp( str, re )
 75 | 	if str =~ re
 76 | 		puts "  #$PREMATCH",
 77 | 		     "  " + colorize( 'bold', 'green' ) { $MATCH },
 78 | 		     "  #$POSTMATCH"
 79 | 	else
 80 | 		puts colorize( "Nope.", 'red' )
 81 | 	end
 82 | end
 83 | 
 84 | IRB.conf[:PROMPT][:manual] = {
 85 |     :PROMPT_I => "irb> ",
 86 |     :PROMPT_S => "... ",
 87 |     :PROMPT_C => "* ",
 88 |     :RETURN => "# => %s\n"      # format to return value
 89 | }
 90 | IRB.conf[:PROMPT_MODE] = :manual
 91 | 
 92 | # class FilteringOutputMethod < IRB::OutputMethod
 93 | # 
 94 | # 	REPLACEMENTS = {
 95 | # 		/\blaika\b/i   => 'acme',
 96 | # 		/\bljc\b/i     => 'sales',
 97 | # 		/\badtech2\b/i => 'marketing',
 98 | # 	}
 99 | # 
100 | # 	def print( *opts )
101 | # 		opts.each do |opt|
102 | # 			REPLACEMENTS.each do |pat, repl|
103 | # 				opt.gsub!( pat, repl )
104 | # 			end
105 | # 			$stdout.print( opt )
106 | # 		end
107 | # 	end
108 | # end
109 | # IRB.conf[:OUTPUT_MODE]
110 | 
111 | 
# Load the library and install the English inflector so it's ready to use in the
# session. LoadError descends from ScriptError rather than StandardError, so a
# bare `rescue` would let a missing library abort the rest of this file; it's
# named explicitly so the failure is reported instead.
begin
	$stderr.puts "Loading Linguistics..."
	require 'linguistics'
	Linguistics.use( :en )
rescue LoadError, StandardError => e
	$stderr.puts "Ack! Linguistics library failed to load: #{e.message}\n\t" +
		e.backtrace.join( "\n\t" )
end
120 | 
121 | 


--------------------------------------------------------------------------------
/lib/linguistics/en/articles.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | 
  3 | require 'linguistics/en' unless defined?( Linguistics::EN )
  4 | 
  5 | # Indefinite article methods for the English-language Linguistics module.
  6 | module Linguistics::EN::Articles
  7 | 
  8 | 	# Register this module to the list of modules to include
  9 | 	Linguistics::EN.register_extension( self )
 10 | 
 11 | 
	# Matches runs of capitals that start with a consonant whose *name* begins
	# with a vowel sound ("F" => "eff", "M" => "em", ...) followed by another
	# capital, excluding prefixes that look like the start of a real word. Such
	# strings are assumed to be spelled-out abbreviations and take "an".
	A_abbrev = %r{
		^(
			(?!
				FJO       |
				[HLMNS]Y. |
				RY[EO]    |
				SQU       |
				(
					F[LR]?         |
					[HL]           |
					MN?            |
					N              |
					RH?            |
					S[CHKLMNPTVW]? |
					X(YL)?
				) [AEIOU]
			)
		[FHLMNRSX][A-Z]
		)
	}x

	# The beginnings of all English words that start with a 'y' followed by a
	# consonant. Any other y-consonant prefix therefore implies an abbreviation.
	A_y_cons = %r{^(y(?:b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt))}i

	# Exceptions to exceptions: words which take "an" despite their spelling
	A_explicit_an = Regexp.union(
		/euler/i,
		/hour(?!i)/i,
		/heir/i,
		/honest/i,
		/hono/i
	)

	# Count-words which always indicate a quantity of zero
	PL_count_zero = Regexp.union( %w[0 no zero nil] )
 46 | 
 47 | 
	### Returns the inflected word with an indefinite article ("a" or "an")
	### prepended. If a +count+ is given which isn't a singular count-word (i.e.,
	### doesn't match PL_count_one), "<count> <word>" is returned instead.
	def indef_article( count=nil )
		word = self.to_s
		self.log.debug "Fetching the indefinite article for %p (count = %p)" % [ word, count ]

		# An explicit non-singular count replaces the article altogether
		non_singular = count && /^(#{PL_count_one})$/i !~ count.to_s
		return "#{count} #{word}" if non_singular

		# Handle user-defined variants
		# return value if value = ud_match( word, A_a_user_defined )

		self.log.debug "  count wasn't a definite singular countword"

		# The first pattern that matches decides the article, so order matters.
		rules = [
			[ /^(#{A_explicit_an})/i,   'an' ],   # special cases
			[ A_abbrev,                 'an' ],   # abbreviations
			[ /^[aefhilmnorsx][.-]/i,   'an' ],
			[ /^[a-z][.-]/i,            'a'  ],
			[ /^[^aeiouy]/i,            'a'  ],   # consonants
			[ /^e[uw]/i,                'a'  ],   # special vowel-forms
			[ /^onc?e\b/i,              'a'  ],
			[ /^uni([^nmd]|mo)/i,       'a'  ],
			[ /^u[bcfhjkqrst][aeiou]/i, 'a'  ],
			[ /^[aeiou]/i,              'an' ],   # vowels
			[ A_y_cons,                 'an' ],   # y + consonant: (unnaturalized) "i.." sound
		]
		matched = rules.find {|pattern, _| pattern === word }

		# Anything that slipped through gets a guess of "a"
		article = matched ? matched.last : 'a'

		return "#{article} #{word}"
	end
102 | 
103 | 
104 | 	### Return the inflected phrase with the appropriate indefinite article ("a" or
105 | 	### "an") prepended. 
106 | 	def a( count=nil )
107 | 		count ||= 1
108 | 		phrase = self.to_s
109 | 
110 | 		md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
111 | 		pre, word, post = md.to_a[1,3]
112 | 		return phrase if word.nil? or word.empty?
113 | 
114 | 		result = word.en.indef_article
115 | 		return pre + result + post
116 | 	end
117 | 	alias_method :an, :a
118 | 	Linguistics::EN.register_lprintf_formatter :A, :a
119 | 	Linguistics::EN.register_lprintf_formatter :AN, :a
120 | 
121 | 
122 | 	### Translate zero-quantified +phrase+ to "no +phrase.plural+"
123 | 	def no( count=nil )
124 | 		phrase = self.to_s
125 | 		md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
126 | 		pre, word, post = md.to_a[1,3]
127 | 		count ||= 0
128 | 
129 | 		unless /^#{PL_count_zero}$/ =~ count.to_s
130 | 			return "#{pre}#{count} " + plural( word, count ) + post
131 | 		else
132 | 			return "#{pre}no " + word.en.plural( 0 ) + post
133 | 		end
134 | 	end
135 | 	Linguistics::EN.register_lprintf_formatter :NO, :no
136 | 
137 | end # module Linguistics::EN::Articles
138 | 
139 | 


--------------------------------------------------------------------------------
/lib/linguistics/en/titlecase.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | 
  3 | require 'linguistics/en' unless defined?( Linguistics::EN )
  4 | 
  5 | # Methods for capitalizing a sentence as a title, nouns as proper
  6 | # nouns, and for turning a sentence into its equivalent CamelCaseSentence
  7 | # and vice-versa. It's part of the English-language Linguistics module.
  8 | module Linguistics::EN::TitleCase
  9 | 
 10 | 	# Register this module to the list of modules to include
 11 | 	Linguistics::EN.register_extension( self )
 12 | 
 13 | 
	# Exceptions: Articles (both indefinite and definite)
	ARTICLES = %w[a an the]

	# Exceptions: Prepositions shorter than five letters
	SHORT_PREPOSITIONS = %w[
		amid at but by down for from in into like near of off on onto out over
		past save till to unto up upon with
	]

	# Exceptions: Coordinating conjunctions, plus "as", which is treated the same
	# way so that e.g. "A Portrait of the Artist as a Young Man" comes out right.
	COORD_CONJUNCTIONS = %w[and but as]

	# Titlecase exceptions: "In titles, capitalize the first word, the
	# last word, and all words in between except articles (a, an, and
	# the), prepositions under five letters (in, of, to), and coordinating
	# conjunctions (and, but). These rules apply to titles of long, short,
	# and partial works as well as your own papers" (Anson, Schwegler,
	# and Muth. The Longman Writer's Companion 240).
	TITLE_CASE_EXCEPTIONS = ARTICLES | SHORT_PREPOSITIONS | COORD_CONJUNCTIONS

	# The words which don't get capitalized in a compound proper noun
	PROPER_NOUN_EXCEPTIONS = %w{and the of}
 35 | 
 36 | 
 37 | 
	### Turns the inflected camel-case string ("camelCaseToEnglish") into plain
	### English ("camel case to english") by putting a space at each case
	### transition and then downcasing the whole thing.
	def un_camel_case
		spaced = self.to_s.
			gsub( /([A-Z])([A-Z])/, '\1 \2' ).
			gsub( /([a-z])([A-Z])/, '\1 \2' )

		return spaced.downcase
	end
 45 | 
 46 | 
	### Turns the inflected English-language string into a camelCase word by
	### removing each run of whitespace and upcasing the letter that follows it.
	def to_camel_case
		return self.to_s.gsub( /\s+([a-z])/i ) {|run| run[-1].upcase }
	end
 51 | 
 52 | 
	### Returns the inflected object as a title-cased String. The first and last
	### words are always capitalized; words in between are capitalized unless
	### they're listed in TITLE_CASE_EXCEPTIONS. Only the first letter of a word is
	### changed, so existing capitals (e.g., acronyms) are left alone, and the
	### part of a contraction or possessive after the apostrophe is never
	### capitalized.
	###
	### Some examples:
	###
	###   "a portrait of the artist as a young man".en.titlecase
	###   # => "A Portrait of the Artist as a Young Man"
	###
	###   "a seven-sided romance".en.titlecase
	###   # => "A Seven-Sided Romance"
	###
	###   "the curious incident of the dog in the night-time".en.titlecase
	###   # => "The Curious Incident of the Dog in the Night-Time"
	###
	###   "the rats of n.i.m.h.".en.titlecase
	###   # => "The Rats of N.I.M.H."
	def titlecase

		# Split on word-boundaries; the separators are kept as tokens of their own
		# so the original spacing and punctuation survive the final #join.
		words = self.to_s.split( /\b/ )

		# A word that follows an apostrophe which itself follows a word is the
		# second part of a contraction ("don't", "dog's"). The index guard keeps
		# Array#[]'s negative-index wraparound from pairing a token at the start
		# of the string with tokens at the end of it.
		contraction_tail = lambda do |i|
			i >= 2 && words[i - 1] == "'" && /\w/.match( words[i - 2] )
		end

		# The indexes of the tokens which are candidates for capitalization:
		# real words (not punctuation or whitespace) that aren't contraction tails.
		candidates = words.each_index.select do |i|
			/\A\w+\z/.match( words[i] ) && !contraction_tail[ i ]
		end

		candidates.each do |i|

			# Skip exception-words, but always capitalize the first and last words
			next if TITLE_CASE_EXCEPTIONS.include?( words[i] ) &&
				i != candidates.first && i != candidates.last

			# Have to do it this way instead of capitalize because that method
			# also downcases all other letters.
			words[i] = words[i].sub( /\A\w/ ) {|letter| letter.upcase }
		end

		return words.join
	end
 97 | 
 98 | 
	### Returns the proper noun form of the inflected object: every space- or
	### period-delimited word that starts with a lowercase letter is capitalized,
	### except for the ones in PROPER_NOUN_EXCEPTIONS.
	###
	### Some examples:
	###
	###   "bosnia and herzegovina".en.proper_noun
	###   # => "Bosnia and Herzegovina"
	###   "macedonia, the former yugoslav republic of".en.proper_noun
	###   # => "Macedonia, the Former Yugoslav Republic of"
	###   "virgin islands, u.s.".en.proper_noun
	###   # => "Virgin Islands, U.S."
	def proper_noun
		# The capture group makes #split keep the separators, so joining the
		# tokens back together restores the original spacing and punctuation.
		tokens = self.to_s.split( /([ .]+)/ )

		capitalized = tokens.map do |token|
			if /^[a-z]/.match( token ) && !PROPER_NOUN_EXCEPTIONS.include?( token )
				token.capitalize
			else
				token
			end
		end

		return capitalized.join
	end
118 | 
119 | 
120 | end # module Linguistics::EN::TitleCase
121 | 
122 | 


--------------------------------------------------------------------------------
/spec/linguistics/en/conjunctions_spec.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env spec -cfs
  2 | 
  3 | BEGIN {
  4 | 	require 'pathname'
  5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
  6 | 
  7 | 	libdir = basedir + "lib"
  8 | 
  9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
 10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
 11 | }
 12 | 
 13 | require 'rspec'
 14 | require 'spec/lib/helpers'
 15 | 
 16 | require 'linguistics'
 17 | require 'linguistics/en'
 18 | require 'linguistics/en/conjunctions'
 19 | 
 20 | 
 21 | describe Linguistics::EN::Conjunctions do
 22 | 
 23 | 	before( :all ) do
 24 | 		Linguistics.use( :en )
 25 | 		setup_logging( :fatal )
 26 | 	end
 27 | 
 28 | 
 29 | 	TEST_ITEMS = %w[cow chicken dog goat dog dog duck duck goose goose goose dog goat]
 30 | 
 31 | 	it "don't use a penultimate separator if it's turned off" do
 32 | 		TEST_ITEMS.en.conjunction( :penultimate => false ).should ==
 33 | 			"four dogs, three geese, two goats, two ducks, a cow and a chicken"
 34 | 	end
 35 | 
 36 | 	it "honors the penultimate setting even if there are only three items (bugfix)" do
 37 | 		%w[duck cow dog].en.conjunction( :penultimate => false ).should ==
 38 | 			"a duck, a cow and a dog"
 39 | 	end
 40 | 
 41 | 	it "uses the supplied block for transformation before building the conjunction" do
 42 | 		TEST_ITEMS.en.conjunction {|item| "'%s' animal" % [item[0]] }.should ==
 43 | 			"six 'd' animals, five 'g' animals, and two 'c' animals"
 44 | 	end
 45 | 
 46 | 	it "uses the alternative separator if one or more phrases include the primary one" do
 47 | 		scene_items = [
 48 | 			"desk with stamps, paper, and envelopes on it",
 49 | 			"basket containing milk, eggs, and broccoli",
 50 | 			"chair", "chair", "chair",
 51 | 			"wooden chest",
 52 | 			"hat rack",
 53 | 		]
 54 | 
 55 | 		scene_items.en.conjunction.should ==
 56 | 			"three chairs; a desk with stamps, paper, and envelopes on it; " +
 57 | 			"a basket containing milk, eggs, and broccoli; " +
 58 | 			"a wooden chest; and a hat rack"
 59 | 	end
 60 | 
 61 | 
 62 | 	describe "with an Array of a single element" do
 63 | 
 64 | 		before( :each ) do
 65 | 			@array = ['cat']
 66 | 		end
 67 | 
 68 | 		it "results in a phrase with indefinite article" do
 69 | 			@array.en.conjunction.should == "a cat"
 70 | 		end
 71 | 
 72 | 	end
 73 | 
 74 | 
 75 | 	describe "with an Array of two different words" do
 76 | 
 77 | 		before( :each ) do
 78 | 			@array = ['cat', 'dog']
 79 | 		end
 80 | 
 81 | 		it "results in a phrase joined with 'and' with default options" do
 82 | 			@array.en.conjunction.should == "a cat and a dog"
 83 | 		end
 84 | 
 85 | 		it "results in a phrase joined with 'plus' if 'plus' is set as the conjunctive" do
 86 | 			@array.en.conjunction(:conjunctive => 'plus').should == "a cat plus a dog"
 87 | 		end
 88 | 
 89 | 		it "results in a phrase joined with a space if an empty string is set as the conjunctive" do
 90 | 			@array.en.conjunction(:conjunctive => '').should == "a cat a dog"
 91 | 		end
 92 | 
 93 | 	end
 94 | 
 95 | 
 96 | 	describe "with an Array of two words that differ only in case" do
 97 | 
 98 | 		before( :each ) do
 99 | 			@array = ['cat', 'Cat']
100 | 		end
101 | 
102 | 		it "combines them into their downcased equivalents with default options" do
103 | 			@array.en.conjunction.should == "two cats"
104 | 		end
105 | 
106 | 		it "lists them separately if :combine is set to false" do
107 | 			@array.en.conjunction(:combine => false).should == "a cat and a Cat"
108 | 		end
109 | 
110 | 		it "doesn't combine them if :casefold is turned off" do
111 | 			@array.en.conjunction(:casefold => false).should == "a cat and a Cat"
112 | 		end
113 | 
114 | 		it "combines and lists them with a non-specific count if :generalize is set" do
115 | 			@array.en.conjunction(:generalize => true).should == "several cats"
116 | 		end
117 | 
118 | 	end
119 | 
120 | 
121 | 	describe "with an Array of many (more than two) words of varying cases" do
122 | 
123 | 		before( :each ) do
124 | 			@array = %w{cat dog fox dog chicken chicken Fox chicken goose Dog goose}
125 | 		end
126 | 
127 | 		it "combines them into their downcased equivalents and lists them in order of amount " +
128 | 		   "with default options" do
129 | 			@array.en.conjunction.should ==
130 | 				'three dogs, three chickens, two foxes, two geese, and a cat'
131 | 		end
132 | 
133 | 		it "lists them separately if :combine is set to false" do
134 | 			@array.en.conjunction(:combine => false).should ==
135 | 				'a cat, a dog, a fox, a dog, a chicken, a chicken, a Fox, a '\
136 | 				'chicken, a goose, a Dog, and a goose'
137 | 		end
138 | 
139 | 		it "doesn't combine the differently-cased ones if :casefold is turned off" do
140 | 			@array.en.conjunction(:casefold => false).should ==
141 | 				'three chickens, two dogs, two geese, a cat, a fox, a Fox, '\
142 | 				'and a Dog'
143 | 		end
144 | 
145 | 		it "combines and lists them with a non-specific count if :generalize is set" do
146 | 			@array.en.conjunction(:generalize => true).should ==
147 | 				'several dogs, several chickens, several foxes, several '\
148 | 				'geese, and a cat'
149 | 		end
150 | 
151 | 	end
152 | 
153 | 
154 | end
155 | 


--------------------------------------------------------------------------------
/experiments/api.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | # 
  3 | # This is a little pseudo-program to work out how to best generalize the
  4 | # interface to the grammar tools.
  5 | # 
  6 | # == Authors
  7 | # 
  8 | # * Michael Granger <ged@FaerieMUD.org>
  9 | # 
 10 | # == Copyright
 11 | #
 12 | # Copyright (c) 2003, 2005 The FaerieMUD Consortium. All rights reserved.
 13 | # 
 14 | # This module is free software. You may use, modify, and/or redistribute this
 15 | # software under the terms of the Perl Artistic License. (See
 16 | # http://language.perl.com/misc/Artistic.html)
 17 | # 
 18 | # == Version
 19 | #
 20 | #  $Id$
 21 | # 
 22 | 
 23 | require 'linguistics'
 24 | Linguistics::use( :en, 'de' )
 25 | 
 26 | 
 27 | #####################################################################
 28 | ###	C O N J U N C T I O N S
 29 | #####################################################################
 30 | 
 31 | things = [
 32 | 	'a stick',
 33 | 	'a stone',
 34 | 	'a stick',
 35 | 	'a silver hammer',
 36 | 	'an old hammer',
 37 | ]
 38 | 
 39 | print things.en.conjunction
 40 | # => "two sticks, a stone, a silver hammer, and an old hammer"
 41 | 
 42 | things = [
 43 | 	"ein Stein",
 44 | 	"ein Reisig",
 45 | 	"ein Stein",
 46 | 	"ein silber Hammer",
 47 | 	"ein alten Hammer",
 48 | ]
 49 | print things.de.conjunction
 50 | # => "zwei Steinen, ein Reisig, ein silber Hammer, und ein alten Hammer"
 51 | 
 52 | 
 53 | 
 54 | #####################################################################
 55 | ###	I N F L E C T I O N S
 56 | #####################################################################
 57 | 
 58 | ### Unconditional plurals
 59 | 
 60 | "duck".en.plural
 61 | # => "ducks"
 62 | 
 63 | "goose".en.plural
 64 | # => "geese"
 65 | 
 66 | 
 67 | ### Conditional plurals
 68 | 
 69 | "trivet".en.plural( 1 )
 70 | # => "trivet"
 71 | 
 72 | "trivet".en.plural( 4 )
 73 | # => "trivets"
 74 | 
 75 | 
 76 | ### POS plurals
 77 | 
 78 | "paint".en.plural_noun
 79 | # => "paints"
 80 | 
 81 | "paint".en.plural_verb
 82 | # => "paint"
 83 | 
 84 | 
 85 | ### Ordinals
 86 | 
 87 | 5.ord
 88 | # => 5th
 89 | 
 90 | ### Plural/singular
 91 | 
 92 | # "0/1/N" -> "no/1/N" translation
 93 | [3, 1, 0].each {|errors|
 94 | 	puts "There " + 'were'.en.plural_verb(errors) + " error".en.no(errors)
 95 | }
 96 | # => There were 3 errors
 97 | # => There was 1 error
 98 | # => There were no errors
 99 | 
100 | # Compare two words "number-insensitively":
101 | puts "same" if word1.en === word2.en
102 | puts "same noun" if word1.en.noun === word2.en.noun
103 | puts "same verb" if word1.en.verb === word2.en.verb
104 | puts "same adjective" if word1.en.adj === word2.en.adj
105 | 
106 | # Add correct "a" or "an" for a given word:
107 | %{Did you want #{"thing".en.a} or #{"idea".en.a}?}
108 | 
109 | 
110 | ### Convert numerals to words (i.e. 1->"one", 101->"one hundred and one", etc.)
111 | ### In a scalar context: get back a single string...
112 | 
113 | # Add methods to Numeric, singleton methods to returned strings for #ord and
114 | # #words that will allow .words.to_a, etc.)
115 | 
116 | 1234.words
117 | # => "one thousand, two hundred and thirty-four"
118 | 1234.ord.words
119 | # => "one thousand, two hundred and thirty-fourth"
120 | 1234.words.to_a
121 | # => ["one thousand","two hundred and thirty-four"]
122 | 
123 | 
124 | # Optional parameters change translation:
125 | 12345.words( :group => 1 )
126 | # "one, two, three, four, five"
127 | 12345.words( :group => 2 )
128 | # "twelve, thirty-four, five"
129 | 12345.words( :group => 3 )
130 | # "one twenty-three, forty-five"
131 | 1234.words( :and => '' )
132 | # "one thousand, two hundred thirty-four"
133 | 1234.words( :and => ', plus' )
134 | # "one thousand, two hundred, plus thirty-four"
135 | 555_1202.words( :group => 1, :zero => 'oh' )
136 | # "five, five, five, one, two, oh, two"
137 | 123.456.words( :group => 1, :decimal => 'mark' )
138 | # "one two three mark four five six"
139 | 
140 | "duck".en.quantify( 0 )
141 | # => "no ducks"
142 | "duck".en.quantify( 1 )
143 | # => "a duck"
144 | "duck".en.quantify( 2 )
145 | # => "a few ducks"
146 | "duck".en.quantify( 5 )
147 | # => "several ducks"
148 | "duck".en.quantify( 50 )
149 | # => "many ducks"
150 | "duck".en.quantify( 504 )
151 | # => "hundreds of ducks"
152 | "duck".en.quantify( 5046 )
153 | # => "thousands of ducks"
154 | "duck".en.quantify( 50_461 )
155 | # => "tens of thousands of ducks"
156 | "duck".en.quantify( 504_614 )
157 | # => "hundreds of thousands of ducks"
158 | "duck".en.quantify( 5_046_140 )
159 | # => "millions of ducks"
160 | 
161 | 
162 | ### "Classical" plurals (eg: "focus"->"foci", "cherub"->"cherubim")
163 | #   Use classical plurals
164 | Linguistics::use( :en, :classical => true )
165 | 
166 | # Interpolate "PL()", "PL_N()", "PL_V()", "PL_ADJ()", "A()", "AN()"
167 | # "NUM()" AND "ORD()" WITHIN STRINGS:
168 | 
169 | "The plural of #{word} is PL(word)\n".inflect
170 | 
171 | 
172 | # print inflect("The plural of $word is PL($word)\n")
173 | # print inflect("I saw $cat_count PL("cat",$cat_count)\n")
174 | # print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)")
175 | # print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)")
176 | # print inflect("I saw NUM($cat_count) PL("cat")\nNUM()")
177 | # print inflect("There PL_V(was,$errors) NO(error,$errors)\n")
178 | # print inflect("There NUM($errors,) PL_V(was) NO(error)\n")
179 | # print inflect("Did you want A($thing) or AN($idea)\n")
180 | # print inflect("It was ORD($position) from the left\n")
181 | 
182 | # # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
183 | 
184 | # def_noun  "VAX"  => "VAXen";	# SINGULAR => PLURAL
185 | 
186 | # def_verb  "will" => "shall",	# 1ST PERSON SINGULAR => PLURAL
187 | # "will" => "will",	# 2ND PERSON SINGULAR => PLURAL
188 | # "will" => "will",	# 3RD PERSON SINGULAR => PLURAL
189 | 
190 | # def_adj   "hir"  => "their",	# SINGULAR => PLURAL
191 | 
192 | # def_a	"h"			# "AY HALWAYS SEZ 'HAITCH'!"
193 | 
194 | # def_an	"horrendous.*"		# "AN HORRENDOUS AFFECTATION"
195 | 
196 | 


--------------------------------------------------------------------------------
/lib/linguistics.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | # coding: utf-8
  3 | 
  4 | require 'loggability'
  5 | 
  6 | # An interface for extending core Ruby classes with natural-language methods.
  7 | module Linguistics
  8 | 	extend Loggability
  9 | 
 10 | 	# Loggability API -- set up a logger for Linguistics objects
 11 | 	log_as :linguistics
 12 | 
 13 | 
 14 | 	# Release version
 15 | 	VERSION = '2.0.2'
 16 | 
 17 | 	# VCS version
 18 | 	REVISION = %q$Revision$
 19 | 
 20 | 	# The list of Classes to add linguistic behaviours to.
 21 | 	DEFAULT_EXT_CLASSES = [ String, Numeric, Array ]
 22 | 
 23 | 
 24 | 	vvec = lambda {|version| version.split('.').collect {|v| v.to_i }.pack('N*') }
 25 | 	abort "This version of Linguistics requires Ruby 1.9.2 or greater." unless
 26 | 		vvec[RUBY_VERSION] >= vvec['1.9.2']
 27 | 
 28 | 
 29 | 	require 'linguistics/monkeypatches'
 30 | 	require 'linguistics/iso639'
 31 | 	require 'linguistics/inflector'
 32 | 
 33 | 	include Linguistics::ISO639
 34 | 
 35 | 
 36 | 	### Language modules and the inflector classes that act as their interfaces
 37 | 	@languages        = {}
 38 | 	@inflector_mixins = {}
 39 | 
 40 | 	class << self
 41 | 		# The Hash of loaded languages keyed by 3-letter bibliographic ISO639-2 code
 42 | 		attr_reader :languages
 43 | 
 44 | 		# The Hash of anonymous inflector modules that act as the mixin interface to
 45 | 		# a language module's inflector, keyed by the language module they belong to
 46 | 		attr_reader :inflector_mixins
 47 | 	end
 48 | 
 49 | 
	### Return the library's version string, e.g. "Linguistics 2.0.2". If
	### +include_buildnum+ is true, the build id extracted from the VCS REVISION
	### keyword (or '0' if it hasn't been expanded) is appended, too.
	def self::version_string( include_buildnum=false )
		parts = [ "%s %s" % [self.name, VERSION] ]

		if include_buildnum
			build = REVISION[ /: ([[:xdigit:]]+)/, 1 ] || '0'
			parts << "(build %s)" % [ build ]
		end

		return parts.join( ' ' )
	end
 56 | 
 57 | 
	### Register +mod+ as the module that provides linguistic functions for the
	### specified +language+ (a two- or three-letter ISO639-2 language code as a
	### Symbol or String), and then try to load any extensions for it that can be
	### found in installed gems. Raises a RuntimeError if the code is unknown.
	def self::register_language( language, mod )
		entry = LANGUAGE_CODES[ language.to_sym ]
		raise "Unknown ISO639-2 language code '#{language}'" unless entry
		self.log.info "Registering %s for language %p" % [ mod, entry ]

		# Make the module reachable through every code the language is known by
		entry[:codes].each {|code| self.languages[code.to_sym] = mod }

		# Load in plugins for the language
		Gem.find_files( "linguistics/#{language}/*.rb" ).each do |extension|
			next if extension.include?( '/spec/' ) # Skip specs

			# Trim the absolute path down to something relative to the load path
			extension.sub!( %r{.*/linguistics/}, 'linguistics/' )
			self.log.debug "  trying to load #{entry[:eng_name]} extension %p" % [ extension ]

			# An extension that can't be loaded is logged and skipped
			loaded = begin
				require extension
				true
			rescue LoadError => err
				self.log.debug "    failed (%s): %s %s" %
					[ err.class.name, err.message, err.backtrace.first ]
				false
			end
			self.log.debug "    success." if loaded
		end
	end
 85 | 
 86 | 
	### Try to load the module that implements the given language, returning
	### the Module object if successful. A language that has already been
	### registered is returned without loading anything.
	###
	### Raises a RuntimeError if +lang+ isn't a known ISO639-2 language code, and a
	### LoadError that describes each attempt if no implementation could be loaded.
	def self::load_language( lang )
		mod = self.languages[ lang.to_sym ]
		return mod if mod

		self.log.debug "Trying to load language %p" % [ lang ]
		language = LANGUAGE_CODES[ lang.to_sym ] or
			raise "Unknown ISO639-2 language code '#{lang}'"
		self.log.debug "  got language code %p" % [ language ]

		# Try the 2-letter versions of the language's codes first in alphabetical
		# order, then the 3-letter ones. A plain #sort can't do that, as it orders
		# e.g. "chi" before "zh".
		codes = language[:codes].
			reject {|code| code == '' }.
			sort_by {|code| [code.length, code] }
		msgs = []

		codes.each do |code|
			begin
				require "linguistics/#{code}"
				self.log.debug "  loaded linguistics/#{code}!"

				# The required file is expected to have registered itself
				mod = self.languages[ lang.to_sym ]
				self.log.debug "  set mod to %p" % [ mod ]
				break
			rescue LoadError => err
				self.log.error "  require of linguistics/#{code} failed: #{err.message}"
				msgs << "Tried 'linguistics/#{code}': #{err.message}\n"
			end
		end

		# Nothing loaded (or what did load never registered the language)
		if mod.nil?
			raise LoadError,
				"Failed to load language extension %s:\n%s" %
				[ lang, msgs.join ]
		end

		return mod
	end
127 | 
128 | 
129 | 	### Add linguistics functions for the specified languages to Ruby's core
130 | 	### classes. The interface to all linguistic functions for a given language
131 | 	### is through a method which is the same the language's international 2- or
132 | 	### 3-letter code (ISO 639). You can also specify a Hash of configuration
133 | 	### options which control which classes are extended:
134 | 	###
135 | 	### [<b>:classes</b>]
136 | 	###   Specify the classes which are to be extended. If this is not specified,
137 | 	###   the Class objects in Linguistics::DEFAULT_EXT_CLASSES (an Array) are
138 | 	###   extended.
139 | 	### [<b>:monkeypatch</b>]
140 | 	###   Monkeypatch directly (albeit responsibly, via a mixin) the specified
141 | 	###   +classes+ instead of adding a single language-code method.
142 | 	def self::use( *languages )
143 | 		config = languages.pop if languages.last.is_a?( Hash )
144 | 		config ||= {}
145 | 
146 | 		classes = Array(config[:classes]) if config[:classes] 
147 | 		classes ||= DEFAULT_EXT_CLASSES
148 | 
149 | 		self.log.debug "Extending %d classes with %d language modules." %
150 | 			[ classes.length, languages.length ]
151 | 
152 | 		# Mix the language module for each requested language into each
153 | 		# specified class
154 | 		classes.each do |klass|
155 | 			self.log.debug "  extending %p" % [ klass ]
156 | 			languages.each do |lang|
157 | 				mod = load_language( lang ) or
158 | 					raise LoadError, "failed to load a language extension for %p" % [ lang ]
159 | 				self.log.debug "    using %s language module: %p" % [ lang, mod ]
160 | 
161 | 				if config[:monkeypatch]
162 | 					klass.send( :include, mod )
163 | 				else
164 | 					inflector = make_inflector_mixin( lang, mod )
165 | 					self.log.debug "    made an inflector mixin: %p" % [ inflector ]
166 | 					klass.send( :include, inflector )
167 | 				end
168 | 			end
169 | 		end
170 | 
171 | 		return classes
172 | 	end
173 | 
174 | 
175 | 	### Create a mixin module/class pair that act as the per-object interface to
176 | 	### the given language +mod+'s inflector.
177 | 	def self::make_inflector_mixin( lang, mod )
178 | 		language = LANGUAGE_CODES[ lang.to_sym ] or
179 | 			raise "Unknown ISO639-2 language code '#{lang}'"
180 | 
181 | 		unless mixin = self.inflector_mixins[ mod ]
182 | 			self.log.debug "Making an inflector mixin for %p" % [ mod ]
183 | 
184 | 			bibcode, alpha2code, termcode = *language[:codes]
185 | 			inflector = Class.new( Linguistics::Inflector ) { include(mod) }
186 | 			self.log.debug "  created inflector class %p for [%p, %p, %p]" %
187 | 				[ inflector, bibcode, termcode, alpha2code ]
188 | 
189 | 			mixin = Module.new do
190 | 				define_method( bibcode ) do
191 | 					inflector.new( bibcode, self )
192 | 				end
193 | 				alias_method termcode, bibcode unless termcode.nil? || termcode.empty?
194 | 				alias_method alpha2code, bibcode unless alpha2code.nil? || alpha2code.empty?
195 | 			end
196 | 			self.inflector_mixins[ mod ] = mixin
197 | 		end
198 | 
199 | 		return mixin
200 | 	end
201 | 
202 | 
203 | end # module Linguistics
204 | 
205 | 


--------------------------------------------------------------------------------
/README.rdoc:
--------------------------------------------------------------------------------
  1 | = Linguistics
  2 | 
  3 | docs :: http://deveiate.org/code/linguistics
  4 | project :: https://bitbucket.org/ged/linguistics
  5 | github :: https://github.com/ged/linguistics
  6 | 
  7 | 
  8 | == Description
  9 | 
 10 | Linguistics is a framework for building linguistic utilities for Ruby
 11 | objects in any language. It includes a generic language-independent
 12 | front end, a module for mapping language codes into language names, and
 13 | a module which contains various English-language utilities.
 14 | 
 15 | 
 16 | == Usage
 17 | 
 18 | The Linguistics module comes with a language-independent mechanism for
 19 | extending core Ruby classes with linguistic methods.
 20 | 
 21 | It consists of three parts: a core linguistics module which contains the
 22 | class-extension framework for languages, a generic inflector class that
 23 | serves as an extension point for linguistic methods on Ruby objects, and
 24 | one or more language-specific modules which contain the actual
 25 | linguistic functions.
 26 | 
 27 | The module works by adding a single instance method for each language
 28 | named after the language's two-letter code (or three-letter code, if no
 29 | two-letter code is defined by ISO639) to various Ruby classes. This
 30 | allows many language-specific methods to be added to objects without
 31 | cluttering up the interface or risking collision between them, albeit at
 32 | the cost of three or four more characters per method invocation. For
 33 | example:
 34 | 
 35 |     Linguistics.use( :en )
 36 |     "goose".en.plural
 37 |     # => "geese"
 38 | 
 39 | If you prefer monkeypatching (around 70) linguistics methods directly onto core
 40 | classes, you can do that by adding a 'monkeypatch' option to ::use:
 41 | 
 42 |     Linguistics.use( :en, monkeypatch: true )
 43 |     "goose".plural
 44 |     # => "geese"
 45 | 
 46 | === Controlling Which Classes Get Extended
 47 | 
 48 | If you should wish to extend classes other than the ones in
 49 | <tt>Linguistics::DEFAULT_EXT_CLASSES</tt>, you have a few options.
 50 | 
 51 | You can modify the DEFAULT_EXT_CLASSES array directly (before you call
 52 | ::use, of course):
 53 | 
 54 |     Linguistics::DEFAULT_EXT_CLASSES << MyClass
 55 | 
 56 | You can also pass an Array of classes to .use:
 57 | 
 58 |     Linguistics.use( :en, classes: [MyClass] )
 59 | 
 60 | Or you can add language methods to classes via mixin:
 61 | 
 62 |     class MyClass
 63 |         include Linguistics::EN
 64 |     end
 65 | 
 66 | All Linguistics methods use Ruby's casting mechanism, so at a minimum,
 67 | your classes should provide an implementation of #to_s that returns
 68 | words or phrases.
 69 | 
 70 | 
 71 | === Adding Language Modules
 72 | 
 73 | To add a new language to the framework, define a module that will act as
 74 | the top-level namespace for all your linguistic functions, and then
 75 | register it as being available, like so:
 76 | 
 77 |     module Linguistics::TLH
 78 |     
 79 |         # Add Klingon to the list of default languages
 80 |         Linguistics.register_language( :tlh, self )
 81 | 
 82 |     end
 83 | 
 84 | The first argument is either the two- or three-letter
 85 | {ISO 639.2}[http://www.loc.gov/standards/iso639-2/php/code_list.php]
 86 | language code for the language you're registering.
 87 | 
 88 | The second is the container module itself.
 89 | 
 90 | After you register your language, each class that Linguistics is told to
 91 | extend will have a method for your language code/s:
 92 | 
 93 |     irb> Linguistics.use( :tlh, :classes => Object )
 94 |     # => [Object]
 95 |     irb> Object.new.tlh
 96 |     # => #<(Klingon; tlhIngan-Hol-language inflector) for <Object:0x402d9674> >
 97 | 
 98 | If you use RSpec 2, you can test out any API requirements of the module
 99 | by requiring 'linguistics/languagebehavior' and adding a shared
100 | behavior to your spec:
101 | 
102 |     require 'rspec'
103 |     require 'linguistics/languagebehavior'
104 |     
105 |     describe Linguistics::TLH do
106 |     
107 |       it_should_behave_like "a Linguistics language module"
108 |     
109 |       # ... any other specs for your module
110 |     
111 |     end
112 | 
113 | If you wish to use the logging subsystem set up by Linguistics, you can
114 | do so in one of two ways: by logging to the logger directly:
115 | 
116 |     Linguistics.log.debug "Registering Klingon language extension"
117 | 
118 | or by mixing the `Linguistics::Loggable' module into your class/module,
119 | which will give you a 'log' method that prepends the object class on
120 | each log message so it's easy to filter out the ones you want:
121 | 
122 |     require 'linguistics/mixins'
123 |     class Linguistics::TLH::Generator
124 |         include Linguistics::Loggable
125 | 
126 |         def generate_it
127 |             self.log.debug "starting generation..."
128 |         end
129 |     end
130 | 
131 | 
132 | 
133 | == English Language Module
134 | 
135 | Linguistics comes with an English-language module; see the API
136 | documentation for Linguistics::EN for more information about it.
137 | 
138 | 
139 | == Authors
140 | 
141 | * Michael Granger <ged@FaerieMUD.org>
142 | * Martin Chase <stillflame@FaerieMUD.org>
143 | 
144 | 
145 | == Contributors
146 | 
147 | * Robert Berry (bdigital on github) - English conjugation ported from
148 |   MorphAdorner
149 | 
150 | 
151 | == Requirements
152 | 
153 | * Ruby >= 1.9.3
154 | 
155 | It may work under earlier versions, but I'll only be testing it on 1.9.3
156 | or later.
157 | 
158 | 
159 | == Optional
160 | 
161 | The English-language module for Linguistics has support for a few other
162 | optional natural-language libraries:
163 | 
164 | linkparser[http://deveiate.org/projects/Ruby-LinkParser] ::
165 |   Ruby high-level interface to the CMU Link Grammar library
166 | 
167 | wordnet[http://deveiate.org/projects/Ruby-WordNet] ::
168 |   Adds integration for the Ruby binding for the WordNet®
169 |   lexical reference system.
170 | 
171 | 
172 | == Contributing
173 | 
174 | You can check out the current development source with Mercurial via its
175 | {project page}[http://deveiate.org/projects/Linguistics]. Or if you prefer
176 | Git, via {its Github mirror}[https://github.com/ged/linguistics].
177 | 
178 | After checking out the source, run:
179 | 
180 |     $ rake newb
181 | 
182 | This task will install any missing dependencies, run the tests/specs, and
183 | generate the API documentation.
184 | 
185 | 
186 | == License
187 | 
188 | Copyright (c) 2003-2012, Michael Granger
189 | All rights reserved.
190 | 
191 | Redistribution and use in source and binary forms, with or without
192 | modification, are permitted provided that the following conditions are met:
193 | 
194 | * Redistributions of source code must retain the above copyright notice,
195 |   this list of conditions and the following disclaimer.
196 | 
197 | * Redistributions in binary form must reproduce the above copyright notice,
198 |   this list of conditions and the following disclaimer in the documentation
199 |   and/or other materials provided with the distribution.
200 | 
201 | * Neither the name of the author/s, nor the names of the project's
202 |   contributors may be used to endorse or promote products derived from this
203 |   software without specific prior written permission.
204 | 
205 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
206 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
207 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
208 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
209 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
210 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
211 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
212 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
213 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
214 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
215 | 
216 | 
217 | 
218 | 


--------------------------------------------------------------------------------
/lib/linguistics/en/conjunctions.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | 
  3 | require 'linguistics/en' unless defined?( Linguistics::EN )
  4 | 
  5 | # Conjunction methods for the English-language Linguistics module.
  6 | module Linguistics::EN::Conjunctions
  7 | 
  8 | 	# Register this module to the list of modules to include
  9 | 	Linguistics::EN.register_extension( self )
 10 | 
 11 | 	# :stopdoc:
 12 | 
 13 | 	# Default configuration arguments for the #conjunction (junction, what's
 14 | 	# your) function.
 15 | 	CONJUNCTION_DEFAULTS = {
 16 | 		:separator		=> ', ',
 17 | 		:altsep			=> '; ',
 18 | 		:penultimate	=> true,
 19 | 		:conjunctive	=> 'and',
 20 | 		:combine		=> true,
 21 | 		:casefold		=> true,
 22 | 		:generalize		=> false,
 23 | 		:quantsort		=> true,
 24 | 	}
 25 | 
 26 | 
 27 | 	# :TODO: Needs refactoring
 28 | 
 29 | 	### Return the specified +obj+ (which must support the <tt>#collect</tt>
 30 | 	### method) as a conjunction. Each item is converted to a String if it is
 31 | 	### not already (using #to_s) unless a block is given, in which case it is
 32 | 	### called once for each object in the array, and the stringified return
 33 | 	### value from the block is used instead. Returning +nil+ causes that
 34 | 	### particular element to be omitted from the resulting conjunction. The
 35 | 	### following options can be used to control the makeup of the returned
 36 | 	### conjunction String:
 37 | 	### 
 38 | 	### [<b>:separator</b>]
 39 | 	###   Specify one or more characters to separate items in the resulting
 40 | 	###   list. Defaults to <tt>', '</tt>.
 41 | 	### [<b>:altsep</b>]
 42 | 	###   An alternate separator to use if any of the resulting conjunction's
 43 | 	###   clauses contain the <tt>:separator</tt> character/s. Defaults to <tt>'; '</tt>.
 44 | 	### [<b>:penultimate</b>]
 45 | 	###   Flag that indicates whether or not to join the last clause onto the
 46 | 	###   rest of the conjunction using a penultimate <tt>:separator</tt>. E.g.,
 47 | 	###     %w{duck cow dog}.en.conjunction
 48 | 	###     # => "a duck, a cow, and a dog"
 49 | 	###     %w{duck cow dog}.en.conjunction( :penultimate => false )
 50 | 	###     # => "a duck, a cow and a dog"
 51 | 	###   Defaults to <tt>true</tt>.
 52 | 	### [<b>:conjunctive</b>]
 53 | 	###   Sets the word used as the conjunctive (separating word) of the
 54 | 	###   resulting string. Defaults to <tt>'and'</tt>.
 55 | 	### [<b>:combine</b>]
 56 | 	###   If set to <tt>true</tt> (the default), items which are identical (after
 57 | 	###   surrounding spaces are stripped) will be combined in the resulting
 58 | 	###   conjunction. E.g.,
 59 | 	###     %w{goose cow goose dog}.en.conjunction
 60 | 	###     # => "two geese, a cow, and a dog"
 61 | 	###     %w{goose cow goose dog}.en.conjunction( :combine => false )
 62 | 	###     # => "a goose, a cow, a goose, and a dog"
 63 | 	### [<b>:casefold</b>]
 64 | 	###   If set to <tt>true</tt> (the default), then items are compared
 65 | 	###   case-insensitively when combining them. This has no effect if
 66 | 	###   <tt>:combine</tt> is <tt>false</tt>.
 67 | 	### [<b>:generalize</b>]
 68 | 	###   If set to <tt>true</tt>, then quantities of combined items are turned into
 69 | 	###   general descriptions instead of exact amounts.
 70 | 	###     ary = %w{goose pig dog horse goose reindeer goose dog horse}
 71 | 	###     ary.en.conjunction
 72 | 	###     # => "three geese, two dogs, two horses, a pig, and a reindeer"
 73 | 	###     ary.en.conjunction( :generalize => true )
 74 | 	###     # => "several geese, several dogs, several horses, a pig, and a reindeer"
 75 | 	###   See the #quantify method for specifics on how quantities are
 76 | 	###   generalized. Generalization defaults to <tt>false</tt>, and has no effect if
 77 | 	###   :combine is <tt>false</tt>.
 78 | 	### [<b>:quantsort</b>]
 79 | 	###   If set to <tt>true</tt> (the default), items which are combined in the
 80 | 	###   resulting conjunction will be listed in order of amount, with greater
 81 | 	###   quantities sorted first. If <tt>:quantsort</tt> is <tt>false</tt>, combined items
 82 | 	###   will appear where the first instance of them occurred in the
 83 | 	###   list. This sort is also the fallback for identical quantities (i.e.,
 84 | 	###   items of the same quantity will be listed in the order they appeared
 85 | 	###   in the source list).
 86 | 	###
 87 | 	def conjunction( args={} )
 88 | 		config = CONJUNCTION_DEFAULTS.merge( args )
 89 | 
 90 | 		# Transform items in the obj to phrases
 91 | 		phrases = if block_given?
 92 | 				self.log.debug "  collecting with a block"
 93 | 				self.collect {|item| yield(item) }.compact
 94 | 			else
 95 | 				self.log.debug "  collecting without a block"
 96 | 				rval = self.collect( &:to_s )
 97 | 				self.log.debug "  collected: %p" % [ rval ]
 98 | 				rval
 99 | 			end
100 | 
101 | 		self.log.debug "  phrases is: %p" % [ phrases ]
102 | 
103 | 		# No need for a conjunction if there's only one thing
104 | 		return phrases[0].en.a if phrases.length < 2
105 | 
106 | 		# Set up a Proc to derive a collector key from a phrase depending on the
107 | 		# configuration
108 | 		keyfunc =
109 | 			if config[:casefold]
110 | 				proc {|key| key.downcase.strip}
111 | 			else
112 | 				proc {|key| key.strip}
113 | 			end
114 | 
115 | 		# Count and delete phrases that hash the same when the keyfunc munges
116 | 		# them into the same thing if we're combining (:combine => true).
117 | 		collector = {}
118 | 		if config[:combine]
119 | 
120 | 			phrases.each_index do |i|
121 | 				# Stop when reaching the end of a truncated list
122 | 				break if phrases[i].nil?
123 | 
124 | 				# Make the key using the configured key function
125 | 				phrase = keyfunc[ phrases[i] ]
126 | 
127 | 				# If the collector already has this key, increment its count,
128 | 				# eliminate the duplicate from the phrase list, and redo the loop.
129 | 				if collector.key?( phrase )
130 | 					collector[ phrase ] += 1
131 | 					phrases.delete_at( i )
132 | 					redo
133 | 				end
134 | 
135 | 				collector[ phrase ] = 1
136 | 			end
137 | 		else
138 | 			# If we're not combining, just make everything have a count of 1.
139 | 			phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1}
140 | 		end
141 | 
142 | 		# If sort-by-quantity is turned on, sort the phrases first by how many
143 | 		# there are (most-first), and then by the order they were specified in.
144 | 		if config[:quantsort] && config[:combine]
145 | 			origorder = {}
146 | 			phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
147 | 			phrases.sort! {|a,b|
148 | 				(collector[ keyfunc[b] ] <=> collector[ keyfunc[a] ]).nonzero? ||
149 | 				(origorder[ keyfunc[a] ] <=> origorder[ keyfunc[b] ])
150 | 			}
151 | 		end
152 | 
153 | 		# Set up a filtering function that adds either an indefinite article, an
154 | 		# indefinite quantifier, or a definite quantifier to each phrase
155 | 		# depending on the configuration and the count of phrases in the
156 | 		# collector.
157 | 		filter =
158 | 			if config[:generalize]
159 | 				proc {|phrase, count| phrase.en.quantify(count) }
160 | 			else
161 | 				proc do |phrase, count|
162 | 					if count > 1
163 | 						"%s %s" % [
164 | 							# :TODO: Make this threshold settable
165 | 							count < 10 ? count.en.numwords : count.to_s,
166 | 							phrase.en.plural( count )
167 | 						]
168 | 					else
169 | 						phrase.en.a
170 | 					end
171 | 				end
172 | 			end
173 | 
174 | 		# Now use the configured filter to turn each phrase into its final
175 | 		# form. Hmmm... square-bracket Lisp?
176 | 		phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] }
177 | 
178 | 		# Prepend the conjunctive to the last element unless it's empty or
179 | 		# there's only one element
180 | 		phrases[-1].insert( 0, config[:conjunctive] + " " ) unless
181 | 			config[:conjunctive].strip.empty? or
182 | 			phrases.length < 2
183 | 
184 | 		# Concatenate the last two elements if there's no penultimate separator,
185 | 		# and pick a separator based on how many phrases there are and whether
186 | 		# or not there's already an instance of it in the phrases.
187 | 		phrase_count = phrases.length
188 | 		phrases[-2] << " " << phrases.pop unless config[:penultimate]
189 | 		sep = config[:separator]
190 | 		if phrase_count <= 2
191 | 			sep = ' '
192 | 		elsif phrases.find {|str| str.include?(config[:separator]) }
193 | 			sep = config[:altsep]
194 | 		end
195 | 
196 | 		return phrases.join( sep )
197 | 	end
198 | 	Linguistics::EN.register_lprintf_formatter :CONJUNCT, :conjunction
199 | 
200 | 
201 | end # module Linguistics::EN::Conjunctions
202 | 
203 | 


--------------------------------------------------------------------------------
/lib/linguistics/en/wordnet.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | 
  3 | require 'linguistics/en' unless defined?( Linguistics::EN )
  4 | 
  5 | # WordNet support for the English-language Linguistics module. It
  6 | # requires the Ruby-WordNet module to be installed; if it is not
  7 | # installed, calling the functions defined by this file will raise
  8 | # NotImplementedErrors.
  9 | #
 10 | #   # Test to be sure the WordNet module loaded okay.
 11 | #   Linguistics::EN.has_wordnet?
 12 | #   # => true
 13 | #
 14 | #   # Fetch the default synset for the word "balance"
 15 | #   "balance".en.synset
 16 | #   # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
 17 | #    (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
 18 | #
 19 | #   # Fetch the synset for the first verb sense of "balance"
 20 | #   "balance".en.synset( :verb )
 21 | #   # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
 22 | #   (verb): "bring into balance or equilibrium; "She has to balance work and her
 23 | #   domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
 24 | #   verbGroups: 2, hypernyms: 1, hyponyms: 5)>
 25 | #
 26 | #   # Fetch the second noun sense
 27 | #   "balance".en.synset( 2, :noun )
 28 | #   # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
 29 | #   on pull of gravity" (hypernyms: 1, hyponyms: 5)>
 30 | #
 31 | #   # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
 32 | #   "balance".en.synset( 2, :noun ).hypernyms
 33 | #   # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
 34 | #   instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
 35 | #   hyponyms: 2)>]
 36 | #
 37 | #   # A simpler way of doing the same thing:
 38 | #   "balance".en.hypernyms( 2, :noun )
 39 | #   # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
 40 | #   instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
 41 | #   hyponyms: 2)>]
 42 | #
 43 | #   # Fetch the first hypernym's hypernyms
 44 | #   "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
 45 | #   # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
 46 | #   measuring device (noun): "instrument that shows the extent or amount or quantity
 47 | #   or degree of something" (hypernyms: 1, hyponyms: 83)>]
 48 | #
 49 | #   # Find the synset to which both the second noun sense of "balance" and the
 50 | #   # default sense of "shovel" belong.
 51 | #   ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
 52 | #   # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
 53 | #   artifact (or system of artifacts) that is instrumental in accomplishing some
 54 | #   end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
 55 | #
 56 | #   # Fetch just the words for the other kinds of "instruments"
 57 | #   "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
 58 | #   # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
 59 | #   "extractor", "instrument of execution", "instrument of punishment", "measuring
 60 | #   instrument", "measuring system", "measuring device", "medical instrument",
 61 | #   "navigational instrument", "optical instrument", "plotter", "scientific
 62 | #   instrument", "sonograph", "surveying instrument", "surveyor's instrument",
 63 | #   "tracer", "weapon", "arm", "weapon system", "whip"]
 64 | # 
 65 | module Linguistics::EN::WordNet
 66 | 
 67 | 	@has_wordnet  = false
 68 | 	@wn_error     = nil
 69 | 	@lexicon      = nil
 70 | 
 71 | 	# Load WordNet if possible, saving the error that occurs if anything goes wrong.
 72 | 	begin
 73 | 		require 'wordnet'
 74 | 		@has_wordnet = true
 75 | 	rescue LoadError => err
 76 | 		@wn_error = err
 77 | 	end
 78 | 
 79 | 
 80 | 	# Container for methods intended to extend the EN module as singleton methods.
 81 | 	module SingletonMethods
 82 | 
 83 | 		### Returns +true+ if WordNet was loaded okay
 84 | 		def has_wordnet? ; @has_wordnet; end
 85 | 
 86 | 		### If #has_wordnet? returns +false+, this can be called to fetch the
 87 | 		### exception which was raised when WordNet was loaded.
 88 | 		def wordnet_error ; @wn_error; end
 89 | 
 90 | 	end # module SingletonMethods
 91 | 	extend SingletonMethods
 92 | 
 93 | 
 94 | 	# Register this module to the list of modules to include
 95 | 	Linguistics::EN.register_extension( self )
 96 | 
 97 | 
 98 | 	#################################################################
 99 | 	###	M O D U L E   M E T H O D S
100 | 	#################################################################
101 | 
102 | 	### Return the WordNet::Lexicon used for all Linguistics WordNet functions,
103 | 	### creating it on first use. Raises the saved error if WordNet failed to load.
104 | 	def self::lexicon
105 | 		raise self.wordnet_error unless self.has_wordnet?
106 | 		@lexicon ||= WordNet::Lexicon::new
107 | 	end
108 | 
109 | 
110 | 	### Set the WordNet::Lexicon used by the linguistic functions.
111 | 	def self::lexicon=( newlex )
112 | 		@lexicon = newlex
113 | 	end
114 | 
115 | 
116 | 	### Define an instance method called +name+ that looks up the receiver's synset
117 | 	### and returns the result of sending +name+ to it (+nil+ if there's no synset).
118 | 	def self::def_synset_function( name )
119 | 		define_method( name ) do |*criteria|
120 | 			syn = self.synset( *criteria ) or return nil
121 | 			return syn.send( name )
122 | 		end
123 | 	end
124 | 
125 | 
126 | 
127 | 	#################################################################
128 | 	###	W O R D N E T   I N T E R F A C E
129 | 	#################################################################
130 | 
131 | 	######
132 | 	public
133 | 	######
134 | 
135 | 	### Look up the synset associated with the given word or collocation in the
136 | 	### WordNet lexicon and return a WordNet::Synset object.
137 | 	def synset( *args )
138 | 		return Linguistics::EN::WordNet.lexicon[ self.to_s, *args ]
139 | 	end
140 | 
141 | 
142 | 	### Look up all the synsets associated with the given word or collocation in
143 | 	### the WordNet lexicon and return an Array of WordNet::Synset objects. Any
144 | 	### +args+ are passed through to the lexicon's #lookup_synsets method.
145 | 	def synsets( *args )
146 | 		return Linguistics::EN::WordNet.lexicon.lookup_synsets( self.to_s, *args )
147 | 	end
148 | 
149 | 
150 | 	# Returns definitions and/or example sentences as a String.
151 | 	def_synset_function :definition
152 | 
153 | 	# Return nouns or verbs that have the same hypernym as the receiver.
154 | 	def_synset_function :coordinates
155 | 
156 | 	# Returns the Array of synonyms contained in the synset for the receiver.
157 | 	def_synset_function :words
158 | 	def_synset_function :synonyms
159 | 
160 | 	# Returns the name of the lexicographer file that contains the raw data for
161 | 	# the receiver.
162 | 	def_synset_function :lex_info
163 | 
164 | 	# :TODO: Finish these comments, and figure out how the hell to get the
165 | 	# methods to show up in RDoc.
166 | 	def_synset_function :frames
167 | 
168 | 
169 | 	# Returns the synsets for the receiver's antonyms, if any. Ex:
170 | 	# 'opaque'.en.synset.antonyms
171 | 	#   ==> [#<WordNet::Synset:0x010ca614/454927 clear (adjective): "free
172 | 	#        from cloudiness; allowing light to pass through; "clear water";
173 | 	#        "clear plastic bags"; "clear glass"; "the air is clear and clean""
174 | 	#        (similarTos: 6, attributes: 1, derivations: 2, antonyms: 1,
175 | 	#        seeAlsos: 1)>]
176 | 	def_synset_function :antonyms
177 | 
178 | 	def_synset_function :hypernyms
179 |     def_synset_function :instance_hypernyms
180 | 	def_synset_function :entailment
181 | 	def_synset_function :hyponyms
182 |     def_synset_function :instance_hyponyms
183 | 	def_synset_function :causes
184 | 	def_synset_function :verbgroups
185 | 	def_synset_function :similar_to
186 | 	def_synset_function :participles
187 | 	def_synset_function :pertainyms
188 | 	def_synset_function :attributes
189 | 	def_synset_function :derived_from
190 | 	def_synset_function :see_also
191 | 	def_synset_function :functions
192 | 
193 | 	def_synset_function :meronyms
194 | 	def_synset_function :member_meronyms
195 | 	def_synset_function :stuff_meronyms
196 | 	def_synset_function :portion_meronyms
197 | 	def_synset_function :component_meronyms
198 | 	def_synset_function :feature_meronyms
199 | 	def_synset_function :phase_meronyms
200 | 	def_synset_function :place_meronyms
201 | 
202 | 	def_synset_function :holonyms
203 | 	def_synset_function :member_holonyms
204 | 	def_synset_function :stuff_holonyms
205 | 	def_synset_function :portion_holonyms
206 | 	def_synset_function :component_holonyms
207 | 	def_synset_function :feature_holonyms
208 | 	def_synset_function :phase_holonyms
209 | 	def_synset_function :place_holonyms
210 | 
211 | 	def_synset_function :domains
212 | 	def_synset_function :category_domains
213 | 	def_synset_function :region_domains
214 | 	def_synset_function :usage_domains
215 | 
216 | 	def_synset_function :members
217 | 	def_synset_function :category_members
218 | 	def_synset_function :region_members
219 | 	def_synset_function :usage_members
220 | 
221 | 
222 | end # module Linguistics::EN::WordNet
223 | 
224 | 


--------------------------------------------------------------------------------
/examples/endocs.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env ruby
  2 | 
  3 | $LOAD_PATH.unshift( 'lib' )
  4 | $stdout.sync = $stderr.sync = true
  5 | 
  6 | require 'loggability'
  7 | require 'linguistics'
  8 | require 'pry'
  9 | 
 10 | lines = File.readlines( __FILE__ ).slice_before( /^__END__/ ).to_a
 11 | header = lines.shift
 12 | source = lines.shift
 13 | source.shift
 14 | 
 15 | header_lines = header.length + 1
 16 | code = ''
 17 | result = nil
 18 | 
 19 | Loggability.level = $VERBOSE ? :debug : :warn
 20 | Loggability.format_with( :color )
 21 | 
 22 | source.each_with_index do |line, i|
 23 | 	case line
 24 | 
 25 | 	# Eval any accumulated code on a blank line
 26 | 	when /^\s*$/
 27 | 		puts
 28 | 		next if code.empty?
 29 | 		puts( code )
 30 | 		eval( code, binding(), __FILE__, header_lines + i )
 31 | 		code = ''
 32 | 
 33 | 	# Eval the code on a result marker, but also render the result after the marker
 34 | 	when /^#\s+=>/
 35 | 		puts( code )
 36 | 		$stdout.flush
 37 | 		result = eval( code, binding(), __FILE__, header_lines + i )
 38 | 		print '# => '
 39 | 		pp( result )
 40 | 		code = ''
 41 | 
 42 | 	# Output comment lines as-is
 43 | 	when /^#/
 44 | 		puts( line )
 45 | 
 46 | 	# Anything else gets appended to the code accumulator
 47 | 	else
 48 | 		code << line
 49 | 	end
 50 | end
 51 | 
 52 | __END__
 53 | 
 54 | # This module is a container for various English-language linguistic
 55 | # functions for the Linguistics library. It can be either loaded
 56 | # directly, or by passing some variant of +:en+ or +:eng+ to the
 57 | # Linguistics.use method.
 58 | 
 59 | require 'linguistics'
 60 | Linguistics.use( :en ) # extends Array, String, and Numeric
 61 | 
 62 | # == Pluralization
 63 | 
 64 | "box".en.plural
 65 | # => "boxes"
 66 | 
 67 | "mouse".en.plural
 68 | # => "mice"
 69 | 
 70 | "ruby".en.plural
 71 | # => "rubies"
 72 | 
 73 | 
 74 | # == Indefinite Articles
 75 | 
 76 | "book".en.a
 77 | # => "a book"
 78 | 
 79 | "article".en.a
 80 | # => "an article"
 81 | 
 82 | 
 83 | # == Present Participles
 84 | 
 85 | "runs".en.present_participle
 86 | # => "running"
 87 | 
 88 | "eats".en.present_participle
 89 | # => "eating"
 90 | 
 91 | "spies".en.present_participle
 92 | # => "spying"
 93 | 
 94 | 
 95 | # == Ordinal Numbers
 96 | 
 97 | 5.en.ordinal
 98 | # => "5th"
 99 | 
100 | 2004.en.ordinal
101 | # => "2004th"
102 | 
103 | 
104 | # == Numbers to Words
105 | 
106 | 5.en.numwords
107 | # => "five"
108 | 
109 | 2004.en.numwords
110 | # => "two thousand and four"
111 | 
112 | 2385762345876.en.numwords
113 | # => "two trillion, three hundred and eighty-five billion, seven hundred and sixty-two million, three hundred and forty-five thousand, eight hundred and seventy-six"
114 | 
115 | 
116 | # == Quantification
117 | 
118 | "cow".en.quantify( 5 )
119 | # => "several cows"
120 | 
121 | "cow".en.quantify( 1005 )
122 | # => "thousands of cows"
123 | 
124 | "cow".en.quantify( 20_432_123_000_000 )
125 | # => "tens of trillions of cows"
126 | 
127 | 
128 | # == Conjunctions
129 | 
130 | animals = %w{dog cow ox chicken goose goat cow dog rooster llama pig goat dog cat cat dog cow goat goose goose ox alpaca}
131 | "The farm has: " + animals.en.conjunction
132 | # => The farm has: four dogs, three cows, three geese, three goats, two oxen, two cats, a chicken, a rooster, a llama, a pig, and an alpaca
133 | 
134 | # Note that 'goose' and 'ox' are both correctly pluralized, and the correct
135 | # indefinite article 'an' has been used for 'alpaca'.
136 | #
137 | # You can also use the generalization function of the #quantify method to give
138 | # general descriptions of object lists instead of literal counts:
139 | 
140 | allobjs = []
141 | ObjectSpace::each_object {|obj| allobjs << obj.class.name }
142 | puts "The current Ruby objectspace contains: " + allobjs.en.conjunction( :generalize => true )
143 | # =>
144 | 
145 | 
146 | # == Infinitives
147 | 
148 | "leaving".en.infinitive
149 | # => "leave"
150 | 
151 | "left".en.infinitive
152 | # => "leave"
153 | 
154 | "leaving".en.infinitive.suffix
155 | # => "ing"
156 | 
157 | 
158 | # == Conjugation
159 | 
160 | # Conjugate a verb given an infinitive:
161 | 
162 | "run".en.past_tense
163 | # => "ran"
164 | 
165 | "run".en.past_participle
166 | # => "run"
167 | 
168 | "run".en.present_tense
169 | # => "run"
170 | 
171 | "run".en.present_participle
172 | # => "running"
173 | 
174 | # Conjugate an infinitive with an explicit tense and grammatical person:
175 | 
176 | "be".en.conjugate( :present, :third_person_singular )
177 | # => "is"
178 | 
179 | "be".en.conjugate( :present, :first_person_singular )
180 | # => "am"
181 | 
182 | "be".en.conjugate( :past, :first_person_singular )
183 | # => "was"
184 | 
185 | # The functionality is a port of the verb conjugation portion of Morph
186 | # Adorner (http://morphadorner.northwestern.edu/).
187 | #
188 | # It includes a good number of irregular verbs, but it's not going to be
189 | # 100% correct every time.
190 | 
191 | 
192 | # == WordNet® Integration
193 | 
194 | # If you have the 'wordnet' gem installed, you can look up WordNet synsets using
195 | # the Linguistics interface:
196 | 
197 | # Test to be sure the WordNet module loaded okay.
198 | Linguistics::EN.has_wordnet?
199 | # => true
200 | 
201 | # Fetch the default synset for the word "balance"
202 | "balance".en.synset
203 | # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium" (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
204 | 
205 | # Fetch the synset for the first verb sense of "balance"
206 | "balance".en.synset( :verb )
207 | # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise (verb): "bring into balance or equilibrium; "She has to balance work and her domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1, verbGroups: 2, hypernyms: 1, hyponyms: 5)>
208 | 
209 | # Fetch the second noun sense
210 | "balance".en.synset( 2, :noun )
211 | # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends on pull of gravity" (hypernyms: 1, hyponyms: 5)>
212 | 
213 | # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
214 | "balance".en.synset( 2, :noun ).hypernyms
215 | # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1, hyponyms: 2)>]
216 | 
217 | # A simpler way of doing the same thing:
218 | "balance".en.hypernyms( 2, :noun )
219 | # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1, hyponyms: 2)>]
220 | 
221 | # Fetch the first hypernym's hypernyms
222 | "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
223 | # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system, measuring device (noun): "instrument that shows the extent or amount or quantity or degree of something" (hypernyms: 1, hyponyms: 83)>]
224 | 
225 | # Find the synset to which both the second noun sense of "balance" and the
226 | # default sense of "shovel" belong.
227 | ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
228 | # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an artifact (or system of artifacts) that is instrumental in accomplishing some end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
229 | 
230 | # Fetch words for the specific kinds of (device-ish) "instruments"
231 | "instrument".en.hyponyms( "device" ).collect( &:words ).flatten.join(', ')
232 | # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument", "extractor", "instrument of execution", "instrument of punishment", "measuring instrument", "measuring system", "measuring device", "medical instrument", "navigational instrument", "optical instrument", "plotter", "scientific instrument", "sonograph", "surveying instrument", "surveyor's instrument", "tracer", "weapon", "arm", "weapon system", "whip"]
233 | 
234 | # ...or musical instruments
235 | "instrument".en.hyponyms( "musical" ).collect( &:words ).flatten.join(', ')
236 | # => "barrel organ, grind organ, hand organ, hurdy-gurdy, hurdy gurdy, street organ, bass, calliope, steam organ, electronic instrument, electronic musical instrument, jew's harp, jews' harp, mouth bow, keyboard instrument, music box, musical box, percussion instrument, percussive instrument, stringed instrument, wind, wind instrument"
237 | 
238 | # There are many more WordNet methods supported--too many to list here. See the
239 | # documentation for the complete list.
240 | 
241 | 
242 | # == LinkParser Integration
243 | 
244 | # If you have the 'linkparser' gem installed, you can create linkages
245 | # from English sentences that let you query for parts of speech:
246 | 
247 | # Test to see whether or not the link parser is loaded.
248 | Linguistics::EN.has_linkparser?
249 | # => true
250 | 
251 | # Diagram the first linkage for a test sentence
252 | puts "he is a big dog".en.sentence.linkages.first.diagram
253 | 
254 | # Find the verb in the sentence
255 | "he is a big dog".en.sentence.verb
256 | # => "is"
257 | 
258 | # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
259 | # given sentence.
260 | "he is a big dog".en.sentence.verb.en.infinitive
261 | # => "be"
262 | 
263 | # Find the direct object of the sentence
264 | "he is a big dog".en.sentence.object
265 | # => "dog"
266 | 
267 | # Combine WordNet + LinkParser to find the definition of the direct object of
268 | # the sentence
269 | "he is a big dog".en.sentence.object.en.definition
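270 | # => "a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds"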
271 | 
272 | 
273 | 


--------------------------------------------------------------------------------
/experiments/gen_numwords_specs.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env ruby
  2 | 
  3 | NumberTests = [ # Rows: [input, numwords, :group=>1, :group=>2, :group=>3, ordinal words (optional)]
  4 | 	["0", "zero", "zero", "zero", "zero", "zeroth", ],
  5 | 	["1", "one", "one", "one", "one", "first", ],
  6 | 	["2", "two", "two", "two", "two", "second", ],
  7 | 	["3", "three", "three", "three", "three", "third", ],
  8 | 	["4", "four", "four", "four", "four", "fourth", ],
  9 | 	["5", "five", "five", "five", "five", "fifth", ],
 10 | 	["6", "six", "six", "six", "six", "sixth", ],
 11 | 	["7", "seven", "seven", "seven", "seven", "seventh", ],
 12 | 	["8", "eight", "eight", "eight", "eight", "eighth", ],
 13 | 	["9", "nine", "nine", "nine", "nine", "ninth", ],
 14 | 	["10", "ten", "one, zero", "ten", "ten", "tenth", ],
 15 | 	["11", "eleven", "one, one", "eleven", "eleven", "eleventh", ],
 16 | 	["12", "twelve", "one, two", "twelve", "twelve", "twelfth", ],
 17 | 	["13", "thirteen", "one, three", "thirteen", "thirteen", "thirteenth", ],
 18 | 	["14", "fourteen", "one, four", "fourteen", "fourteen", "fourteenth", ],
 19 | 	["15", "fifteen", "one, five", "fifteen", "fifteen", "fifteenth", ],
 20 | 	["16", "sixteen", "one, six", "sixteen", "sixteen", "sixteenth", ],
 21 | 	["17", "seventeen", "one, seven", "seventeen", "seventeen", "seventeenth", ],
 22 | 	["18", "eighteen", "one, eight", "eighteen", "eighteen", "eighteenth", ],
 23 | 	["19", "nineteen", "one, nine", "nineteen", "nineteen", "nineteenth", ],
 24 | 	["20", "twenty", "two, zero", "twenty", "twenty", "twentieth", ],
 25 | 	["21", "twenty-one", "two, one", "twenty-one", "twenty-one", "twenty-first", ],
 26 | 	["29", "twenty-nine", "two, nine", "twenty-nine", "twenty-nine", "twenty-ninth", ],
 27 | 	["99", "ninety-nine", "nine, nine", "ninety-nine", "ninety-nine", "ninety-ninth", ],
 28 | 	# Three-digit numbers
 29 | 	["100", "one hundred", "one, zero, zero", "ten, zero", "one zero zero",
 30 | 		"one hundredth", ],
 31 | 	["101", "one hundred and one", "one, zero, one", "ten, one", "one zero one",
 32 | 		"one hundred and first", ],
 33 | 	["110", "one hundred and ten", "one, one, zero", "eleven, zero", "one ten",
 34 | 		"one hundred and tenth", ],
 35 | 	["111", "one hundred and eleven", "one, one, one", "eleven, one", "one eleven",
 36 | 		"one hundred and eleventh", ],
 37 | 	["900", "nine hundred", "nine, zero, zero", "ninety, zero", "nine zero zero",
 38 | 		"nine hundredth", ],
 39 | 	["999", "nine hundred and ninety-nine", "nine, nine, nine", "ninety-nine, nine",
 40 | 		"nine ninety-nine", "nine hundred and ninety-ninth", ],
 41 | 	# Four- and five-digit numbers
 42 | 	["1000", "one thousand", "one, zero, zero, zero", "ten, zero zero",
 43 | 		"one zero zero, zero", "one thousandth", ],
 44 | 	["1001", "one thousand and one", "one, zero, zero, one", "ten, zero one",
 45 | 		"one zero zero, one", "one thousand and first", ],
 46 | 	["1010", "one thousand and ten", "one, zero, one, zero", "ten, ten",
 47 | 		"one zero one, zero", "one thousand and tenth", ],
 48 | 	["1100", "one thousand, one hundred", "one, one, zero, zero",
 49 | 		"eleven, zero zero", "one ten, zero", "one thousand, one hundredth", ],
 50 | 	["2000", "two thousand", "two, zero, zero, zero", "twenty, zero zero",
 51 | 		"two zero zero, zero", "two thousandth", ],
 52 | 	["10000", "ten thousand", "one, zero, zero, zero, zero", "ten, zero zero, zero",
 53 | 		"one zero zero, zero zero", "ten thousandth", ],
 54 | 	# Six-digit numbers, including one written with a leading zero
 55 | 	["100000", "one hundred thousand", "one, zero, zero, zero, zero, zero",
 56 | 		"ten, zero zero, zero zero", "one zero zero, zero zero zero",
 57 | 		"one hundred thousandth", ],
 58 | 	["100001", "one hundred thousand and one", "one, zero, zero, zero, zero, one",
 59 | 		"ten, zero zero, zero one", "one zero zero, zero zero one",
 60 | 		"one hundred thousand and first", ],
 61 | 	["123456", "one hundred and twenty-three thousand, four hundred and fifty-six",
 62 | 		"one, two, three, four, five, six", "twelve, thirty-four, fifty-six",
 63 | 		"one twenty-three, four fifty-six",
 64 | 		"one hundred and twenty-three thousand, four hundred and fifty-sixth", ],
 65 | 	["0123456", "one hundred and twenty-three thousand, four hundred and fifty-six",
 66 | 		"zero, one, two, three, four, five, six",
 67 | 		"zero one, twenty-three, forty-five, six",
 68 | 		"zero twelve, three forty-five, six",
 69 | 		"one hundred and twenty-three thousand, four hundred and fifty-sixth", ],
 70 | 	# Seven digits and up; the inputs containing '_' or ',' have no ordinal column
 71 | 	["1234567",
 72 | 		"one million, two hundred and thirty-four thousand, five hundred and sixty-seven",
 73 | 		"one, two, three, four, five, six, seven", "twelve, thirty-four, fifty-six, seven",
 74 | 		"one twenty-three, four fifty-six, seven",
 75 | 		"one million, two hundred and thirty-four thousand, five hundred and sixty-seventh", ],
 76 | 	["12345678",
 77 | 		"twelve million, three hundred and forty-five thousand, six hundred and seventy-eight",
 78 | 		"one, two, three, four, five, six, seven, eight",
 79 | 		"twelve, thirty-four, fifty-six, seventy-eight",
 80 | 		"one twenty-three, four fifty-six, seventy-eight",
 81 | 		"twelve million, three hundred and forty-five thousand, six hundred and seventy-eighth", ],
 82 | 	["12_345_678",
 83 | 		"twelve million, three hundred and forty-five thousand, six hundred and seventy-eight",
 84 | 		"one, two, three, four, five, six, seven, eight",
 85 | 		"twelve, thirty-four, fifty-six, seventy-eight",
 86 | 		"one twenty-three, four fifty-six, seventy-eight", ],
 87 | 	["1234,5678",
 88 | 		"twelve million, three hundred and forty-five thousand, six hundred and seventy-eight",
 89 | 		"one, two, three, four, five, six, seven, eight",
 90 | 		"twelve, thirty-four, fifty-six, seventy-eight",
 91 | 		"one twenty-three, four fifty-six, seventy-eight", ],
 92 | 	["1234567890",
 93 | 		"one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninety",
 94 | 		"one, two, three, four, five, six, seven, eight, nine, zero",
 95 | 		"twelve, thirty-four, fifty-six, seventy-eight, ninety",
 96 | 		"one twenty-three, four fifty-six, seven eighty-nine, zero",
 97 | 		"one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninetieth", ],
 98 | 	["123456789012345",
 99 | 		"one hundred and twenty-three trillion, four hundred and fifty-six billion, seven hundred and eighty-nine million, twelve thousand, three hundred and forty-five",
100 | 		"one, two, three, four, five, six, seven, eight, nine, zero, one, two, three, four, five",
101 | 		"twelve, thirty-four, fifty-six, seventy-eight, ninety, twelve, thirty-four, five",
102 | 		"one twenty-three, four fifty-six, seven eighty-nine, zero twelve, three forty-five",
103 | 		"one hundred and twenty-three trillion, four hundred and fifty-six billion, seven hundred and eighty-nine million, twelve thousand, three hundred and forty-fifth", ],
104 | 	["12345678901234567890",
105 | 		"twelve quintillion, three hundred and forty-five quadrillion, six hundred and seventy-eight trillion, nine hundred and one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninety",
106 | 		"one, two, three, four, five, six, seven, eight, nine, zero, one, two, three, four, five, six, seven, eight, nine, zero",
107 | 		"twelve, thirty-four, fifty-six, seventy-eight, ninety, twelve, thirty-four, fifty-six, seventy-eight, ninety",
108 | 		"one twenty-three, four fifty-six, seven eighty-nine, zero twelve, three forty-five, six seventy-eight, ninety",
109 | 		"twelve quintillion, three hundred and forty-five quadrillion, six hundred and seventy-eight trillion, nine hundred and one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninetieth", ],
110 | 	# Decimals; the last input has two decimal points and no ordinal column
111 | 	["0.987654", "zero point nine eight seven six five four",
112 | 		"zero, point, nine, eight, seven, six, five, four",
113 | 		"zero, point, ninety-eight, seventy-six, fifty-four",
114 | 		"zero, point, nine eighty-seven, six fifty-four",
115 | 		"zero point nine eight seven six five fourth", ],
116 | 	[".987654", "point nine eight seven six five four",
117 | 		"point, nine, eight, seven, six, five, four",
118 | 		"point, ninety-eight, seventy-six, fifty-four",
119 | 		"point, nine eighty-seven, six fifty-four",
120 | 		"point nine eight seven six five fourth", ],
121 | 	["9.87654", "nine point eight seven six five four",
122 | 		"nine, point, eight, seven, six, five, four",
123 | 		"nine, point, eighty-seven, sixty-five, four",
124 | 		"nine, point, eight seventy-six, fifty-four",
125 | 		"nine point eight seven six five fourth", ],
126 | 	["98.7654", "ninety-eight point seven six five four",
127 | 		"nine, eight, point, seven, six, five, four",
128 | 		"ninety-eight, point, seventy-six, fifty-four",
129 | 		"ninety-eight, point, seven sixty-five, four",
130 | 		"ninety-eight point seven six five fourth", ],
131 | 	["987.654", "nine hundred and eighty-seven point six five four",
132 | 		"nine, eight, seven, point, six, five, four",
133 | 		"ninety-eight, seven, point, sixty-five, four",
134 | 		"nine eighty-seven, point, six fifty-four",
135 | 		"nine hundred and eighty-seven point six five fourth", ],
136 | 	["9876.54", "nine thousand, eight hundred and seventy-six point five four",
137 | 		"nine, eight, seven, six, point, five, four",
138 | 		"ninety-eight, seventy-six, point, fifty-four",
139 | 		"nine eighty-seven, six, point, fifty-four",
140 | 		"nine thousand, eight hundred and seventy-six point five fourth", ],
141 | 	["98765.4", "ninety-eight thousand, seven hundred and sixty-five point four",
142 | 		"nine, eight, seven, six, five, point, four",
143 | 		"ninety-eight, seventy-six, five, point, four",
144 | 		"nine eighty-seven, sixty-five, point, four",
145 | 		"ninety-eight thousand, seven hundred and sixty-five point fourth", ],
146 | 	["101.202.303", "one hundred and one point two zero two three zero three",
147 | 		"one, zero, one, point, two, zero, two, point, three, zero, three",
148 | 		"ten, one, point, twenty, two, point, thirty, three",
149 | 		"one zero one, point, two zero two, point, three zero three",
150 | 	]
151 | ]
152 | 
153 | NumberTests.each do
154 | 	|origin, regular, group1, group2, group3, numord, ordnum|
155 | 
156 | 	puts %{
157 | 		it "can transform #{origin} into english words" do
158 | 			#{origin.dump}.en.numwords == #{regular.dump}
159 | 		end
160 | 
161 | 		it "can transform #{origin} into english words in single-digit groups" do
162 | 			#{origin.dump}.en.numwords( :group => 1 ) == #{group1.dump}
163 | 		end
164 | 
165 | 		it "can transform #{origin} into english words in double-digit groups" do
166 | 			#{origin.dump}.en.numwords( :group => 2 ) == #{group2.dump}
167 | 		end
168 | 
169 | 		it "can transform #{origin} into english words in triple-digit groups" do
170 | 			#{origin.dump}.en.numwords( :group => 3 ) == #{group3.dump}
171 | 		end
172 | 	}
173 | 
174 | 	puts %{
175 | 		it "can transform the english words for #{origin} into an ordinal" do
176 | 			#{origin.dump}.en.numwords.en.ordinal.should == #{numord.dump}
177 | 		end
178 | 	} if numord
179 | 
180 | end
181 | 


--------------------------------------------------------------------------------
/lib/linguistics/en.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | 
  3 | require 'rubygems' # For Gem.find_files
  4 | require 'pathname'
  5 | 
  6 | require 'linguistics' unless defined?( Linguistics )
  7 | 
  8 | 
  9 | # This module is a container for various English-language linguistic
 10 | # functions for the Linguistics library. It can be either loaded
 11 | # directly, or by passing some variant of +:en+ or +:eng+ to the
 12 | # Linguistics.use method.
 13 | #
 14 | # == Pluralization
 15 | #
 16 | #  "box".en.plural
 17 | #  # => "boxes"
 18 | #
 19 | #  "mouse".en.plural
 20 | #  # => "mice"
 21 | #
 22 | #  "ruby".en.plural
 23 | #  # => "rubies"
 24 | #
 25 | #
 26 | # == Indefinite Articles
 27 | #
 28 | #  "book".en.a
 29 | #  # => "a book"
 30 | #
 31 | #  "article".en.a
 32 | #  # => "an article"
 33 | #
 34 | #
 35 | # == Present Participles
 36 | #
 37 | #  "runs".en.present_participle
 38 | #  # => "running"
 39 | #
 40 | #  "eats".en.present_participle
 41 | #  # => "eating"
 42 | #
 43 | #  "spies".en.present_participle
 44 | #  # => "spying"
 45 | #
 46 | #
 47 | # == Ordinal Numbers
 48 | #
 49 | #  5.en.ordinal
 50 | #  # => "5th"
 51 | #
 52 | #  2004.en.ordinal
 53 | #  # => "2004th"
 54 | #
 55 | #
 56 | # == Numbers to Words
 57 | #
 58 | #  5.en.numwords
 59 | #  # => "five"
 60 | #
 61 | #  2004.en.numwords
 62 | #  # => "two thousand and four"
 63 | #
 64 | #  2385762345876.en.numwords
 65 | #  # => "two trillion, three hundred and eighty-five billion, seven hundred and
 66 | #  #     sixty-two million, three hundred and forty-five thousand, eight hundred
 67 | #  #     and seventy-six"
 68 | #
 69 | #
 70 | # == Quantification
 71 | #
 72 | #  "cow".en.quantify( 5 )
 73 | #  # => "several cows"
 74 | #
 75 | #  "cow".en.quantify( 1005 )
 76 | #  # => "thousands of cows"
 77 | #
 78 | #  "cow".en.quantify( 20_432_123_000_000 )
 79 | #  # => "tens of trillions of cows"
 80 | #
 81 | #
 82 | # == Conjunctions
 83 | #
 84 | #  animals = %w{dog cow ox chicken goose goat cow dog rooster llama pig goat
 85 | #               dog cat cat dog cow goat goose goose ox alpaca}
 86 | #  "The farm has: " + animals.en.conjunction
 87 | #  # => "The farm has: four dogs, three cows, three geese, three goats, two
 88 | #  #     oxen, two cats, a chicken, a rooster, a llama, a pig, and an alpaca"
 89 | #
 90 | # Note that 'goose' and 'ox' are both correctly pluralized, and the correct
 91 | # indefinite article 'an' has been used for 'alpaca'.
 92 | #
 93 | # You can also use the generalization function of the #quantify method to give
 94 | # general descriptions of object lists instead of literal counts:
 95 | #
 96 | #  allobjs = []
 97 | #  ObjectSpace::each_object {|obj| allobjs << obj.class.name }
 98 | #  puts "The current Ruby objectspace contains: " +
 99 | #       allobjs.en.conjunction( :generalize => true )
100 | #
101 | # Outputs:
102 | #
103 | #  The current Ruby objectspace contains: hundreds of thousands of Strings,
104 | #  thousands of RubyVM::InstructionSequences, thousands of Arrays, thousands
105 | #  of Hashes, hundreds of Procs, hundreds of Regexps, [...], a
106 | #  SystemStackError, a Random, an ARGF.class, a Data, a fatal, an
107 | #  OptionParser::List, a YAML::EngineManager, a URI::Parser, a Rational, and
108 | #  a Gem::Platform
109 | #
110 | #
111 | # == Infinitives
112 | #
113 | #   "leaving".en.infinitive
114 | #   # => "leave"
115 | #
116 | #   "left".en.infinitive
117 | #   # => "leave"
118 | #
119 | #   "leaving".en.infinitive.suffix
120 | #   # => "ing"
121 | #
122 | #
123 | # == Conjugation
124 | #
125 | # Conjugate a verb given an infinitive:
126 | #
127 | #   "run".en.past_tense
128 | #   # => "ran"
129 | #
130 | #   "run".en.past_participle
131 | #   # => "run"
132 | #
133 | #   "run".en.present_tense
134 | #   # => "run"
135 | #
136 | #   "run".en.present_participle
137 | #   # => "running"
138 | #
139 | # Conjugate an infinitive with an explicit tense and grammatical person:
140 | #
141 | #   "be".en.conjugate( :present, :third_person_singular )
142 | #   # => "is"
143 | #
144 | #   "be".en.conjugate( :present, :first_person_singular )
145 | #   # => "am"
146 | #
147 | #   "be".en.conjugate( :past, :first_person_singular )
148 | #   # => "was"
149 | #
150 | # The functionality is a port of the verb conjugation portion of Morph
151 | # Adorner (http://morphadorner.northwestern.edu/).
152 | #
153 | # It includes a good number of irregular verbs, but it's not going to be
154 | # 100% correct every time.
155 | #
156 | #
157 | # == WordNet® Integration
158 | #
159 | # If you have the 'wordnet' gem installed, you can look up WordNet synsets using
160 | # the Linguistics interface:
161 | #
162 | # Test to be sure the WordNet module loaded okay.
163 | #
164 | #    Linguistics::EN.has_wordnet?
165 | #    # => true
166 | #
167 | # Fetch the default synset for the word "balance"
168 | #
169 | #    "balance".en.synset
170 | #    # => #<WordNet::Synset:0x7f9fb11012f8 {102777100} 'balance' (noun):
171 | #    #    [noun.artifact] a scale for weighing; depends on pull of gravity>
172 | #
173 | # Fetch the synset for the first verb sense of "balance"
174 | #
175 | #   "balance".en.synset( :verb )
176 | #   # => #<WordNet::Synset:0x7f9fb10f3fb8 {201602318} 'balance, poise' (verb):
177 | #   #    [verb.contact] hold or carry in equilibrium>
178 | #
179 | # Fetch the second noun sense
180 | #
181 | #   "balance".en.synset( 2, :noun )
182 | #   # => #<WordNet::Synset:0x7f9fb10ebbd8 {102777402} 'balance, balance wheel'
183 | #   #     (noun): [noun.artifact] a wheel that regulates the rate of movement in a
184 | #   #     machine; especially a wheel oscillating against the hairspring of a
185 | #   #     timepiece to regulate its beat>
186 | #
187 | # Fetch the second noun sense's hypernyms (more-general words, like a
188 | # superclass)
189 | #
190 | #   "balance".en.synset( 2, :noun ).hypernyms
191 | #   # => [#<WordNet::Synset:0x7f9fb10dd100 {104574999} 'wheel' (noun):
192 | #   #    [noun.artifact] a simple machine consisting of a circular frame with
193 | #   #    spokes (or a solid disc) that can rotate on a shaft or axle (as in
194 | #   #    vehicles or other machines)>]
195 | #
196 | # A simpler way of doing the same thing:
197 | #
198 | #   "balance".en.hypernyms( 2, :noun )
199 | #   # => [#<WordNet::Synset:0x7f9fb10d24d0 {104574999} 'wheel' (noun):
200 | #   #    [noun.artifact] a simple machine consisting of a circular frame with
201 | #   #    spokes (or a solid disc) that can rotate on a shaft or axle (as in
202 | #   #    vehicles or other machines)>]
203 | #
204 | # Fetch the first hypernym's hypernyms
205 | #
206 | #   "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
207 | #   # => [#<WordNet::Synset:0x7f9fb10c5190 {103700963} 'machine, simple machine'
208 | #   #    (noun): [noun.artifact] a device for overcoming resistance at one point by
209 | #   #    applying force at some other point>]
210 | #
211 | # Find the synset to which both the second noun sense of "balance" and the
212 | # default sense of "shovel" belong.
213 | #
214 | #   ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
215 | #   # => #<WordNet::Synset:0x7f9fb1091e58 {103183080} 'device' (noun):
216 | #   #    [noun.artifact] an instrumentality invented for a particular purpose>
217 | #
218 | # Fetch words for the specific kinds of (device-ish) "instruments"
219 | #
220 | #   "instrument".en.hyponyms( "device" ).collect( &:words ).flatten.join(', ')
221 | #   # => "analyser, analyzer, cauterant, cautery, drafting instrument, engine,
222 | #   #    extractor, instrument of execution, instrument of punishment, measuring
223 | #   #    device, measuring instrument, measuring system, medical instrument,
224 | #   #    navigational instrument, optical instrument, plotter, scientific
225 | #   #    instrument, sonograph, surveying instrument, surveyor's instrument,
226 | #   #    tracer, arm, weapon, weapon system, whip"
227 | #
228 | # ...or musical instruments
229 | #
230 | #   "instrument".en.hyponyms( "musical" ).collect( &:words ).flatten.join(', ')
231 | #   # => "barrel organ, grind organ, hand organ, hurdy-gurdy, hurdy gurdy,
232 | #   #    street organ, bass, calliope, steam organ, electronic instrument,
233 | #   #    electronic musical instrument, jew's harp, jews' harp, mouth bow, keyboard
234 | #   #    instrument, music box, musical box, percussion instrument, percussive
235 | #   #    instrument, stringed instrument, wind, wind instrument"
236 | #
237 | # There are many more WordNet methods supported--too many to list here. See the
238 | # WordNet::Synset API documentation for the complete list.
239 | #
240 | #
241 | # == LinkParser Integration
242 | #
243 | # If you have the 'linkparser' gem installed, you can create linkages
244 | # from English sentences that let you query for parts of speech:
245 | #
246 | # Test to see whether or not the link parser is loaded.
247 | #
248 | #   Linguistics::EN.has_linkparser?
249 | #   # => true
250 | #
251 | # Diagram the first linkage for a test sentence
252 | #
253 | #   puts "he is a big dog".en.sentence.linkages.first.diagram
254 | #
255 | # Outputs:
256 | #
257 | #        +-----Ost----+
258 | #        |  +----Ds---+
259 | #    +-Ss+  |   +--A--+
260 | #    |   |  |   |     |
261 | #   he is.v a big.a dog.n
262 | #
263 | # Find the verb in the sentence
264 | #
265 | #   "he is a big dog".en.sentence.verb.to_s
266 | #   # => "is"
267 | #
268 | # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
269 | # given sentence.
270 | #
271 | #   "he is a big dog".en.sentence.verb.en.infinitive
272 | #   # => "be"
273 | #
274 | # Find the direct object of the sentence
275 | #
276 | #   "he is a big dog".en.sentence.object.to_s
277 | #   # => "dog"
278 | #
279 | # Combine WordNet + LinkParser to find the definition of the direct object of
280 | # the sentence
281 | #
282 | #   "he is a big dog".en.sentence.object.en.definition
283 | #   # => "a member of the genus Canis (probably descended from the common wolf)
284 | #   #    that has been domesticated by man since prehistoric times; occurs in many
285 | #   #    breeds"
286 | #
287 | #
288 | module Linguistics::EN
289 | 	extend Loggability
290 | 
291 | 	# Loggability API -- log to the Linguistics logger
292 | 	log_to :linguistics
293 | 
294 | 	# The extension modules registered so far; ::register_extension appends to it
295 | 	MODULES = []
296 | 
297 | 	# The key in Thread.current that is set to +true+ while running in 'classical' mode
298 | 	THREAD_CLASSICAL_KEY = :english_classical_mode
299 | 
300 | 
301 | 	# A Hash of 'lprintf' formatter Procs keyed by name; filled by ::register_lprintf_formatter
302 | 	@@lprintf_formatters = {}
303 | 
304 | 
305 | 	#################################################################
306 | 	###	U T I L I T Y   F U N C T I O N S
307 | 	#################################################################
308 | 
309 | 	### A Hash of formatters for the lprintf function.
310 | 	def self::lprintf_formatters
311 | 		return @@lprintf_formatters
312 | 	end
313 | 
314 | 
315 | 	### Register +mod+ as an English-language extension: record it in MODULES, mix it into this module, and point its logging at :linguistics.
316 | 	def self::register_extension( mod )
317 | 		MODULES.push( mod )
318 | 		self.log.debug "Registered English extension %p" % [ mod ]
319 | 		# Mix in the extension's instance methods, and make the extension itself a Loggability client
320 | 		include( mod )
321 | 		mod.extend( Loggability )
322 | 		mod.log_to( :linguistics )
323 | 		# An optional SingletonMethods constant in the extension supplies module-level methods
324 | 		if mod.const_defined?( :SingletonMethods )
325 | 			smod = mod.const_get(:SingletonMethods)
326 | 			self.log.debug "  and its singleton methods %p" % [ smod ]
327 | 			extend( smod )
328 | 			# Copy the extension's own instance variables onto this module (only done when SingletonMethods exists)
329 | 			ivars = mod.instance_variables
330 | 			self.log.debug "  and instance variables %p" % [ ivars ]
331 | 			ivars.each do |ivar|
332 | 				instance_variable_set( ivar, mod.instance_variable_get(ivar) )
333 | 			end
334 | 		end
335 | 	end
336 | 
337 | 
338 | 	### Returns +true+ if the English-language module with the given +name+ was
339 | 	### successfully registered.
340 | 	def self::has_extension?( name )
341 | 		return MODULES.any? do |mod|
342 | 			mod.name.sub( /.*::/, '' ).downcase == name.to_s.downcase
343 | 		end
344 | 	end
345 | 
346 | 
347 | 	### Debugging output
348 | 	def self::debug_msg( *msgs ) # :nodoc:
349 | 		$stderr.puts msgs.join(" ") if $DEBUG
350 | 	end
351 | 
352 | 
353 | 	### Add an lprintf formatter named +name+ that will use the specified +callback+ method.
354 | 	### The name of the formatter is the placeholder that will be used in the
355 | 	### format string, and the +callback+ is the method to call on the english-language
356 | 	### inflector for the lprintf argument, and can either be an object that responds to
357 | 	### #call, or the name of a method to call as a Symbol.
358 | 	###
359 | 	### Using a Symbol:
360 | 	###
361 | 	###    def plural( count=2 )
362 | 	###        # return the plural of the inflected object
363 | 	###    end
364 | 	###    Linguistics::EN.register_lprintf_formatter :PL, :plural
365 | 	###
366 | 	### Using a method:
367 | 	###
368 | 	###    Linguistics::EN.register_lprintf_formatter :PL, method( :plural )
369 | 	###
370 | 	### Using a block:
371 | 	###
372 | 	###    Linguistics::EN.register_lprintf_formatter :PL do |obj|
373 | 	###        obj.en.plural
374 | 	###    end
375 | 	###
376 | 	def self::register_lprintf_formatter( name, callback=nil )
377 | 		raise LocalJumpError, "no callback or block given" unless callback || block_given?
378 | 		callback ||= Proc.new
379 | 
380 | 		@@lprintf_formatters[ name ] = callback.to_proc
381 | 	end
382 | 
383 | 
384 | 	### Return +true+ if running in a 'classical' mode.
385 | 	def self::classical?
386 | 		return Thread.current[ THREAD_CLASSICAL_KEY ] ? true : false
387 | 	end
388 | 
389 | 
390 | 	### Set classical mode for the current thread inside the block, then
391 | 	### unset it when it returns.
392 | 	def self::in_classical_mode
393 | 		old_setting = Thread.current[ THREAD_CLASSICAL_KEY ]
394 | 		Thread.current[ THREAD_CLASSICAL_KEY ] = true
395 | 
396 | 		yield
397 | 	ensure
398 | 		Thread.current[ THREAD_CLASSICAL_KEY ] = old_setting
399 | 	end
400 | 
401 | 
402 | 	#################################################################
403 | 	###	P U B L I C   F U N C T I O N S
404 | 	#################################################################
405 | 
406 | 	### Format the given +fmt+ string by replacing %-escaped sequences with the
407 | 	### result of performing a specified operation on the corresponding
408 | 	### argument, ala Kernel.sprintf.
409 | 	### %PL::
410 | 	###   Plural.
411 | 	### %A, %AN::
412 | 	###   Prepend indefinite article.
413 | 	### %NO::
414 | 	###   Zero-quantified phrase.
415 | 	### %NUMWORDS::
416 | 	###   Convert a number into the corresponding words.
417 | 	### %CONJUNCT::
418 | 	###   Conjunction.
419 | 	def lprintf( *args )
420 | 		return self.to_s.gsub( /%([A-Z_]+)/ ) do |match|
421 | 			op = $1.to_s.upcase.to_sym
422 | 			if (( callback = Linguistics::EN.lprintf_formatters[op] ))
423 | 				arg = args.shift
424 | 				callback.call( arg.en )
425 | 			else
426 | 				raise "no such formatter %p" % [ op ]
427 | 			end
428 | 		end
429 | 	end
430 | 
431 | 
432 | 	# Register this module with Linguistics under the :en language code
433 | 	Linguistics.register_language( :en, self )
434 | 
435 | 
436 | end # module Linguistics::EN
437 | 
438 | 


--------------------------------------------------------------------------------
/lib/linguistics/en/numbers.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | 
  3 | require 'linguistics/en' unless defined?( Linguistics::EN )
  4 | 
  5 | # Numeric methods for the English-language Linguistics module.
  6 | module Linguistics::EN::Numbers
  7 | 
  8 | 	# Register this module to the list of modules to include
  9 | 	Linguistics::EN.register_extension( self )
 10 | 
 11 | 	#
 12 | 	# Numerals, ordinals, and numbers-to-words
 13 | 	#
 14 | 
 15 | 	# Default configuration for #numwords; options passed by the caller are merged over these
 16 | 	NUMWORD_DEFAULTS = {
 17 | 		:group		=> 0,
 18 | 		:comma		=> ', ',
 19 | 		:and		=> ' and ',
 20 | 		:zero		=> 'zero',
 21 | 		:decimal	=> 'point',
 22 | 		:asArray	=> false, # NOTE(review): the #numwords docs call this option :as_array -- confirm which key is read
 23 | 	}
 24 | 
 25 | 	# Default configuration arguments for the #quantify function
 26 | 	QUANTIFY_DEFAULTS = {
 27 | 		:joinword	=> " of ",
 28 | 	}
 29 | 
 30 | 	# Default ranges for #quantify
 31 | 	SEVERAL_RANGE = 2..5
 32 | 	NUMBER_RANGE = 6..19
 33 | 	NUMEROUS_RANGE = 20..45
 34 | 	MANY_RANGE = 46..99
 35 | 
 36 | 	# Numerical inflections: ordinal suffix by trailing digit(s); 11-13 are listed separately because they take 'th'
 37 | 	NTH = {
 38 | 		0 => 'th',
 39 | 		1 => 'st',
 40 | 		2 => 'nd',
 41 | 		3 => 'rd',
 42 | 		4 => 'th',
 43 | 		5 => 'th',
 44 | 		6 => 'th',
 45 | 		7 => 'th',
 46 | 		8 => 'th',
 47 | 		9 => 'th',
 48 | 		11 => 'th',
 49 | 		12 => 'th',
 50 | 		13 => 'th',
 51 | 	}
 52 | 
 53 | 	# Ordinal word parts: ending of a cardinal number word => the ending of its ordinal form
 54 | 	ORDINALS = {
 55 | 		'ty'     => 'tieth',
 56 | 		'one'    => 'first',
 57 | 		'two'    => 'second',
 58 | 		'three'  => 'third',
 59 | 		'five'   => 'fifth',
 60 | 		'eight'  => 'eighth',
 61 | 		'nine'   => 'ninth',
 62 | 		'twelve' => 'twelfth',
 63 | 	}
 64 | 	ORDINAL_SUFFIXES = ORDINALS.keys.join("|") + "|" # alternation of the endings; the trailing "|" adds an empty alternative
 65 | 	ORDINALS[""] = 'th' # the entry for that empty alternative, added after the join above
 66 | 
 67 | 	# Numeral names: UNITS and TENS are indexed by digit, TEENS by (value - 10), THOUSANDS by three-digit group
 68 | 	UNITS = [''] + %w[one two three four five six seven eight nine]
 69 | 	TEENS = %w[ten eleven twelve thirteen fourteen
 70 | 			  fifteen sixteen seventeen eighteen nineteen]
 71 | 	TENS  = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety]
 72 | 	THOUSANDS = [' ', ' thousand'] + %w[
 73 | 		m b tr quadr quint sext sept oct non dec undec duodec tredec
 74 | 		quattuordec quindec sexdec septemdec octodec novemdec vigint
 75 | 	].collect {|prefix| ' ' + prefix + 'illion'}
 76 | 
 77 | 
 78 | 	# Functions for transforming digit groups into word phrases, indexed by the
 79 | 	# number of digits in the group; e.g., <tt>NUMBER_TO_WORDS_FUNCTIONS[2]</tt>
 80 | 	# is the function for double-digit groups. Each is called with the word to
 81 | 	# use for zero, followed by the digits (which must respond to #nonzero?).
 82 | 	NUMBER_TO_WORDS_FUNCTIONS = [
 83 | 		proc {|*args| raise "No digits (#{args.inspect})"}, # [0]: a group with no digits is always an error
 84 | 
 85 | 		# Single-digits: to_units(x) for a nonzero digit, otherwise the zero word plus a trailing space
 86 | 		proc {|zero,x|
 87 | 			(x.nonzero? ? to_units(x) : "#{zero} ")
 88 | 		},
 89 | 
 90 | 		# Double-digits: to_tens(x, y) unless the leading digit is zero; then the zero word is spelled out first
 91 | 		proc {|zero,x,y|
 92 | 			if x.nonzero?
 93 | 				to_tens( x, y )
 94 | 			elsif y.nonzero?
 95 | 				"#{zero} " + NUMBER_TO_WORDS_FUNCTIONS[1].call( zero, y )
 96 | 			else
 97 | 				([zero] * 2).join(" ")
 98 | 			end
 99 | 		},
100 | 
101 | 		# Triple-digits: the single-digit phrase for x followed directly by the double-digit phrase for y and z
102 | 		proc {|zero,x,y,z|
103 | 			NUMBER_TO_WORDS_FUNCTIONS[1].call(zero,x) + 
104 | 			NUMBER_TO_WORDS_FUNCTIONS[2].call(zero,y,z)
105 | 		}
106 | 	]
107 | 
108 | 
109 | 	### Return the specified number as english words. One or more configuration
110 | 	### values may be passed to control the returned String:
111 | 	### 
112 | 	### [<b>:group</b>]
113 | 	###   Controls how many numbers at a time are grouped together. Valid values
114 | 	###   are <code>0</code> (normal grouping), <code>1</code> (single-digit 
115 | 	###   grouping, e.g., "one, two, three, four"), <code>2</code> 
116 | 	###   (double-digit grouping, e.g., "twelve, thirty-four", or <code>3</code>
117 | 	###   (triple-digit grouping, e.g., "one twenty-three, four").
118 | 	### [<b>:comma</b>]
119 | 	###   Set the character/s used to separate word groups. Defaults to 
120 | 	###   <code>", "</code>.
121 | 	### [<b>:and</b>]
122 | 	###   Set the word and/or characters used where <code>' and ' </code>(the 
123 | 	###   default) is normally used. Setting <code>:and</code> to 
124 | 	###   <code>' '</code>, for example, will cause <code>2556</code> to be 
125 | 	###   returned as "two-thousand, five hundred fifty-six" instead of 
126 | 	###   "two-thousand, five hundred and fifty-six".
127 | 	### [<b>:zero</b>]
128 | 	###   Set the word used to represent the numeral <code>0</code> in the 
129 | 	###   result. <code>'zero'</code> is the default.
130 | 	### [<b>:decimal</b>]
131 | 	###   Set the translation of any decimal points in the number; the default
132 | 	###   is <code>'point'</code>.
133 | 	### [<b>:as_array</b>]
134 | 	###   If set to a true value, the number will be returned as an array of
135 | 	###   word groups instead of a String.
136 | 	def numwords( hashargs={} )
137 | 		num = self.to_s
138 | 		self.log.debug "Turning %p into number words..." % [ num ]
139 | 		config = NUMWORD_DEFAULTS.merge( hashargs )
140 | 		raise "Bad chunking option: #{config[:group]}" unless
141 | 			config[:group].between?( 0, 3 )
142 | 
143 | 		# Array of number parts: first is everything to the left of the first
144 | 		# decimal, followed by any groups of decimal-delimted numbers after that
145 | 		parts = []
146 | 
147 | 		# Wordify any sign prefix
148 | 		sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''
149 | 
150 | 		# Strip any ordinal suffixes
151 | 		ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )
152 | 
153 | 		# Split the number into chunks delimited by '.'
154 | 		chunks = if !config[:decimal].empty? then
155 | 					 if config[:group].nonzero?
156 | 						 num.split(/\./)
157 | 					 else
158 | 						 num.split(/\./, 2)
159 | 					 end
160 | 				 else
161 | 					 [ num ]
162 | 				 end
163 | 
164 | 		# Wordify each chunk, pushing arrays into the parts array
165 | 		chunks.each_with_index do |chunk,section|
166 | 			chunk.gsub!( /\D+/, '' )
167 | 			self.log.debug "  working on chunk %p (section %d)" % [ chunk, section ]
168 | 
169 | 			# If there's nothing in this chunk of the number, set it to zero
170 | 			# unless it's the whole-number part, in which case just push an
171 | 			# empty array.
172 | 			if chunk.empty?
173 | 				self.log.debug "  chunk is empty..."
174 | 				if section.zero?
175 | 					self.log.debug "  skipping the empty whole-number part"
176 | 					parts.push []
177 | 					next
178 | 				end
179 | 			end
180 | 
181 | 			# Split the number section into wordified parts unless this is the
182 | 			# second or succeeding part of a non-group number
183 | 			unless config[:group].zero? && section.nonzero?
184 | 				parts.push number_to_words( chunk, config )
185 | 				self.log.debug "  added %p" % [ parts.last ]
186 | 			else
187 | 				parts.push number_to_words( chunk, config.merge(:group => 1) )
188 | 				self.log.debug "  added %p" % [ parts.last ]
189 | 			end
190 | 		end
191 | 
192 | 		self.log.debug "Parts => %p" % [ parts ]
193 | 
194 | 		# Turn the last word of the whole-number part back into an ordinal if
195 | 		# the original number came in that way.
196 | 		if ord && !parts[0].empty?
197 | 			self.log.debug "  turning the last whole-number part back into an ordinal, since it " +
198 | 				"came in that way"
199 | 			parts[0][-1] = ordinal( parts[0].last )
200 | 		end
201 | 
202 | 		# If the caller's expecting an Array return, just flatten and return the
203 | 		# parts array.
204 | 		if config[:as_array]
205 | 			self.log.debug "  returning the number parts as an Array"
206 | 			unless sign.empty?
207 | 				parts[0].unshift( sign )
208 | 			end
209 | 			return parts.flatten
210 | 		end
211 | 
212 | 		# Catenate each sub-parts array into a whole number part and one or more
213 | 		# post-decimal parts. If grouping is turned on, all sub-parts get joined
214 | 		# with commas, otherwise just the whole-number part is.
215 | 		if config[:group].zero?
216 | 			self.log.debug "  no custom grouping"
217 | 			if parts[0].length > 1
218 | 				self.log.debug "  whole and decimal part; working on the whole number first"
219 | 
220 | 				# Join all but the last part together with commas
221 | 				wholenum = parts[0][0...-1].join( config[:comma] )
222 | 
223 | 				# If the last part is just a single word, append it to the
224 | 				# wholenum part with an 'and'. This is to get things like 'three
225 | 				# thousand and three' instead of 'three thousand, three'.
226 | 				if /^\s*(\S+)\s*$/ =~ parts[0].last
227 | 					self.log.debug "last word is a single word; using the 'and' separator: %p" %
228 | 						[ config[:and] ]
229 | 					wholenum += config[:and] + parts[0].last
230 | 				else
231 | 					self.log.debug "last word has multiple words; using the comma separator: %p" %
232 | 						[ config[:comma] ]
233 | 					wholenum += config[:comma] + parts[0].last
234 | 				end
235 | 			else
236 | 				self.log.debug "  non-decimal."
237 | 				wholenum = parts[0][0]
238 | 			end
239 | 
240 | 			decimals = parts[1..-1].collect {|part| part.join(" ")}
241 | 			self.log.debug "  wholenum: %p; decimals: %p" % [ wholenum, decimals ]
242 | 
243 | 			# Join with the configured decimal; if it's empty, just join with
244 | 			# spaces.
245 | 			unless config[:decimal].empty?
246 | 				self.log.debug "  joining with the configured decimal: %p" % [ config[:decimal] ]
247 | 				return sign + ([ wholenum ] + decimals).
248 | 					join( " #{config[:decimal]} " ).strip
249 | 			else
250 | 				self.log.debug "  joining with the spaces since no decimal is configured"
251 | 				return sign + ([ wholenum ] + decimals).
252 | 					join( " " ).strip
253 | 			end
254 | 
255 | 		else
256 | 			self.log.debug "  grouping with decimal %p and comma %p" %
257 | 				config.values_at( :decimal, :comma )
258 | 			return parts.compact.
259 | 				separate( config[:decimal] ).
260 | 				delete_if {|el| el.empty?}.
261 | 				join( config[:comma] ).
262 | 				strip
263 | 		end
264 | 	end
265 | 	Linguistics::EN.register_lprintf_formatter :NUMWORDS, :numwords
266 | 
267 | 
268 | 	### Transform the given +number+ into an ordinal word. The +number+ object
269 | 	### can be either an Integer or a String.
270 | 	def ordinal
271 | 		if self.respond_to?( :to_int )
272 | 			number = self.to_int
273 | 			return "%d%s" % [ number, (NTH[ number % 100 ] || NTH[ number % 10 ]) ]
274 | 
275 | 		else
276 | 			number = self.to_s
277 | 			self.log.debug "Making an ordinal out of a non-Integer (%p)" % [ number ]
278 | 			return number.sub( /(#{ORDINAL_SUFFIXES})\Z/ ) { ORDINALS[$1] }
279 | 		end
280 | 	end
281 | 	Linguistics::EN.register_lprintf_formatter :ORD, :ordinal
282 | 
283 | 
284 | 	### Transform the given +number+ into an ordinate word.
285 | 	def ordinate
286 | 		return self.numwords.en.ordinal
287 | 	end
288 | 
289 | 
290 | 	### Return a phrase describing the specified +number+ of objects in the
291 | 	### inflected object in general terms. The following options can be used to 
292 | 	### control the makeup of the returned quantity String:
293 | 	###
294 |     ### [<b>:joinword</b>]
295 |     ###   Sets the word (and any surrounding spaces) used as the word separating the
296 |     ###   quantity from the noun in the resulting string. Defaults to <tt>' of
297 |     ###   '</tt>.
298 | 	def quantify( number=0, args={} )
299 | 		phrase = self.to_s
300 | 		self.log.debug "Quantifying %d instances of %p" % [ number, phrase ]
301 | 
302 | 		num = number.to_i
303 | 		config = QUANTIFY_DEFAULTS.merge( args )
304 | 
305 | 		case num
306 | 		when 0
307 | 			phrase.en.no
308 | 		when 1
309 | 			phrase.en.a
310 | 		when SEVERAL_RANGE
311 | 			"several " + phrase.en.plural( num )
312 | 		when NUMBER_RANGE
313 | 			"a number of " + phrase.en.plural( num )
314 | 		when NUMEROUS_RANGE
315 | 			"numerous " + phrase.en.plural( num )
316 | 		when MANY_RANGE
317 | 			"many " + phrase.en.plural( num )
318 | 		else
319 | 
320 | 			# Anything bigger than the MANY_RANGE gets described like
321 | 			# "hundreds of thousands of..." or "millions of..."
322 | 			# depending, of course, on how many there are.
323 | 			thousands, subthousands = Math::log10( num ).to_i.divmod( 3 )
324 | 			self.log.debug "thousands = %p, subthousands = %p" % [ thousands, subthousands ]
325 | 
326 | 			stword =
327 | 				case subthousands
328 | 				when 2
329 | 					"hundreds"
330 | 				when 1
331 | 					"tens"
332 | 				else
333 | 					nil
334 | 				end
335 | 
336 | 			unless thousands.zero?
337 | 				thword = to_thousands( thousands ).strip.en.plural
338 | 			end
339 | 
340 | 			[	# Hundreds (of)...
341 | 				stword,
342 | 
343 | 				# thousands (of)
344 | 				thword,
345 | 
346 | 				# stars.
347 | 				phrase.en.plural(number)
348 | 			].compact.join( config[:joinword] )
349 | 		end
350 | 	end
351 | 	Linguistics::EN.register_lprintf_formatter :QUANT, :quantify
352 | 
353 | 
354 | 	###############
355 | 	module_function
356 | 	###############
357 | 
358 | 	### Transform the specified number of units-place numerals into a
359 | 	### word-phrase at the given number of +thousands+ places.
360 | 	def to_units( units, thousands=0 )
361 | 		return UNITS[ units ] + to_thousands( thousands )
362 | 	end
363 | 
364 | 
365 | 	### Transform the specified number of tens- and units-place numerals into a
366 | 	### word-phrase at the given number of +thousands+ places.
367 | 	def to_tens( tens, units, thousands=0 )
368 | 		raise ArgumentError, "tens: no implicit conversion from nil" unless tens
369 | 		raise ArgumentError, "units: no implicit conversion from nil" unless units
370 | 
371 | 		unless tens == 1
372 | 			return TENS[ tens ] + ( tens.nonzero? && units.nonzero? ? '-' : '' ) +
373 | 				to_units( units, thousands )
374 | 		else
375 | 			return TEENS[ units ] + to_thousands( thousands )
376 | 		end
377 | 	end
378 | 
379 | 
380 | 	### Transform the specified number of hundreds-, tens-, and units-place
381 | 	### numerals into a word phrase. If the number of thousands (+thousands+) is
382 | 	### greater than 0, it will be used to determine where the decimal point is
383 | 	### in relation to the hundreds-place number.
384 | 	def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
385 | 		joinword = ' ' if joinword.empty?
386 | 		if hundreds.nonzero?
387 | 			return to_units( hundreds ) + " hundred" +
388 | 				(tens.nonzero? || units.nonzero? ? joinword : '') +
389 | 				to_tens( tens, units ) +
390 | 				to_thousands( thousands )
391 | 		elsif tens.nonzero? || units.nonzero?
392 | 			return to_tens( tens, units ) + to_thousands( thousands )
393 | 		else
394 | 			return nil
395 | 		end
396 | 	end
397 | 
398 | 	### Transform the specified number into one or more words like 'thousand',
399 | 	### 'million', etc. Uses the thousands (American) system.
400 | 	def to_thousands( thousands=0 )
401 | 		parts = []
402 | 		(0..thousands).step( THOUSANDS.length - 1 ) {|i|
403 | 			if i.zero?
404 | 				parts.push THOUSANDS[ thousands % (THOUSANDS.length - 1) ]
405 | 			else
406 | 				parts.push THOUSANDS.last
407 | 			end
408 | 		}
409 | 
410 | 		return parts.join(" ")
411 | 	end
412 | 
413 | 
414 | 	### Return the specified number +number+ as an array of number phrases.
415 | 	def number_to_words( number, config )
416 | 		return [config[:zero]] if number.to_i.zero?
417 | 
418 | 		if config[:group].nonzero? then
419 | 			return number_to_custom_word_groups( number, config[:group], config[:zero] )
420 | 		else
421 | 			return number_to_standard_word_groups( number, config[:and] )
422 | 		end
423 | 	end
424 | 
425 | 
426 | 	### Split the given +number+ up into groups of +groupsize+ and return
427 | 	### them as an Array of words. Use +zeroword+ for any occurences of '0'.
428 | 	def number_to_custom_word_groups( number, groupsize, zeroword="zero" )
429 | 		self.log.debug "Making custom word groups of %d digits out of %p" % [ groupsize, number ]
430 | 
431 | 		# Build a Regexp with <config[:group]> number of digits. Any past
432 | 		# the first are optional.
433 | 		re = Regexp.new( "(\\d)" + ("(\\d)?" * (groupsize - 1)) )
434 | 		self.log.debug "  regex for matching groups of %d digits is %p" % [ groupsize, re ]
435 | 
436 | 		# Scan the string, and call the word-chunk function that deals with
437 | 		# chunks of the found number of digits.
438 | 		return number.to_s.scan( re ).collect do |digits|
439 | 			self.log.debug "   digits = %p" % [ digits ]
440 | 			numerals = digits.flatten.compact.collect {|i| i.to_i}
441 | 			self.log.debug "   numerals = %p" % [ numerals ]
442 | 
443 | 			fn = NUMBER_TO_WORDS_FUNCTIONS[ numerals.length ]
444 | 			self.log.debug "  number to word function is #%d: %p" % [ numerals.length, fn ]
445 | 			fn.call( zeroword, *numerals ).strip
446 | 		end
447 | 	end
448 | 
449 | 
450 | 	### Split the given +number+ up into groups of three and return
451 | 	### the Array of words describing each group in the standard style.
452 | 	def number_to_standard_word_groups( number, andword="and" )
453 | 		phrase = number.to_s
454 | 		phrase.sub!( /\A\s*0+/, '' )
455 | 		chunks = []
456 | 		mill = 0
457 | 		self.log.debug "Making standard word groups out of %p" % [ phrase ]
458 | 
459 | 		# Match backward from the end of the digits in the string, turning
460 | 		# chunks of three, of two, and of one into words.
461 | 		mill += 1 while
462 | 			phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) do
463 | 				words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill, andword )
464 | 				chunks.unshift words.strip.squeeze(' ') unless words.nil?
465 | 				''
466 | 			end
467 | 
468 | 		phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) do
469 | 			chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
470 | 			''
471 | 		end
472 | 
473 | 		phrase.sub!( /(\d)(?=\D*\Z)/ ) do
474 | 			chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
475 | 			''
476 | 		end
477 | 
478 | 		return chunks
479 | 	end
480 | 
481 | 
482 | end # module Linguistics::EN::Numbers
483 | 
484 | 


--------------------------------------------------------------------------------
/spec/linguistics/en/infinitives_spec.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env spec -cfs
  2 | 
# Load-path setup. A BEGIN block is executed before the rest of this file,
# so the require statements that follow it can find both the project's
# spec helpers and its library code.
BEGIN {
	require 'pathname'

	# Three directory levels above the directory containing this file
	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent

	libdir = basedir + "lib"

	# Put both directories at the front of the load path, unless they are
	# already on it.
	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
}
 12 | 
 13 | require 'rspec'
 14 | require 'spec/lib/helpers'
 15 | 
 16 | require 'linguistics'
 17 | require 'linguistics/en/infinitives'
 18 | 
 19 | 
 20 | describe Linguistics::EN::Infinitives do
 21 | 
 22 | 	before( :all ) do
 23 | 		setup_logging( :fatal )
 24 | 		Linguistics.use( :en, :proxy => true )
 25 | 		include Linguistics::EN
 26 | 	end
 27 | 
 28 | 	after( :all ) do
 29 | 		reset_logging()
 30 | 	end
 31 | 
 32 | 
 33 | 	describe "Infinitive object class" do
 34 | 		it "compares as equal if its primary word is equal" do
 35 | 			Linguistics::EN::Infinitives::Infinitive.new( 'basse', 'bass', 's', '2' ).should ==
 36 | 				'basse'
 37 | 		end
 38 | 
 39 | 		it "compares as equal if its secondary word is equal" do
 40 | 			Linguistics::EN::Infinitives::Infinitive.new( 'basse', 'bass', 's', '2' ).should ==
 41 | 				'bass'
 42 | 		end
 43 | 
 44 | 	end
 45 | 
 46 | 
 47 | 	it "uses rule 1 when calculating the infinitive of 'aches'" do
 48 | 		"aches".en.infinitive.should == 'ache'
 49 | 		"aches".en.infinitive.rule.should == '1'
 50 | 	end
 51 | 
 52 | 	it "uses rule 2 when calculating the infinitive of 'vases'" do
 53 | 		"vases".en.infinitive.should == 'vase'
 54 | 		"vases".en.infinitive.rule.should == '2'
 55 | 	end
 56 | 
 57 | 	it "uses rule 2 when calculating the infinitive of 'basses'" do
 58 | 		"basses".en.infinitive.should == 'bass'
 59 | 		"basses".en.infinitive.rule.should == '2'
 60 | 	end
 61 | 
 62 | 	it "uses rule 3 when calculating the infinitive of 'axes'" do
 63 | 		"axes".en.infinitive.should == 'axe'
 64 | 		"axes".en.infinitive.rule.should == '3'
 65 | 	end
 66 | 
 67 | 	it "uses rule 3 when calculating the infinitive of 'fixes'" do
 68 | 		"fixes".en.infinitive.should == 'fix'
 69 | 		"fixes".en.infinitive.rule.should == '3'
 70 | 	end
 71 | 
 72 | 	it "uses rule 4 when calculating the infinitive of 'hazes'" do
 73 | 		"hazes".en.infinitive.should == 'haze'
 74 | 		"hazes".en.infinitive.rule.should == '4'
 75 | 	end
 76 | 
 77 | 	it "uses rule 4 when calculating the infinitive of 'buzzes'" do
 78 | 		"buzzes".en.infinitive.should == 'buzz'
 79 | 		"buzzes".en.infinitive.rule.should == '4'
 80 | 	end
 81 | 
 82 | 	it "uses rule 6a when calculating the infinitive of 'caress'" do
 83 | 		"caress".en.infinitive.should == 'caress'
 84 | 		"caress".en.infinitive.rule.should == '6a'
 85 | 	end
 86 | 
 87 | 	it "uses rule 6b when calculating the infinitive of 'bans'" do
 88 | 		"bans".en.infinitive.should == 'ban'
 89 | 		"bans".en.infinitive.rule.should == '6b'
 90 | 	end
 91 | 
 92 | 	it "uses rule 7 when calculating the infinitive of 'Jones's'" do
 93 | 		"Jones's".en.infinitive.should == 'Jones'
 94 | 		"Jones's".en.infinitive.rule.should == '7'
 95 | 	end
 96 | 
 97 | 	it "uses rule 8 when calculating the infinitive of 'creater'" do
 98 | 		"creater".en.infinitive.should == 'creater'
 99 | 		"creater".en.infinitive.rule.should == '8'
100 | 	end
101 | 
102 | 	it "uses rule 9 when calculating the infinitive of 'reacter'" do
103 | 		"reacter".en.infinitive.should == 'reacter'
104 | 		"reacter".en.infinitive.rule.should == '9'
105 | 	end
106 | 
107 | 	it "uses rule 10 when calculating the infinitive of 'copier'" do
108 | 		"copier".en.infinitive.should == 'copy'
109 | 		"copier".en.infinitive.rule.should == '10'
110 | 	end
111 | 
112 | 	it "uses rule 11 when calculating the infinitive of 'baker'" do
113 | 		"baker".en.infinitive.should == 'bake'
114 | 		"baker".en.infinitive.rule.should == '11'
115 | 	end
116 | 
117 | 	it "uses rule 11 when calculating the infinitive of 'smaller'" do
118 | 		"smaller".en.infinitive.should == 'small'
119 | 		"smaller".en.infinitive.rule.should == '11'
120 | 	end
121 | 
122 | 	it "uses rule 12a when calculating the infinitive of 'curried'" do
123 | 		"curried".en.infinitive.should == 'curry'
124 | 		"curried".en.infinitive.rule.should == '12a'
125 | 	end
126 | 
127 | 	it "uses rule 12b when calculating the infinitive of 'bored'" do
128 | 		"bored".en.infinitive.should == 'bore'
129 | 		"bored".en.infinitive.rule.should == '12b'
130 | 	end
131 | 
132 | 	it "uses rule 12b when calculating the infinitive of 'seated'" do
133 | 		"seated".en.infinitive.should == 'seat'
134 | 		"seated".en.infinitive.rule.should == '12b'
135 | 	end
136 | 
137 | 	it "uses rule 12b when calculating the infinitive of 'tipped'" do
138 | 		"tipped".en.infinitive.should == 'tip'
139 | 		"tipped".en.infinitive.rule.should == '12b'
140 | 	end
141 | 
142 | 	it "uses rule 12b when calculating the infinitive of 'kitted'" do
143 | 		"kitted".en.infinitive.should == 'kit'
144 | 		"kitted".en.infinitive.rule.should == '12b'
145 | 	end
146 | 
147 | 	it "uses rule 12b when calculating the infinitive of 'capped'" do
148 | 		"capped".en.infinitive.should == 'cap'
149 | 		"capped".en.infinitive.rule.should == '12b'
150 | 	end
151 | 
152 | 	it "uses rule 12b when calculating the infinitive of 'chopped'" do
153 | 		"chopped".en.infinitive.should == 'chop'
154 | 		"chopped".en.infinitive.rule.should == '12b'
155 | 	end
156 | 
157 | 	it "uses rule 13a when calculating the infinitive of 'flies'" do
158 | 		"flies".en.infinitive.should == 'fly'
159 | 		"flies".en.infinitive.rule.should == '13a'
160 | 	end
161 | 
162 | 	it "uses rule 13b when calculating the infinitive of 'palates'" do
163 | 		"palates".en.infinitive.should == 'palate'
164 | 		"palates".en.infinitive.rule.should == '13b'
165 | 	end
166 | 
167 | 	it "uses rule 14a when calculating the infinitive of 'liveliest'" do
168 | 		"liveliest".en.infinitive.should == 'lively'
169 | 		"liveliest".en.infinitive.rule.should == '14a'
170 | 	end
171 | 
172 | 	it "uses rule 14b when calculating the infinitive of 'wisest'" do
173 | 		"wisest".en.infinitive.should == 'wise'
174 | 		"wisest".en.infinitive.rule.should == '14b'
175 | 	end
176 | 
177 | 	it "uses rule 14b when calculating the infinitive of 'strongest'" do
178 | 		"strongest".en.infinitive.should == 'strong'
179 | 		"strongest".en.infinitive.rule.should == '14b'
180 | 	end
181 | 
182 | 	it "uses rule 15 when calculating the infinitive of 'living'" do
183 | 		"living".en.infinitive.should == 'live'
184 | 		"living".en.infinitive.rule.should == '15'
185 | 	end
186 | 
187 | 	it "uses rule 15 when calculating the infinitive of 'laughing'" do
188 | 		"laughing".en.infinitive.should == 'laugh'
189 | 		"laughing".en.infinitive.rule.should == '15'
190 | 	end
191 | 
192 | 	it "uses rule 15 when calculating the infinitive of 'swaying'" do
193 | 		"swaying".en.infinitive.should == 'sway'
194 | 		"swaying".en.infinitive.rule.should == '15'
195 | 	end
196 | 
197 | 	it "uses rule 15 when calculating the infinitive of 'catching'" do
198 | 		"catching".en.infinitive.should == 'catch'
199 | 		"catching".en.infinitive.rule.should == '15'
200 | 	end
201 | 
202 | 	it "uses rule 15 when calculating the infinitive of 'smiling'" do
203 | 		"smiling".en.infinitive.should == 'smile'
204 | 		"smiling".en.infinitive.rule.should == '15'
205 | 	end
206 | 
207 | 	it "uses rule 15 when calculating the infinitive of 'swimming'" do
208 | 		"swimming".en.infinitive.should == 'swim'
209 | 		"swimming".en.infinitive.rule.should == '15'
210 | 	end
211 | 
212 | 	it "uses rule 15 when calculating the infinitive of 'running'" do
213 | 		"running".en.infinitive.should == 'run'
214 | 		"running".en.infinitive.rule.should == '15'
215 | 	end
216 | 
217 | 	it "uses rule 15 when calculating the infinitive of 'floating'" do
218 | 		"floating".en.infinitive.should == 'float'
219 | 		"floating".en.infinitive.rule.should == '15'
220 | 	end
221 | 
222 | 	it "uses rule 15 when calculating the infinitive of 'keyboarding'" do
223 | 		"keyboarding".en.infinitive.should == 'keyboard'
224 | 		"keyboarding".en.infinitive.rule.should == '15'
225 | 	end
226 | 
227 | 	it "uses rule 15 when calculating the infinitive of 'wrestling'" do
228 | 		"wrestling".en.infinitive.should == 'wrestle'
229 | 		"wrestling".en.infinitive.rule.should == '15'
230 | 	end
231 | 
232 | 	it "uses rule 15 when calculating the infinitive of 'traveling'" do
233 | 		"traveling".en.infinitive.should == 'travel'
234 | 		"traveling".en.infinitive.rule.should == '15'
235 | 	end
236 | 
237 | 	it "uses rule 15 when calculating the infinitive of 'traipsing'" do
238 | 		"traipsing".en.infinitive.should == 'traipse'
239 | 		"traipsing".en.infinitive.rule.should == '15'
240 | 	end
241 | 
242 | 	it "uses rule 16 when calculating the infinitive of 'stylist'" do
243 | 		"stylist".en.infinitive.should == 'style'
244 | 		"stylist".en.infinitive.rule.should == '16'
245 | 	end
246 | 
247 | 	it "uses rule 16 when calculating the infinitive of 'dentist'" do
248 | 		"dentist".en.infinitive.should == 'dent'
249 | 		"dentist".en.infinitive.rule.should == '16'
250 | 	end
251 | 
252 | 	it "uses rule 17 when calculating the infinitive of 'cubism'" do
253 | 		"cubism".en.infinitive.should == 'cube'
254 | 		"cubism".en.infinitive.rule.should == '17'
255 | 	end
256 | 
257 | 	it "uses rule 17 when calculating the infinitive of 'socialism'" do
258 | 		"socialism".en.infinitive.should == 'social'
259 | 		"socialism".en.infinitive.rule.should == '17'
260 | 	end
261 | 
262 | 	it "uses rule 18 when calculating the infinitive of 'scarcity'" do
263 | 		"scarcity".en.infinitive.should == 'scarce'
264 | 		"scarcity".en.infinitive.rule.should == '18'
265 | 	end
266 | 
267 | 	it "uses rule 18 when calculating the infinitive of 'rapidity'" do
268 | 		"rapidity".en.infinitive.should == 'rapid'
269 | 		"rapidity".en.infinitive.rule.should == '18'
270 | 	end
271 | 
272 | 	it "uses rule 19 when calculating the infinitive of 'immunize'" do
273 | 		"immunize".en.infinitive.should == 'immune'
274 | 		"immunize".en.infinitive.rule.should == '19'
275 | 	end
276 | 
277 | 	it "uses rule 19 when calculating the infinitive of 'lionize'" do
278 | 		"lionize".en.infinitive.should == 'lion'
279 | 		"lionize".en.infinitive.rule.should == '19'
280 | 	end
281 | 
282 | 	it "uses rule 20c when calculating the infinitive of 'livable'" do
283 | 		"livable".en.infinitive.should == 'live'
284 | 		"livable".en.infinitive.rule.should == '20c'
285 | 	end
286 | 
287 | 	it "uses rule 20c when calculating the infinitive of 'portable'" do
288 | 		"portable".en.infinitive.should == 'port'
289 | 		"portable".en.infinitive.rule.should == '20c'
290 | 	end
291 | 
292 | 	it "uses rule 22 when calculating the infinitive of 'nobility'" do
293 | 		"nobility".en.infinitive.should == 'noble'
294 | 		"nobility".en.infinitive.rule.should == '22'
295 | 	end
296 | 
297 | 	it "uses rule 23 when calculating the infinitive of 'identifiable'" do
298 | 		"identifiable".en.infinitive.should == 'identify'
299 | 		"identifiable".en.infinitive.rule.should == '23'
300 | 	end
301 | 
302 | 	it "uses rule 24 when calculating the infinitive of 'psychologist'" do
303 | 		"psychologist".en.infinitive.should == 'psychology'
304 | 		"psychologist".en.infinitive.rule.should == '24'
305 | 	end
306 | 
307 | 	it "uses rule 25 when calculating the infinitive of 'photographic'" do
308 | 		"photographic".en.infinitive.should == 'photography'
309 | 		"photographic".en.infinitive.rule.should == '25'
310 | 	end
311 | 
312 | 	it "uses rule 26 when calculating the infinitive of 'stylistic'" do
313 | 		"stylistic".en.infinitive.should == 'stylist'
314 | 		"stylistic".en.infinitive.rule.should == '26'
315 | 	end
316 | 
317 | 	it "uses rule 27 when calculating the infinitive of 'martensitic'" do
318 | 		"martensitic".en.infinitive.should == 'martensite'
319 | 		"martensitic".en.infinitive.rule.should == '27'
320 | 	end
321 | 
322 | 	it "uses rule 27 when calculating the infinitive of 'politic'" do
323 | 		"politic".en.infinitive.should == 'polite'
324 | 		"politic".en.infinitive.rule.should == '27'
325 | 	end
326 | 
327 | 	it "uses rule 28 when calculating the infinitive of 'ladylike'" do
328 | 		"ladylike".en.infinitive.should == 'lady'
329 | 		"ladylike".en.infinitive.rule.should == '28'
330 | 	end
331 | 
332 | 	it "uses rule 29 when calculating the infinitive of 'biologic'" do
333 | 		"biologic".en.infinitive.should == 'biology'
334 | 		"biologic".en.infinitive.rule.should == '29'
335 | 	end
336 | 
337 | 	it "uses rule 30 when calculating the infinitive of 'battlement'" do
338 | 		"battlement".en.infinitive.should == 'battle'
339 | 		"battlement".en.infinitive.rule.should == '30'
340 | 	end
341 | 
342 | 	it "uses rule 31 when calculating the infinitive of 'supplemental'" do
343 | 		"supplemental".en.infinitive.should == 'supplement'
344 | 		"supplemental".en.infinitive.rule.should == '31'
345 | 	end
346 | 
347 | 	it "uses rule 32 when calculating the infinitive of 'thermometry'" do
348 | 		"thermometry".en.infinitive.should == 'thermometer'
349 | 		"thermometry".en.infinitive.rule.should == '32'
350 | 	end
351 | 
352 | 	it "uses rule 33 when calculating the infinitive of 'inadvertence'" do
353 | 		"inadvertence".en.infinitive.should == 'inadvertent'
354 | 		"inadvertence".en.infinitive.rule.should == '33'
355 | 	end
356 | 
357 | 	it "uses rule 34 when calculating the infinitive of 'potency'" do
358 | 		"potency".en.infinitive.should == 'potent'
359 | 		"potency".en.infinitive.rule.should == '34'
360 | 	end
361 | 
362 | 	it "uses rule 35 when calculating the infinitive of 'discipleship'" do
363 | 		"discipleship".en.infinitive.should == 'disciple'
364 | 		"discipleship".en.infinitive.rule.should == '35'
365 | 	end
366 | 
367 | 	it "uses rule 36 when calculating the infinitive of 'mystical'" do
368 | 		"mystical".en.infinitive.should == 'mystic'
369 | 		"mystical".en.infinitive.rule.should == '36'
370 | 	end
371 | 
372 | 	it "uses rule 37 when calculating the infinitive of 'regional'" do
373 | 		"regional".en.infinitive.should == 'region'
374 | 		"regional".en.infinitive.rule.should == '37'
375 | 	end
376 | 
377 | 	it "uses rule 37 when calculating the infinitive of 'national'" do
378 | 		"national".en.infinitive.should == 'nation'
379 | 		"national".en.infinitive.rule.should == '37'
380 | 	end
381 | 
382 | 	it "uses rule 38 when calculating the infinitive of 'horribly'" do
383 | 		"horribly".en.infinitive.should == 'horrible'
384 | 		"horribly".en.infinitive.rule.should == '38'
385 | 	end
386 | 
387 | 	it "uses rule 39 when calculating the infinitive of 'scantily'" do
388 | 		"scantily".en.infinitive.should == 'scanty'
389 | 		"scantily".en.infinitive.rule.should == '39'
390 | 	end
391 | 
392 | 	it "uses rule 40 when calculating the infinitive of 'partly'" do
393 | 		"partly".en.infinitive.should == 'part'
394 | 		"partly".en.infinitive.rule.should == '40'
395 | 	end
396 | 
397 | 	it "uses rule 41a when calculating the infinitive of 'dutiful'" do
398 | 		"dutiful".en.infinitive.should == 'duty'
399 | 		"dutiful".en.infinitive.rule.should == '41a'
400 | 	end
401 | 
402 | 	it "uses rule 41b when calculating the infinitive of 'harmful'" do
403 | 		"harmful".en.infinitive.should == 'harm'
404 | 		"harmful".en.infinitive.rule.should == '41b'
405 | 	end
406 | 
407 | 	it "uses rule 42a when calculating the infinitive of 'likelihood'" do
408 | 		"likelihood".en.infinitive.should == 'likely'
409 | 		"likelihood".en.infinitive.rule.should == '42a'
410 | 	end
411 | 
412 | 	it "uses rule 42b when calculating the infinitive of 'neighborhood'" do
413 | 		"neighborhood".en.infinitive.should == 'neighbor'
414 | 		"neighborhood".en.infinitive.rule.should == '42b'
415 | 	end
416 | 
417 | 	it "uses rule 42b when calculating the infinitive of 'neighbourhood'" do
418 | 		"neighbourhood".en.infinitive.should == 'neighbour'
419 | 		"neighbourhood".en.infinitive.rule.should == '42b'
420 | 	end
421 | 
422 | 	it "uses rule 43a when calculating the infinitive of 'penniless'" do
423 | 		"penniless".en.infinitive.should == 'penny'
424 | 		"penniless".en.infinitive.rule.should == '43a'
425 | 	end
426 | 
427 | 	it "uses rule 43b when calculating the infinitive of 'listless'" do
428 | 		"listless".en.infinitive.should == 'list'
429 | 		"listless".en.infinitive.rule.should == '43b'
430 | 	end
431 | 
432 | 	it "uses rule 44a when calculating the infinitive of 'heartiness'" do
433 | 		"heartiness".en.infinitive.should == 'hearty'
434 | 		"heartiness".en.infinitive.rule.should == '44a'
435 | 	end
436 | 
437 | 	it "uses rule 44b when calculating the infinitive of 'coolness'" do
438 | 		"coolness".en.infinitive.should == 'cool'
439 | 		"coolness".en.infinitive.rule.should == '44b'
440 | 	end
441 | 
442 | 	it "uses rule 45 when calculating the infinitive of 'specification'" do
443 | 		"specification".en.infinitive.should == 'specify'
444 | 		"specification".en.infinitive.rule.should == '45'
445 | 	end
446 | 
447 | 	it "uses rule 46 when calculating the infinitive of 'rationalization'" do
448 | 		"rationalization".en.infinitive.should == 'rationalize'
449 | 		"rationalization".en.infinitive.rule.should == '46'
450 | 	end
451 | 
452 | 	it "uses rule 47 when calculating the infinitive of 'detection'" do
453 | 		"detection".en.infinitive.should == 'detect'
454 | 		"detection".en.infinitive.rule.should == '47'
455 | 	end
456 | 
457 | 	it "uses rule 48 when calculating the infinitive of 'exertion'" do
458 | 		"exertion".en.infinitive.should == 'exert'
459 | 		"exertion".en.infinitive.rule.should == '48'
460 | 	end
461 | 
462 | 	it "uses rule 49 when calculating the infinitive of 'creation'" do
463 | 		"creation".en.infinitive.should == 'create'
464 | 		"creation".en.infinitive.rule.should == '49'
465 | 	end
466 | 
467 | 	it "uses rule 50 when calculating the infinitive of 'creator'" do
468 | 		"creator".en.infinitive.should == 'create'
469 | 		"creator".en.infinitive.rule.should == '50'
470 | 	end
471 | 
472 | 	it "uses rule 51 when calculating the infinitive of 'detector'" do
473 | 		"detector".en.infinitive.should == 'detect'
474 | 		"detector".en.infinitive.rule.should == '51'
475 | 	end
476 | 
477 | 	it "uses rule 52 when calculating the infinitive of 'creative'" do
478 | 		"creative".en.infinitive.should == 'creation'
479 | 		"creative".en.infinitive.rule.should == '52'
480 | 	end
481 | 
482 | 	it "uses rule 52 when calculating the infinitive of 'decisive'" do
483 | 		"decisive".en.infinitive.should == 'decision'
484 | 		"decisive".en.infinitive.rule.should == '52'
485 | 	end
486 | 
487 | 	it "uses rule 53 when calculating the infinitive of 'Australian'" do
488 | 		"Australian".en.infinitive.should == 'Australia'
489 | 		"Australian".en.infinitive.rule.should == '53'
490 | 	end
491 | 
492 | 	it "uses rule 54 when calculating the infinitive of 'Jeffersonian'" do
493 | 		"Jeffersonian".en.infinitive.should == 'Jefferson'
494 | 		"Jeffersonian".en.infinitive.rule.should == '54'
495 | 	end
496 | 
497 | 	it "uses irregular rule when calculating the infinitive of 'rove'" do
498 | 		"rove".en.infinitive.should == 'reeve'
499 | 		"rove".en.infinitive.rule.should == 'irregular'
500 | 	end
501 | 
502 | 	it "uses irregular rule when calculating the infinitive of 'dove'" do
503 | 		"dove".en.infinitive.should == 'dive'
504 | 		"dove".en.infinitive.rule.should == 'irregular'
505 | 	end
506 | 
507 | 	it "uses irregular rule when calculating the infinitive of 'snuck'" do
508 | 		"snuck".en.infinitive.should == 'sneak'
509 | 		"snuck".en.infinitive.rule.should == 'irregular'
510 | 	end
511 | 
512 | 	it "uses irregular rule when calculating the infinitive of 'wot'" do
513 | 		"wot".en.infinitive.should == 'wit'
514 | 		"wot".en.infinitive.rule.should == 'irregular'
515 | 	end
516 | 
517 | end
518 | 
519 | 


--------------------------------------------------------------------------------
/lib/linguistics/iso639.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | # coding: utf-8
  3 | 
  4 | require 'linguistics' unless defined?( Linguistics )
  5 | 
  6 | # A hash of International 2- and 3-letter ISO639-1 and ISO639-2
  7 | # language codes information. Each entry is keyed by all of its
  8 | # language codes as Symbols, and the entry itself has three keys:
  9 | #
 10 | # [<tt>:codes</tt>]
 11 | #   All of the codes known for this language as Strings
 12 | # [<tt>:eng_name</tt>]
 13 | #   The English-language name of the language.
 14 | # [<tt>:fre_name</tt>]
 15 | #   The French-language name of the language.
 16 | #
 17 | # Entries for 'ja' and 'en':
 18 | #
 19 | #   irb > Linguistics::ISO639::LANGUAGE_CODES[:en]
 20 | #    => {:eng_name=>"English", :fre_name=>"anglais", :codes=>["eng", "en"]}
 21 | #   irb > Linguistics::ISO639::LANGUAGE_CODES[:eng]
 22 | #    => {:eng_name=>"English", :fre_name=>"anglais", :codes=>["eng", "en"]}
 23 | #   irb > Linguistics::ISO639::LANGUAGE_CODES[:ja]
 24 | #    => {:eng_name=>"Japanese", :fre_name=>"japonais", :codes=>["jpn", "ja"]}
 25 | 
 26 | module Linguistics::ISO639
 27 | 
 28 | 	# Hash of ISO639 2- and 3-letter language codes
 29 | 	LANGUAGE_CODES = {}
 30 | 
 31 | 	# Read everything after the __END__
 32 | 	_, data = File.read( __FILE__, :encoding => 'utf-8' ).split( /^__END__$/, 2 )
 33 | 
 34 | 	# To read the files, please note that one line of text contains one
 35 | 	# entry. An alpha-3 (bibliographic) code, an alpha-3 (terminologic)
 36 | 	# code (when given), an alpha-2 code (when given), an English name,
 37 | 	# and a French name of a language are all separated by pipe (|)
 38 | 	# characters. If one of these elements is not applicable to the entry,
 39 | 	# the field is left empty, i.e., a pipe (|) character immediately
 40 | 	# follows the preceding entry. The Line terminator is the LF character.
 41 | 
 42 | 	# bib_alpha3|term_alpha3|alpha2|eng_name|fre_name
 43 | 	# E.g., "eng||en|English|anglais"
 44 | 	data.lines do |line|
 45 | 		next unless line =~ /\|/ # Skip non-language lines
 46 | 		bib_alpha3, term_alpha3, alpha2, eng_name, fre_name = line.chomp.split( '|', 5 )
 47 | 		entry      = {
 48 | 			:eng_name => eng_name,
 49 | 			:fre_name => fre_name,
 50 | 			:codes    => [ bib_alpha3, alpha2, term_alpha3 ].reject {|item| item.empty? }
 51 | 		}
 52 | 		$stderr.puts "  adding language code entry %p from line: %p" %
 53 | 			[ entry, line ] if $DEBUG
 54 | 
 55 | 		LANGUAGE_CODES[ bib_alpha3.to_sym ] = entry
 56 | 		LANGUAGE_CODES[ alpha2.to_sym ] = entry if alpha2
 57 | 	end
 58 | 
 59 | end # module Linguistics::ISO639
 60 | 
 61 | # Data from: http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 62 | __END__
 63 | aar||aa|Afar|afar
 64 | abk||ab|Abkhazian|abkhaze
 65 | ace|||Achinese|aceh
 66 | ach|||Acoli|acoli
 67 | ada|||Adangme|adangme
 68 | ady|||Adyghe; Adygei|adyghé
 69 | afa|||Afro-Asiatic languages|afro-asiatiques, langues
 70 | afh|||Afrihili|afrihili
 71 | afr||af|Afrikaans|afrikaans
 72 | ain|||Ainu|aïnou
 73 | aka||ak|Akan|akan
 74 | akk|||Akkadian|akkadien
 75 | alb|sqi|sq|Albanian|albanais
 76 | ale|||Aleut|aléoute
 77 | alg|||Algonquian languages|algonquines, langues
 78 | alt|||Southern Altai|altai du Sud
 79 | amh||am|Amharic|amharique
 80 | ang|||English, Old (ca.450-1100)|anglo-saxon (ca.450-1100)
 81 | anp|||Angika|angika
 82 | apa|||Apache languages|apaches, langues
 83 | ara||ar|Arabic|arabe
 84 | arc|||Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)|araméen d'empire (700-300 BCE)
 85 | arg||an|Aragonese|aragonais
 86 | arm|hye|hy|Armenian|arménien
 87 | arn|||Mapudungun; Mapuche|mapudungun; mapuche; mapuce
 88 | arp|||Arapaho|arapaho
 89 | art|||Artificial languages|artificielles, langues
 90 | arw|||Arawak|arawak
 91 | asm||as|Assamese|assamais
 92 | ast|||Asturian; Bable; Leonese; Asturleonese|asturien; bable; léonais; asturoléonais
 93 | ath|||Athapascan languages|athapascanes, langues
 94 | aus|||Australian languages|australiennes, langues
 95 | ava||av|Avaric|avar
 96 | ave||ae|Avestan|avestique
 97 | awa|||Awadhi|awadhi
 98 | aym||ay|Aymara|aymara
 99 | aze||az|Azerbaijani|azéri
100 | bad|||Banda languages|banda, langues
101 | bai|||Bamileke languages|bamiléké, langues
102 | bak||ba|Bashkir|bachkir
103 | bal|||Baluchi|baloutchi
104 | bam||bm|Bambara|bambara
105 | ban|||Balinese|balinais
106 | baq|eus|eu|Basque|basque
107 | bas|||Basa|basa
108 | bat|||Baltic languages|baltes, langues
109 | bej|||Beja; Bedawiyet|bedja
110 | bel||be|Belarusian|biélorusse
111 | bem|||Bemba|bemba
112 | ben||bn|Bengali|bengali
113 | ber|||Berber languages|berbères, langues
114 | bho|||Bhojpuri|bhojpuri
115 | bih||bh|Bihari languages|langues biharis
116 | bik|||Bikol|bikol
117 | bin|||Bini; Edo|bini; edo
118 | bis||bi|Bislama|bichlamar
119 | bla|||Siksika|blackfoot
120 | bnt|||Bantu (Other)|bantoues, autres langues
121 | bos||bs|Bosnian|bosniaque
122 | bra|||Braj|braj
123 | bre||br|Breton|breton
124 | btk|||Batak languages|batak, langues
125 | bua|||Buriat|bouriate
126 | bug|||Buginese|bugi
127 | bul||bg|Bulgarian|bulgare
128 | bur|mya|my|Burmese|birman
129 | byn|||Blin; Bilin|blin; bilen
130 | cad|||Caddo|caddo
131 | cai|||Central American Indian languages|amérindiennes de L'Amérique centrale, langues
132 | car|||Galibi Carib|karib; galibi; carib
133 | cat||ca|Catalan; Valencian|catalan; valencien
134 | cau|||Caucasian languages|caucasiennes, langues
135 | ceb|||Cebuano|cebuano
136 | cel|||Celtic languages|celtiques, langues; celtes, langues
137 | cha||ch|Chamorro|chamorro
138 | chb|||Chibcha|chibcha
139 | che||ce|Chechen|tchétchène
140 | chg|||Chagatai|djaghataï
141 | chi|zho|zh|Chinese|chinois
142 | chk|||Chuukese|chuuk
143 | chm|||Mari|mari
144 | chn|||Chinook jargon|chinook, jargon
145 | cho|||Choctaw|choctaw
146 | chp|||Chipewyan; Dene Suline|chipewyan
147 | chr|||Cherokee|cherokee
148 | chu||cu|Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic|slavon d'église; vieux slave; slavon liturgique; vieux bulgare
149 | chv||cv|Chuvash|tchouvache
150 | chy|||Cheyenne|cheyenne
151 | cmc|||Chamic languages|chames, langues
152 | cop|||Coptic|copte
153 | cor||kw|Cornish|cornique
154 | cos||co|Corsican|corse
155 | cpe|||Creoles and pidgins, English based|créoles et pidgins basés sur l'anglais
156 | cpf|||Creoles and pidgins, French-based |créoles et pidgins basés sur le français
157 | cpp|||Creoles and pidgins, Portuguese-based |créoles et pidgins basés sur le portugais
158 | cre||cr|Cree|cree
159 | crh|||Crimean Tatar; Crimean Turkish|tatar de Crimé
160 | crp|||Creoles and pidgins |créoles et pidgins
161 | csb|||Kashubian|kachoube
162 | cus|||Cushitic languages|couchitiques, langues
163 | cze|ces|cs|Czech|tchèque
164 | dak|||Dakota|dakota
165 | dan||da|Danish|danois
166 | dar|||Dargwa|dargwa
167 | day|||Land Dayak languages|dayak, langues
168 | del|||Delaware|delaware
169 | den|||Slave (Athapascan)|esclave (athapascan)
170 | dgr|||Dogrib|dogrib
171 | din|||Dinka|dinka
172 | div||dv|Divehi; Dhivehi; Maldivian|maldivien
173 | doi|||Dogri|dogri
174 | dra|||Dravidian languages|dravidiennes, langues
175 | dsb|||Lower Sorbian|bas-sorabe
176 | dua|||Duala|douala
177 | dum|||Dutch, Middle (ca.1050-1350)|néerlandais moyen (ca. 1050-1350)
178 | dut|nld|nl|Dutch; Flemish|néerlandais; flamand
179 | dyu|||Dyula|dioula
180 | dzo||dz|Dzongkha|dzongkha
181 | efi|||Efik|efik
182 | egy|||Egyptian (Ancient)|égyptien
183 | eka|||Ekajuk|ekajuk
184 | elx|||Elamite|élamite
185 | eng||en|English|anglais
186 | enm|||English, Middle (1100-1500)|anglais moyen (1100-1500)
187 | epo||eo|Esperanto|espéranto
188 | est||et|Estonian|estonien
189 | ewe||ee|Ewe|éwé
190 | ewo|||Ewondo|éwondo
191 | fan|||Fang|fang
192 | fao||fo|Faroese|féroïen
193 | fat|||Fanti|fanti
194 | fij||fj|Fijian|fidjien
195 | fil|||Filipino; Pilipino|filipino; pilipino
196 | fin||fi|Finnish|finnois
197 | fiu|||Finno-Ugrian languages|finno-ougriennes, langues
198 | fon|||Fon|fon
199 | fre|fra|fr|French|français
200 | frm|||French, Middle (ca.1400-1600)|français moyen (1400-1600)
201 | fro|||French, Old (842-ca.1400)|français ancien (842-ca.1400)
202 | frr|||Northern Frisian|frison septentrional
203 | frs|||Eastern Frisian|frison oriental
204 | fry||fy|Western Frisian|frison occidental
205 | ful||ff|Fulah|peul
206 | fur|||Friulian|frioulan
207 | gaa|||Ga|ga
208 | gay|||Gayo|gayo
209 | gba|||Gbaya|gbaya
210 | gem|||Germanic languages|germaniques, langues
211 | geo|kat|ka|Georgian|géorgien
212 | ger|deu|de|German|allemand
213 | gez|||Geez|guèze
214 | gil|||Gilbertese|kiribati
215 | gla||gd|Gaelic; Scottish Gaelic|gaélique; gaélique écossais
216 | gle||ga|Irish|irlandais
217 | glg||gl|Galician|galicien
218 | glv||gv|Manx|manx; mannois
219 | gmh|||German, Middle High (ca.1050-1500)|allemand, moyen haut (ca. 1050-1500)
220 | goh|||German, Old High (ca.750-1050)|allemand, vieux haut (ca. 750-1050)
221 | gon|||Gondi|gond
222 | gor|||Gorontalo|gorontalo
223 | got|||Gothic|gothique
224 | grb|||Grebo|grebo
225 | grc|||Greek, Ancient (to 1453)|grec ancien (jusqu'à 1453)
226 | gre|ell|el|Greek, Modern (1453-)|grec moderne (après 1453)
227 | grn||gn|Guarani|guarani
228 | gsw|||Swiss German; Alemannic; Alsatian|suisse alémanique; alémanique; alsacien
229 | guj||gu|Gujarati|goudjrati
230 | gwi|||Gwich'in|gwich'in
231 | hai|||Haida|haida
232 | hat||ht|Haitian; Haitian Creole|haïtien; créole haïtien
233 | hau||ha|Hausa|haoussa
234 | haw|||Hawaiian|hawaïen
235 | heb||he|Hebrew|hébreu
236 | her||hz|Herero|herero
237 | hil|||Hiligaynon|hiligaynon
238 | him|||Himachali languages; Western Pahari languages|langues himachalis; langues paharis occidentales
239 | hin||hi|Hindi|hindi
240 | hit|||Hittite|hittite
241 | hmn|||Hmong|hmong
242 | hmo||ho|Hiri Motu|hiri motu
243 | hrv||hr|Croatian|croate
244 | hsb|||Upper Sorbian|haut-sorabe
245 | hun||hu|Hungarian|hongrois
246 | hup|||Hupa|hupa
247 | iba|||Iban|iban
248 | ibo||ig|Igbo|igbo
249 | ice|isl|is|Icelandic|islandais
250 | ido||io|Ido|ido
251 | iii||ii|Sichuan Yi; Nuosu|yi de Sichuan
252 | ijo|||Ijo languages|ijo, langues
253 | iku||iu|Inuktitut|inuktitut
254 | ile||ie|Interlingue; Occidental|interlingue
255 | ilo|||Iloko|ilocano
256 | ina||ia|Interlingua (International Auxiliary Language Association)|interlingua (langue auxiliaire internationale)
257 | inc|||Indic languages|indo-aryennes, langues
258 | ind||id|Indonesian|indonésien
259 | ine|||Indo-European languages|indo-européennes, langues
260 | inh|||Ingush|ingouche
261 | ipk||ik|Inupiaq|inupiaq
262 | ira|||Iranian languages|iraniennes, langues
263 | iro|||Iroquoian languages|iroquoises, langues
264 | ita||it|Italian|italien
265 | jav||jv|Javanese|javanais
266 | jbo|||Lojban|lojban
267 | jpn||ja|Japanese|japonais
268 | jpr|||Judeo-Persian|judéo-persan
269 | jrb|||Judeo-Arabic|judéo-arabe
270 | kaa|||Kara-Kalpak|karakalpak
271 | kab|||Kabyle|kabyle
272 | kac|||Kachin; Jingpho|kachin; jingpho
273 | kal||kl|Kalaallisut; Greenlandic|groenlandais
274 | kam|||Kamba|kamba
275 | kan||kn|Kannada|kannada
276 | kar|||Karen languages|karen, langues
277 | kas||ks|Kashmiri|kashmiri
278 | kau||kr|Kanuri|kanouri
279 | kaw|||Kawi|kawi
280 | kaz||kk|Kazakh|kazakh
281 | kbd|||Kabardian|kabardien
282 | kha|||Khasi|khasi
283 | khi|||Khoisan languages|khoïsan, langues
284 | khm||km|Central Khmer|khmer central
285 | kho|||Khotanese; Sakan|khotanais; sakan
286 | kik||ki|Kikuyu; Gikuyu|kikuyu
287 | kin||rw|Kinyarwanda|rwanda
288 | kir||ky|Kirghiz; Kyrgyz|kirghiz
289 | kmb|||Kimbundu|kimbundu
290 | kok|||Konkani|konkani
291 | kom||kv|Komi|kom
292 | kon||kg|Kongo|kongo
293 | kor||ko|Korean|coréen
294 | kos|||Kosraean|kosrae
295 | kpe|||Kpelle|kpellé
296 | krc|||Karachay-Balkar|karatchai balkar
297 | krl|||Karelian|carélien
298 | kro|||Kru languages|krou, langues
299 | kru|||Kurukh|kurukh
300 | kua||kj|Kuanyama; Kwanyama|kuanyama; kwanyama
301 | kum|||Kumyk|koumyk
302 | kur||ku|Kurdish|kurde
303 | kut|||Kutenai|kutenai
304 | lad|||Ladino|judéo-espagnol
305 | lah|||Lahnda|lahnda
306 | lam|||Lamba|lamba
307 | lao||lo|Lao|lao
308 | lat||la|Latin|latin
309 | lav||lv|Latvian|letton
310 | lez|||Lezghian|lezghien
311 | lim||li|Limburgan; Limburger; Limburgish|limbourgeois
312 | lin||ln|Lingala|lingala
313 | lit||lt|Lithuanian|lituanien
314 | lol|||Mongo|mongo
315 | loz|||Lozi|lozi
316 | ltz||lb|Luxembourgish; Letzeburgesch|luxembourgeois
317 | lua|||Luba-Lulua|luba-lulua
318 | lub||lu|Luba-Katanga|luba-katanga
319 | lug||lg|Ganda|ganda
320 | lui|||Luiseno|luiseno
321 | lun|||Lunda|lunda
322 | luo|||Luo (Kenya and Tanzania)|luo (Kenya et Tanzanie)
323 | lus|||Lushai|lushai
324 | mac|mkd|mk|Macedonian|macédonien
325 | mad|||Madurese|madourais
326 | mag|||Magahi|magahi
327 | mah||mh|Marshallese|marshall
328 | mai|||Maithili|maithili
329 | mak|||Makasar|makassar
330 | mal||ml|Malayalam|malayalam
331 | man|||Mandingo|mandingue
332 | mao|mri|mi|Maori|maori
333 | map|||Austronesian languages|austronésiennes, langues
334 | mar||mr|Marathi|marathe
335 | mas|||Masai|massaï
336 | may|msa|ms|Malay|malais
337 | mdf|||Moksha|moksa
338 | mdr|||Mandar|mandar
339 | men|||Mende|mendé
340 | mga|||Irish, Middle (900-1200)|irlandais moyen (900-1200)
341 | mic|||Mi'kmaq; Micmac|mi'kmaq; micmac
342 | min|||Minangkabau|minangkabau
343 | mis|||Uncoded languages|langues non codées
344 | mkh|||Mon-Khmer languages|môn-khmer, langues
345 | mlg||mg|Malagasy|malgache
346 | mlt||mt|Maltese|maltais
347 | mnc|||Manchu|mandchou
348 | mni|||Manipuri|manipuri
349 | mno|||Manobo languages|manobo, langues
350 | moh|||Mohawk|mohawk
351 | mon||mn|Mongolian|mongol
352 | mos|||Mossi|moré
353 | mul|||Multiple languages|multilingue
354 | mun|||Munda languages|mounda, langues
355 | mus|||Creek|muskogee
356 | mwl|||Mirandese|mirandais
357 | mwr|||Marwari|marvari
358 | myn|||Mayan languages|maya, langues
359 | myv|||Erzya|erza
360 | nah|||Nahuatl languages|nahuatl, langues
361 | nai|||North American Indian languages|nord-amérindiennes, langues
362 | nap|||Neapolitan|napolitain
363 | nau||na|Nauru|nauruan
364 | nav||nv|Navajo; Navaho|navaho
365 | nbl||nr|Ndebele, South; South Ndebele|ndébélé du Sud
366 | nde||nd|Ndebele, North; North Ndebele|ndébélé du Nord
367 | ndo||ng|Ndonga|ndonga
368 | nds|||Low German; Low Saxon; German, Low; Saxon, Low|bas allemand; bas saxon; allemand, bas; saxon, bas
369 | nep||ne|Nepali|népalais
370 | new|||Nepal Bhasa; Newari|nepal bhasa; newari
371 | nia|||Nias|nias
372 | nic|||Niger-Kordofanian languages|nigéro-kordofaniennes, langues
373 | niu|||Niuean|niué
374 | nno||nn|Norwegian Nynorsk; Nynorsk, Norwegian|norvégien nynorsk; nynorsk, norvégien
375 | nob||nb|Bokmål, Norwegian; Norwegian Bokmål|norvégien bokmål
376 | nog|||Nogai|nogaï; nogay
377 | non|||Norse, Old|norrois, vieux
378 | nor||no|Norwegian|norvégien
379 | nqo|||N'Ko|n'ko
380 | nso|||Pedi; Sepedi; Northern Sotho|pedi; sepedi; sotho du Nord
381 | nub|||Nubian languages|nubiennes, langues
382 | nwc|||Classical Newari; Old Newari; Classical Nepal Bhasa|newari classique
383 | nya||ny|Chichewa; Chewa; Nyanja|chichewa; chewa; nyanja
384 | nym|||Nyamwezi|nyamwezi
385 | nyn|||Nyankole|nyankolé
386 | nyo|||Nyoro|nyoro
387 | nzi|||Nzima|nzema
388 | oci||oc|Occitan (post 1500); Provençal|occitan (après 1500); provençal
389 | oji||oj|Ojibwa|ojibwa
390 | ori||or|Oriya|oriya
391 | orm||om|Oromo|galla
392 | osa|||Osage|osage
393 | oss||os|Ossetian; Ossetic|ossète
394 | ota|||Turkish, Ottoman (1500-1928)|turc ottoman (1500-1928)
395 | oto|||Otomian languages|otomi, langues
396 | paa|||Papuan languages|papoues, langues
397 | pag|||Pangasinan|pangasinan
398 | pal|||Pahlavi|pahlavi
399 | pam|||Pampanga; Kapampangan|pampangan
400 | pan||pa|Panjabi; Punjabi|pendjabi
401 | pap|||Papiamento|papiamento
402 | pau|||Palauan|palau
403 | peo|||Persian, Old (ca.600-400 B.C.)|perse, vieux (ca. 600-400 av. J.-C.)
404 | per|fas|fa|Persian|persan
405 | phi|||Philippine languages|philippines, langues
406 | phn|||Phoenician|phénicien
407 | pli||pi|Pali|pali
408 | pol||pl|Polish|polonais
409 | pon|||Pohnpeian|pohnpei
410 | por||pt|Portuguese|portugais
411 | pra|||Prakrit languages|prâkrit, langues
412 | pro|||Provençal, Old (to 1500)|provençal ancien (jusqu'à 1500)
413 | pus||ps|Pushto; Pashto|pachto
414 | qaa-qtz|||Reserved for local use|réservée à l'usage local
415 | que||qu|Quechua|quechua
416 | raj|||Rajasthani|rajasthani
417 | rap|||Rapanui|rapanui
418 | rar|||Rarotongan; Cook Islands Maori|rarotonga; maori des îles Cook
419 | roa|||Romance languages|romanes, langues
420 | roh||rm|Romansh|romanche
421 | rom|||Romany|tsigane
422 | rum|ron|ro|Romanian; Moldavian; Moldovan|roumain; moldave
423 | run||rn|Rundi|rundi
424 | rup|||Aromanian; Arumanian; Macedo-Romanian|aroumain; macédo-roumain
425 | rus||ru|Russian|russe
426 | sad|||Sandawe|sandawe
427 | sag||sg|Sango|sango
428 | sah|||Yakut|iakoute
429 | sai|||South American Indian (Other)|indiennes d'Amérique du Sud, autres langues
430 | sal|||Salishan languages|salishennes, langues
431 | sam|||Samaritan Aramaic|samaritain
432 | san||sa|Sanskrit|sanskrit
433 | sas|||Sasak|sasak
434 | sat|||Santali|santal
435 | scn|||Sicilian|sicilien
436 | sco|||Scots|écossais
437 | sel|||Selkup|selkoupe
438 | sem|||Semitic languages|sémitiques, langues
439 | sga|||Irish, Old (to 900)|irlandais ancien (jusqu'à 900)
440 | sgn|||Sign Languages|langues des signes
441 | shn|||Shan|chan
442 | sid|||Sidamo|sidamo
443 | sin||si|Sinhala; Sinhalese|singhalais
444 | sio|||Siouan languages|sioux, langues
445 | sit|||Sino-Tibetan languages|sino-tibétaines, langues
446 | sla|||Slavic languages|slaves, langues
447 | slo|slk|sk|Slovak|slovaque
448 | slv||sl|Slovenian|slovène
449 | sma|||Southern Sami|sami du Sud
450 | sme||se|Northern Sami|sami du Nord
451 | smi|||Sami languages|sames, langues
452 | smj|||Lule Sami|sami de Lule
453 | smn|||Inari Sami|sami d'Inari
454 | smo||sm|Samoan|samoan
455 | sms|||Skolt Sami|sami skolt
456 | sna||sn|Shona|shona
457 | snd||sd|Sindhi|sindhi
458 | snk|||Soninke|soninké
459 | sog|||Sogdian|sogdien
460 | som||so|Somali|somali
461 | son|||Songhai languages|songhai, langues
462 | sot||st|Sotho, Southern|sotho du Sud
463 | spa||es|Spanish; Castilian|espagnol; castillan
464 | srd||sc|Sardinian|sarde
465 | srn|||Sranan Tongo|sranan tongo
466 | srp||sr|Serbian|serbe
467 | srr|||Serer|sérère
468 | ssa|||Nilo-Saharan languages|nilo-sahariennes, langues
469 | ssw||ss|Swati|swati
470 | suk|||Sukuma|sukuma
471 | sun||su|Sundanese|soundanais
472 | sus|||Susu|soussou
473 | sux|||Sumerian|sumérien
474 | swa||sw|Swahili|swahili
475 | swe||sv|Swedish|suédois
476 | syc|||Classical Syriac|syriaque classique
477 | syr|||Syriac|syriaque
478 | tah||ty|Tahitian|tahitien
479 | tai|||Tai languages|tai, langues
480 | tam||ta|Tamil|tamoul
481 | tat||tt|Tatar|tatar
482 | tel||te|Telugu|télougou
483 | tem|||Timne|temne
484 | ter|||Tereno|tereno
485 | tet|||Tetum|tetum
486 | tgk||tg|Tajik|tadjik
487 | tgl||tl|Tagalog|tagalog
488 | tha||th|Thai|thaï
489 | tib|bod|bo|Tibetan|tibétain
490 | tig|||Tigre|tigré
491 | tir||ti|Tigrinya|tigrigna
492 | tiv|||Tiv|tiv
493 | tkl|||Tokelau|tokelau
494 | tlh|||Klingon; tlhIngan-Hol|klingon
495 | tli|||Tlingit|tlingit
496 | tmh|||Tamashek|tamacheq
497 | tog|||Tonga (Nyasa)|tonga (Nyasa)
498 | ton||to|Tonga (Tonga Islands)|tongan (Îles Tonga)
499 | tpi|||Tok Pisin|tok pisin
500 | tsi|||Tsimshian|tsimshian
501 | tsn||tn|Tswana|tswana
502 | tso||ts|Tsonga|tsonga
503 | tuk||tk|Turkmen|turkmène
504 | tum|||Tumbuka|tumbuka
505 | tup|||Tupi languages|tupi, langues
506 | tur||tr|Turkish|turc
507 | tut|||Altaic languages|altaïques, langues
508 | tvl|||Tuvalu|tuvalu
509 | twi||tw|Twi|twi
510 | tyv|||Tuvinian|touva
511 | udm|||Udmurt|oudmourte
512 | uga|||Ugaritic|ougaritique
513 | uig||ug|Uighur; Uyghur|ouïgour
514 | ukr||uk|Ukrainian|ukrainien
515 | umb|||Umbundu|umbundu
516 | und|||Undetermined|indéterminée
517 | urd||ur|Urdu|ourdou
518 | uzb||uz|Uzbek|ouszbek
519 | vai|||Vai|vaï
520 | ven||ve|Venda|venda
521 | vie||vi|Vietnamese|vietnamien
522 | vol||vo|Volapük|volapük
523 | vot|||Votic|vote
524 | wak|||Wakashan languages|wakashanes, langues
525 | wal|||Walamo|walamo
526 | war|||Waray|waray
527 | was|||Washo|washo
528 | wel|cym|cy|Welsh|gallois
529 | wen|||Sorbian languages|sorabes, langues
530 | wln||wa|Walloon|wallon
531 | wol||wo|Wolof|wolof
532 | xal|||Kalmyk; Oirat|kalmouk; oïrat
533 | xho||xh|Xhosa|xhosa
534 | yao|||Yao|yao
535 | yap|||Yapese|yapois
536 | yid||yi|Yiddish|yiddish
537 | yor||yo|Yoruba|yoruba
538 | ypk|||Yupik languages|yupik, langues
539 | zap|||Zapotec|zapotèque
540 | zbl|||Blissymbols; Blissymbolics; Bliss|symboles Bliss; Bliss
541 | zen|||Zenaga|zenaga
542 | zha||za|Zhuang; Chuang|zhuang; chuang
543 | znd|||Zande languages|zandé, langues
544 | zul||zu|Zulu|zoulou
545 | zun|||Zuni|zuni
546 | zxx|||No linguistic content; Not applicable|pas de contenu linguistique; non applicable
547 | zza|||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki
548 | 


--------------------------------------------------------------------------------
/spec/linguistics/en/articles_spec.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env rspec -cfd
  2 | 
  3 | BEGIN {
  4 | 	require 'pathname'
  5 | 	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
  6 | 
  7 | 	libdir = basedir + "lib"
  8 | 
  9 | 	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
 10 | 	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
 11 | }
 12 | 
 13 | require 'rspec'
 14 | require 'spec/lib/helpers'
 15 | 
 16 | require 'linguistics'
 17 | require 'linguistics/en'
 18 | require 'linguistics/en/articles'
 19 | 
 20 | 
 21 | describe Linguistics::EN::Articles do
 22 | 
 23 | 	before( :all ) do
 24 | 		setup_logging()
 25 | 		Linguistics.use( :en )
 26 | 	end
 27 | 
 28 | 	after( :all ) do
 29 | 		reset_logging()
 30 | 	end
 31 | 
 32 | 	it "adds EN::Articles to the list of English language modules" do
 33 | 		Linguistics::EN.should have_extension( :articles )
 34 | 	end
 35 | 
 36 | 
 37 | 	describe "in monkeypatch mode" do
 38 | 
 39 | 		let( :monkeypatched_class ) do
 40 | 			Class.new do
 41 | 				def to_s; "antelope"; end
 42 | 			end
 43 | 		end
 44 | 		let( :monkeypatched_object ) do
 45 | 			Linguistics.use( :en, classes: monkeypatched_class, monkeypatch: true )
 46 | 			monkeypatched_class.new
 47 | 		end
 48 | 
 49 | 
 50 | 		it "uses the stringified receiver as the object which should get the article" do
 51 | 			monkeypatched_object.a.should == 'an antelope'
 52 | 		end
 53 | 
 54 | 		it "uses correct pluralization to form the negative article" do
 55 | 			monkeypatched_object.no.should == 'no antelopes'
 56 | 		end
 57 | 
 58 | 	end
 59 | 
 60 | 
 61 | 	it "uses 'an' as the indefinite article for 'A.B.C'" do
 62 | 		"A.B.C".en.a.should == "an A.B.C"
 63 | 	end
 64 | 
 65 | 	it "uses 'an' as the indefinite article for 'AI'" do
 66 | 		"AI".en.a.should == "an AI"
 67 | 	end
 68 | 
 69 | 	it "uses 'an' as the indefinite article for 'AGE'" do
 70 | 		"AGE".en.a.should == "an AGE"
 71 | 	end
 72 | 
 73 | 	it "uses 'an' as the indefinite article for 'agendum'" do
 74 | 		"agendum".en.a.should == "an agendum"
 75 | 	end
 76 | 
 77 | 	it "uses 'an' as the indefinite article for 'aide-de-camp'" do
 78 | 		"aide-de-camp".en.a.should == "an aide-de-camp"
 79 | 	end
 80 | 
 81 | 	it "uses 'an' as the indefinite article for 'albino'" do
 82 | 		"albino".en.a.should == "an albino"
 83 | 	end
 84 | 
 85 | 	it "uses 'a' as the indefinite article for 'B.L.T. sandwich'" do
 86 | 		"B.L.T. sandwich".en.a.should == "a B.L.T. sandwich"
 87 | 	end
 88 | 
 89 | 	it "uses 'a' as the indefinite article for 'BMW'" do
 90 | 		"BMW".en.a.should == "a BMW"
 91 | 	end
 92 | 
 93 | 	it "uses 'a' as the indefinite article for 'BLANK'" do
 94 | 		"BLANK".en.a.should == "a BLANK"
 95 | 	end
 96 | 
 97 | 	it "uses 'a' as the indefinite article for 'bacterium'" do
 98 | 		"bacterium".en.a.should == "a bacterium"
 99 | 	end
100 | 
101 | 	it "uses 'a' as the indefinite article for 'Burmese restaurant'" do
102 | 		"Burmese restaurant".en.a.should == "a Burmese restaurant"
103 | 	end
104 | 
105 | 	it "uses 'a' as the indefinite article for 'C.O.'" do
106 | 		"C.O.".en.a.should == "a C.O."
107 | 	end
108 | 
109 | 	it "uses 'a' as the indefinite article for 'CCD'" do
110 | 		"CCD".en.a.should == "a CCD"
111 | 	end
112 | 
113 | 	it "uses 'a' as the indefinite article for 'COLON'" do
114 | 		"COLON".en.a.should == "a COLON"
115 | 	end
116 | 
117 | 	it "uses 'a' as the indefinite article for 'cameo'" do
118 | 		"cameo".en.a.should == "a cameo"
119 | 	end
120 | 
121 | 	it "uses 'a' as the indefinite article for 'CAPITAL'" do
122 | 		"CAPITAL".en.a.should == "a CAPITAL"
123 | 	end
124 | 
125 | 	it "uses 'a' as the indefinite article for 'D.S.M.'" do
126 | 		"D.S.M.".en.a.should == "a D.S.M."
127 | 	end
128 | 
129 | 	it "uses 'a' as the indefinite article for 'DNR'" do
130 | 		"DNR".en.a.should == "a DNR"
131 | 	end
132 | 
133 | 	it "uses 'a' as the indefinite article for 'DINNER'" do
134 | 		"DINNER".en.a.should == "a DINNER"
135 | 	end
136 | 
137 | 	it "uses 'a' as the indefinite article for 'dynamo'" do
138 | 		"dynamo".en.a.should == "a dynamo"
139 | 	end
140 | 
141 | 	it "uses 'an' as the indefinite article for 'E.K.G.'" do
142 | 		"E.K.G.".en.a.should == "an E.K.G."
143 | 	end
144 | 
145 | 	it "uses 'an' as the indefinite article for 'ECG'" do
146 | 		"ECG".en.a.should == "an ECG"
147 | 	end
148 | 
149 | 	it "uses 'an' as the indefinite article for 'EGG'" do
150 | 		"EGG".en.a.should == "an EGG"
151 | 	end
152 | 
153 | 	it "uses 'an' as the indefinite article for 'embryo'" do
154 | 		"embryo".en.a.should == "an embryo"
155 | 	end
156 | 
157 | 	it "uses 'an' as the indefinite article for 'erratum'" do
158 | 		"erratum".en.a.should == "an erratum"
159 | 	end
160 | 
161 | 	it "uses 'a' as the indefinite article for 'eucalyptus'" do
162 | 		"eucalyptus".en.a.should == "a eucalyptus"
163 | 	end
164 | 
165 | 	it "uses 'an' as the indefinite article for 'Euler number'" do
166 | 		"Euler number".en.a.should == "an Euler number"
167 | 	end
168 | 
169 | 	it "uses 'a' as the indefinite article for 'eulogy'" do
170 | 		"eulogy".en.a.should == "a eulogy"
171 | 	end
172 | 
173 | 	it "uses 'a' as the indefinite article for 'euphemism'" do
174 | 		"euphemism".en.a.should == "a euphemism"
175 | 	end
176 | 
177 | 	it "uses 'a' as the indefinite article for 'euphoria'" do
178 | 		"euphoria".en.a.should == "a euphoria"
179 | 	end
180 | 
181 | 	it "uses 'a' as the indefinite article for 'ewe'" do
182 | 		"ewe".en.a.should == "a ewe"
183 | 	end
184 | 
185 | 	it "uses 'a' as the indefinite article for 'ewer'" do
186 | 		"ewer".en.a.should == "a ewer"
187 | 	end
188 | 
189 | 	it "uses 'an' as the indefinite article for 'extremum'" do
190 | 		"extremum".en.a.should == "an extremum"
191 | 	end
192 | 
193 | 	it "uses 'an' as the indefinite article for 'eye'" do
194 | 		"eye".en.a.should == "an eye"
195 | 	end
196 | 
197 | 	it "uses 'an' as the indefinite article for 'F.B.I. agent'" do
198 | 		"F.B.I. agent".en.a.should == "an F.B.I. agent"
199 | 	end
200 | 
201 | 	it "uses 'an' as the indefinite article for 'FSM'" do
202 | 		"FSM".en.a.should == "an FSM"
203 | 	end
204 | 
205 | 	it "uses 'a' as the indefinite article for 'FACT'" do
206 | 		"FACT".en.a.should == "a FACT"
207 | 	end
208 | 
209 | 	it "uses 'a' as the indefinite article for 'FAQ'" do
210 | 		"FAQ".en.a.should == "a FAQ"
211 | 	end
212 | 
213 | 	it "uses 'an' as the indefinite article for 'F.A.Q.'" do
214 | 		"F.A.Q.".en.a.should == "an F.A.Q."
215 | 	end
216 | 
217 | 	it "uses 'a' as the indefinite article for 'fish'" do
218 | 		"fish".en.a.should == "a fish"
219 | 	end
220 | 
221 | 	it "uses 'a' as the indefinite article for 'G-string'" do
222 | 		"G-string".en.a.should == "a G-string"
223 | 	end
224 | 
225 | 	it "uses 'a' as the indefinite article for 'GSM phone'" do
226 | 		"GSM phone".en.a.should == "a GSM phone"
227 | 	end
228 | 
229 | 	it "uses 'a' as the indefinite article for 'GOD'" do
230 | 		"GOD".en.a.should == "a GOD"
231 | 	end
232 | 
233 | 	it "uses 'a' as the indefinite article for 'genus'" do
234 | 		"genus".en.a.should == "a genus"
235 | 	end
236 | 
237 | 	it "uses 'a' as the indefinite article for 'Governor General'" do
238 | 		"Governor General".en.a.should == "a Governor General"
239 | 	end
240 | 
241 | 	it "uses 'an' as the indefinite article for 'H-Bomb'" do
242 | 		"H-Bomb".en.a.should == "an H-Bomb"
243 | 	end
244 | 
245 | 	it "uses 'an' as the indefinite article for 'H.M.S Ark Royal'" do
246 | 		"H.M.S Ark Royal".en.a.should == "an H.M.S Ark Royal"
247 | 	end
248 | 
249 | 	it "uses 'an' as the indefinite article for 'HSL colour space'" do
250 | 		"HSL colour space".en.a.should == "an HSL colour space"
251 | 	end
252 | 
253 | 	it "uses 'a' as the indefinite article for 'HAL 9000'" do
254 | 		"HAL 9000".en.a.should == "a HAL 9000"
255 | 	end
256 | 
257 | 	it "uses 'an' as the indefinite article for 'H.A.L. 9000'" do
258 | 		"H.A.L. 9000".en.a.should == "an H.A.L. 9000"
259 | 	end
260 | 
261 | 	it "uses 'a' as the indefinite article for 'has-been'" do
262 | 		"has-been".en.a.should == "a has-been"
263 | 	end
264 | 
265 | 	it "uses 'a' as the indefinite article for 'height'" do
266 | 		"height".en.a.should == "a height"
267 | 	end
268 | 
269 | 	it "uses 'an' as the indefinite article for 'heir'" do
270 | 		"heir".en.a.should == "an heir"
271 | 	end
272 | 
273 | 	it "uses 'a' as the indefinite article for 'honed blade'" do
274 | 		"honed blade".en.a.should == "a honed blade"
275 | 	end
276 | 
277 | 	it "uses 'an' as the indefinite article for 'honest man'" do
278 | 		"honest man".en.a.should == "an honest man"
279 | 	end
280 | 
281 | 	it "uses 'a' as the indefinite article for 'honeymoon'" do
282 | 		"honeymoon".en.a.should == "a honeymoon"
283 | 	end
284 | 
285 | 	it "uses 'an' as the indefinite article for 'honorarium'" do
286 | 		"honorarium".en.a.should == "an honorarium"
287 | 	end
288 | 
289 | 	it "uses 'an' as the indefinite article for 'honorary degree'" do
290 | 		"honorary degree".en.a.should == "an honorary degree"
291 | 	end
292 | 
293 | 	it "uses 'an' as the indefinite article for 'honoree'" do
294 | 		"honoree".en.a.should == "an honoree"
295 | 	end
296 | 
297 | 	it "uses 'an' as the indefinite article for 'honorific'" do
298 | 		"honorific".en.a.should == "an honorific"
299 | 	end
300 | 
301 | 	it "uses 'a' as the indefinite article for 'Hough transform'" do
302 | 		"Hough transform".en.a.should == "a Hough transform"
303 | 	end
304 | 
305 | 	it "uses 'a' as the indefinite article for 'hound'" do
306 | 		"hound".en.a.should == "a hound"
307 | 	end
308 | 
309 | 	it "uses 'an' as the indefinite article for 'hour'" do
310 | 		"hour".en.a.should == "an hour"
311 | 	end
312 | 
313 | 	it "uses 'an' as the indefinite article for 'hourglass'" do
314 | 		"hourglass".en.a.should == "an hourglass"
315 | 	end
316 | 
317 | 	it "uses 'a' as the indefinite article for 'houri'" do
318 | 		"houri".en.a.should == "a houri"
319 | 	end
320 | 
321 | 	it "uses 'a' as the indefinite article for 'house'" do
322 | 		"house".en.a.should == "a house"
323 | 	end
324 | 
325 | 	it "uses 'an' as the indefinite article for 'I.O.U.'" do
326 | 		"I.O.U.".en.a.should == "an I.O.U."
327 | 	end
328 | 
329 | 	it "uses 'an' as the indefinite article for 'IQ'" do
330 | 		"IQ".en.a.should == "an IQ"
331 | 	end
332 | 
333 | 	it "uses 'an' as the indefinite article for 'IDEA'" do
334 | 		"IDEA".en.a.should == "an IDEA"
335 | 	end
336 | 
337 | 	it "uses 'an' as the indefinite article for 'inferno'" do
338 | 		"inferno".en.a.should == "an inferno"
339 | 	end
340 | 
341 | 	it "uses 'an' as the indefinite article for 'Inspector General'" do
342 | 		"Inspector General".en.a.should == "an Inspector General"
343 | 	end
344 | 
345 | 	it "uses 'a' as the indefinite article for 'jumbo'" do
346 | 		"jumbo".en.a.should == "a jumbo"
347 | 	end
348 | 
349 | 	it "uses 'a' as the indefinite article for 'knife'" do
350 | 		"knife".en.a.should == "a knife"
351 | 	end
352 | 
353 | 	it "uses 'an' as the indefinite article for 'L.E.D.'" do
354 | 		"L.E.D.".en.a.should == "an L.E.D."
355 | 	end
356 | 
357 | 	it "uses 'a' as the indefinite article for 'LED'" do
358 | 		"LED".en.a.should == "a LED"
359 | 	end
360 | 
361 | 	it "uses 'an' as the indefinite article for 'LCD'" do
362 | 		"LCD".en.a.should == "an LCD"
363 | 	end
364 | 
365 | 	it "uses 'a' as the indefinite article for 'lady in waiting'" do
366 | 		"lady in waiting".en.a.should == "a lady in waiting"
367 | 	end
368 | 
369 | 	it "uses 'a' as the indefinite article for 'leaf'" do
370 | 		"leaf".en.a.should == "a leaf"
371 | 	end
372 | 
373 | 	it "uses 'an' as the indefinite article for 'M.I.A.'" do
374 | 		"M.I.A.".en.a.should == "an M.I.A."
375 | 	end
376 | 
377 | 	it "uses 'a' as the indefinite article for 'MIASMA'" do
378 | 		"MIASMA".en.a.should == "a MIASMA"
379 | 	end
380 | 
381 | 	it "uses 'an' as the indefinite article for 'MTV channel'" do
382 | 		"MTV channel".en.a.should == "an MTV channel"
383 | 	end
384 | 
385 | 	it "uses 'a' as the indefinite article for 'Major General'" do
386 | 		"Major General".en.a.should == "a Major General"
387 | 	end
388 | 
389 | 	it "uses 'an' as the indefinite article for 'N.C.O.'" do
390 | 		"N.C.O.".en.a.should == "an N.C.O."
391 | 	end
392 | 
393 | 	it "uses 'an' as the indefinite article for 'NCO'" do
394 | 		"NCO".en.a.should == "an NCO"
395 | 	end
396 | 
397 | 	it "uses 'a' as the indefinite article for 'NATO country'" do
398 | 		"NATO country".en.a.should == "a NATO country"
399 | 	end
400 | 
401 | 	it "uses 'a' as the indefinite article for 'note'" do
402 | 		"note".en.a.should == "a note"
403 | 	end
404 | 
405 | 	it "uses 'an' as the indefinite article for 'O.K.'" do
406 | 		"O.K.".en.a.should == "an O.K."
407 | 	end
408 | 
409 | 	it "uses 'an' as the indefinite article for 'OK'" do
410 | 		"OK".en.a.should == "an OK"
411 | 	end
412 | 
413 | 	it "uses 'an' as the indefinite article for 'OLE'" do
414 | 		"OLE".en.a.should == "an OLE"
415 | 	end
416 | 
417 | 	it "uses 'an' as the indefinite article for 'octavo'" do
418 | 		"octavo".en.a.should == "an octavo"
419 | 	end
420 | 
421 | 	it "uses 'an' as the indefinite article for 'octopus'" do
422 | 		"octopus".en.a.should == "an octopus"
423 | 	end
424 | 
425 | 	it "uses 'an' as the indefinite article for 'okay'" do
426 | 		"okay".en.a.should == "an okay"
427 | 	end
428 | 
429 | 	it "uses 'a' as the indefinite article for 'once-and-future-king'" do
430 | 		"once-and-future-king".en.a.should == "a once-and-future-king"
431 | 	end
432 | 
433 | 	it "uses 'an' as the indefinite article for 'oncologist'" do
434 | 		"oncologist".en.a.should == "an oncologist"
435 | 	end
436 | 
437 | 	it "uses 'a' as the indefinite article for 'one night stand'" do
438 | 		"one night stand".en.a.should == "a one night stand"
439 | 	end
440 | 
441 | 	it "uses 'an' as the indefinite article for 'onerous task'" do
442 | 		"onerous task".en.a.should == "an onerous task"
443 | 	end
444 | 
445 | 	it "uses 'an' as the indefinite article for 'opera'" do
446 | 		"opera".en.a.should == "an opera"
447 | 	end
448 | 
449 | 	it "uses 'an' as the indefinite article for 'optimum'" do
450 | 		"optimum".en.a.should == "an optimum"
451 | 	end
452 | 
453 | 	it "uses 'an' as the indefinite article for 'opus'" do
454 | 		"opus".en.a.should == "an opus"
455 | 	end
456 | 
457 | 	it "uses 'an' as the indefinite article for 'ox'" do
458 | 		"ox".en.a.should == "an ox"
459 | 	end
460 | 
461 | 	it "uses 'a' as the indefinite article for 'Ph.D.'" do
462 | 		"Ph.D.".en.a.should == "a Ph.D."
463 | 	end
464 | 
465 | 	it "uses 'a' as the indefinite article for 'PET'" do
466 | 		"PET".en.a.should == "a PET"
467 | 	end
468 | 
469 | 	it "uses 'a' as the indefinite article for 'P.E.T. scan'" do
470 | 		"P.E.T. scan".en.a.should == "a P.E.T. scan"
471 | 	end
472 | 
473 | 	it "uses 'a' as the indefinite article for 'plateau'" do
474 | 		"plateau".en.a.should == "a plateau"
475 | 	end
476 | 
477 | 	it "uses 'a' as the indefinite article for 'quantum'" do
478 | 		"quantum".en.a.should == "a quantum"
479 | 	end
480 | 
481 | 	it "uses 'an' as the indefinite article for 'R.S.V.P.'" do
482 | 		"R.S.V.P.".en.a.should == "an R.S.V.P."
483 | 	end
484 | 
485 | 	it "uses 'an' as the indefinite article for 'RSVP'" do
486 | 		"RSVP".en.a.should == "an RSVP"
487 | 	end
488 | 
489 | 	it "uses 'a' as the indefinite article for 'REST'" do
490 | 		"REST".en.a.should == "a REST"
491 | 	end
492 | 
493 | 	it "uses 'a' as the indefinite article for 'reindeer'" do
494 | 		"reindeer".en.a.should == "a reindeer"
495 | 	end
496 | 
497 | 	it "uses 'an' as the indefinite article for 'S.O.S.'" do
498 | 		"S.O.S.".en.a.should == "an S.O.S."
499 | 	end
500 | 
501 | 	it "uses 'a' as the indefinite article for 'SUM'" do
502 | 		"SUM".en.a.should == "a SUM"
503 | 	end
504 | 
505 | 	it "uses 'an' as the indefinite article for 'SST'" do
506 | 		"SST".en.a.should == "an SST"
507 | 	end
508 | 
509 | 	it "uses 'a' as the indefinite article for 'salmon'" do
510 | 		"salmon".en.a.should == "a salmon"
511 | 	end
512 | 
513 | 	it "uses 'a' as the indefinite article for 'T.N.T. bomb'" do
514 | 		"T.N.T. bomb".en.a.should == "a T.N.T. bomb"
515 | 	end
516 | 
517 | 	it "uses 'a' as the indefinite article for 'TNT bomb'" do
518 | 		"TNT bomb".en.a.should == "a TNT bomb"
519 | 	end
520 | 
521 | 	it "uses 'a' as the indefinite article for 'TENT'" do
522 | 		"TENT".en.a.should == "a TENT"
523 | 	end
524 | 
525 | 	it "uses 'a' as the indefinite article for 'thought'" do
526 | 		"thought".en.a.should == "a thought"
527 | 	end
528 | 
529 | 	it "uses 'a' as the indefinite article for 'tomato'" do
530 | 		"tomato".en.a.should == "a tomato"
531 | 	end
532 | 
533 | 	it "uses 'a' as the indefinite article for 'U-boat'" do
534 | 		"U-boat".en.a.should == "a U-boat"
535 | 	end
536 | 
537 | 	it "uses 'a' as the indefinite article for 'U.F.O.'" do
538 | 		"U.F.O.".en.a.should == "a U.F.O."
539 | 	end
540 | 
541 | 	it "uses 'a' as the indefinite article for 'UFO'" do
542 | 		"UFO".en.a.should == "a UFO"
543 | 	end
544 | 
545 | 	it "uses 'a' as the indefinite article for 'ubiquity'" do
546 | 		"ubiquity".en.a.should == "a ubiquity"
547 | 	end
548 | 
549 | 	it "uses 'a' as the indefinite article for 'unicorn'" do
550 | 		"unicorn".en.a.should == "a unicorn"
551 | 	end
552 | 
553 | 	it "uses 'an' as the indefinite article for 'unidentified flying object'" do
554 | 		"unidentified flying object".en.a.should == "an unidentified flying object"
555 | 	end
556 | 
557 | 	it "uses 'a' as the indefinite article for 'uniform'" do
558 | 		"uniform".en.a.should == "a uniform"
559 | 	end
560 | 
561 | 	it "uses 'a' as the indefinite article for 'unimodal system'" do
562 | 		"unimodal system".en.a.should == "a unimodal system"
563 | 	end
564 | 
565 | 	it "uses 'an' as the indefinite article for 'unimpressive record'" do
566 | 		"unimpressive record".en.a.should == "an unimpressive record"
567 | 	end
568 | 
569 | 	it "uses 'an' as the indefinite article for 'uninformed opinion'" do
570 | 		"uninformed opinion".en.a.should == "an uninformed opinion"
571 | 	end
572 | 
573 | 	it "uses 'an' as the indefinite article for 'uninvited guest'" do
574 | 		"uninvited guest".en.a.should == "an uninvited guest"
575 | 	end
576 | 
577 | 	it "uses 'a' as the indefinite article for 'union'" do
578 | 		"union".en.a.should == "a union"
579 | 	end
580 | 
581 | 	it "uses 'a' as the indefinite article for 'uniplex'" do
582 | 		"uniplex".en.a.should == "a uniplex"
583 | 	end
584 | 
585 | 	it "uses 'a' as the indefinite article for 'uniprocessor'" do
586 | 		"uniprocessor".en.a.should == "a uniprocessor"
587 | 	end
588 | 
589 | 	it "uses 'a' as the indefinite article for 'unique opportunity'" do
590 | 		"unique opportunity".en.a.should == "a unique opportunity"
591 | 	end
592 | 
593 | 	it "uses 'a' as the indefinite article for 'unisex hairdresser'" do
594 | 		"unisex hairdresser".en.a.should == "a unisex hairdresser"
595 | 	end
596 | 
597 | 	it "uses 'a' as the indefinite article for 'unison'" do
598 | 		"unison".en.a.should == "a unison"
599 | 	end
600 | 
601 | 	it "uses 'a' as the indefinite article for 'unit'" do
602 | 		"unit".en.a.should == "a unit"
603 | 	end
604 | 
605 | 	it "uses 'a' as the indefinite article for 'unitarian'" do
606 | 		"unitarian".en.a.should == "a unitarian"
607 | 	end
608 | 
609 | 	it "uses 'a' as the indefinite article for 'united front'" do
610 | 		"united front".en.a.should == "a united front"
611 | 	end
612 | 
613 | 	it "uses 'a' as the indefinite article for 'unity'" do
614 | 		"unity".en.a.should == "a unity"
615 | 	end
616 | 
617 | 	it "uses 'a' as the indefinite article for 'univalent bond'" do
618 | 		"univalent bond".en.a.should == "a univalent bond"
619 | 	end
620 | 
621 | 	it "uses 'a' as the indefinite article for 'univariate statistic'" do
622 | 		"univariate statistic".en.a.should == "a univariate statistic"
623 | 	end
624 | 
625 | 	it "uses 'a' as the indefinite article for 'universe'" do
626 | 		"universe".en.a.should == "a universe"
627 | 	end
628 | 
629 | 	it "uses 'an' as the indefinite article for 'unordered meal'" do
630 | 		"unordered meal".en.a.should == "an unordered meal"
631 | 	end
632 | 
633 | 	it "uses 'a' as the indefinite article for 'uranium atom'" do
634 | 		"uranium atom".en.a.should == "a uranium atom"
635 | 	end
636 | 
637 | 	it "uses 'an' as the indefinite article for 'urban myth'" do
638 | 		"urban myth".en.a.should == "an urban myth"
639 | 	end
640 | 
641 | 	it "uses 'an' as the indefinite article for 'urbane miss'" do
642 | 		"urbane miss".en.a.should == "an urbane miss"
643 | 	end
644 | 
645 | 	it "uses 'an' as the indefinite article for 'urchin'" do
646 | 		"urchin".en.a.should == "an urchin"
647 | 	end
648 | 
649 | 	it "uses 'a' as the indefinite article for 'urea detector'" do
650 | 		"urea detector".en.a.should == "a urea detector"
651 | 	end
652 | 
653 | 	it "uses 'a' as the indefinite article for 'urethane monomer'" do
654 | 		"urethane monomer".en.a.should == "a urethane monomer"
655 | 	end
656 | 
657 | 	it "uses 'an' as the indefinite article for 'urge'" do
658 | 		"urge".en.a.should == "an urge"
659 | 	end
660 | 
661 | 	it "uses 'an' as the indefinite article for 'urgency'" do
662 | 		"urgency".en.a.should == "an urgency"
663 | 	end
664 | 
665 | 	it "uses 'a' as the indefinite article for 'urinal'" do
666 | 		"urinal".en.a.should == "a urinal"
667 | 	end
668 | 
669 | 	it "uses 'an' as the indefinite article for 'urn'" do
670 | 		"urn".en.a.should == "an urn"
671 | 	end
672 | 
673 | 	it "uses 'a' as the indefinite article for 'usage'" do
674 | 		"usage".en.a.should == "a usage"
675 | 	end
676 | 
677 | 	it "uses 'a' as the indefinite article for 'use'" do
678 | 		"use".en.a.should == "a use"
679 | 	end
680 | 
681 | 	it "uses 'an' as the indefinite article for 'usher'" do
682 | 		"usher".en.a.should == "an usher"
683 | 	end
684 | 
685 | 	it "uses 'a' as the indefinite article for 'usual suspect'" do
686 | 		"usual suspect".en.a.should == "a usual suspect"
687 | 	end
688 | 
689 | 	it "uses 'a' as the indefinite article for 'usurer'" do
690 | 		"usurer".en.a.should == "a usurer"
691 | 	end
692 | 
693 | 	it "uses 'a' as the indefinite article for 'usurper'" do
694 | 		"usurper".en.a.should == "a usurper"
695 | 	end
696 | 
697 | 	it "uses 'a' as the indefinite article for 'utensil'" do
698 | 		"utensil".en.a.should == "a utensil"
699 | 	end
700 | 
701 | 	it "uses 'a' as the indefinite article for 'utility'" do
702 | 		"utility".en.a.should == "a utility"
703 | 	end
704 | 
705 | 	it "uses 'an' as the indefinite article for 'utmost urgency'" do
706 | 		"utmost urgency".en.a.should == "an utmost urgency"
707 | 	end
708 | 
709 | 	it "uses 'a' as the indefinite article for 'utopia'" do
710 | 		"utopia".en.a.should == "a utopia"
711 | 	end
712 | 
713 | 	it "uses 'an' as the indefinite article for 'utterance'" do
714 | 		"utterance".en.a.should == "an utterance"
715 | 	end
716 | 
717 | 	it "uses 'a' as the indefinite article for 'V.I.P.'" do
718 | 		"V.I.P.".en.a.should == "a V.I.P."
719 | 	end
720 | 
721 | 	it "uses 'a' as the indefinite article for 'VIPER'" do
722 | 		"VIPER".en.a.should == "a VIPER"
723 | 	end
724 | 
725 | 	it "uses 'a' as the indefinite article for 'viper'" do
726 | 		"viper".en.a.should == "a viper"
727 | 	end
728 | 
729 | 	it "uses 'an' as the indefinite article for 'X-ray'" do
730 | 		"X-ray".en.a.should == "an X-ray"
731 | 	end
732 | 
733 | 	it "uses 'an' as the indefinite article for 'X.O.'" do
734 | 		"X.O.".en.a.should == "an X.O."
735 | 	end
736 | 
737 | 	it "uses 'a' as the indefinite article for 'XYLAPHONE'" do
738 | 		"XYLAPHONE".en.a.should == "a XYLAPHONE"
739 | 	end
740 | 
741 | 	it "uses 'an' as the indefinite article for 'XY chromosome'" do
742 | 		"XY chromosome".en.a.should == "an XY chromosome"
743 | 	end
744 | 
745 | 	it "uses 'a' as the indefinite article for 'xenophobe'" do
746 | 		"xenophobe".en.a.should == "a xenophobe"
747 | 	end
748 | 
749 | 	it "uses 'a' as the indefinite article for 'Y-shaped pipe'" do
750 | 		"Y-shaped pipe".en.a.should == "a Y-shaped pipe"
751 | 	end
752 | 
753 | 	it "uses 'a' as the indefinite article for 'Y.Z. plane'" do
754 | 		"Y.Z. plane".en.a.should == "a Y.Z. plane"
755 | 	end
756 | 
757 | 	it "uses 'a' as the indefinite article for 'YMCA'" do
758 | 		"YMCA".en.a.should == "a YMCA"
759 | 	end
760 | 
761 | 	it "uses 'an' as the indefinite article for 'YBLENT eye'" do
762 | 		"YBLENT eye".en.a.should == "an YBLENT eye"
763 | 	end
764 | 
765 | 	it "uses 'an' as the indefinite article for 'yblent eye'" do
766 | 		"yblent eye".en.a.should == "an yblent eye"
767 | 	end
768 | 
769 | 	it "uses 'an' as the indefinite article for 'yclad body'" do
770 | 		"yclad body".en.a.should == "an yclad body"
771 | 	end
772 | 
773 | 	it "uses 'a' as the indefinite article for 'yellowing'" do
774 | 		"yellowing".en.a.should == "a yellowing"
775 | 	end
776 | 
777 | 	it "uses 'a' as the indefinite article for 'yield'" do
778 | 		"yield".en.a.should == "a yield"
779 | 	end
780 | 
781 | 	it "uses 'a' as the indefinite article for 'youth'" do
782 | 		"youth".en.a.should == "a youth"
783 | 	end
784 | 
785 | 	it "uses 'a' as the indefinite article for 'youth'" do # NOTE(review): identical to the preceding 'youth' example -- confirm whether a different word was intended
786 | 		"youth".en.a.should == "a youth"
787 | 	end
788 | 
789 | 	it "uses 'an' as the indefinite article for 'ypsiliform junction'" do
790 | 		"ypsiliform junction".en.a.should == "an ypsiliform junction"
791 | 	end
792 | 
793 | 	it "uses 'an' as the indefinite article for 'yttrium atom'" do
794 | 		"yttrium atom".en.a.should == "an yttrium atom"
795 | 	end
796 | 
797 | 	it "uses 'a' as the indefinite article for 'zoo'" do
798 | 		"zoo".en.a.should == "a zoo"
799 | 	end
800 | 
801 | 
802 | 	it "uses correct pluralization to form the negative article" do
803 | 		"mouse".en.no.should == "no mice"
804 | 	end
805 | 
806 | 	it "uses currect pluralization for noun phrases to form the negative article" do
807 | 		"univariate statistic".en.no.should == "no univariate statistics"
808 | 	end
809 | 
810 | 	it "uses the correct pluralization for 'Secretary of State' to form the negative article" do
811 | 		"Secretary of State".en.no.should == "no Secretaries of State"
812 | 	end
813 | 
814 | 
815 | 	context "lprintf formatters" do
816 | 
817 | 		it "registers the :A lprintf formatter" do
818 | 			Linguistics::EN.lprintf_formatters.should include( :A )
819 | 		end
820 | 
821 | 		it "registers the :AN lprintf formatter" do
822 | 			Linguistics::EN.lprintf_formatters.should include( :AN )
823 | 		end
824 | 
825 | 		it "adds an indefinite article to the argument to %A" do
826 | 			"You pick up %A.".en.lprintf( "umbrella" ).
827 | 				should == "You pick up an umbrella."
828 | 		end
829 | 
830 | 		it "adds an indefinite article to the argument to %AN" do
831 | 			"You pick up %AN.".en.lprintf( "chocolate bar" ).
832 | 				should == "You pick up a chocolate bar."
833 | 		end
834 | 
835 | 	end
836 | 
837 | end
838 | 
839 | 


--------------------------------------------------------------------------------