├── .dialyzer_ignore_warnings
├── .formatter.exs
├── .gitignore
├── CHANGELOG.md
├── LICENSE.md
├── README.md
├── data
    └── inflector
    │   └── en
    │       ├── additions
    │           ├── category_a_as_ae.txt
    │           ├── category_a_as_ata.txt
    │           ├── category_any_i.txt
    │           ├── category_any_im.txt
    │           ├── category_en_ens_ina.txt
    │           ├── category_ex_exes_ices.txt
    │           ├── category_ex_ices.txt
    │           ├── category_general_generals.txt
    │           ├── category_is_ises_ides.txt
    │           ├── category_o_os.txt
    │           ├── category_o_os_i.txt
    │           ├── category_on_a.txt
    │           ├── category_um_a.txt
    │           ├── category_um_ums_a.txt
    │           ├── category_us_uses_i.txt
    │           ├── category_us_uses_us.txt
    │           ├── irregular_noun.txt
    │           ├── singular_s.txt
    │           └── uninflected_noun.txt
    │       └── en.html
├── lib
    ├── classifier.ex
    ├── corpus.ex
    ├── inflect
    │   └── en.ex
    ├── language.ex
    ├── language
    │   └── classifier
    │   │   ├── cummulative_frequency.ex
    │   │   ├── naive_bayesian.ex
    │   │   └── rank_order.ex
    ├── ngram.ex
    ├── text.ex
    ├── vocabulary.ex
    └── word_count.ex
├── logo.png
├── mix.exs
├── mix.lock
├── mix
    ├── english_infector_data.ex
    └── tasks
    │   └── create_inflector.ex
├── priv
    └── inflection
    │   └── en
    │       └── en.etf
└── test
    ├── irregular_noun_test.exs
    ├── support
        ├── irregular_plurals.csv
        ├── plural_nouns.csv
        └── plurals_helper.ex
    ├── test_helper.exs
    └── text_test.exs


/.dialyzer_ignore_warnings:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kipcole9/text/a93981626c9deb2cdc2bb4bb514b883aa17c792e/.dialyzer_ignore_warnings


--------------------------------------------------------------------------------
/.formatter.exs:
--------------------------------------------------------------------------------
1 | [
2 |   inputs: ["mix.exs", "{config,lib,test,mix}/**/*.{ex,exs}"],
3 |   locals_without_parens: [docp: 1, defparsec: 2, defparsec: 3]
4 | ]
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /_build
 2 | /cover
 3 | /deps
 4 | /doc
 5 | /references
 6 | *.snapshot
 7 | erl_crash.dump
 8 | *.ez
 9 | *.tar
10 | .DS_Store
11 | .iex.exs
12 | /.DS_Store
13 | 
14 | # Generated erlang source
15 | /src/*.erl
16 | 
17 | # asdf
18 | .tool-versions
19 | 
20 | # Excel temp file
21 | corpus/analysis/~$Accuracy Analysis.xlsx
22 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog for Text v0.2.0
 2 | 
 3 | This is the changelog for Text v0.2.0 released on June 28th, 2020.  For older changelogs please consult the release tag on [GitHub](https://github.com/kipcole9/text/tags)
 4 | 
 5 | ### Enhancements
 6 | 
 7 | * Adds pluralization for English words
 8 | 
 9 | * Adds language detection classifiers (corpora are defined in separate libraries, for example [text_corpus_udhr](https://hex.pm/packages/text_corpus_udhr))
10 | 
11 | * Refactor word counting
12 | 
13 | # Changelog for Text v0.1.0
14 | 
15 | This is the changelog for Text v0.1.0 released on August 26th, 2019.  For older changelogs please consult the release tag on [GitHub](https://github.com/kipcole9/text/tags)
16 | 
17 | ### Enhancements
18 | 
19 | * Initial version implementing `ngram`s.
20 | 
21 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | ## License
 2 | 
 3 | Copyright 2020 Kip Cole
 4 | 
 5 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
 6 | compliance with the License. You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software distributed under the License
11 | is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 | implied. See the License for the specific language governing permissions and limitations under the
13 | License.
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Text
 2 | 
 3 | Text & language processing for Elixir.  Initial release focuses on:
 4 | 
 5 | * [x] n-gram generation from text
 6 | * [x] pluralization of English words
 7 | * [x] word counting (word frequencies)
 8 | * [x] language detection using pluggable classifier, vocabulary and corpus backends.
 9 | 
10 | Second phase will focus on:
11 | 
12 | * Stemming
13 | * tokenization and part-of-speech tagging (at least for English)
14 | * Sentiment analysis
15 | 
16 | Each of these phases requires prior development. See [below](#down-the-rabbit-hole).
17 | 
18 | ## Status Update Sept 2021
19 | 
20 | The `Text` project remains active and maintained. However, with the advent of the amazing [Numerical Elixir (Nx)](https://github.com/elixir-nx) project, many improved opportunities to leverage ML for text analysis open up and this is the planned path.  I expect to focus on using ML for the additional planned functionality as a calendar year 2022 project.  Bug reports, PRs and suggestions are welcome!
21 | 
22 | ## Installation
23 | 
24 | ```elixir
25 | def deps do
26 |   [
27 |     {:text, "~> 0.2.0"}
28 |   ]
29 | end
30 | ```
31 | 
32 | ## Word Counting
33 | 
34 | `text` contains an implementation of word counting that is oriented towards large streams of words rather than discrete strings. Input to `Text.Word.word_count/2` can be a `String.t`, `File.Stream.t` or `Flow.t` allowing flexible streaming of text.
35 | 
36 | ## English Pluralization
37 | 
38 | `text` includes an inflector for the English language that takes an approach based upon  [An Algorithmic Approach to English Pluralization](http://users.monash.edu/~damian/papers/HTML/Plurals.html). See the module `Text.Inflect.En` and the functions:
39 | 
40 | * `Text.Inflect.En.pluralize/2`
41 | * `Text.Inflect.En.pluralize_noun/2`
42 | * `Text.Inflect.En.pluralize_verb/1`
43 | * `Text.Inflect.En.pluralize_adjective/1`
44 | 
45 | ## Language Detection
46 | 
47 | `text` contains 3 language classifiers to aid in natural language detection. However it does not include any corpora; these are contained in separate libraries. The available classifiers are:
48 | 
49 | * `Text.Language.Classifier.CummulativeFrequency`
50 | * `Text.Language.Classifier.NaiveBayesian`
51 | * `Text.Language.Classifier.RankOrder`
52 | 
53 | Additional classifiers can be added by defining a module that implements the `Text.Language.Classifier` behaviour.
54 | 
55 | The library [text_corpus_udhr](https://hex.pm/packages/text_corpus_udhr) implements the `Text.Corpus` behaviour for the [Universal Declaration of Human Rights](https://en.wikipedia.org/wiki/Universal_Declaration_of_Human_Rights) which is available for download in 423 languages from [Unicode](https://unicode.org/udhr/).
56 | 
57 | See `Text.Language.detect/2`.
58 | 
59 | ## N-Gram generation
60 | 
61 | The `Text.Ngram` module supports efficient generation of n-grams of length `2` to `7`. See `Text.Ngram.ngram/2`.
62 | 
63 | ## Down the rabbit hole
64 | 
65 | Text analysis at a fundamental level requires segmenting arbitrary text in any language into characters (graphemes), words and sentences. This is a complex topic covered by the [Unicode text segmentation](https://unicode.org/reports/tr29) standard augmented by localised rules in [CLDR's](https://cldr.unicode.org) [segmentations](https://unicode-org.github.io/cldr/ldml/tr35-general.html#Segmentations) data.
66 | 
67 | Therefore in order to provide higher order text analysis the order of development looks like this:
68 | 
69 | 1. Finish the [Unicode regular expression](http://unicode.org/reports/tr18/) engine in [ex_unicode_set](https://github.com/elixir-unicode/unicode_set). Most of the work is complete but compound character classes need further work.  Unicode regular expressions are required to implement both [Unicode transforms](https://unicode.org/reports/tr35/tr35-general.html#Transforms) and [Unicode segmentation](https://unicode.org/reports/tr35/tr35-general.html#Segmentations).
70 | 
71 | 2. Implement basic Unicode word and sentence segmentation in [ex_unicode_string](https://github.com/elixir-unicode/unicode_string). Grapheme cluster segmentation is available in the standard library as `String.graphemes/1`
72 | 
73 | 3. Add CLDR tailorings for locale-specific segmentation of words and sentences.
74 | 
75 | 4. Finish up the [Snowball](https://snowballstem.org) stemming compiler. There is a lot to do here, only the parser is partially complete.
76 | 
77 | 5. Implement stemming
78 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_a_as_ae.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A10 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | 
8 | minutia


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_a_as_ata.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A12 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_any_i.txt:
--------------------------------------------------------------------------------
 1 | # Corresponds to table A24 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
 2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
 3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
 4 | #
 5 | # Add additional words here that will be incorporated into the library at *build* time
 6 | # of the library.
 7 | #
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_any_im.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A25 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 
9 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_en_ens_ina.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A13 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 
9 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_ex_exes_ices.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A15 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 
9 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_ex_ices.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A14 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_general_generals.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A26 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_is_ises_ides.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A16 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | 
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_o_os.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A17 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_o_os_i.txt:
--------------------------------------------------------------------------------
 1 | # Corresponds to table A18 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
 2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
 3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
 4 | #
 5 | # Add additional words here that will be incorporated into the library at *build* time
 6 | # of the library.
 7 | #
 8 | 
 9 | libretto
10 | concerto


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_on_a.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A19 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | oxymoron
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_um_a.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A20 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_um_ums_a.txt:
--------------------------------------------------------------------------------
 1 | # Corresponds to table A21 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
 2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
 3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
 4 | #
 5 | # Add additional words here that will be incorporated into the library at *build* time
 6 | # of the library.
 7 | #
 8 | 
 9 | addendum
10 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_us_uses_i.txt:
--------------------------------------------------------------------------------
 1 | # Corresponds to table A22 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
 2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
 3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
 4 | 
 5 | # Add additional words here that will be incorporated into the library at *build* time
 6 | # of the library.
 7 | #
 8 | 
 9 | alumnus
10 | stimulus
11 | nucleus
12 | syllabus
13 | locus
14 | bacillus
15 | cactus
16 | virus
17 | genius
18 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/category_us_uses_us.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A23 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | #
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/irregular_noun.txt:
--------------------------------------------------------------------------------
 1 | # Corresponds to table A1 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
 2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
 3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
 4 | #
 5 | # Add additional words here that will be incorporated into the library at *build* time
 6 | # of the library.
 7 | # Contains additional words that
 8 | # have irregular inflection
 9 | #
10 | # The format of an entry is
11 | # word, plural in modern mode, plural in classical mode
12 | #
13 | # The plural in classical mode is optional.
14 | # Trailing commas are not permitted
15 | # Words are trimmed of white space
16 | # Blank lines are ignored
17 | #
18 | 
19 | quiz, quizzes
20 | graffito, graffiti
21 | vita, vitae
22 | corpus, corpora
23 | thief, thieves
24 | loaf, loaves
25 | opus, opuses
26 | genus, genera
27 | appendix, appendixes, appendices
28 | fez, fezzes
29 | bus, busses
30 | business, businesses
31 | platypus, platypuses, platypodes
32 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/singular_s.txt:
--------------------------------------------------------------------------------
1 | # Corresponds to table A3 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
4 | #
5 | # Add additional words here that will be incorporated into the library at *build* time
6 | # of the library.
7 | 
8 | 


--------------------------------------------------------------------------------
/data/inflector/en/additions/uninflected_noun.txt:
--------------------------------------------------------------------------------
 1 | # Corresponds to table A2 at http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html
 2 | # Data is in support of the paper "An Algorithmic Approach to English Pluralization"
 3 | # which is found at http://users.monash.edu/~damian/papers/HTML/Plurals.html
 4 | 
 5 | # Add additional words here that will be incorporated into the library at *build* time
 6 | # of the library.
 7 | 
 8 | # Contains additional words that have no inflection
 9 | #
10 | graffiti
11 | moose
12 | aircraft
13 | faux pas
14 | means
15 | offspring
16 | grapefruit
17 | shrimp
18 | data
19 | dice
20 | staff
21 | equipment
22 | software
23 | hardware
24 | 


--------------------------------------------------------------------------------
/data/inflector/en/en.html:
--------------------------------------------------------------------------------
   1 | <HTML>
   2 | <HEAD>
   3 |    <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
   4 |    <META NAME="GENERATOR" CONTENT="Mozilla/4.03 [en] (X11; U; IRIX 6.2 IP22) [Netscape]">
   5 |    <TITLE>An Algorithmic Approach to English Pluralization</TITLE>
   6 | <!-- This document was created from RTF source by rtftohtml version 3.9.3 -->
   7 | </HEAD>
   8 | <BODY TEXT="#000000" BGCOLOR="#FFFFFF">
   9 | 
  10 | <H1>
  11 | Appendix A - Plural categories</H1>
  12 | Note: This appendix belongs to the paper: Conway, D., <I><A HREF="http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html">An
  13 | Algorithmic Approach to English Pluralization</A>.</I>
  14 | <H4>
  15 | <A NAME="Heading45"></A>Table A.1: Irregular nouns</H4>
  16 | 
  17 | <CENTER><TABLE BORDER >
  18 | <TR ALIGN=CENTER VALIGN=CENTER>
  19 | <TD ALIGN=CENTER><B>Singular form</B></TD>
  20 | 
  21 | <TD ALIGN=CENTER><B>Anglicized plural</B></TD>
  22 | 
  23 | <TD ALIGN=CENTER><B>Classical plural</B></TD>
  24 | </TR>
  25 | 
  26 | <TR ALIGN=CENTER VALIGN=CENTER>
  27 | <TD ALIGN=CENTER><B><TT>beef</TT></B></TD>
  28 | 
  29 | <TD ALIGN=CENTER><B><TT>beefs</TT></B></TD>
  30 | 
  31 | <TD ALIGN=CENTER><B><TT>beeves</TT></B></TD>
  32 | </TR>
  33 | 
  34 | <TR ALIGN=CENTER VALIGN=CENTER>
  35 | <TD ALIGN=CENTER><B><TT>brother</TT></B></TD>
  36 | 
  37 | <TD ALIGN=CENTER><B><TT>brothers</TT></B></TD>
  38 | 
  39 | <TD ALIGN=CENTER><B><TT>brethren</TT></B></TD>
  40 | </TR>
  41 | 
  42 | <TR ALIGN=CENTER VALIGN=CENTER>
  43 | <TD ALIGN=CENTER><B><TT>child</TT></B></TD>
  44 | 
  45 | <TD ALIGN=CENTER><I>(none)</I></TD>
  46 | 
  47 | <TD ALIGN=CENTER><B><TT>children</TT></B></TD>
  48 | </TR>
  49 | 
  50 | <TR ALIGN=CENTER VALIGN=CENTER>
  51 | <TD ALIGN=CENTER><B><TT>cow</TT></B></TD>
  52 | 
  53 | <TD ALIGN=CENTER><B><TT>cows</TT></B></TD>
  54 | 
  55 | <TD ALIGN=CENTER><B><TT>kine</TT></B></TD>
  56 | </TR>
  57 | 
  58 | <TR ALIGN=CENTER VALIGN=CENTER>
  59 | <TD ALIGN=CENTER><B><TT>ephemeris</TT></B></TD>
  60 | 
  61 | <TD ALIGN=CENTER><I>(none)</I></TD>
  62 | 
  63 | <TD ALIGN=CENTER><B><TT>ephemerides</TT></B></TD>
  64 | </TR>
  65 | 
  66 | <TR ALIGN=CENTER VALIGN=CENTER>
  67 | <TD ALIGN=CENTER><B><TT>genie</TT></B></TD>
  68 | 
  69 | <TD ALIGN=CENTER><B><TT>genies</TT></B></TD>
  70 | 
  71 | <TD ALIGN=CENTER><B><TT>genii</TT></B></TD>
  72 | </TR>
  73 | 
  74 | <TR ALIGN=CENTER VALIGN=CENTER>
  75 | <TD ALIGN=CENTER><B><TT>money</TT></B></TD>
  76 | 
  77 | <TD ALIGN=CENTER><B><TT>moneys</TT></B></TD>
  78 | 
  79 | <TD ALIGN=CENTER><B><TT>monies</TT></B></TD>
  80 | </TR>
  81 | 
  82 | <TR ALIGN=CENTER VALIGN=CENTER>
  83 | <TD ALIGN=CENTER><B><TT>mongoose</TT></B></TD>
  84 | 
  85 | <TD ALIGN=CENTER><B><TT>mongooses</TT></B></TD>
  86 | 
  87 | <TD ALIGN=CENTER><I>(none)</I></TD>
  88 | </TR>
  89 | 
  90 | <TR ALIGN=CENTER VALIGN=CENTER>
  91 | <TD ALIGN=CENTER><B><TT>mythos</TT></B></TD>
  92 | 
  93 | <TD ALIGN=CENTER><I>(none)</I></TD>
  94 | 
  95 | <TD ALIGN=CENTER><B><TT>mythoi</TT></B></TD>
  96 | </TR>
  97 | 
  98 | <TR ALIGN=CENTER VALIGN=CENTER>
  99 | <TD ALIGN=CENTER><B><TT>octopus</TT></B></TD>
 100 | 
 101 | <TD ALIGN=CENTER><B><TT>octopuses</TT></B></TD>
 102 | 
 103 | <TD ALIGN=CENTER><B><TT>octopodes</TT></B></TD>
 104 | </TR>
 105 | 
 106 | <TR ALIGN=CENTER VALIGN=CENTER>
 107 | <TD ALIGN=CENTER><B><TT>ox</TT></B></TD>
 108 | 
 109 | <TD ALIGN=CENTER><I>(none)</I></TD>
 110 | 
 111 | <TD ALIGN=CENTER><B><TT>oxen</TT></B></TD>
 112 | </TR>
 113 | 
 114 | <TR ALIGN=CENTER VALIGN=CENTER>
 115 | <TD ALIGN=CENTER><B><TT>soliloquy</TT></B></TD>
 116 | 
 117 | <TD ALIGN=CENTER><B><TT>soliloquies</TT></B></TD>
 118 | 
 119 | <TD ALIGN=CENTER><I>(none)</I></TD>
 120 | </TR>
 121 | 
 122 | <TR ALIGN=CENTER VALIGN=CENTER>
 123 | <TD ALIGN=CENTER><B><TT>trilby</TT></B></TD>
 124 | 
 125 | <TD ALIGN=CENTER><B><TT>trilbys</TT></B></TD>
 126 | 
 127 | <TD ALIGN=CENTER><I>(none)</I></TD>
 128 | </TR>
 129 | </TABLE></CENTER>
 130 | 
 131 | <H4>
 132 | <A NAME="Heading46"></A>Table A.2: Uninflected nouns</H4>
 133 | 
 134 | <CENTER><TABLE BORDER >
 135 | <TR ALIGN=CENTER VALIGN=CENTER>
 136 | <TD ALIGN=CENTER><B><TT>bison</TT></B></TD>
 137 | 
 138 | <TD ALIGN=CENTER><B><TT>flounder</TT></B></TD>
 139 | 
 140 | <TD ALIGN=CENTER><B><TT>pliers</TT></B></TD>
 141 | </TR>
 142 | 
 143 | <TR ALIGN=CENTER VALIGN=CENTER>
 144 | <TD ALIGN=CENTER><B><TT>bream</TT></B></TD>
 145 | 
 146 | <TD ALIGN=CENTER><B><TT>gallows</TT></B></TD>
 147 | 
 148 | <TD ALIGN=CENTER><B><TT>proceedings</TT></B></TD>
 149 | </TR>
 150 | 
 151 | <TR ALIGN=CENTER VALIGN=CENTER>
 152 | <TD ALIGN=CENTER><B><TT>breeches</TT></B></TD>
 153 | 
 154 | <TD ALIGN=CENTER><B><TT>graffiti</TT></B></TD>
 155 | 
 156 | <TD ALIGN=CENTER><B><TT>rabies</TT></B></TD>
 157 | </TR>
 158 | 
 159 | <TR ALIGN=CENTER VALIGN=CENTER>
 160 | <TD ALIGN=CENTER><B><TT>britches</TT></B></TD>
 161 | 
 162 | <TD ALIGN=CENTER><B><TT>headquarters</TT></B></TD>
 163 | 
 164 | <TD ALIGN=CENTER><B><TT>salmon</TT></B></TD>
 165 | </TR>
 166 | 
 167 | <TR ALIGN=CENTER VALIGN=CENTER>
 168 | <TD ALIGN=CENTER><B><TT>carp</TT></B></TD>
 169 | 
 170 | <TD ALIGN=CENTER><B><TT>herpes</TT></B></TD>
 171 | 
 172 | <TD ALIGN=CENTER><B><TT>scissors</TT></B></TD>
 173 | </TR>
 174 | 
 175 | <TR ALIGN=CENTER VALIGN=CENTER>
 176 | <TD ALIGN=CENTER><B><TT>chassis</TT></B></TD>
 177 | 
 178 | <TD ALIGN=CENTER><B><TT>high-jinks</TT></B></TD>
 179 | 
 180 | <TD ALIGN=CENTER><B><TT>sea-bass</TT></B></TD>
 181 | </TR>
 182 | 
 183 | <TR ALIGN=CENTER VALIGN=CENTER>
 184 | <TD ALIGN=CENTER><B><TT>clippers</TT></B></TD>
 185 | 
 186 | <TD ALIGN=CENTER><B><TT>homework</TT></B></TD>
 187 | 
 188 | <TD ALIGN=CENTER><B><TT>series</TT></B></TD>
 189 | </TR>
 190 | 
 191 | <TR ALIGN=CENTER VALIGN=CENTER>
 192 | <TD ALIGN=CENTER><B><TT>cod</TT></B></TD>
 193 | 
 194 | <TD ALIGN=CENTER><B><TT>innings</TT></B></TD>
 195 | 
 196 | <TD ALIGN=CENTER><B><TT>shears</TT></B></TD>
 197 | </TR>
 198 | 
 199 | <TR ALIGN=CENTER VALIGN=CENTER>
 200 | <TD ALIGN=CENTER><B><TT>contretemps</TT></B></TD>
 201 | 
 202 | <TD ALIGN=CENTER><B><TT>jackanapes</TT></B></TD>
 203 | 
 204 | <TD ALIGN=CENTER><B><TT>species</TT></B></TD>
 205 | </TR>
 206 | 
 207 | <TR ALIGN=CENTER VALIGN=CENTER>
 208 | <TD ALIGN=CENTER><B><TT>corps</TT></B></TD>
 209 | 
 210 | <TD ALIGN=CENTER><B><TT>mackerel</TT></B></TD>
 211 | 
 212 | <TD ALIGN=CENTER><B><TT>swine</TT></B></TD>
 213 | </TR>
 214 | 
 215 | <TR ALIGN=CENTER VALIGN=CENTER>
 216 | <TD ALIGN=CENTER><B><TT>debris</TT></B></TD>
 217 | 
 218 | <TD ALIGN=CENTER><B><TT>measles</TT></B></TD>
 219 | 
 220 | <TD ALIGN=CENTER><B><TT>trout</TT></B></TD>
 221 | </TR>
 222 | 
 223 | <TR ALIGN=CENTER VALIGN=CENTER>
 224 | <TD ALIGN=CENTER><B><TT>diabetes</TT></B></TD>
 225 | 
 226 | <TD ALIGN=CENTER><B><TT>mews</TT></B></TD>
 227 | 
 228 | <TD ALIGN=CENTER><B><TT>tuna</TT></B></TD>
 229 | </TR>
 230 | 
 231 | <TR ALIGN=CENTER VALIGN=CENTER>
 232 | <TD ALIGN=CENTER><B><TT>djinn</TT></B></TD>
 233 | 
 234 | <TD ALIGN=CENTER><B><TT>mumps</TT></B></TD>
 235 | 
 236 | <TD ALIGN=CENTER><B><TT>whiting</TT></B></TD>
 237 | </TR>
 238 | 
 239 | <TR ALIGN=CENTER VALIGN=CENTER>
 240 | <TD ALIGN=CENTER><B><TT>eland</TT></B></TD>
 241 | 
 242 | <TD ALIGN=CENTER><B><TT>news</TT></B></TD>
 243 | 
 244 | <TD ALIGN=CENTER><B><TT>wildebeest</TT></B></TD>
 245 | </TR>
 246 | 
 247 | <TR ALIGN=CENTER VALIGN=CENTER>
 248 | <TD ALIGN=CENTER><B><TT>elk</TT></B></TD>
 249 | 
 250 | <TD ALIGN=CENTER><B><TT>pincers</TT></B></TD>
 251 | 
 252 | <TD ALIGN=CENTER><B><TT></TT></B></TD>
 253 | </TR>
 254 | </TABLE></CENTER>
 255 | 
 256 | <H4>
 257 | <A NAME="Heading47"></A>Table A.3: Singular nouns ending in a single <B><TT>-s</TT></B></H4>
 258 | 
 259 | <CENTER><TABLE BORDER >
 260 | <TR ALIGN=CENTER VALIGN=CENTER>
 261 | <TD ALIGN=CENTER><B><TT>acropolis</TT></B></TD>
 262 | 
 263 | <TD ALIGN=CENTER><B><TT>chaos</TT></B></TD>
 264 | 
 265 | <TD ALIGN=CENTER><B><TT>lens</TT></B></TD>
 266 | </TR>
 267 | 
 268 | <TR ALIGN=CENTER VALIGN=CENTER>
 269 | <TD ALIGN=CENTER><B><TT>aegis</TT></B></TD>
 270 | 
 271 | <TD ALIGN=CENTER><B><TT>cosmos</TT></B></TD>
 272 | 
 273 | <TD ALIGN=CENTER><B><TT>mantis</TT></B></TD>
 274 | </TR>
 275 | 
 276 | <TR ALIGN=CENTER VALIGN=CENTER>
 277 | <TD ALIGN=CENTER><B><TT>alias</TT></B></TD>
 278 | 
 279 | <TD ALIGN=CENTER><B><TT>dais</TT></B></TD>
 280 | 
 281 | <TD ALIGN=CENTER><B><TT>marquis</TT></B></TD>
 282 | </TR>
 283 | 
 284 | <TR ALIGN=CENTER VALIGN=CENTER>
 285 | <TD ALIGN=CENTER><B><TT>asbestos</TT></B></TD>
 286 | 
 287 | <TD ALIGN=CENTER><B><TT>digitalis</TT></B></TD>
 288 | 
 289 | <TD ALIGN=CENTER><B><TT>metropolis</TT></B></TD>
 290 | </TR>
 291 | 
 292 | <TR ALIGN=CENTER VALIGN=CENTER>
 293 | <TD ALIGN=CENTER><B><TT>atlas</TT></B></TD>
 294 | 
 295 | <TD ALIGN=CENTER><B><TT>epidermis</TT></B></TD>
 296 | 
 297 | <TD ALIGN=CENTER><B><TT>pathos</TT></B></TD>
 298 | </TR>
 299 | 
 300 | <TR ALIGN=CENTER VALIGN=CENTER>
 301 | <TD ALIGN=CENTER><B><TT>bathos</TT></B></TD>
 302 | 
 303 | <TD ALIGN=CENTER><B><TT>ethos</TT></B></TD>
 304 | 
 305 | <TD ALIGN=CENTER><B><TT>pelvis</TT></B></TD>
 306 | </TR>
 307 | 
 308 | <TR ALIGN=CENTER VALIGN=CENTER>
 309 | <TD ALIGN=CENTER><B><TT>bias</TT></B></TD>
 310 | 
 311 | <TD ALIGN=CENTER><B><TT>gas</TT></B></TD>
 312 | 
 313 | <TD ALIGN=CENTER><B><TT>polis</TT></B></TD>
 314 | </TR>
 315 | 
 316 | <TR ALIGN=CENTER VALIGN=CENTER>
 317 | <TD ALIGN=CENTER><B><TT>caddis</TT></B></TD>
 318 | 
 319 | <TD ALIGN=CENTER><B><TT>glottis</TT></B></TD>
 320 | 
 321 | <TD ALIGN=CENTER><B><TT>rhinoceros</TT></B></TD>
 322 | </TR>
 323 | 
 324 | <TR ALIGN=CENTER VALIGN=CENTER>
 325 | <TD ALIGN=CENTER><B><TT>cannabis</TT></B></TD>
 326 | 
 327 | <TD ALIGN=CENTER><B><TT>glottis</TT></B></TD>
 328 | 
 329 | <TD ALIGN=CENTER><B><TT>sassafras</TT></B></TD>
 330 | </TR>
 331 | 
 332 | <TR ALIGN=CENTER VALIGN=CENTER>
 333 | <TD ALIGN=CENTER><B><TT>canvas</TT></B></TD>
 334 | 
 335 | <TD ALIGN=CENTER><B><TT>ibis</TT></B></TD>
 336 | 
 337 | <TD ALIGN=CENTER><B><TT>trellis</TT></B></TD>
 338 | </TR>
 339 | </TABLE></CENTER>
 340 | 
 341 | <H4>
 342 | <A NAME="Heading48"></A>Table A.4: Sample ambiguous words (nouns or verbs)</H4>
 343 | 
 344 | <CENTER><TABLE BORDER >
 345 | <TR ALIGN=CENTER VALIGN=CENTER>
 346 | <TD ALIGN=CENTER><B><TT>act</TT></B></TD>
 347 | 
 348 | <TD ALIGN=CENTER><B><TT>fight</TT></B></TD>
 349 | 
 350 | <TD ALIGN=CENTER><B><TT>run</TT></B></TD>
 351 | </TR>
 352 | 
 353 | <TR ALIGN=CENTER VALIGN=CENTER>
 354 | <TD ALIGN=CENTER><B><TT>bend</TT></B></TD>
 355 | 
 356 | <TD ALIGN=CENTER><B><TT>fire</TT></B></TD>
 357 | 
 358 | <TD ALIGN=CENTER><B><TT>saw</TT></B></TD>
 359 | </TR>
 360 | 
 361 | <TR ALIGN=CENTER VALIGN=CENTER>
 362 | <TD ALIGN=CENTER><B><TT>bent</TT></B></TD>
 363 | 
 364 | <TD ALIGN=CENTER><B><TT>like</TT></B></TD>
 365 | 
 366 | <TD ALIGN=CENTER><B><TT>sink</TT></B></TD>
 367 | </TR>
 368 | 
 369 | <TR ALIGN=CENTER VALIGN=CENTER>
 370 | <TD ALIGN=CENTER><B><TT>blame</TT></B></TD>
 371 | 
 372 | <TD ALIGN=CENTER><B><TT>look</TT></B></TD>
 373 | 
 374 | <TD ALIGN=CENTER><B><TT>sleep</TT></B></TD>
 375 | </TR>
 376 | 
 377 | <TR ALIGN=CENTER VALIGN=CENTER>
 378 | <TD ALIGN=CENTER><B><TT>copy</TT></B></TD>
 379 | 
 380 | <TD ALIGN=CENTER><B><TT>make</TT></B></TD>
 381 | 
 382 | <TD ALIGN=CENTER><B><TT>thought</TT></B></TD>
 383 | </TR>
 384 | 
 385 | <TR ALIGN=CENTER VALIGN=CENTER>
 386 | <TD ALIGN=CENTER><B><TT>cut</TT></B></TD>
 387 | 
 388 | <TD ALIGN=CENTER><B><TT>might</TT></B></TD>
 389 | 
 390 | <TD ALIGN=CENTER><B><TT>view</TT></B></TD>
 391 | </TR>
 392 | 
 393 | <TR ALIGN=CENTER VALIGN=CENTER>
 394 | <TD ALIGN=CENTER><B><TT>drink</TT></B></TD>
 395 | 
 396 | <TD ALIGN=CENTER><B><TT>reach</TT></B></TD>
 397 | 
 398 | <TD ALIGN=CENTER><B><TT>will</TT></B></TD>
 399 | </TR>
 400 | </TABLE></CENTER>
 401 | 
 402 | <H4>
 403 | <A NAME="Heading49"></A>Table A.5: Personal pronouns (nominative, accusative,
 404 | and reflexive)</H4>
 405 | 
 406 | <CENTER><TABLE BORDER >
 407 | <TR ALIGN=CENTER VALIGN=CENTER>
 408 | <TD ALIGN=CENTER><B><TT>1st Person</TT></B></TD>
 409 | 
 410 | <TD ALIGN=CENTER><B><TT>2nd Person</TT></B></TD>
 411 | 
 412 | <TD ALIGN=CENTER><B><TT>3rd Person</TT></B></TD>
 413 | </TR>
 414 | 
 415 | <TR ALIGN=CENTER VALIGN=CENTER>
 416 | <TD ALIGN=CENTER><TT><B>I </B>-></TT> <B><TT>we</TT></B></TD>
 417 | 
 418 | <TD ALIGN=CENTER><TT><B>you </B>-></TT> <B><TT>you</TT></B>&nbsp;
 419 | <BR><B><TT>thou</TT></B> <TT>-></TT> <B><TT>you|ye</TT></B></TD>
 420 | 
 421 | <TD ALIGN=CENTER><TT><B>she </B>-></TT> <B><TT>they</TT></B>&nbsp;
 422 | <BR><TT><B>he </B>-></TT> <B><TT>they</TT></B>&nbsp;
 423 | <BR><TT><B>it </B>-></TT> <B><TT>they</TT></B>&nbsp;
 424 | <BR><TT><B>they </B>-></TT> <B><TT>they</TT></B></TD>
 425 | </TR>
 426 | 
 427 | <TR ALIGN=CENTER VALIGN=CENTER>
 428 | <TD ALIGN=CENTER><TT><B>me </B>-></TT> <B><TT>us</TT></B></TD>
 429 | 
 430 | <TD ALIGN=CENTER><TT><B>you </B>-></TT> <B><TT>you</TT></B>&nbsp;
 431 | <BR><B><TT>thee</TT></B> <TT>-></TT> <B><TT>you|ye</TT></B></TD>
 432 | 
 433 | <TD ALIGN=CENTER><TT><B>her </B>-></TT> <B><TT>them</TT></B>&nbsp;
 434 | <BR><TT><B>him </B>-></TT> <B><TT>them</TT></B>&nbsp;
 435 | <BR><TT><B>it </B>-></TT> <B><TT>them</TT></B>&nbsp;
 436 | <BR><TT><B>them </B>-></TT> <B><TT>them</TT></B></TD>
 437 | </TR>
 438 | 
 439 | <TR ALIGN=CENTER VALIGN=CENTER>
 440 | <TD ALIGN=CENTER><TT><B>myself </B>-></TT> <B><TT>ourselves</TT></B></TD>
 441 | 
 442 | <TD ALIGN=CENTER><TT><B>yourself </B>-></TT> <B><TT>yourselves</TT></B>&nbsp;
 443 | <BR><TT><B>thyself </B>-></TT> <B><TT>yourselves</TT></B></TD>
 444 | 
 445 | <TD ALIGN=CENTER><TT><B>herself </B>-></TT> <B><TT>themselves</TT></B>&nbsp;
 446 | <BR><TT><B>himself </B>-></TT> <B><TT>themselves</TT></B>&nbsp;
 447 | <BR><TT><B>itself </B>-></TT> <B><TT>themselves</TT></B>&nbsp;
 448 | <BR><TT><B>themself </B>-></TT> <B><TT>themselves</TT></B>&nbsp;
 449 | <BR><B><TT>oneself</TT></B> <TT>-></TT> <B><TT>oneselves</TT></B></TD>
 450 | </TR>
 451 | </TABLE></CENTER>
 452 | 
 453 | <H4>
 454 | <A NAME="Heading50"></A>Table A.6: Possessive pronouns</H4>
 455 | 
 456 | <CENTER><TABLE BORDER >
 457 | <TR ALIGN=CENTER VALIGN=CENTER>
 458 | <TD ALIGN=CENTER><B><TT>1st Person</TT></B></TD>
 459 | 
 460 | <TD ALIGN=CENTER><B><TT>2nd Person</TT></B></TD>
 461 | 
 462 | <TD ALIGN=CENTER><B><TT>3rd Person</TT></B></TD>
 463 | </TR>
 464 | 
 465 | <TR ALIGN=CENTER VALIGN=CENTER>
 466 | <TD ALIGN=CENTER><TT><B>mine </B>-></TT> <B><TT>ours</TT></B></TD>
 467 | 
 468 | <TD ALIGN=CENTER><TT><B>yours </B>-></TT> <B><TT>yours</TT></B>&nbsp;
 469 | <BR><TT><B>thine </B>-></TT> <B><TT>yours</TT></B></TD>
 470 | 
 471 | <TD ALIGN=CENTER><TT><B>hers </B>-></TT> <B><TT>theirs</TT></B>&nbsp;
 472 | <BR><TT><B>his </B>-></TT> <B><TT>theirs</TT></B>&nbsp;
 473 | <BR><TT><B>its </B>-></TT> <B><TT>theirs</TT></B>&nbsp;
 474 | <BR><TT><B>theirs </B>-></TT> <B><TT>theirs</TT></B></TD>
 475 | </TR>
 476 | </TABLE></CENTER>
 477 | 
 478 | <H4>
 479 | <A NAME="Heading51"></A>Table A.7: Personal possessive adjectives</H4>
 480 | 
 481 | <CENTER><TABLE BORDER >
 482 | <TR ALIGN=CENTER VALIGN=CENTER>
 483 | <TD ALIGN=CENTER><B><TT>1st Person</TT></B></TD>
 484 | 
 485 | <TD ALIGN=CENTER><B><TT>2nd Person</TT></B></TD>
 486 | 
 487 | <TD ALIGN=CENTER><B><TT>3rd Person</TT></B></TD>
 488 | </TR>
 489 | 
 490 | <TR ALIGN=CENTER VALIGN=CENTER>
 491 | <TD ALIGN=CENTER><TT><B>my </B>-></TT> <B><TT>our</TT></B></TD>
 492 | 
 493 | <TD ALIGN=CENTER><TT><B>your </B>-></TT> <B><TT>your</TT></B>&nbsp;
 494 | <BR><TT><B>thy </B>-></TT> <B><TT>your</TT></B></TD>
 495 | 
 496 | <TD ALIGN=CENTER><TT><B>her </B>-></TT> <B><TT>their</TT></B>&nbsp;
 497 | <BR><TT><B>his </B>-></TT> <B><TT>their</TT></B>&nbsp;
 498 | <BR><TT><B>its </B>-></TT> <B><TT>their</TT></B>&nbsp;
 499 | <BR><TT><B>their </B>-></TT> <B><TT>their</TT></B></TD>
 500 | </TR>
 501 | </TABLE></CENTER>
 502 | 
 503 | <H4>
 504 | <A NAME="Heading52"></A>Table A.8: Irregular verbs</H4>
 505 | 
 506 | <CENTER><TABLE BORDER >
 507 | <TR ALIGN=CENTER VALIGN=CENTER>
 508 | <TD ALIGN=CENTER><B><TT>1st Person</TT></B></TD>
 509 | 
 510 | <TD ALIGN=CENTER><B><TT>2nd Person</TT></B></TD>
 511 | 
 512 | <TD ALIGN=CENTER><B><TT>3rd Person</TT></B></TD>
 513 | </TR>
 514 | 
 515 | <TR ALIGN=CENTER VALIGN=CENTER>
 516 | <TD ALIGN=CENTER><TT><B>am </B>-></TT> <B><TT>are</TT></B></TD>
 517 | 
 518 | <TD ALIGN=CENTER><TT><B>are </B>-></TT> <B><TT>are</TT></B></TD>
 519 | 
 520 | <TD ALIGN=CENTER><TT><B>is </B>-></TT> <B><TT>are</TT></B></TD>
 521 | </TR>
 522 | 
 523 | <TR ALIGN=CENTER VALIGN=CENTER>
 524 | <TD ALIGN=CENTER><TT><B>was </B>-></TT> <B><TT>were</TT></B></TD>
 525 | 
 526 | <TD ALIGN=CENTER><TT><B>were </B>-></TT> <B><TT>were</TT></B></TD>
 527 | 
 528 | <TD ALIGN=CENTER><TT><B>was </B>-></TT> <B><TT>were</TT></B></TD>
 529 | </TR>
 530 | 
 531 | <TR ALIGN=CENTER VALIGN=CENTER>
 532 | <TD ALIGN=CENTER><TT><B>have </B>-></TT> <B><TT>have</TT></B></TD>
 533 | 
 534 | <TD ALIGN=CENTER><TT><B>have </B>-></TT> <B><TT>have</TT></B></TD>
 535 | 
 536 | <TD ALIGN=CENTER><TT><B>has </B>-></TT> <B><TT>have</TT></B></TD>
 537 | </TR>
 538 | </TABLE></CENTER>
 539 | 
 540 | <H4>
 541 | <A NAME="Heading53"></A>Table A.9: Uninflected verbs</H4>
 542 | 
 543 | <CENTER><TABLE BORDER >
 544 | <TR ALIGN=CENTER VALIGN=CENTER>
 545 | <TD ALIGN=CENTER><B><TT>ate</TT></B></TD>
 546 | 
 547 | <TD ALIGN=CENTER><B><TT>had</TT></B></TD>
 548 | 
 549 | <TD ALIGN=CENTER><B><TT>sank</TT></B></TD>
 550 | </TR>
 551 | 
 552 | <TR ALIGN=CENTER VALIGN=CENTER>
 553 | <TD ALIGN=CENTER><B><TT>could</TT></B></TD>
 554 | 
 555 | <TD ALIGN=CENTER><B><TT>made</TT></B></TD>
 556 | 
 557 | <TD ALIGN=CENTER><B><TT>shall</TT></B></TD>
 558 | </TR>
 559 | 
 560 | <TR ALIGN=CENTER VALIGN=CENTER>
 561 | <TD ALIGN=CENTER><B><TT>did</TT></B></TD>
 562 | 
 563 | <TD ALIGN=CENTER><B><TT>must</TT></B></TD>
 564 | 
 565 | <TD ALIGN=CENTER><B><TT>should</TT></B></TD>
 566 | </TR>
 567 | 
 568 | <TR ALIGN=CENTER VALIGN=CENTER>
 569 | <TD ALIGN=CENTER><B><TT>fought</TT></B></TD>
 570 | 
 571 | <TD ALIGN=CENTER><B><TT>ought</TT></B></TD>
 572 | 
 573 | <TD ALIGN=CENTER><B><TT>sought</TT></B></TD>
 574 | </TR>
 575 | 
 576 | <TR ALIGN=CENTER VALIGN=CENTER>
 577 | <TD ALIGN=CENTER><B><TT>gave</TT></B></TD>
 578 | 
 579 | <TD ALIGN=CENTER><B><TT>put</TT></B></TD>
 580 | 
 581 | <TD ALIGN=CENTER><B><TT>spent</TT></B></TD>
 582 | </TR>
 583 | </TABLE></CENTER>
 584 | 
 585 | <H4>
 586 | <A NAME="Heading54"></A>Table A.10: <B><TT>-a</TT></B> to <B><TT>-ae</TT></B></H4>
 587 | 
 588 | <CENTER><TABLE BORDER >
 589 | <TR ALIGN=CENTER VALIGN=CENTER>
 590 | <TD ALIGN=CENTER><B><TT>alumna</TT></B></TD>
 591 | 
 592 | <TD ALIGN=CENTER><B><TT>alga</TT></B></TD>
 593 | 
 594 | <TD ALIGN=CENTER><B><TT>vertebra</TT></B></TD>
 595 | </TR>
 596 | </TABLE></CENTER>
 597 | 
 598 | <H4>
 599 | <A NAME="Heading55"></A>Table A.11: <B><TT>-a</TT></B> to <B><TT>-as</TT></B>
 600 | (anglicized) or <B><TT>-ae</TT></B> (classical)</H4>
 601 | 
 602 | <CENTER><TABLE BORDER >
 603 | <TR ALIGN=CENTER VALIGN=CENTER>
 604 | <TD ALIGN=CENTER><B><TT>abscissa</TT></B></TD>
 605 | 
 606 | <TD ALIGN=CENTER><B><TT>formula</TT></B></TD>
 607 | 
 608 | <TD ALIGN=CENTER><B><TT>medusa</TT></B></TD>
 609 | </TR>
 610 | 
 611 | <TR ALIGN=CENTER VALIGN=CENTER>
 612 | <TD ALIGN=CENTER><B><TT>amoeba</TT></B></TD>
 613 | 
 614 | <TD ALIGN=CENTER><B><TT>hydra</TT></B></TD>
 615 | 
 616 | <TD ALIGN=CENTER><B><TT>nebula</TT></B></TD>
 617 | </TR>
 618 | 
 619 | <TR ALIGN=CENTER VALIGN=CENTER>
 620 | <TD ALIGN=CENTER><B><TT>antenna</TT></B></TD>
 621 | 
 622 | <TD ALIGN=CENTER><B><TT>hyperbola</TT></B></TD>
 623 | 
 624 | <TD ALIGN=CENTER><B><TT>nova</TT></B></TD>
 625 | </TR>
 626 | 
 627 | <TR ALIGN=CENTER VALIGN=CENTER>
 628 | <TD ALIGN=CENTER><B><TT>aurora</TT></B></TD>
 629 | 
 630 | <TD ALIGN=CENTER><B><TT>lacuna</TT></B></TD>
 631 | 
 632 | <TD ALIGN=CENTER><B><TT>parabola</TT></B></TD>
 633 | </TR>
 634 | </TABLE></CENTER>
 635 | 
 636 | <H4>
 637 | <A NAME="Heading56"></A>Table A.12: <B><TT>-a</TT></B> to <B><TT>-as</TT></B>
 638 | (anglicized) or <B><TT>-ata</TT></B> (classical)</H4>
 639 | 
 640 | <CENTER><TABLE BORDER >
 641 | <TR ALIGN=CENTER VALIGN=CENTER>
 642 | <TD ALIGN=CENTER><B><TT>anathema</TT></B></TD>
 643 | 
 644 | <TD ALIGN=CENTER><B><TT>enema</TT></B></TD>
 645 | 
 646 | <TD ALIGN=CENTER><B><TT>oedema</TT></B></TD>
 647 | </TR>
 648 | 
 649 | <TR ALIGN=CENTER VALIGN=CENTER>
 650 | <TD ALIGN=CENTER><B><TT>bema</TT></B></TD>
 651 | 
 652 | <TD ALIGN=CENTER><B><TT>enigma</TT></B></TD>
 653 | 
 654 | <TD ALIGN=CENTER><B><TT>sarcoma</TT></B></TD>
 655 | </TR>
 656 | 
 657 | <TR ALIGN=CENTER VALIGN=CENTER>
 658 | <TD ALIGN=CENTER><B><TT>carcinoma</TT></B></TD>
 659 | 
 660 | <TD ALIGN=CENTER><B><TT>gumma</TT></B></TD>
 661 | 
 662 | <TD ALIGN=CENTER><B><TT>schema</TT></B></TD>
 663 | </TR>
 664 | 
 665 | <TR ALIGN=CENTER VALIGN=CENTER>
 666 | <TD ALIGN=CENTER><B><TT>charisma</TT></B></TD>
 667 | 
 668 | <TD ALIGN=CENTER><B><TT>lemma</TT></B></TD>
 669 | 
 670 | <TD ALIGN=CENTER><B><TT>soma</TT></B></TD>
 671 | </TR>
 672 | 
 673 | <TR ALIGN=CENTER VALIGN=CENTER>
 674 | <TD ALIGN=CENTER><B><TT>diploma</TT></B></TD>
 675 | 
 676 | <TD ALIGN=CENTER><B><TT>lymphoma</TT></B></TD>
 677 | 
 678 | <TD ALIGN=CENTER><B><TT>stigma</TT></B></TD>
 679 | </TR>
 680 | 
 681 | <TR ALIGN=CENTER VALIGN=CENTER>
 682 | <TD ALIGN=CENTER><B><TT>dogma</TT></B></TD>
 683 | 
 684 | <TD ALIGN=CENTER><B><TT>magma</TT></B></TD>
 685 | 
 686 | <TD ALIGN=CENTER><B><TT>stoma</TT></B></TD>
 687 | </TR>
 688 | 
 689 | <TR ALIGN=CENTER VALIGN=CENTER>
 690 | <TD ALIGN=CENTER><B><TT>drama</TT></B></TD>
 691 | 
 692 | <TD ALIGN=CENTER><B><TT>melisma</TT></B></TD>
 693 | 
 694 | <TD ALIGN=CENTER><B><TT>trauma</TT></B></TD>
 695 | </TR>
 696 | 
 697 | <TR ALIGN=CENTER VALIGN=CENTER>
 698 | <TD ALIGN=CENTER><B><TT>edema</TT></B></TD>
 699 | 
 700 | <TD ALIGN=CENTER><B><TT>miasma</TT></B></TD>
 701 | 
 702 | <TD ALIGN=CENTER></TD>
 703 | </TR>
 704 | </TABLE></CENTER>
 705 | 
 706 | <H4>
 707 | <A NAME="Heading57"></A>Table A.13: <B><TT>-en</TT></B> to <B><TT>-ens</TT></B>
 708 | (anglicized) or <B><TT>-ina</TT></B> (classical)</H4>
 709 | 
 710 | <CENTER><TABLE BORDER >
 711 | <TR ALIGN=CENTER VALIGN=CENTER>
 712 | <TD ALIGN=CENTER><B><TT>stamen</TT></B></TD>
 713 | 
 714 | <TD ALIGN=CENTER><B><TT>foramen</TT></B></TD>
 715 | 
 716 | <TD ALIGN=CENTER><B><TT>lumen</TT></B></TD>
 717 | </TR>
 718 | </TABLE></CENTER>
 719 | 
 720 | <H4>
 721 | <A NAME="Heading58"></A>Table A.14: <B><TT>-ex</TT></B> to <B><TT>-ices</TT></B></H4>
 722 | 
 723 | <CENTER><TABLE BORDER >
 724 | <TR ALIGN=LEFT VALIGN=CENTER>
 725 | <TD ALIGN=CENTER><B><TT>codex</TT></B></TD>
 726 | 
 727 | <TD ALIGN=CENTER><B><TT>murex</TT></B></TD>
 728 | 
 729 | <TD ALIGN=CENTER><B><TT>silex</TT></B></TD>
 730 | </TR>
 731 | </TABLE></CENTER>
 732 | 
 733 | <H4>
 734 | <A NAME="Heading59"></A>Table A.15: <B><TT>-ex</TT></B> to <B><TT>-exes</TT></B>
 735 | (anglicized) or <B><TT>-ices</TT></B> (classical)</H4>
 736 | 
 737 | <CENTER><TABLE BORDER >
 738 | <TR ALIGN=CENTER VALIGN=CENTER>
 739 | <TD ALIGN=CENTER><B><TT>apex</TT></B></TD>
 740 | 
 741 | <TD ALIGN=CENTER><B><TT>latex</TT></B></TD>
 742 | 
 743 | <TD ALIGN=CENTER><B><TT>vertex</TT></B></TD>
 744 | </TR>
 745 | 
 746 | <TR ALIGN=CENTER VALIGN=CENTER>
 747 | <TD ALIGN=CENTER><B><TT>cortex</TT></B></TD>
 748 | 
 749 | <TD ALIGN=CENTER><B><TT>pontifex</TT></B></TD>
 750 | 
 751 | <TD ALIGN=CENTER><B><TT>vortex</TT></B></TD>
 752 | </TR>
 753 | 
 754 | <TR ALIGN=CENTER VALIGN=CENTER>
 755 | <TD ALIGN=CENTER><B><TT>index</TT></B></TD>
 756 | 
 757 | <TD ALIGN=CENTER><B><TT>simplex</TT></B></TD>
 758 | 
 759 | <TD ALIGN=CENTER></TD>
 760 | </TR>
 761 | </TABLE></CENTER>
 762 | 
 763 | <H4>
 764 | <A NAME="Heading60"></A>Table A.16: <B><TT>-is</TT></B> to <B><TT>-ises</TT></B>
 765 | (anglicized) or <B><TT>-ides</TT></B> (classical)</H4>
 766 | 
 767 | <CENTER><TABLE BORDER >
 768 | <TR ALIGN=CENTER VALIGN=CENTER>
 769 | <TD ALIGN=CENTER><B><TT>iris</TT></B></TD>
 770 | 
 771 | <TD ALIGN=CENTER><B><TT>clitoris</TT></B></TD>
 772 | </TR>
 773 | </TABLE></CENTER>
 774 | 
 775 | <H4>
 776 | <A NAME="Heading61"></A>Table A.17: <B><TT>-o</TT></B> to <B><TT>-os</TT></B></H4>
 777 | 
 778 | <CENTER><TABLE BORDER >
 779 | <TR ALIGN=CENTER VALIGN=CENTER>
 780 | <TD ALIGN=CENTER><B><TT>albino</TT></B></TD>
 781 | 
 782 | <TD ALIGN=CENTER><B><TT>generalissimo</TT></B></TD>
 783 | 
 784 | <TD ALIGN=CENTER><B><TT>manifesto</TT></B></TD>
 785 | </TR>
 786 | 
 787 | <TR ALIGN=CENTER VALIGN=CENTER>
 788 | <TD ALIGN=CENTER><B><TT>archipelago</TT></B></TD>
 789 | 
 790 | <TD ALIGN=CENTER><B><TT>ghetto</TT></B></TD>
 791 | 
 792 | <TD ALIGN=CENTER><B><TT>medico</TT></B></TD>
 793 | </TR>
 794 | 
 795 | <TR ALIGN=CENTER VALIGN=CENTER>
 796 | <TD ALIGN=CENTER><B><TT>armadillo</TT></B></TD>
 797 | 
 798 | <TD ALIGN=CENTER><B><TT>guano</TT></B></TD>
 799 | 
 800 | <TD ALIGN=CENTER><B><TT>octavo</TT></B></TD>
 801 | </TR>
 802 | 
 803 | <TR ALIGN=CENTER VALIGN=CENTER>
 804 | <TD ALIGN=CENTER><B><TT>commando</TT></B></TD>
 805 | 
 806 | <TD ALIGN=CENTER><B><TT>inferno</TT></B></TD>
 807 | 
 808 | <TD ALIGN=CENTER><B><TT>photo</TT></B></TD>
 809 | </TR>
 810 | 
 811 | <TR ALIGN=CENTER VALIGN=CENTER>
 812 | <TD ALIGN=CENTER><B><TT>ditto</TT></B></TD>
 813 | 
 814 | <TD ALIGN=CENTER><B><TT>jumbo</TT></B></TD>
 815 | 
 816 | <TD ALIGN=CENTER><B><TT>pro</TT></B></TD>
 817 | </TR>
 818 | 
 819 | <TR ALIGN=CENTER VALIGN=CENTER>
 820 | <TD ALIGN=CENTER><B><TT>dynamo</TT></B></TD>
 821 | 
 822 | <TD ALIGN=CENTER><B><TT>lingo</TT></B></TD>
 823 | 
 824 | <TD ALIGN=CENTER><B><TT>quarto</TT></B></TD>
 825 | </TR>
 826 | 
 827 | <TR ALIGN=CENTER VALIGN=CENTER>
 828 | <TD ALIGN=CENTER><B><TT>embryo</TT></B></TD>
 829 | 
 830 | <TD ALIGN=CENTER><B><TT>lumbago</TT></B></TD>
 831 | 
 832 | <TD ALIGN=CENTER><B><TT>rhino</TT></B></TD>
 833 | </TR>
 834 | 
 835 | <TR ALIGN=CENTER VALIGN=CENTER>
 836 | <TD ALIGN=CENTER><B><TT>fiasco</TT></B></TD>
 837 | 
 838 | <TD ALIGN=CENTER><B><TT>magneto</TT></B></TD>
 839 | 
 840 | <TD ALIGN=CENTER><B><TT>stylo</TT></B></TD>
 841 | </TR>
 842 | </TABLE></CENTER>
 843 | 
 844 | <H4>
 845 | <A NAME="Heading62"></A>Table A.18: <B><TT>-o</TT></B> to <B><TT>-os</TT></B>
 846 | (anglicized) or <B><TT>-i</TT></B> (classical)</H4>
 847 | 
 848 | <CENTER><TABLE BORDER >
 849 | <TR ALIGN=CENTER VALIGN=CENTER>
 850 | <TD ALIGN=CENTER>
 851 | <CENTER><B><TT>alto</TT></B></CENTER>
 852 | </TD>
 853 | 
 854 | <TD ALIGN=CENTER>
 855 | <CENTER><B><TT>contralto</TT></B></CENTER>
 856 | </TD>
 857 | 
 858 | <TD ALIGN=CENTER>
 859 | <CENTER><B><TT>soprano</TT></B></CENTER>
 860 | </TD>
 861 | </TR>
 862 | 
 863 | <TR>
 864 | <TD>
 865 | <CENTER><B><TT>basso</TT></B></CENTER>
 866 | </TD>
 867 | 
 868 | <TD>
 869 | <CENTER><B><TT>crescendo</TT></B></CENTER>
 870 | </TD>
 871 | 
 872 | <TD>
 873 | <CENTER><B><TT>tempo</TT></B></CENTER>
 874 | </TD>
 875 | </TR>
 876 | 
 877 | <TR ALIGN=CENTER VALIGN=CENTER>
 878 | <TD ALIGN=CENTER>
 879 | <CENTER><B><TT>canto</TT></B></CENTER>
 880 | </TD>
 881 | 
 882 | <TD ALIGN=CENTER>
 883 | <CENTER><B><TT>solo</TT></B></CENTER>
 884 | </TD>
 885 | 
 886 | <TD ALIGN=CENTER>
 887 | <CENTER><B><TT></TT></B>&nbsp;</CENTER>
 888 | </TD>
 889 | </TR>
 890 | </TABLE></CENTER>
 891 | 
 892 | <CENTER>
 893 | <H4>
 894 | <A NAME="Heading63"></A>Table A.19: <B><TT>-on</TT></B> to <B><TT>-a</TT></B></H4></CENTER>
 895 | 
 896 | <CENTER><TABLE BORDER >
 897 | <TR ALIGN=CENTER VALIGN=CENTER>
 898 | <TD ALIGN=CENTER><B><TT>aphelion</TT></B></TD>
 899 | 
 900 | <TD ALIGN=CENTER><B><TT>hyperbaton</TT></B></TD>
 901 | 
 902 | <TD ALIGN=CENTER><B><TT>perihelion</TT></B></TD>
 903 | </TR>
 904 | 
 905 | <TR ALIGN=CENTER VALIGN=CENTER>
 906 | <TD ALIGN=CENTER><B><TT>asyndeton</TT></B></TD>
 907 | 
 908 | <TD ALIGN=CENTER><B><TT>noumenon</TT></B></TD>
 909 | 
 910 | <TD ALIGN=CENTER><B><TT>phenomenon</TT></B></TD>
 911 | </TR>
 912 | 
 913 | <TR ALIGN=CENTER VALIGN=CENTER>
 914 | <TD ALIGN=CENTER><B><TT>criterion</TT></B></TD>
 915 | 
 916 | <TD ALIGN=CENTER><B><TT>organon</TT></B></TD>
 917 | 
 918 | <TD ALIGN=CENTER><B><TT>prolegomenon</TT></B></TD>
 919 | </TR>
 920 | </TABLE></CENTER>
 921 | 
 922 | <H4>
 923 | <A NAME="Heading64"></A>Table A.20: <B><TT>-um</TT></B> to <B><TT>-a</TT></B></H4>
 924 | 
 925 | <CENTER><TABLE BORDER >
 926 | <TR ALIGN=CENTER VALIGN=CENTER>
 927 | <TD ALIGN=CENTER><B><TT>agendum</TT></B></TD>
 928 | 
 929 | <TD ALIGN=CENTER><B><TT>datum</TT></B></TD>
 930 | 
 931 | <TD ALIGN=CENTER><B><TT>extremum</TT></B></TD>
 932 | </TR>
 933 | 
 934 | <TR ALIGN=CENTER VALIGN=CENTER>
 935 | <TD ALIGN=CENTER><B><TT>bacterium</TT></B></TD>
 936 | 
 937 | <TD ALIGN=CENTER><B><TT>desideratum</TT></B></TD>
 938 | 
 939 | <TD ALIGN=CENTER><B><TT>stratum</TT></B></TD>
 940 | </TR>
 941 | 
 942 | <TR ALIGN=CENTER VALIGN=CENTER>
 943 | <TD ALIGN=CENTER><B><TT>candelabrum</TT></B></TD>
 944 | 
 945 | <TD ALIGN=CENTER><B><TT>erratum</TT></B></TD>
 946 | 
 947 | <TD ALIGN=CENTER><B><TT>ovum</TT></B></TD>
 948 | </TR>
 949 | </TABLE></CENTER>
 950 | 
 951 | <H4>
 952 | <A NAME="Heading65"></A>Table A.21: <B><TT>-um</TT></B> to <B><TT>-ums</TT></B>
 953 | (anglicized) or <B><TT>-a</TT></B> (classical)</H4>
 954 | 
 955 | <CENTER><TABLE BORDER >
 956 | <TR ALIGN=CENTER VALIGN=CENTER>
 957 | <TD ALIGN=CENTER><B><TT>aquarium</TT></B></TD>
 958 | 
 959 | <TD ALIGN=CENTER><B><TT>interregnum</TT></B></TD>
 960 | 
 961 | <TD ALIGN=CENTER><B><TT>quantum</TT></B></TD>
 962 | </TR>
 963 | 
 964 | <TR ALIGN=CENTER VALIGN=CENTER>
 965 | <TD ALIGN=CENTER><B><TT>compendium</TT></B></TD>
 966 | 
 967 | <TD ALIGN=CENTER><B><TT>lustrum</TT></B></TD>
 968 | 
 969 | <TD ALIGN=CENTER><B><TT>rostrum</TT></B></TD>
 970 | </TR>
 971 | 
 972 | <TR ALIGN=CENTER VALIGN=CENTER>
 973 | <TD ALIGN=CENTER><B><TT>consortium</TT></B></TD>
 974 | 
 975 | <TD ALIGN=CENTER><B><TT>maximum</TT></B></TD>
 976 | 
 977 | <TD ALIGN=CENTER><B><TT>spectrum</TT></B></TD>
 978 | </TR>
 979 | 
 980 | <TR ALIGN=CENTER VALIGN=CENTER>
 981 | <TD ALIGN=CENTER><B><TT>cranium</TT></B></TD>
 982 | 
 983 | <TD ALIGN=CENTER><B><TT>medium</TT></B></TD>
 984 | 
 985 | <TD ALIGN=CENTER><B><TT>speculum</TT></B></TD>
 986 | </TR>
 987 | 
 988 | <TR ALIGN=CENTER VALIGN=CENTER>
 989 | <TD ALIGN=CENTER><B><TT>curriculum</TT></B></TD>
 990 | 
 991 | <TD ALIGN=CENTER><B><TT>memorandum</TT></B></TD>
 992 | 
 993 | <TD ALIGN=CENTER><B><TT>stadium</TT></B></TD>
 994 | </TR>
 995 | 
 996 | <TR ALIGN=CENTER VALIGN=CENTER>
 997 | <TD ALIGN=CENTER><B><TT>dictum</TT></B></TD>
 998 | 
 999 | <TD ALIGN=CENTER><B><TT>millennium</TT></B></TD>
1000 | 
1001 | <TD ALIGN=CENTER><B><TT>trapezium</TT></B></TD>
1002 | </TR>
1003 | 
1004 | <TR ALIGN=CENTER VALIGN=CENTER>
1005 | <TD ALIGN=CENTER><B><TT>emporium</TT></B></TD>
1006 | 
1007 | <TD ALIGN=CENTER><B><TT>minimum</TT></B></TD>
1008 | 
1009 | <TD ALIGN=CENTER><B><TT>ultimatum</TT></B></TD>
1010 | </TR>
1011 | 
1012 | <TR ALIGN=CENTER VALIGN=CENTER>
1013 | <TD ALIGN=CENTER><B><TT>encomium</TT></B></TD>
1014 | 
1015 | <TD ALIGN=CENTER><B><TT>momentum</TT></B></TD>
1016 | 
1017 | <TD ALIGN=CENTER><B><TT>vacuum</TT></B></TD>
1018 | </TR>
1019 | 
1020 | <TR ALIGN=CENTER VALIGN=CENTER>
1021 | <TD ALIGN=CENTER><B><TT>gymnasium</TT></B></TD>
1022 | 
1023 | <TD ALIGN=CENTER><B><TT>optimum</TT></B></TD>
1024 | 
1025 | <TD ALIGN=CENTER><B><TT>velum</TT></B></TD>
1026 | </TR>
1027 | 
1028 | <TR ALIGN=CENTER VALIGN=CENTER>
1029 | <TD ALIGN=CENTER><B><TT>honorarium</TT></B></TD>
1030 | 
1031 | <TD ALIGN=CENTER><B><TT>phylum</TT></B></TD>
1032 | 
1033 | <TD ALIGN=CENTER></TD>
1034 | </TR>
1035 | </TABLE></CENTER>
1036 | 
1037 | <H4>
1038 | <A NAME="Heading66"></A>Table A.22: <B><TT>-us</TT></B> to <B><TT>-uses</TT></B>
1039 | (anglicized) or <B><TT>-i</TT></B> (classical)</H4>
1040 | 
1041 | <CENTER><TABLE BORDER >
1042 | <TR ALIGN=CENTER VALIGN=CENTER>
1043 | <TD ALIGN=CENTER><B><TT>focus</TT></B></TD>
1044 | 
1045 | <TD ALIGN=CENTER><B><TT>nimbus</TT></B></TD>
1046 | 
1047 | <TD ALIGN=CENTER><B><TT>succubus</TT></B></TD>
1048 | </TR>
1049 | 
1050 | <TR ALIGN=CENTER VALIGN=CENTER>
1051 | <TD ALIGN=CENTER><B><TT>fungus</TT></B></TD>
1052 | 
1053 | <TD ALIGN=CENTER><B><TT>nucleolus</TT></B></TD>
1054 | 
1055 | <TD ALIGN=CENTER><B><TT>torus</TT></B></TD>
1056 | </TR>
1057 | 
1058 | <TR ALIGN=CENTER VALIGN=CENTER>
1059 | <TD ALIGN=CENTER><B><TT>genius</TT></B></TD>
1060 | 
1061 | <TD ALIGN=CENTER><B><TT>radius</TT></B></TD>
1062 | 
1063 | <TD ALIGN=CENTER><B><TT>umbilicus</TT></B></TD>
1064 | </TR>
1065 | 
1066 | <TR ALIGN=CENTER VALIGN=CENTER>
1067 | <TD ALIGN=CENTER><B><TT>incubus</TT></B></TD>
1068 | 
1069 | <TD ALIGN=CENTER><B><TT>stylus</TT></B></TD>
1070 | 
1071 | <TD ALIGN=CENTER><B><TT>uterus</TT></B></TD>
1072 | </TR>
1073 | </TABLE></CENTER>
1074 | 
1075 | <H4>
1076 | <A NAME="Heading67"></A>Table A.23: <B><TT>-us</TT></B> to <B><TT>-uses</TT></B>
1077 | (anglicized) or <B><TT>-us</TT></B> (classical)</H4>
1078 | 
1079 | <CENTER><TABLE BORDER >
1080 | <TR ALIGN=CENTER VALIGN=CENTER>
1081 | <TD ALIGN=CENTER><B><TT>apparatus</TT></B></TD>
1082 | 
1083 | <TD ALIGN=CENTER><B><TT>impetus</TT></B></TD>
1084 | 
1085 | <TD ALIGN=CENTER><B><TT>prospectus</TT></B></TD>
1086 | </TR>
1087 | 
1088 | <TR ALIGN=CENTER VALIGN=CENTER>
1089 | <TD ALIGN=CENTER><B><TT>cantus</TT></B></TD>
1090 | 
1091 | <TD ALIGN=CENTER><B><TT>nexus</TT></B></TD>
1092 | 
1093 | <TD ALIGN=CENTER><B><TT>sinus</TT></B></TD>
1094 | </TR>
1095 | 
1096 | <TR ALIGN=CENTER VALIGN=CENTER>
1097 | <TD ALIGN=CENTER><B><TT>coitus</TT></B></TD>
1098 | 
1099 | <TD ALIGN=CENTER><B><TT>plexus</TT></B></TD>
1100 | 
1101 | <TD ALIGN=CENTER><B><TT>status</TT></B></TD>
1102 | </TR>
1103 | 
1104 | <TR ALIGN=CENTER VALIGN=CENTER>
1105 | <TD ALIGN=CENTER><B><TT>hiatus</TT></B></TD>
1106 | 
1107 | <TD ALIGN=CENTER>&nbsp;</TD>
1108 | 
1109 | <TD ALIGN=CENTER>&nbsp;</TD>
1110 | </TR>
1111 | </TABLE></CENTER>
1112 | 
1113 | <H4>
1114 | <A NAME="Heading68"></A>Table A.24: <B><TT>-</TT></B> to <B><TT>-i</TT></B></H4>
1115 | 
1116 | <CENTER><TABLE BORDER >
1117 | <TR ALIGN=CENTER VALIGN=CENTER>
1118 | <TD ALIGN=CENTER><B><TT>afreet</TT></B></TD>
1119 | 
1120 | <TD ALIGN=CENTER><B><TT>afrit</TT></B></TD>
1121 | 
1122 | <TD ALIGN=CENTER><B><TT>efreet</TT></B></TD>
1123 | </TR>
1124 | </TABLE></CENTER>
1125 | 
1126 | <H4>
1127 | <A NAME="Heading69"></A>Table A.25: <B><TT>-</TT></B> to <B><TT>-im</TT></B></H4>
1128 | 
1129 | <CENTER><TABLE BORDER >
1130 | <TR ALIGN=LEFT VALIGN=CENTER>
1131 | <TD ALIGN=CENTER><B><TT>cherub</TT></B></TD>
1132 | 
1133 | <TD ALIGN=CENTER><B><TT>goy</TT></B></TD>
1134 | 
1135 | <TD ALIGN=CENTER><B><TT>seraph</TT></B></TD>
1136 | </TR>
1137 | </TABLE></CENTER>
1138 | 
1139 | <H4>
1140 | <A NAME="Heading70"></A>Table A.26: <B><TT>-general</TT></B> to <B><TT>-generals</TT></B></H4>
1141 | 
1142 | <CENTER><TABLE BORDER >
1143 | <TR ALIGN=CENTER VALIGN=CENTER>
1144 | <TD ALIGN=CENTER><B><TT>Adjutant</TT></B></TD>
1145 | 
1146 | <TD ALIGN=CENTER><B><TT>Lieutenant</TT></B></TD>
1147 | 
1148 | <TD ALIGN=CENTER><B><TT>Quartermaster</TT></B></TD>
1149 | </TR>
1150 | 
1151 | <TR ALIGN=CENTER VALIGN=CENTER>
1152 | <TD ALIGN=CENTER><B><TT>Brigadier</TT></B></TD>
1153 | 
1154 | <TD ALIGN=CENTER><B><TT>Major</TT></B></TD>
1155 | 
1156 | <TD ALIGN=CENTER>&nbsp;</TD>
1157 | </TR>
1158 | </TABLE></CENTER>
1159 | &nbsp;
1160 | </BODY>
1161 | </HTML>
1162 | 


--------------------------------------------------------------------------------
/lib/classifier.ex:
--------------------------------------------------------------------------------
 1 | defmodule Text.Language.Classifier do
 2 |   @moduledoc """
 3 |   A behaviour definition module for language
 4 |   classifiers.
 5 | 
 6 |   A language classifier correlates supplied
 7 |   natural language text against a vocabulary
 8 |   and returns a score indicating how closely the
 9 |   supplied text matches the vocabulary.
10 | 
11 |   """
12 | 
13 |   @typedoc "A classifier is a module that implements the `Text.Language.Classifier` behaviour."
14 |   @type t :: module()
15 | 
16 |   @typedoc "A list of 2-tuples of the form `{language, number}`"
17 |   @type frequency_list :: [Text.frequency_tuple(), ...]
18 | 
19 |   @typedoc "A map of an n-gram as a charlist to a `Text.Ngram.Frequency` struct"
20 |   @type text_ngrams :: %{charlist => Text.Ngram.Frequency.t}
21 | 
22 |   @doc """
23 |   Returns the classifier score for one language.
24 | 
25 |   A classifier correlates how closely a
26 |   supplied string (encoded into n-grams)
27 |   matches against a given language profile
28 |   implemented as a vocabulary.
29 | 
30 |   See `Text.Language.Classifier.NaiveBayesian`
31 |   for an example.
32 | 
33 |   """
34 |   @callback score_one_language(Text.language, text_ngrams, Text.vocabulary) :: frequency_list()
35 | 
36 |   @doc """
37 |   Sorts the classifier scores from all languages in
38 |   order of correlation.
39 |   """
40 |   @callback order_scores(frequency_list) :: frequency_list()
41 | 
42 | end


--------------------------------------------------------------------------------
/lib/corpus.ex:
--------------------------------------------------------------------------------
  1 | defmodule Text.Corpus do
  2 |   @moduledoc """
  3 |   Defines the behaviour for a language
  4 |   corpus with convenience functions to
  5 |   simplify the creation of corpus
  6 |   vocabularies.
  7 | 
  8 |   """
  9 | 
 10 |   @doc """
 11 |   Returns a list of vocabularies for a corpus.
 12 | 
 13 |   """
 14 |   @callback known_vocabularies :: [Text.vocabulary, ...]
 15 | 
 16 |   @doc """
 17 |   Returns a list of languages for a corpus.
 18 | 
 19 |   """
 20 |   @callback known_languages :: [Text.language, ...]
 21 | 
 22 |   @doc """
 23 |   Returns the natural language training text for
 24 |   a given language in the corpus.
 25 | 
 26 |   """
 27 |   @callback language_content(Text.language) :: String.t
 28 | 
 29 |   @doc """
 30 |   Normalizes the text used for training and
 31 |   for classification.
 32 | 
 33 |   """
 34 |   @callback normalize_text(String.t) :: String.t
 35 | 
 36 |   @doc """
 37 |   Classifies the natural language of a given
 38 |   text into an ordered list.
 39 | 
 40 |   See `Text.Language.classify/2` for
 41 |   the options that may be passed.
 42 | 
 43 |   """
 44 |   @callback classify(String.t, Keyword.t) ::
 45 |     [Text.frequency_tuple, ...] | {:error, {module(), String.t}}
 46 | 
 47 |   @doc """
 48 |   Detects the most likely natural language of a given
 49 |   text.
 50 | 
 51 |   See `Text.Language.detect/2` for
 52 |   the options that may be passed.
 53 | 
 54 |   """
 55 |   @callback detect(String.t, Keyword.t) ::
 56 |     {:ok, Text.language} | {:error, {module(), String.t}}
 57 | 
 58 |   @max_demand 5
 59 | 
 60 |   @doc """
 61 |   Builds the vocabulary for each of the
 62 |   corpus's known vocabulary modules.
 63 | 
 64 |   """
 65 |   def build_vocabularies(corpus, options \\ []) do
 66 |     max_demand = Keyword.get(options, :max_demand, @max_demand)
 67 | 
 68 |     corpus.known_vocabularies()
 69 |     |> Enum.each(&build_vocabulary(corpus, &1, max_demand: max_demand))
 70 |   end
 71 | 
 72 |   @doc """
 73 |   Builds the vocabulary for a given vocabulary module,
 74 |   writes it to `vocabulary.filename()` and returns it.
 75 | 
 76 |   """
 77 |   def build_vocabulary(corpus, vocabulary, options \\ []) do
 78 |     ngram_range = vocabulary.ngram_range()
 79 |     file = vocabulary.filename()
 80 |     max_demand = Keyword.get(options, :max_demand, @max_demand)
 81 | 
 82 |     frequency_map_by_language =
 83 |       corpus.known_languages()
 84 |       |> Flow.from_enumerable(max_demand: max_demand)
 85 |       |> Flow.map(&Text.Vocabulary.calculate_corpus_ngrams(corpus, &1, ngram_range))
 86 |       |> Enum.to_list
 87 |       |> calculate_global_frequencies
 88 |       |> remove_structs_for_space_reduction
 89 | 
 90 |     binary = :erlang.term_to_binary(frequency_map_by_language)
 91 |     :ok = File.write!(file, binary)
 92 | 
 93 |     frequency_map_by_language
 94 |   end
 95 | 
 96 |   @doc false
 97 |   def remove_structs_for_space_reduction(frequency_map) do
 98 |     Enum.map(frequency_map, fn {language, ngram_map} ->
 99 |       new_ngram_map =
100 |         Enum.map(ngram_map, fn {ngram, stats} -> {ngram, Map.from_struct(stats)} end)
101 |         |> Map.new()
102 | 
103 |       {language, new_ngram_map}
104 |     end)
105 |     |> Map.new()
106 |   end
107 | 
108 |   # Calculate the global frequency and rank for each
109 |   # ngram across all languages
110 |   @doc false
111 |   def calculate_global_frequencies(frequency_map_by_language) do
112 |     frequency_map_by_language
113 |     |> invert_to_frequency_map_by_ngram()
114 |     |> calculate_global_frequency_and_rank()
115 |     |> invert_to_frequency_map_by_language()
116 |   end
117 | 
118 |   # Invert
119 |   #   %{language => %{ngram => frequencies}}
120 |   # to:
121 |   #   %{ngram => %{language => frequencies}}
122 |   @doc false
123 |   def invert_to_frequency_map_by_ngram(frequency_map_by_language) do
124 |     Enum.reduce(frequency_map_by_language, %{}, fn {language, ngrams}, acc ->
125 |       Enum.reduce(ngrams, acc, fn {ngram, ngram_stats}, acc2 ->
126 |         Map.update(acc2, ngram, %{language => ngram_stats}, &Map.put(&1, language, ngram_stats))
127 |       end)
128 |     end)
129 |   end
130 | 
131 |   # Invert
132 |   #   %{ngram => %{language => frequencies}}
133 |   # to:
134 |   #   %{language => %{ngram => frequencies}}
135 |   @doc false
136 |   def invert_to_frequency_map_by_language(frequency_map_by_ngram) do
137 |     Enum.reduce(frequency_map_by_ngram, %{}, fn {ngram, languages}, acc ->
138 |       Enum.reduce(languages, acc, fn {language, ngram_stats}, acc2 ->
139 |         Map.update(acc2, language, %{ngram => ngram_stats}, &Map.put(&1, ngram, ngram_stats))
140 |       end)
141 |     end)
142 |   end
143 | 
144 |   # Calculate each ngram's frequency across all languages
145 |   # and then its global rank (1 = highest frequency)
146 |   @doc false
147 |   def calculate_global_frequency_and_rank(frequency_map_by_ngram) do
148 |     Enum.map(frequency_map_by_ngram, fn {ngram, ngram_by_language} ->
149 |       total_count_for_ngram = total_ngram_count_for_languages(ngram_by_language)
150 | 
151 |       added_global_stats =
152 |         Enum.map(ngram_by_language, fn {language, ngram_stats} ->
153 |           {language, %{ngram_stats | global_frequency: ngram_stats.count / total_count_for_ngram}}
154 |         end)
155 |         |> Enum.sort(&(elem(&1, 1).global_frequency > elem(&2, 1).global_frequency))
156 |         |> Enum.with_index(1)
157 |         |> Enum.map(fn {{language, ngram_stats}, global_rank} ->
158 |           {language, %{ngram_stats | global_rank: global_rank}}
159 |         end)
160 | 
161 |       {ngram, added_global_stats}
162 |     end)
163 |   end
164 | 
165 |   @doc false
166 |   def total_ngram_count_for_languages(ngram_by_language) do
167 |     Enum.reduce(ngram_by_language, 0, fn {_language, %{count: count}}, acc ->
168 |       acc + count
169 |     end)
170 |   end
171 | end


--------------------------------------------------------------------------------
/lib/inflect/en.ex:
--------------------------------------------------------------------------------
   1 | defmodule Text.Inflect.En do
   2 |   @moduledoc """
   3 |   Pluralisation for the English language based on the paper
   4 |   [An Algorithmic Approach to English Pluralization](http://users.monash.edu/~damian/papers/HTML/Plurals.html).
   5 | 
   6 |   """
   7 |   @saved_data_path "priv/inflection/en/en.etf"
   8 |   @external_resource @saved_data_path
   9 | 
  10 |   @inflections File.read!(@saved_data_path)
  11 |                |> :erlang.binary_to_term()
  12 | 
  # Returns the inflection data that was loaded at compile time.
  @doc false
  def inflections, do: @inflections
  17 | 
  @doc """
  Pluralize an English noun, pronoun,
  verb or adjective.

  ## Arguments

  * `word` is any English word.

  * `mode` is `:modern` or `:classical`. The
    default is `:modern`. This applies to
    nouns only.

  ## Returns

  * a `String` representing the pluralized word.

  ## Notes

  `mode` when `:classical` applies pluralization
  on Latin nouns in English but with Latin
  suffixes.

  The adjective rules and then the verb rules are
  tried first. A word that none of them recognises
  is pluralized as a noun.

  ## Examples

      iex> Text.Inflect.En.pluralize "fish"
      "fish"

      iex> Text.Inflect.En.pluralize "soliloquy"
      "soliloquies"

      iex> Text.Inflect.En.pluralize "genius", :classical
      "genii"

      iex> Text.Inflect.En.pluralize "has"
      "have"

      iex> Text.Inflect.En.pluralize "catches"
      "catch"

      iex> Text.Inflect.En.pluralize "child's"
      "children's"

      iex> Text.Inflect.En.pluralize "Mary's"
      "Marys'"

  """
  def pluralize(word, mode \\ :modern) do
    # Adjective rules (articles and demonstratives, possessive pronouns,
    # genitives) come first, followed by the verb rules (non-inflecting
    # verbs, irregular verbs and the -es/-ies/-oes third person singular
    # suffixes).
    #
    # Every other word is pluralized as a noun. The generic rule that
    # strips a trailing -s from a verb (`is_third_person_singular_s/1`)
    # is not applied here, so a word ending in -s that was not matched
    # above is treated as a singular noun. That rule remains part of
    # `pluralize_verb/1`.
    #
    # The previous `if suffix?(word, "s")` around the final step had two
    # branches that both reduced to `pluralize_noun(word, mode)`: the
    # extra verb checks in its `else` branch could never match a word
    # without a trailing -s.
    is_indefinite_article(word) ||
      is_possessive_pronoun(word) ||
      is_genetive(word) ||
      is_non_inflecting_verb(word) ||
      is_irregular_verb(word) ||
      is_third_person_singular(word) ||
      pluralize_noun(word, mode)
  end
  92 | 
  @doc """
  Pluralize an English noun.

  ## Arguments

  * `word` is any English noun.

  * `mode` is `:modern` or `:classical`. The
    default is `:modern`.

  ## Returns

  * a `String` representing the pluralized noun

  ## Notes

  `mode` when `:classical` applies pluralization
  on Latin nouns used in English but with Latin
  suffixes.

  ## Examples

      iex> Text.Inflect.En.pluralize_noun "Major general"
      "Major generals"

      iex> Text.Inflect.En.pluralize_noun "fish"
      "fish"

      iex> Text.Inflect.En.pluralize_noun "soliloquy"
      "soliloquies"

      iex> Text.Inflect.En.pluralize_noun "genius", :classical
      "genii"

      iex> Text.Inflect.En.pluralize_noun "genius"
      "geniuses"

      iex> Text.Inflect.En.pluralize_noun "platypus", :classical
      "platypodes"

      iex> Text.Inflect.En.pluralize_noun "platypus"
      "platypuses"

  """
  def pluralize_noun(word, mode \\ :modern) do
    # The rules are tried in this order and the first one that returns a
    # plural wins. `is_regular/2` always returns one, so it comes last.
    rules = [
      &is_non_inflecting/2,
      &is_pronoun/2,
      &is_irregular_noun/2,
      &is_irregular_suffix/2,
      &is_assimilated_classical/2,
      &is_classical/2,
      &is_compound_plural/2,
      &is_ves_plural/2,
      &is_word_ending_in_y/2,
      &is_o_suffix/2,
      &is_general/2,
      &is_regular/2
    ]

    Enum.find_value(rules, fn rule -> rule.(word, mode) end)
  end
 151 | 
 152 |   # Handle words that do not inflect in the plural (such as fish, travois, chassis, nationalities
 153 |   # ending in -ese etc. - see Tables A.2 and A.3)...
 154 |   #         if suffix(-fish) or suffix(-ois) or suffix(-sheep)
 155 |   #         or suffix(-deer) or suffix(-pox) or suffix(-[A-Z].*ese)
 156 |   #         or suffix(-itis) or category(-,-),
 157 |   #                 return the original noun
 158 | 
 159 |   defp is_non_inflecting(word, mode) when is_binary(word) do
 160 |     cond do
 161 |       category?(word, "herd", mode) ->
 162 |         word
 163 | 
 164 |       category?(word, "nationalities", mode) ->
 165 |         word
 166 | 
 167 |       category?(word, "-", "-", mode) ->
 168 |         word
 169 | 
 170 |       true ->
 171 |         nil
 172 |     end
 173 |   end
 174 | 
 175 |   # Handle pronouns in the nominative, accusative, and dative (see Tables A.5), as well as
 176 |   # prepositional phrases...
 177 |   #         if the word is a pronoun,
 178 |   #                 return the specified plural of the pronoun
 179 |   #
 180 |   #         if the word is of the form: "<preposition> <pronoun>",
 181 |   #                 return "<preposition> <specified plural of pronoun>"
 182 | 
 183 |   defp is_pronoun(word, mode) do
 184 |     cond do
 185 |       category?(word, "pronoun", mode) ->
 186 |         pronoun(word, mode)
 187 | 
 188 |       true ->
 189 |         nil
 190 |     end
 191 |   end
 192 | 
 193 |   # Handle standard irregular plurals (mongooses, oxen, etc. - see table A.1)...
 194 |   #         if the word has an irregular plural,
 195 |   #                 return the specified plural
 196 | 
 197 |   defp is_irregular_noun(word, mode) do
 198 |     cond do
 199 |       category?(word, "irregular_noun", mode) ->
 200 |         irregular_noun(word, mode)
 201 | 
 202 |       true ->
 203 |         nil
 204 |     end
 205 |   end
 206 | 
 207 |   # Handle irregular inflections for common suffixes (synopses, mice and men, etc.)...
 208 |   #         if suffix(-man),      return inflection(-man,-men)
 209 |   #         if suffix(-[lm]ouse), return inflection(-ouse,-ice)
 210 |   #         if suffix(-tooth),    return inflection(-tooth,-teeth)
 211 |   #         if suffix(-goose),    return inflection(-goose,-geese)
 212 |   #         if suffix(-foot),     return inflection(-foot,-feet)
 213 |   #         if suffix(-zoon),     return inflection(-zoon,-zoa)
 214 |   #         if suffix(-[csx]is),  return inflection(-is,-es)
 215 | 
 216 |   defp is_irregular_suffix(word, _mode) do
 217 |     cond do
 218 |       suffix?(word, "man") ->
 219 |         replace_suffix(word, "man", "men")
 220 | 
 221 |       suffix?(word, "louse") ->
 222 |         replace_suffix(word, "louse", "lice")
 223 | 
 224 |       suffix?(word, "mouse") ->
 225 |         replace_suffix(word, "mouse", "mice")
 226 | 
 227 |       suffix?(word, "tooth") ->
 228 |         replace_suffix(word, "tooth", "teeth")
 229 | 
 230 |       suffix?(word, "goose") ->
 231 |         replace_suffix(word, "goose", "geese")
 232 | 
 233 |       suffix?(word, "foot") ->
 234 |         replace_suffix(word, "foot", "feet")
 235 | 
 236 |       suffix?(word, "zoon") ->
 237 |         replace_suffix(word, "zoon", "zoa")
 238 | 
 239 |       suffix?(word, "cis") ->
 240 |         replace_suffix(word, "cis", "ces")
 241 | 
 242 |       suffix?(word, "sis") ->
 243 |         replace_suffix(word, "sis", "ses")
 244 | 
 245 |       suffix?(word, "xis") ->
 246 |         replace_suffix(word, "xis", "xes")
 247 | 
 248 |       true ->
 249 |         nil
 250 |     end
 251 |   end
 252 | 
 253 |   # Handle fully assimilated classical inflections (vertebrae, codices, etc. - see tables A.10,
 254 |   # A.14, A.19 and A.20, and tables A.11, A.15 and A.21 if in "classical mode)...
 255 |   #         if category(-ex,-ices), return inflection(-ex,-ices)
 256 |   #         if category(-um,-a),    return inflection(-um,-a)
 257 |   #         if category(-on,-a),    return inflection(-on,-a)
 258 |   #         if category(-a,-ae),    return inflection(-a,-ae)
 259 | 
 260 |   defp is_assimilated_classical(word, mode) do
 261 |     cond do
 262 |       category?(word, "-ex", "-ices", mode) ->
 263 |         replace_suffix(word, "ex", "ices")
 264 | 
 265 |       category?(word, "-um", "-a", mode) ->
 266 |         replace_suffix(word, "um", "a")
 267 | 
 268 |       category?(word, "-on", "-a", mode) ->
 269 |         replace_suffix(word, "on", "a")
 270 | 
 271 |       category?(word, "-a", "-ae", mode) ->
 272 |         replace_suffix(word, "a", "ae")
 273 | 
 274 |       true ->
 275 |         nil
 276 |     end
 277 |   end
 278 | 
 279 |   # Handle classical variants of modern inflections (stigmata, soprani, etc. - see tables A.11 to
 280 |   # A.13, A.15, A.16, A.18, A.21 to A.25)...
 281 |   #         if in classical mode,
 282 |   #                 if suffix(-trix),       return inflection(-trix,-trices)
 283 |   #                 if suffix(-eau),        return inflection(-eau,-eaux)
 284 |   #                 if suffix(-ieu),        return inflection(-ieu,-ieux)
 285 |   #                 if suffix(-..[iay]nx),  return inflection(-nx,-nges)
 286 |   #                 if category(-en,-ina),  return inflection(-en,-ina)
 287 |   #                 if category(-a,-ata),   return inflection(-a,-ata)
 288 |   #                 if category(-is,-ides), return inflection(-is,-ides)
 289 |   #                 if category(-us,-i),    return inflection(-us,-i)
 290 |   #                 if category(-us,-us),   return the original noun
 291 |   #                 if category(-o,-i),     return inflection(-o,-i)
 292 |   #                 if category(-,-i),      return inflection(-,-i)
 293 |   #                 if category(-,-im),     return inflection(-,-im)
 294 | 
 295 |   defp is_classical(word, :classical = mode) do
 296 |     cond do
 297 |       suffix?(word, "trix") ->
 298 |         replace_suffix(word, "trix", "trices")
 299 | 
 300 |       suffix?(word, "eau") ->
 301 |         word <> "x"
 302 | 
 303 |       suffix?(word, "ieu") ->
 304 |         word <> "x"
 305 | 
 306 |       suffix?(word, "inx") ->
 307 |         replace_suffix(word, "nx", "nges")
 308 | 
 309 |       suffix?(word, "anx") ->
 310 |         replace_suffix(word, "nx", "nges")
 311 | 
 312 |       suffix?(word, "ynx") ->
 313 |         replace_suffix(word, "nx", "nges")
 314 | 
 315 |       category?(word, "-en", "-ina", mode) ->
 316 |         replace_suffix(word, "en", "ina")
 317 | 
 318 |       category?(word, "-a", "-ata", mode) ->
 319 |         word <> "ta"
 320 | 
 321 |       category?(word, "-is", "-ides", mode) ->
 322 |         replace_suffix(word, "is", "ides")
 323 | 
 324 |       category?(word, "-us", "-i", mode) ->
 325 |         replace_suffix(word, "us", "i")
 326 | 
 327 |       category?(word, "-us", "-us", mode) ->
 328 |         word
 329 | 
 330 |       category?(word, "-o", "-i", mode) ->
 331 |         replace_suffix(word, "o", "i")
 332 | 
 333 |       category?(word, "-", "-i", mode) ->
 334 |         word <> "i"
 335 | 
 336 |       category?(word, "-", "-im", mode) ->
 337 |         word <> "im"
 338 | 
 339 |       true ->
 340 |         nil
 341 |     end
 342 |   end
 343 | 
 344 |   defp is_classical(word, :modern = mode) do
 345 |     cond do
 346 |       category?(word, "-us", "-i", mode) ->
 347 |         replace_suffix(word, "us", "uses")
 348 | 
 349 |       true ->
 350 |         nil
 351 |     end
 352 |   end
 353 | 
 354 |   # The suffixes -ch, -sh, and -ss all take -es in the plural (churches, classes, etc)...
 355 |   #         if suffix(-[cs]h), return inflection(-h,-hes)
 356 |   #         if suffix(-ss),    return inflection(-ss,-sses)
 357 | 
 358 |   defp is_compound_plural(word, _mode) do
 359 |     cond do
 360 |       suffix?(word, "ch") ->
 361 |         replace_suffix(word, "h", "hes")
 362 | 
 363 |       suffix?(word, "sh") ->
 364 |         replace_suffix(word, "h", "hes")
 365 | 
 366 |       suffix?(word, "ss") ->
 367 |         replace_suffix(word, "h", "sses")
 368 | 
 369 |       true ->
 370 |         nil
 371 |     end
 372 |   end
 373 | 
 374 |   # Certain words ending in -f or -fe take -ves in the plural (lives, wolves, etc)...
 375 |   #         if suffix(-[aeo]lf) or suffix(-[^d]eaf) or suffix(-arf),
 376 |   #                 return inflection(-f,-ves)
 377 |   #
 378 |   #         if suffix(-[nlw]ife),
 379 |   #                 return inflection(-fe,-ves)
 380 | 
 381 |   defp is_ves_plural(word, _mode) do
 382 |     cond do
 383 |       suffix?(word, "alf") ->
 384 |         replace_suffix(word, "f", "ves")
 385 | 
 386 |       suffix?(word, "elf") ->
 387 |         replace_suffix(word, "f", "ves")
 388 | 
 389 |       suffix?(word, "olf") ->
 390 |         replace_suffix(word, "f", "ves")
 391 | 
 392 |       suffix?(word, "arf") ->
 393 |         replace_suffix(word, "f", "ves")
 394 | 
 395 |       suffix?(word, "nife") ->
 396 |         replace_suffix(word, "fe", "ves")
 397 | 
 398 |       suffix?(word, "life") ->
 399 |         replace_suffix(word, "fe", "ves")
 400 | 
 401 |       suffix?(word, "wife") ->
 402 |         replace_suffix(word, "fe", "ves")
 403 | 
 404 |       suffix?(word, "eaf") ->
 405 |         if String.at(word, -4) == "d", do: nil, else: replace_suffix(word, "f", "ves")
 406 | 
 407 |       true ->
 408 |         nil
 409 |     end
 410 |   end
 411 | 
 412 |   # Words ending in -y take -ys if preceded by a vowel (storeys, stays, etc.) or when a proper noun
 413 |   # (Marys, Tonys, etc.), but -ies if preceded by a consonant (stories, skies, etc.)...
 414 |   #         if suffix(-[aeiou]y), return inflection(-y,-ys)
 415 |   #         if suffix(-[A-Z].*y), return inflection(-y,-ys)
 416 |   #         if suffix(-y),        return inflection(-y,-ies)
 417 | 
 418 |   defp is_word_ending_in_y(word, _mode) do
 419 |     cond do
 420 |       suffix?(word, "y") && vowel?(word, -2) ->
 421 |         word <> "s"
 422 | 
 423 |       suffix?(word, "y") && starts_with_upper?(word) ->
 424 |         word <> "s"
 425 | 
 426 |       suffix?(word, "y") ->
 427 |         replace_suffix(word, "y", "ies")
 428 | 
 429 |       true ->
 430 |         nil
 431 |     end
 432 |   end
 433 | 
 434 |   # Some words ending in -o take -os (lassos, solos, etc. - see tables A.17 and A.18); the rest
 435 |   # take -oes (potatoes, dominoes, etc.) However, words in which the -o is preceded by a vowel
 436 |   # always take -os (folios, bamboos)...
 437 |   #         if category(-o,-os) or suffix(-[aeiou]o),
 438 |   #                 return inflection(-o,-os)
 439 |   #
 440 |   #         if suffix(-o), return inflection(-o,-oes)
 441 | 
 442 |   defp is_o_suffix(word, :modern = mode) do
 443 |     cond do
 444 |       category?(word, "-o", "-os", mode) ->
 445 |         word <> "s"
 446 | 
 447 |       suffix?(word, "o") && vowel?(word, -2) ->
 448 |         word <> "s"
 449 | 
 450 |       suffix?(word, "o") ->
 451 |         word <> "es"
 452 | 
 453 |       true ->
 454 |         nil
 455 |     end
 456 |   end
 457 | 
 458 |   defp is_o_suffix(word, :classical = mode) do
 459 |     cond do
 460 |       category?(word, "-o", "-os", mode) ->
 461 |         replace_suffix(word, "o", "i")
 462 | 
 463 |       suffix?(word, "o") ->
 464 |         word <> "es"
 465 | 
 466 |       true ->
 467 |         nil
 468 |     end
 469 |   end
 470 | 
 471 |   # Handle plurals of compound words (Postmasters General, Major Generals, mothers-in-law, etc) by
 472 |   # recursively applying the entire algorithm to the underlying noun. See Table A.26 for the
 473 |   # military suffix -general, which inflects to -generals...
 474 |   #         if category(-general,-generals), return inflection(-l,-ls)
 475 |   #
 476 |   #         if the word is of the form: "<word> general",
 477 |   #                 return "<plural of word> general"
 478 |   #
 479 |   #         if the word is of the form: "<word> <preposition> <words>",
 480 |   #                 return "<plural of word> <preposition> <words>"
 481 | 
 482 |   @generals @inflections
 483 |             |> Map.get("a26")
 484 | 
 485 |   for general <- @generals do
 486 |     defp is_general(unquote(general) <> suffix, _mode) do
 487 |       cond do
 488 |         suffix?(suffix, "l") -> unquote(general) <> suffix <> "s"
 489 |         true -> nil
 490 |       end
 491 |     end
 492 |   end
 493 | 
 494 |   defp is_general(_word, _mode) do
 495 |     nil
 496 |   end
 497 | 
 498 |   # Otherwise, assume that the plural just adds -s (cats, programmes, trees, etc.)...
 499 |   #         otherwise, return inflection(-,-s)
 500 |   defp is_regular(word, _mode) do
 501 |     word <> "s"
 502 |   end
 503 | 
 504 |   @doc """
 505 |   Pluralize an english verb.
 506 | 
 507 |   ## Arguments
 508 | 
 509 |   * `word` is any English verb.
 510 | 
 511 |   ## Returns
 512 | 
 513 |   * a `String` representing the pluralized verb
 514 | 
 515 |   ## Examples
 516 | 
 517 |       iex> Text.Inflect.En.pluralize_verb "has"
 518 |       "have"
 519 | 
 520 |       iex> Text.Inflect.En.pluralize_verb "catches"
 521 |       "catch"
 522 | 
 523 |   """
 524 |   def pluralize_verb(word) do
 525 |     # All other cases are regular 1st or 2nd person verbs, which don't inflect...
 526 |     #         otherwise, return the verb uninflected
 527 |     is_non_inflecting_verb(word) ||
 528 |       is_irregular_verb(word) ||
 529 |       is_third_person_singular(word) ||
 530 |       is_third_person_singular_s(word) ||
 531 |       is_ambiguous(word) ||
 532 |       word
 533 |   end
 534 | 
 535 |   defp is_non_inflecting_verb(word) do
 536 |     cond do
 537 |       category?(word, "non_inflecting_verb") ->
 538 |         word
 539 | 
 540 |       true ->
 541 |         nil
 542 |     end
 543 |   end
 544 | 
 545 |   # Check if the verb is being used as an auxiliary and has a known irregular inflection (has seen,
 546 |   # was going, etc. See Table A.8 for irregular verbs)...
 547 |   #         if the word has the form "<auxiliary> <words>"
 548 |   #         and <auxiliary> belongs to the category of irregular verbs,
 549 |   #                 return "<specified plural of auxiliary> <words>"
 550 | 
 551 |   # Handle simple irregular verbs (has, is, etc. - see Table A.8)...
 552 |   #         if the word belongs to the category of irregular verbs,
 553 |   #                 return the specified plural form
 554 | 
 555 |   # Combine the both cases in this simpler execution
 556 | 
 557 |   defp is_irregular_verb(word) do
 558 |     cond do
 559 |       category?(word, "irregular_verb") ->
 560 |         irregular_verb(word)
 561 | 
 562 |       true ->
 563 |         nil
 564 |     end
 565 |   end
 566 | 
 567 |   # Verbs in the regular 3rd person singular lose their -es, -ies, or -oes suffix (she catches -
 568 |   # they catch, he tries -> they try, it does -> they do, etc.)...
 569 |   #         if suffix(-[cs]hes), return inflection(-hes,-h)
 570 |   #         if suffix(-[sx]es),  return inflection(-es,-)
 571 |   #         if suffix(-zzes),    return inflection(-es,-)
 572 |   #         if suffix(-ies),     return inflection(-ies,-y)
 573 |   #         if suffix(-oes),     return inflection(-oes,-o)
 574 | 
 575 |   defp is_third_person_singular(word) do
 576 |     cond do
 577 |       suffix?(word, "ches") ->
 578 |         replace_suffix(word, "hes", "h")
 579 | 
 580 |       suffix?(word, "shes") ->
 581 |         replace_suffix(word, "hes", "h")
 582 | 
 583 |       suffix?(word, "ses") ->
 584 |         replace_suffix(word, "es", "")
 585 | 
 586 |       suffix?(word, "xes") ->
 587 |         replace_suffix(word, "es", "")
 588 | 
 589 |       suffix?(word, "zzes") ->
 590 |         replace_suffix(word, "es", "")
 591 | 
 592 |       suffix?(word, "ies") ->
 593 |         replace_suffix(word, "ies", "y")
 594 | 
 595 |       suffix?(word, "oes") ->
 596 |         replace_suffix(word, "oes", "o")
 597 | 
 598 |       true ->
 599 |         nil
 600 |     end
 601 |   end
 602 | 
 603 |   # Other 3rd person singular verbs ending in -s (but not -ss) also lose their suffix...
 604 |   #         if suffix(-[^s]s), return inflection(-s,-)
 605 | 
 606 |   defp is_third_person_singular_s(word) do
 607 |     cond do
 608 |       suffix?(word, "ss") ->
 609 |         nil
 610 | 
 611 |       suffix?(word, "s") ->
 612 |         replace_suffix(word, "s", "")
 613 | 
 614 |       true ->
 615 |         nil
 616 |     end
 617 |   end
 618 | 
 619 |   # Handle ambiguous simple verbs that might also be nouns (thought, sink, fly, etc. - see Table
 620 |   # A.4)...
 621 |   #         if the word is in the ambiguous category,
 622 |   #                 return the specified plural form
 623 | 
 624 |   defp is_ambiguous(word) do
 625 |     cond do
 626 |       category?(word, "ambiguous") ->
 627 |         pluralize_noun(word)
 628 | 
 629 |       true ->
 630 |         nil
 631 |     end
 632 |   end
 633 | 
 634 |   @doc """
 635 |   Pluralize an english adjective.
 636 | 
 637 |   ## Arguments
 638 | 
 639 |   * `word` is any English adjective.
 640 | 
 641 |   ## Returns
 642 | 
 643 |   * a `String` representing the pluralized
 644 |     adjective
 645 | 
 646 |   ## Examples
 647 | 
 648 |       iex> Text.Inflect.En.pluralize_adjective "a"
 649 |       "some"
 650 | 
 651 |       iex> Text.Inflect.En.pluralize_adjective "my"
 652 |       "our"
 653 | 
 654 |       iex> Text.Inflect.En.pluralize_adjective "child's"
 655 |       "children's"
 656 | 
 657 |       iex> Text.Inflect.En.pluralize_adjective "Mary's"
 658 |       "Marys'"
 659 | 
 660 |   """
 661 |   def pluralize_adjective(word) do
 662 |     # In all other cases no inflection is required...
 663 |     #         otherwise, return the adjective uninflected
 664 |     is_indefinite_article(word) ||
 665 |       is_possessive_pronoun(word) ||
 666 |       is_genetive(word) ||
 667 |       word
 668 |   end
 669 | 
 670 |   # Handle indefinite articles and demonstratives...
 671 |   #         if the word is "a" or "an", return "some"
 672 |   #         if the word is "this",      return "these"
 673 |   #         if the word is "that",      return "those"
 674 | 
 675 |   def is_indefinite_article(word) do
 676 |     cond do
 677 |       word in ["a", "an"] ->
 678 |         "some"
 679 | 
 680 |       word == "this" ->
 681 |         "these"
 682 | 
 683 |       word == "that" ->
 684 |         "those"
 685 | 
 686 |       true ->
 687 |         nil
 688 |     end
 689 |   end
 690 | 
 691 |   # Handle possessive pronouns (my -> our, its -> their, etc - see Table A.7)...
 692 |   #         if the word is a personal possessive,
 693 |   #                 return the specified plural form
 694 | 
 695 |   def is_possessive_pronoun(word) do
 696 |     cond do
 697 |       category?(word, "personal_possessive") ->
 698 |         personal_possessive(word)
 699 | 
 700 |       true ->
 701 |         nil
 702 |     end
 703 |   end
 704 | 
 705 |   # Handle genitives (dog's -> dogs', child's -> children's, Mary's -> Marys', etc). The general
 706 |   # rule is: remove the apostrophe and any trailing -s, form the plural of the resultant noun, and
 707 |   # then append an apostrophe (or -'s if the pluralized noun doesn't end in -s)...
 708 |   #         if suffix(-'s) or suffix(-'),
 709 |   #                 if suffix(-'), let the noun <owner> be inflection(-',-)
 710 |   #                 otherwise,     let the noun <owner> be inflection(-'s,-)
 711 |   #                 let the noun <owners> be the noun plural of <owner>
 712 |   #                 if <owners> ends in -s, return "<owners>'"
 713 |   #                 otherwise,              return "<owners>'s"
 714 |   def is_genetive(word) do
 715 |     cond do
 716 |       suffix?(word, "'s") ->
 717 |         do_genetive(word, "'s")
 718 | 
 719 |       suffix?(word, "'") ->
 720 |         do_genetive(word, "'")
 721 | 
 722 |       true ->
 723 |         nil
 724 |     end
 725 |   end
 726 | 
 727 |   def do_genetive(word, suffix) do
 728 |     plural_noun =
 729 |       word
 730 |       |> replace_suffix(suffix, "")
 731 |       |> pluralize_noun()
 732 | 
 733 |     if suffix?(plural_noun, "s") do
 734 |       plural_noun <> "'"
 735 |     else
 736 |       plural_noun <> "'s"
 737 |     end
 738 |   end
 739 | 
 740 |   ##########################################
 741 | 
 742 |   # Category definitions
 743 | 
 744 |   ##########################################
 745 | 
 746 |   @non_inflecting_nouns @inflections
 747 |                         |> Map.take(["a2", "a3"])
 748 |                         |> Map.values()
 749 |                         |> List.flatten()
 750 | 
 751 |   @ambiguous @inflections
 752 |              |> Map.get("a4")
 753 | 
 754 |   @personal_possessive @inflections
 755 |                        |> Map.get("a7")
 756 |                        |> Enum.drop(3)
 757 |                        |> Enum.flat_map(&String.split(&1, " "))
 758 |                        |> Enum.reject(&(&1 == "->" || &1 == " "))
 759 |                        |> Enum.chunk_every(2)
 760 |                        |> Enum.map(&List.to_tuple/1)
 761 |                        |> Map.new()
 762 | 
 763 |   @pluralize_auxillary_irregular @inflections
 764 |                                  |> Map.get("a8")
 765 |                                  |> Enum.drop(3)
 766 |                                  |> Enum.map(&String.split(&1, " -> "))
 767 |                                  |> Enum.map(&List.to_tuple/1)
 768 |                                  |> Map.new()
 769 | 
 770 |   @non_inflecting_verbs @inflections
 771 |                         |> Map.get("a9")
 772 | 
 773 |   @a_ae_modern @inflections
 774 |                |> Map.get("a10")
 775 | 
 776 |   @a_ae_classical @inflections
 777 |                   |> Map.take(["a10", "a11"])
 778 |                   |> Map.values()
 779 |                   |> List.flatten()
 780 | 
 781 |   @a_ata @inflections
 782 |          |> Map.get("a12")
 783 | 
 784 |   @en_ina @inflections
 785 |           |> Map.get("a13")
 786 | 
 787 |   @ex_ices_modern @inflections
 788 |                   |> Map.get("a14")
 789 | 
 790 |   @ex_ices_classical @inflections
 791 |                      |> Map.take(["a14", "a15"])
 792 |                      |> Map.values()
 793 |                      |> List.flatten()
 794 | 
 795 |   @is_ides @inflections
 796 |            |> Map.get("a16")
 797 | 
 798 |   @o_i @inflections
 799 |        |> Map.get("a18")
 800 | 
 801 |   @o_words_modern @inflections
 802 |                   |> Map.take(["a17", "a18"])
 803 |                   |> Map.values()
 804 |                   |> List.flatten()
 805 | 
 806 |   @o_words_classical @inflections
 807 |                      |> Map.get("a17")
 808 | 
 809 |   @on_a @inflections
 810 |         |> Map.get("a19")
 811 | 
 812 |   @um_a_modern @inflections
 813 |                |> Map.get("a20")
 814 | 
 815 |   @um_a_classical @inflections
 816 |                   |> Map.take(["a20", "a21"])
 817 |                   |> Map.values()
 818 |                   |> List.flatten()
 819 | 
 820 |   @us_i @inflections
 821 |         |> Map.get("a22")
 822 | 
 823 |   @us_us @inflections
 824 |          |> Map.get("a23")
 825 | 
 826 |   @any_i @inflections
 827 |          |> Map.get("a24")
 828 | 
 829 |   @any_im @inflections
 830 |           |> Map.get("a25")
 831 | 
 832 |   @pronouns @inflections
 833 |             |> Map.get("a5")
 834 |             |> Enum.drop(3)
 835 |             |> Enum.reject(&(&1 == "->"))
 836 |             |> Enum.map(&String.replace(&1, " ->", ""))
 837 |             |> Enum.map(fn x -> if String.contains?(x, "|"), do: String.split(x, "|"), else: x end)
 838 |             |> Enum.chunk_every(2)
 839 |             |> Map.new(&List.to_tuple/1)
 840 | 
 841 |   @irregular @inflections
 842 |              |> Map.get("a1")
 843 |              |> Enum.chunk_every(3)
 844 |              |> Enum.drop(1)
 845 |              |> Enum.map(fn
 846 |                [word, "(none)", plural] -> {word, [plural, plural]}
 847 |                [word, plural, "(none)"] -> {word, [plural, plural]}
 848 |                [word, modern, classical] -> {word, [modern, classical]}
 849 |                [a, b] -> {a, [b, b]}
 850 |              end)
 851 |              |> Map.new()
 852 | 
 853 |   @doc false
 854 |   def category?(word, "irregular_verb") do
 855 |     Map.has_key?(@pluralize_auxillary_irregular, word)
 856 |   end
 857 | 
 858 |   def category?(word, "ambiguous") do
 859 |     word in @ambiguous
 860 |   end
 861 | 
 862 |   def category?(word, "non_inflecting_verb") do
 863 |     word in @non_inflecting_verbs
 864 |   end
 865 | 
 866 |   def category?(word, "personal_possessive") do
 867 |     Map.has_key?(@personal_possessive, word)
 868 |   end
 869 | 
 870 |   @doc false
 871 |   def category?(word, "irregular_noun", _mode) do
 872 |     Map.has_key?(@irregular, word)
 873 |   end
 874 | 
 875 |   def category?(word, "pronoun", _mode) do
 876 |     Map.has_key?(@pronouns, word)
 877 |   end
 878 | 
 879 |   @non_inflecting_suffix ["fish", "ois", "sheep", "deer", "pox", "itis"]
 880 |   def category?(word, "herd", _mode) do
 881 |     Enum.any?(@non_inflecting_suffix, &suffix?(word, &1))
 882 |   end
 883 | 
 884 |   def category?(word, "nationalities", _mode) do
 885 |     suffix?(word, "ese") && starts_with_upper?(word)
 886 |   end
 887 | 
 888 |   @doc false
 889 |   def category?(word, "-", "-", _) do
 890 |     word in @non_inflecting_nouns
 891 |   end
 892 | 
 893 |   def category?(word, "-o", "-os", :classical) do
 894 |     word in @o_words_classical
 895 |   end
 896 | 
 897 |   def category?(word, "-o", "-os", :modern) do
 898 |     word in @o_words_modern
 899 |   end
 900 | 
 901 |   def category?(word, "-ex", "-ices", :modern) do
 902 |     word in @ex_ices_modern
 903 |   end
 904 | 
 905 |   def category?(word, "-ex", "-ices", :classical) do
 906 |     word in @ex_ices_classical
 907 |   end
 908 | 
 909 |   def category?(word, "-um", "-a", :modern) do
 910 |     word in @um_a_modern
 911 |   end
 912 | 
 913 |   def category?(word, "-um", "-a", :classical) do
 914 |     word in @um_a_classical
 915 |   end
 916 | 
 917 |   def category?(word, "-on", "-a", :modern) do
 918 |     word in @on_a
 919 |   end
 920 | 
 921 |   def category?(word, "-on", "-a", :classical) do
 922 |     word in @on_a
 923 |   end
 924 | 
 925 |   def category?(word, "-a", "-ae", :modern) do
 926 |     word in @a_ae_modern
 927 |   end
 928 | 
 929 |   def category?(word, "-a", "-ae", :classical) do
 930 |     word in @a_ae_classical
 931 |   end
 932 | 
 933 |   def category?(word, "-en", "-ina", :classical) do
 934 |     word in @en_ina
 935 |   end
 936 | 
 937 |   def category?(word, "-a", "-ata", _mode) do
 938 |     word in @a_ata
 939 |   end
 940 | 
 941 |   def category?(word, "-is", "-ides", _mode) do
 942 |     word in @is_ides
 943 |   end
 944 | 
 945 |   def category?(word, "-us", "-i", _mode) do
 946 |     word in @us_i
 947 |   end
 948 | 
 949 |   def category?(word, "-us", "-us", _mode) do
 950 |     word in @us_us
 951 |   end
 952 | 
 953 |   def category?(word, "-o", "-i", _mode) do
 954 |     word in @o_i
 955 |   end
 956 | 
 957 |   def category?(word, "-", "-i", _mode) do
 958 |     word in @any_i
 959 |   end
 960 | 
 961 |   def category?(word, "-", "-im", _mode) do
 962 |     word in @any_im
 963 |   end
 964 | 
 965 |   ##########################################
 966 | 
 967 |   # Helpers
 968 | 
 969 |   ##########################################
 970 | 
 971 |   defp suffix?(word, suffix) do
 972 |     String.ends_with?(word, suffix)
 973 |   end
 974 | 
 975 |   defp replace_suffix(word, suffix, replacement) do
 976 |     String.replace_trailing(word, suffix, replacement)
 977 |   end
 978 | 
 979 |   @vowels ["a", "e", "i", "o", "u"]
 980 |   defp vowel?(word, pos) do
 981 |     String.at(word, pos) in @vowels
 982 |   end
 983 | 
 984 |   defp irregular_noun(word, mode) do
 985 |     [modern, classical] = Map.fetch!(@irregular, word)
 986 |     if mode == :modern, do: modern, else: classical
 987 |   end
 988 | 
 989 |   defp irregular_verb(word) do
 990 |     Map.fetch!(@pluralize_auxillary_irregular, word)
 991 |   end
 992 | 
 993 |   defp pronoun(word, mode) do
 994 |     [modern, classical] = Map.fetch!(@pronouns, word)
 995 |     if mode == :modern, do: modern, else: classical
 996 |   end
 997 | 
 998 |   defp personal_possessive(word) do
 999 |     Map.fetch!(@personal_possessive, word)
1000 |   end
1001 | 
1002 |   defp starts_with_upper?(<<char::utf8, _rest::binary>>) when char in ?A..?Z, do: true
1003 |   defp starts_with_upper?(_word), do: false
1004 | end
1005 | 


--------------------------------------------------------------------------------
/lib/language.ex:
--------------------------------------------------------------------------------
  1 | defmodule Text.Language do
  2 |   @moduledoc """
  3 |   A module to support natural language
  4 |   detection.
  5 | 
  6 |   The primary models are implementations
  7 |   derived from [Language Identification from Text
  8 |   Using N-gram Based Cumulative Frequency Addition](http://www.csis.pace.edu/~ctappert/srd2004/paper12.pdf)
  9 | 
 10 |   """
 11 | 
 12 |   @known_classifiers [
 13 |     Text.Language.Classifier.NaiveBayesian,
 14 |     Text.Language.Classifier.CummulativeFrequency,
 15 |     Text.Language.Classifier.RankOrder
 16 |   ]
 17 | 
 18 |   @default_max_demand 20
 19 | 
 20 |   @doc """
 21 |   Detect the natural language of a given text.
 22 | 
 23 |   ## Arguments
 24 | 
 25 |   * `text` is a binary text from which
 26 |     the language is detected.
 27 | 
 28 |   * `options` is a keyword list of
 29 |     options.
 30 | 
 31 |   ## Options
 32 | 
 33 |   * `:corpus` is a module encapsulating a body of
 34 |     text in one or more natural languages.A corpus
 35 |     module implements the `Text.Corpus` behaviour.
 36 |     The default is `Text.Corpus.Udhr` which is implemented by the
 37 |     [text_corpus_udhr](https://hex.pm/packages/text_corpus_udhr)
 38 |     package. This package must be installed as a dependency in
 39 |     order for this default to be used.
 40 | 
 41 |   * `:classifier` is the module used to detect the language.
 42 |     The default is `Text.Language.Classifier.NaiveBayesian`.
 43 |     Other classifiers are `Text.Language.Classifier.RankOrder`,
 44 |     `Text.Classifier.CummulativeFrequency` and
 45 |     `Text.Language.Classifier.Spearman`. Any module that
 46 |     implements the `Text.Language.Classifier` behaviour
 47 |     may be used.
 48 | 
 49 |   * `:vocabulary` is the vocabulary to be used. The
 50 |     default is `hd(corpus.known_vocabularies())`. Available
 51 |     vocabularies are returned from `corpus.known_vocabularies/0`.
 52 | 
 53 |   * `:only` is a list of languages to be used
 54 |     as candidates for the language of `text`. The
 55 |     default is `corpus.known_languages/0`
 56 |     which is all the lanuages known to a given
 57 |     corpus.
 58 | 
 59 |   * `:max_demand` is used to determine the batch size
 60 |     for `Flow.from_enumerable/1`. The default is
 61 |     `#{@default_max_demand}`.
 62 | 
 63 |   ## Returns
 64 | 
 65 |   * A list of `2-tuples` in order of confidence with
 66 |     the first element being the BCP-47 language code
 67 |     and the second element being the score as determined
 68 |     by the requested classifier. The score has no meaning
 69 |     except to order the results by confidence level.
 70 | 
 71 |   """
 72 |   @spec detect(String.t, Keyword.t) :: {:ok, Text.language} | {:error, {module(), String.t}}
 73 | 
 74 |   def detect(text, options \\ []) when is_binary(text) do
 75 |     case classify(text, options) do
 76 |       {:error, _} = error -> error
 77 |       [{language, _} | _rest] -> {:ok, language}
 78 |     end
 79 |   end
 80 | 
 81 |   @doc """
 82 |   Classify the natural language of a given text.
 83 | 
 84 |   ## Arguments
 85 | 
 86 |   * `text` is a binary text from which
 87 |     the language is detected.
 88 | 
 89 |   * `options` is a keyword list of
 90 |     options.
 91 | 
 92 |   ## Options
 93 | 
 94 |   * `:corpus` is a module encapsulating a body of
 95 |     text in one or more natural languages.A corpus
 96 |     module implements the `Text.Corpus` behaviour.
 97 |     The default is `Text.Corpus.Udhr` which is implemented by the
 98 |     [text_corpus_udhr](https://hex.pm/packages/text_corpus_udhr)
 99 |     package. This package must be installed as a dependency in
100 |     order for this default to be used.
101 | 
102 |   * `:classifier` is the module used to detect the language.
103 |     The default is `Text.Language.Classifier.NaiveBayesian`.
104 |     Other classifiers are `Text.Language.Classifier.RankOrder`,
105 |     `Text.Classifier.CummulativeFrequency` and
106 |     `Text.Language.Classifier.Spearman`. Any module that
107 |     implements the `Text.Language.Classifier` behaviour
108 |     may be used.
109 | 
110 |   * `:vocabulary` is the vocabulary to be used. The
111 |     default is `hd(corpus.known_vocabularies())`. Available
112 |     vocabularies are returned from `corpus.known_vocabularies/0`.
113 | 
114 |   * `:only` is a list of languages to be used
115 |     as candidates for the language of `text`. The
116 |     default is `corpus.known_languages/0`
117 |     which is all the lanuages known to a given
118 |     corpus.
119 | 
120 |   * `:max_demand` is used to determine the batch size
121 |     for `Flow.from_enumerable/1`. The default is
122 |     `#{@default_max_demand}`.
123 | 
124 |   ## Returns
125 | 
126 |   * A list of `2-tuples` in order of confidence with
127 |     the first element being the BCP-47 language code
128 |     and the second element being the score as determined
129 |     by the requested classifier. The score has no meaning
130 |     except to order the results by confidence level.
131 | 
132 |   """
133 |   @spec classify(String.t, Keyword.t) ::
134 |     Text.Language.Classifier.frequency_list() | {:error, {module(), String.t}}
135 | 
136 |   def classify(text, options \\ []) when is_binary(text) do
137 |     corpus = Keyword.get(options, :corpus, Text.Corpus.Udhr)
138 |     classifier = Keyword.get(options, :classifier, Text.Language.Classifier.NaiveBayesian)
139 |     vocabulary = Keyword.get(options, :vocabulary)
140 |     languages = Keyword.get(options, :only, corpus.known_languages())
141 |     max_demand = Keyword.get(options, :max_demand, @default_max_demand)
142 | 
143 |     with {:ok, corpus} <- validate(:corpus, corpus),
144 |          {:ok, classifier} <- validate(:classifier, classifier),
145 |          {:ok, vocabulary} <- validate(:vocabulary, corpus, vocabulary),
146 |          {:ok, languages} <- validate(:only, corpus, languages) do
147 |       ensure_vocabulary_loaded!(vocabulary)
148 | 
149 |       text_ngrams =
150 |         text
151 |         |> corpus.normalize_text
152 |         |> vocabulary.calculate_ngrams
153 | 
154 |       languages
155 |       |> Flow.from_enumerable(max_demand: max_demand)
156 |       |> Flow.map(&classifier.score_one_language(&1, text_ngrams, vocabulary))
157 |       |> Enum.to_list
158 |       |> classifier.order_scores()
159 |     end
160 |   end
161 | 
162 |   @doc """
163 |   Returns a list of the known
164 |   classifiers that can be applied as
165 |   a `:classifer` option to `Text.Language.detect/2`
166 | 
167 |   """
168 |   @spec known_classifiers :: [Text.Language.Classifier.t, ...]
169 |   def known_classifiers do
170 |     @known_classifiers
171 |   end
172 | 
173 |   @doc """
174 |   Function to remove text elements that
175 |   interfer with language detection.
176 | 
177 |   Each corpus has a callback `normalize_text/1`
178 |   that is applied when training the
179 |   classifier and when detecting language
180 |   from natural text. If desired, the corpus
181 |   can delegate to this function.
182 | 
183 |   ## Argument
184 | 
185 |   * `text` is any `String.t`
186 | 
187 |   ## Returns
188 | 
189 |   * A normalized `String.t`
190 | 
191 |   """
192 |   @spec normalize_text(String.t) :: String.t
193 |   def normalize_text(text) do
194 |     text
195 |     # Downcase
196 |     |> String.downcase()
197 |     # Make sure that there is letter before punctuation
198 |     |> String.replace(~r/\.\s*/u, "_")
199 |     # Discard all digits
200 |     |> String.replace(~r/[0-9]/u, "")
201 |     # Discard all punctuation except for apostrophe
202 |     |> String.replace(~r/[&\/\\#,+()$~%.":*?<>{}]/u, "")
203 |     # Remove duplicate spaces
204 |     |> String.replace(~r/\s+/u, " ")
205 |   end
206 | 
207 |   defp ensure_vocabulary_loaded!(vocabulary) do
208 |     :persistent_term.get({vocabulary, :languages}, nil) || vocabulary.load_vocabulary!
209 |   end
210 | 
211 |   defp validate(:corpus, corpus) when is_atom(corpus) do
212 |     if corpus_module?(corpus) do
213 |       {:ok, corpus}
214 |     else
215 |       {:error, {ArgumentError, "Unknown corpus #{inspect(corpus)}"}}
216 |     end
217 |   end
218 | 
219 |   defp validate(:classifier, classifier) when classifier in @known_classifiers do
220 |     {:ok, classifier}
221 |   end
222 | 
223 |   defp validate(:classifier, classifier) do
224 |     {:error,
225 |      {ArgumentError,
226 |       "Unknown classifier #{inspect(classifier)}. " <>
227 |         "Known classifiers are #{inspect(@known_classifiers)}."}}
228 |   end
229 | 
230 |   defp validate(:vocabulary, corpus, nil) do
231 |     known_vocabularies = corpus.known_vocabularies
232 |     validate(:vocabulary, corpus, hd(known_vocabularies))
233 |   end
234 | 
235 |   defp validate(:vocabulary, corpus, vocabulary) do
236 |     known_vocabularies = corpus.known_vocabularies
237 | 
238 |     if vocabulary in corpus.known_vocabularies do
239 |       {:ok, vocabulary}
240 |     else
241 |       {:error,
242 |         {ArgumentError,
243 |           "Unknown vocabulary #{inspect(vocabulary)}. " <>
244 |           "Known vocabularies are #{inspect(known_vocabularies)}."
245 |       }}
246 |     end
247 |   end
248 | 
249 |   defp validate(:only, corpus, languages) do
250 |     known_languages = corpus.known_languages
251 |     unknown_languages = Enum.filter(languages, &(&1 not in known_languages))
252 | 
253 |     if unknown_languages == [] do
254 |       {:ok, languages}
255 |     else
256 |       {:error,
257 |        {ArgumentError,
258 |         "Unknown languages #{inspect(unknown_languages)}. " <>
259 |           "Known languages are #{inspect(known_languages)}."}}
260 |     end
261 |   end
262 | 
263 |   @doc false
264 |   def corpus_module?(corpus) do
265 |     Code.ensure_loaded?(corpus) && function_exported?(corpus, :known_vocabularies, 0)
266 |   end
267 | end
268 | 


--------------------------------------------------------------------------------
/lib/language/classifier/cummulative_frequency.ex:
--------------------------------------------------------------------------------
 1 | defmodule Text.Language.Classifier.CummulativeFrequency do
 2 |   @moduledoc """
 3 |   A language detection model that uses cummulative
 4 |   frequencies
 5 | 
 6 |   It sums the frequencies of detected
 7 |   n-grams.
 8 | 
 9 |   """
10 |   @no_entry %Text.Ngram.Frequency{
11 |     rank: 1000,
12 |     count: 0,
13 |     frequency: 0,
14 |     global_rank: 1000,
15 |     global_frequency: 0,
16 |     log_frequency: :math.log(5.0e-6)
17 |   }
18 | 
19 |   @doc """
20 |   Sums the frequencies of each n-gram
21 | 
22 |   A strong negative weighting is
23 |   applied if the n-gram is not contained
24 |   in the given vocabulary.
25 |   """
26 |   def score_one_language(language, text_ngrams, vocabulary) do
27 |     vocab = vocabulary.get_vocabulary(language)
28 | 
29 |     score =
30 |       text_ngrams
31 |       |> Enum.reduce(0, fn {ngram, %{count: count}}, acc ->
32 |         ngram_stats = Map.get(vocab, ngram, @no_entry)
33 |         acc + count * ngram_stats.frequency
34 |       end)
35 | 
36 |     {language, score}
37 |   end
38 | 
39 |   @doc """
40 |   Return the `{language score}` tuples
41 |   in the correct order for this classifier.
42 | 
43 |   """
44 |   def order_scores(scores) do
45 |     scores
46 |     |> Enum.sort(fn
47 |       {ngram1, score}, {ngram2, score} -> ngram1 > ngram2
48 |       {_, score1}, {_, score2} -> score1 >= score2
49 |     end)
50 |   end
51 | end
52 | 


--------------------------------------------------------------------------------
/lib/language/classifier/naive_bayesian.ex:
--------------------------------------------------------------------------------
 1 | defmodule Text.Language.Classifier.NaiveBayesian do
 2 |   @moduledoc """
 3 |   A language detection model that uses n-gram
 4 |   frequencies.
 5 | 
 6 |   It multiplies the frequencies of detected
 7 |   n-grams. Since the frequencies are stored
 8 |   as `log(frequency)` the addition of the
 9 |   `log(frequency)` entries is the same as
10 |   `frequency * frequency`.
11 | 
12 |   """
13 |   @no_entry %Text.Ngram.Frequency{
14 |     rank: 1000,
15 |     count: 0,
16 |     frequency: 0,
17 |     global_rank: 1000,
18 |     global_frequency: 0,
19 |     log_frequency: :math.log(5.0e-6)
20 |   }
21 | 
22 |   @doc """
23 |   Sums the frequencies of each n-gram
24 | 
25 |   A strong negative weighting is
26 |   applied if the n-gram is not contained
27 |   in the given vocabulary.
28 |   """
29 |   def score_one_language(language, text_ngrams, vocabulary) do
30 |     vocab = vocabulary.get_vocabulary(language)
31 | 
32 |     score =
33 |       text_ngrams
34 |       |> Enum.reduce(0, fn {ngram, %{count: count}}, acc ->
35 |         ngram_stats = Map.get(vocab, ngram, @no_entry)
36 |         acc + count * ngram_stats.log_frequency
37 |       end)
38 | 
39 |     {language, score}
40 |   end
41 | 
42 |   @doc """
43 |   Return the `{language score}` tuples
44 |   in the correct order for this classifier.
45 | 
46 |   """
47 |   def order_scores(scores) do
48 |     scores
49 |     |> Enum.sort(fn
50 |       {ngram1, score}, {ngram2, score} -> ngram1 > ngram2
51 |       {_, score1}, {_, score2} -> score1 >= score2
52 |     end)
53 |   end
54 | end
55 | 


--------------------------------------------------------------------------------
/lib/language/classifier/rank_order.ex:
--------------------------------------------------------------------------------
 1 | defmodule Text.Language.Classifier.RankOrder do
 2 |   @moduledoc """
 3 |   A language detection model that uses a rank
 4 |   order coefficient to determine language
 5 |   similarity.
 6 | 
 7 |   """
 8 |   @no_entry %Text.Ngram.Frequency{
 9 |     rank: 1000,
10 |     count: 0,
11 |     frequency: 0,
12 |     global_rank: 1000,
13 |     global_frequency: 0,
14 |     log_frequency: :math.log(5.0e-6)
15 |   }
16 | 
17 |   @doc """
18 |   Correlates based upon a
19 |   rank order coefficient.
20 | 
21 |   """
22 |   def score_one_language(language, text_ngrams, vocabulary) do
23 |     language_vocab = vocabulary.get_vocabulary(language)
24 | 
25 |     score =
26 |       Enum.reduce(text_ngrams, 0, fn {ngram, %{rank: text_rank, count: count}}, score ->
27 |         vocab = Map.get(language_vocab, ngram, @no_entry)
28 |         score + count * (abs(vocab.rank - text_rank) + abs(vocab.global_rank - text_rank))
29 |       end)
30 | 
31 |     {language, score}
32 |   end
33 | 
34 |   @doc """
35 |   Return the `{language score}` tuples
36 |   in the correct order for this classifier.
37 | 
38 |   """
39 |   def order_scores(scores) do
40 |     Enum.sort(scores, fn
41 |       {ngram1, score}, {ngram2, score} -> ngram1 < ngram2
42 |       {_ngram1, score1}, {_ngram2, score2} -> score1 < score2
43 |     end)
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/lib/ngram.ex:
--------------------------------------------------------------------------------
  1 | defmodule Text.Ngram do
  2 |   @moduledoc """
  3 |   Compute ngrams and their counts from a given
  4 |   UTF8 string.
  5 | 
  6 |   Computes ngrams for n in 2..7
  7 | 
  8 |   """
  9 |   @max_ngram 7
 10 |   @min_ngram 2
 11 |   @default_ngram @min_ngram
 12 | 
 13 |   @type ngram_range :: 2..7
 14 | 
 15 |   defmodule Frequency do
 16 |     @type t :: %{
 17 |       rank: non_neg_integer,
 18 |       count: non_neg_integer,
 19 |       frequency: float,
 20 |       log_frequency: float,
 21 |       global_rank: non_neg_integer,
 22 |       global_frequency: float
 23 |     }
 24 | 
 25 |     defstruct [
 26 |       :rank,
 27 |       :count,
 28 |       :frequency,
 29 |       :log_frequency,
 30 |       :global_rank,
 31 |       :global_frequency
 32 |     ]
 33 |   end
 34 | 
 35 |   @doc """
 36 |   Returns a map of n-grams for a given text
 37 |   and n-gram size.
 38 | 
 39 |   The n-gram size is a minimum of #{@min_ngram} and
 40 |   a maximum of #{@max_ngram} with a default of #{@default_ngram}.
 41 | 
 42 |   """
 43 |   @spec ngram(String.t(), ngram_range) :: %{list() => integer}
 44 |   def ngram(string, n \\ @default_ngram) when is_binary(string) and n in @min_ngram..@max_ngram do
 45 |     string
 46 |     |> :unicode.characters_to_nfc_binary()
 47 |     |> ngram(n, %{})
 48 |   end
 49 | 
 50 |   def ngram("", _n, acc) do
 51 |     acc
 52 |   end
 53 | 
 54 |   def ngram(<<_a::utf8>>, 2, acc) do
 55 |     acc
 56 |   end
 57 | 
 58 |   def ngram(<<_a::utf8, _b::utf8>>, 3, acc) do
 59 |     acc
 60 |   end
 61 | 
 62 |   def ngram(<<_a::utf8, _b::utf8, _c::utf8>>, 4, acc) do
 63 |     acc
 64 |   end
 65 | 
 66 |   def ngram(<<_a::utf8, _b::utf8, _c::utf8, _d::utf8>>, 5, acc) do
 67 |     acc
 68 |   end
 69 | 
 70 |   def ngram(<<_a::utf8, _b::utf8, _c::utf8, _d::utf8, _e::utf8>>, 6, acc) do
 71 |     acc
 72 |   end
 73 | 
 74 |   def ngram(<<_a::utf8, _b::utf8, _c::utf8, _d::utf8, _e::utf8, _f::utf8>>, 7, acc) do
 75 |     acc
 76 |   end
 77 | 
 78 |   def ngram(<<a::utf8, b::utf8, rest::binary>>, 2 = n, acc) do
 79 |     ngram = [a, b]
 80 |     acc = Map.update(acc, ngram, 1, &(&1 + 1))
 81 |     ngram(<<b::utf8, rest::binary>>, n, acc)
 82 |   end
 83 | 
 84 |   def ngram(<<a::utf8, b::utf8, c::utf8, rest::binary>>, 3 = n, acc) do
 85 |     ngram = [a, b, c]
 86 |     acc = Map.update(acc, ngram, 1, &(&1 + 1))
 87 |     ngram(<<b::utf8, c::utf8, rest::binary>>, n, acc)
 88 |   end
 89 | 
 90 |   def ngram(<<a::utf8, b::utf8, c::utf8, d::utf8, rest::binary>>, 4 = n, acc) do
 91 |     ngram = [a, b, c, d]
 92 |     acc = Map.update(acc, ngram, 1, &(&1 + 1))
 93 |     ngram(<<b::utf8, c::utf8, d::utf8, rest::binary>>, n, acc)
 94 |   end
 95 | 
 96 |   def ngram(<<a::utf8, b::utf8, c::utf8, d::utf8, e::utf8, rest::binary>>, 5 = n, acc) do
 97 |     ngram = [a, b, c, d, e]
 98 |     acc = Map.update(acc, ngram, 1, &(&1 + 1))
 99 |     ngram(<<b::utf8, c::utf8, d::utf8, e::utf8, rest::binary>>, n, acc)
100 |   end
101 | 
102 |   def ngram(<<a::utf8, b::utf8, c::utf8, d::utf8, e::utf8, f::utf8, rest::binary>>, 6 = n, acc) do
103 |     ngram = [a, b, c, d, e, f]
104 |     acc = Map.update(acc, ngram, 1, &(&1 + 1))
105 |     ngram(<<b::utf8, c::utf8, d::utf8, e::utf8, f::utf8, rest::binary>>, n, acc)
106 |   end
107 | 
108 |   def ngram(
109 |         <<a::utf8, b::utf8, c::utf8, d::utf8, e::utf8, f::utf8, g::utf8, rest::binary>>,
110 |         7 = n,
111 |         acc
112 |       ) do
113 |     ngram = [a, b, c, d, e, f, g]
114 |     acc = Map.update(acc, ngram, 1, &(&1 + 1))
115 |     ngram(<<b::utf8, c::utf8, d::utf8, e::utf8, f::utf8, g::utf8, rest::binary>>, n, acc)
116 |   end
117 | end
118 | 


--------------------------------------------------------------------------------
/lib/text.ex:
--------------------------------------------------------------------------------
 1 | defmodule Text do
 2 |   @moduledoc """
 3 |   Functions for basic text processing
 4 |   and analysis.
 5 | 
 6 |   """
 7 |   @typedoc "A language as a BCP-47 string"
 8 |   @type language :: String.t()
 9 | 
10 |   @typedoc "A vocabulary module"
11 |   @type vocabulary :: module()
12 | 
13 |   @typedoc "A corpus module"
14 |   @type corpus :: module()
15 | 
16 |   @typedoc "A tuple of the form `{language, number}`"
17 |   @type frequency_tuple :: {language, number}
18 | 
19 |   defdelegate ngram(text, n), to: Text.Ngram
20 |   defdelegate detect(text), to: Text.Language
21 |   defdelegate detect(text, options), to: Text.Language
22 | 
23 |   @doc """
24 |   Pluralize a noun.
25 | 
26 |   ## Arguments
27 | 
28 |   * `word` is any English noun.
29 | 
30 |   * `options` is a keyword list
31 |     of options.
32 | 
33 |   ## Options
34 | 
35 |   * `:mode` is either `:modern` or `:classical`. The
36 |     default is `:modern`.
37 | 
38 |   * `:language` is the inflection module
39 |     to be used. The default and ony option is
40 |     `Text.Inflect.En`
41 | 
42 |   ## Returns
43 | 
44 |   * a `String` representing the pluralized noun
45 | 
46 |   ## Notes
47 | 
48 |   `mode` when `:classical` applies pluralization
49 |   on latin nouns used in english but with latin
50 |   suffixes.
51 | 
52 |   ## Examples
53 | 
54 |       iex> Text.pluralize_noun "Major general"
55 |       "Major generals"
56 | 
57 |       iex> Text.pluralize_noun "fish"
58 |       "fish"
59 | 
60 |       iex> Text.pluralize_noun "soliloquy"
61 |       "soliloquies"
62 | 
63 |       iex> Text.pluralize_noun "genius", mode: :classical
64 |       "genii"
65 | 
66 |       iex> Text.pluralize_noun "genius"
67 |       "geniuses"
68 | 
69 |       iex> Text.pluralize_noun "platypus", mode: :classical
70 |       "platypodes"
71 | 
72 |       iex> Text.pluralize_noun "platypus"
73 |       "platypuses"
74 | 
75 |   """
76 |   def pluralize_noun(word, options \\ []) do
77 |     inflector = inflector_from(options)
78 |     mode = Keyword.get(options, :mode, :modern)
79 |     inflector.pluralize_noun(word, mode)
80 |   end
81 | 
82 |   # Only "en" is supoprted
83 |   defp inflector_from(_options) do
84 |     Text.Inflect.En
85 |   end
86 | 
87 |   @doc false
88 |   def ensure_compiled?(module) do
89 |     case Code.ensure_compiled(module) do
90 |       {:module, _module} -> true
91 |       _other -> false
92 |     end
93 |   end
94 | end
95 | 


--------------------------------------------------------------------------------
/lib/vocabulary.ex:
--------------------------------------------------------------------------------
  1 | defmodule Text.Vocabulary do
  2 |   @moduledoc """
  3 |   A vocabulary is the encoded form of
  4 |   a training text that is used to support
  5 |   language matching.
  6 | 
  7 |   A vocabulary is mapping of an
  8 |   n-gram to its rank and probability.
  9 | 
 10 |   """
 11 |   alias Text.Ngram
 12 | 
 13 |   @type t :: module()
 14 | 
 15 |   @callback get_vocabulary(String.t()) :: map()
 16 |   @callback filename() :: String.t()
 17 |   @callback calculate_ngrams(String.t()) :: map()
 18 |   @callback ngram_range() :: Range.t()
 19 |   @callback load_vocabulary! :: map()
 20 | 
 21 |   @max_ngrams 300
 22 | 
 23 |   def known_vocabularies(corpus) do
 24 |     corpus.known_vocabularies
 25 |   end
 26 | 
 27 |   @doc """
 28 |   Get the vocabulary entry for
 29 |   a given language and vocabulary
 30 | 
 31 |   """
 32 |   def get_vocabulary(vocabulary, language) do
 33 |     :persistent_term.get({vocabulary, language}, nil)
 34 |   end
 35 | 
 36 |   @doc """
 37 |   Loads the given vocabulary.
 38 | 
 39 |   Vocabularies are placed in
 40 |   `:persistent_store` since this
 41 |   reduces memory copies and has efficient
 42 |   multi-process access.
 43 | 
 44 |   """
 45 |   def load_vocabulary!(vocabulary) do
 46 |     vocabulary_content =
 47 |       vocabulary.filename
 48 |       |> File.read!()
 49 |       |> :erlang.binary_to_term()
 50 |       |> structify_ngram_stats
 51 | 
 52 |     for {language, ngrams} <- vocabulary_content do
 53 |       :persistent_term.put({vocabulary, language}, ngrams)
 54 |     end
 55 | 
 56 |     :persistent_term.put({vocabulary, :languages}, Map.keys(vocabulary_content))
 57 |     vocabulary_content
 58 |   end
 59 | 
 60 |   defp structify_ngram_stats(ngram_by_language) do
 61 |     Enum.map(ngram_by_language, fn {language, ngram_map} ->
 62 |       new_ngram_map =
 63 |         Enum.map(ngram_map, fn {ngram, stats} ->
 64 |           {ngram, struct(Text.Ngram.Frequency, stats)}
 65 |         end)
 66 |         |> Map.new()
 67 | 
 68 |       {language, new_ngram_map}
 69 |     end)
 70 |     |> Map.new()
 71 |   end
 72 | 
 73 |   @doc """
 74 |   Rerturns a list of the top n
 75 |   vocabulary entries by rank for a
 76 |   given language and vocabulary.
 77 | 
 78 |   This function is primarily intended
 79 |   for debugging support.
 80 | 
 81 |   """
 82 |   def top_n(vocabulary, language, n) do
 83 |     vocabulary.filename
 84 |     |> File.read!()
 85 |     |> :erlang.binary_to_term()
 86 |     |> Map.fetch!(language)
 87 |     |> top_n(n)
 88 |   end
 89 | 
 90 |   @doc """
 91 |   Returns the top n by rank for a list
 92 |   of entries for a given languages
 93 |   vocabulary
 94 | 
 95 |   """
 96 |   def top_n(language_vocabulary, n \\ @max_ngrams) do
 97 |     language_vocabulary
 98 |     |> Enum.sort(fn {_, %{rank: rank1}}, {_, %{rank: rank2}} -> rank1 < rank2 end)
 99 |     |> Enum.take(n)
100 |   end
101 | 
102 |   @doc """
103 |   Returns the ngrams for a given
104 |   text and range representing
105 |   a range of n-grams
106 | 
107 |   """
108 |   def get_ngrams(content, from..to) do
109 |     for n <- from..to do
110 |       Ngram.ngram(content, n)
111 |     end
112 |     |> merge_maps
113 |   end
114 | 
115 |   defp merge_maps([a]) do
116 |     a
117 |   end
118 | 
119 |   defp merge_maps([a, b]) do
120 |     Map.merge(a, b)
121 |   end
122 | 
123 |   defp merge_maps([a, b | rest]) do
124 |     merge_maps([Map.merge(a, b) | rest])
125 |   end
126 | 
127 |   def calculate_corpus_ngrams(corpus, language, range) do
128 |     ngrams =
129 |       language
130 |       |> corpus.language_content
131 |       |> corpus.normalize_text
132 |       |> calculate_ngrams(range)
133 | 
134 |     {language, ngrams}
135 |   end
136 | 
137 |   @doc """
138 |   Calculate the n-grams for a given text
139 | 
140 |   A range of n-grams is calculated from
141 |   `range` and the top `n` ranked
142 |   n-grams from the text are returned
143 | 
144 |   """
145 | 
146 |   def calculate_ngrams(content, range, top_n \\ @max_ngrams) when is_binary(content) do
147 |     content
148 |     |> get_ngrams(range)
149 |     |> add_statistics()
150 |     |> order_by_count()
151 |     |> Enum.with_index(1)
152 |     |> Enum.map(fn {{ngram, ngram_stats}, rank} ->
153 |       {ngram, %{ngram_stats | rank: rank}}
154 |     end)
155 |     |> top_n(top_n)
156 |     |> Map.new()
157 |   end
158 | 
159 |   @doc false
160 |   def order_by_count(ngrams) do
161 |     Enum.sort(ngrams, fn
162 |       {ngram1, %{count: count}}, {ngram2, %{count: count}} -> ngram1 > ngram2
163 |       {_, %{count: count1}}, {_, %{count: count2}} -> count1 > count2
164 |     end)
165 |   end
166 | 
167 |   # For each n-gram keep the count,
168 |   # frequency and log of the frequency
169 |   defp add_statistics(ngrams) do
170 |     total_count =
171 |       ngrams
172 |       |> Enum.map(&elem(&1, 1))
173 |       |> Enum.sum()
174 | 
175 |     ngrams
176 |     |> Enum.map(fn {ngram, count} ->
177 |       frequency = count / total_count
178 | 
179 |       {ngram,
180 |        %Text.Ngram.Frequency{
181 |          count: count,
182 |          frequency: frequency,
183 |          log_frequency: :math.log(frequency)
184 |        }}
185 |     end)
186 |   end
187 | end
188 | 


--------------------------------------------------------------------------------
/lib/word_count.ex:
--------------------------------------------------------------------------------
  1 | defmodule Text.Word do
  2 |   @moduledoc """
  3 |   Implements word counting for lists,
  4 |   streams and flows.
  5 | 
  6 |   """
  7 | 
  8 |   @typedoc "Enumerable types for word counting"
  9 |   @type text :: Flow.t() | File.Stream.t() | String.t() | [String.t(), ...]
 10 | 
 11 |   @typedoc "A list of words and their frequencies in a text"
 12 |   @type frequency_list :: [{String.t(), pos_integer}, ...]
 13 | 
 14 |   @typedoc "A function to split text"
 15 |   @type splitter :: function()
 16 | 
 17 |   @doc """
 18 |   Counts the number of words in a string,
 19 |   `File.Stream`, or `Flow`.
 20 | 
 21 |   ## Arguments
 22 | 
 23 |   * `text` is either a `String.t`, `Flow.t`,
 24 |     `File.Stream.t` or a list of strings.
 25 | 
 26 |   * `splitter` is an arity-1 function
 27 |     that splits the text stream.
 28 |     The default is `&String.split/1`.
 29 | 
 30 |    ## Returns
 31 | 
 32 |    * A list of 2-tuples of the form
 33 |      `{word, count}` referred to as
 34 |      a frequency list.
 35 | 
 36 |   ## Examples
 37 | 
 38 |   """
 39 |   @spec word_count(Flow.t() | File.Stream.t() | String.t() | [String.t()], splitter) ::
 40 |           frequency_list
 41 | 
 42 |   def word_count(text, splitter \\ &String.split/1)
 43 | 
 44 |   def word_count(text, splitter) when is_binary(text) do
 45 |     word_count([text], splitter)
 46 |   end
 47 | 
 48 |   def word_count(list, splitter) when is_list(list) do
 49 |     list
 50 |     |> Flow.from_enumerable()
 51 |     |> word_count(splitter)
 52 |   end
 53 | 
 54 |   def word_count(%File.Stream{} = stream, splitter) do
 55 |     stream
 56 |     |> Flow.from_enumerable()
 57 |     |> word_count(splitter)
 58 |   end
 59 | 
 60 |   def word_count(%Flow{} = stream, splitter) do
 61 |     table = :ets.new(:word_count, [{:write_concurrency, true}, :public])
 62 | 
 63 |     stream
 64 |     |> Flow.flat_map(splitter)
 65 |     |> Flow.map(&:ets.update_counter(table, &1, {2, 1}, {&1, 0}))
 66 |     |> Flow.run()
 67 | 
 68 |     list = :ets.tab2list(table)
 69 |     :ets.delete(table)
 70 | 
 71 |     list
 72 |   end
 73 | 
 74 |   @doc """
 75 |   Counts the total number of words in a
 76 |   frequency list.
 77 | 
 78 |   ## Arguments
 79 | 
 80 |   * `frequency_list` is a list of frequencies
 81 |     returned from `Text.Word.word_count/2`
 82 | 
 83 |    ## Returns
 84 | 
 85 |    * An integer number of words
 86 | 
 87 |   ## Examples
 88 | 
 89 |   """
 90 |   @spec total_word_count(frequency_list) :: pos_integer
 91 |   def total_word_count(frequency_list) when is_list(frequency_list) do
 92 |     Enum.reduce(frequency_list, 0, fn {_word, count}, acc -> acc + count end)
 93 |   end
 94 | 
 95 |   @doc """
 96 |   Counts the average word length in a
 97 |   frequency list.
 98 | 
 99 |   ## Arguments
100 | 
101 |   * `frequency_list` is a list of frequencies
102 |     returned from `Text.Word.word_count/2`
103 | 
104 |    ## Returns
105 | 
106 |    * An float representing the
107 |      average word length
108 | 
109 |   ## Examples
110 | 
111 |   """
112 |   @spec average_word_length(frequency_list) :: float
113 |   def average_word_length(frequency_list) when is_list(frequency_list) do
114 |     {all, count} =
115 |       Enum.reduce(frequency_list, {0, 0}, fn {word, count}, {all, total_count} ->
116 |         all = all + String.length(word) * count
117 |         total_count = total_count + count
118 |         {all, total_count}
119 |       end)
120 | 
121 |     all / count
122 |   end
123 | 
124 |   @doc """
125 |   Sorts the words in words in a
126 |   frequency list.
127 | 
128 |   ## Arguments
129 | 
130 |   * `frequency_list` is a list of frequencies
131 |     returned from `Text.Word.word_count/2`
132 | 
133 |   * `directions` is either `:asc` or
134 |     `:desc`. The default is `:desc`.
135 | 
136 |    ## Returns
137 | 
138 |   * The `frequency_list` sorted in the
139 |    direction specified
140 | 
141 |   ## Examples
142 | 
143 |   """
144 |   @spec sort(frequency_list, :asc | :desc) :: frequency_list
145 |   def sort(frequency_list, direction \\ :desc)
146 | 
147 |   def sort(frequency_list, :desc) do
148 |     Enum.sort(frequency_list, &(elem(&1, 1) > elem(&2, 1)))
149 |   end
150 | 
151 |   def sort(frequency_list, :asc) do
152 |     Enum.sort_by(frequency_list, &elem(&1, 1))
153 |   end
154 | end
155 | 


--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kipcole9/text/a93981626c9deb2cdc2bb4bb514b883aa17c792e/logo.png


--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
 1 | defmodule Text.MixProject do
 2 |   use Mix.Project
 3 | 
 4 |   @version "0.2.0"
 5 | 
 6 |   def project do
 7 |     [
 8 |       app: :text,
 9 |       version: @version,
10 |       docs: docs(),
11 |       elixir: "~> 1.8",
12 |       name: "Text",
13 |       source_url: "https://github.com/kipcole9/text",
14 |       description: description(),
15 |       package: package(),
16 |       start_permanent: Mix.env() == :prod,
17 |       deps: deps(),
18 |       elixirc_paths: elixirc_paths(Mix.env()),
19 |       dialyzer: [
20 |         ignore_warnings: ".dialyzer_ignore_warnings",
21 |         plt_add_apps: ~w(inets jason mix meeseeks)a
22 |       ]
23 |     ]
24 |   end
25 | 
26 |   defp description do
27 |     """
28 |     Text analysis and processing for Elixir including ngram,
29 |     language detection and more.
30 |     """
31 |   end
32 | 
33 |   def application do
34 |     [
35 |       extra_applications: [:logger]
36 |     ]
37 |   end
38 | 
39 |   defp package do
40 |     [
41 |       maintainers: ["Kip Cole"],
42 |       licenses: ["Apache 2.0"],
43 |       links: links(),
44 |       files: [
45 |         "lib",
46 |         "priv",
47 |         "mix.exs",
48 |         "README*",
49 |         "CHANGELOG*",
50 |         "LICENSE*"
51 |       ]
52 |     ]
53 |   end
54 | 
55 |   def docs do
56 |     [
57 |       source_ref: "v#{@version}",
58 |       main: "readme",
59 |       logo: "logo.png",
60 |       skip_undefined_reference_warnings_on: ["changelog", "CHANGELOG.md"],
61 |       extras: ["README.md", "CHANGELOG.md", "LICENSE.md"]
62 |     ]
63 |   end
64 | 
65 |   defp deps do
66 |     [
67 |       {:flow, "~> 0.14"},
68 |       {:meeseeks, "~> 0.15", only: [:dev, :test], optional: true},
69 |       {:ex_doc, "~> 0.21", only: [:dev, :release], optional: true},
70 |       {:benchee, "~> 1.0", only: :dev, runtime: false},
71 |       {:jason, "~> 1.0", only: :dev, runtime: false},
72 |       {:dialyxir, "~> 1.0", only: [:dev], runtime: false, optional: true}
73 |     ]
74 |   end
75 | 
76 |   def links do
77 |     %{
78 |       "GitHub" => "https://github.com/kipcole9/text",
79 |       "Readme" => "https://github.com/kipcole9/text/blob/v#{@version}/README.md",
80 |       "Changelog" => "https://github.com/kipcole9/text/blob/v#{@version}/CHANGELOG.md"
81 |     }
82 |   end
83 | 
84 |   defp elixirc_paths(:test), do: ["lib", "mix", "test"]
85 |   defp elixirc_paths(:dev), do: ["lib", "mix", "bench"]
86 |   defp elixirc_paths(_), do: ["lib"]
87 | end
88 | 


--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
 1 | %{
 2 |   "benchee": {:hex, :benchee, "1.0.1", "66b211f9bfd84bd97e6d1beaddf8fc2312aaabe192f776e8931cb0c16f53a521", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm", "3ad58ae787e9c7c94dd7ceda3b587ec2c64604563e049b2a0e8baafae832addb"},
 3 |   "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"},
 4 |   "dialyxir": {:hex, :dialyxir, "1.1.0", "c5aab0d6e71e5522e77beff7ba9e08f8e02bad90dfbeffae60eaf0cb47e29488", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "07ea8e49c45f15264ebe6d5b93799d4dd56a44036cf42d0ad9c960bc266c0b9a"},
 5 |   "earmark": {:hex, :earmark, "1.4.5", "62ffd3bd7722fb7a7b1ecd2419ea0b458c356e7168c1f5d65caf09b4fbdd13c8", [:mix], [], "hexpm", "b7d0e6263d83dc27141a523467799a685965bf8b13b6743413f19a7079843f4f"},
 6 |   "earmark_parser": {:hex, :earmark_parser, "1.4.15", "b29e8e729f4aa4a00436580dcc2c9c5c51890613457c193cc8525c388ccb2f06", [:mix], [], "hexpm", "044523d6438ea19c1b8ec877ec221b008661d3c27e3b848f4c879f500421ca5c"},
 7 |   "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"},
 8 |   "ex_doc": {:hex, :ex_doc, "0.25.3", "3edf6a0d70a39d2eafde030b8895501b1c93692effcbd21347296c18e47618ce", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "9ebebc2169ec732a38e9e779fd0418c9189b3ca93f4a676c961be6c1527913f5"},
 9 |   "flow": {:hex, :flow, "0.15.0", "503717c0e367b5713336181d5305106840f64abbad32c75d7af5ef1bb0908e38", [:mix], [{:gen_stage, "~> 0.14.0", [hex: :gen_stage, repo: "hexpm", optional: false]}], "hexpm", "d7ecbd4dd38a188494bc996d5014ef8335f436a0b262140a1f6441ae94714581"},
10 |   "gen_stage": {:hex, :gen_stage, "0.14.3", "d0c66f1c87faa301c1a85a809a3ee9097a4264b2edf7644bf5c123237ef732bf", [:mix], [], "hexpm", "8453e2289d94c3199396eb517d65d6715ef26bcae0ee83eb5ff7a84445458d76"},
11 |   "jason": {:hex, :jason, "1.2.2", "ba43e3f2709fd1aa1dce90aaabfd039d000469c05c56f0b8e31978e03fa39052", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "18a228f5f0058ee183f29f9eae0805c6e59d61c3b006760668d8d18ff0d12179"},
12 |   "makeup": {:hex, :makeup, "1.0.5", "d5a830bc42c9800ce07dd97fa94669dfb93d3bf5fcf6ea7a0c67b2e0e4a7f26c", [:mix], [{:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cfa158c02d3f5c0c665d0af11512fed3fba0144cf1aadee0f2ce17747fba2ca9"},
13 |   "makeup_elixir": {:hex, :makeup_elixir, "0.15.1", "b5888c880d17d1cc3e598f05cdb5b5a91b7b17ac4eaf5f297cb697663a1094dd", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "db68c173234b07ab2a07f645a5acdc117b9f99d69ebf521821d89690ae6c6ec8"},
14 |   "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"},
15 |   "meeseeks": {:hex, :meeseeks, "0.16.0", "f3442c3fcd9ffd23ccdf6b61c88cacc9f97536b4fcb6788e8f22800f2c59f43e", [:mix], [{:meeseeks_html5ever, "~> 0.13.0", [hex: :meeseeks_html5ever, repo: "hexpm", optional: false]}], "hexpm", "699d60c9897f2bfaba3feb7805d2d964f147bf14d56917f2332986c4cc860afb"},
16 |   "meeseeks_html5ever": {:hex, :meeseeks_html5ever, "0.13.0", "b0a88d381c31ecff1246dc6fea034798de75fd31bc3e3183409b994d2ffe728d", [:mix], [{:rustler, "~> 0.22.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm", "086701e495068c648faa3050472d64d0251d0b5d5bfb5a2456cbd2a31617d4ff"},
17 |   "nimble_parsec": {:hex, :nimble_parsec, "1.1.0", "3a6fca1550363552e54c216debb6a9e95bd8d32348938e13de5eda962c0d7f89", [:mix], [], "hexpm", "08eb32d66b706e913ff748f11694b17981c0b04a33ef470e33e11b3d3ac8f54b"},
18 |   "rustler": {:hex, :rustler, "0.22.0", "e2930f9d6933e910f87526bb0a7f904e32b62a7e838a3ca4a884ee7fdfb957ed", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "01f5989dd511ebec09be481e07d3c59773d5373c5061e09d3ebc3ef61811b49d"},
19 |   "sweet_xml": {:hex, :sweet_xml, "0.6.6", "fc3e91ec5dd7c787b6195757fbcf0abc670cee1e4172687b45183032221b66b8", [:mix], [], "hexpm", "2e1ec458f892ffa81f9f8386e3f35a1af6db7a7a37748a64478f13163a1f3573"},
20 |   "toml": {:hex, :toml, "0.5.2", "e471388a8726d1ce51a6b32f864b8228a1eb8edc907a0edf2bb50eab9321b526", [:mix], [], "hexpm", "f1e3dabef71fb510d015fad18c0e05e7c57281001141504c6b69d94e99750a07"},
21 | }
22 | 


--------------------------------------------------------------------------------
/mix/english_infector_data.ex:
--------------------------------------------------------------------------------
  1 | defmodule Text.Inflect.Data.En do
  2 |   @moduledoc """
  3 |   Functions to structure the data underpinning
  4 |   the English language infelctor `Text.Inflect.En`.
  5 | 
  6 |   The data is downloaded from
  7 |   [An Algorithmic Approach to English Pluralization](http://users.monash.edu/~damian/papers/HTML/Plurals_AppendixA.html)
  8 | 
  9 |   Additional data is stored in the files under `corpus/inflector/en/additions`.
 10 |   Each data file corresponds to the one of the tables `A1` through `A3` and
 11 |   `A11` through `A26`.
 12 | 
 13 |   This data from these files is added to the downloaded data
 14 |   set at library build time and can be rebuild by the library
 15 |   maintainer with `mix text.create_english_plurals`.
 16 | 
 17 |   """
 18 | 
 19 |   @tables Enum.to_list(1..26) |> Enum.map(&("a" <> to_string(&1)))
 20 | 
 21 |   @data_dir "corpus/inflector/en"
 22 | 
 23 |   defp data_dir do
 24 |     @data_dir
 25 |   end
 26 | 
 27 |   defp data_path do
 28 |     Path.join(data_dir(), "en.html")
 29 |   end
 30 | 
 31 |   defp saved_path do
 32 |     Path.join("priv/inflection/en", "en.etf")
 33 |   end
 34 | 
 35 |   defp data do
 36 |     File.read!(data_path())
 37 |   end
 38 | 
 39 |   defp parsed do
 40 |     Meeseeks.parse(data())
 41 |   end
 42 | 
 43 |   @doc """
 44 |   Returns the map of all the tables
 45 |   contained in the downlaoded data set.
 46 | 
 47 |   This data is in a raw form and this
 48 |   function is intended for internal
 49 |   use for this module or debudding
 50 |   purposes.
 51 | 
 52 |   """
 53 |   def tables() do
 54 |     import Meeseeks.XPath
 55 | 
 56 |     tables =
 57 |       parsed()
 58 |       |> Meeseeks.all(xpath("//table"))
 59 |       |> Enum.map(&Meeseeks.all(&1, xpath("//tt")))
 60 |       |> Enum.map(&extract_text/1)
 61 | 
 62 |     @tables
 63 |     |> Enum.zip(tables)
 64 |     |> Map.new()
 65 |   end
 66 | 
 67 |   @doc """
 68 |   Returns the map of all one table
 69 |   contained in the downlaoded data set.
 70 | 
 71 |   The parameter is the name of one of
 72 |   the tables from `a1` to `a26`.
 73 | 
 74 |   This data is in a raw form and this
 75 |   function is intended for internal
 76 |   use for this module or debudding
 77 |   purposes.
 78 | 
 79 |   """
 80 |   def tables("a1" = key) do
 81 |     import Meeseeks.XPath
 82 | 
 83 |     tables =
 84 |       parsed()
 85 |       |> Meeseeks.all(xpath("//table"))
 86 |       |> Enum.map(&Meeseeks.all(&1, xpath("//td")))
 87 |       |> Enum.map(&extract_text/1)
 88 | 
 89 |     @tables
 90 |     |> Enum.zip(tables)
 91 |     |> Map.new()
 92 |     |> Map.get(key)
 93 |   end
 94 | 
 95 |   def tables("a7" = key) do
 96 |     import Meeseeks.XPath
 97 | 
 98 |     tables =
 99 |       parsed()
100 |       |> Meeseeks.all(xpath("//table"))
101 |       |> Enum.map(&Meeseeks.all(&1, xpath("//td")))
102 |       |> Enum.map(&extract_text/1)
103 | 
104 |     @tables
105 |     |> Enum.zip(tables)
106 |     |> Map.new()
107 |     |> Map.get(key)
108 |   end
109 | 
110 |   def tables("a8" = key) do
111 |     import Meeseeks.XPath
112 | 
113 |     tables =
114 |       parsed()
115 |       |> Meeseeks.all(xpath("//table"))
116 |       |> Enum.map(&Meeseeks.all(&1, xpath("//td")))
117 |       |> Enum.map(&extract_text/1)
118 | 
119 |     @tables
120 |     |> Enum.zip(tables)
121 |     |> Map.new()
122 |     |> Map.get(key)
123 |   end
124 | 
125 |   def tables(key) when is_binary(key) do
126 |     import Meeseeks.XPath
127 | 
128 |     tables =
129 |       parsed()
130 |       |> Meeseeks.all(xpath("//table"))
131 |       |> Enum.map(&Meeseeks.all(&1, xpath("//tt")))
132 |       |> Enum.map(&extract_text/1)
133 | 
134 |     @tables
135 |     |> Enum.zip(tables)
136 |     |> Map.new()
137 |     |> Map.get(key)
138 |   end
139 | 
140 |   defp extract_text(tt) do
141 |     Enum.map(tt, &Meeseeks.text(&1))
142 |   end
143 | 
144 |   @additions_file_mapping %{
145 |     "a1" => "irregular_noun.txt",
146 |     "a2" => "uninflected_noun.txt",
147 |     "a3" => "singular_s.txt",
148 |     "a11" => "category_a_as_ae.txt",
149 |     "a12" => "category_a_as_ata.txt",
150 |     "a13" => "category_en_ens_ina.txt",
151 |     "a14" => "category_ex_ices.txt",
152 |     "a15" => "category_ex_exes_ices.txt",
153 |     "a16" => "category_is_ises_ides.txt",
154 |     "a17" => "category_o_os.txt",
155 |     "a18" => "category_o_os_i.txt",
156 |     "a19" => "category_on_a.txt",
157 |     "a20" => "category_um_a.txt",
158 |     "a21" => "category_um_ums_a.txt",
159 |     "a22" => "category_us_uses_i.txt",
160 |     "a23" => "category_us_uses_us.txt",
161 |     "a24" => "category_any_i.txt",
162 |     "a25" => "category_any_im.txt",
163 |     "a26" => "category_general_generals.txt"
164 |   }
165 | 
166 |   @doc """
167 |   Saves the plural data set, including
168 |   additions, in an erlang term file
169 |   used at runtime and in the packaged
170 |   library.
171 | 
172 |   This function is called from
173 |   `mix text.create_english_plurals`.
174 | 
175 |   """
176 |   def save_data do
177 |     a1 = tables("a1")
178 |     a7 = tables("a7")
179 |     a8 = tables("a8")
180 | 
181 |     all =
182 |       tables()
183 |       |> Map.put("a1", a1)
184 |       |> Map.put("a7", a7)
185 |       |> Map.put("a8", a8)
186 | 
187 |     final =
188 |       all
189 |       |> Enum.map(fn
190 |         {"a1" = table, values} ->
191 |           add_values =
192 |             table
193 |             |> get_additions()
194 |             |> Enum.flat_map(fn
195 |               [single, plural] -> [single, plural, plural]
196 |               [single, modern, classical] -> [single, modern, classical]
197 |             end)
198 | 
199 |           {"a1", values ++ add_values}
200 | 
201 |         {table, values} ->
202 |           {table, Enum.uniq(values ++ get_additions(table))}
203 |       end)
204 |       |> Map.new()
205 | 
206 |     File.write!(saved_path(), :erlang.term_to_binary(final))
207 |   end
208 | 
209 |   defp additions_file(table) do
210 |     Map.fetch(@additions_file_mapping, table)
211 |   end
212 | 
213 |   @doc """
214 |   Returns the parsed contents
215 |   of the additions file for a
216 |   given table named `a1` ro `a16`.
217 | 
218 |   """
219 |   def get_additions(table) do
220 |     case additions_file(table) do
221 |       {:ok, file} ->
222 |         path = Path.join(data_dir(), ["additions/", file])
223 |         parse_file(path)
224 | 
225 |       _other ->
226 |         []
227 |     end
228 |   end
229 | 
230 |   @doc """
231 |   Creates the additions files
232 |   if they do not exist.
233 | 
234 |   """
235 |   def touch_additions_files do
236 |     @additions_file_mapping
237 |     |> Map.values()
238 |     |> Enum.map(&Path.join(data_dir(), ["additions/", &1]))
239 |     |> Enum.each(&File.touch/1)
240 |   end
241 | 
242 |   defp parse_file(file) do
243 |     file
244 |     |> File.read!()
245 |     |> String.replace(~r/#.*\n/, "")
246 |     |> String.split("\n")
247 |     |> Enum.map(&String.trim/1)
248 |     |> Enum.reject(&(&1 == ""))
249 |     |> Enum.map(&split_and_trim/1)
250 |   end
251 | 
252 |   defp split_and_trim(string) do
253 |     list =
254 |       string
255 |       |> String.split(",", trim: true)
256 |       |> Enum.map(&String.trim/1)
257 | 
258 |     case list do
259 |       [word] -> word
260 |       other -> other
261 |     end
262 |   end
263 | end


--------------------------------------------------------------------------------
/mix/tasks/create_inflector.ex:
--------------------------------------------------------------------------------
 1 | defmodule Mix.Tasks.Text.CreateEnglishPlurals do
 2 |   @moduledoc """
 3 |   Mix task to create the plurals data set used
 4 |   by the English inflector
 5 |   """
 6 | 
 7 |   use Mix.Task
 8 | 
 9 |   @shortdoc "Create the English plurals data set"
10 | 
11 |   @doc false
12 |   def run(_) do
13 |     Text.Inflect.Data.En.save_data()
14 |   end
15 | end
16 | 
17 | 


--------------------------------------------------------------------------------
/priv/inflection/en/en.etf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kipcole9/text/a93981626c9deb2cdc2bb4bb514b883aa17c792e/priv/inflection/en/en.etf


--------------------------------------------------------------------------------
/test/irregular_noun_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule Text.Inflect.Noun.Test do
 2 |   use ExUnit.Case
 3 | 
 4 |   # Tests defined from the data at
 5 |   # https://www.thoughtco.com/irregular-plural-nouns-in-english-1692634
 6 |   for [single, plurals] <- Text.Plurals.Helper.irregular_plurals() do
 7 |     test "irregular plural for #{single}" do
 8 |       case unquote(plurals) do
 9 |         [plural] ->
10 |           assert Text.Inflect.En.pluralize_noun(unquote(single), :classical) == plural
11 | 
12 |         [plural, alternate] ->
13 |           assert Text.Inflect.En.pluralize_noun(unquote(single), :classical) == plural ||
14 |                    Text.Inflect.En.pluralize_noun(unquote(single), :classical) == alternate
15 | 
16 |         [plural, alternate, other] ->
17 |           assert Text.Inflect.En.pluralize_noun(unquote(single), :classical) == plural ||
18 |                    Text.Inflect.En.pluralize_noun(unquote(single), :classical) == alternate ||
19 |                    Text.Inflect.En.pluralize_noun(unquote(single), :classical) == other
20 |       end
21 |     end
22 |   end
23 | 
24 |   # Tests defined from the data at
25 |   # http://www.focus.olsztyn.pl/list-of-plural-nouns.html
26 |   for [single, plurals] <- Text.Plurals.Helper.plurals() do
27 |     test "plural noun for #{single}" do
28 |       case unquote(plurals) do
29 |         [plural] ->
30 |           assert Text.Inflect.En.pluralize_noun(unquote(single), :classical) == plural
31 | 
32 |         [plural, alternate] ->
33 |           assert Text.Inflect.En.pluralize_noun(unquote(single), :classical) == plural ||
34 |                    Text.Inflect.En.pluralize_noun(unquote(single), :classical) == alternate
35 | 
36 |         [plural, alternate, other] ->
37 |           assert Text.Inflect.En.pluralize_noun(unquote(single), :classical) == plural ||
38 |                    Text.Inflect.En.pluralize_noun(unquote(single), :classical) == alternate ||
39 |                    Text.Inflect.En.pluralize_noun(unquote(single), :classical) == other
40 |       end
41 |     end
42 |   end
43 | end
44 | 


--------------------------------------------------------------------------------
/test/support/irregular_plurals.csv:
--------------------------------------------------------------------------------
  1 | addendum, addenda-addendums
  2 | aircraft, aircraft
  3 | alumna, alumnae
  4 | alumnus, alumni
  5 | analysis, analyses
  6 | antenna, antennae-antennas
  7 | antithesis, antitheses
  8 | apex, apices-apexes
  9 | appendix, appendices-appendixes
 10 | axis, axes
 11 | bacillus, bacilli
 12 | bacterium, bacteria
 13 | basis, bases
 14 | beau, beaux-beaus
 15 | bison, bison
 16 | bureau, bureaux-bureaus
 17 | cactus, cacti-cactus-cactuses
 18 | château, châteaux-châteaus
 19 | child, children
 20 | codex, codices
 21 | concerto, concerti-concertos
 22 | corpus, corpora
 23 | crisis, crises
 24 | criterion, criteria-criterions
 25 | curriculum, curricula-curriculums
 26 | datum, data
 27 | deer, deer-deers
 28 | diagnosis, diagnoses
 29 | die, dice-dies
 30 | dwarf, dwarves-dwarfs
 31 | ellipsis, ellipses
 32 | erratum, errata
 33 | faux pas, faux pas
 34 | fez, fezzes-fezes
 35 | fish, fish-fishes
 36 | focus, foci-focuses
 37 | foot, feet-foot
 38 | formula, formulae-formulas
 39 | fungus, fungi-funguses
 40 | genus, genera-genuses
 41 | goose, geese
 42 | graffito, graffiti
 43 | grouse, grouse-grouses
 44 | half, halves
 45 | hoof, hooves-hoofs
 46 | hypothesis, hypotheses
 47 | index, indices-indexes
 48 | larva, larvae-larvas
 49 | libretto, libretti-librettos
 50 | loaf, loaves
 51 | locus, loci
 52 | louse, lice
 53 | man, men
 54 | matrix, matrices-matrixes
 55 | medium, media-mediums
 56 | memorandum, memoranda-memorandums
 57 | minutia, minutiae
 58 | moose, moose
 59 | mouse, mice
 60 | nebula, nebulae-nebulas
 61 | nucleus, nuclei-nucleuses
 62 | oasis, oases
 63 | offspring, offspring-offsprings
 64 | opus, opera-opuses
 65 | ovum, ova
 66 | ox, oxen-ox
 67 | parenthesis, parentheses
 68 | phenomenon, phenomena-phenomenons
 69 | phylum, phyla
 70 | quiz, quizzes
 71 | radius, radii-radiuses
 72 | referendum, referenda-referendums
 73 | salmon, salmon-salmons
 74 | scarf, scarves-scarfs
 75 | self, selves
 76 | series, series
 77 | sheep, sheep
 78 | shrimp, shrimp-shrimps
 79 | species, species
 80 | stimulus, stimuli
 81 | stratum, strata
 82 | swine, swine
 83 | syllabus, syllabi-syllabuses
 84 | symposium, symposia-symposiums
 85 | synopsis, synopses
 86 | tableau, tableaux-tableaus
 87 | thesis, theses
 88 | thief, thieves
 89 | tooth, teeth
 90 | trout, trout-trouts
 91 | tuna, tuna-tunas
 92 | vertebra, vertebrae-vertebras
 93 | vertex, vertices-vertexes
 94 | vita, vitae
 95 | vortex, vortices-vortexes
 96 | wharf, wharves-wharfs
 97 | wife, wives
 98 | wolf, wolves
 99 | woman, women
100 | 


--------------------------------------------------------------------------------
/test/support/plural_nouns.csv:
--------------------------------------------------------------------------------
 1 | analysis, analyses
 2 | status, status-statuses
 3 | moose, moose
 4 | crisis, crises
 5 | fish, fish-fishes
 6 | series, series
 7 | appendix, appendixes-appendices
 8 | sheep, sheep
 9 | bus, buses-busses
10 | formula, formulae-formulas
11 | life, lives
12 | deer, deer
13 | ox, oxen
14 | focus, focuses-foci
15 | basis, bases
16 | cactus, cacti-cactuses
17 | woman, women
18 | equipment, equipment
19 | dice, dice
20 | leaf, leaves
21 | life, lives
22 | phenomenon, phenomena
23 | staff, staff
24 | alumnus, alumni
25 | radius, radii-radiuses
26 | staff, staff
27 | alumnus, alumni
28 | software, software
29 | data, data
30 | addendum, addenda
31 | synopsis, synopses
32 | mongoose, mongooses
33 | genus, genera
34 | formula, formulae-formulas
35 | roof, roofs
36 | phenomenon, phenomena
37 | medium, media-mediums
38 | business, businesses
39 | datum, data
40 | chassis, chassis
41 | crisis, crises
42 | hero, heroes
43 | axis, axes
44 | 


--------------------------------------------------------------------------------
/test/support/plurals_helper.ex:
--------------------------------------------------------------------------------
 1 | defmodule Text.Plurals.Helper do
 2 |   def irregular_plurals do
 3 |     parse("test/support/irregular_plurals.csv")
 4 |   end
 5 | 
 6 |   def plurals do
 7 |     parse("test/support/plural_nouns.csv")
 8 |   end
 9 | 
10 |   defp parse(path) do
11 |     path
12 |     |> File.read!()
13 |     |> String.split("\n")
14 |     |> Enum.map(&String.split(&1, ", "))
15 |     |> Enum.map(fn
16 |       [single, plural] -> [single, String.split(plural, "-")]
17 |       _other -> nil
18 |     end)
19 |     |> Enum.reject(&is_nil/1)
20 |     |> Enum.uniq_by(&hd/1)
21 |   end
22 | end
23 | 


--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | ExUnit.start()
2 | 


--------------------------------------------------------------------------------
/test/text_test.exs:
--------------------------------------------------------------------------------
# Runs the doctests embedded in the documentation of `Text` and
# `Text.Inflect.En`; this module defines no other tests.
defmodule TextTest do
  use ExUnit.Case
  doctest Text
  doctest Text.Inflect.En
end
6 | 


--------------------------------------------------------------------------------