├── src
    ├── _version.js
    ├── 03-three
    │   ├── numbers
    │   │   ├── plugin.js
    │   │   ├── find.js
    │   │   ├── parse
    │   │   │   ├── _data.js
    │   │   │   ├── index.js
    │   │   │   └── fromText.js
    │   │   ├── format
    │   │   │   ├── index.js
    │   │   │   └── toText.js
    │   │   └── data.js
    │   ├── topics
    │   │   ├── plugin.js
    │   │   └── api.js
    │   ├── contractions
    │   │   ├── plugin.js
    │   │   └── api.js
    │   ├── nouns
    │   │   ├── plugin.js
    │   │   └── api.js
    │   ├── adjectives
    │   │   ├── plugin.js
    │   │   └── api.js
    │   └── verbs
    │   │   ├── plugin.js
    │   │   └── api
    │   │       ├── adverbs.js
    │   │       ├── parse.js
    │   │       ├── toJSON.js
    │   │       └── find.js
    ├── 02-two
    │   ├── preTagger
    │   │   ├── methods
    │   │   │   ├── index.js
    │   │   │   └── guessGender.js
    │   │   ├── compute
    │   │   │   ├── 2nd-pass
    │   │   │   │   ├── noun-fallback.js
    │   │   │   │   ├── neighbours.js
    │   │   │   │   ├── suffix-lookup.js
    │   │   │   │   └── acronym.js
    │   │   │   ├── 3rd-pass
    │   │   │   │   ├── fix-contractions.js
    │   │   │   │   ├── adj-plurals.js
    │   │   │   │   ├── number-types.js
    │   │   │   │   ├── noun-gender.js
    │   │   │   │   ├── noun-plurals.js
    │   │   │   │   ├── adj-gender.js
    │   │   │   │   ├── verb-tense.js
    │   │   │   │   └── verb-form.js
    │   │   │   ├── 1st-pass
    │   │   │   │   ├── titlecase.js
    │   │   │   │   ├── regex.js
    │   │   │   │   └── year.js
    │   │   │   └── index.js
    │   │   ├── model
    │   │   │   ├── index.js
    │   │   │   ├── regex
    │   │   │   │   ├── regex-text.js
    │   │   │   │   ├── regex-normal.js
    │   │   │   │   └── regex-numbers.js
    │   │   │   └── suffixes.js
    │   │   ├── plugin.js
    │   │   └── tagRank.js
    │   ├── postTagger
    │   │   ├── plugin.js
    │   │   └── matches.js
    │   └── tagset
    │   │   ├── plugin.js
    │   │   └── tags
    │   │       ├── values.js
    │   │       ├── dates.js
    │   │       ├── misc.js
    │   │       ├── nouns.js
    │   │       └── verbs.js
    ├── _lib.js
    ├── 01-one
    │   ├── lexicon
    │   │   ├── methods
    │   │   │   ├── index.js
    │   │   │   ├── model.js
    │   │   │   ├── noun
    │   │   │   │   └── index.js
    │   │   │   ├── adjective
    │   │   │   │   └── index.js
    │   │   │   └── verb
    │   │   │   │   └── index.js
    │   │   ├── plugin.js
    │   │   ├── model
    │   │   │   ├── misc.js
    │   │   │   └── lexicon.js
    │   │   └── compute
    │   │   │   └── root.js
    │   └── tokenize
    │   │   ├── plugin.js
    │   │   ├── compute
    │   │       ├── index.js
    │   │       └── machine.js
    │   │   ├── contractions.js
    │   │   └── unicode.js
    └── index.js
├── data
    ├── lexicon
    │   ├── misc
    │   │   ├── determiners.js
    │   │   ├── conjunctions.js
    │   │   ├── prepositions.js
    │   │   ├── expressions.js
    │   │   ├── currencies.js
    │   │   └── adverbs.js
    │   ├── dates
    │   │   ├── dates.js
    │   │   ├── weekdays.js
    │   │   └── months.js
    │   ├── nouns
    │   │   ├── feminine.js
    │   │   ├── pronouns.js
    │   │   ├── possessives.js
    │   │   ├── uncountables.js
    │   │   ├── masculine.js
    │   │   └── sportsTeams.js
    │   ├── numbers
    │   │   ├── ordinals.js
    │   │   ├── cardinals.js
    │   │   └── units.js
    │   ├── people
    │   │   ├── firstnames.js
    │   │   ├── honorifics.js
    │   │   └── people.js
    │   ├── misc.js
    │   ├── places
    │   │   ├── places.js
    │   │   └── regions.js
    │   └── index.js
    └── models
    │   ├── _lint.js
    │   └── index.js
├── plugins
    └── dates
    │   ├── src
    │       ├── phrase
    │       │   ├── date
    │       │   │   ├── 01-date.js
    │       │   │   ├── 02-year.js
    │       │   │   ├── 03-misc.js
    │       │   │   ├── data.js
    │       │   │   ├── units.js
    │       │   │   └── index.js
    │       │   ├── normalize.js
    │       │   └── index.js
    │       ├── plugin.js
    │       ├── toJson.js
    │       ├── find.js
    │       └── api.js
    │   ├── tests
    │       ├── _lib.js
    │       ├── backburner
    │       │   ├── ambig-weekday.ignore.js
    │       │   ├── equals.ignore.js
    │       │   ├── to-iso.ignore.js
    │       │   └── ambig-month.ignore.js
    │       └── dates.test.js
    │   ├── README.md
    │   ├── rollup.config.js
    │   ├── index.d.ts
    │   ├── scratch.js
    │   └── package.json
├── .gitignore
├── tmp.js
├── learn
    ├── giga
    │   ├── makeModel.js
    │   ├── french.js
    │   ├── _giga.js
    │   ├── getList.js
    │   ├── getPairs.js
    │   ├── corpus.js
    │   └── test.js
    ├── adjectives
    │   └── learn.js
    ├── wiktionary
    │   ├── add.js
    │   └── index.js
    ├── wikinews
    │   ├── packSuffixes.js
    │   ├── parse.js
    │   ├── getLexicon.js
    │   └── getSuffix.js
    ├── nouns
    │   └── learn.js
    ├── wolf
    │   └── parse_wolf.js
    └── verbs
    │   ├── old.js
    │   ├── toPairs.js
    │   ├── single-pairs.js
    │   └── learn.js
├── scripts
    ├── version.js
    ├── stress.js
    ├── cleanup.js
    ├── pack.js
    └── types.ts
├── tests
    ├── _lib.js
    ├── buildNet.test.js
    ├── conjugate.test.js
    └── numbers
    │   ├── ordinal.test.js
    │   └── number-misc.test.js
├── add-verbs.js
├── rollup.config.js
├── .esformatter
├── changelog.md
├── LICENSE
├── .github
    └── workflows
    │   └── build-and-test.yml
├── .eslintrc
├── package.json
├── types
    ├── view
    │   └── fr.ts
    ├── index.d.ts
    └── misc.ts
└── scratch.js


/src/_version.js:
--------------------------------------------------------------------------------
1 | export default '0.2.8'


--------------------------------------------------------------------------------
/src/03-three/numbers/plugin.js:
--------------------------------------------------------------------------------
1 | import api from './api.js'
2 | 
// Plugin definition: its only contribution is the `api` imported from ./api.js.
export default {
  api
}


--------------------------------------------------------------------------------
/src/03-three/topics/plugin.js:
--------------------------------------------------------------------------------
1 | import api from './api.js'
2 | 
// Plugin definition: its only contribution is the `api` imported from ./api.js.
export default {
  api
}


--------------------------------------------------------------------------------
/data/lexicon/misc/determiners.js:
--------------------------------------------------------------------------------
// Determiner word list (articles and contracted article forms).
// NOTE(review): 'ol' does not look like a French determiner — possibly a typo; confirm.
export default ['le', 'la', 'les', 'au', 'aux', 'ol', 'un', 'une']
2 | 


--------------------------------------------------------------------------------
/plugins/dates/src/phrase/date/01-date.js:
--------------------------------------------------------------------------------
// Placeholder parser: the body is empty, so calling it returns undefined.
const parse = function () {

}
export default parse


--------------------------------------------------------------------------------
/plugins/dates/src/phrase/date/02-year.js:
--------------------------------------------------------------------------------
// Placeholder parser: the body is empty, so calling it returns undefined.
const parse = function () {

}
export default parse


--------------------------------------------------------------------------------
/plugins/dates/src/phrase/date/03-misc.js:
--------------------------------------------------------------------------------
// Placeholder parser: the body is empty, so calling it returns undefined.
const parse = function () {

}
export default parse


--------------------------------------------------------------------------------
/src/03-three/contractions/plugin.js:
--------------------------------------------------------------------------------
1 | import api from './api.js'
2 | 
// Plugin definition: its only contribution is the `api` imported from ./api.js.
export default {
  api
}


--------------------------------------------------------------------------------
/src/03-three/nouns/plugin.js:
--------------------------------------------------------------------------------
1 | import api from './api.js'
2 | 
// Plugin definition: its only contribution is the `api` imported from ./api.js.
export default {
  api,
}
6 | 


--------------------------------------------------------------------------------
/src/03-three/adjectives/plugin.js:
--------------------------------------------------------------------------------
1 | import api from './api.js'
2 | 
// Plugin definition: its only contribution is the `api` imported from ./api.js.
export default {
  api,
}
6 | 


--------------------------------------------------------------------------------
/src/03-three/verbs/plugin.js:
--------------------------------------------------------------------------------
1 | import api from './api/api.js'
2 | 
// Plugin definition: its only contribution is the `api` imported from ./api/api.js.
export default {
  api,
}
6 | 


--------------------------------------------------------------------------------
/data/lexicon/dates/dates.js:
--------------------------------------------------------------------------------
1 | // uncontroversial date words
2 | export default ['aujourd\'hui', 'demain', 'hier', 'weekend']
3 | 


--------------------------------------------------------------------------------
/data/lexicon/misc/conjunctions.js:
--------------------------------------------------------------------------------
// Conjunction word list for the lexicon.
export default ['et', 'mais', 'soit', 'puis', 'car', 'voire', 'sinon', 'comme', 'donc']
2 | 


--------------------------------------------------------------------------------
/data/lexicon/nouns/feminine.js:
--------------------------------------------------------------------------------
// Feminine noun word list for the lexicon.
export default ['confiture', 'géologie', 'librairie', 'ambulance', 'poule', 'rue', 'lutte']
2 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/methods/index.js:
--------------------------------------------------------------------------------
1 | import guessGender from './guessGender.js'
// Method bundle for the preTagger plugin: guessGender lives under the `one` namespace.
export default { one: { guessGender } }
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | build/
3 | .DS_Store
4 | coverage
5 | wolf-1.0b4.xml
6 | wikinews.txt
7 | /learn/giga/results/*.js
8 | learn/scrape/*


--------------------------------------------------------------------------------
/src/02-two/postTagger/plugin.js:
--------------------------------------------------------------------------------
1 | import postTagger from './matches.js'
2 | 
// Plugin definition: registers the `postTagger` compute function (imported from
// ./matches.js) and lists the same name under `hooks`.
export default {
  compute: {
    postTagger
  },
  hooks: ['postTagger']
}


--------------------------------------------------------------------------------
/src/_lib.js:
--------------------------------------------------------------------------------
1 | // console.log('local-path')
2 | // import nlp from '/Users/spencer/mountain/compromise/src/one.js'
3 | import nlp from 'compromise/one'
4 | export default nlp


--------------------------------------------------------------------------------
/src/01-one/lexicon/methods/index.js:
--------------------------------------------------------------------------------
1 | import adjective from './adjective/index.js'
2 | import noun from './noun/index.js'
3 | import verb from './verb/index.js'
4 | 
// Bundle the adjective, noun and verb method modules into a single object.
export default { adjective, noun, verb }
6 | 


--------------------------------------------------------------------------------
/tmp.js:
--------------------------------------------------------------------------------
1 | import verbs from './data/models/verb/present-tense.js'
2 | import lex from './data/lexicon/index.js'
// print every key of the present-tense verb model that has no (truthy) lexicon entry
Object.keys(verbs)
  .filter(k => !lex[k])
  .forEach(k => console.log(k))
8 | 
9 | 


--------------------------------------------------------------------------------
/data/lexicon/nouns/pronouns.js:
--------------------------------------------------------------------------------
 1 | // are these right?
 2 | export default [
 3 |   'il',
 4 |   'c',
 5 |   'elle',
 6 |   'on',
 7 |   'ils',
 8 |   'nous',
 9 |   'je',
10 |   'ce',
11 |   'j',
12 |   'elles',
13 |   'vous',
14 |   'tu',
15 |   't',
16 |   'moi',
17 | ]
18 | 


--------------------------------------------------------------------------------
/learn/giga/makeModel.js:
--------------------------------------------------------------------------------
1 | import data from './results/plural-sing.js'
2 | import { learn, compress, test, validate } from 'suffix-thumb'
3 | 
// Run the imported pair data through suffix-thumb's validate(), then call test()
// and learn() on the result, and print the learned model as indented JSON.
const pairs = validate(data)
test(pairs)
const model = learn(pairs)
console.log(JSON.stringify(model, null, 2))
8 | 
9 | 


--------------------------------------------------------------------------------
/data/models/_lint.js:
--------------------------------------------------------------------------------
 1 | import model from './verb/present-tense.js'
 2 | 
 3 | Object.keys(model).forEach(k => {
 4 |   let s = new Set()
 5 |   model[k].slice(1).forEach(str => {
 6 |     if (s.has(str)) {
 7 |       console.log(k, str)
 8 |     }
 9 |     s.add(str)
10 |   })
11 | })


--------------------------------------------------------------------------------
/scripts/version.js:
--------------------------------------------------------------------------------
1 | import fs from 'fs'
// keep the version number in a tiny module of its own,
// so nothing has to load the whole package.json file
const { version } = JSON.parse(fs.readFileSync('./package.json').toString())

fs.writeFileSync('./src/_version.js', `export default '${version}'`)
7 | 


--------------------------------------------------------------------------------
/tests/_lib.js:
--------------------------------------------------------------------------------
 1 | /* eslint-disable no-console */
 2 | import build from '../builds/fr-compromise.mjs'
 3 | import src from '../src/index.js'
 4 | let nlp = src
 5 | if (process.env.TESTENV === 'prod') {
 6 |   console.warn('== production build test 🚀 ==')
 7 |   nlp = build
 8 | }
 9 | export default nlp
10 | 


--------------------------------------------------------------------------------
/plugins/dates/src/plugin.js:
--------------------------------------------------------------------------------
 1 | import api from './api.js'
 2 | 
 3 | let lexicon = {
 4 |   heir: 'Date',
 5 |   soir: 'Date',
 6 |   nuit: 'Date',
 7 |   'soirée': 'Date',
 8 |   matin: 'Date',
 9 |   'après midi': 'Date',
10 |   semaine: 'Duration',
11 | }
12 | 
// Plugin definition: `words` carries the lexicon object declared in this file,
// `api` comes from ./api.js.
export default {
  words: lexicon,
  api,
}


--------------------------------------------------------------------------------
/src/01-one/lexicon/plugin.js:
--------------------------------------------------------------------------------
 1 | import methods from './methods/index.js'
 2 | import words from './model/lexicon.js'
 3 | import root from './compute/root.js'
 4 | 
// Plugin definition for the lexicon:
//  - methods.two.transform: the adjective/noun/verb method bundle from ./methods/index.js
//  - words: the lexicon imported from ./model/lexicon.js
//  - compute.root: the function imported from ./compute/root.js
export default {
  methods: {
    two: {
      transform: methods
    }
  },
  words,
  compute: {
    root: root
  }
}


--------------------------------------------------------------------------------
/src/01-one/tokenize/plugin.js:
--------------------------------------------------------------------------------
 1 | import unicode from './unicode.js'
 2 | import contractions from './contractions.js'
 3 | import compute from './compute/index.js'
 4 | 
 5 | 
// Plugin definition for tokenization.
export default {
  // overwrite the `unicode` and `contractions` entries of world.model.one
  // with the versions imported by this file
  mutate: (world) => {
    world.model.one.unicode = unicode

    world.model.one.contractions = contractions
  },
  // compute functions from ./compute/index.js
  compute
}


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/2nd-pass/noun-fallback.js:
--------------------------------------------------------------------------------
 1 | const nounFallback = function (terms, i, world) {
 2 |   let setTag = world.methods.one.setTag
 3 |   let term = terms[i]
 4 |   if (term.tags.size === 0) {
 5 |     setTag([term], 'Noun', world, false, 'fallback')
 6 |     return true
 7 |   }
 8 |   return null
 9 | }
10 | export default nounFallback


--------------------------------------------------------------------------------
/src/02-two/tagset/plugin.js:
--------------------------------------------------------------------------------
 1 | import nouns from './tags/nouns.js'
 2 | import verbs from './tags/verbs.js'
 3 | import values from './tags/values.js'
 4 | import dates from './tags/dates.js'
 5 | import misc from './tags/misc.js'
 6 | 
 7 | let tags = Object.assign({}, nouns, verbs, values, dates, misc)
 8 | 
 9 | export default {
10 |   tags
11 | }
12 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/model/index.js:
--------------------------------------------------------------------------------
 1 | import regexNormal from './regex/regex-normal.js'
 2 | import regexNumbers from './regex/regex-numbers.js'
 3 | import regexText from './regex/regex-text.js'
 4 | import suffixPatterns from './suffixes.js'
 5 | 
 6 | 
// Model data for the preTagger: the three regex lists from ./regex/ and the
// suffix patterns from ./suffixes.js.
export default {
  regexNormal,
  regexNumbers,
  regexText,
  suffixPatterns
}
13 | 


--------------------------------------------------------------------------------
/data/lexicon/nouns/possessives.js:
--------------------------------------------------------------------------------
// Possessive word list for the lexicon.
// NOTE(review): the original author flagged this list as unverified ("are these right?").
// Some entries are repeated ('notre', 'votre', 'leur'); the second and third columns
// presumably stand for forms that happen to be spelled the same — confirm.
export default ['en', 'lui', 'nous', 'leur', 'm', 'me', 'vous', 'te', 'toi', 'ce',

  'mon', 'ma', 'mes',// my
  'ton', 'ta', 'tes',// your
  'son', 'sa', 'ses',// his
  'notre', 'notre', 'nos',// our
  'votre', 'votre', 'vos',// your
  'leur', 'leur', 'leurs',// their
]
11 | 


--------------------------------------------------------------------------------
/src/01-one/lexicon/methods/model.js:
--------------------------------------------------------------------------------
 1 | import { uncompress } from 'suffix-thumb'
 2 | import packed from './_data.js'
 3 | 
 4 | // uncompress them
 5 | let model = Object.keys(packed).reduce((h, k) => {
 6 |   h[k] = {}
 7 |   Object.keys(packed[k]).forEach(form => {
 8 |     h[k][form] = uncompress(packed[k][form])
 9 |   })
10 |   return h
11 | }, {})
12 | 
13 | export default model


--------------------------------------------------------------------------------
/src/02-two/preTagger/plugin.js:
--------------------------------------------------------------------------------
 1 | import preTagger from './compute/index.js'
 2 | import tagRank from './tagRank.js'
 3 | import model from './model/index.js'
 4 | import methods from './methods/index.js'
 5 | 
 6 | 
// Plugin definition for the preTagger:
//  - compute: the preTagger function and tagRank
//  - methods: the bundle from ./methods/index.js
//  - model.two: the regex/suffix model from ./model/index.js
//  - hooks: lists 'preTagger'
export default {
  compute: {
    preTagger,
    tagRank
  },
  methods,
  model: {
    two: model
  },
  hooks: ['preTagger']
}


--------------------------------------------------------------------------------
/data/lexicon/dates/weekdays.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   'lundi', // - Monday.
 3 |   'mardi', // - Tuesday.
 4 |   'mercredi', // - Wednesday.
 5 |   'jeudi', // - Thursday.
 6 |   'vendredi', // - Friday.
 7 |   'samedi', // - Saturday.
 8 |   'dimanche', // - Sunday.
 9 |   'lun', // 
10 |   'mar', // 
11 |   'mer', // 
12 |   'jeu', // 
13 |   'ven', // 
14 |   'sam', // 
15 |   'dim', // 
16 | ]
17 | 


--------------------------------------------------------------------------------
/plugins/dates/tests/_lib.js:
--------------------------------------------------------------------------------
 1 | /* eslint-disable no-console */
 2 | import build from '../../../builds/fr-compromise.mjs'
 3 | import src from '../../../src/index.js'
// Library under test: the source entry point by default, or the built bundle
// when the TESTENV environment variable is 'prod'.
let nlp = src
if (process.env.TESTENV === 'prod') {
  console.warn('== production build test 🚀 ==')
  nlp = build
}

// register the dates plugin on whichever library was selected
import plg from '../src/plugin.js'
nlp.plugin(plg)

export default nlp
14 | 


--------------------------------------------------------------------------------
/plugins/dates/src/toJson.js:
--------------------------------------------------------------------------------
 1 | 
 2 | const toJson = function (arr) {
 3 |   return arr.map(o => {
 4 |     let res = {
 5 |       start: o.start.start().iso()
 6 |     }
 7 |     // either explicit or implicit end date
 8 |     if (o.end) {
 9 |       res.end = o.end.end().iso()
10 |     } else {
11 |       res.end = o.start.end().iso()
12 |     }
13 |     return res
14 |   })
15 | }
16 | export default toJson


--------------------------------------------------------------------------------
/src/01-one/tokenize/compute/index.js:
--------------------------------------------------------------------------------
 1 | import machine from './machine.js'
 2 | 
 3 | // cheat-method for a quick loop
 4 | const termLoop = function (view, fn) {
 5 |   let docs = view.docs
 6 |   for (let i = 0; i < docs.length; i += 1) {
 7 |     for (let t = 0; t < docs[i].length; t += 1) {
 8 |       fn(docs[i][t], view.world)
 9 |     }
10 |   }
11 | }
// Compute functions exposed by this module: `machine` runs the imported
// machine() on every term of the view.
export default {
  machine: (view) => termLoop(view, machine),
}


--------------------------------------------------------------------------------
/learn/adjectives/learn.js:
--------------------------------------------------------------------------------
 1 | import data from './data.js'
 2 | // import data from '../nouns/data.js'
 3 | 
 4 | import { learn, compress, test } from 'suffix-thumb'
 5 | 
 6 | 
 7 | const pairs = {}
 8 | data.forEach(a => {
 9 |   let [m, f, mp, fp] = a
10 |   pairs[m] = [f, mp, fp]
11 | })
12 | 
13 | console.log(JSON.stringify(pairs, null, 2))
14 | // let model = learn(pairs)
15 | // model = compress(model)
16 | // console.log(JSON.stringify(model, null, 2))
17 | // test(pairs)


--------------------------------------------------------------------------------
/plugins/dates/src/phrase/normalize.js:
--------------------------------------------------------------------------------
 1 | const normalize = function (m) {
 2 |   m = m.clone()
 3 |   // remove redundant day-names like 'Wed march 2nd'
 4 |   if (m.has('#WeekDay') && m.has('#Month') && m.has('#NumericValue')) {
 5 |     m.remove('#WeekDay')
 6 |   }
 7 |   // jusqu'à le quatorze juillet
 8 |   m.remove('(le|la)')
 9 |   // quatorze -> 14
10 |   m.numbers().toCardinal().toNumber()
11 |   // m.compute('index')
12 |   return m
13 | }
14 | export default normalize


--------------------------------------------------------------------------------
/data/lexicon/numbers/ordinals.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   'zeroième',
 3 |   'premier',
 4 |   'unième',
 5 |   'deuxième',
 6 |   'troisième',
 7 |   'quatrième',
 8 |   'cinquième',
 9 |   'sixième',
10 |   'septième',
11 |   'huitième',
12 |   'neuvième',
13 |   'dixième',
14 |   'onzième',
15 |   'douzième',
16 |   'treizième',
17 |   'quatorzième',
18 |   'quinzième',
19 |   'seizième',
20 |   'vingtième',
21 |   'trentième',
22 |   'quarantième',
23 |   'cinquantième',
24 |   'soixantième',
25 | ]
26 | 


--------------------------------------------------------------------------------
/src/03-three/numbers/find.js:
--------------------------------------------------------------------------------
 1 | const findNumbers = function (view) {
 2 |   let m = view.match('#Value+')
 3 | 
 4 |   //seventh fifth
 5 |   if (m.match('#Ordinal #Ordinal').match('#TextValue').found && !m.has('#Multiple')) {
 6 |     m = m.splitAfter('#Ordinal')
 7 |   }
 8 | 
 9 |   //fifth five
10 |   m = m.splitBefore('#Ordinal [#Cardinal]', 0)
11 |   //5-8
12 |   m = m.splitAfter('#NumberRange')
13 |   // june 5th 1999
14 |   m = m.splitBefore('#Year')
15 |   return m
16 | }
17 | export default findNumbers


--------------------------------------------------------------------------------
/src/01-one/lexicon/methods/noun/index.js:
--------------------------------------------------------------------------------
 1 | import { convert, reverse } from 'suffix-thumb'
 2 | import model from '../model.js'
 3 | 
 4 | let pRev = reverse(model.noun.plural)
 5 | const toPlural = (str) => convert(str, model.noun.plural)
 6 | const fromPlural = (str) => convert(str, pRev)
 7 | 
 8 | const all = (str) => {
 9 |   let plr = toPlural(str)
10 |   if (str === plr) {
11 |     return [str]
12 |   }
13 |   return [str, plr]
14 | }
15 | export default {
16 |   toPlural,
17 |   fromPlural,
18 |   all
19 | }


--------------------------------------------------------------------------------
/plugins/dates/README.md:
--------------------------------------------------------------------------------
 1 |   
 2 |   <div align="center">
 3 |     <sub>
 4 |        travaux en cours! • work-in-progress!  
 5 |     </sub>
 6 |   </div>
 7 | 
 8 | ```js
 9 | import nlp from 'fr-compromise'
10 | import frDatePlugin from 'fr-compromise-dates'
11 | nlp.plugin(frDatePlugin)
12 | 
13 | let doc = nlp('entre sept et oct')
14 | doc.dates().json()[0]
15 | /*
16 |  { text: 'entre sept et oct',
17 |    date: [{
18 |       start: { month: 9, year: 2023 },
19 |       end: { month: 10, year: 2023 }
20 |   }]
21 |  }*/
22 | ```
23 | 
24 | MIT


--------------------------------------------------------------------------------
/learn/wiktionary/add.js:
--------------------------------------------------------------------------------
 1 | import fixes from './fixes.js'
 2 | import adj from '../../data/models/adjective/index.js'
 3 | 
// One-off script: fold the entries of ./fixes.js into the adjective model and
// print the merged result as indented JSON.
let data = adj
// target layout per key — m: [f, p, fp]
let out = {}
Object.keys(fixes).forEach(k => {
  let arr = fixes[k]
  if (arr.length === 1) {
    // only got a plural: the key itself is reused for the first slot
    out[k] = [k, arr[0], arr[0]]
  } else if (arr.length === 3) {
    // only fem plurals
    // NOTE(review): the first two items are swapped on the way out — confirm the
    // order used in fixes.js matches these names
    let [m, f, fp] = arr
    out[k] = [f, m, fp]
  }
  // entries of any other length are ignored
})
// Object.assign writes into `data`, which is the imported `adj` object itself
data = Object.assign(data, out)
console.log(JSON.stringify(data, null, 2))


--------------------------------------------------------------------------------
/src/02-two/preTagger/model/regex/regex-text.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   // #coolguy
 3 |   [/^#[a-z0-9_\u00C0-\u00FF]{2,}$/i, 'HashTag'],
 4 | 
 5 |   // @spencermountain
 6 |   [/^@\w{2,}$/, 'AtMention'],
 7 | 
 8 |   // period-ones acronyms - f.b.i.
 9 |   [/^([A-Z]\.){2}[A-Z]?/i, ['Acronym', 'Noun'], 'F.B.I'], //ascii-only
10 | 
11 |   // ending-apostrophes
12 |   [/.{3}[lkmnp]in['‘’‛‵′`´]$/, 'Gerund', "chillin'"],
13 |   [/.{4}s['‘’‛‵′`´]$/, 'Possessive', "flanders'"],
14 | 
15 |   // leading contractions
16 |   // [/^s'[a-z]$/, 'Verb'],
17 |   // [/^l'[a-z]$/, 'Noun'],
18 | ]
19 | 


--------------------------------------------------------------------------------
/data/lexicon/numbers/cardinals.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   'zero', // - 0
 3 |   'un', // - 1
 4 |   'deux', // - 2
 5 |   'trois', // - 3
 6 |   'quatre', // - 4
 7 |   'cinq', // - 5
 8 |   'six', // - 6
 9 |   'sept', // - 7
10 |   'huit', // - 8
11 |   'neuf', // - 9
12 | 
13 |   'dix',
14 |   'onze',
15 |   'douze',
16 |   'treize',
17 |   'quatorze',
18 |   'quinze',
19 |   'seize',
20 |   'dix sept',
21 |   'dix huit',
22 |   'dix neuf',
23 |   'vingt',
24 |   'trente',
25 |   'quarante',
26 |   'cinquante',
27 |   'soixante',
28 |   // 'quatre vingt',
29 |   // 'quatre vingt dix huit',
30 | 
31 | ]
32 | 


--------------------------------------------------------------------------------
/plugins/dates/src/find.js:
--------------------------------------------------------------------------------
 1 | const findDates = function (doc) {
 2 |   let m = doc.match('#Date+')
 3 |   // 7 jun 2018
 4 |   m = m.growLeft('#Value+$')
 5 |   m = m.growRight('^#Value+')
 6 |   // pendant juin
 7 |   m = m.growLeft('(le|la)$')// jusqu'a le
 8 |   m = m.growLeft('(en|entre|depuis|courant|pendant|dans|lorsque|avant|après|à|a|au)$')
 9 |   m = m.growLeft('au cours de$')
10 |   m = m.growLeft('jusque$')// jusqu'en jusqu'à 
11 |   // sept-et-jun
12 |   m = m.growRight('^et (le|la)? #Date+')
13 | 
14 |   // remove overlaps
15 |   m = m.settle()
16 |   // m.debug()
17 |   return m
18 | }
19 | export default findDates


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/3rd-pass/fix-contractions.js:
--------------------------------------------------------------------------------
 1 | // better guesses for 'le/la/les' in l'foo
 2 | const fixContractions = function (terms, i) {
 3 |   let term = terms[i]
 4 |   // let tags = term.tags
 5 |   if (term.implicit === 'le') {
 6 |     let nextTerm = terms[i + 1]
 7 |     if (!nextTerm) {
 8 |       return null
 9 |     }
10 |     if (nextTerm.tags.has('FemaleNoun')) {
11 |       term.implicit = 'la'
12 |     }
13 |     // support female plural?
14 |     if (nextTerm.tags.has('PluralNoun')) {
15 |       term.implicit = 'les'
16 |     }
17 |   }
18 |   return null
19 | }
20 | export default fixContractions


--------------------------------------------------------------------------------
/add-verbs.js:
--------------------------------------------------------------------------------
 1 | import prettyJSON from 'pretty-json-stringify'
 2 | 
 3 | import fs from 'fs'
 4 | // parse JSON-newline file
 5 | let arr = fs.readFileSync('./more-verbs.jsonl').toString()
 6 |   .split(/\n/).filter(str => str).map(str => JSON.parse(str))
 7 | 
 8 | let out = {}
 9 | arr.forEach(obj => {
10 |   if (obj['Indicatif Futur'][0]) {
11 |     let str = obj['Indicatif Futur']
12 |     out[obj.word] = str
13 |   }
14 | })
15 | console.log(prettyJSON(out, {
16 |   shouldExpand: (_, level) => level >= 1 ? false : true
17 | }))
18 | 
19 | import nlp from './src/index.js'
20 | // console.log(nlp('dépister').verbs().conjugate())
21 | 
22 | 


--------------------------------------------------------------------------------
/data/lexicon/people/firstnames.js:
--------------------------------------------------------------------------------
 1 | //ambiguously-gendered firstnames
 2 | //names commonly used in either gender
 3 | export default [
 4 |   'alexis',
 5 |   'andra',
 6 |   'aubrey',
 7 |   'blair',
 8 |   'casey',
 9 |   'cassidy',
10 |   'cheyenne',
11 |   'devan',
12 |   'devon',
13 |   'jamie',
14 |   'jammie',
15 |   'jessie',
16 |   'jude',
17 |   'kasey',
18 |   'kelsey',
19 |   'kenyatta',
20 |   'kerry',
21 |   'kris',
22 |   'lashawn',
23 |   'marion',
24 |   'marlo',
25 |   'mel',
26 |   'morgan',
27 |   'nelly',
28 |   'quinn',
29 |   'regan',
30 |   'rene',
31 |   'shay',
32 |   'shea',
33 |   'shelby',
34 |   'shiloh',
35 | ]
36 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/1st-pass/titlecase.js:
--------------------------------------------------------------------------------
 1 | const isTitleCase = function (str) {
 2 |   return /^[A-Z][a-z'\u00C0-\u00FF]/.test(str) || /^[A-Z]$/.test(str)
 3 | }
 4 | 
 5 | // add a noun to any non-0 index titlecased word, with no existing tag
 6 | const titleCaseNoun = function (terms, i, world) {
 7 |   let setTag = world.methods.one.setTag
 8 |   let term = terms[i]
 9 |   if (i === 0) {
10 |     return null
11 |   }
12 |   if (term.tags.size > 0) {
13 |     return null
14 |   }
15 |   if (isTitleCase(term.text)) {
16 |     setTag([term], 'ProperNoun', world, false, 'title-case')
17 |     return true
18 |   }
19 |   return null
20 | }
21 | export default titleCaseNoun


--------------------------------------------------------------------------------
/tests/buildNet.test.js:
--------------------------------------------------------------------------------
 1 | import test from 'tape'
 2 | import nlp from './_lib.js'
 3 | let here = '[fr-buildNet] '
 4 | 
// Check that buildNet() expands `{word/Tag}` root matches into hooks for the
// word itself and for inflected forms of it.
test('buildNet:', function (t) {
  let matches = [
    { match: '{crier/Verb}' },
    { match: '{jaune/Adjective}' },
    { match: '{troupe/Noun}' }
  ]
  let net = nlp.buildNet(matches)
  t.ok(net.hooks.crier, here + 'crier')
  t.ok(net.hooks.criaient, here + 'criaient')
  // NOTE(review): same assertion as the line above — probably meant to check a
  // different conjugated form; confirm
  t.ok(net.hooks.criaient, here + 'criaient')
  t.ok(net.hooks.jaune, here + 'jaune')
  t.ok(net.hooks.jaunes, here + 'jaunes')
  t.ok(net.hooks.troupe, here + 'troupe')
  t.ok(net.hooks.troupes, here + 'troupes')
  t.end()
})


--------------------------------------------------------------------------------
/scripts/stress.js:
--------------------------------------------------------------------------------
 1 | /* eslint-disable no-console, no-unused-vars */
 2 | import corpus from 'fr-corpus' //install with `npm i fr-corpus --no-save`
 3 | import nlp from '../src/index.js'
 4 | let texts = corpus.all()
 5 | console.log(`\n\n--- running compromise on ${texts.length.toLocaleString()} random sentences---\n`)
 6 | console.log('    --should take a few minutes--')
 7 | 
 8 | for (let i = 0; i < texts.length; i++) {
 9 |   let txt = texts[i][0]
10 |   let doc = nlp(txt)
11 |   let m = doc.match('#Determiner #Adverb #Adjective #Noun')
12 |   m.forEach(d => {
13 |     d.terms()
14 |   })
15 |   m.verbs().conjugate()
16 |   doc.numbers().add(2)
17 | }
18 | 
19 | console.log('\n\n - done!')
20 | 


--------------------------------------------------------------------------------
/data/lexicon/dates/months.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   'janvier', // - January
 3 |   'février', // - February
 4 |   'mars', // - March
 5 |   'avril', // - April
 6 |   'mai', // - May
 7 |   'juin', // - June
 8 |   'juillet', // - July
 9 |   'aout', // - August
10 |   'septembre', // -September
11 |   'octobre', // - October
12 |   'novembre', // - November
13 |   'décembre', // - December
14 |   'fevrier',
15 |   'decembre',
16 | 
17 |   'janv',
18 |   'jan',
19 |   'fév',
20 |   'fev',
21 |   'févr',
22 |   'fevr',
23 |   'mars',
24 |   'avr',
25 |   'mai',
26 |   'juin',
27 |   'juil',
28 |   'juill',
29 |   'aout',
30 |   'sept',
31 |   'oct',
32 |   'nov',
33 |   'déc',
34 |   'dec',
35 | ]
36 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/3rd-pass/adj-plurals.js:
--------------------------------------------------------------------------------
 1 | // guess a plural/singular tag each Adjective
 2 | const adjPlurals = function (terms, i, world) {
 3 |   let setTag = world.methods.one.setTag
 4 |   let term = terms[i]
 5 |   let tags = term.tags
 6 |   let str = term.implicit || term.normal || term.text || ''
 7 |   if (tags.has('Adjective')) {
 8 |     if (str.endsWith('s') || str.endsWith('aux')) {
 9 |       return setTag([term], 'PluralAdjective', world, false, '3-plural-adj')
10 |     }
11 |     // if (str.endsWith('euse')) {
12 |     //   return setTag([term], 'SingularAdjective', world, false, '3-plural-adj')
13 |     // }
14 |   }
15 |   return null
16 | }
17 | export default adjPlurals


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/3rd-pass/number-types.js:
--------------------------------------------------------------------------------
 1 | // const dateWords = new Set('en', 'entre', 'depuis', 'courant', 'pendant', 'dans', 'lorsque', 'avant', 'après')
 2 | 
 3 | // guess a gender for each noun
 4 | const numberTags = function (terms, i, world) {
 5 |   let setTag = world.methods.one.setTag
 6 |   let { tags } = terms[i]
 7 |   // tag some values as a year
 8 |   if (tags.has('Cardinal') && tags.has('NumericValue')) {
 9 |     let term = terms[i]
10 |     let n = Number(term.text)
11 |     if (n && n > 1600 && n < 2090 && n === parseInt(n, 10)) {
12 |       return setTag([term], 'Year', world, false, '3-year')
13 |     }
14 |   }
15 |   return null
16 | }
17 | export default numberTags


--------------------------------------------------------------------------------
/src/03-three/numbers/parse/_data.js:
--------------------------------------------------------------------------------
 1 | import data from '../data.js'
 2 | 
 3 | const toCardinal = {}
 4 | const toOrdinal = {}
 5 | const toNumber = {}
 6 | 
 7 | Object.keys(data).forEach(k => {
 8 |   data[k].forEach(a => {
 9 |     let [num, w, ord] = a
10 |     toCardinal[ord] = w
11 |     toOrdinal[w] = ord
12 |     toNumber[w] = num
13 |     // add ordinal without accents
14 |     let norm = ord.replace(/è/, 'e')
15 |     toNumber[norm] = num
16 |   })
17 | })
18 | 
19 | // add some more
20 | Object.assign(toNumber, {
21 |   cents: 100,
22 |   milles: 1000,
23 |   millions: 1000000,
24 |   milliards: 1000000000,
25 | })
26 | 
27 | export {
28 |   toOrdinal,
29 |   toCardinal,
30 |   toNumber
31 | }


--------------------------------------------------------------------------------
/src/03-three/verbs/api/adverbs.js:
--------------------------------------------------------------------------------
 1 | // split adverbs as before/after the root
 2 | const getAdverbs = function (vb, root) {
 3 |   let res = {
 4 |     pre: vb.none(),
 5 |     post: vb.none(),
 6 |   }
 7 |   if (!vb.has('#Adverb')) {
 8 |     return res
 9 |   }
10 |   // pivot on the main verb
11 |   let parts = vb.splitOn(root)
12 |   if (parts.length === 3) {
13 |     return {
14 |       pre: parts.eq(0).adverbs(),
15 |       post: parts.eq(2).adverbs(),
16 |     }
17 |   }
18 |   // it must be the second one
19 |   if (parts.eq(0).isDoc(root)) {
20 |     res.post = parts.eq(1).adverbs()
21 |     return res
22 |   }
23 |   res.pre = parts.eq(0).adverbs()
24 |   return res
25 | }
26 | export default getAdverbs
27 | 


--------------------------------------------------------------------------------
/scripts/cleanup.js:
--------------------------------------------------------------------------------
 1 | import keep from '../data/lexicon/nouns/nouns.js'
 2 | import og from '../data/lexicon/data/neutralNouns.js'
 3 | 
 4 | // import messy from '../data/lexicon/verbs.js'
 5 | // const unique = function (arr) {
 6 | //   let obj = {}
 7 | //   for (let i = 0; i < arr.length; i += 1) {
 8 | //     obj[arr[i]] = true
 9 | //   }
10 | //   return Object.keys(obj)
11 | // }
12 | 
13 | // console.log(JSON.stringify(unique(messy), null, 2))
14 | 
15 | 
// index the keep-list once, so each lookup is constant-time instead of a full scan
const kept = new Set(keep)

// the words in `og` that are not already in `keep` - every overlap is printed
let loose = og.filter(str => {
  if (kept.has(str)) {
    console.log(str)
    return false
  }
  return true
})

// size before, and after, removing the overlap
console.log(og.length)
console.log(loose.length)
27 | // console.log(JSON.stringify(loose, null, 2))


--------------------------------------------------------------------------------
/src/02-two/tagset/tags/values.js:
--------------------------------------------------------------------------------
 1 | export default {
 2 |   Value: {
 3 |     not: ['Verb', 'Adjective', 'Adverb'],
 4 |   },
 5 |   Ordinal: {
 6 |     is: 'Value',
 7 |     not: ['Cardinal'],
 8 |   },
 9 |   Cardinal: {
10 |     is: 'Value',
11 |     not: ['Ordinal'],
12 |   },
13 |   Fraction: {
14 |     is: 'Value',
15 |     not: ['Noun'],
16 |   },
17 |   Multiple: {
18 |     is: 'TextValue',
19 |   },
20 |   RomanNumeral: {
21 |     is: 'Cardinal',
22 |     not: ['TextValue'],
23 |   },
24 |   TextValue: {
25 |     is: 'Value',
26 |     not: ['NumericValue'],
27 |   },
28 |   NumericValue: {
29 |     is: 'Value',
30 |     not: ['TextValue'],
31 |   },
32 |   Money: {
33 |     is: 'Cardinal',
34 |   },
35 |   Percent: {
36 |     is: 'Value',
37 |   },
38 | }
39 | 


--------------------------------------------------------------------------------
/learn/wikinews/packSuffixes.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs')
 2 | const suff = require('../../src/tagger/data/suffixMap.js')
 3 | 
 4 | // find any long suffixes that are covered by shorter ones
 5 | const twos = suff[5]
 6 | const twoWords = Object.keys(twos)
 7 | let count = 0
 8 | 
 9 | for (let i = 6; i <= 6; i += 1) {
10 |   twoWords.forEach((ending) => {
11 |     let testWords = Object.keys(suff[i])
12 |     testWords.forEach((w) => {
13 |       if (w.endsWith(ending)) {
14 |         if (twos[ending] === suff[i][w]) {
15 |           count += 1
16 |           console.log('kill:', w, `(${ending})`)
17 |           delete suff[i][w]
18 |         }
19 |       }
20 |     })
21 |   })
22 | }
23 | 
24 | // console.log(count)
25 | console.log(JSON.stringify(suff, null, 2))
26 | 


--------------------------------------------------------------------------------
/learn/giga/french.js:
--------------------------------------------------------------------------------
 1 | import { streamXml } from './_giga.js'
 2 | const gigaFr = '/Users/spencer/data/opus/fr/giga-fren/xml/fr/giga-fren.release2.fixed.'
 3 | 
 4 | // kick them off
 5 | const parseXml = function (id, doBoth) {
 6 |   const parseFR = function (item) {
 7 |     try {
 8 |       doBoth({ fr: item.w || [] })
 9 |       return true
10 |     } catch (e) {
11 |       console.log(e)
12 |     }
13 |   }
14 |   return new Promise((resolve, reject) => {
15 | 
16 |     const doneMaybe = function () {
17 |       console.log('--done-- ')
18 |       resolve()
19 |     }
20 | 
21 |     try {
22 |       streamXml(gigaFr + `${id}.xml`, parseFR, doneMaybe)
23 |     } catch (e) {
24 |       console.log(e)
25 |       reject(e)
26 |     }
27 |   })
28 | }
29 | 
30 | export default parseXml


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/3rd-pass/noun-gender.js:
--------------------------------------------------------------------------------
 1 | // guess a gender for each noun
 2 | const nounGender = function (terms, i, world) {
 3 |   let setTag = world.methods.one.setTag
 4 |   const guessGender = world.methods.one.guessGender
 5 |   let { tags } = terms[i]
 6 |   if (tags.has('Noun') && !tags.has('MaleNoun') && !tags.has('FemaleNoun')) {
 7 |     let term = terms[i]
 8 |     // should these have genders?
 9 |     if (tags.has('ProperNoun') || tags.has('Pronoun') || tags.has('Possessive')) {
10 |       return null
11 |     }
12 |     // look for 'le', look for suffix
13 |     let found = guessGender(terms, i)
14 |     if (found) {
15 |       return setTag([term], found, world, false, '3-noun-gender')
16 |     }
17 |   }
18 |   return null
19 | }
20 | export default nounGender


--------------------------------------------------------------------------------
/plugins/dates/rollup.config.js:
--------------------------------------------------------------------------------
 1 | import terser from '@rollup/plugin-terser'
 2 | import { nodeResolve } from '@rollup/plugin-node-resolve'
 3 | 
 4 | const opts = { keep_classnames: true, module: true }
 5 | 
 6 | export default [
 7 |   {
 8 |     input: 'src/plugin.js',
 9 |     output: [{ file: 'builds/fr-compromise-dates.cjs', format: 'umd', name: 'frCompromiseDates' }],
10 |     plugins: [nodeResolve()],
11 |   },
12 |   {
13 |     input: 'src/plugin.js',
14 |     output: [{ file: 'builds/fr-compromise-dates.min.js', format: 'umd', name: 'frCompromiseDates' }],
15 |     plugins: [nodeResolve(), terser(opts)],
16 |   },
17 |   {
18 |     input: 'src/plugin.js',
19 |     output: [{ file: 'builds/fr-compromise-dates.mjs', format: 'esm' }],
20 |     plugins: [nodeResolve(), terser(opts)],
21 |   }
22 | ]
23 | 


--------------------------------------------------------------------------------
/rollup.config.js:
--------------------------------------------------------------------------------
 1 | import terser from '@rollup/plugin-terser'
 2 | import { nodeResolve } from '@rollup/plugin-node-resolve'
 3 | 
 4 | const opts = {
 5 |   keep_classnames: true,
 6 |   module: true,
 7 | }
 8 | 
 9 | export default [
10 |   // === Main ==
11 |   {
12 |     input: 'src/index.js',
13 |     output: [{ file: 'builds/fr-compromise.cjs', format: 'umd', name: 'frCompromise' }],
14 |     plugins: [nodeResolve()],
15 |   },
16 |   {
17 |     input: 'src/index.js',
18 |     output: [{ file: 'builds/fr-compromise.min.js', format: 'umd', name: 'frCompromise' }],
19 |     plugins: [nodeResolve(), terser(opts)],
20 |   },
21 |   {
22 |     input: 'src/index.js',
23 |     output: [{ file: 'builds/fr-compromise.mjs', format: 'esm' }],
24 |     plugins: [nodeResolve(), terser(opts)],
25 |   }
26 | 
27 | ]
28 | 


--------------------------------------------------------------------------------
/learn/wikinews/parse.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs')
 2 | 
 3 | let lines = fs
 4 |   .readFileSync(__dirname + '/wikinews.txt')
 5 |   .toString()
 6 |   .split(/\n/)
 7 | 
 8 | // lines = lines.slice(0, 100)
 9 | 
10 | const mapping = {
11 |   NPP: 'N',
12 |   NC: 'N',
13 |   U: 'N',
14 |   ET: 'N',
15 | 
16 |   VINF: 'V',
17 |   VS: 'V',
18 |   VPP: 'PastTense',
19 |   VPR: 'Gerund',
20 | }
21 | 
22 | lines = lines.map((str) => {
23 |   let words = str.split(/ /g)
24 |   words = words.map((w) => {
25 |     let arr = w.split(/_/)
26 |     let tag = (arr[1] || '').trim()
27 |     tag = mapping[tag] || tag
28 |     return {
29 |       word: arr[0].trim(),
30 |       tag: tag,
31 |     }
32 |   })
33 |   words = words.filter((w) => w.tag && w.word && w.tag !== 'PONCT')
34 |   return words
35 | })
36 | module.exports = lines
37 | 


--------------------------------------------------------------------------------
/src/01-one/tokenize/compute/machine.js:
--------------------------------------------------------------------------------
 1 | const hasDash = /^\p{Letter}+-\p{Letter}+$/u
 2 | // 'machine' is a normalized form that looses human-readability
 3 | const doMachine = function (term) {
 4 |   let str = term.implicit || term.normal || term.text
 5 |   // remove apostrophes
 6 |   str = str.replace(/['’]s$/, '')
 7 |   str = str.replace(/s['’]$/, 's')
 8 |   //lookin'->looking (make it easier for conjugation)
 9 |   str = str.replace(/([aeiou][ktrp])in'$/, '$1ing')
10 |   //turn re-enactment to reenactment
11 |   if (hasDash.test(str)) {
12 |     str = str.replace(/-/g, '')
13 |   }
14 |   // remove accented chars
15 |   // str = str.replace(/è/g, 'e')
16 |   //#tags, @mentions
17 |   str = str.replace(/^[#@]/, '')
18 |   if (str !== term.normal) {
19 |     term.machine = str
20 |   }
21 | }
22 | export default doMachine
23 | 


--------------------------------------------------------------------------------
/.esformatter:
--------------------------------------------------------------------------------
 1 | {
 2 |   "plugins": [
 3 |     "esformatter-quotes",
 4 |     "esformatter-parseint",
 5 |     "esformatter-braces",
 6 |     "esformatter-semicolons"
 7 |   ],
 8 |   "quotes": {
 9 |     "type": "single",
10 |     "avoidEscape": false
11 |   },
12 |   "whiteSpace": {
13 |     "before": {
14 |       "ParameterList": -1,
15 |       "ParameterComma": -1,
16 |       "FunctionDeclarationOpeningBrace": -1,
17 |       "FunctionDeclarationClosingBrace": -1,
18 |       "ForStatementExpressionOpening": -1
19 |     },
20 |     "after": {
21 |       "FunctionName": -1,
22 |       "ParameterComma": 1,
23 |       "FunctionReservedWord": -1,
24 |       "ParameterList": -1,
25 |       "FunctionDeclarationOpeningBrace": -1,
26 |       "PropertyName": -1
27 |     }
28 |   },
29 |   "lineBreak": {
30 |     "before": {
31 |       "EndOfFile": 1
32 |     }
33 |   }
34 | }


--------------------------------------------------------------------------------
/learn/nouns/learn.js:
--------------------------------------------------------------------------------
 1 | const data = require('./data')
 2 | // const toFemme = require('../../src/transforms/nouns/toFemme.js')
 3 | const toMasc = require('../../src/transforms/nouns/toMasc.js')
 4 | const toSigular = require('../../src/transforms/nouns/toSingular.js')
 5 | 
 6 | const toRoot = function (str) {
 7 |   str = toSigular(str)
 8 |   str = toMasc(str)
 9 |   return str
10 | }
11 | 
12 | const irregs = {}
13 | let count = 0
14 | data.forEach((a) => {
15 |   let from = a[3]
16 |   let want = a[0]
17 |   let w = toRoot(from)
18 |   if (w === want) {
19 |     count += 1
20 |   } else {
21 |     // if (from.endsWith('eur')) {
22 |     irregs[from] = want
23 |     console.log(from + ' ➔ ' + w + '  (' + want + ')')
24 |     // }
25 |   }
26 | })
27 | console.log(count)
28 | console.log(count / data.length)
29 | // console.log(JSON.stringify(irregs, null, 2))
30 | 


--------------------------------------------------------------------------------
/plugins/dates/index.d.ts:
--------------------------------------------------------------------------------
 1 | import nlp from 'compromise'
 2 | type View = ReturnType<typeof nlp>
 3 | 
 4 | interface DateView extends View {
 5 |   /** convert parsed dates to a date format */
 6 |   format(fmt: string): View
 7 |   /** get parsed date metadata */
 8 |   get(): object[]
 9 | }
10 | 
11 | interface TimeView extends View {
12 |   /** convert parsed dates to a time format */
13 |   format(fmt: string): View
14 |   /** get parsed time metadata */
15 |   get(): object[]
16 | }
17 | 
18 | export interface DatesMethods {
19 |   /** match all date-phrases */
20 |   dates(): DateView
21 |   /** match time-of-day phrases */
22 |   times(): TimeView
23 |   /** match lengths of time, like '2 weeks' */
24 |   durations(): View
25 | }
26 | 
27 | /** extended compromise lib **/
28 | declare const nlpSpeed: nlp.TypedPlugin<DatesMethods>
29 | 
30 | export default nlpSpeed
31 | 


--------------------------------------------------------------------------------
/learn/giga/_giga.js:
--------------------------------------------------------------------------------
 1 | import XmlStream from 'xml-stream'
 2 | import fs from 'fs'
 3 | 
 4 | const streamXml = function (file, cb, end) {
 5 |   const stream = fs.createReadStream(file)
 6 |   const xml = new XmlStream(stream)
 7 |   xml.collect('w')
 8 |   xml.on('endElement: s', function (item) {
 9 |     cb(item, xml)
10 |   })
11 |   xml.on('end', end)
12 | }
13 | 
14 | 
// list the unique values of an array, most-frequent first
// - values are returned as strings, because they are counted as object keys
const topk = function (arr) {
  const counts = {}
  arr.forEach(a => {
    counts[a] = (counts[a] || 0) + 1
  })
  // a comparator must return negative, zero, or positive - descending by count
  return Object.entries(counts)
    .sort((a, b) => b[1] - a[1])
    .map(pair => pair[0])
}
25 | 
// like forEach, but waits for each callback to settle before starting the next one
async function forEachSync(array, callback) {
  let index = 0
  while (index < array.length) {
    await callback(array[index], index, array)
    index += 1
  }
}
31 | 
32 | 
33 | export { streamXml, forEachSync, topk }


--------------------------------------------------------------------------------
/data/lexicon/misc.js:
--------------------------------------------------------------------------------
 1 | export default {
 2 |   n: 'Negative',
 3 |   ne: 'Negative',
 4 |   ni: 'Negative',
 5 |   aucun: 'Negative',
 6 | 
 7 |   se: 'Auxiliary',
 8 |   te: 'Auxiliary',
 9 |   me: 'Auxiliary',
10 | 
11 |   ai: 'Auxiliary',
12 |   ont: 'Auxiliary',
13 | 
14 |   // questions
15 |   ou: 'Conjunction',
16 |   qui: 'Preposition',
17 |   que: 'Preposition',
18 |   a: 'Preposition',
19 |   ces: 'Determiner',
20 |   cette: 'Determiner',
21 | 
22 | 
23 |   quelle: 'QuestionWord',
24 |   // que: 'QuestionWord',
25 |   qu: 'QuestionWord',
26 |   quand: 'QuestionWord',
27 | 
28 |   '&': 'Conjunction',
29 | 
30 |   si: 'Condition',
31 |   sinon: 'Condition',
32 |   'aujourd\'hui': 'Noun',
33 | 
34 |   'quelque': 'Adjective',
35 |   'quelques': 'Adjective',
36 | 
37 |   // alt verbs
38 |   'essaie': 'PresentTense',
39 |   'essaies': 'PresentTense',
40 |   'essaient': 'PresentTense'
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/src/02-two/tagset/tags/dates.js:
--------------------------------------------------------------------------------
 1 | export default {
 2 |   Date: {
 3 |     not: ['Verb', 'Adverb', 'Adjective'],
 4 |   },
 5 |   Month: {
 6 |     is: 'Singular',
 7 |     also: ['Date'],
 8 |     not: ['Year', 'WeekDay', 'Time'],
 9 |   },
10 |   WeekDay: {
11 |     is: 'Noun',
12 |     also: ['Date'],
13 |   },
14 |   Year: {
15 |     is: 'Date',
16 |     not: ['RomanNumeral'],
17 |   },
18 |   FinancialQuarter: {
19 |     is: 'Date',
20 |     not: 'Fraction',
21 |   },
22 |   // 'easter'
23 |   Holiday: {
24 |     is: 'Date',
25 |     also: ['Noun'],
26 |   },
27 |   // 'summer'
28 |   Season: {
29 |     is: 'Date',
30 |   },
31 |   Timezone: {
32 |     is: 'Noun',
33 |     also: ['Date'],
34 |     not: ['ProperNoun'],
35 |   },
36 |   Time: {
37 |     is: 'Date',
38 |     not: ['AtMention'],
39 |   },
40 |   // 'months'
41 |   Duration: {
42 |     is: 'Noun',
43 |     also: ['Date'],
44 |   },
45 | }
46 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/model/regex/regex-normal.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   //web tags
 3 |   [/^[\w.]+@[\w.]+\.[a-z]{2,3}$/, 'Email'],
 4 |   [/^(https?:\/\/|www\.)+\w+\.[a-z]{2,3}/, 'Url', 'http..'],
 5 |   [/^[a-z0-9./].+\.(com|net|gov|org|ly|edu|info|biz|dev|ru|jp|de|in|uk|br|io|ai)/, 'Url', '.com'],
 6 | 
 7 |   // timezones
 8 |   [/^[PMCE]ST$/, 'Timezone', 'EST'],
 9 | 
10 |   //names
11 |   [/^ma?c'.*/, 'LastName', "mc'neil"],
12 |   [/^o'[drlkn].*/, 'LastName', "o'connor"],
13 |   [/^ma?cd[aeiou]/, 'LastName', 'mcdonald'],
14 | 
15 |   //slang things
16 |   [/^(lol)+[sz]$/, 'Expression', 'lol'],
17 |   [/^wo{2,}a*h?$/, 'Expression', 'wooah'],
18 |   [/^(hee?){2,}h?$/, 'Expression', 'hehe'],
19 |   [/^(un|de|re)\\-[a-z\u00C0-\u00FF]{2}/, 'Verb', 'un-vite'],
20 | 
21 |   // m/h
22 |   [/^(m|k|cm|km)\/(s|h|hr)$/, 'Unit', '5 k/m'],
23 |   // μg/g
24 |   [/^(ug|ng|mg)\/(l|m3|ft3)$/, 'Unit', 'ug/L'],
25 | ]
26 | 


--------------------------------------------------------------------------------
/learn/wolf/parse_wolf.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | var parser = require('xml2json');
 3 | let fs = require('fs');
 4 | 
 5 | let xml = fs.readFileSync(__dirname + '/wolf-1.0b4.xml', 'utf8');
 6 | // let xml = fs.readFileSync(__dirname + '/tiny.xml', 'utf8');
 7 | // xml to json
 8 | // var xml = '<foo>bar</foo>';
 9 | var json = JSON.parse(parser.toJson(xml));
10 | 
11 | let words = [];
12 | 
13 | let len = json.WN.SYNSET.length;
14 | for (var i = 0; i < len; i++) {
15 |   if (json.WN.SYNSET[i].SYNONYM.LITERAL !== '_EMPTY_') {
16 |     if (json.WN.SYNSET[i].POS !== 'n') {
17 |       continue;
18 |     }
19 |     let str = json.WN.SYNSET[i].SYNONYM.LITERAL['$t'];
20 |     if (str) {
21 |       words.push(str);
22 |     } else {
23 |       json.WN.SYNSET[i].SYNONYM.LITERAL.forEach(function(o) {
24 |         words.push(o['$t']);
25 |       });
26 |     }
27 |   }
28 | }
29 | 
30 | console.log(JSON.stringify(words, null, 2));
31 | 


--------------------------------------------------------------------------------
/data/lexicon/people/honorifics.js:
--------------------------------------------------------------------------------
 1 | //extend to person-names if infront of a name - 'Professor Frink'
 2 | export default [
 3 |   'admiral',
 4 |   'ayatullah',
 5 |   'brigadier',
 6 |   'captain',
 7 |   'captain',
 8 |   'chancellor',
 9 |   'colonel',
10 |   'commander',
11 |   'congressman',
12 |   'congresswoman',
13 |   'councillor',
14 |   'count',
15 |   'doctor',
16 |   'dutchess',
17 |   'excellency',
18 |   'field marshal',
19 |   'first lady',
20 |   'first lieutenant',
21 |   'judge',
22 |   'king',
23 |   'lieutenant',
24 |   'magistrate',
25 |   'marshal',
26 |   'mayor',
27 |   'officer',
28 |   'pastor',
29 |   'president',
30 |   'prime minister',
31 |   'prince',
32 |   'princess',
33 |   'professor',
34 |   'queen',
35 |   'rabbi',
36 |   'rear admiral',
37 |   'reverend',
38 |   'second lieutenant',
39 |   'secretary',
40 |   'sergeant',
41 |   'sultan',
42 |   'taoiseach',
43 |   'vice admiral',
44 | ]
45 | 


--------------------------------------------------------------------------------
/src/03-three/adjectives/api.js:
--------------------------------------------------------------------------------
 1 | export const getNth = (doc, n) => (typeof n === 'number' ? doc.eq(n) : doc)
 2 | 
 3 | // get root form of adjective
 4 | const getRoot = function (m) {
 5 |   m.compute('root')
 6 |   let str = m.text('root')
 7 |   return str
 8 | }
 9 | 
10 | const api = function (View) {
11 |   class Adjectives extends View {
12 |     constructor(document, pointer, groups) {
13 |       super(document, pointer, groups)
14 |       this.viewType = 'Adjectives'
15 |     }
16 |     conjugate(n) {
17 |       const methods = this.methods.two.transform.adjective
18 |       return getNth(this, n).map(m => {
19 |         let adj = getRoot(m)
20 |         return methods.conjugate(adj, methods)
21 |       }, [])
22 |     }
23 |   }
24 | 
25 |   View.prototype.adjectives = function (n) {
26 |     let m = this.match('#Adjective')
27 |     m = getNth(m, n)
28 |     return new Adjectives(this.document, m.pointer)
29 |   }
30 | }
31 | export default api


--------------------------------------------------------------------------------
/src/02-two/preTagger/tagRank.js:
--------------------------------------------------------------------------------
 1 | const boringTags = new Set(['Auxiliary', 'Possessive'])
 2 | 
 3 | const sortByKids = function (tags, tagSet) {
 4 |   tags = tags.sort((a, b) => {
 5 |     // (unknown tags are interesting)
 6 |     if (boringTags.has(a) || !tagSet.hasOwnProperty(b)) {
 7 |       return 1
 8 |     }
 9 |     if (boringTags.has(b) || !tagSet.hasOwnProperty(a)) {
10 |       return -1
11 |     }
12 |     let kids = tagSet[a].children || []
13 |     let aKids = kids.length
14 |     kids = tagSet[b].children || []
15 |     let bKids = kids.length
16 |     return aKids - bKids
17 |   })
18 |   return tags
19 | }
20 | 
21 | const tagRank = function (view) {
22 |   const { document, world } = view
23 |   const tagSet = world.model.one.tagSet
24 |   document.forEach(terms => {
25 |     terms.forEach(term => {
26 |       let tags = Array.from(term.tags)
27 |       term.tagRank = sortByKids(tags, tagSet)
28 |     })
29 |   })
30 | }
31 | export default tagRank
32 | 


--------------------------------------------------------------------------------
/changelog.md:
--------------------------------------------------------------------------------
 1 | ### 0.2.8 [Aug 2023]
 2 | 
 3 | - **[fix]** - conjugation issues
 4 | - **[update]** - dependencies
 5 | 
 6 | ### 0.2.7 [May 2023]
 7 | 
 8 | - **[fix]** - tagging
 9 | - **[new]** - `fr-compromise-dates`
10 | 
11 | ### 0.2.6 [Feb 2023]
12 | 
13 | - **[fix]** - support multi-lexicon
14 | - **[fix]** - try new suffix thumb
15 | - **[fix]** - conjugation fixes
16 | 
17 | ### 0.2.0 [Sept 2022]
18 | 
19 | - **[fix]** - inflections+conjugations
20 | - **[new]** - start of verb, noun, and adjective methods
21 | 
22 | ### 0.1.2 [August 2022]
23 | 
24 | - **[fix]** - inflections+conjugations
25 | 
26 | ### 0.1.1 [July 2022]
27 | 
28 | - **[fix]** - import format
29 | - **[new]** - typescript types
30 | 
31 | ### 0.1.0 [June 2022]
32 | 
33 | - **[new]** - `.compute('root')`
34 | - **[new]** - number-parsing
35 | 
36 | ### 0.0.2 [June 2022]
37 | 
38 | - **[new]** - support root matches
39 | - **[new]** - `.compute('root')`
40 | - **[new]** - FirstPerson, SecondPerson tags etc.
41 | 


--------------------------------------------------------------------------------
/src/01-one/tokenize/contractions.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   { word: "qu'il", out: ['que', 'il'] },
 3 |   { word: "n'y", out: ['ne', 'a'] },
 4 |   { word: "n'est", out: ['ne', 'est'] },
 5 |   { word: 'aux', out: ['à', 'les'] },
 6 |   { word: 'au', out: ['à', 'le'] },
 7 |   { before: 'm', out: ['me'] },
 8 |   { before: 's', out: ['se'] },
 9 |   { before: 't', out: ['tu'] },
10 |   { before: 'n', out: ['ne'] },
11 |   { before: 'qu', out: ['que'] },//tant qu'étudiant
12 |   { before: 'puisqu', out: ['puisque'] },
13 |   { before: 'lorsqu', out: ['lorsque'] },//lorsqu’il
14 |   { before: 'jusqu', out: ['jusque'] },//jusqu'en
15 |   { before: 'quelqu', out: ['quelque'] },//Quelqu'un
16 | 
17 |   { word: 'auquel', out: ['à', 'lequel'] },
18 |   { word: 'auxquels', out: ['à', 'lesquels'] },
19 |   { word: 'auxquelles', out: ['à', 'lesquelles'] },
20 |   { word: 'duquel', out: ['de', 'lequel'] },
21 |   { word: 'desquels', out: ['de', 'lesquels'] },
22 |   { word: 'desquelles', out: ['de', 'lesquelles'] },
23 | ]


--------------------------------------------------------------------------------
/learn/wiktionary/index.js:
--------------------------------------------------------------------------------
 1 | // import wtf from 'wtf_wikipedia'
 2 | import rp from 'request-promise';
 3 | import $ from 'cheerio';
 4 | import list from './list.js'
 5 | 
 6 | 
 7 | const doit = async function (word) {
 8 |   const url = `https://fr.wiktionary.org/wiki/${encodeURIComponent(word)}`;
 9 |   return rp(url)
10 |     .then(function (html) {
11 |       //success!
12 |       let all = []
13 |       let r = $('.flextable-fr-mfsp :first a ', html)
14 |       r.each(function (i, o) {
15 |         let str = $(this).text()
16 |         if (!str.match(/^\\/)) {
17 |           all.push(str)
18 |         }
19 |       })
20 |       return all
21 |     })
22 |     .catch(function (err) {
23 |       console.log('error')
24 |     });
25 | 
26 | }
27 | 
28 |   ; (async () => {
29 |     let all = {}
30 | 
31 |     let keys = Object.keys(list)
32 |     for (let i = 0; i < keys.length; i += 1) {
33 | 
34 |       let w = keys[i]
35 |       all[w] = await doit(w)
36 |     }
37 |     console.log(JSON.stringify(all, null, 2))
38 | 
39 |   })()


--------------------------------------------------------------------------------
/src/03-three/topics/api.js:
--------------------------------------------------------------------------------
 1 | const findPeople = function () {
 2 |   let m = this.match('#Honorific+? #Person+')
 3 |   return m
 4 | }
 5 | 
 6 | const findOrgs = function () {
 7 |   return this.match('#Organization+')
 8 | }
 9 | 
10 | const findPlaces = function () {
11 |   let m = this.match('(#Place|#Address)+')
12 | 
13 |   // split all commas except for 'paris, france'
14 |   let splits = m.match('@hasComma')
15 |   splits = splits.filter(c => {
16 |     // split 'europe, china'
17 |     if (c.has('(asia|africa|europe|america)$')) {
18 |       return true
19 |     }
20 |     // don't split 'paris, france'
21 |     if (c.has('(#City|#Region|#ProperNoun)$') && c.after('^(#Country|#Region)').found) {
22 |       return false
23 |     }
24 |     return true
25 |   })
26 |   m = m.splitAfter(splits)
27 |   return m
28 | }
29 | 
30 | const api = function (View) {
31 |   View.prototype.people = findPeople
32 |   View.prototype.organizations = findOrgs
33 |   View.prototype.places = findPlaces
34 | }
35 | 
36 | export default api
37 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/3rd-pass/noun-plurals.js:
--------------------------------------------------------------------------------
 1 | const exceptions = new Set([
 2 |   'bras',
 3 |   'bus',
 4 |   'corps',
 5 |   'discours',
 6 |   'fils',
 7 |   'héros',
 8 |   'os',
 9 |   'pays',
10 |   'procès',
11 |   'poids',
12 |   'repas',
13 |   'sens',
14 |   'succès',
15 | ])
16 | // guess a plural/singular tag each noun
17 | const nounPlurals = function (terms, i, world) {
18 |   let setTag = world.methods.one.setTag
19 |   let term = terms[i]
20 |   let tags = term.tags
21 |   let str = term.implicit || term.normal || term.text || ''
22 |   if (tags.has('Noun')) {
23 |     if (tags.has('Pronoun') || tags.has('ProperNoun') || tags.has('Uncountable') || tags.has('Date')) {
24 |       return null
25 |     }
26 |     if (exceptions.has(str)) {
27 |       return setTag([term], 'Singular', world, false, '3-plural-guess')
28 |     }
29 |     if (str.endsWith('s') && !str.endsWith('is')) {
30 |       return setTag([term], 'PluralNoun', world, false, '3-plural-guess')
31 |     }
32 |   }
33 |   return null
34 | }
35 | export default nounPlurals


--------------------------------------------------------------------------------
/data/lexicon/misc/prepositions.js:
--------------------------------------------------------------------------------
 1 | // these need some work
 2 | export default [
 3 |   'lorsque',
 4 |   'puisque',
 5 |   'lorsqu',
 6 |   'puisqu',
 7 |   'quoiqu',
 8 |   'pourquoi',
 9 |   'quelqu',
10 |   'quoique',
11 | 
12 |   'y',// -?
13 | 
14 |   'de', 'du', 'des',
15 |   'a',
16 |   'd',
17 |   'en',
18 |   'dans',
19 |   'pour',
20 |   'par',
21 |   'sur',
22 |   'avec',
23 |   'apres',
24 |   'selon',
25 |   'depuis',
26 |   'contre',
27 |   'entre',
28 |   'comme',
29 |   'avant',
30 |   'sans',
31 |   'devant',
32 |   'sous',
33 |   'vers',
34 |   'pendant',
35 |   'afin',
36 |   'des',
37 |   'durant',
38 |   'parmi',
39 |   'pres',
40 |   'malgre',
41 |   'chez',
42 |   'aupres',
43 |   "jusqu'",
44 |   'concernant',
45 |   'a',
46 |   'à',
47 |   'derriere',
48 |   'hors',
49 |   'outre',
50 |   'envers',
51 |   'sauf',
52 |   'via',
53 |   'jusque',
54 |   'suivant',
55 |   'hormis',
56 |   'environ',
57 |   'par dessus',
58 |   'excepte',
59 |   "quelqu'",
60 |   'because',
61 |   'grace',
62 |   'courant',
63 |   'au dessus',
64 |   'voici',
65 | ]
66 | 


--------------------------------------------------------------------------------
/learn/verbs/old.js:
--------------------------------------------------------------------------------
 1 | import verbs from './data.js'
 2 | import { learn, test, validate, compress } from 'suffix-thumb'
 3 | const hasPipe = /[\|\[]/
 4 | 
 5 | let index = {
 6 |   'je': 0, // "achète",
 7 |   'tu': 1, // "achètes",
 8 |   'il': 2, // "achète",
 9 |   'nous': 3, // "achetons",
10 |   'vous': 4, // "achetez",
11 |   'ils': 5, // "achètent"
12 | }
13 | 
14 | const doModel = function (tense, form) {
15 |   let pairs = []
16 |   const i = index[form]
17 |   Object.keys(verbs).forEach(inf => {
18 |     let want = verbs[inf][tense][i]
19 |     if (want && !hasPipe.test(want)) {
20 |       pairs.push([inf, want])
21 |     }
22 |   })
23 |   pairs = validate(pairs)
24 |   // test(pairs)
25 |   const model = learn(pairs)
26 |   return model
27 | }
28 | 
29 | 
// learn one present-tense model for every pronoun form
const tense = 'Présent'
const models = {}
const forms = ['je', 'tu', 'il', 'nous', 'vous', 'ils']
forms.forEach(form => {
  models[form] = doModel(tense, form)
})

// let model = doModel("Présent", 'je')
// model = compress(model)
// print all of the models as indented JSON
console.log(JSON.stringify(models, null, 2))
43 | 


--------------------------------------------------------------------------------
/data/lexicon/numbers/units.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   '°c',
 3 |   'celsius',
 4 |   '°f',
 5 |   'fahrenheit',
 6 |   'fahrenheits',
 7 |   'kelvin',
 8 |   'kelvins',
 9 |   '°n',
10 |   'm³',
11 | 
12 |   'hertz',
13 |   'km/h',
14 |   'byte',
15 |   'bytes',
16 |   // 'kb',
17 |   'kilobyte',
18 |   'kilobytes',
19 |   // 'mb',
20 |   'megabyte',
21 |   'megabytes',
22 |   // 'gb',
23 |   'gigabyte',
24 |   'gigabytes',
25 |   // 'tb',
26 |   'terabyte',
27 |   'terabytes',
28 |   'petabyte',
29 |   'petabytes',
30 |   'eb',
31 |   'exabyte',
32 |   'exabytes',
33 |   'zb',
34 |   'zettabyte',
35 |   'zettabytes',
36 |   'yb',
37 |   'yottabyte',
38 |   'yottabytes',
39 |   'joule',
40 |   'joules',
41 | 
42 |   'µs',
43 | 
44 |   'percent',
45 | 
46 | 
47 |   'gramme',
48 |   'grammes',
49 |   'kilogramme',
50 |   'kilogrammes',
51 |   'kilo',
52 |   'kilos',
53 |   'litre',
54 |   'litres',
55 |   'millilitre',
56 |   'millilitres',
57 |   'centimètre',
58 |   'centimètres',
59 |   'mètre',
60 |   'mètres',
61 |   'kilomètre',
62 |   'km',
63 |   'kms',
64 |   // pied
65 |   'pouce',
66 |   'pouces',
67 |   'mile',
68 |   'miles'
69 |   // livre
70 | ]
71 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2019 Spencer Kelly
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/plugins/dates/src/phrase/date/data.js:
--------------------------------------------------------------------------------
 1 | const months = {
 2 |   'janvier': 1, // January
 3 |   'février': 2, // February
 4 |   'fevrier': 2, // February
 5 |   'mars': 3, // March
 6 |   'avril': 4, // April
 7 |   'mai': 5, // May
 8 |   'juin': 6, // June
 9 |   'juillet': 7, // July
10 |   'aout': 8, // August
11 |   'septembre': 9, //September
12 |   'octobre': 10, // October
13 |   'novembre': 11, // November
14 |   'décembre': 12, // December
15 |   'decembre': 12, // December
16 |   'jan': 1,
17 |   'fév': 2,
18 |   'fev': 2,
19 |   'mar': 3,
20 |   'avr': 4,
21 |   'aou': 8,
22 |   'sep': 9,
23 |   'sept': 9, //hmm
24 |   'oct': 10,
25 |   'nov': 11,
26 |   'déc': 12,
27 |   'janv': 1,
28 |   'févr': 2,
29 |   'fevr': 2,
30 |   'juil': 7,
31 |   'juill': 7,
32 | }
33 | 
34 | const days = {
35 |   'lundi': 1, // Monday
36 |   'mardi': 2, // Tuesday
37 |   'mercredi': 3, // Wednesday
38 |   'jeudi': 4, // Thursday
39 |   'vendredi': 5, // Friday
40 |   'samedi': 6, // Saturday
41 |   'dimanche': 0, // Sunday
42 |   'lun': 1,
43 |   'mar': 2,
44 |   'mer': 3,
45 |   'jeu': 4,
46 |   'ven': 5,
47 |   'sam': 6,
48 |   'dim': 0,
49 | }
50 | export { months, days }


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/2nd-pass/neighbours.js:
--------------------------------------------------------------------------------
 1 | const hasBefore = {
 2 |   la: 'FemaleNoun',
 3 |   une: 'FemaleNoun',
 4 |   un: 'MaleNoun',
 5 |   du: 'MaleNoun',
 6 |   au: 'MaleNoun',
 7 |   des: 'PluralNoun',
 8 |   aux: 'PluralNoun',
 9 |   de: 'Noun',
10 |   // modals
11 |   dois: 'Verb',
12 |   doit: 'Verb',
13 |   devons: 'Verb',
14 |   devez: 'Verb',
15 |   doivent: 'Verb',
16 | 
17 |   peux: 'Verb',
18 |   peut: 'Verb',
19 |   pouvons: 'Verb',
20 |   pouvez: 'Verb',
21 |   peuvent: 'Verb',
22 |   // (conditional)
23 |   pouvait: 'Verb',
24 |   pourrait: 'Verb',
25 |   pourrais: 'Verb',
26 |   pourrions: 'Verb',
27 |   pourriez: 'Verb',
28 |   pourraient: 'Verb',
29 | 
30 |   // 
31 |   avoir: 'Noun',
32 |   pas: 'Verb' //maybe
33 | }
34 | 
35 | const tagNeighbours = function (terms, i, world) {
36 |   let setTag = world.methods.one.setTag
37 |   if (terms[i - 1]) {
38 |     let lastStr = terms[i - 1].normal
39 |     if (terms[i].tags.size === 0 && hasBefore.hasOwnProperty(lastStr)) {
40 |       setTag([terms[i]], hasBefore[lastStr], world, false, 'neighbour')
41 |       return true
42 |     }
43 |   }
44 |   return null
45 | }
46 | export default tagNeighbours


--------------------------------------------------------------------------------
/plugins/dates/src/api.js:
--------------------------------------------------------------------------------
 1 | import find from './find.js'
 2 | import parse from './phrase/index.js'
 3 | import spacetime from 'spacetime'
 4 | import toJson from './toJson.js'
 5 | 
 6 | export const getNth = (doc, n) => (typeof n === 'number' ? doc.eq(n) : doc)
 7 | 
 8 | 
 9 | const api = function (View) {
10 |   class Dates extends View {
11 |     constructor(document, pointer, groups, opts = {}) {
12 |       super(document, pointer, groups)
13 |       this.viewType = 'Dates'
14 |       this.opts = opts || {}
15 |     }
16 |     parse(n) {
17 |       return getNth(this, n).map(m => toJson(parse(m.this.opts)))
18 |     }
19 |     json(opts, n) {
20 |       let m = getNth(this, n)
21 |       let arr = m.map(vb => {
22 |         let out = vb.toView().json(opts)[0] || {}
23 |         let res = parse(vb, this.opts)
24 |         out.dates = toJson(res)
25 |         return out
26 |       }, [])
27 |       return arr
28 |     }
29 |   }
30 | 
31 |   View.prototype.dates = function (opts = {}) {
32 |     opts.today = spacetime(opts.today, opts.timezone)
33 |     let m = find(this, opts)
34 |     return new Dates(this.document, m.pointer, null, opts)
35 |   }
36 | }
37 | export default api


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/1st-pass/regex.js:
--------------------------------------------------------------------------------
 1 | const hasApostrophe = /['‘’‛‵′`´]/
 2 | 
 3 | // normal regexes
 4 | const doRegs = function (str, regs) {
 5 |   for (let i = 0; i < regs.length; i += 1) {
 6 |     if (regs[i][0].test(str) === true) {
 7 |       return regs[i]
 8 |     }
 9 |   }
10 |   return null
11 | }
12 | 
// tag terms[i] using the first regex that matches its text or normalized form
// returns true when a tag was applied, otherwise null
const checkRegex = function (terms, i, world) {
  const { setTag } = world.methods.one
  const term = terms[i]
  const { regexText, regexNormal, regexNumbers } = world.model.two
  const normal = term.machine || term.normal
  // keep dangling apostrophe?
  const keepPost = hasApostrophe.test(term.post) && !hasApostrophe.test(term.pre)
  const text = keepPost ? term.text + term.post.trim() : term.text
  // text-regexes run first, then the normalized ones
  let found = doRegs(text, regexText) || doRegs(normal, regexNormal)
  // hide a bunch of number regexes behind this one
  if (!found && /[0-9]/.test(normal)) {
    found = doRegs(normal, regexNumbers)
  }
  if (!found) {
    return null
  }
  // the reason shows the entry's third item, or else the regex itself
  setTag([term], found[1], world, false, `2-regex- '${found[2] || found[0]}'`)
  term.confidence = 0.6
  return true
}
35 | export default checkRegex
36 | 


--------------------------------------------------------------------------------
/src/01-one/lexicon/methods/adjective/index.js:
--------------------------------------------------------------------------------
 1 | import { convert, reverse } from 'suffix-thumb'
 2 | import model from '../model.js'
 3 | 
 4 | let fRev = reverse(model.adjective.female)
 5 | let pRev = reverse(model.adjective.plural)
 6 | let fpRev = reverse(model.adjective.femalePlural)
 7 | 
 8 | const toFemale = (str) => convert(str, model.adjective.female)
 9 | const toPlural = (str) => convert(str, model.adjective.plural)
10 | const toFemalePlural = (str) => convert(str, model.adjective.femalePlural)
11 | const fromFemale = (str) => convert(str, fRev)
12 | const fromPlural = (str) => convert(str, pRev)
13 | const fromFemalePlural = (str) => convert(str, fpRev)
14 | 
// all four forms of an adjective - the given string is used as the 'male' form
const conjugate = function (str) {
  const female = toFemale(str)
  const plural = toPlural(str)
  const femalePlural = toFemalePlural(str)
  return { male: str, female, plural, femalePlural }
}
23 | 
// every form of an adjective, as a list with any empty results removed
const all = (str) => {
  const forms = conjugate(str)
  return Object.values(forms).filter(Boolean)
}
28 | 
29 | export default {
30 |   all,
31 |   conjugate,
32 |   toFemale,
33 |   toPlural,
34 |   toFemalePlural,
35 |   fromFemale,
36 |   fromPlural,
37 |   fromFemalePlural,
38 | }
39 | // console.log(conjugate('frais'))


--------------------------------------------------------------------------------
/data/lexicon/misc/expressions.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   'a la',
 3 |   'ah',
 4 |   'ahem',
 5 |   'argh',
 6 |   'bah',
 7 |   'boo',
 8 |   'bye',
 9 |   'dammit',
10 |   'damn',
11 |   'damnit',
12 |   'dang',
13 |   'duh',
14 |   'eek',
15 |   'eep',
16 |   'eh',
17 |   'et cetera',
18 |   'eww',
19 |   'fuck',
20 |   'gah',
21 |   'gee',
22 |   'golly',
23 |   'goodbye',
24 |   'grr',
25 |   'haha',
26 |   'hahaha',
27 |   'hai',
28 |   'hee',
29 |   'hell',
30 |   'hello',
31 |   'hey',
32 |   'hi',
33 |   'hmm',
34 |   'holy moly',
35 |   'holy',
36 |   'hurrah',
37 |   'lmao',
38 |   'lmfao',
39 |   'lol',
40 |   'lols',
41 |   'meh',
42 |   'mmm',
43 |   'nah',
44 |   'nope',
45 |   'oh',
46 |   'ohh',
47 |   'ooh',
48 |   'ooo',
49 |   'oops',
50 |   'ow',
51 |   'oy',
52 |   'pff',
53 |   'phew',
54 |   'please',
55 |   'plz',
56 |   'psst',
57 |   'sheesh',
58 |   'shhh',
59 |   'shit',
60 |   'tsk',
61 |   'ugh',
62 |   'uh huh',
63 |   'uh oh',
64 |   'uh',
65 |   'uhh',
66 |   'uhm',
67 |   'voila',
68 |   'whee',
69 |   'whew',
70 |   'whoa',
71 |   'wow',
72 |   'wtaf',
73 |   'wtf',
74 |   'ya',
75 |   'yaa',
76 |   'yahoo',
77 |   'yay',
78 |   'yeah',
79 |   'yuck',
80 |   'yup',
81 |   "d'oh",
82 | ]
83 | 


--------------------------------------------------------------------------------
/.github/workflows/build-and-test.yml:
--------------------------------------------------------------------------------
 1 | name: Build and test
 2 | 
 3 | on: [pull_request]
 4 | 
 5 | jobs:
 6 |   build-and-test:
 7 |     runs-on: ${{ matrix.os }}
 8 | 
 9 |     strategy:
10 |       matrix:
11 |         node-version: [14.x, 18.x]
12 |         os: [ubuntu-latest, windows-latest]
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v3
16 | 
17 |       - name: use node.js ${{ matrix.node-version }}
18 |         uses: actions/setup-node@v3
19 |         with:
20 |           node-version: ${{ matrix.node-version }}
21 | 
22 |       - name: cache dependencies
23 |         uses: actions/cache@v3
24 |         with:
25 |           path: ~/.npm
26 |           key: ${{ runner.os }}-npm-${{ matrix.node-version }}-${{ hashFiles('package-lock.json') }}
27 |           restore-keys: |
28 |             ${{ runner.os }}-npm-${{ matrix.node-version }}-
29 |             ${{ runner.os }}-npm-
30 | 
31 |       - name: install
32 |         run: |
33 |           npm ci
34 | 
35 |       - name: static checks
36 |         run: |
37 |           npm run lint
38 | 
39 |       - name: build
40 |         run: |
41 |           npm run build
42 | 
43 |       - name: test
44 |         run: |
45 |           npm run test
46 |           npm run testb
47 | 


--------------------------------------------------------------------------------
/learn/giga/getList.js:
--------------------------------------------------------------------------------
 1 | import { forEachSync } from './_giga.js'
 2 | import doSentences from './french.js'
 3 | import fs from 'fs'
 4 | 
 5 | let ids = []
 6 | for (let i = 1; i <= 10; i += 1) {
 7 |   let str = String(i).padStart(4, '0')
 8 |   ids.push(str)
 9 | }
10 | // ids = ['0004']
11 | 
// every lemma collected so far
let list = []
// the part-of-speech value we are collecting
const tag = 'NOM'

// collect the lemma of each term with pos 'NOM' that directly follows 'le' or 'un'
// - both.fr is the list of terms for one sentence
const doBoth = function (both) {
  const terms = both.fr
  terms.forEach((term, i) => {
    // the first term has nothing before it
    if (i === 0 || term['$'].pos !== tag) {
      return
    }
    const last = terms[i - 1]['$text'].toLowerCase()
    if (last !== 'le' && last !== 'un') {
      return
    }
    const w = term['$text']
    const inf = term['$'].lem
    // console.log(last, w, inf)
    // both the word and its lemma must be present, but only the lemma is stored
    if (w && inf) {
      list.push(inf.toLowerCase().trim())
    }
  })
}
36 | 
// run every id through doSentences - a failure is logged, and the next id still runs
const runOne = async function (id) {
  try {
    console.log(`\ndoing ${id}:\n`)
    await doSentences(id, doBoth)
  } catch (err) {
    console.log(err)
  }
}
await forEachSync(ids, runOne)
console.log('done')
// write the collected list out as a js module
const output = 'export default ' + JSON.stringify(list)
fs.writeFileSync('./pairs.js', output)
47 | 


--------------------------------------------------------------------------------
/src/03-three/verbs/api/parse.js:
--------------------------------------------------------------------------------
 1 | import getAdverbs from './adverbs.js'
 2 | 
 3 | const getAuxiliary = function (vb, root) {
 4 |   let parts = vb.splitBefore(root)
 5 |   if (parts.length <= 1) {
 6 |     return vb.none()
 7 |   }
 8 |   let aux = parts.eq(0)
 9 |   aux = aux.not('(#Adverb|#Negative|#Prefix)')
10 |   return aux
11 | }
12 | 
// any negative-tagged words inside the verb-phrase
const getNegative = function (vb) {
  const negatives = vb.match('#Negative')
  return negatives
}
16 | 
17 | // pull-apart phrasal-verb into verb-particle
18 | // const getPhrasal = function (root) {
19 | //   let particle = root.match('#Particle$')
20 | //   return {
21 | //     verb: root.not(particle),
22 | //     particle: particle,
23 | //   }
24 | // }
25 | 
// run the 'root' compute on the view, then return its text in 'root' form
const getRoot = function (view) {
  view.compute('root')
  return view.text('root')
}
31 | 
// break a verb-phrase into its parts, working on a clone of the given view
const parseVerb = function (view) {
  const vb = view.clone()
  // vb.contractions().expand()
  const root = getRoot(vb)
  const prefix = vb.match('#Prefix')
  const adverbs = getAdverbs(vb, root)
  const auxiliary = getAuxiliary(vb, root)
  const negative = getNegative(vb)
  // phrasal: getPhrasal(root),
  return { root, prefix, adverbs, auxiliary, negative }
}
46 | export default parseVerb
47 | 


--------------------------------------------------------------------------------
/learn/verbs/toPairs.js:
--------------------------------------------------------------------------------
 1 | import verbs from './data.js'
 2 | import scraped from '../scrape/result.js'
 3 | 
 4 | import { learn, test, validate, compress } from 'suffix-thumb'
 5 | const hasPipe = /[\|\[]/
 6 | 
 7 | let index = {
 8 |   'je': 0, // "achète",
 9 |   'tu': 1, // "achètes",
10 |   'il': 2, // "achète",
11 |   'nous': 3, // "achetons",
12 |   'vous': 4, // "achetez",
13 |   'ils': 5, // "achètent"
14 | }
15 | 
// collect the full list of conjugated forms for one tense, keyed by infinitive
// a verb is left out when it has no forms for this tense,
// or when any of its forms is empty or a single character
const getPairs = function (tense) {
  const byWord = {}
  Object.keys(verbs).forEach(inf => {
    const words = verbs[inf][tense] || []
    // without the length check, a verb missing this tense is stored as undefined
    if (words.length === 0 || words.some(str => str === '' || str.length === 1)) {
      return
    }
    byWord[inf] = words
  })
  return byWord
}
27 | 
28 | 
29 | 
// start with our own imperfect-tense data, then fill any gaps from the scraped data
const res = getPairs('Imparfait')
for (const inf of Object.keys(scraped)) {
  // our own data wins
  if (res[inf]) {
    continue
  }
  const vals = Object.values(scraped[inf]['Imperfect'])
  // a usable entry has at least 5 forms, none of them empty, one-letter, or 'le'
  const hasBadForm = vals.some(str => str === '' || str.length === 1 || str === 'le')
  if (vals.length >= 5 && !hasBadForm) {
    res[inf] = vals
  }
}

// let model = doModel("Présent", 'je')
// model = compress(model)
// print the merged data, then the number of verbs in it
console.log(JSON.stringify(res, null, 2))
console.log(Object.keys(res).length)


--------------------------------------------------------------------------------
/src/03-three/verbs/api/toJSON.js:
--------------------------------------------------------------------------------
 1 | import parseVerb from './parse.js'
 2 | // import getGrammar from './parse/grammar/index.js'
 3 | // import { getTense } from './lib.js'
 4 | 
 5 | const toArray = function (m) {
 6 |   if (!m || !m.isView) {
 7 |     return []
 8 |   }
 9 |   const opts = { normal: true, terms: false, text: false }
10 |   return m.json(opts).map(s => s.normal)
11 | }
12 | 
// the normalized text of a view - or an empty string for a non-view
const toText = function (m) {
  const isView = Boolean(m && m.isView)
  return isView ? m.text('normal') : ''
}
19 | 
20 | // const toInfinitive = function (root) {
21 | //   const { verbToInfinitive } = root.methods.two.transform
22 | //   let str = root.text('normal')
23 | //   return verbToInfinitive(str, root.model, getTense(root))
24 | // }
25 | 
// a plain-object summary of one verb-phrase
const toJSON = function (vb) {
  const { root, adverbs, auxiliary, negative, prefix } = parseVerb(vb)
  vb = vb.clone().toView()
  // const info = getGrammar(vb, parsed)
  const out = {}
  out.root = root
  out.preAdverbs = toArray(adverbs.pre)
  out.postAdverbs = toArray(adverbs.post)
  out.auxiliary = toText(auxiliary)
  out.negative = negative.found
  out.prefix = toText(prefix)
  // the root is reported as the infinitive, too
  out.infinitive = root
  // grammar: info,
  return out
}
41 | export default toJSON
42 | 


--------------------------------------------------------------------------------
/data/lexicon/places/places.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   //some of the busiest airports in the world from
 3 |   //https://www.world-airport-codes.com/world-top-30-airports.html
 4 |   'ams',
 5 |   'atl',
 6 |   'bcn',
 7 |   'bkk',
 8 |   'cdg',
 9 |   'cgk',
10 |   'clt',
11 |   'den',
12 |   'dfw',
13 |   'dxb',
14 |   'fco',
15 |   'fra',
16 |   'hkg',
17 |   'hnd',
18 |   'iax',
19 |   'icn',
20 |   'ist',
21 |   'jfk',
22 |   'kul',
23 |   'las',
24 |   'lax',
25 |   'lgw',
26 |   'lhr',
27 |   'mco',
28 |   'muc',
29 |   'ord',
30 |   'pek',
31 |   'phl',
32 |   'phx',
33 |   'sfo',
34 |   'syd',
35 |   'yyz',
36 | 
37 |   'antarctic ocean',
38 |   'arctic ocean',
39 |   'atlantic ocean',
40 |   'everglades',
41 |   'great britain',
42 |   'great lakes',
43 |   'indian ocean',
44 |   'new england',
45 |   'pacific ocean',
46 | 
47 |   //continents
48 |   'africa',
49 |   'europe',
50 |   'americas',
51 |   'asia',
52 | 
53 |   //some notable neighbourhoods (just #Place)
54 |   'midtown',
55 |   'downtown',
56 |   'uptown',
57 |   'the bronx',
58 |   'brooklyn',
59 |   'manhattan',
60 |   'greenwich',
61 |   'soho',
62 |   'harlem',
63 |   'chinatown',
64 |   'the hamptons',
65 |   'beverly hills',
66 |   'bel air',
67 |   'malibu',
68 |   'gay village',
69 |   'sunderland',
70 | ]
71 | 


--------------------------------------------------------------------------------
/data/models/index.js:
--------------------------------------------------------------------------------
 1 | import noun from './noun/plurals.js'
 2 | import adjective from './adjective/index.js'
 3 | 
 4 | import futureTense from './verb/future-tense.js'
 5 | import imperfect from './verb/imperfect.js'
 6 | import pastParticiple from './verb/past-participle.js'
 7 | import presentTense from './verb/present-tense.js'
 8 | 
 9 | const vbOrder = ['je', 'tu', 'il', 'nous', 'vous', 'ils']
10 | const nOrder = ['plural']
11 | const adjOrder = ['female', 'plural', 'femalePlural']
12 | const todo = {
13 |   noun: { data: noun, keys: nOrder },
14 |   adjective: { data: adjective, keys: adjOrder },
15 |   futureTense: { data: futureTense, keys: vbOrder },
16 |   imperfect: { data: imperfect, keys: vbOrder },
17 |   pastParticiple: { data: pastParticiple, keys: ['prt'] },
18 |   presentTense: { data: presentTense, keys: vbOrder },
19 | }
20 | 
21 | // turn our conjugation data into word-pairs
22 | let model = {}
23 | Object.keys(todo).forEach(k => {
24 |   model[k] = {}
25 |   let { data, keys } = todo[k]
26 |   keys.forEach((form, i) => {
27 |     let pairs = []
28 |     Object.keys(data).forEach(inf => {
29 |       pairs.push([inf, data[inf][i]])
30 |     })
31 |     model[k][form] = pairs
32 |     // console.log(k, form, pairs.length)
33 |   })
34 | })
35 | 
36 | export default model
37 | 


--------------------------------------------------------------------------------
/data/lexicon/people/people.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   //famous people with names that are hard to recognize independendtly
 3 |   //male
 4 |   'hitler',
 5 |   'ronaldo',
 6 |   'ashton kutcher',
 7 |   'barack obama',
 8 |   'cardinal wolsey',
 9 |   'carson palmer',
10 |   'denzel washington',
11 |   'dick wolf',
12 |   'emeril lagasse',
13 |   'hulk hogan',
14 |   'kanye west',
15 |   'kiefer sutherland',
16 |   'kobe bryant',
17 |   'lebron james',
18 |   'messiaen',
19 |   'mitt romney',
20 |   'mubarek',
21 |   'ray romano',
22 |   'rod stewart',
23 |   'ronaldinho',
24 |   'rush limbaugh',
25 |   'saddam hussain',
26 |   'slobodan milosevic',
27 |   'tiger woods',
28 |   'valentino rossi',
29 |   'van gogh',
30 | 
31 |   //female
32 |   'halle berry',
33 |   'jk rowling',
34 |   'oprah winfrey',
35 |   'paris hilton',
36 |   'reese witherspoon',
37 |   'scarlett johansson',
38 |   'theresa may',
39 |   'tyra banks',
40 |   'virgin mary',
41 | 
42 |   //sometimes firstname, sometimes lastname
43 |   'brock',
44 |   'carson',
45 |   'clinton',
46 |   'cruz',
47 |   'dalton',
48 |   'dante',
49 |   'effie',
50 |   'ezekiel',
51 |   'gaston',
52 |   'inez',
53 |   'jaime',
54 |   'jefferson',
55 |   'lee',
56 |   'nettie',
57 |   'ora',
58 |   'palmer',
59 |   'piper',
60 |   'sung',
61 | ]
62 | 


--------------------------------------------------------------------------------
/learn/wikinews/getLexicon.js:
--------------------------------------------------------------------------------
 1 | let lines = require('./parse')
 2 | // lines = lines.slice(0, 300)
 3 | 
 4 | let tags = {}
 5 | lines.forEach((s) => {
 6 |   s.forEach((w) => {
 7 |     tags[w.tag] = tags[w.tag] || {}
 8 |     let word = w.word.toLowerCase()
 9 |     tags[w.tag][word] = tags[w.tag][word] || 0
10 |     tags[w.tag][word] += 1
11 |   })
12 | })
13 | 
14 | // 'P+D': 241,
15 | // ADJ: 719,
16 | // ADV: 311,
17 | // CC: 172,
18 | // CLO: 32,
19 | // CLR: 53,
20 | // CLS: 88,
21 | // CS: 90,
22 | // DET: 1353,
23 | // ET: 136,
24 | 
25 | // nouns:
26 | // NC: 1877,
27 | // NPP: 493,
28 | // P: 1242,
29 | // PREF: 8,
30 | 
31 | // PRO: 43, //pronoun
32 | // PROREL: 89,  //relative pronoun
33 | // U: 100,
34 | 
35 | // V: 509,
36 | // VINF: 140,
37 | // VPP: 402, //PastTense
38 | // VPR: 61, //Gerund
39 | // VS: 10, //presentTense
40 | 
41 | // VPP: 'PastTense',
42 | // VPR: 'Gerund',
43 | // VS: 'V',
44 | 
// the keys of a count-object, highest count first, keeping only counts above 1
const top = function (obj) {
  const byCountDesc = (a, b) => {
    if (obj[a] > obj[b]) {
      return -1
    }
    return obj[a] < obj[b] ? 1 : 0
  }
  const sorted = Object.keys(obj).sort(byCountDesc)
  return sorted.filter(k => obj[k] > 1)
}
59 | 
60 | console.log(JSON.stringify(top(tags['ADJ']), null, 2))
61 | 


--------------------------------------------------------------------------------
/learn/verbs/single-pairs.js:
--------------------------------------------------------------------------------
 1 | import verbs from './data.js'
 2 | import scraped from '../scrape/result.js'
 3 | 
 4 | import { learn, test, validate, compress } from 'suffix-thumb'
 5 | const hasPipe = /[\|\[]/
 6 | 
 7 | let index = {
 8 |   'je': 0, // "achète",
 9 |   'tu': 1, // "achètes",
10 |   'il': 2, // "achète",
11 |   'nous': 3, // "achetons",
12 |   'vous': 4, // "achetez",
13 |   'ils': 5, // "achètent"
14 | }
15 | 
// collect the first listed form of a tense, keyed by infinitive
// a verb is skipped when it has no forms, or when any form is empty or a single character
const getPairs = function (tense) {
  const byWord = {}
  for (const inf of Object.keys(verbs)) {
    const words = verbs[inf][tense] || []
    const isBad = words.length === 0 || words.some(str => str === '' || str.length === 1)
    if (!isBad) {
      byWord[inf] = words[0]
    }
  }
  return byWord
}
27 | 
28 | 
29 | 
// start with our own past-participles, then fill any gaps from the scraped data
const res = getPairs('Participe Passé')
for (const inf of Object.keys(scraped)) {
  // our own data wins
  if (res[inf]) {
    continue
  }
  const vals = Object.values(scraped[inf]['Present Perfect'])
  // a usable entry has at least 5 forms, none of them empty, one-letter, or 'le'
  const hasBadForm = vals.some(str => str === '' || str.length === 1 || str === 'le')
  if (vals.length >= 5 && !hasBadForm) {
    // remove a leading 'a ' or 'ai ' from the first form
    res[inf] = vals[0].replace(/^(a|ai) /, '')
  }
}

// let model = doModel("Présent", 'je')
// model = compress(model)
// print the merged data, then the number of verbs in it
console.log(JSON.stringify(res, null, 2))
console.log(Object.keys(res).length)


--------------------------------------------------------------------------------
/src/03-three/verbs/api/find.js:
--------------------------------------------------------------------------------
 1 | const findVerbs = function (doc) {
 2 |   let m = doc.match('<Verb>')
 3 | 
 4 |   m = m.splitAfter('@hasComma')
 5 | 
 6 |   // the reason he will is ...
 7 |   // all i do is talk
 8 |   m = m.splitAfter('[(do|did|am|was|is|will)] (is|was)', 0)
 9 |   // m = m.splitAfter('[(do|did|am|was|is|will)] #PresentTense', 0)
10 | 
11 |   // cool
12 | 
13 |   // like being pampered
14 |   m = m.splitBefore('(#Verb && !#Copula) [being] #Verb', 0)
15 |   // like to be pampered
16 |   m = m.splitBefore('#Verb [to be] #Verb', 0)
17 | 
18 |   // implicit conjugation - 'help fix'
19 | 
20 |   m = m.splitAfter('[help] #PresentTense', 0)
21 |   // what i can sell is..
22 |   m = m.splitBefore('(#PresentTense|#PastTense) [#Copula]$', 0)
23 |   // what i can sell will be
24 |   m = m.splitBefore('(#PresentTense|#PastTense) [will be]$', 0)
25 | 
26 |   // professes love
27 |   let toVerbs = m.match('(#PresentTense|#PastTense) #Infinitive')
28 |   if (toVerbs.found && !toVerbs.has('^go')) {
29 |     m = m.splitBefore('(#PresentTense|#PastTense) [#Infinitive]', 0)
30 |   }
31 |   // 'allow yourself'
32 |   m = m.not('#Reflexive$')
33 |   //ensure there's actually a verb
34 |   m = m.if('#Verb')
35 |   // the reason he will is ...
36 |   // ensure it's not two verbs
37 |   return m
38 | }
39 | export default findVerbs
40 | 


--------------------------------------------------------------------------------
/learn/verbs/learn.js:
--------------------------------------------------------------------------------
 1 | let verbs = require('./data')
 2 | 
 3 | let pairs = []
 4 | Object.keys(verbs).forEach((inf) => {
 5 |   let want = verbs[inf]['Présent'][0]
 6 |   if (want) {
 7 |     pairs.push([inf, want])
 8 |   }
 9 | })
10 | 
// suffix-replacement rules, as [infinitive ending, replacement]
// order matters - the first matching rule wins, so longer endings come first
const regs = [
  [/ébrer$/, 'èbre'],
  [/eter$/, 'ette'],
  [/er$/, 'e'],

  [/dre$/, 'ds'],
  [/ure$/, 'us'],
  [/tre$/, 's'],
  [/ire$/, 'is'],
  [/ore$/, 'os'],
  [/cre$/, 'cs'],

  [/llir$/, 'lle'],
  [/voir$/, 'vois'],
  [/tir$/, 's'],
  [/ir$/, 's'],
]

// guess a conjugated form for an infinitive, using the suffix rules
const toJe = function (str) {
  // apply the first rule whose ending matches
  const rule = regs.find(([reg]) => reg.test(str))
  if (rule) {
    // for some reason, this seems to happen
    return str.replace(rule[0], rule[1]).replace(/î/, 'i')
  }
  // otherwise...
  return str + 's'
}
46 | 
// score the rules against the known forms, printing only the misses that end in '-oir'
let count = 0
for (const [inf, want] of pairs) {
  const je = toJe(inf)
  if (je === want) {
    count += 1
    continue
  }
  if (inf.endsWith('oir')) {
    console.log(`${inf}   ~${je}~    want:(${want})`)
  }
}

// the fraction of verbs guessed correctly
console.log(count / pairs.length)
60 | 


--------------------------------------------------------------------------------
/src/03-three/numbers/format/index.js:
--------------------------------------------------------------------------------
 1 | import toText from './toText.js'
 2 | import { toOrdinal } from '../parse/_data.js'
 3 | 
 4 | const makeSuffix = function (obj) {
 5 |   return {
 6 |     prefix: obj.prefix || '',
 7 |     suffix: obj.suffix || '',
 8 |   }
 9 | }
10 | 
/**
 * Render a parsed number in the requested format.
 * @param {object} parsed - reads `num`, plus optional `prefix` and `suffix`
 * @param {string} fmt - 'TextOrdinal', 'TextCardinal', 'Ordinal' or 'Cardinal';
 *   anything else prints the number as-is (or nothing when `num` is falsy)
 * @returns {string} prefix + formatted number + suffix
 */
const formatNumber = function (parsed, fmt) {
  const { prefix, suffix } = makeSuffix(parsed)
  const wrap = (num) => `${prefix}${num}${suffix}`
  if (fmt === 'TextOrdinal') {
    const words = toText(parsed.num)
    const last = words[words.length - 1]
    const ordinal = toOrdinal[last]
    // only swap the final word when its ordinal form is known,
    // so a missing entry does not print the string 'undefined'
    if (ordinal !== undefined) {
      words[words.length - 1] = ordinal
    }
    return wrap(words.join(' '))
  }
  if (fmt === 'TextCardinal') {
    return wrap(toText(parsed.num).join(' '))
  }
  // numeric formats
  // '55e'
  if (fmt === 'Ordinal') {
    const str = String(parsed.num)
    // only the number 1 is written '1er' - 11, 21, 31 ... all take 'e'
    const ending = str === '1' ? 'er' : 'e'
    return wrap(str + ending)
  }
  if (fmt === 'Cardinal') {
    return wrap(String(parsed.num))
  }
  return wrap(String(parsed.num || ''))
}
43 | export default formatNumber


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/2nd-pass/suffix-lookup.js:
--------------------------------------------------------------------------------
 1 | 
 2 | //sweep-through all suffixes
 3 | const suffixLoop = function (str = '', suffixes = []) {
 4 |   const len = str.length
 5 |   let max = 7
 6 |   if (len <= max) {
 7 |     max = len - 1
 8 |   }
 9 |   for (let i = max; i > 1; i -= 1) {
10 |     let suffix = str.substr(len - i, len)
11 |     if (suffixes[suffix.length].hasOwnProperty(suffix) === true) {
12 |       // console.log(suffix)
13 |       let tag = suffixes[suffix.length][suffix]
14 |       return tag
15 |     }
16 |   }
17 |   return null
18 | }
19 | 
20 | // decide tag from the ending of the word
/**
 * Tag a term that has no tags yet, based on the ending of its normal form,
 * or failing that, of its implicit form.
 * Returns true when a tag was set, and null otherwise.
 */
const suffixCheck = function (terms, i, world) {
  const { setTag } = world.methods.one
  const patterns = world.model.two.suffixPatterns
  const term = terms[i]
  if (term.tags.size !== 0) {
    return null
  }
  // [text to look up, reason handed to setTag] - normal form first
  const attempts = [[term.normal, '2-suffix']]
  if (term.implicit) {
    attempts.push([term.implicit, '2-implicit-suffix'])
  }
  for (const [str, reason] of attempts) {
    const tag = suffixLoop(str, patterns)
    if (tag !== null) {
      setTag([term], tag, world, false, reason)
      term.confidence = 0.7
      return true
    }
  }
  return null
}
44 | export default suffixCheck
45 | 


--------------------------------------------------------------------------------
/plugins/dates/scratch.js:
--------------------------------------------------------------------------------
 1 | import nlp from '../../src/index.js'
 2 | import plg from './src/plugin.js'
 3 | nlp.plugin(plg)
 4 | // nlp.verbose(true)
 5 | let arr = [
 6 |   `Je peux emprunter votre voiture entre le 2 mai et le 14 juillet`,
 7 |   `Je peux emprunter votre voiture jusqu'au quatorze juillet`,
 8 |   'entre sept et oct',
 9 |   `jusqu'en juin`,
10 |   `jusqu'à juin`,
11 |   `jusqu'à le quatorze juillet`,
12 |   'decembre 25, 2012',
13 |   'Juin 5, 2012',
14 |   'hier après-midi',
15 |   '14h30 demain',
16 |   'hier après-midi',
17 |   'aujourd\'hui',
18 |   'hier soir',
19 |   `Novembre 3, 2021`,
20 |   // 'Novembre 3, 2021',
21 |   // '12/01/2018',
22 |   // '13/01/2018',
23 |   // '5/2/2020',
24 |   `le quatorze juillet.`,
25 |   'Mercredi 11 mars',
26 |   `Le 6 avril`,
27 |   `Il n'y a pas d'augmentation prévue jusqu'en 2032`,
28 |   `le 3 novembre 2012`,
29 |   'je suis né le 2 septembre 1982',
30 |   'rendez-vous avant vendredi',
31 |   `je t'appellerai jusqu'en septembre`,
32 |   `15/12/2020`,
33 |   `2020-10-02T07:10:12`,
34 |   `juin 2e`,
35 |   `2021-02-12`,
36 |   `je suis né en juin`,
37 |   `ta voiture jusqu’à lundi prochain`,
38 |   `entre sept et oct`,
39 | ]
40 | let doc = nlp(arr[0]).debug()
41 | 
42 | // let m = doc.match('[<date>#Value] [<month>#Month]')
43 | // m.debug()
44 | // m.groups().date.debug()
45 | // m.groups().month.debug()
46 | 
47 | let json = doc.dates({ timezone: 'UTC', today: '2023-03-02' }).json({ terms: false })
48 | console.dir(json, { depth: 5 })


--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "root": true,
 3 |   "extends": [
 4 |     "eslint:recommended",
 5 |     "plugin:regexp/recommended"
 6 |   ],
 7 |   "ignorePatterns": [
 8 |     "builds/*",
 9 |     "learn/**",
10 |     "scripts/**",
11 |     "plugins/dates/**"
12 |   ],
13 |   "env": {
14 |     "es6": true,
15 |     "browser": true,
16 |     "node": true
17 |   },
18 |   "parserOptions": {
19 |     "ecmaVersion": "latest",
20 |     "sourceType": "module"
21 |   },
22 |   "rules": {
23 |     "comma-dangle": [
24 |       1,
25 |       "only-multiline"
26 |     ],
27 |     "quotes": [
28 |       0,
29 |       "single",
30 |       "avoid-escape"
31 |     ],
32 |     "max-nested-callbacks": [
33 |       1,
34 |       4
35 |     ],
36 |     "max-params": [
37 |       1,
38 |       5
39 |     ],
40 |     "consistent-return": 1,
41 |     "no-bitwise": 1,
42 |     "no-empty": 1,
43 |     "no-console": 1,
44 |     "no-duplicate-imports": 1,
45 |     "no-eval": 2,
46 |     "no-implied-eval": 2,
47 |     "no-mixed-operators": 2,
48 |     "no-multi-assign": 2,
49 |     "no-nested-ternary": 1,
50 |     "no-prototype-builtins": 0,
51 |     "no-self-compare": 1,
52 |     "no-sequences": 1,
53 |     "no-shadow": 2,
54 |     "no-unmodified-loop-condition": 1,
55 |     "no-use-before-define": 1,
56 |     "prefer-const": 0,
57 |     "radix": 1,
58 |     "no-unused-vars": 1,
59 |     "regexp/prefer-d": 0,
60 |     "regexp/prefer-w": 0,
61 |     "regexp/prefer-range": 0,
62 |     "regexp/no-unused-capturing-group": 0
63 |   }
64 | }


--------------------------------------------------------------------------------
/data/lexicon/misc/currencies.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   '¢',
 3 |   '$',
 4 |   '£',
 5 |   '¥',
 6 |   '฿',
 7 |   '₡',
 8 |   '€',
 9 |   '₭',
10 |   '₨',
11 |   '﷼',
12 |   'aud',
13 |   'baht',
14 |   'bitcoin',
15 |   'bitcoins',
16 |   'cad',
17 |   'cent',
18 |   'cents',
19 |   'cny',
20 |   'denar',
21 |   'denars',
22 |   'dime',
23 |   'dimes',
24 |   'dinar',
25 |   'dinars',
26 |   'dirham',
27 |   'dirhams',
28 |   'dkk',
29 |   'dobra',
30 |   'dobras',
31 |   'dollar',
32 |   'dollars',
33 |   'eur',
34 |   'euro',
35 |   'euros',
36 |   'forint',
37 |   'forints',
38 |   'franc',
39 |   'francs',
40 |   'gbp',
41 |   'hkd',
42 |   'inr',
43 |   'jpy',
44 |   'kn',
45 |   'kr',
46 |   'nis',
47 |   'krona',
48 |   'kronas',
49 |   'krw',
50 |   'kwanza',
51 |   'kwanzas',
52 |   'kyat',
53 |   'kyats',
54 |   'lei',
55 |   'lempira',
56 |   'lempiras',
57 |   'lira',
58 |   'liras',
59 |   'pence',
60 |   'pences',
61 |   'pennies',
62 |   'penny',
63 |   'peso',
64 |   'pesos',
65 |   'pound sterling',
66 |   'pound sterlings',
67 |   'pound',
68 |   'pounds',
69 |   'riel',
70 |   'rouble',
71 |   'roubles',
72 |   'rp',
73 |   'rupee',
74 |   'rupees',
75 |   'shekel',
76 |   'shekels',
77 |   'sheqel',
78 |   'sheqels',
79 |   'shilling',
80 |   'shillings',
81 |   'sterling',
82 |   'sterlings',
83 |   'usd',
84 |   'xaf',
85 |   'xof',
86 |   'yen',
87 |   'yuan',
88 |   'yuans',
89 |   'zł',
90 |   'zloty',
91 |   'zlotys',
92 |   'ден',
93 |   'лв',
94 |   'руб',
95 | ]
96 | 


--------------------------------------------------------------------------------
/src/03-three/numbers/data.js:
--------------------------------------------------------------------------------
 1 | export default {
 2 | 
 3 |   ones: [
 4 |     [0, 'zero', 'zeroième'],
 5 |     [1, 'un', 'unième'],
 6 |     [2, 'deux', 'deuxième'],
 7 |     [3, 'trois', 'troisième'],
 8 |     [4, 'quatre', 'quatrième'],
 9 |     [5, 'cinq', 'cinquième'],
10 |     [6, 'six', 'sixième'],
11 |     [7, 'sept', 'septième'],
12 |     [8, 'huit', 'huitième'],
13 |     [9, 'neuf', 'neuvième'],
14 |     [10, 'dix', 'dixième'],
15 |     [11, 'onze', 'onzième'],
16 |     [12, 'douze', 'douzième'],
17 |     [13, 'treize', 'treizième'],
18 |     [14, 'quatorze', 'quatorzième'],
19 |     [15, 'quinze', 'quinzième'],
20 |     [16, 'seize', 'seizième'],
21 |     [17, 'dix sept', 'dix septième'],
22 |     [18, 'dix huit', 'dix huitième'],
23 |     [19, 'dix neuf', 'dix neuvième'],
24 |   ],
25 |   tens: [
26 |     [20, 'vingt', 'vingtième'],
27 |     [30, 'trente', 'trentième'],
28 |     [40, 'quarante', 'quarantième'],
29 |     [50, 'cinquante', 'cinquantième'],
30 |     [60, 'soixante', 'soixantième'],
31 |     [70, 'soixante dix', 'soixante dixième'],
32 |     [80, 'quatre vingt', 'quatre vingtième'],
33 |     [90, 'quatre vingt dix', 'quatre vingt dixième'],
34 |   ],
35 |   multiples: [
36 |     [100, 'cent', 'centième'],
37 |     [1000, 'mille', 'millième'],
38 |     [1000000, 'million', 'millionième'],//million 1000,000
39 |     [1000000000, 'milliard', 'milliardième'],//billion 1000,000,000
40 |     // [1000000000000, 'mille milliards', 'mille milliardième'],//trillion 1000,000,000
41 |   ]
42 | 
43 | }


--------------------------------------------------------------------------------
/plugins/dates/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "fr-compromise-dates",
 3 |   "description": "plugin for fr-compromise",
 4 |   "version": "0.0.2",
 5 |   "author": "Spencer Kelly <spencermountain@gmail.com> (http://spencermounta.in)",
 6 |   "main": "./src/plugin.js",
 7 |   "unpkg": "./builds/fr-compromise-dates.min.js",
 8 |   "module": "./builds/fr-compromise-dates.mjs",
 9 |   "type": "module",
10 |   "sideEffects": false,
11 |   "types": "./index.d.ts",
12 |   "exports": {
13 |     ".": {
14 |       "import": "./src/plugin.js",
15 |       "require": "./builds/fr-compromise-dates.cjs",
16 |       "types": "./index.d.ts"
17 |     }
18 |   },
19 |   "repository": {
20 |     "type": "git",
21 |     "url": "git://github.com/nlp-compromise/fr-compromise.git"
22 |   },
23 |   "homepage": "https://github.com/nlp-compromise/fr-compromise/tree/master/plugins/dates",
24 |   "scripts": {
25 |     "test": "tape \"./tests/**/*.test.js\" | tap-dancer  --color always",
26 |     "testb": "cross-env TESTENV=prod tape \"./tests/**/*.test.js\" | tap-dancer  --color always",
27 |     "watch": "amble ./scratch.js",
28 |     "perf": "node ./scripts/perf.js",
29 |     "build": "rollup -c --silent"
30 |   },
31 |   "files": [
32 |     "builds/",
33 |     "src/",
34 |     "index.d.ts"
35 |   ],
36 |   "eslintIgnore": [
37 |     "builds/*.js"
38 |   ],
39 |   "peerDependencies": {
40 |     "fr-compromise": ">=0.2.0"
41 |   },
42 |   "dependencies": {
43 |     "spacetime": "7.4.3",
44 |     "spacetime-holiday": "0.3.0"
45 |   },
46 |   "license": "MIT"
47 | }


--------------------------------------------------------------------------------
/src/03-three/contractions/api.js:
--------------------------------------------------------------------------------
 1 | const titleCase = /^\p{Lu}[\p{Ll}'’]/u //upercase, then lowercase
 2 | // import contract from './contract.js'
 3 | 
 4 | const toTitleCase = function (str = '') {
 5 |   str = str.replace(/^ *[a-z\u00C0-\u00FF]/, x => x.toUpperCase()) //TODO: support unicode
 6 |   return str
 7 | }
 8 | 
 9 | const api = function (View) {
10 |   /** */
11 |   class Contractions extends View {
12 |     constructor(document, pointer, groups) {
13 |       super(document, pointer, groups)
14 |       this.viewType = 'Contraction'
15 |     }
16 |     /** i've -> 'i have' */
17 |     expand() {
18 |       this.docs.forEach(terms => {
19 |         let isTitleCase = titleCase.test(terms[0].text)
20 |         terms.forEach((t, i) => {
21 |           t.text = t.implicit
22 |           delete t.implicit
23 |           //add whitespace
24 |           if (i < terms.length - 1 && t.post === '') {
25 |             t.post += ' '
26 |           }
27 |           // flag it as dirty
28 |           t.dirty = true
29 |         })
30 |         // make the first word title-case?
31 |         if (isTitleCase) {
32 |           terms[0].text = toTitleCase(terms[0].text)
33 |         }
34 |       })
35 |       this.compute('normal') //re-set normalized text
36 |       return this
37 |     }
38 |   }
39 |   // add fn to View
40 |   View.prototype.contractions = function () {
41 |     let m = this.match('@hasContraction+')
42 |     return new Contractions(this.document, m.pointer)
43 |   }
44 |   // View.prototype.contract = contract
45 | }
46 | 
47 | export default api


--------------------------------------------------------------------------------
/src/03-three/numbers/parse/index.js:
--------------------------------------------------------------------------------
 1 | import fromText from './fromText.js'
 2 | 
 3 | const fromNumber = function (m) {
 4 |   let str = m.text('normal').toLowerCase()
 5 |   str = str.replace(/(e|er)$/, '')
 6 |   let hasComma = false
 7 |   if (/,/.test(str)) {
 8 |     hasComma = true
 9 |     str = str.replace(/,/g, '')
10 |   }
11 |   // get prefix/suffix
12 |   let arr = str.split(/([-0-9.,]*)/)
13 |   let [prefix, num] = arr
14 |   let suffix = arr.slice(2).join('')
15 |   if (num !== '' && m.length < 2) {
16 |     num = Number(num || str)
17 |     //ensure that num is an actual number
18 |     if (typeof num !== 'number') {
19 |       num = null
20 |     }
21 |     // strip an ordinal off the suffix
22 |     if (suffix === 'e' || suffix === 'er') {
23 |       suffix = ''
24 |     }
25 |   }
26 |   return {
27 |     hasComma,
28 |     prefix,
29 |     num,
30 |     suffix,
31 |   }
32 | }
33 | 
/**
 * Parse a number match: through fromText when it is tagged #TextValue,
 * through fromNumber otherwise, and report which number tags it carries.
 */
const parseNumber = function (m) {
  const terms = m.docs[0]
  const isText = m.has('#TextValue')
  // values used when the number is written out in words
  let parsed = { hasComma: false, prefix: '', num: null, suffix: '' }
  if (isText) {
    parsed.num = fromText(terms)
  } else {
    const { hasComma, prefix, num, suffix } = fromNumber(m)
    parsed = { hasComma, prefix, num, suffix }
  }
  return {
    ...parsed,
    isText,
    isOrdinal: m.has('#Ordinal'),
    isFraction: m.has('#Fraction'),
    isMoney: m.has('#Money'),
  }
}
61 | export default parseNumber


--------------------------------------------------------------------------------
/plugins/dates/src/phrase/date/units.js:
--------------------------------------------------------------------------------
 1 | import spacetime from 'spacetime'
 2 | 
 3 | class Moment {
 4 |   constructor(input, opts) {
 5 |     this.unit = 'millisecond'
 6 |     this.opts = opts || {}
 7 |     this.s = spacetime(input, opts.timezone)
 8 |   }
 9 |   start() {
10 |     this.s = this.s.startOf(this.unit)
11 |     return this
12 |   }
13 |   end() {
14 |     this.s = this.s.endOf(this.unit)
15 |     return this
16 |   }
17 |   mid() {
18 |     //do nothing
19 |     return this
20 |   }
21 |   iso() {
22 |     return this.s.iso()
23 |   }
24 | }
25 | 
26 | 
/** a Moment whose unit is 'day' */
class Day extends Moment {
  constructor(str, opts) {
    super(str, opts)
    this.unit = 'day'
  }
  /** start of the day, plus 12 hours */
  mid() {
    this.s = this.start().s.add(12, 'hour') //noon
    return this
  }
}

/** a Moment whose unit is 'week' */
class Week extends Moment {
  constructor(str, opts) {
    super(str, opts)
    this.unit = 'week'
  }
  /** start of the week, plus 3 days */
  mid() {
    this.s = this.start().s.add(3, 'day') //wednesday
    return this
  }
}

/** a Moment whose unit is 'month' */
class Month extends Moment {
  constructor(str, opts) {
    super(str, opts)
    this.unit = 'month'
  }
  /** start of the month, plus 14 days */
  mid() {
    this.s = this.start().s.add(14, 'days')
    return this
  }
}

/** a Moment whose unit is 'year' */
class Year extends Moment {
  constructor(str, opts) {
    super(str, opts)
    this.unit = 'year'
  }
  /** start of the year, plus 6 months */
  mid() {
    this.s = this.start().s.add(6, 'months')
    return this
  }
}
74 | 
75 | export { Moment, Month, Day, Week, Year }
76 | 


--------------------------------------------------------------------------------
/plugins/dates/tests/backburner/ambig-weekday.ignore.js:
--------------------------------------------------------------------------------
 1 | import test from 'tape'
 2 | import nlp from './_lib.js'
 3 | import spacetime from 'spacetime'
 4 | 
 5 | const fmt = (iso) => (iso ? spacetime(iso).format('{iso-short}') : '-')
 6 | 
 7 | test('this monday', function (t) {
 8 |   let arr = [
 9 |     ['2020-12-7', '2020-12-07'], //mon (itself)
10 |     ['2020-12-8', '2020-12-14'], //tues
11 |     ['2020-12-9', '2020-12-14'], //wed
12 |     ['2020-12-10', '2020-12-14'], //thu
13 |     ['2020-12-11', '2020-12-14'], //fri
14 |     ['2020-12-12', '2020-12-14'], //sat
15 |     ['2020-12-13', '2020-12-14'], //sun
16 |   ]
17 |   arr.forEach((a) => {
18 |     let doc = nlp('this monday')
19 |     let found = doc.dates({ today: a[0] }).json()[0]
20 |     t.equal(fmt(found.dates.start), a[1], 'monday-start')
21 |     t.equal(fmt(found.dates.end), a[1], 'monday-end')
22 |   })
23 |   t.end()
24 | })
25 | 
26 | // test('last monday', function (t) {
27 | //   let arr = [
28 | //     ['2020-12-7', '2020-11-30'], //mon (obvious)
29 | //     ['2020-12-8', '2020-11-30'], //tues
30 | //     ['2020-12-9', '2020-11-30'], //wed
31 | //     ['2020-12-10', '2020-11-30'], //thu
32 | //     ['2020-12-11', '2020-11-30'], //fri
33 | //     ['2020-12-12', '2020-11-30'], //sat
34 | //     ['2020-12-13', '2020-11-30'], //sun
35 | //   ]
36 | //   arr.forEach((a) => {
37 | //     let doc = nlp('last monday')
38 | //     let found = doc.dates({ today: a[0] }).json()[0]
39 | //     t.equal(fmt(found.date.start), a[1], 'last-monday-start')
40 | //     t.equal(fmt(found.date.end), a[1], 'last-monday-end')
41 | //   })
42 | //   t.end()
43 | // })
44 | 


--------------------------------------------------------------------------------
/data/lexicon/nouns/uncountables.js:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   'anglais',
 3 |   'os',
 4 |   'bois',
 5 |   'corps',
 6 |   'bras',
 7 |   'poids',
 8 |   'repas',
 9 |   'sens',
10 | 
11 | 
12 |   'conseils',//advice
13 |   'munitions',//ammunition
14 |   'asperges',//asparagus
15 |   'combles',//attic
16 |   'spectateurs',//audience
17 |   'auditeurs',//
18 |   'baggage',//luggage
19 |   'bagages',//
20 |   'brocolis',//broccoli
21 |   'affaires',//business
22 |   'dégâts',//damage
23 |   'céréales',//cereal
24 |   'échecs',//chess
25 |   'vêtements',//clothing
26 |   'coordonnées',//address
27 |   'ténèbres',//darkness
28 |   'datadonnées',//**
29 |   'débris',//debris
30 |   'arrhes',//deposit
31 |   'recherches',//research
32 |   'fiançailles',//engagement
33 |   'remords',//remorse
34 |   'victuailles',//food
35 |   'prévisions',//forecast
36 |   'fruits',//fruit
37 |   'funérailles',//funeral
38 |   'obsèques',//
39 |   'meubles',//furniture
40 |   'garbage',//rubbish
41 |   'ordures',
42 |   'déchets',//
43 |   'graffitis',//graffiti
44 |   'cheveux',//hair
45 |   'ravages',//havoc
46 |   'foins',//hay
47 |   'chevrons',//herringbone
48 |   'devoirs',//homework
49 |   'renseignements',//information
50 |   'médicaments',//medicine
51 |   'abats',//offal
52 |   'pâtes',//pasta
53 |   'décombres',//rubble
54 |   'sciences*',//science
55 |   'crevettes',//shrimp
56 |   'logiciels',//software
57 |   'spaghettis',//spaghetti
58 |   'épinards',//spinach
59 |   'parasites',//static
60 |   'transports',//transportation
61 |   'vacances',//vacation
62 |   'environs',//vicinity
63 |   'fumerolles',//gas
64 |   'noces',//wedding
65 | ]


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/1st-pass/year.js:
--------------------------------------------------------------------------------
 1 | const min = 1400
 2 | const max = 2100
 3 | 
 4 | const dateWords = new Set(['pendant', 'dans', 'avant', 'apres', 'pour', 'en'])
 5 | 
 6 | const seemsGood = function (term) {
 7 |   if (!term) {
 8 |     return false
 9 |   }
10 |   if (dateWords.has(term.normal)) {
11 |     return true
12 |   }
13 |   if (term.tags.has('Date') || term.tags.has('Month') || term.tags.has('WeekDay')) {
14 |     return true
15 |   }
16 |   return false
17 | }
18 | 
/** weaker hint for a year: the neighbouring term exists and is tagged Ordinal */
const seemsOkay = function (term) {
  return Boolean(term && term.tags.has('Ordinal'))
}
28 | 
29 | // recognize '1993' as a year
/**
 * Tag a 4-character numeric cardinal as a Year when a neighbouring term supports it.
 * Returns true when the tag was set, and null otherwise.
 */
const tagYear = function (terms, i, world) {
  const { setTag } = world.methods.one
  const term = terms[i]
  const isCandidate = term.tags.has('NumericValue') && term.tags.has('Cardinal') && term.normal.length === 4
  if (!isCandidate) {
    return null
  }
  const num = Number(term.normal)
  // 0 and NaN are not years
  if (!num || isNaN(num)) {
    return null
  }
  // must fall strictly between min and max
  if (num <= min || num >= max) {
    return null
  }
  const before = terms[i - 1]
  const after = terms[i + 1]
  if (seemsGood(before) || seemsGood(after)) {
    setTag([term], 'Year', world, false, '2-tagYear')
    return true
  }
  // or is it really-close to a year?
  const isRecent = num > 1950 && num < 2025
  if (isRecent && (seemsOkay(before) || seemsOkay(after))) {
    setTag([term], 'Year', world, false, '2-tagYear-close')
    return true
  }
  return null
}
54 | export default tagYear


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/3rd-pass/adj-gender.js:
--------------------------------------------------------------------------------
 1 | // maître
 2 | // traître
 3 | 
 4 | const guessGender = function (str) {
 5 |   // female singular
 6 |   if (str.match(/[eë]$/)) {
 7 |     return 'f'
 8 |   }
 9 |   // female plurals
10 |   let suffixes = [
11 |     /[aei]lles$/,
12 |     /[aei]les$/,
13 |     /[aeiou]ttes$/,
14 |     /ntes$/,
15 |     /i[vct]es$/,
16 |     /uses$/,
17 |     /sses$/,
18 |     /[èuay]res$/,
19 |     /ires$/,
20 |     /ées$/,
21 |     /ues$/,
22 |     /ies$/,
23 |     /ée$/,
24 |     /[ndvt]es$/,
25 |   ]
26 |   for (let i = 0; i < suffixes.length; i += 1) {
27 |     if (suffixes[i].test(str)) {
28 |       return 'f'
29 |     }
30 |   }
31 | 
32 | 
33 |   return 'm'
34 | }
35 | 
36 | // guess a gender tag each Adjective
/**
 * Give an Adjective a FemaleAdjective or MaleAdjective tag, unless it already has one.
 * @param {Array<object>} terms
 * @param {number} i - index of the term to look at
 * @param {object} world - reads `world.methods.one.setTag`
 * @returns {*} whatever setTag returns when a tag is applied, otherwise null
 */
const adjGender = function (terms, i, world) {
  const setTag = world.methods.one.setTag
  const term = terms[i]
  const tags = term.tags
  // the other lookups here use bare tag names - '#MaleAdjective' could never match,
  // so words already tagged MaleAdjective were being guessed again
  const hasGender = tags.has('FemaleAdjective') || tags.has('MaleAdjective')
  if (!tags.has('Adjective') || hasGender) {
    return null
  }
  const str = term.implicit || term.normal || term.text || ''
  // i actually think there are no exceptions.
  const tag = guessGender(str) === 'f' ? 'FemaleAdjective' : 'MaleAdjective'
  return setTag([term], tag, world, false, '3-adj-gender')
}
52 | export default adjGender
53 | 
54 | // import data from '../../data/models/adjective/index.js'
55 | // let count = 0
56 | // Object.keys(data).forEach(m => {
57 | //   let [f, mp, fp] = data[m]
58 | //   if (guessGender(fp) !== 'f') {
59 | //     console.log(fp)
60 | //     count += 1
61 | //   }
62 | // })
63 | // console.log(count)
64 | 


--------------------------------------------------------------------------------
/plugins/dates/src/phrase/index.js:
--------------------------------------------------------------------------------
 1 | import parseOne from './date/index.js'
 2 | import { Moment, Month, Day, Week, Year } from './date/units.js'
 3 | 
 4 | 
 5 | // generic callback
 6 | const startEnd = function (m, opts) {
 7 |   if (m.found) {
 8 |     let { start, end } = m.groups()
 9 |     let out = {
10 |       start: parseOne(start, opts),
11 |       end: parseOne(end, opts)
12 |     }
13 |     if (out.start) {
14 |       return out
15 |     }
16 |   }
17 |   return null
18 | }
/** parse the whole match as one date: `{ start }` when it parses, null when it does not */
const justStart = function (m, opts) {
  const start = parseOne(m, opts)
  return start ? { start } : null
}
26 | 
/**
 * Build a range from `opts.today` up to the 'end' group of the match.
 * Gives null when the end does not parse.
 */
const untilEnd = function (m, opts) {
  const target = m.groups().end
  const start = new Moment(opts.today, opts)
  const parsed = parseOne(target, opts)
  if (!parsed) {
    return null
  }
  // until - just before x
  const end = new Moment(parsed.s.minus(1, 'millisecond'), opts)
  return { start, end }
}
37 | 
// match templates, tried from top to bottom - each has the callback that parses it
const phrases = [
  // 'entre sept et oct'
  { match: 'entre [<start>.*] et [<end>.*]', cb: startEnd },
  // 'jusqu'en juin' (until june)
  { match: '(jusqu|jusque) (en|a|à|au) [<end>#Date+]', cb: untilEnd },
  // fallback to parsing one date
  { match: '.*', cb: justStart },
]

/**
 * For each view, keep the result of the first template that is both found
 * and parsed by its callback. Views where nothing parses add no entry.
 */
const parsePhrase = function (matches, opts) {
  const results = []
  matches.forEach((view) => {
    for (const { match, cb } of phrases) {
      const m = view.match(match)
      if (!m.found) {
        continue
      }
      const res = cb(m, opts)
      if (res) {
        results.push(res)
        return
      }
    }
  })
  return results
}
66 | export default parsePhrase


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author": "Spencer Kelly <spencermountain@gmail.com> (http://spencermounta.in)",
 3 |   "name": "fr-compromise",
 4 |   "description": "Linguistique computationnelle modeste",
 5 |   "version": "0.2.8",
 6 |   "main": "./builds/fr-compromise.mjs",
 7 |   "unpkg": "./builds/fr-compromise.min.js",
 8 |   "type": "module",
 9 |   "sideEffects": false,
10 |   "exports": {
11 |     ".": {
12 |       "import": "./builds/fr-compromise.mjs",
13 |       "require": "./builds/fr-compromise.cjs",
14 |       "types": "./types/index.d.ts"
15 |     }
16 |   },
17 |   "types": "types/index.d.ts",
18 |   "repository": {
19 |     "type": "git",
20 |     "url": "git://github.com/nlp-compromise/fr-compromise.git"
21 |   },
22 |   "scripts": {
23 |     "test": "tape \"./tests/**/*.test.js\" | tap-dancer",
24 |     "testb": "cross-env TESTENV=prod npm run test",
25 |     "build": "npm run version && rollup -c --silent",
26 |     "pack": "node ./scripts/pack.js",
27 |     "watch": "amble ./scratch.js",
28 |     "version": "node ./scripts/version.js",
29 |     "score": "node ./learn/giga/test.js",
30 |     "lint": "eslint ./src/**/*",
31 |     "stress": "node scripts/stress.js"
32 |   },
33 |   "files": [
34 |     "builds/",
35 |     "types/",
36 |     "src/"
37 |   ],
38 |   "dependencies": {
39 |     "compromise": "14.10.0",
40 |     "efrt": "2.7.0",
41 |     "suffix-thumb": "5.0.2"
42 |   },
43 |   "devDependencies": {
44 |     "@rollup/plugin-node-resolve": "15.2.0",
45 |     "@rollup/plugin-terser": "0.4.3",
46 |     "amble": "1.3.0",
47 |     "cross-env": "^7.0.3",
48 |     "eslint": "8.47.0",
49 |     "eslint-plugin-regexp": "1.15.0",
50 |     "fr-corpus": "^0.0.1",
51 |     "rollup": "3.28.0",
52 |     "tap-dancer": "0.3.4",
53 |     "tape": "5.6.6"
54 |   },
55 |   "license": "MIT"
56 | }
57 | 


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
 1 | import nlp from './_lib.js'
 2 | import tokenize from './01-one/tokenize/plugin.js'
 3 | import lexicon from './01-one/lexicon/plugin.js'
 4 | import preTagger from './02-two/preTagger/plugin.js'
 5 | import postTagger from './02-two/postTagger/plugin.js'
 6 | import tagset from './02-two/tagset/plugin.js'
 7 | import numbers from './03-three/numbers/plugin.js'
 8 | import topics from './03-three/topics/plugin.js'
 9 | import verbs from './03-three/verbs/plugin.js'
10 | import adjectives from './03-three/adjectives/plugin.js'
11 | import nouns from './03-three/nouns/plugin.js'
12 | import contractions from './03-three/contractions/plugin.js'
13 | import version from './_version.js'
14 | 
// register every plugin with nlp, in this order
for (const plugin of [
  tokenize,
  tagset,
  lexicon,
  preTagger,
  postTagger,
  numbers,
  topics,
  verbs,
  adjectives,
  nouns,
  contractions,
]) {
  nlp.plugin(plugin)
}
26 | 
/** the entry point: passes the text and lexicon straight to nlp and returns its result */
const fr = function (txt, lex) {
  return nlp(txt, lex)
}
31 | 
32 | // copy constructor methods over
// Object.keys only lists own enumerable properties, so every key is copied
for (const key of Object.keys(nlp)) {
  fr[key] = nlp[key]
}

// this one is hidden: writable, but left out of enumeration
Object.defineProperty(fr, '_world', { value: nlp._world, writable: true })
44 | 
45 | 
46 | 
/**
 * Switch the debug flags on the environment object.
 * `true` turns on all three; 'tagger', 'match' or 'chunker' turns on just that one;
 * every other flag is set to ''.
 */
fr.verbose = function (set) {
  const inBrowser = typeof process === 'undefined'
  const env = inBrowser ? self.env || {} : process.env //use window, in browser
  const flag = (name) => (set === name || set === true ? true : '')
  env.DEBUG_TAGS = flag('tagger')
  env.DEBUG_MATCH = flag('match')
  env.DEBUG_CHUNKS = flag('chunker')
  return this
}
55 | fr.version = version
56 | 
57 | export default fr


--------------------------------------------------------------------------------
/learn/giga/getPairs.js:
--------------------------------------------------------------------------------
 1 | import { forEachSync } from './_giga.js'
 2 | import doSentences from './french.js'
 3 | import fs from 'fs'
 4 | 
 5 | let ids = []
 6 | for (let i = 1; i <= 10; i += 1) {
 7 |   let str = String(i).padStart(4, '0')
 8 |   ids.push(str)
 9 | }
10 | // ids = ['0004']
11 | 
12 | // ABR	abbreviation
13 | // ADJ	adjective
14 | // ADV	adverb
15 | 
16 | // VER:pres	verb present
17 | // VER:simp	verb simple past
18 | // VER:futu	verb futur
19 | // VER:cond	verb conditional
20 | // VER:impe	verb imperative
21 | // VER:impf	verb imperfect
22 | // VER:infi	verb infinitive
23 | // VER:pper	verb past participle
24 | // VER:ppre	verb present participle
25 | // VER:subi	verb subjunctive imperfect
26 | // VER:subp	verb subjunctive present
27 | 
28 | // "NOM": true,
29 | let pairs = {}
30 | const tag = 'NOM'
31 | // const prev = 'les'
32 | 
33 | let results = {}
/**
 * Print every term of `both.fr` whose `$.pos` equals `tag`, skipping the first term.
 * Nothing is written to `results` here - the code that did so was commented out.
 */
const doBoth = function (both) {
  both.fr.forEach((term, index) => {
    const isFirst = index === 0
    if (!isFirst && term['$'].pos === tag) {
      console.log(term)
    }
  })
}
56 | 
// handle one id; an error is logged rather than re-thrown
const processId = async function (id) {
  try {
    console.log(`\ndoing ${id}:\n`)
    await doSentences(id, doBoth)
  } catch (err) {
    console.log(err)
  }
}

await forEachSync(ids, processId)
console.log('done')
// store the collected results as [key, value] pairs in ./pairs.js
results = Object.entries(results)
fs.writeFileSync('./pairs.js', `export default ${JSON.stringify(results)}`)
68 | 


--------------------------------------------------------------------------------
/types/view/fr.ts:
--------------------------------------------------------------------------------
 1 | import View from './one'
 2 | 
 3 | 
/** the view type returned by `FrView.numbers()` */
interface Numbers extends View {
  /** grab the parsed details of each number, as an array of objects */
  parse: (n?: number) => object[]
  /** grab the numeric value - a single number, or an array of numbers */
  get: (n?: number) => number | number[]
  /** grab 'kilos' from '25 kilos' (currently disabled) */
  // units: () => View
  /** return only ordinal numbers */
  isOrdinal: () => View
  /** return only cardinal numbers */
  isCardinal: () => View
  /** convert number to `5` or `5th` */
  toNumber: () => View
  /** add commas, or nicer formatting for numbers */
  toLocaleString: () => View
  /** convert number to `five` or `fifth` */
  toText: () => View
  /** convert number to `five` or `5` */
  toCardinal: () => View
  /** convert number to `fifth` or `5th` */
  toOrdinal: () => View
  /** return numbers with this value */
  isEqual: () => View
  /** return numbers bigger than `min` */
  greaterThan: (min: number) => View
  /** return numbers smaller than `max` */
  lessThan: (max: number) => View
  /** return numbers between min and max */
  between: (min: number, max: number) => View
  /** set number to n */
  set: (n: number) => View
  /** increase number by n */
  add: (n: number) => View
  /** decrease number by n */
  subtract: (n: number) => View
  /** increase number by 1 */
  increment: () => View
  /** decrease number by 1 */
  decrement: () => View
}
44 | 
/** the view type returned by `FrView.contractions()` */
interface Contractions extends View {
  /** presumably replaces each contraction with its full words - TODO confirm against the implementation */
  expand(): View
}
49 | 
50 | 
51 | 
/** the main view type: a `View` plus the selection methods declared below */
interface FrView extends View {
  /** return any multi-word terms, like "didn't"  */
  contractions: (n?: number) => Contractions
  /** returns a `Numbers` view */
  numbers(): Numbers
  /** returns a plain `View` - presumably of the topics found in the text; TODO confirm */
  topics(): View
}
60 | 
61 | export default FrView


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/3rd-pass/verb-tense.js:
--------------------------------------------------------------------------------
 1 | const tenses = [
 2 |   'PresentTense',
 3 |   'Infinitive',
 4 |   'Imperative',
 5 |   'Gerund',
 6 |   'PastTense',
 7 |   'Modal',
 8 |   'Auxiliary',
 9 |   'PerfectTense',
10 |   'Pluperfect',
11 |   'ConditionalVerb',
12 |   'FutureTense',
13 | ]
14 | 
15 | 
16 | let whichTense = [
17 | 
18 |   //er - present conditional 
19 |   ['erais', 'ConditionalVerb'],
20 |   ['erait', 'ConditionalVerb'],
21 |   ['erions', 'ConditionalVerb'],
22 |   ['eriez', 'ConditionalVerb'],
23 |   ['eraient', 'ConditionalVerb'],
24 | 
25 |   //er- future
26 |   ['erai', 'FutureTense'],
27 |   ['era', 'FutureTense'],
28 |   ['erons', 'FutureTense'],
29 |   ['erez', 'FutureTense'],
30 |   ['eront', 'FutureTense'],
31 | 
32 |   // er - imparfait -> PastTense
33 |   ['ais', 'PastTense'],
34 |   ['ait', 'PastTense'],
35 |   ['ions', 'PastTense'],
36 |   ['iez', 'PastTense'],
37 |   ['ient', 'PastTense'],
38 | 
39 |   // past-participle
40 |   ['ées', 'PastParticiple'],
41 |   ['és', 'PastParticiple'],
42 |   ['ée', 'PastParticiple'],
43 |   ['é', 'Participle'],
44 |   ['u', 'Participle'],//entendu
45 | ]
46 | 
47 | 
48 | // guess a tense tag each Verb
49 | const verbTense = function (terms, i, world) {
50 |   let setTag = world.methods.one.setTag
51 |   let term = terms[i]
52 |   let tags = term.tags
53 |   if (tags.has('Verb')) {
54 |     // console.log(term)
55 |     let str = term.implicit || term.normal || term.text || ''
56 |     // if we have no tense
57 |     if (!tenses.find(s => tags.has(s))) {
58 |       let found = whichTense.find(a => str.endsWith(a[0]))
59 |       if (found) {
60 |         setTag([term], found[1], world, false, '3-tense-suffix-' + found[1])
61 |       } else {
62 |         setTag([term], 'PresentTense', world, false, '3-tense-fallback')
63 |       }
64 |     }
65 |   }
66 |   return null
67 | }
68 | export default verbTense


--------------------------------------------------------------------------------
/learn/wikinews/getSuffix.js:
--------------------------------------------------------------------------------
 1 | let lines = require('./parse')
 2 | // lines = lines.slice(0, 300)
 3 | const end = 5
 4 | 
 5 | // 'P+D': 241,
 6 | // ADJ: 719,
 7 | // ADV: 311,
 8 | // CC: 172,
 9 | // CLO: 32,
10 | // CLR: 53,
11 | // CLS: 88,
12 | // CS: 90,
13 | // DET: 1353,
14 | // ET: 136,
15 | 
16 | // nouns:
17 | // NC: 1877,
18 | // NPP: 493,
19 | // P: 1242,
20 | // PREF: 8,
21 | 
22 | // PRO: 43, //pronoun
23 | // PROREL: 89,  //relative pronoun
24 | // U: 100,
25 | 
26 | // V: 509,
27 | // VINF: 140,
28 | // VPP: 402,
29 | // VPR: 61,
30 | // VS: 10,
31 | 
// tags[suffix][tag] = how many times a word with that ending carried that tag
let tags = {}
for (const sentence of lines) {
  for (const w of sentence) {
    const len = w.word.length
    // only words longer than the suffix size are counted
    if (len <= end) {
      continue
    }
    // the last `end` characters of the lowercased word
    const suffix = w.word.toLowerCase().slice(len - end)
    // suffix = suffix.replace(/[éèêë]/, 'e')
    // suffix = suffix.replace(/[ï]/, 'i')
    // suffix = suffix.replace(/[û]/, 'u')
    if (/[0-9]/.test(suffix)) {
      continue
    }
    const counts = tags[suffix] || {}
    tags[suffix] = counts
    counts[w.tag] = (counts[w.tag] || 0) + 1
  }
}
51 | 
// keep the suffixes that (almost) only ever appear with `wantTag`
let found = {}
const wantTag = 'N'
for (const k of Object.keys(tags)) {
  const counts = tags[k]
  let foundTags = Object.keys(counts)
  if (foundTags.length === 2 && counts[wantTag] > 5) {
    // forget any tag that was seen just once for this suffix
    for (const tag of foundTags) {
      if (counts[tag] === 1) {
        delete counts[tag]
      }
    }
    foundTags = Object.keys(counts)
  }
  if (foundTags.length !== 1) {
    continue
  }
  const only = foundTags[0]
  if (counts[only] > 1 && only === wantTag && counts[wantTag] > 90) {
    found[k] = only
  }
}
console.log(found)
78 | 


--------------------------------------------------------------------------------
/tests/conjugate.test.js:
--------------------------------------------------------------------------------
 1 | import test from 'tape'
 2 | import nlp from './_lib.js'
 3 | let here = '[conjugate] '
 4 | nlp.verbose(false)
 5 | 
 6 | test('adj-conjugate:', function (t) {
 7 |   let all = ["sanglant", "sanglante", "sanglants", "sanglantes"]
 8 |   t.deepEqual(Object.values(nlp(all[0]).adjectives().conjugate()[0]), all, here + 'from-male')
 9 |   t.deepEqual(Object.values(nlp(all[1]).adjectives().conjugate()[0]), all, here + 'from-female')
10 |   t.deepEqual(Object.values(nlp(all[2]).adjectives().conjugate()[0]), all, here + 'from-plural')
11 |   t.deepEqual(Object.values(nlp(all[3]).adjectives().conjugate()[0]), all, here + 'from-female-plural')
12 |   t.end()
13 | })
14 | 
test('noun-conjugate:', function (t) {
  // conjugate a noun and return its [singular, plural] forms
  const forms = function (word) {
    const res = nlp(word).nouns().conjugate()[0]
    return [res.singular, res.plural]
  }

  let all = ["cargaison", "cargaisons"]
  t.deepEqual(forms(all[0]), all, here + 'from-sing')
  t.deepEqual(forms(all[1]), all, here + 'from-plural')

  // same spelling for singular and plural
  all = ["bois", "bois"]
  t.deepEqual(forms(all[0]), all, here + 'from-sing')
  t.end()
})
27 | 
test('verb-conjugate:', function (t) {
  const all = ["endors", "endors", "endort", "endormons", "endormez", "endorment"]
  const labels = ['from-first', 'from-2nd', 'from-3d', 'from-1p', 'from-2p', 'from-3p']
  // each present-tense form should produce the whole present-tense set
  all.forEach((word, i) => {
    const present = nlp(word).verbs().conjugate()[0].PresentTense
    t.deepEqual(Object.values(present), all, here + labels[i])
  })
  t.end()
})


--------------------------------------------------------------------------------
/src/01-one/tokenize/unicode.js:
--------------------------------------------------------------------------------
//a hugely-ignorant, and widely subjective transliteration of latin, cyrillic, greek unicode characters to english ascii.
 2 | //approximate visual (not semantic or phonetic) relationship between unicode and ascii characters
 3 | //http://en.wikipedia.org/wiki/List_of_Unicode_characters
 4 | //https://docs.google.com/spreadsheet/ccc?key=0Ah46z755j7cVdFRDM1A2YVpwa1ZYWlpJM2pQZ003M0E
 5 | 
 6 | 
 7 | // allowed french symbols
 8 | // ç – la cédille (the cedilla)
 9 | // é – l'accent aigu (the acute accent)
10 | // â/ê/î/ô/û – l'accent circonflexe (the circumflex)
11 | // à/è/ì/ò/ù – l'accent grave (the grave accent)
12 | // ë/ï/ü 
13 | let compact = {
14 |   '!': '¡',
15 |   '?': '¿Ɂ',
16 |   '"': '“”"❝❞',
17 |   "'": '‘‛❛❜’',
18 |   '-': '—–',
19 |   a: 'ªÁÃÄÅáãäåĀāĂăĄąǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆΑΔΛάαλАаѦѧӐӑӒӓƛæ',
20 |   b: 'ßþƀƁƂƃƄƅɃΒβϐϦБВЪЬвъьѢѣҌҍ',
21 |   c: '¢©ĆćĈĉĊċČčƆƇƈȻȼͻͼϲϹϽϾСсєҀҁҪҫ',
22 |   d: 'ÐĎďĐđƉƊȡƋƌ',
23 |   e: 'ĒēĔĕĖėĘęĚěƐȄȅȆȇȨȩɆɇΈΕΞΣέεξϵЀЁЕеѐёҼҽҾҿӖӗ',
24 |   f: 'ƑƒϜϝӺӻҒғſ',
25 |   g: 'ĜĝĞğĠġĢģƓǤǥǦǧǴǵ',
26 |   h: 'ĤĥĦħƕǶȞȟΉΗЂЊЋНнђћҢңҤҥҺһӉӊ',
27 |   I: 'Í',
28 |   i: 'íĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇії',
29 |   j: 'ĴĵǰȷɈɉϳЈј',
30 |   k: 'ĶķĸƘƙǨǩΚκЌЖКжкќҚқҜҝҞҟҠҡ',
31 |   l: 'ĹĺĻļĽľĿŀŁłƚƪǀǏǐȴȽΙӀӏ',
32 |   m: 'ΜϺϻМмӍӎ',
33 |   n: 'ÑñŃńŅņŇňŉŊŋƝƞǸǹȠȵΝΠήηϞЍИЙЛПийлпѝҊҋӅӆӢӣӤӥπ',
34 |   o: 'ÓÕÖØðóõöøŌōŎŏŐőƟƠơǑǒǪǫǬǭǾǿȌȍȎȏȪȫȬȭȮȯȰȱΌΘΟθοσόϕϘϙϬϴОФоѲѳӦӧӨөӪӫ',
35 |   p: 'ƤΡρϷϸϼРрҎҏÞ',
36 |   q: 'Ɋɋ',
37 |   r: 'ŔŕŖŗŘřƦȐȑȒȓɌɍЃГЯгяѓҐґ',
38 |   s: 'ŚśŜŝŞşŠšƧƨȘșȿЅѕ',
39 |   t: 'ŢţŤťŦŧƫƬƭƮȚțȶȾΓΤτϮТт',
40 |   u: 'µÚúŨũŪūŬŭŮůŰűŲųƯưƱƲǓǔǕǖǗǘǙǚǛǜȔȕȖȗɄΰμυϋύ',
41 |   v: 'νѴѵѶѷ',
42 |   w: 'ŴŵƜωώϖϢϣШЩшщѡѿ',
43 |   x: '×ΧχϗϰХхҲҳӼӽӾӿ',
44 |   y: 'ÝýÿŶŷŸƳƴȲȳɎɏΎΥΫγψϒϓϔЎУучўѰѱҮүҰұӮӯӰӱӲӳ',
45 |   z: 'ŹźŻżŽžƵƶȤȥɀΖ',
46 |   oe: 'œ',
47 | }
48 | //decompress data into two hashes
// expand the compact table into one flat lookup: unicode[character] = replacement
let unicode = {}
for (const k of Object.keys(compact)) {
  const chars = compact[k]
  for (let i = 0; i < chars.length; i += 1) {
    unicode[chars[i]] = k
  }
}
55 | 
56 | export default unicode


--------------------------------------------------------------------------------
/data/lexicon/nouns/masculine.js:
--------------------------------------------------------------------------------
  1 | export default ['bateau', 'parapluie',
  2 | 
  3 | 
  4 |   'échelle',
  5 |   'végétale',
  6 |   'automobile',
  7 |   'file',
  8 |   'mobile',
  9 |   'année',
 10 |   'musée',
 11 |   'idée',
 12 | 
 13 |   'pratique',
 14 |   'statistique',
 15 |   'politique',
 16 |   'musique',
 17 |   'technique',
 18 | 
 19 |   'table',
 20 |   'ensemble',
 21 |   'bénéficiaire',
 22 |   'commentaire',
 23 |   'affaire',
 24 |   'partenaire',
 25 |   'gestionnaire',
 26 |   'fonctionnaire',
 27 |   'salaire',
 28 | 
 29 |   'animal',
 30 |   'taux',
 31 |   'niveau',
 32 |   'réseau',
 33 |   'bureau',
 34 |   'journal',
 35 |   'eau',
 36 | 
 37 |   'entente',
 38 |   'vente',
 39 |   'atteinte',
 40 |   'plante',
 41 |   'plainte',
 42 | 
 43 |   'jeu',
 44 |   // 'enjeux',
 45 |   'lieu',
 46 | 
 47 |   'perspective',
 48 |   'initiative',
 49 |   'élève',
 50 | 
 51 |   'objectif',
 52 |   'tarif',
 53 | 
 54 |   'avenir',
 55 |   'air',
 56 | 
 57 |   'janvier',
 58 |   'hiver',
 59 |   'mer',
 60 |   'dossier',
 61 |   'degré',
 62 | 
 63 |   'droit',
 64 |   'crédit',
 65 |   'profit',
 66 |   'endroit',
 67 | 
 68 | 
 69 | 
 70 |   'gouvernement',
 71 |   'développement',
 72 |   'financement',
 73 |   'enseignement',
 74 |   'rendement',
 75 |   'environnement',
 76 |   'établissement',
 77 |   'enregistrement',
 78 |   'document',
 79 |   'investissement',
 80 |   'moment',
 81 |   'règlement',
 82 |   'traitement',
 83 |   'engagement',
 84 |   'paiement',
 85 |   'approvisionnement',
 86 |   'changement',
 87 |   'élément',
 88 |   'équipement',
 89 |   'événement',
 90 |   'fonctionnement',
 91 |   'parlement',
 92 |   'perfectionnement',
 93 |   'agrément',
 94 |   'accroissement',
 95 |   'renforcement',
 96 |   'renouvellement',
 97 |   'recensement',
 98 |   'remboursement',
 99 |   'segment',
100 |   'recrutement',
101 |   'mouvement',
102 | 
103 |   'donnée',
104 |   'restaurant',
105 |   'espace',
106 | 
107 | 
108 | ]
109 | 


--------------------------------------------------------------------------------
/types/index.d.ts:
--------------------------------------------------------------------------------
 1 | import { Lexicon, Plugin, matchOptions, Match, Net } from './misc'
 2 | import View from './view/fr'
 3 | 
 4 | /** parse a given text */
 5 | declare function nlp(text: string, lexicon?: Lexicon): View
 6 | 
 7 | // Constructor
 8 | declare module nlp {
 9 |   /** interpret text without tagging */
10 |   export function tokenize(text: string, lexicon?: Lexicon): View
11 |   /** scan through text with minimal analysis */
12 |   export function lazy(text: string, match?: string): View
13 |   /** mix-in a compromise plugin */
14 |   export function plugin(plugin: Plugin): any
15 |   /** mix-in a compromise plugin */
16 |   export function extend(plugin: Plugin): any
17 |   /** turn a match-string into json */
18 |   export function parseMatch(match: string, opts?: matchOptions): object[]
19 |   /** grab library internals */
20 |   export function world(): object
21 |   /** grab library metadata */
22 |   export function model(): object
23 |   /** grab exposed library methods */
24 |   export function methods(): object
25 |   /** which compute functions run automatically */
26 |   export function hooks(): string[]
27 |   /**  log our decision-making for debugging */
28 |   export function verbose(toLog?: boolean | string): any
29 |   /**  current semver version of the library */
30 |   export const version: string
31 |   /** connect new tags to tagset graph */
32 |   export function addTags(tags: object): any
33 |   /** add new words to internal lexicon */
34 |   export function addWords(words: Lexicon): any
35 |   /** turn a list of words into a searchable graph */
36 |   export function buildTrie(words: string[]): object
37 |   /** compile a set of match objects to a more optimized form */
38 |   export function buildNet(matches: Match[]): Net
39 |   /** add words to the autoFill dictionary */
40 |   export function typeahead(words: Lexicon): any
41 |   /** export internal methods for plugins */
42 |   export interface TypedPlugin<Methods extends object> extends Plugin { methods: Methods }
43 | }
44 | 
45 | export default nlp
46 | 
47 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/model/suffixes.js:
--------------------------------------------------------------------------------
 1 | const rb = 'Adverb'
 2 | const nn = 'Noun'
 3 | const vb = 'Verb'
 4 | const jj = 'Adjective'
 5 | const inf = 'Infinitive'
 6 | // const pres = 'PresentTense'
 7 | 
 8 | 
 9 | export default [
10 |   null,
11 |   null,
12 |   {
13 |     //2-letter
14 |     ce: nn,//connaissance
15 |     ge: nn,
16 |     ie: nn,
17 | 
18 |     er: inf,
19 |     ir: inf,
20 |     ée: vb,
21 |     és: vb,
22 |     sé: vb,
23 |     ré: vb,
24 |     çu: vb,//conçu
25 |     ra: vb,//faudra
26 |     it: vb,//fournit
27 |     ez: vb,//consultez
28 | 
29 |     if: jj,//descriptif
30 |   },
31 |   {
32 |     //3-letter
33 |     ité: nn, //qualité
34 |     eur: nn,//directeur
35 |     ces: nn,//connaissances
36 | 
37 |     ées: vb,//énoncées
38 |     ait: vb,//devrait
39 |     era: vb,//aidera
40 |     ser: vb,//utiliser
41 |     ter: vb,//adopter
42 | 
43 |     ive: jj, //
44 |     ifs: jj, //relatifs
45 |     ile: jj, //civile
46 |     ale: jj, //nationale
47 |     ble: jj, //capable
48 |     aux: jj, //nationaux
49 |     eux: jj, //précieux
50 |     nte: jj, //différente
51 |   },
52 |   {
53 |     //4-letter
54 |     ment: rb,
55 | 
56 |     elle: jj,
57 |     bles: jj,
58 |     ales: jj,
59 |     ique: jj,
60 |     aire: jj,
61 |     ives: jj,
62 |     ntes: jj, //différentes
63 | 
64 |     sent: vb,//produisent
65 | 
66 |     sion: nn,//commission
67 |     eurs: nn,//directeurs
68 |     tion: nn,//amélioration
69 |     ance: nn,//croissance
70 |     euse: jj,//rigoureuse
71 |     ouce: jj//douce
72 |   },
73 |   {
74 |     //5-letter
75 |     tions: nn,//améliorations
76 |     ments: nn,//aliments
77 |     sions: nn,//commissions
78 | 
79 |     aient: vb,//auraient
80 |     arant: vb,//préparant
81 |     irant: vb,//inspirant
82 |     orant: vb,//élaborant
83 |     urant: vb,//assurant
84 |     trant: vb,//montrant
85 |     llant: vb,//détaillant
86 | 
87 |     ouces: jj,//douces
88 |     elles: jj,
89 |     iques: jj,
90 |     aires: jj,
91 |     euses: jj
92 |   },
93 |   {
94 |     //6-letter
95 |   },
96 |   {
97 |     //7-letter
98 |   },
99 | ]


--------------------------------------------------------------------------------
/src/03-three/nouns/api.js:
--------------------------------------------------------------------------------
 1 | export const getNth = (doc, n) => (typeof n === 'number' ? doc.eq(n) : doc)
 2 | 
 3 | // get root form of adjective
 4 | const getRoot = function (m) {
 5 |   m.compute('root')
 6 |   let str = m.text('root')
 7 |   // let isPlural = m.has('#PluralNoun')
 8 |   // if (isPlural) {
 9 |   //   return transform.adjective.fromPlural(str)
10 |   // }
11 |   return str
12 | }
13 | 
/**
 * Add a `.nouns()` method to the given View class.
 * @param {Function} View - the View class to extend; `.nouns()` is put on its prototype
 */
const api = function (View) {
  // a View subclass holding the extra methods for noun matches
  class Nouns extends View {
    constructor(document, pointer, groups) {
      super(document, pointer, groups)
      this.viewType = 'Nouns'
    }

    // list the { singular, plural } forms of each noun
    conjugate(n) {
      const noun = this.methods.two.transform.noun
      const toForms = function (m) {
        const str = m.text()
        // already plural: derive the singular
        if (m.has('#PluralNoun')) {
          return { plural: str, singular: noun.fromPlural(str) }
        }
        // uncountable: both forms are the word itself
        if (m.has('#Uncountable')) {
          return { singular: str, plural: str }
        }
        return { singular: str, plural: noun.toPlural(str) }
      }
      return getNth(this, n).map(toForms, [])
    }

    // keep only the plural nouns
    isPlural(n) {
      return getNth(this, n).if('#PluralNoun')
    }

    // swap each '#Singular' noun for its plural form
    toPlural(n) {
      const noun = this.methods.two.transform.noun
      const singulars = getNth(this, n).if('#Singular')
      return singulars.map(m => m.replaceWith(noun.toPlural(getRoot(m))))
    }

    // swap each '#PluralNoun' for its singular form
    toSingular(n) {
      const noun = this.methods.two.transform.noun
      const plurals = getNth(this, n).if('#PluralNoun')
      return plurals.map(m => m.replaceWith(noun.fromPlural(getRoot(m))))
    }
  }

  // select the '#Noun' matches (or just the nth one), wrapped as a Nouns view
  View.prototype.nouns = function (n) {
    const found = getNth(this.match('#Noun'), n)
    return new Nouns(this.document, found.pointer)
  }
}
69 | export default api


--------------------------------------------------------------------------------
/src/02-two/tagset/tags/misc.js:
--------------------------------------------------------------------------------
 1 | const anything = ['Noun', 'Verb', 'Adjective', 'Adverb', 'Value', 'QuestionWord']
 2 | 
 3 | export default {
 4 |   Adjective: {
 5 |     not: ['Noun', 'Verb', 'Adverb', 'Value'],
 6 |   },
 7 |   Comparable: {
 8 |     is: 'Adjective',
 9 |   },
10 |   Comparative: {
11 |     is: 'Adjective',
12 |   },
13 |   Superlative: {
14 |     is: 'Adjective',
15 |     not: ['Comparative'],
16 |   },
17 |   MaleAdjective: {
18 |     is: 'Adjective',
19 |     not: ['FemaleAdjective'],
20 |   },
21 |   FemaleAdjective: {
22 |     is: 'Adjective',
23 |     not: ['MaleAdjective'],
24 |   },
25 |   PluralAdjective: {
26 |     is: 'Adjective',
27 |   },
28 |   NumberRange: {},
29 |   Adverb: {
30 |     not: ['Noun', 'Verb', 'Adjective', 'Value'],
31 |   },
32 | 
33 |   Determiner: {
34 |     not: ['Noun', 'Verb', 'Adjective', 'Adverb', 'QuestionWord', 'Conjunction', 'Preposition'], //allow 'a' to be a Determiner/Value
35 |   },
36 |   Conjunction: {
37 |     not: anything,
38 |   },
39 |   Preposition: {
40 |     not: ['Noun', 'Verb', 'Adjective', 'Adverb', 'QuestionWord'],
41 |   },
42 |   QuestionWord: {
43 |     not: ['Determiner'],
44 |   },
45 |   Currency: {
46 |     is: 'Noun',
47 |   },
48 |   Expression: {
49 |     not: ['Noun', 'Adjective', 'Verb', 'Adverb'],
50 |   },
51 |   Abbreviation: {},
52 |   Url: {
53 |     not: ['HashTag', 'PhoneNumber', 'Verb', 'Adjective', 'Value', 'AtMention', 'Email'],
54 |   },
55 |   PhoneNumber: {
56 |     not: ['HashTag', 'Verb', 'Adjective', 'Value', 'AtMention', 'Email'],
57 |   },
58 |   HashTag: {},
59 |   AtMention: {
60 |     is: 'Noun',
61 |     not: ['HashTag', 'Email'],
62 |   },
63 |   Emoji: {
64 |     not: ['HashTag', 'Verb', 'Adjective', 'Value', 'AtMention'],
65 |   },
66 |   Emoticon: {
67 |     not: ['HashTag', 'Verb', 'Adjective', 'Value', 'AtMention'],
68 |   },
69 |   Email: {
70 |     not: ['HashTag', 'Verb', 'Adjective', 'Value', 'AtMention'],
71 |   },
72 |   Acronym: {
73 |     not: ['PluralNoun', 'RomanNumeral'],
74 |   },
75 |   Negative: {
76 |     not: ['Noun', 'Adjective', 'Value'],
77 |   },
78 |   Condition: {
79 |     not: ['Verb', 'Adjective', 'Noun', 'Value'],
80 |   },
81 | }
82 | 


--------------------------------------------------------------------------------
/scripts/pack.js:
--------------------------------------------------------------------------------
 1 | /* eslint-disable no-console */
 2 | import fs from 'fs'
 3 | import { pack } from 'efrt'
 4 | import { learn, compress } from 'suffix-thumb'
 5 | import lexicon from '../data/lexicon/index.js'
 6 | import models from '../data/models/index.js'
 7 | // import switches from '../lib/switches/index.js'
 8 | // import senses from '../lib/senses/index.js'
 9 | 
// one entry per generated file: `compress()` builds the object that gets written to `path`
const steps = [
  {
    label: 'lexicon',
    path: './src/01-one/lexicon/model/_data.js',
    compress: function () {
      // group the words by tag, into a series of flat arrays
      const byTag = {}
      for (const [word, value] of Object.entries(lexicon)) {
        // a lone tag may be given as a plain string
        const tags = typeof value === 'string' ? [value] : value
        for (const tag of tags) {
          byTag[tag] = byTag[tag] || []
          byTag[tag].push(word)
        }
      }
      // pack each array into a tiny string
      for (const tag of Object.keys(byTag)) {
        byTag[tag] = pack(byTag[tag])
      }
      return byTag
    },
  },
  {
    label: 'models',
    path: './src/01-one/lexicon/methods/_data.js',
    compress: function () {
      const packed = {}
      for (const k of Object.keys(models)) {
        packed[k] = {}
        for (const [form, pairs] of Object.entries(models[k])) {
          console.log(k, form)
          // learn a model from the pairs, then store its compressed form
          packed[k][form] = compress(learn(pairs))
        }
      }
      return packed
    },
  },
]
52 | 
// run through all our steps
for (const step of steps) {
  console.log(`\n 🕑  - packing ${step.label}..`)
  const packed = step.compress()

  //write it to a file in ./src
  const banner = `// generated in ./lib/${step.label}\n`
  const contents = `${banner}export default ${JSON.stringify(packed, null, 2)}`
  fs.writeFileSync(step.path, contents, 'utf8')

  //get filesize, in units of 1000 bytes
  const size = (fs.statSync(step.path).size / 1000.0).toFixed(1)
  console.log(`       - ${step.label} is  ${size}k\n`)
}
67 | 


--------------------------------------------------------------------------------
/src/01-one/lexicon/model/misc.js:
--------------------------------------------------------------------------------
 1 | export default {
 2 |   // copulas (incomplete)
 3 |   es: ['Copula', 'PresentTense'],
 4 |   est: ['Copula', 'PresentTense'],
 5 |   suis: ['Copula', 'PresentTense'],
 6 |   sommes: ['Copula', 'PresentTense'],
 7 |   etes: ['Copula', 'PresentTense'],
 8 |   sont: ['Copula', 'PresentTense'],
 9 | 
10 |   ete: ['Copula', 'PastTense'],
11 |   etais: ['Copula', 'PastTense'],
12 |   etions: ['Copula', 'PastTense'],
13 | 
14 |   serons: ['Copula', 'FutureTense'],
15 |   seront: ['Copula', 'FutureTense'],
16 |   serai: ['Copula', 'FutureTense'],
17 | 
18 |   cent: ['Multiple', 'Cardinal'],
19 |   mille: ['Multiple', 'Cardinal'],
20 |   million: ['Multiple', 'Cardinal'],
21 |   milliard: ['Multiple', 'Cardinal'],
22 |   quadrillion: ['Multiple', 'Cardinal'],
23 |   centième: ['Multiple', 'Ordinal'],
24 |   millième: ['Multiple', 'Ordinal'],
25 |   millionième: ['Multiple', 'Ordinal'],
26 |   milliardième: ['Multiple', 'Ordinal'],
27 |   billionième: ['Multiple', 'Ordinal'],
28 |   trillionième: ['Multiple', 'Ordinal'],
29 |   // plural numbers
30 |   septs: ['TextValue', 'Cardinal'],
31 | 
32 |   cents: ['Multiple', 'Cardinal'],
33 |   milles: ['Multiple', 'Cardinal'],
34 |   millions: ['Multiple', 'Cardinal'],
35 |   milliards: ['Multiple', 'Cardinal'],
36 | 
37 |   êtes: ['Copula', 'PresentTense'],
38 |   étions: ['Copula', 'PresentTense'],
39 |   serez: ['Copula', 'PresentTense'],
40 |   été: ['Copula'],
41 |   fus: ['Copula', 'PastTense'],
42 |   fut: ['Copula', 'PastTense'],
43 |   fûmes: ['Copula', 'PastTense'],
44 |   fûtes: ['Copula', 'PastTense'],
45 |   furent: ['Copula', 'PastTense'],
46 |   fusse: ['Copula', 'PastTense'],
47 |   fusses: ['Copula', 'PastTense'],
48 |   fût: ['Copula', 'PastTense'],
49 |   fussions: ['Copula', 'PastTense'],
50 |   fussiez: ['Copula', 'PastTense'],
51 |   fussent: ['Copula', 'PastTense'],
52 |   serais: ['Copula', 'PresentTense'],
53 |   serait: ['Copula', 'PresentTense'],
54 |   serions: ['Copula', 'PresentTense'],
55 |   seriez: ['Copula', 'PresentTense'],
56 |   seraient: ['Copula', 'PresentTense'],
57 |   sois: ['Copula', 'PresentTense'],
58 |   soyons: ['Copula', 'PresentTense'],
59 |   soyez: ['Copula', 'PresentTense'],
60 |   être: ['Copula', 'PresentTense'],
61 | 
62 | 
63 | 
64 | }


--------------------------------------------------------------------------------
/src/03-three/numbers/format/toText.js:
--------------------------------------------------------------------------------
 1 | import data from '../data.js'
// the [value, word] tables from the shared number data, in reversed order
// NOTE(review): Array.prototype.reverse() reverses in place and returns the same array,
// so data.ones / data.tens are reordered for every other importer of ../data.js too -
// confirm that nothing else depends on their original order.
let ones = data.ones.reverse()
let tens = data.tens.reverse()
 4 | 
 5 | let multiples = [
 6 |   [1e12, 'mille milliard'],
 7 |   [1e11, 'cent milliard'],
 8 |   [1e9, 'milliard'],
 9 |   [1e8, 'cent million'],
10 |   [1e6, 'million'],
11 |   [100000, 'cent mille'],
12 |   [1000, 'mille'],
13 |   [100, 'cent'],
14 |   [1, 'one'],
15 | ]
16 | 
17 | //turn number into an array of magnitudes, like [[5, million], [2, hundred]]
18 | const getMagnitudes = function (num) {
19 |   let working = num
20 |   let have = []
21 |   multiples.forEach(a => {
22 |     if (num >= a[0]) {
23 |       let howmany = Math.floor(working / a[0])
24 |       working -= howmany * a[0]
25 |       if (howmany) {
26 |         have.push({
27 |           unit: a[1],
28 |           num: howmany,
29 |         })
30 |       }
31 |     }
32 |   })
33 |   return have
34 | }
35 | 
/**
 * Spell out a small number, using at most one entry from `tens` and one from `ones`.
 * @param {number} num - the number to spell
 * @returns {string[]} the words, in order
 */
const twoDigit = function (num) {
  const words = []
  let rest = num
  // 20-90: the first tens entry that fits
  const ten = tens.find(pair => pair[0] <= rest)
  if (ten) {
    words.push(ten[1])
    rest -= ten[0]
  }
  if (rest === 0) {
    return words
  }
  // 0-19: the first ones entry that fits
  const one = ones.find(pair => pair[0] <= rest)
  if (one) {
    // 'et un' - only when a tens word came before it
    if (words.length && one[1] === 'un') {
      words.push('et')
    }
    words.push(one[1])
  }
  return words
}
63 | 
/**
 * Turn a number like 80 into words like 'quatre vingt'.
 * @param {number} num - the number to write out
 * @returns {string[]} the words, in order ('moins' comes first for a negative number)
 */
const toText = function (num) {
  if (num === 0) {
    return ['zero']
  }
  const isNegative = num < 0
  const words = isNegative ? ['moins'] : []
  const magnitudes = getMagnitudes(isNegative ? Math.abs(num) : num)
  // handle multiples
  for (const { unit, num: count } of magnitudes) {
    const named = unit !== 'one'
    // a lone named unit takes no count - don't add a redundant 'un cent'
    if (count !== 1 || !named) {
      words.push(...twoDigit(count))
    }
    if (named) {
      words.push(unit)
    }
  }
  return words
}
89 | export default toText


--------------------------------------------------------------------------------
/src/01-one/lexicon/compute/root.js:
--------------------------------------------------------------------------------
 1 | const verbForm = function (term) {
 2 |   let want = [
 3 |     'FirstPerson',
 4 |     'SecondPerson',
 5 |     'ThirdPerson',
 6 |     'FirstPersonPlural',
 7 |     'SecondPersonPlural',
 8 |     'ThirdPersonPlural',
 9 |   ]
10 |   return want.find(tag => term.tags.has(tag))
11 | }
12 | 
/**
 * Set `term.root` on the terms of a view, using the `methods.two.transform` functions.
 * Only inflected terms get a root: plural nouns, female/plural adjectives, and
 * verbs tagged with a handled tense. Other terms are left untouched.
 * @param {object} view - supplies `world.methods.two.transform` and `docs`
 */
const root = function (view) {
  const transform = view.world.methods.two.transform
  for (const terms of view.docs) {
    for (const term of terms) {
      const tags = term.tags
      const str = term.implicit || term.normal || term.text

      // nouns -> singular form (pronouns are skipped)
      if (tags.has('Noun') && !tags.has('Pronoun') && tags.has('PluralNoun')) {
        term.root = transform.noun.fromPlural(str)
      }

      // adjectives -> singular masculine form
      if (tags.has('Adjective')) {
        const plural = tags.has('PluralAdjective')
        const female = tags.has('FemaleAdjective')
        if (female) {
          const adj = transform.adjective
          term.root = plural ? adj.fromFemalePlural(str) : adj.fromFemale(str)
        } else if (plural) {
          term.root = transform.adjective.fromPlural(str)
        }
      }

      // verbs -> infinitive form. These checks run in order, so a later match overrides an earlier one.
      if (tags.has('Verb')) {
        if (tags.has('PresentTense')) {
          term.root = transform.verb.fromPresentTense(str, verbForm(term))
        }
        if (tags.has('FutureTense')) {
          term.root = transform.verb.fromFutureTense(str, verbForm(term))
        }
        if (tags.has('Passive')) {
          term.root = transform.verb.fromPassive(str, verbForm(term))
        } else if (tags.has('PastTense')) {
          term.root = transform.verb.fromPastParticiple(str, verbForm(term))
        }
        //  fromImperfectTense, fromPastParticiple
      }
    }
  }
}
60 | export default root


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/index.js:
--------------------------------------------------------------------------------
 1 | // 1st pass
 2 | import checkRegex from './1st-pass/regex.js'
 3 | import titleCase from './1st-pass/titlecase.js'
 4 | import checkYear from './1st-pass/year.js'
 5 | // 2nd pass
 6 | import acronym from './2nd-pass/acronym.js'
 7 | import neighbours from './2nd-pass/neighbours.js'
 8 | import nounFallback from './2nd-pass/noun-fallback.js'
 9 | import suffixCheck from './2nd-pass/suffix-lookup.js'
10 | // 3rd pass
11 | import nounGender from './3rd-pass/noun-gender.js'
12 | import nounPlurals from './3rd-pass/noun-plurals.js'
13 | import adjPlurals from './3rd-pass/adj-plurals.js'
14 | import adjGender from './3rd-pass/adj-gender.js'
15 | import verbTense from './3rd-pass/verb-tense.js'
16 | import verbForm from './3rd-pass/verb-form.js'
17 | import numberTypes from './3rd-pass/number-types.js'
18 | import fixContractions from './3rd-pass/fix-contractions.js'
19 | 
20 | // these methods don't care about word-neighbours
// First pass: per-term checks that don't care about word-neighbours.
// For each index: try the titlecase check; only if that finds nothing,
// try the look-alike regex rules; then always run the year check.
const firstPass = function (terms, world) {
  for (let n = 0; n < terms.length; n += 1) {
    // is it titlecased? if not, try look-like rules
    if (!titleCase(terms, n, world)) {
      checkRegex(terms, n, world)
    }
    // turn '1993' into a year
    checkYear(terms, n, world)
  }
}
// Second pass: for each term, run the fallback taggers in order and stop
// at the first one that returns a truthy result:
// acronym -> suffix lookup -> neighbours -> noun fallback.
const secondPass = function (terms, world) {
  for (let n = 0; n < terms.length; n += 1) {
    if (acronym(terms, n, world)) {
      continue
    }
    if (suffixCheck(terms, n, world)) {
      continue
    }
    if (neighbours(terms, n, world)) {
      continue
    }
    nounFallback(terms, n, world)
  }
}
// Third pass: run every refinement step on every term (all steps always run),
// then make a separate sweep over the terms for contractions.
const thirdPass = function (terms, world) {
  let n = 0
  while (n < terms.length) {
    nounGender(terms, n, world)
    nounPlurals(terms, n, world)
    adjPlurals(terms, n, world)
    adjGender(terms, n, world)
    verbTense(terms, n, world)
    verbForm(terms, n, world)
    numberTypes(terms, n, world)
    n += 1
  }
  // (4th pass) - runs only after the loop above has finished for all terms
  n = 0
  while (n < terms.length) {
    fixContractions(terms, n, world)
    n += 1
  }
}
54 | 
55 | 
// Run the three tagging passes, in order, over each term-list in view.docs.
// Returns the same view that was passed in.
const tagger = function (view) {
  const { world, docs } = view
  for (const terms of docs) {
    firstPass(terms, world)
    secondPass(terms, world)
    thirdPass(terms, world)
  }
  return view
}
65 | export default tagger


--------------------------------------------------------------------------------
/types/misc.ts:
--------------------------------------------------------------------------------
/** A document: an array of Term arrays. */
export type Document = Term[][]

/** A positional tuple; every slot is optional: n, start, end, startId, endId. */
export type Pointer = [n?: number, start?: number, end?: number, startId?: string, endId?: string]

/** String-literal union of output-method names. */
export type outMethods = 'text' | 'normal' | 'offset' | 'terms' | 'topk' | 'json' | 'tags' | 'array' | 'debug'

/** Loosely typed: any object. NOTE(review): presumably named match-groups — confirm. */
export type Groups = object
 8 | 
/**
 * A single term object.
 * `text`, `pre`, `post` and `normal` are required; every other field is optional.
 */
export interface Term {
  text: string,
  // NOTE(review): presumably the characters before/after the word — confirm
  pre: string,
  post: string,
  normal: string,

  // in /two
  tags?: Set<string>,
  // tuple of optional numbers: n, start
  index?: [n?: number, start?: number],
  id?: string,
  chunk?: string,
  dirty?: boolean

  // other things you may find...
  syllables?: string[],
}
25 | 
// possible values to .json()
// every option is an optional boolean flag, except `terms`, which is a nested
// object of optional boolean flags for per-term output.
export interface JsonProps {
  /**  a perfect copy of the input text */
  text?: boolean
  /** normalized whitespace, case, unicode, punctuation */
  normal?: boolean
  /** lowercase, trimmed, contractions expanded. */
  reduced?: boolean
  /** cleanup whitespace */
  trim?: boolean
  /** character-position where this begins */
  offset?: boolean
  /** frequency of this match in the document */
  count?: boolean
  /**  remove duplicate results*/
  unique?: boolean
  /** starting term # in document */
  index?: boolean
  /** options for each term */
  terms?: {
    text?: boolean
    normal?: boolean
    clean?: boolean
    implicit?: boolean
    tags?: boolean
    whitespace?: boolean
    id?: boolean
    offset?: boolean
    bestTag?: boolean
  }
}
57 | 
// a key-value object of words, terms
// both keys and values are typed as strings here
export interface Lexicon {
  [key: string]: string
}
62 | 
/**
 * Shape of a plugin object. Every field is optional.
 * Most fields are loosely typed as `object`; `hooks` is a list of strings.
 */
export interface Plugin {
  methods?: object,
  model?: object,
  compute?: object,
  hooks?: string[],
  tags?: object,
  words?: object,
  // a zero-argument function returning an object
  lib?: () => object,
  api?: (fn: (view: any) => {}) => void,  //should be View
  mutate?: (fn: (world: object) => {}) => void,
}
74 | 
/** Optional settings for a match: a numeric `fuzzy` value and a `caseSensitive` flag. */
export interface matchOptions {
  // NOTE(review): valid range/meaning of the number is not shown here — confirm
  fuzzy?: number,
  caseSensitive?: boolean,
}
79 | 
/**
 * A match rule. Only `match` (a string) is required.
 * `tag` and `unTag` accept one tag name or a list of them.
 */
export interface Match {
  match: string,
  tag?: string | string[],
  unTag?: string | string[],
  group?: string | number,
  reason?: string,
}
87 | 
/** A net object: required `hooks` and `isNet`, optional loosely typed `always`. */
export interface Net {
  hooks: object,
  always?: any,
  isNet: boolean
}


--------------------------------------------------------------------------------
/src/02-two/tagset/tags/nouns.js:
--------------------------------------------------------------------------------
// the three entity tags, reused below as a shared `not` list
const entity = ['Person', 'Place', 'Organization']

// Tag definitions for nouns. Each key is a tag name; its value may carry
// `is`, `also` and `not` fields.
// NOTE(review): presumably `is` = parent tag, `also` = extra tags applied with it,
// `not` = tags it cannot co-exist with — confirm against the tagset compiler.
export default {
  Noun: {
    not: ['Verb', 'Adjective', 'Adverb', 'Value', 'Determiner'],
  },
  Singular: {
    is: 'Noun',
    not: ['PluralNoun'],
  },
  ProperNoun: {
    is: 'Noun',
  },
  Person: {
    is: 'Singular',
    also: ['ProperNoun'],
    not: ['Place', 'Organization', 'Date'],
  },
  FirstName: {
    is: 'Person',
  },
  MaleName: {
    is: 'FirstName',
    not: ['FemaleName', 'LastName'],
  },
  FemaleName: {
    is: 'FirstName',
    not: ['MaleName', 'LastName'],
  },
  LastName: {
    is: 'Person',
    not: ['FirstName'],
  },
  Honorific: {
    is: 'Noun',
    not: ['FirstName', 'LastName', 'Value'],
  },
  Place: {
    is: 'Singular',
    not: ['Person', 'Organization'],
  },
  Country: {
    is: 'Place',
    also: ['ProperNoun'],
    not: ['City'],
  },
  City: {
    is: 'Place',
    also: ['ProperNoun'],
    not: ['Country'],
  },
  Region: {
    is: 'Place',
    also: ['ProperNoun'],
  },
  // deliberately left without a parent tag (the `is` line is commented out)
  Address: {
    // is: 'Place',
  },
  Organization: {
    is: 'ProperNoun',
    not: ['Person', 'Place'],
  },
  SportsTeam: {
    is: 'Organization',
  },
  School: {
    is: 'Organization',
  },
  Company: {
    is: 'Organization',
  },
  PluralNoun: {
    is: 'Noun',
    not: ['Singular'],
  },
  Uncountable: {
    is: 'Noun',
  },
  Pronoun: {
    is: 'Noun',
    not: entity,
  },
  Actor: {
    is: 'Noun',
    not: entity,
  },
  Activity: {
    is: 'Noun',
    not: ['Person', 'Place'],
  },
  Unit: {
    is: 'Noun',
    not: entity,
  },
  Demonym: {
    is: 'Noun',
    also: ['ProperNoun'],
    not: entity,
  },
  Possessive: {
    is: 'Noun',
  },
  // grammatical gender of nouns (the two tags exclude each other)
  MaleNoun: {
    is: 'Noun',
    not: ['FemaleNoun'],
  },
  FemaleNoun: {
    is: 'Noun',
    not: ['MaleNoun'],
  },
}
113 | 


--------------------------------------------------------------------------------
/data/lexicon/misc/adverbs.js:
--------------------------------------------------------------------------------
// A flat list of adverb strings (single words, multi-word phrases, and elided forms).
// all '-ment' words are tagged by suffix, so they are not listed here.
// Commented-out entries are kept as a record of words deliberately left out.
export default [
  'pas',
  // 'plus',
  'ainsi',
  'lors',
  'alors',
  'aussi',
  'donc',
  // listed both without and with the accent
  'tres',
  'très',
  'deja',
  'encore',
  // 'tout',
  'bien',
  // 'moins',
  'non',
  // 'hier',
  "jusqu'",
  'meme',
  // 'peu',
  'toujours',
  'cependant',
  'ailleurs',
  'toutefois',
  // 'ici',
  'environ',
  'quant',
  'que',
  'tandis',
  'beaucoup',
  'outre',
  'qu',
  'ensuite',
  'tant',
  'jamais',
  'enfin',
  'tard',
  'desormais',
  // 'maintenant',
  'trop',
  'autant',
  'loin',
  'pourtant',
  'surtout',
  'autour',
  'auparavant',
  'neanmoins',
  'assez',
  'tot',
  'mieux',
  'souvent',
  'plutot',
  'demain',
  'pres',
  'longtemps',
  'presque',
  'peut-etre',
  // 'mal',
  'avant',
  'partout',
  'davantage',
  'juste',
  'vite',
  'puis',
  'parfois',
  'guere',
  'au dela',
  'oui',
  'au dessus',
  'ores',
  // 'dehors',
  'si',
  'ci',
  'bientot',
  // 'ensemble',
  'apres',
  'depuis',
  'quand',
  'quelque',
  'aussitôt',
  'quasi',
  // 'fort',
  'vis a vis',
  'dessous',
  'voire',
  'certes',
  'jusque la',
  'ci dessus',
  // 'matin',
  'ci dessous',
  'contre',
  'autrefois',
  'combien',
  'comme',
  'sous',
  'inter',
  'la bas',
  'dorenavant',
  'dessus',
  'sans',
  'alias',
  'bel',
  'jadis',
  // 'rien',
  'etc',
  'soit',
  'entre temps',
  'avant hier',
  "presqu'",
  // 'point',
  'la dessus',
  'mais',
  'debout',
  'ultra',
  'bref',
  'naguere',
  'la-dedans',
  'deca',
  'ca',
  'soi-disant',
  'devant',
  'fi',
  'dedans',
  // NOTE(review): 'deja' already appears earlier in this list — duplicate entry
  'deja',
  'idem',
  'sic',
  'sitot',
  'derriere',
  'haut',
  'outre mer',
  'crescendo',
  'pourquoi',
  'primo',
  'secundo',
  'tertio',
  'quelqu',
  'ferme',
  'au-dessous',
  'pele mele',
  // NOTE(review): 'sident' does not look like a complete word — confirm it belongs here
  'sident',
]
143 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/2nd-pass/acronym.js:
--------------------------------------------------------------------------------
 1 | const oneLetterAcronym = /^[A-Z]('s|,)?$/
 2 | const isUpperCase = /^[A-Z-]+$/
 3 | const periodAcronym = /([A-Z]\.)+[A-Z]?,?$/
 4 | const noPeriodAcronym = /[A-Z]{2,}('s|,)?$/
 5 | const lowerCaseAcronym = /([a-z]\.)+[a-z]\.?$/
 6 | 
 7 | const oneLetterWord = {
 8 |   I: true,
 9 |   A: true,
10 | }
11 | // just uppercase acronyms, no periods - 'UNOCHA'
12 | const isNoPeriodAcronym = function (term, model) {
13 |   let str = term.text
14 |   // ensure it's all upper-case
15 |   if (isUpperCase.test(str) === false) {
16 |     return false
17 |   }
18 |   // long capitalized words are not usually either
19 |   if (str.length > 5) {
20 |     return false
21 |   }
22 |   // 'I' is not a acronym
23 |   if (oneLetterWord.hasOwnProperty(str)) {
24 |     return false
25 |   }
26 |   // known-words, like 'PIZZA' is not an acronym.
27 |   if (model.one.lexicon.hasOwnProperty(term.normal)) {
28 |     return false
29 |   }
30 |   //like N.D.A
31 |   if (periodAcronym.test(str) === true) {
32 |     return true
33 |   }
34 |   //like c.e.o
35 |   if (lowerCaseAcronym.test(str) === true) {
36 |     return true
37 |   }
38 |   //like 'F.'
39 |   if (oneLetterAcronym.test(str) === true) {
40 |     return true
41 |   }
42 |   //like NDA
43 |   if (noPeriodAcronym.test(str) === true) {
44 |     return true
45 |   }
46 |   return false
47 | }
48 | 
// Tag terms[i] as an acronym when it looks like one.
// Returns true when a tag was applied, otherwise null.
//  - terms already tagged RomanNumeral or Acronym are skipped
//  - a no-period or one-letter acronym has its tags cleared, then gets Acronym + Noun
//  - a short, or short upper-case, Organization gets Acronym added to its existing tags
const isAcronym = function (terms, i, world) {
  const { setTag } = world.methods.one
  const term = terms[i]
  //these are not acronyms
  if (term.tags.has('RomanNumeral') || term.tags.has('Acronym')) {
    return null
  }
  // work out which (if any) of the 'replace all tags' rules applies
  let reason = null
  if (isNoPeriodAcronym(term, world.model)) {
    //non-period ones are harder
    reason = '3-no-period-acronym'
  } else if (!oneLetterWord.hasOwnProperty(term.text) && oneLetterAcronym.test(term.text)) {
    // one-letter acronyms
    reason = '3-one-letter-acronym'
  }
  if (reason !== null) {
    term.tags.clear()
    setTag([term], ['Acronym', 'Noun'], world, false, reason)
    return true
  }
  if (term.tags.has('Organization')) {
    //if it's a very-short organization?
    if (term.text.length <= 3) {
      setTag([term], 'Acronym', world, false, '3-org-acronym')
      return true
    }
    // upper-case org, like UNESCO
    if (isUpperCase.test(term.text) && term.text.length <= 6) {
      setTag([term], 'Acronym', world, false, '3-titlecase-acronym')
      return true
    }
  }
  return null
}
80 | export default isAcronym
81 | 


--------------------------------------------------------------------------------
/src/02-two/preTagger/model/regex/regex-numbers.js:
--------------------------------------------------------------------------------
// Pattern rules for number-like terms.
// Each entry is [regular expression, tag or list of tags, optional example/label string].
// NOTE(review): how the third element is consumed is not shown here — confirm before editing it.
export default [

  // hour-style times, like '04h' and '23h30'
  [/^[012]?[0-9]h$/i, 'Time', '04h'],
  [/^[012]?[0-9]h[0-9]{2}$/i, 'Time', '23h30'],
  // an apostrophe followed by exactly two digits
  [/^'[0-9]{2}$/, 'Year'],
  // times
  [/^[012]?[0-9](:[0-5][0-9])(:[0-5][0-9])$/, 'Time', '3:12:31'],
  [/^[012]?[0-9](:[0-5][0-9])?(:[0-5][0-9])$/, 'Time', '1:12'],

  // iso-dates
  [/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}/i, 'Date', 'iso-date'],
  [/^[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,4}$/, 'Date', 'iso-dash'],
  [/^[0-9]{1,4}\/[0-9]{1,2}\/[0-9]{1,4}$/, 'Date', 'iso-slash'],
  [/^[0-9]{1,4}\.[0-9]{1,2}\.[0-9]{1,4}$/, 'Date', 'iso-dot'],
  [/^[0-9]{1,4}-[a-z]{2,9}-[0-9]{1,4}$/i, 'Date', '12-dec-2019'],

  // timezones
  [/^utc ?[+-]?[0-9]+$/, 'Timezone', 'utc-9'],
  [/^(gmt|utc)[+-][0-9]{1,2}$/i, 'Timezone', 'gmt-3'],

  //phone numbers
  [/^[0-9]{3}-[0-9]{4}$/, 'PhoneNumber', '421-0029'],
  [/^(\+?[0-9][ -])?[0-9]{3}[ -]?[0-9]{3}-[0-9]{4}$/, 'PhoneNumber', '1-800-'],


  //money
  //like $5.30 (currency symbol before the digits)
  [
    /^[-+]?[$\xA2-\xA5\u058F\u060B\u09F2\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u17DB\u20A0-\u20BD\uA838\uFDFC\uFE69\uFF04\uFFE0\uFFE1\uFFE5\uFFE6][-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?([kmb]|bn)?\+?$/,
    ['Money', 'Value'],
    '$5.30',
  ],
  //like 5.30$ (currency symbol after the digits)
  [
    /^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?[$\xA2-\xA5\u058F\u060B\u09F2\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u17DB\u20A0-\u20BD\uA838\uFDFC\uFE69\uFF04\uFFE0\uFFE1\uFFE5\uFFE6]\+?$/,
    ['Money', 'Value'],
    '5.30£',
  ],
  //like $400usd (digits followed by a currency code)
  [/^[-+]?[$£]?[0-9]([0-9,.])+(usd|eur|jpy|gbp|cad|aud|chf|cny|hkd|nzd|kr|rub)$/i, ['Money', 'Value'], '$400usd'],

  //numbers
  // 50 | -50 | 3.23  | 5,999.0  | 10+
  [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?\+?$/, ['Cardinal', 'NumericValue'], '5,999'],
  // ordinals: the pattern accepts an 'e' or 'er' suffix (e.g. 2e, 1er);
  // NOTE(review): the '53rd' example string does not itself match this pattern
  [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?(e|er)$/, ['Ordinal', 'NumericValue'], '53rd'],
  // leading-dot decimals, like .73
  // NOTE(review): the '.73th' example string does not itself match this pattern
  [/^\.[0-9]+\+?$/, ['Cardinal', 'NumericValue'], '.73th'],
  //percent
  [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?%\+?$/, ['Percent', 'Cardinal', 'NumericValue'], '-4%'],
  [/^\.[0-9]+%$/, ['Percent', 'Cardinal', 'NumericValue'], '.3%'],
  //fraction (optional 'e'/'er' suffix and optional trailing 's')
  [/^[0-9]{1,4}\/[0-9]{1,4}(e|er)?s?$/, ['Fraction', 'NumericValue'], '2/3rds'],
  //range
  [/^[0-9.]{1,3}[a-z]{0,2}[-–—][0-9]{1,3}[a-z]{0,2}$/, ['Value', 'NumberRange'], '3-4'],
  //time-range
  [/^[0-9]{1,2}(:[0-9][0-9])?(am|pm)? ?[-–—] ?[0-9]{1,2}(:[0-9][0-9])?(am|pm)$/, ['Time', 'NumberRange'], '3-4pm'],
  //with unit (digits/dots followed by 1-4 lower-case letters)
  [/^[0-9.]+([a-z]{1,4})$/, 'Value', '9km'],
]


--------------------------------------------------------------------------------
/src/01-one/lexicon/model/lexicon.js:
--------------------------------------------------------------------------------
 1 | import lexData from './_data.js'
 2 | import { unpack } from 'efrt'
 3 | import transform from '../methods/index.js'
 4 | import misc from './misc.js'
 5 | 
 6 | const tagMap = {
 7 |   first: 'FirstPerson',
 8 |   second: 'SecondPerson',
 9 |   third: 'ThirdPerson',
10 |   firstPlural: 'FirstPersonPlural',
11 |   secondPlural: 'SecondPersonPlural',
12 |   thirdPlural: 'ThirdPersonPlural',
13 | }
14 | 
15 | let words = {}
16 | Object.keys(lexData).forEach(tag => {
17 |   let wordsObj = unpack(lexData[tag])
18 |   Object.keys(wordsObj).forEach(w => {
19 |     words[w] = tag
20 | 
21 |     // expand
22 |     if (tag === 'MaleAdjective') {
23 |       let res = transform.adjective.conjugate(w)
24 |       words[res.female] = words[res.female] || 'FemaleAdjective'
25 |       words[res.plural] = words[res.plural] || 'MaleAdjective'
26 |       words[res.femalePlural] = words[res.femalePlural] || 'FemaleAdjective'
27 |     }
28 |     if (tag === 'Cardinal') {
29 |       words[w] = ['TextValue', 'Cardinal']
30 |     }
31 |     if (tag === 'Noun' || tag === 'MaleNoun' || tag === 'FemaleNoun') {
32 |       words[w] = [tag, 'Singular']
33 |       let plur = transform.noun.toPlural(w)
34 |       words[plur] = words[plur] || ['Noun', 'Plural']
35 |     }
36 |     if (tag === 'Ordinal') {
37 |       words[w] = ['TextValue', 'Ordinal']
38 |       let norm = w.replace(/è/, 'e')
39 |       words[norm] = words[norm] || ['TextValue', 'Ordinal']
40 |     }
41 |     if (tag === 'MaleNoun') {
42 |       let p = transform.noun.toPlural(w)
43 |       words[p] = words[p] || 'PluralNoun'
44 |     }
45 |     if (tag === 'Infinitive') {
46 |       // do future-tense
47 |       let res = transform.verb.toFutureTense(w)
48 |       Object.keys(res).forEach(k => {
49 |         if (!words[res[k]]) {
50 |           words[res[k]] = words[res[k]] || [tagMap[k], 'FutureTense']
51 |         }
52 |       })
53 |       // do present-tense
54 |       res = transform.verb.toPresentTense(w)
55 |       Object.keys(res).forEach(k => {
56 |         if (!words[res[k]]) {
57 |           words[res[k]] = words[res[k]] || [tagMap[k], 'PresentTense']
58 |         }
59 |       })
60 |       // do imperfect mood
61 |       res = transform.verb.toImperfect(w)
62 |       Object.keys(res).forEach(k => words[res[k]] = words[res[k]] || 'Verb')
63 |       // past-participle
64 |       let out = transform.verb.toPastParticiple(w)
65 |       words[out] = words[out] || 'PastParticiple'
66 |     }
67 |   })
68 | })
69 | 
70 | let lexicon = Object.assign({}, words, misc)
71 | // console.log(Object.keys(lexicon).length.toLocaleString(), 'words')
72 | // console.log(lexicon['ralentir'])
73 | export default lexicon


--------------------------------------------------------------------------------
/tests/numbers/ordinal.test.js:
--------------------------------------------------------------------------------
 1 | import test from 'tape'
 2 | import nlp from '../_lib.js'
 3 | let here = '[number ordinal] '
 4 | 
 5 | 
 6 | let arr = [
 7 |   [0, 'zero', 'zeroième'],
 8 |   // [1, 'un', 'unième'],
 9 |   [2, 'deux', 'deuxième'],
10 |   [3, 'trois', 'troisième'],
11 |   [4, 'quatre', 'quatrième'],
12 |   [5, 'cinq', 'cinquième'],
13 |   [6, 'six', 'sixième'],
14 |   [7, 'sept', 'septième'],
15 |   [8, 'huit', 'huitième'],
16 |   [9, 'neuf', 'neuvième'],
17 | 
18 |   [10, 'dix', 'dixième'],
19 |   [11, 'onze', 'onzième'],
20 |   [12, 'douze', 'douzième'],
21 |   [13, 'treize', 'treizième'],
22 |   [14, 'quatorze', 'quatorzième'],
23 |   [15, 'quinze', 'quinzième'],
24 |   [16, 'seize', 'seizième'],
25 |   [17, 'dix sept', 'dix septième'],
26 |   [18, 'dix huit', 'dix huitième'],
27 |   [19, 'dix neuf', 'dix neuvième'],
28 | 
29 |   [20, 'vingt', 'vingtième'],
30 |   [30, 'trente', 'trentième'],
31 |   [40, 'quarante', 'quarantième'],
32 |   [50, 'cinquante', 'cinquantième'],
33 |   [60, 'soixante', 'soixantième'],
34 |   [70, 'soixante dix', 'soixante dixième'],
35 |   [80, 'quatre vingt', 'quatre vingtième'],
36 |   [90, 'quatre vingt dix huit', 'quatre vingt dix huitième'],
37 | 
38 |   [100, 'cent', 'centième'],
39 |   [1000, 'mille', 'millième'],
40 |   [1000000, 'million', 'millionième'],//million 1000,000
41 |   [1000000000, 'milliard', 'milliardième'],//billion 1000,000,000
42 |   // [1000000000000, 'mille milliards', 'mille milliardième'],//trillion 1000,000,000
43 | 
44 | ]
// every cardinal in `arr` should convert to its ordinal text
test('cardinal to ordinal:', function (t) {
  for (const [, card, ord] of arr) {
    const doc = nlp(card).numbers().toOrdinal()
    t.equal(doc.text(), ord, here + ' [toOrdinal] ' + card)
  }
  t.end()
})
// every ordinal in `arr` should convert back to its cardinal text
test('ordinal -> cardinal:', function (t) {
  for (const [, card, ord] of arr) {
    const doc = nlp(ord).numbers().toCardinal()
    t.equal(doc.text(), card, here + ' [toCardinal] ' + card)
  }
  t.end()
})
61 | 
62 | 
// written-out ordinals should become their numeric ordinal form via toNumber()
test('ordinal fmt:', function (t) {
  // rows of [number, ordinal text, expected numeric form]
  const list = [
    // [1, 'première', '1er'],//'first'
    [2, 'deuxième', '2e'],//'second'
    [3, 'troisième', '3e'],//'third'
    [4, 'quatrième', '4e'],//'fourth'
    [5, 'cinquième', '5e'],//'fifth'
    [6, 'sixième', '6e'],//'sixth'
    [7, 'septième', '7e'],//'seventh'
    [8, 'huitième', '8e'],//'eighth'
    [9, 'neuvième', '9e'],//'ninth'
    [10, 'dixième', '10e'],//'tenth'
  ]
  for (const [, str, want] of list) {
    const m = nlp(str).numbers().toNumber()
    t.equal(m.text(), want, here + str)
  }
  t.end()
})
83 | 


--------------------------------------------------------------------------------
/tests/numbers/number-misc.test.js:
--------------------------------------------------------------------------------
 1 | import test from 'tape'
 2 | import nlp from '../_lib.js'
 3 | let here = '[number-misc] '
 4 | 
 5 | 
 6 | test('num equals', function (t) {
 7 |   let arr = [
 8 |     ['un cent', 'cent'],
 9 |     ['trois cents', 'trois cent'],
10 |     ['un million', 'million'],
11 |     ['3 cent', 'trois cent'],
12 |     ['cinquante', 'cinquantième'],
13 |     ['sept', 'septième'],
14 |     ['dix huit', 'dix huitième'],
15 |     ['moins dix huitième', '-18e'],
16 |     ['moins dix huit', '-18'],
17 |     ['moins deux centième', '-200'],
18 |     ['quatorze cent', 'quatorze centième']
19 |   ]
20 |   arr.forEach(a => {
21 |     let [left, right] = a
22 |     left = nlp(left).numbers().get()[0]
23 |     right = nlp(right).numbers().get()[0]
24 |     t.equal(left, right, here + a.join(' == '))
25 |   })
26 |   t.end()
27 | })
28 | 
// arithmetic on a number should keep its currency prefix / unit suffix / ordinal suffix
test('prefix/suffix:', function (t) {
  // rows of [input, method name, expected text, label]
  const cases = [
    ['$7,938', 'add', '$7939', 'add money'],
    ['7,938kg', 'minus', '7937kg', 'minus w/ unit'],
    ['938.4cm', 'minus', '937.4cm', 'minus w/ decimal'],
    ['33e', 'add', '34e', 'add ordinal'],
  ]
  for (const [input, method, want, label] of cases) {
    const doc = nlp(input).numbers()[method](1)
    t.equal(doc.text(), want, here + label)
  }
  t.end()
})
43 | 
44 | // test('units-basic:', function (t) {
45 | //   let arr = [
46 | //     // ['33km', 'km'],
47 | //     ['33 km', 'km'],
48 | //     ['40,000 mètres', 'mètres'],
49 | //     ['1 pouce', 'pouce'],
50 | //     ['2 pouces', 'pouces'],
51 | //     ['seven hundred litres', 'litres'],
52 | //     ['one litre', 'litre'],
53 | //     ['0.4 mètre', 'meter'],
54 | //     // ['3 km2', 'km2'],
55 | //     ['3 km²', 'km²'],
56 | //     // ['44 °c', '°c'],
57 | //   ]
58 | //   arr.forEach(a => {
59 | //     let m = nlp(a[0]).numbers().units()
60 | //     t.equal(m.out('normal'), a[1], here + a[0])
61 | //   })
62 | //   t.end()
63 | // })
64 | 
65 | 
// adding to a written-out number inside a sentence rewrites it in place
test('plus:', function (t) {
  const small = nlp(`j'ai quatre vingt deux pommes`)
  small.numbers().add(2)
  t.equal(small.text(), `j'ai quatre vingt quatre pommes`, here + 'plus-2')

  // a negative number that crosses zero
  const negative = nlp(`j'ai moins quarante pommes`)
  negative.numbers().add(50)
  t.equal(negative.text(), `j'ai dix pommes`, here + 'plus-50')
  t.end()
})
76 | 
// subtracting from a written-out number inside a sentence rewrites it in place
test('minus:', function (t) {
  // a positive number that crosses zero
  const positive = nlp(`j'ai quarante pommes`)
  positive.numbers().minus(50)
  t.equal(positive.text(), `j'ai moins dix pommes`, here + 'minus-50')

  const negative = nlp(`j'ai moins quarante pommes`)
  negative.numbers().minus(50)
  t.equal(negative.text(), `j'ai moins quatre vingt dix pommes`, here + 'minus-50')
  t.end()
})


--------------------------------------------------------------------------------
/src/02-two/preTagger/methods/guessGender.js:
--------------------------------------------------------------------------------
 1 | let masc = new Set(['le', 'un', 'du'])
 2 | let femme = new Set(['la', 'une'])
 3 | 
 4 | const femaleEnds = ['anse', 'ette', 'esse', 'ance', 'eine', 'ure', 'ion']
 5 | const maleEnds = [
 6 |   'age', 'isme', 'eau', 'ment', 'in', 'ou', 'et', 'ege', 'eme', 'ome', 'aume', 'age', 'isme', 'an', 'ent', 'ai', 'out', 'et', 'eu', 'ut', 'is', 'il', 'ex',
 7 |   'an', 'and', 'ant', 'ent', 'in', 'int', 'om', 'ond', 'ont', 'eau', 'au', 'aud', 'aut', 'o', 'os', 'ot', 'ai', 'ais', 'ait', 'es', 'et', 'ou', 'out', 'out', 'oux', 'i', 'il', 'it', 'is', 'y', 'at', 'as', 'ois', 'oit', 'u', 'us', 'ut',
 8 |   'eu', 'er', 'cé', 'age', 'ege', 'ème', 'ome', 'aume', 'isme', 'as', 'is', 'os', 'us', 'ex', 'it', 'est', 'al', 'el', 'il', 'ol', 'eul', 'all', 'if', 'ef', 'ac', 'ic', 'oc', 'uc', 'am', 'um', 'en', 'air', 'er',
 9 |   'erf', 'ert', 'ar', 'arc', 'ars', 'art', 'our', 'ours', 'or', 'ord', 'ors', 'ort', 'ir', 'oir', 'eur', 'ail', 'eil', 'euil', 'ueil', 'ing',
10 | ]
11 | 
12 | 
// Guess a noun's gender from its ending.
// One trailing 's' is dropped from term.normal first; female endings are
// checked before male ones. Returns 'FemaleNoun', 'MaleNoun', or null.
const suffixGuess = function (term) {
  const stem = term.normal.replace(/s$/, '')
  const endsWithAny = list => list.some(suff => stem.endsWith(suff))
  if (endsWithAny(femaleEnds)) {
    return 'FemaleNoun'
  }
  return endsWithAny(maleEnds) ? 'MaleNoun' : null
}
24 | 
// Last-resort guess: a word ending in 'e' or 'es' is called female.
// Anything else gets null (no guess).
const fallback = function (term) {
  const str = term.normal
  const looksFemale = ['e', 'es'].some(end => str.endsWith(end))
  return looksFemale ? 'FemaleNoun' : null //-?
}
32 | 
// Look at the one or two terms before index i for a gendered marker word.
// Stops with null as soon as it runs off the start of the list.
// Returns 'MaleNoun', 'FemaleNoun', or null.
const lookLeft = function (terms, i) {
  for (const back of [1, 2]) {
    const prev = terms[i - back]
    if (!prev) {
      return null
    }
    if (masc.has(prev.normal)) {
      return 'MaleNoun'
    }
    if (femme.has(prev.normal)) {
      return 'FemaleNoun'
    }
  }
  return null
}
48 | 
// look for a gendered adjective
// Only the single term directly after index i is inspected.
// Returns 'MaleNoun', 'FemaleNoun', or null.
const lookRight = function (terms, i) {
  const next = terms[i + 1]
  if (!next) {
    return null
  }
  if (next.tags.has('MaleAdjective')) {
    return 'MaleNoun'
  }
  if (next.tags.has('FemaleAdjective')) {
    return 'FemaleNoun'
  }
  return null
}
65 | 
// Decide the gender tag for the noun at terms[i].
// Non-nouns get null. An existing MaleNoun/FemaleNoun tag is returned as-is.
// Otherwise the guessers run in order and the first truthy answer is returned:
// marker word to the left, adjective to the right, suffix, then the 'e' fallback.
const guessGender = function (terms, i) {
  const term = terms[i]
  if (!term.tags.has('Noun')) {
    return null
  }
  for (const known of ['MaleNoun', 'FemaleNoun']) {
    if (term.tags.has(known)) {
      return known
    }
  }
  return lookLeft(terms, i) || lookRight(terms, i) || suffixGuess(term) || fallback(term)
}
83 | export default guessGender


--------------------------------------------------------------------------------
/plugins/dates/tests/backburner/equals.ignore.js:
--------------------------------------------------------------------------------
 1 | import test from 'tape'
 2 | import nlp from './_lib.js'
 3 | 
// options passed to .dates() for every phrase in the test below
const context = {
  today: '2020-01-21',
  timezone: 'Canada/Pacific',
}

// Pairs of [phrase, equivalent phrase or date string].
// The test below parses both sides and compares the start of the first date found.
const arr = [
  // explicit-dates
  [`march 2nd`, '2020-03-02T00:00:00.000-08:00'],
  [`2 march`, '2020-03-02T00:00:00.000-08:00'],
  [`tues march 2`, '2020-03-02T00:00:00.000-08:00'],
  [`march the second`, '2020-03-02T00:00:00.000-08:00'],
  [`on the 2nd of march`, '2020-03-02T00:00:00.000-08:00'],

  // numerical-dates
  [`1999/03/02`, 'march 2 1999'],
  [`1999-03-02`, 'march 2 1999'],
  [`03-02-1999`, 'march 2nd 1999'],
  [`03/02`, 'march 2'],
  [`2015.08.13`, 'aug 13 2015'],

  // named-dates
  [`today`, '2020-01-21'],
  [`now`, 'right now'],
  [`q1`, 'jan 1'],
  [`tomorrow`, '2020-01-22'],

  // time
  [`2pm`, '2020-01-21T14:00:00.000-08:00'],
  [`2:12pm`, '2020-01-21T14:12:00.000-08:00'],
  [`2pm eastern time`, '2020-01-21T14:00:00.000-05:00'],
  [`2:12 in the evening`, '2020-01-21T14:12:00.000-08:00'],
  [`02:12:00am`, '2020-01-21T02:12:00.000-08:00'],
  [`2 oclock am`, '2020-01-21T02:00:00.000-08:00'],
  [`noon`, 'today at 12pm'],
  [`at night`, 'today at 8:00pm'],
  // NOTE(review): 'in the morning' is paired with an evening time on the next day — confirm intent
  [`in the morning`, 'tomorrow at 8:00pm'],
  [`tomorrow evening`, 'Jan 22 6pm'],
  [`aug-20`, '20-aug'],
  [`in a few years`, `in 3 years`],
  [`in a couple years`, `in 2 years`],
  [`2 weeks back`, `2 weeks ago`],
  [`last q1`, `q1 2019`],
  [`last q2`, `q2 2019`],
  [`last q3`, `q3 2019`],
  [`last q4`, `q4 2019`],
  [`this q1`, `q1 2020`],
  [`this q2`, `q2 2020`],
  [`this q3`, `q3 2020`],
  [`this q4`, `q4 2020`],
  [`next q1`, `q1 2021`],
  [`next q2`, `q2 2021`],
  [`next q3`, `q3 2021`],
  [`next q4`, `q4 2021`],
  [`tuesday at 3`, `tuesday 3:00pm`],
  [`tuesday at 4:00`, `tuesday 4:00pm`],
  [`5:30`, `today at 5:30pm`],
  [`tuesday at 3am`, `tuesday 3:00am`],
  [`5 oclock`, `today at 5:00pm`],
  [`5 oclock am`, `today at 5:00am`],
  [`10 oclock`, `today at 10:00am`],
  [`11:30`, `today at 11:30am`],
  [`11:30pm`, `today at 11:30pm`],
  [`tuesday at 1`, `tuesday at 1pm`],
  ['this fri, monday', 'fri jan 24 and mon jan 27'],
  ['next friday, this monday', 'fri jan 31 and mon jan 27'],
]
// both phrases of each pair should resolve to the same start date
// (a phrase with no date found compares as undefined)
test('date-variety', function (t) {
  for (const [phrase, equivalent] of arr) {
    const left = nlp(phrase).dates(context).json()[0] || {}
    const right = nlp(equivalent).dates(context).json()[0] || {}
    // default the date objects so a missing date doesn't throw
    left.date = left.date || {}
    right.date = right.date || {}
    t.equal(left.date.start, right.date.start, phrase)
  }
  t.end()
})
81 | 


--------------------------------------------------------------------------------
/src/02-two/tagset/tags/verbs.js:
--------------------------------------------------------------------------------
  1 | export default {
  2 |   Verb: {
  3 |     not: ['Noun', 'Adjective', 'Adverb', 'Value', 'Expression'],
  4 |   },
  5 |   PresentTense: {
  6 |     is: 'Verb',
  7 |     not: ['PastTense'],
  8 |   },
  9 |   Infinitive: {
 10 |     is: 'PresentTense',
 11 |     not: ['Gerund'],
 12 |   },
 13 |   Imperative: {
 14 |     is: 'Infinitive',
 15 |   },
 16 |   Gerund: {
 17 |     is: 'PresentTense',
 18 |     not: ['Copula'],
 19 |   },
 20 |   PastTense: {
 21 |     is: 'Verb',
 22 |     not: ['PresentTense', 'Gerund'],
 23 |   },
 24 |   Copula: {
 25 |     is: 'Verb',
 26 |   },
 27 |   Modal: {
 28 |     is: 'Verb',
 29 |     not: ['Infinitive'],
 30 |   },
 31 |   PerfectTense: {
 32 |     is: 'Verb',
 33 |     not: ['Gerund'],
 34 |   },
 35 |   Pluperfect: {
 36 |     is: 'Verb',
 37 |   },
 38 |   Participle: {
 39 |     is: 'PastTense',
 40 |   },
 41 |   PhrasalVerb: {
 42 |     is: 'Verb',
 43 |   },
 44 |   Passive: {
 45 |     is: 'PastTense',
 46 |   },
 47 |   Particle: {
 48 |     is: 'PhrasalVerb',
 49 |     not: ['PastTense', 'PresentTense', 'Copula', 'Gerund'],
 50 |   },
 51 |   Auxiliary: {
 52 |     is: 'Verb',
 53 |     not: ['PastTense', 'PresentTense', 'Gerund', 'Conjunction'],
 54 |   },
 55 | 
 56 |   // french verb forms
 57 |   PresentParticiple: {
 58 |     is: 'PresentTense',
 59 |     not: ['PastTense', 'FutureTense'],
 60 |   },
 61 |   PastParticiple: {
 62 |     is: 'PastTense',
 63 |     not: ['PresentTense', 'FutureTense'],
 64 |   },
 65 |   // [only formal]  parlai, parlâmes
 66 |   PastSimple: {
 67 |     is: 'PastTense',
 68 |     not: ['PresentTense', 'FutureTense'],
 69 |   },
 70 |   ConditionalVerb: {
 71 |     is: 'Verb',
 72 |   },
 73 |   FutureTense: {
 74 |     is: 'Verb',
 75 |     not: ['PresentTense', 'PastTense', 'Gerund'],
 76 |   },
 77 | 
 78 |   // 
 79 |   FirstPerson: {
 80 |     is: 'Verb',
 81 |     not: ['SecondPerson', 'ThirdPerson', 'FirstPersonPlural', 'SecondPersonPlural', 'ThirdPersonPlural']
 82 |   },
 83 |   SecondPerson: {
 84 |     is: 'Verb',
 85 |     not: ['FirstPerson', 'ThirdPerson', 'FirstPersonPlural', 'SecondPersonPlural', 'ThirdPersonPlural']
 86 |   },
 87 |   ThirdPerson: {
 88 |     is: 'Verb',
 89 |     not: ['FirstPerson', 'SecondPerson', 'FirstPersonPlural', 'SecondPersonPlural', 'ThirdPersonPlural']
 90 |   },
 91 |   FirstPersonPlural: {
 92 |     is: 'Verb',
 93 |     not: ['FirstPerson', 'SecondPerson', 'ThirdPerson', 'SecondPersonPlural', 'ThirdPersonPlural']
 94 |   },
 95 |   SecondPersonPlural: {
 96 |     is: 'Verb',
 97 |     not: ['FirstPerson', 'SecondPerson', 'ThirdPerson', 'FirstPersonPlural', 'ThirdPersonPlural']
 98 |   },
 99 |   ThirdPersonPlural: {
100 |     is: 'Verb',
101 |     not: ['FirstPerson', 'SecondPerson', 'ThirdPerson', 'FirstPersonPlural', 'SecondPersonPlural']
102 |   },
103 | }
104 | 


--------------------------------------------------------------------------------
/learn/giga/corpus.js:
--------------------------------------------------------------------------------
 1 | import { forEachSync } from './_giga.js'
 2 | import doSentences from './french.js'
 3 | import fs from 'fs'
 4 | 
 5 | 
 6 | let ids = []
 7 | for (let i = 1; i <= 10; i += 1) {
 8 |   let str = String(i).padStart(4, '0')
 9 |   ids.push(str)
10 | }
11 | // ids = ['0004']
12 | 
// Maps a part-of-speech label found in the corpus onto one of our tag names.
// NOTE(review): the labels look like the French TreeTagger tagset - confirm
// against the corpus documentation.
// The punctuation/symbol labels (PUN, PUN:cit, SENT, SYM) have no entry, so
// a lookup for them yields undefined.
let tagMap = {
  // abbreviation
  ABR: 'Abbreviation',
  // adjective
  ADJ: 'Adjective',
  // adverb
  ADV: 'Adverb',
  // article
  'DET:ART': 'Determiner',
  // possessive pronoun (ma, ta, ...)
  'DET:POS': 'Pronoun',
  // interjection
  INT: 'Interjection',
  // conjunction
  KON: 'Conjunction',
  // proper name
  NAM: 'ProperNoun',
  // noun
  NOM: 'Noun',
  // numeral
  NUM: 'Value',
  // pronoun, then its demonstrative / indefinite / personal /
  // possessive (mien, tien, ...) / relative sub-types
  PRO: 'Pronoun',
  'PRO:DEM': 'Pronoun',
  'PRO:IND': 'Pronoun',
  'PRO:PER': 'Pronoun',
  'PRO:POS': 'Pronoun',
  'PRO:REL': 'Pronoun',
  // preposition
  PRP: 'Preposition',
  // preposition plus article (au,du,aux,des)
  'PRP:det': 'Preposition',
  // every verb label collapses to a plain Verb, in this order:
  // conditional, futur, imperative, imperfect, infinitive, past participle,
  // present participle, present, simple past, subjunctive imperfect,
  // subjunctive present
  'VER:cond': 'Verb',
  'VER:futu': 'Verb',
  'VER:impe': 'Verb',
  'VER:impf': 'Verb',
  'VER:infi': 'Verb',
  'VER:pper': 'Verb',
  'VER:ppre': 'Verb',
  'VER:pres': 'Verb',
  'VER:simp': 'Verb',
  'VER:subi': 'Verb',
  'VER:subp': 'Verb',
}
48 | 
// word -> occurrence count, one table per tag we write out;
// terms mapped to any other tag are not counted
let byTag = { Verb: {}, Noun: {}, Adjective: {}, Adverb: {} }

// Tally the lower-cased text of every term in `both.fr` under its mapped tag.
// NOTE(review): `both` is presumably one aligned sentence pair, with the french
// terms under `.fr` - confirm against doSentences.
const doBoth = function (both) {
  for (const term of both.fr) {
    const tag = tagMap[term['$'].pos]
    const str = term['$text'].toLowerCase()
    const counts = tag && byTag[tag]
    if (!counts) {
      continue
    }
    counts[str] = counts[str] || 0
    counts[str] += 1
  }
}
// Feed every corpus part through doBoth. An error while reading one part is
// logged and does not stop the remaining parts.
// NOTE(review): forEachSync presumably awaits each callback before starting the
// next one - confirm in ./_giga.js.
await forEachSync(ids, async (partId) => {
  try {
    console.log(`\ndoing ${partId}:\n`)
    await doSentences(partId, doBoth)
  } catch (err) {
    console.log(err)
  }
})
73 | 
/**
 * Write the words counted under `tag` to `./<tag>.js`, most frequent first.
 * @param {string} tag - a key of `byTag`
 * @param {number} max - despite the name this is an exclusive LOWER bound:
 *   only words seen more than `max` times are kept
 * @returns {string[]} the words that were written
 */
const doTag = function (tag, max = 6) {
  const words = Object.entries(byTag[tag])
    .filter((pair) => pair[1] > max)
    // descending by count
    .sort((a, b) => (a[1] > b[1] ? -1 : a[1] < b[1] ? 1 : 0))
    .map((pair) => pair[0])
  fs.writeFileSync(`./${tag}.js`, 'export default ' + JSON.stringify(words, null, 2))
  return words
}

// one output file per counted tag
for (const tag of ['Adverb', 'Verb', 'Noun', 'Adjective']) {
  doTag(tag)
}
// console.dir(byTag, { depth: 5 })


--------------------------------------------------------------------------------
/plugins/dates/tests/backburner/to-iso.ignore.js:
--------------------------------------------------------------------------------
 1 | import test from 'tape'
 2 | import nlp from './_lib.js'
 3 | 
 4 | const context = {
 5 |   today: '2019-02-02T03:40:00.000Z',
 6 |   timezone: 'UTC',
 7 | }
 8 | 
 9 | let arr = [
10 |   ['june 5th 1999', '1999-06-05T00:00:00.000Z'],
11 |   ['june 5th 1999', '1999-06-05T00:00:00.000Z'],
12 |   ['january 1st 1644', '1644-01-01T00:00:00.000Z'],
13 |   ['jan 1st 1644', '1644-01-01T00:00:00.000Z'],
14 |   ['June 4th 1993', '1993-06-04T00:00:00.000Z'],
15 |   ['March 1st 1987', '1987-03-01T00:00:00.000Z'],
16 |   ['June 22nd 2014', '2014-06-22T00:00:00.000Z'],
17 |   ['may 22nd 2014', '2014-05-22T00:00:00.000Z'],
18 |   ['sep 22nd 2014', '2014-09-22T00:00:00.000Z'],
19 |   ['apr 22nd 2014', '2014-04-22T00:00:00.000Z'],
20 |   ['June 22nd 1997', '1997-06-22T00:00:00.000Z'],
21 |   ['january 5th 1998', '1998-01-05T00:00:00.000Z'],
22 |   ['3rd of March 1969', '1969-03-03T00:00:00.000Z'],
23 |   ['2nd of April 1929', '1929-04-02T00:00:00.000Z'],
24 |   ['2nd of jul 1929', '1929-07-02T00:00:00.000Z'],
25 |   ['March 1969', '1969-03-01T00:00:00.000Z'],
26 |   ['jan 1921', '1921-01-01T00:00:00.000Z'],
27 |   ['March 18th', '2019-03-18T00:00:00.000Z'],
28 |   ['August 28th', '2019-08-28T00:00:00.000Z'],
29 |   ['18th of March', '2019-03-18T00:00:00.000Z'],
30 |   ['27th of March', '2019-03-27T00:00:00.000Z'],
31 |   ['february 10th', '2019-02-10T00:00:00.000Z'],
32 |   ['february 28th', '2019-02-28T00:00:00.000Z'],
33 |   ['first day of 2019', '2019-01-01T00:00:00.000Z'],
34 |   ['last day of 2019', '2019-12-31T00:00:00.000Z'],
35 |   ['7th hour of 2019', '2019-01-01T06:00:00.000Z'],
36 |   ['7th day of 2019', '2019-01-07T00:00:00.000Z'],
37 |   ['second quarter of 2019', '2019-04-01T00:00:00.000Z'],
38 |   ['30th minute of 2019', '2019-01-01T00:30:00.000Z'],
39 |   ['2019', '2019-01-01T00:00:00.000Z'],
40 |   ['2028', '2028-01-01T00:00:00.000Z'],
41 |   ['in 2028', '2028-01-01T00:00:00.000Z'],
42 |   ['2nd month in 2028', '2028-02-01T00:00:00.000Z'],
43 |   ['first day of march 2019', '2019-03-01T00:00:00.000Z'],
44 |   ['5th day of march 2019', '2019-03-05T00:00:00.000Z'],
45 |   ['5th day of q1 2002', '2002-01-05T00:00:00.000Z'],
46 |   ['5th hour of March 3rd 2002', '2002-03-03T04:00:00.000Z'],
47 |   ['last hour of March 2021', '2021-03-31T23:00:00.000Z'],
48 |   ['may to august 1996', '1996-05-01T00:00:00.000Z'],
49 |   ['half past 4', '2019-02-02T16:30:00.000Z'],
50 |   ['20 past 2', '2019-02-02T14:20:00.000Z'],
51 |   ['at 20 past', '2019-02-02T04:20:00.000Z'],
52 |   ['at half past', '2019-02-02T04:30:00.000Z'],
53 |   ['at quarter to', '2019-02-02T03:45:00.000Z'],
54 |   ['at quarter after', '2019-02-02T04:15:00.000Z'],
55 |   // ['august to may 1996', '1996-05-01T00:00:00.000Z'],
56 | ]
57 | 
// Parse every phrase in `arr` with the fixed `context` and compare the start of
// the first date found with the expected ISO string.
test('date-parse :', function (t) {
  arr.forEach(function (a) {
    let json = nlp(a[0]).dates(context).json()[0] || {}
    // when nothing was parsed `json` is {} and has no `dates` key: fall back to an
    // empty object so the row is reported as a failed assertion instead of
    // throwing a TypeError and aborting all remaining rows
    let found = json.dates || {}
    t.equal(found.start, a[1], a[0])
  })
  t.end()
})
65 | 


--------------------------------------------------------------------------------
/src/03-three/numbers/parse/fromText.js:
--------------------------------------------------------------------------------
  1 | import { toCardinal, toNumber } from './_data.js'
  2 | 
  3 | const multiLeft = {
  4 |   dix: true,//dix huit
  5 |   soixante: true,//soixante dix
  6 |   quatre: true,//quatre vingt
  7 |   mille: true//mille milliards
  8 | }
  9 | 
 10 | const multiples = {
 11 |   // cent: 100,//hundred
 12 |   mille: 1000,//thousand
 13 |   milles: 1000,//thousand
 14 |   million: 1000000,//million
 15 |   millions: 1000000,//million
 16 |   milliards: 1000000000//billion
 17 | }
 18 | 
 19 | // greedy scan for multi-word numbers, like 'quatre vingt'
 20 | const scanAhead = function (terms, i) {
 21 |   let skip = 0
 22 |   let add = 0
 23 |   let words = []
 24 |   for (let index = 0; index < 3; index += 1) {
 25 |     if (!terms[i + index]) {
 26 |       break
 27 |     }
 28 |     let w = terms[i + index].normal || ''
 29 |     if (toCardinal.hasOwnProperty(w)) {
 30 |       w = toCardinal[w]
 31 |     }
 32 |     words.push(w)
 33 |     let str = words.join(' ')
 34 |     if (toNumber.hasOwnProperty(str)) {
 35 |       skip = index
 36 |       add = toNumber[str]
 37 |     }
 38 |   }
 39 |   return { skip, add }
 40 | }
 41 | 
 42 | const parseNumbers = function (terms = []) {
 43 |   let sum = 0
 44 |   let carry = 0
 45 |   let minus = false
 46 |   let sums = []
 47 |   for (let i = 0; i < terms.length; i += 1) {
 48 |     let { tags, normal } = terms[i]
 49 |     let w = normal || ''
 50 |     if (w === 'moins') {
 51 |       minus = true
 52 |       continue
 53 |     }
 54 |     // ... et-un
 55 |     if (w === 'et') {
 56 |       continue
 57 |     }
 58 |     // 'huitieme'
 59 |     if (tags.has('Ordinal')) {
 60 |       w = toCardinal[w]
 61 |     }
 62 |     // add thousand, million
 63 |     if (multiples.hasOwnProperty(w)) {
 64 |       sum += carry
 65 |       carry = 0
 66 |       if (!sum) {
 67 |         sum = 1
 68 |       }
 69 |       sum *= multiples[w]
 70 |       sums.push(sum)
 71 |       sum = 0
 72 |       continue
 73 |     }
 74 |     // support 'quatre vingt dix', etc
 75 |     if (multiLeft.hasOwnProperty(w)) {
 76 |       let { add, skip } = scanAhead(terms, i)
 77 |       if (skip > 0) {
 78 |         carry += add
 79 |         i += skip
 80 |         continue
 81 |       }
 82 |     }
 83 | 
 84 |     // 'cent'
 85 |     if (tags.has('Multiple')) {
 86 |       let mult = toNumber[w] || 1
 87 |       if (carry === 0) {
 88 |         carry = 1
 89 |       }
 90 |       sum += mult * carry
 91 |       carry = 0
 92 |       continue
 93 |     }
 94 |     // 'trois'
 95 |     if (toNumber.hasOwnProperty(w)) {
 96 |       carry += toNumber[w]
 97 |     } else {
 98 |       let n = Number(w)
 99 |       if (n) {
100 |         carry += n
101 |       } else {
102 |         // console.log('missing', w) //TODO: fixme
103 |       }
104 |     }
105 |   }
106 |   // include any remaining
107 |   if (carry !== 0) {
108 |     sum += carry
109 |   }
110 |   sums.push(sum)
111 |   sum = sums.reduce((h, n) => {
112 |     return h + n
113 |   }, 0)
114 |   if (minus === true) {
115 |     sum *= -1
116 |   }
117 |   return sum
118 | }
119 | export default parseNumbers


--------------------------------------------------------------------------------
/data/lexicon/index.js:
--------------------------------------------------------------------------------
  1 | //directory of files to pack with `node scripts/pack.js`
  2 | //they are stored in compressed form
  3 | import lex from './misc.js'
  4 | 
  5 | import firstnames from './people/firstnames.js'
  6 | import lastnames from './people/lastnames.js'
  7 | import maleNames from './people/maleNames.js'
  8 | import femaleNames from './people/femaleNames.js'
  9 | import honorifics from './people/honorifics.js'
 10 | import people from './people/people.js'
 11 | 
 12 | import countries from './places/countries.js'
 13 | import regions from './places/regions.js'
 14 | import places from './places/places.js'
 15 | import cities from './places/cities.js'
 16 | 
 17 | import cardinals from './numbers/cardinals.js'
 18 | import ordinals from './numbers/ordinals.js'
 19 | import units from './numbers/units.js'
 20 | 
 21 | import infinitives from './verbs/infinitives.js'
 22 | 
 23 | import masculine from './nouns/masculine.js'
 24 | import feminine from './nouns/feminine.js'
 25 | import sportsTeams from './nouns/sportsTeams.js'
 26 | import organizations from './nouns/organizations.js'
 27 | import possessives from './nouns/possessives.js'
 28 | import pronouns from './nouns/pronouns.js'
 29 | import uncountables from './nouns/uncountables.js'
 30 | import nouns from './nouns/nouns.js'
 31 | 
 32 | import masc from './adjectives/masc.js'
 33 | 
 34 | import dates from './dates/dates.js'
 35 | import months from './dates/months.js'
 36 | import weekdays from './dates/weekdays.js'
 37 | 
 38 | import adverbs from './misc/adverbs.js'
 39 | import conjunctions from './misc/conjunctions.js'
 40 | import currencies from './misc/currencies.js'
 41 | import expressions from './misc/expressions.js'
 42 | import determiners from './misc/determiners.js'
 43 | import prepositions from './misc/prepositions.js'
 44 | //add-in the generic, flat word-lists
 45 | const data = [
 46 |   [firstnames, 'FirstName'],
 47 |   [lastnames, 'LastName'],
 48 |   [maleNames, 'MaleName'],
 49 |   [femaleNames, 'FemaleName'],
 50 |   [honorifics, 'Honorific'],
 51 |   [people, 'Person'],
 52 | 
 53 |   [countries, 'Country'],
 54 |   [regions, 'Region'],
 55 |   [places, 'Place'],
 56 |   [cities, 'City'],
 57 | 
 58 |   [cardinals, 'Cardinal'],
 59 |   [ordinals, 'Ordinal'],
 60 |   [units, 'Unit'],
 61 | 
 62 |   [infinitives, 'Infinitive'],
 63 | 
 64 |   [masculine, 'MaleNoun'],
 65 |   [feminine, 'FemaleNoun'],
 66 |   [sportsTeams, 'SportsTeam'],
 67 |   [organizations, 'Organization'],
 68 |   [possessives, 'Possessive'],
 69 |   [pronouns, 'Pronoun'],
 70 |   [uncountables, 'Uncountable'],
 71 |   [nouns, 'Noun'],
 72 | 
 73 |   [masc, 'MaleAdjective'],
 74 | 
 75 |   [adverbs, 'Adverb'],
 76 |   [conjunctions, 'Conjunction'],
 77 |   [currencies, 'Currency'],
 78 |   [expressions, 'Expression'],
 79 |   [determiners, 'Determiner'],
 80 |   [prepositions, 'Preposition'],
 81 | 
 82 |   [dates, 'Date'],
 83 |   [months, 'Month'],
 84 |   [weekdays, 'WeekDay'],
 85 | ]
 86 | for (let i = 0; i < data.length; i++) {
 87 |   const list = data[i][0]
 88 |   for (let o = 0; o < list.length; o++) {
 89 |     //log duplicates
 90 |     // if (lex[list[o]]) {
 91 |     //   console.log(list[o] + '  ' + lex[list[o]] + ' ' + data[i][1])
 92 |     // }
 93 |     lex[list[o]] = data[i][1]
 94 |   }
 95 | }
 96 | 
 97 | export default lex
 98 | // console.log(Object.keys(lex).length);
 99 | // console.log(lex['mars'])
100 | 


--------------------------------------------------------------------------------
/scratch.js:
--------------------------------------------------------------------------------
  1 | import nlp from './src/index.js'
  2 | nlp.verbose('tagger')
  3 | /*
  4 | 
  5 | */
  6 | 
  7 | 
  8 | // console.log(nlp('essayer').verbs().conjugate())
  9 | 
 10 | let root = 'errer'
 11 | let arr = [
 12 |   // mauvais
 13 |   // 'Elle a eu une mauvaise expérience',
 14 |   // devenir
 15 |   // 'Elle est devenue une célèbre', //passe-compose
 16 | 
 17 |   // bénir
 18 |   // 'Que Dieu te bénisse avec bonheur', //subjunctive
 19 | 
 20 |   // revendiquer
 21 |   // 'Il revendiqua avoir vu un OVNI.', //passe-simple
 22 | 
 23 |   // accroupir
 24 |   // `Elle s'est accroupie derrière l'arbre`, //passe anterior
 25 | 
 26 | 
 27 |   // ménage
 28 |   // `Les tâches ménagères `,
 29 | 
 30 |   // nier
 31 |   // `la nouvelle loi nierait leurs droits`, //conditional
 32 | 
 33 |   // vieux
 34 |   // `La vieille maison`,
 35 |   // `une collection de <vieilles> photographies`,
 36 | 
 37 |   // promouvoir
 38 |   // `Elle a été promue à un poste`, // 
 39 | 
 40 |   // pleuvoir
 41 |   // `quand il pleut `,
 42 | 
 43 |   // refléter
 44 |   // `Je réfléchis toujours`, //?
 45 | 
 46 |   // rôtir
 47 |   // `Elle a rôti une dinde`, //passe compose
 48 | 
 49 | 
 50 |   // soupirer
 51 |   // `Elle soupira `, //passe simple
 52 | 
 53 |   // envoler
 54 |   // `La montgolfière <s'envola> au-dessus des montagnes`,
 55 | 
 56 |   // // chanceler
 57 |   // `Il <chancela> chez lui `,
 58 | 
 59 | 
 60 |   // épais
 61 |   // `une couverture épaisse`,
 62 | 
 63 |   // essayer
 64 |   // `Elle essaie de parler `,
 65 | 
 66 |   // errer
 67 |   `Le vieil homme <erra> et se perdit.`, //passe simple
 68 |   // ["devenir", "become", "Verb", "She <became> a famous singer after years of practice.", "Elle est devenue une célèbre chanteuse après des années de pratique."],
 69 |   // ["accroupir", "crouch", "Verb", "She <crouched> behind the tree to hide.", "Elle s'est accroupie derrière l'arbre pour se cacher."],
 70 | 
 71 |   // ["endormi", "asleep", "Adjective", "I love listening to music while falling <asleep>.", "J'aime écouter de la musique en m'endormant."],
 72 |   // ["mauvais", "bad", "Adjective", "She had a <bad> experience with her previous boss.", "Elle a eu une mauvaise expérience avec son ancien patron."],
 73 |   // ["épais", "thick", "Adjective", "The book has a <thick> cover.", "Le livre a une couverture épaisse."],
 74 | 
 75 | 
 76 | 
 77 |   // ['Il pêche la truite tous', 'pêcher'],
 78 |   // [`L'équipe a été vaincue lors du match final`, 'vaincre'],
 79 |   // ['', ''],
 80 |   // 'accroupir',
 81 | 
 82 |   // 'Il abrégera son nom ',
 83 |   // 'marcher',
 84 |   // 'ralentir',
 85 |   // 'vendre',
 86 |   // 'hier',
 87 |   // // 'célèbre',
 88 |   // // 'très  délicieux ',
 89 |   // 'Le  gâteau  était  très  délicieux ',
 90 |   // 'j\'ai lu trois livres',
 91 |   // `nous détestons le sable`,
 92 |   // `deuxième`,
 93 |   // 'vieillir',
 94 |   // 'envahir',
 95 |   // 'réfléchir',
 96 |   // 'des coûts « démontre que le gouvernement  »',
 97 | ]
 98 | // let [fr, en, pos, enTxt, frTxt] = arr[0]
 99 | 
100 | // console.log(fr, pos)
101 | let doc = nlp(arr[0]).debug()
102 | doc.match(`{${root}}`).debug()
103 | console.log(nlp(root).verbs().conjugate())
104 | 
105 | // console.log(doc.verbs().conjugate())
106 | // doc.verbs().toPastTense().debug()
107 | // doc.numbers().toNumber()
108 | // doc.debug()
109 | 
110 | 
111 | // let doc = nlp('4th sept')
112 | // let m = doc.match('[<date>#Value] [<month>#Month]')
113 | // m.debug()
114 | // m.groups().date.debug()
115 | // m.groups().month.debug()


--------------------------------------------------------------------------------
/src/01-one/lexicon/methods/verb/index.js:
--------------------------------------------------------------------------------
  1 | import { convert, reverse } from 'suffix-thumb'
  2 | import model from '../model.js'
  3 | 
  4 | // ---verbs--
  5 | const reverseAll = function (obj) {
  6 |   return Object.keys(obj).reduce((h, k) => {
  7 |     h[k] = reverse(obj[k])
  8 |     return h
  9 |   }, {})
 10 | }
 11 | 
 12 | const doVerb = function (str, m) {
 13 |   return {
 14 |     first: convert(str, m.je),
 15 |     second: convert(str, m.tu),
 16 |     third: convert(str, m.il),
 17 |     firstPlural: convert(str, m.nous),
 18 |     secondPlural: convert(str, m.vous),
 19 |     thirdPlural: convert(str, m.ils),
 20 |   }
 21 | }
 22 | const doOneVerb = function (str, form, m) {
 23 |   if (form === 'FirstPerson') {
 24 |     return convert(str, m.je)
 25 |   }
 26 |   if (form === 'SecondPerson') {
 27 |     return convert(str, m.tu)
 28 |   }
 29 |   if (form === 'ThirdPerson') {
 30 |     return convert(str, m.il)
 31 |   }
 32 |   if (form === 'FirstPersonPlural') {
 33 |     return convert(str, m.nous)
 34 |   }
 35 |   if (form === 'SecondPersonPlural') {
 36 |     return convert(str, m.vous)
 37 |   }
 38 |   if (form === 'ThirdPersonPlural') {
 39 |     return convert(str, m.ils)
 40 |   }
 41 |   return str
 42 | }
 43 | 
 44 | const toPresentTense = (str) => doVerb(str, model.presentTense)
 45 | const toFutureTense = (str) => doVerb(str, model.futureTense)
 46 | const toImperfect = (str) => doVerb(str, model.imperfect)
 47 | const toPastParticiple = (str) => convert(str, model.pastParticiple.prt)
 48 | 
 49 | const fromPresent = reverseAll(model.presentTense)
 50 | const fromPresentTense = (str, form) => doOneVerb(str, form, fromPresent)
 51 | 
 52 | const fromFuture = reverseAll(model.futureTense)
 53 | const fromFutureTense = (str, form) => doOneVerb(str, form, fromFuture)
 54 | 
 55 | const fromImperfect = reverseAll(model.imperfect)
 56 | const fromImperfectTense = (str, form) => doOneVerb(str, form, fromImperfect)
 57 | 
 58 | const fromParticiple = reverse(model.pastParticiple.prt)
 59 | const fromPastParticiple = (str) => convert(str, fromParticiple)
 60 | 
 61 | // do this one manually
 62 | const fromPassive = function (str) {
 63 |   str = str.replace(/ées$/, 'er')
 64 |   str = str.replace(/ée$/, 'er')
 65 |   str = str.replace(/és$/, 'er')
 66 |   str = str.replace(/é$/, 'er')
 67 |   return str
 68 | }
 69 | 
 70 | // i don't really know how this works
 71 | const toPassive = function (str) {
 72 |   if (str.endsWith('er')) {
 73 |     return [
 74 |       str.replace(/er$/, 'ées'),
 75 |       str.replace(/er$/, 'ée'),
 76 |       str.replace(/er$/, 'és'),
 77 |       str.replace(/er$/, 'é'),
 78 |     ]
 79 |   }
 80 |   return []
 81 | }
 82 | 
 83 | // an array of every inflection, for '{inf}' syntax
 84 | const all = function (str) {
 85 |   let arr = [str].concat(
 86 |     Object.values(toPresentTense(str)),
 87 |     Object.values(toFutureTense(str)),
 88 |     Object.values(toImperfect(str)),
 89 |     toPassive(str)
 90 |   )
 91 |   arr.push(toPastParticiple(str))
 92 |   arr = arr.filter(s => s)
 93 |   arr = new Set(arr)
 94 |   return Array.from(arr)
 95 | }
 96 | 
 97 | export default {
 98 |   all,
 99 |   toPresentTense, toFutureTense, toImperfect, toPastParticiple,
100 |   fromPresentTense, fromFutureTense, fromImperfectTense, fromPastParticiple, fromPassive
101 | }
102 | 
103 | // console.log(presentTense('marcher'))
104 | // console.log(futureTense('marcher'))
105 | // console.log(imperfect('marcher'))
106 | // console.log(pastParticiple('marcher'))
107 | // console.log(noun('roche'))
108 | // console.log(adjective('gentil'))


--------------------------------------------------------------------------------
/plugins/dates/tests/dates.test.js:
--------------------------------------------------------------------------------
 1 | import test from 'tape'
 2 | import nlp from './_lib.js'
 3 | let here = '[fr-dates] '
 4 | 
 5 | //yep,
 6 | let jan = '01'
 7 | let feb = '02'
 8 | let mar = '03'
 9 | let apr = '04'
10 | let may = '05'
11 | let june = '06'
12 | let july = '07'
13 | let august = '08'
14 | let sept = '09'
15 | let oct = '10'
16 | let nov = '11'
17 | let dec = '12'
18 | const today = [1998, 2, 2]
19 | const opts = { timezone: 'UTC', today }
20 | 
21 | const arr = [
22 |   [`je suis né le 2 septembre 1982`, [1982, sept, 2]],
23 |   [`Je travaille jusqu'en juin.`, [1998, 3, 2], [1998, june, 1]],
24 |   [`Il n'y a pas d'augmentation prévue jusqu'en 2032`, [2032, jan, 1]],
25 |   [`Je suis en vacances jusqu'au 3 janvier.`, [1998, jan, 3]],
26 |   [`Je peux t'emprunter ta voiture jusqu'à lundi prochain`, [1998, feb, 17]],
27 |   ['Nous avons acheté la maison le 15 avril 2013.', [2013, apr, 15]],
28 |   ['Le 1er mai est un jour férié en France', [1998, may, 1]],
29 |   ['Je vais y aller le premier décembre 2014.', [2014, dec, 1]],
30 |   [`le 8 aout 2014.`, [2014, august, 8]],
31 |   [`Aujourd'hui, c'est le 8 septembre 2024.`, [2024, sept, 8]],
32 |   [`Nous sommes le 1er février aujourd'hui.`, [1998, feb, 1]],
33 |   [`Nous sommes le vendredi 1er février aujourd'hui`, [1998, feb, 1]],
34 |   ['15/12/2020', [2020, dec, 15]],
35 |   ['5/2/2020', [2020, feb, 5]],
36 |   ['12/01/2018', [2018, jan, 12]],
37 |   // ['01/13/2018', [2018, jan, 13]],
38 |   ['Le 6 avril', [1998, apr, 6]],
39 |   ['Mercredi 11 mars', [1998, mar, 11]],
40 |   ['12/06/2020', [2020, june, 12]],
41 |   ['Halloween est le 31 octobre.', [1998, oct, 31]],
42 |   [`C'est le quatorze juillet.`, [1998, july, 14]],
43 |   [`c'est le premier janvier`, [1998, jan, 1]],
44 |   ['le 5 juin 2012', [2012, june, 5]],
45 |   ['Juin 5, 2012', [2012, june, 5]],
46 |   ['6/5/2012', [2012, may, 6]],
47 |   ['le 25 décembre 2012', [2012, dec, 25]],
48 |   ['December 25, 2012', [2012, dec, 25]],
49 |   ['12/15/2012', [2012, dec, 15]],
50 |   ['le 3 novembre 2012', [2012, nov, 3]],
51 |   ['Novembre 3, 2021', [2021, nov, 3]],  // have 2 years in slug
52 |   ['3/11/21', [2021, nov, 3]],
53 |   ['entre sept et oct', [1998, sept, 1], [1998, oct, 1]],
54 |   ['demain à 10h', [1998, feb, 3]], // tomorrow at 10am
55 |   ['lundi 20', [1998, apr, 20]], // next monday 20th
56 |   ['lundi 20 à 10h', [1998, apr, 20]], // next monday 20th at 10am
57 |   ['hier soir', [1998, feb, 12]], // yesterday evening
58 |   ['semaine prochaine', [1998, feb, 17]], // next week
59 |   ['14h30 demain', [1998, feb, 3]], // 2:30pm tomorow
60 |   ['demain matin à 9h', [1998, feb, 3]], // tomorrow morning at 9am
61 |   ['hier après-midi', [1998, feb, 1]], //yesterday afternoon
62 | ]
63 | 
// left-pad a date part to two characters with zeros: 3 -> '03'
const padZero = function (num) {
  return String(num).padStart(2, '0')
}
65 | 
// For every [text, start, end?] row: parse the text and compare the first date
// found with the expected year-month-day, plus the end day when the row has one.
test('dates:', function (t) {
  arr.forEach(a => {
    let [str, start, end] = a
    // make them ISOs
    start = start.map(padZero).join('-')
    end = end || []
    // a row without an end date leaves this as '', which skips the end check below
    end = end.map(padZero).join('-')

    let doc = nlp(str)
    // t.equal(doc.has('#Date'), true, here + `has-date: '${str}'`)

    // when nothing is found, fall back to an object with the SAME key that is read
    // on the next line, so the row fails its assertion instead of throwing
    let json = doc.dates(opts).json({ terms: false })[0] || { dates: [] }
    let dates = (json.dates || [])[0] || { start: '', end: '' }

    // test the start date is the ISO
    // only a midnight-UTC time is stripped: any other time stays in and fails the comparison
    let iso = (dates.start || '').replace(/T00:00:00\.000Z$/, '')
    t.equal(iso, start, here + `[start]: ${str}`)
    // test the end date is the ISO
    if (end) {
      iso = (dates.end || '').replace(/T.*$/, '')
      t.equal(iso, end, `[end]: ${str}`)
    }
  })
  t.end()
})


--------------------------------------------------------------------------------
/data/lexicon/places/regions.js:
--------------------------------------------------------------------------------
  1 | //some major 'second-level' administrative divisions
  2 | export default [
  3 |   'alabama',
  4 |   'alaska',
  5 |   'arizona',
  6 |   'arkansas',
  7 |   'california',
  8 |   'colorado',
  9 |   'connecticut',
 10 |   'delaware',
 11 |   'florida',
 12 |   'georgia',
 13 |   'hawaii',
 14 |   'idaho',
 15 |   'illinois',
 16 |   'indiana',
 17 |   'iowa',
 18 |   'kansas',
 19 |   'kentucky',
 20 |   'louisiana',
 21 |   'maine',
 22 |   'maryland',
 23 |   'massachusetts',
 24 |   'michigan',
 25 |   'minnesota',
 26 |   'mississippi',
 27 |   'missouri',
 28 |   'montana',
 29 |   'nebraska',
 30 |   'nevada',
 31 |   'new hampshire',
 32 |   'new jersey',
 33 |   'new mexico',
 34 |   'new york state',
 35 |   'new york',
 36 |   'north carolina',
 37 |   'north dakota',
 38 |   'ohio',
 39 |   'oklahoma',
 40 |   'oregon',
 41 |   'pennsylvania',
 42 |   'rhode island',
 43 |   'south carolina',
 44 |   'south dakota',
 45 |   'tennessee',
 46 |   'texas',
 47 |   'utah',
 48 |   'vermont',
 49 |   'virginia',
 50 |   'washington dc',
 51 |   'washington',
 52 |   'west virginia',
 53 |   'wisconsin',
 54 |   'wyoming',
 55 | 
 56 |   //canada
 57 |   'alberta',
 58 |   'british columbia',
 59 |   'manitoba',
 60 |   'new brunswick',
 61 |   'newfoundland',
 62 |   'newfoundland and labrador',
 63 |   'nova scotia',
 64 |   'nunavut',
 65 |   'ontario',
 66 |   'prince edward island',
 67 |   'pei',
 68 |   'quebec',
 69 |   'saskatchewan',
 70 |   'yukon',
 71 | 
 72 |   //australia
 73 |   'norfolk',
 74 |   'queensland',
 75 |   'tasmania',
 76 |   'victoria',
 77 | 
 78 |   //china
 79 |   'qinghai',
 80 |   'sichuan',
 81 |   'gansu',
 82 |   'hunan',
 83 |   'guangdong',
 84 |   'guizhou',
 85 |   'fujian',
 86 |   'jiangxi',
 87 | 
 88 |   //india
 89 |   'rajasthan',
 90 |   'madhya',
 91 |   'maharashtra',
 92 |   'uttar pradesh',
 93 |   'kashmir',
 94 |   'gujarat',
 95 |   'karnataka',
 96 |   'manipur',
 97 |   'odisha',
 98 | 
 99 |   //mexico
100 |   'aguascalientes',
101 |   'baja california',
102 |   'campeche',
103 |   'chiapas',
104 |   'chihuahua',
105 |   'coahuila',
106 |   'colima',
107 |   'durango',
108 |   'guanajuato',
109 |   'guerrero',
110 |   'hidalgo',
111 |   'jalisco',
112 |   'michoacan',
113 |   'morelos',
114 |   'nayarit',
115 |   'nuevo leon',
116 |   'oaxaca',
117 |   'queretaro',
118 |   'quintana roo',
119 |   'san luis potosi',
120 |   'sinaloa',
121 |   'sonora',
122 |   'tabasco',
123 |   'tamaulipas',
124 |   'tlaxcala',
125 |   'veracruz',
126 |   'yucatan',
127 |   'zacatecas',
128 | 
129 |   //western-europe
130 |   'basque',
131 |   'bavaria',
132 |   'bremen',
133 |   'buckinghamshire',
134 |   'cambridgeshire',
135 |   'corsica',
136 |   'coventry',
137 |   'cumbria',
138 |   'derbyshire',
139 |   'dorset',
140 |   'essex',
141 |   'gloucestershire',
142 |   'hampshire',
143 |   'hertfordshire',
144 |   'lancashire',
145 |   'leeds',
146 |   'leicestershire',
147 |   'lincolnshire',
148 |   'midlands',
149 |   'normandy',
150 |   'north yorkshire',
151 |   'northamptonshire',
152 |   'nottinghamshire',
153 |   'oxfordshire',
154 |   'saxony',
155 |   'sicily',
156 |   'somerset',
157 |   'staffordshire',
158 |   'suffolk',
159 |   'surrey',
160 |   'sussex',
161 |   'tuscany',
162 |   'warwickshire',
163 |   'yorkshire',
164 | 
165 |   //bangladesh
166 |   'rajshahi',
167 |   'rangpur',
168 |   'khulna',
169 |   'sylhet',
170 | 
171 |   //brazil
172 |   'minas gerais',
173 |   'bahia',
174 |   'parana',
175 |   'pernambuco',
176 |   'ceara',
177 |   'para',
178 |   'maranhao',
179 |   'santa catarina',
180 | 
181 |   //misc
182 |   'siberia',
183 | ]
184 | 


--------------------------------------------------------------------------------
/plugins/dates/tests/backburner/ambig-month.ignore.js:
--------------------------------------------------------------------------------
  1 | import test from 'tape'
  2 | import nlp from './_lib.js'
  3 | import spacetime from 'spacetime'
  4 | 
  5 | const fmt = (iso) => (iso ? spacetime(iso).format('{iso-short}') : '-')
  6 | 
  7 | test('this month', function (t) {
  8 |   let arr = [
  9 |     [2020, 11, 1],
 10 |     [2020, 11, 8],
 11 |     [2020, 11, 11],
 12 |     [2020, 11, 20],
 13 |     [2020, 11, 25],
 14 |     [2020, 11, 31],
 15 |     [2020, 11, 31],
 16 |   ]
 17 |   arr.forEach((a) => {
 18 |     let doc = nlp('this month')
 19 |     let found = doc.dates({ today: a }).json()[0] || {}
 20 |     t.equal(fmt((found.dates || {}).start), '2020-12-01', 'this-start')
 21 |     t.equal(fmt((found.dates || {}).end), '2020-12-31', 'this-end')
 22 |   })
 23 |   t.end()
 24 | })
 25 | 
 26 | test('next month', function (t) {
 27 |   let arr = [
 28 |     [2020, 11, 1],
 29 |     [2020, 11, 8],
 30 |     [2020, 11, 11],
 31 |     [2020, 11, 20],
 32 |     [2020, 11, 25],
 33 |     [2020, 11, 31],
 34 |     [2020, 11, 31],
 35 |   ]
 36 |   arr.forEach((a) => {
 37 |     let doc = nlp('next month')
 38 |     let found = doc.dates({ today: a }).json()[0] || {}
 39 |     t.equal(fmt((found.dates || {}).start), '2021-01-01', 'next-start')
 40 |     t.equal(fmt((found.dates || {}).end), '2021-01-31', 'next-end')
 41 |   })
 42 |   t.end()
 43 | })
 44 | 
 45 | test('last month', function (t) {
 46 |   let arr = [
 47 |     [2020, 11, 1],
 48 |     [2020, 11, 8],
 49 |     [2020, 11, 11],
 50 |     [2020, 11, 20],
 51 |     [2020, 11, 25],
 52 |     [2020, 11, 31],
 53 |     [2020, 11, 31],
 54 |   ]
 55 |   arr.forEach((a) => {
 56 |     let doc = nlp('last month')
 57 |     let found = doc.dates({ today: a }).json()[0] || {}
 58 |     t.equal(fmt((found.dates || {}).start), '2020-11-01', 'last-start')
 59 |     t.equal(fmt((found.dates || {}).end), '2020-11-30', 'last-end')
 60 |   })
 61 |   t.end()
 62 | })
 63 | 
 64 | test('this december', function (t) {
 65 |   let arr = [
 66 |     [2020, 1, 1],
 67 |     [2020, 2, 8],
 68 |     [2020, 3, 11],
 69 |     [2020, 4, 20],
 70 |     [2020, 5, 25],
 71 |     [2020, 6, 28],
 72 |     [2020, 7, 12],
 73 |     [2020, 8, 12],
 74 |     [2020, 9, 16],
 75 |     [2020, 10, 1],
 76 |     [2020, 11, 11],
 77 |   ]
 78 |   arr.forEach((a) => {
 79 |     let doc = nlp('this december')
 80 |     let found = doc.dates({ today: a }).json()[0] || {}
 81 |     t.equal(fmt((found.dates || {}).start), '2020-12-01', 'this december')
 82 |     t.equal(fmt((found.dates || {}).end), '2020-12-31', 'this december')
 83 | 
 84 |     doc = nlp('next december')
 85 |     found = doc.dates({ today: a }).json()[0] || {}
 86 |     t.equal(fmt((found.dates || {}).start), '2021-12-01', 'next december')
 87 |     t.equal(fmt((found.dates || {}).end), '2021-12-31', 'next december')
 88 | 
 89 |     doc = nlp('last december')
 90 |     found = doc.dates({ today: a }).json()[0] || {}
 91 |     t.equal(fmt((found.dates || {}).start), '2019-12-01', 'last december')
 92 |     t.equal(fmt((found.dates || {}).end), '2019-12-31', 'last december')
 93 |   })
 94 |   t.end()
 95 | })
 96 | 
 97 | test('this september', function (t) {
 98 |   let doc = nlp('this september')
 99 |   let found = doc.dates({ today: [2019, 7, 4] }).json()[0] || {}
100 |   t.equal(fmt((found.dates || {}).start), '2019-09-01', 'this sept - before')
101 | 
102 |   found = doc.dates({ today: [2019, 8, 4] }).json()[0] || {}
103 |   t.equal(fmt((found.dates || {}).start), '2019-09-01', 'this sept - during')
104 | 
105 |   found = doc.dates({ today: [2019, 9, 4] }).json()[0] || {}
106 |   t.equal(fmt((found.dates || {}).start), '2020-09-01', 'this sept - after')
107 |   t.end()
108 | })
109 | 


--------------------------------------------------------------------------------
/data/lexicon/nouns/sportsTeams.js:
--------------------------------------------------------------------------------
  1 | export default [
  2 |   //mlb
  3 |   'arizona diamondbacks',
  4 |   'atlanta braves',
  5 |   'baltimore orioles',
  6 |   'boston red sox',
  7 |   'chicago cubs',
  8 |   'chicago white sox',
  9 |   'cincinnati reds',
 10 |   'cleveland indians',
 11 |   'colorado rockies',
 12 |   'detroit tigers',
 13 |   'houston astros',
 14 |   'kansas city royals',
 15 |   'los angeles dodgers',
 16 |   'miami marlins',
 17 |   'milwaukee brewers',
 18 |   'minnesota twins',
 19 |   'new york mets',
 20 |   'new york yankees',
 21 |   'oakland athletics',
 22 |   'philadelphia phillies',
 23 |   'pittsburgh pirates',
 24 |   'san diego padres',
 25 |   'san francisco giants',
 26 |   'seattle mariners',
 27 |   'st. louis cardinals',
 28 |   'tampa bay rays',
 29 |   'texas rangers',
 30 |   'toronto blue jays',
 31 |   'washington nationals',
 32 |   'diamondbacks',
 33 |   'white sox',
 34 |   'astros',
 35 |   'dodgers',
 36 |   'mets',
 37 |   'yankees',
 38 |   'phillies',
 39 |   'padres',
 40 | 
 41 |   //nba
 42 |   'boston celtics',
 43 |   'brooklyn nets',
 44 |   'new york knicks',
 45 |   'philadelphia 76ers',
 46 |   'toronto raptors',
 47 |   'chicago bulls',
 48 |   'cleveland cavaliers',
 49 |   'detroit pistons',
 50 |   'indiana pacers',
 51 |   'milwaukee bucks',
 52 |   'atlanta hawks',
 53 |   'charlotte hornets',
 54 |   'miami heat',
 55 |   'orlando magic',
 56 |   'washington wizards',
 57 |   'dallas mavericks',
 58 |   'houston rockets',
 59 |   'memphis grizzlies',
 60 |   'new orleans pelicans',
 61 |   'san antonio spurs',
 62 |   'denver nuggets',
 63 |   'minnesota timberwolves',
 64 |   'portland trail blazers',
 65 |   'oklahoma city thunder',
 66 |   'utah jazz',
 67 |   'golden state warriors',
 68 |   'los angeles clippers',
 69 |   'los angeles lakers',
 70 |   'phoenix suns',
 71 |   'sacramento kings',
 72 |   'knicks',
 73 |   'lakers',
 74 |   'celtics',
 75 | 
 76 |   //nfl
 77 |   'arizona cardinals',
 78 |   'atlanta falcons',
 79 |   'baltimore ravens',
 80 |   'buffalo bills',
 81 |   'carolina panthers',
 82 |   'chicago bears',
 83 |   'cincinnati bengals',
 84 |   'cleveland browns',
 85 |   'dallas cowboys',
 86 |   'denver broncos',
 87 |   'detroit lions',
 88 |   'green bay packers',
 89 |   'houston texans',
 90 |   'indianapolis colts',
 91 |   'jacksonville jaguars',
 92 |   'kansas city chiefs',
 93 |   'miami dolphins',
 94 |   'minnesota vikings',
 95 |   'new england patriots',
 96 |   'new orleans saints',
 97 |   'new york giants',
 98 |   'new york jets',
 99 |   'oakland raiders',
100 |   'philadelphia eagles',
101 |   'pittsburgh steelers',
102 |   'san diego chargers',
103 |   'san francisco 49ers',
104 |   'seattle seahawks',
105 |   'st. louis rams',
106 |   'tampa bay buccaneers',
107 |   'tennessee titans',
108 |   'washington redskins',
109 | 
110 |   //mls
111 |   'atlanta united',
112 |   'chicago fire',
113 |   'colorado rapids',
114 |   'columbus crew sc',
115 |   'd.c. united',
116 |   'fc dallas',
117 |   'houston dynamo',
118 |   'la galaxy',
119 |   'minnesota united',
120 |   'montreal impact',
121 |   'new england revolution',
122 |   'new york city fc',
123 |   'new york red bulls',
124 |   'philadelphia union',
125 |   'portland timbers',
126 |   'real salt lake',
127 |   'san jose earthquakes',
128 |   'seattle sounders',
129 |   'sporting kansas city',
130 |   'vancouver whitecaps',
131 |   //premier league soccer (mostly city+fc)
132 |   'aston villa',
133 |   'blackburn rovers',
134 |   'cardiff city',
135 |   'leicester city',
136 |   'manchester city',
137 |   'manchester united',
138 |   'newcastle united',
139 |   'queens park rangers',
140 |   'sheffield united',
141 |   'stoke city',
142 |   'tottenham hotspur',
143 |   'west ham united',
144 | ]
145 | 


--------------------------------------------------------------------------------
/learn/giga/test.js:
--------------------------------------------------------------------------------
  1 | import { forEachSync } from './_giga.js'
  2 | import doSentences from './french.js'
  3 | import fs from 'fs'
  4 | import nlp from '../../src/index.js'
  5 | 
  6 | 
  7 | let ids = []
  8 | for (let i = 1; i <= 10; i += 1) {
  9 |   let str = String(i).padStart(4, '0')
 10 |   ids.push(str)
 11 | }
 12 | ids = ['0004']
 13 | 
 14 | let tagMap = {
 15 |   'ABR': 'Acronym',//abbreviation
 16 |   'ADJ': 'Adjective',//adjective
 17 |   'ADV': 'Adverb',//adjective
 18 |   'DET:ART': 'Determiner',//article
 19 |   'DET:POS': 'Pronoun',//possessive pronoun (ma, ta, ...)
 20 |   'INT': 'Interjection',//interjection
 21 |   'KON': 'Conjunction',//conjunction
 22 |   'NAM': 'ProperNoun',//proper name
 23 |   'NOM': 'Noun',//noun
 24 |   'NUM': 'Value',//numeral
 25 |   'PRO': 'Pronoun',//pronoun
 26 |   'PRO:DEM': 'Pronoun',//demonstrative pronoun
 27 |   'PRO:IND': 'Pronoun',//indefinite pronoun
 28 |   'PRO:PER': 'Pronoun',//personal pronoun
 29 |   'PRO:POS': 'Pronoun',//possessive pronoun (mien, tien, ...)
 30 |   'PRO:REL': 'Pronoun',//relative pronoun
 31 |   'PRP': 'Preposition',//preposition
 32 |   'PRP:det': 'Preposition',//preposition plus article (au,du,aux,des)
 33 |   // 'PUN':'',//punctuation
 34 |   // 'PUN:cit':'',//punctuation citation
 35 |   // 'SENT':'',//sentence tag
 36 |   // 'SYM':'',//symbol
 37 |   'VER:cond': 'Verb',//verb conditional
 38 |   'VER:futu': 'Verb',//verb futur
 39 |   'VER:impe': 'Verb',//verb imperative
 40 |   'VER:impf': 'Verb',//verb imperfect
 41 |   'VER:infi': 'Verb',//verb infinitive
 42 |   'VER:pper': 'Verb',//verb past participle
 43 |   'VER:ppre': 'Verb',//verb present participle
 44 |   'VER:pres': 'Verb',//verb present
 45 |   'VER:simp': 'Verb',//verb simple past
 46 |   'VER:subi': 'Verb',//verb subjunctive imperfect
 47 |   'VER:subp': 'Verb',//verb subjunctive present
 48 | }
 49 | 
 50 | const ignore = new Set(['au', 'aux', 'des', 'au', 'ne', '$', '.', '(', ')', 'se'])
 51 | 
 52 | let bad = {}
 53 | 
 54 | let right = 0
 55 | let wrong = 0
 56 | const doBoth = function (both) {
 57 |   let txt = both.fr.map(o => o['$text']).join(' ')
 58 |   txt = txt.replace(/ ([.,?):])/g, `$1`)
 59 |   let correct = {}
 60 |   both.fr.forEach((term, i) => {
 61 |     let tag = tagMap[term['$'].pos]
 62 |     if (tag) {
 63 |       let str = term['$text'].toLowerCase()
 64 |       correct[str] = tag
 65 |     }
 66 |   })
 67 |   let doc = nlp(txt)
 68 |   doc.terms().forEach(t => {
 69 |     let str = t.text('normal')
 70 |     let want = correct[str] || null
 71 |     if (want && !ignore.has(str)) {
 72 |       if (t.has('#' + want)) {
 73 |         right += 1
 74 |       } else {
 75 |         wrong += 1
 76 |         bad[str] = bad[str] || 0
 77 |         bad[str] += 1
 78 |         // console.log(txt)
 79 |         // console.log(want)
 80 |         // t.debug()
 81 |       }
 82 |     }
 83 |   })
 84 | }
 85 | 
 86 | 
 87 | // setInterval(() => {
 88 | //   let all = Object.entries(bad).sort((a, b) => {
 89 | //     if (a[1] > b[1]) {
 90 | //       return -1
 91 | //     } else if (a[1] < b[1]) {
 92 | //       return 1
 93 | //     }
 94 | //     return 0
 95 | //   })
 96 | //   all = all.slice(0, 100)
 97 | //   console.log(all)
 98 | // }, 10000)
 99 | 
// `part` as a percentage of `total`, rounded to one decimal place.
// returns 0 when there is no total, rather than NaN
const percent = (part, total) => {
  if (!total) {
    return 0
  }
  let num = (part / total) * 100
  num = Math.round(num * 10) / 10
  return num
}
105 | 
// score every file id, printing the running accuracy after each one
await forEachSync(ids, async (fileId) => {
  try {
    console.log(`\ndoing ${fileId}:\n`)
    await doSentences(fileId, doBoth)
    const soFar = right + wrong
    console.log(right, ` right  ${percent(right, soFar)}%`)
  } catch (err) {
    // a failure is printed, and does not throw out of this callback
    console.log(err)
  }
})
// final tallies
const total = right + wrong
console.log(right, ` right  ${percent(right, total)}%`)
console.log(wrong, ` wrong ${percent(wrong, total)}%`)


--------------------------------------------------------------------------------
/scripts/types.ts:
--------------------------------------------------------------------------------
  1 | // a smoke-test for our TypeScript typings
  2 | import frCompromise from '../'
  3 | import tape from 'tape'
  4 | console.log('\n 🥗  - running types-test..\n')
  5 | 
// Calls a broad sweep of the library's methods on one document.
// The only assertion is the t.ok(true) at the end - the results themselves are not checked.
tape('misc functions', function (t) {
  let doc = frCompromise('John and Joe walked to the store')
  // loops + accessors
  let m = doc.filter(s => s.found)
  let b = doc.map(s => s)
  doc.forEach((s) => s)
  let o = doc.find(s => s.found)
  m = doc.some(s => s.found)
  m = doc.random()
  m = doc.all()
  m = doc.eq(0)
  m = doc.first()
  m = doc.firstTerms()
  m = doc.fullSentences()
  m = doc.last()
  m = doc.lastTerms()
  m = doc.none()
  m = doc.slice(0, 1)
  m = doc.terms()
  m = doc.update([])
  m = doc.toView([])
  m = doc.fromText('')
  m = doc.clone()
  let obj = doc.groups()
  let arr = doc.termList()
  let c = doc.wordCount()
  // bare property reads
  doc.fullPointer
  doc.docs
  doc.pointer
  doc.methods
  doc.model
  doc.hooks
  doc.isView
  doc.found
  doc.length

  // One
  doc.compute('id')
  // change
  m = doc.toLowerCase()
  m = doc.toUpperCase()
  m = doc.toTitleCase()
  m = doc.toCamelCase()
  m = doc.insertAfter('asdf')
  m = doc.insertBefore('boo')
  m = doc.append('foo')
  m = doc.prepend('foo')
  m = doc.insert('bar')
  m = doc.match('flood').replaceWith('asf')
  m = doc.replace('m', 'woo')
  m = doc.remove('foo')
  m = doc.delete('bar')
  m = doc.pre(' ')
  m = doc.post(' ')
  m = doc.trim()
  m = doc.hyphenate()
  m = doc.dehyphenate()
  m = doc.toQuotations()
  m = doc.toParentheses()
  // both spellings of these two are called
  m = doc.deHyphenate()
  m = doc.toQuotation()
  m = doc.unique()
  m = doc.reverse()
  m = doc.sort()
  m = doc.concat(doc.none())
  // doc.fork()

  doc.compute('contractions')
  doc.compute('lexicon')
  doc.lookup(['blue jays', 'farmer'])

  // match
  m = doc.matchOne('#Foo')
  m = doc.match('#Foo')
  let bool = doc.has('#Foo')
  m = doc.if('#Foo')
  m = doc.ifNo('#Foo')
  m = doc.before('#Foo')
  m = doc.after('#Foo')
  m = doc.growLeft('#Foo')
  m = doc.growRight('#Foo')
  m = doc.grow('#Foo')
  m = doc.splitOn('#Foo')
  m = doc.splitBefore('#Foo')
  m = doc.splitAfter('#Foo')
  m = doc.split('#Foo')

  // output
  let res = doc.out()
  let txt = doc.text()
  txt = doc.text('normal')
  txt = doc.text('machine')
  txt = doc.text('root')
  txt = doc.text('implicit')
  txt = doc.json()

  // sets
  m = doc.union('blah')
  m = doc.and('blah')
  m = doc.intersection('blah')
  m = doc.difference('blah')
  m = doc.not('blah')
  m = doc.complement('blah')
  m = doc.settle('blah')

  // tagging
  m = doc.tag('Foo')
  m = doc.tagSafe('Foo')
  m = doc.unTag('Foo')
  m = doc.canBe('Foo')

  // named compute steps
  doc.compute('alias')
  doc.compute('normal')
  doc.compute('machine')
  doc.compute('freq')
  doc.compute('offset')
  doc.compute('index')
  doc.compute('wordCount')

  doc.compute('typeahead')
  doc.autoFill()

  // sweep
  let matches = [
    { match: '2nd quarter of? 2022', tag: 'TimePeriod' },
    { match: '(from|by|before) now', tag: 'FooBar' },
  ]
  let net = frCompromise.buildNet(matches)
  doc = frCompromise(`so good by now. woo hoo before now. in the 2nd quarter 2022`)
  let sr = doc.sweep(net)

  // lazy
  doc = frCompromise.lazy('hello', 'foo')

  t.ok(true)
  t.end()
})
141 | 
142 | 
143 | 
144 | 


--------------------------------------------------------------------------------
/plugins/dates/src/phrase/date/index.js:
--------------------------------------------------------------------------------
  1 | import { months, days } from './data.js'
  2 | import { Moment, Month, Day, Week, Year } from './units.js'
  3 | import spacetime from 'spacetime'
  4 | 
  5 | 
  6 | 
  7 | 
  8 | // some re-used helper functions:
  9 | const parseMonth = function (m) {
 10 |   let str = m.text('normal')
 11 |   if (months.hasOwnProperty(str)) {
 12 |     return months[str] - 1
 13 |   }
 14 |   return null
 15 | }
 16 | const parseNumber = function (m) {
 17 |   let str = m.text('normal')
 18 |   str = str.replace(/e$/, '')//ordinal
 19 |   return parseInt(str, 10)
 20 | }
 21 | 
 22 | const isValid = function (cal) {
 23 |   // if (!cal.month || !cal.date || !cal.year) {
 24 |   //   return false
 25 |   // }
 26 |   return true
 27 | }
 28 | 
 29 | // pull-apart a spcific date, like 'le 2e oct' independant of a longer phrase
 30 | const parseOne = function (m, opts) {
 31 |   const { today } = opts
 32 |   // clean it up a little
 33 |   // m = normalize(m)
 34 |   // match '2 septembre 1982'
 35 |   let res = m.match('[<date>#Value] [<month>#Month] [<year>#Year]')
 36 |   if (res.found) {
 37 |     let cal = {
 38 |       month: parseMonth(res.groups('month')),
 39 |       date: parseNumber(res.groups('date')),
 40 |       year: parseNumber(res.groups('year')),
 41 |     }
 42 |     if (isValid(cal)) {
 43 |       return new Day(cal, opts)
 44 |     }
 45 |   }
 46 |   // 'oct 2021'
 47 |   res = m.match('[<month>#Month]  [<year>#Year]')
 48 |   if (res.found) {
 49 |     let cal = {
 50 |       month: parseMonth(res.groups('month')),
 51 |       year: parseNumber(res.groups('year')) || today.year(),
 52 |     }
 53 |     if (isValid(cal)) {
 54 |       return new Month(cal, opts)
 55 |     }
 56 |   }
 57 |   // 'oct 22nd'
 58 |   res = m.match('[<month>#Month] [<date>#Value] #Year?')
 59 |   if (res.found) {
 60 |     let cal = {
 61 |       month: parseMonth(res.groups('month')),
 62 |       date: parseNumber(res.groups('date')) || today.date(),
 63 |       year: parseNumber(res.match('#Year')) || today.year(),
 64 |     }
 65 |     if (isValid(cal)) {
 66 |       return new Day(cal, opts)
 67 |     }
 68 |   }
 69 |   // '6 avril'
 70 |   res = m.match('[<date>#Value] [<month>#Month] #Year?')
 71 |   if (res.found) {
 72 |     let cal = {
 73 |       // month: parseMonth(res.groups('month')),
 74 |       // date: parseNumber(res.groups('date')) || today.date(),
 75 |       month: parseMonth(res.match('#Month')),
 76 |       date: parseNumber(res.match('#Value')) || today.date(),
 77 |       year: parseNumber(res.match('#Year')) || today.year(),
 78 |     }
 79 |     if (isValid(cal)) {
 80 |       return new Day(cal, opts)
 81 |     }
 82 |   }
 83 |   // '2021'
 84 |   res = m.match('[<year>#Year]')
 85 |   if (res.found) {
 86 |     let cal = { year: parseNumber(res.groups('year')) }
 87 |     if (isValid(cal)) {
 88 |       return new Year(cal, opts)
 89 |     }
 90 |   }
 91 |   // 'octobre'
 92 |   res = m.match('[<month>#Month]')
 93 |   if (res.found) {
 94 |     let cal = { month: parseMonth(res.groups('month')), year: today.year() }
 95 |     if (isValid(cal)) {
 96 |       return new Month(cal, opts)
 97 |     }
 98 |   }
 99 |   // '2021-02-12'
100 |   res = m.match('#Date+')
101 |   if (res.found) {
102 |     let s = spacetime(res.text('normal'), opts.timezone, { dmy: true })
103 |     if (s.isValid()) {
104 |       return new Moment(s, opts)
105 |     }
106 |   }
107 |   // known words
108 |   // yesterday
109 |   if (m.has('hier')) {
110 |     let s = spacetime(null, opts.timezone).minus(1, 'day')
111 |     return new Day(s, opts)
112 |   }
113 |   // tomorrow
114 |   if (m.has('demain')) {
115 |     let s = spacetime(null, opts.timezone).plus(1, 'day')
116 |     return new Day(s, opts)
117 |   }
118 |   // today
119 |   if (m.has('aujourd\'hui')) {
120 |     let s = spacetime(null, opts.timezone)
121 |     return new Day(s, opts)
122 |   }
123 | 
124 |   // todo: support other forms here! ↓
125 | 
126 | 
127 |   return null
128 | }
129 | export default parseOne


--------------------------------------------------------------------------------
/src/02-two/preTagger/compute/3rd-pass/verb-form.js:
--------------------------------------------------------------------------------
  1 | let person = ['FirstPerson', 'SecondPerson', 'ThirdPerson', 'FirstPersonPlural', 'SecondPersonPlural', 'ThirdPersonPlural']
  2 | 
  3 | let whichForm = [
  4 |   // future
  5 |   ['ai', 'FirstPerson'],
  6 |   ['tas', 'SecondPerson'],
  7 |   ['ta', 'ThirdPerson'],
  8 |   ['âmes', 'FirstPersonPlural'],
  9 |   ['âtes', 'SecondPersonPlural'],
 10 |   ['èrent', 'ThirdPersonPlural'],
 11 |   // imperfect
 12 |   ['ait', 'ThirdPerson'],
 13 |   // futur
 14 |   ['eras', 'SecondPerson'],
 15 |   ['eront', 'ThirdPersonPlural'],
 16 |   // imparfait
 17 |   ['asse', 'FirstPerson'],
 18 |   ['asses', 'SecondPerson'],
 19 |   ['tât', 'ThirdPerson'],
 20 |   // present
 21 |   ['es', 'SecondPerson'],
 22 |   ['ons', 'FirstPersonPlural'],
 23 |   ['ez', 'SecondPersonPlural'],
 24 |   ['ent', 'ThirdPersonPlural'],
 25 | ]
 26 | const pronouns = {
 27 |   je: 'FirstPerson',
 28 |   tu: 'SecondPerson',
 29 |   il: 'ThirdPerson',
 30 |   elle: 'ThirdPerson',
 31 |   nous: 'FirstPersonPlural',
 32 |   vous: 'SecondPersonPlural',
 33 |   ils: 'ThirdPersonPlural',
 34 | }
 35 | // can give us a hint to verb person, too
 36 | const auxiliaries = {
 37 |   // etre
 38 |   suis: 'FirstPerson',
 39 |   es: 'SecondPerson',
 40 |   est: 'ThirdPerson',
 41 |   sommes: 'FirstPersonPlural',
 42 |   êtes: 'SecondPersonPlural',
 43 |   sont: 'ThirdPersonPlural',
 44 |   serai: 'FirstPerson',
 45 |   seras: 'SecondPerson',
 46 |   sera: 'ThirdPerson',
 47 |   serons: 'FirstPersonPlural',
 48 |   serez: 'SecondPersonPlural',
 49 |   seront: 'ThirdPersonPlural',
 50 |   serait: 'ThirdPerson',
 51 |   serions: 'FirstPersonPlural',
 52 |   seriez: 'SecondPersonPlural',
 53 |   seraient: 'ThirdPersonPlural',
 54 | 
 55 |   // 'avoir'
 56 |   ai: 'FirstPerson',
 57 |   as: 'SecondPerson',
 58 |   a: 'ThirdPerson',
 59 |   avons: 'FirstPersonPlural',
 60 |   avez: 'SecondPersonPlural',
 61 |   ont: 'ThirdPersonPlural',
 62 |   // future anterior
 63 |   aurai: 'FirstPerson',
 64 |   auras: 'SecondPerson',
 65 |   aura: 'ThirdPerson',
 66 |   aurons: 'FirstPersonPlural',
 67 |   aurez: 'SecondPersonPlural',
 68 |   auront: 'ThirdPersonPlural',
 69 |   // Plus-que-parfait
 70 |   'avait': 'ThirdPerson',
 71 |   'avions': 'FirstPersonPlural',
 72 |   'aviez': 'SecondPersonPlural',
 73 |   'avaient': 'ThirdPersonPlural',
 74 |   // conditional avoir
 75 |   aurait: 'ThirdPerson',
 76 |   aurions: 'FirstPersonPlural',
 77 |   auriez: 'SecondPersonPlural',
 78 |   auraient: 'ThirdPersonPlural',
 79 | }
 80 | 
 81 | // guess a tense tag each Verb
 82 | const verbForm = function (terms, i, world) {
 83 |   let setTag = world.methods.one.setTag
 84 |   let term = terms[i]
 85 |   let tags = term.tags
 86 |   if (tags.has('Verb')) {
 87 |     // console.log(term)
 88 |     let str = term.implicit || term.normal || term.text || ''
 89 |     // if we have no person-tag
 90 |     if (!person.find(s => tags.has(s))) {
 91 |       // look at the word suffix, for clues
 92 |       let found = whichForm.find(a => str.endsWith(a[0]))
 93 |       if (found) {
 94 |         return setTag([term], found[1], world, false, '3-person-suffix-' + found[1])
 95 |       }
 96 |       //look backwards for clues
 97 |       for (let back = 0; back < 3; back += 1) {
 98 |         if (!terms[i - back]) {
 99 |           break
100 |         }
101 |         let s = terms[i - back].normal
102 |         //look backwards for a pronoun
103 |         if (terms[i - back].tags.has('Pronoun')) {
104 |           if (pronouns.hasOwnProperty(s)) {
105 |             return setTag([term], pronouns[s], world, false, '3-person-pronoun-' + s)
106 |           }
107 |         }
108 |         //look backwards for a auxiliary verb - 'sont'
109 |         if (terms[i - back].tags.has('Verb')) {
110 |           if (auxiliaries.hasOwnProperty(s)) {
111 |             return setTag([term], auxiliaries[s], world, false, '3-person-auxiliary-' + s)
112 |           }
113 |         }
114 |       }
115 |     }
116 |   }
117 |   return null
118 | }
119 | export default verbForm


--------------------------------------------------------------------------------
/src/02-two/postTagger/matches.js:
--------------------------------------------------------------------------------
 1 | import nounGender from '../preTagger/compute/3rd-pass/noun-gender.js'
 2 | import nounPlurals from '../preTagger/compute/3rd-pass/noun-plurals.js'
 3 | import adjGender from '../preTagger/compute/3rd-pass/adj-gender.js'
 4 | import adjPlurals from '../preTagger/compute/3rd-pass/adj-plurals.js'
 5 | import verbTense from '../preTagger/compute/3rd-pass/verb-tense.js'
 6 | 
 7 | const tagNoun = function (m) {
 8 |   let world = m.world
 9 |   m.docs.forEach(terms => {
10 |     terms.forEach((_t, i) => {
11 |       nounGender(terms, i, world)
12 |       nounPlurals(terms, i, world)
13 |     })
14 |   })
15 | }
// run the adjective gender + plural taggers over every term in the view
const tagAdj = function (m) {
  const { world, docs } = m
  for (const terms of docs) {
    for (let i = 0; i < terms.length; i += 1) {
      adjGender(terms, i, world)
      adjPlurals(terms, i, world)
    }
  }
}
// run the verb-tense tagger over every term in the view
const tagVerb = function (m) {
  const { world, docs } = m
  for (const terms of docs) {
    for (let i = 0; i < terms.length; i += 1) {
      verbTense(terms, i, world)
    }
  }
}
33 | 
// Phrase-level tag corrections, applied to `doc` in place.
// Each rule matches a short pattern and re-tags the captured [bracketed] word -
// or the whole match, when no group is requested.
// Where the part-of-speech changes, the gender/plural/tense taggers are re-run on it.
// Returns nothing.
const postTagger = function (doc) {
  // ==Nouns==
  // l'inconnu
  doc.match('(le|un) [#Verb]', 0).tag(['MaleNoun', 'Singular'], 'le-verb')
  doc.match('(la|une) [#Verb]', 0).tag(['FemaleNoun', 'Singular'], 'la-verb')
  tagNoun(doc.match('(quelques|quelque) [#Verb]', 0).tag('Noun', 'quelque-verb'))
  tagNoun(doc.match('(des|les|mes|ces|tes|ses|nos|vos|leurs) [#Verb]', 0).tag('PluralNoun', 'des-verb'))

  // ==Verbs==
  // ne foo pas
  tagVerb(doc.match('ne [.] pas', 0).tag('Verb', 'ne-verb-pas'))
  // il active le
  tagVerb(doc.match('il [.] (le|la|les)', 0).tag('Verb', 'il-verb-le'))
  // reflexive
  tagVerb(doc.match('(se|me|te) [.]', 0).tag('Verb', 'se-noun'))
  // Elle interdit les transactions
  tagVerb(doc.match('(je|tu|il|elle|nous|vous|ils) [#Adjective] (la|le|les)', 0).tag('Verb', 'ils-x-les'))
  // sont interdites par l'interdiction
  tagVerb(doc.match('(est|été|sont|était|serait) [#Adjective] #Preposition', 0).tag('Verb', 'song-x-par'))
  // a dissimulées
  tagVerb(doc.match('(ai|as|a|avons|avez|ont) [#Adjective]', 0).tag('PastTense', 'have-adj'))
  // have unpacked
  doc.match('(ai|as|a|avons|avez|ont) [#PresentTense]', 0).tag('PastTense', 'have-pres')
  // passive voice - est-aimée
  doc.match('#Copula #Adverb?+ [#PastParticiple]', 0).tag('Passive', 'passive-voice')

  // ==Adjectives==
  // est bien calculée
  tagAdj(doc.match('#Copula (bien|très|pas|plus|tant|presque|seulement)+ [#Verb]', 0).tag('Adjective', 'est-bein-calculee'))

  // ==Numbers==
  // these tag the whole phrase, joining-words included
  doc.match('#Value et (un|#Value)').tag('TextValue', 'et-un')
  doc.match('#Value un').tag('TextValue', 'quatre-vingt-un')
  doc.match('moins #Value').tag('TextValue', 'moins-value')

  // ==Dates==
  doc.match('[#Value] #Month', 0).tag('Date', 'val-month')
  doc.match('#Month [#Value] #Year', 0).tag('Date', 'mdy')
  doc.match('[#Value] #Month #Year', 0).tag('Date', 'dmy')
  // ambig 'sept' - the month-abbreviation, or the number seven
  doc.match('le #Value [sept]', 0).tag('Month', 'val-sept')
  doc.match('[sept] #Year', 0).tag('Month', 'sept-year')
  doc.match('[sept] (et|ou) #Month', 0).tag('Month', 'sept-et-month')
  doc.match('sept$').tag('TextValue', 'sept-alone')
  // tag only the captured 'sept' - not the 'et' before it
  doc.match('et [sept]', 0).tag('TextValue', 'et-sept')
  // sept trente
  doc.match('sept (dix|vingt|trente|quarante|cinquante|soixante|#Multiple)').tag('TextValue', 'sept-trente')
  doc.match('(dix|vingt|trente|quarante|cinquante|soixante|#Multiple) sept').tag('TextValue', 'trente-sept')
  // // sept-et-jun
  // doc.match('#Date [et] #Date', 0).tag('Date', 'date-et-date')
  // // courant juin
  // doc.match('(en|entre|depuis|courant|pendant|dans|lorsque|avant|après) #Date').tag('Date', 'depuis-date')
  // // jusque'en juin
  // doc.match('jusque (en|à) #Date').tag('Date', 'jusque-date')
  // // au cours de juin
  // doc.match('au cours de #Date').tag('Date', 'au-cours-de-date')
}
91 | export default postTagger


--------------------------------------------------------------------------------