├── plugins ├── wikipedia │ ├── .gitignore │ ├── config.js │ ├── scripts │ │ ├── generate │ │ │ └── index.js │ │ └── stat.js │ ├── index.d.ts │ ├── scratch.js │ ├── tests │ │ ├── _lib.js │ │ └── misc.test.js │ ├── src │ │ └── plugin.js │ └── demo │ │ └── index.html ├── _experiments │ ├── markdown │ │ ├── src │ │ │ ├── Wrap.js │ │ │ ├── parse │ │ │ │ └── index.js │ │ │ └── plugin.js │ │ ├── README.md │ │ └── scratch.js │ ├── sentiment │ │ ├── README.md │ │ ├── scratch.js │ │ └── src │ │ │ └── escape.js │ ├── compress │ │ ├── README.md │ │ └── src │ │ │ └── index.js │ ├── cmd-k │ │ ├── src │ │ │ ├── plugin.js │ │ │ └── slashCmd.js │ │ ├── scratch.js │ │ └── README.md │ └── ast │ │ ├── README.md │ │ ├── src │ │ ├── compute │ │ │ └── index.js │ │ ├── plugin.js │ │ └── lines.js │ │ └── scratch.js ├── dates │ ├── src │ │ ├── _version.js │ │ ├── api │ │ │ ├── index.js │ │ │ ├── parse │ │ │ │ ├── one │ │ │ │ │ ├── units │ │ │ │ │ │ ├── index.js │ │ │ │ │ │ └── _time.js │ │ │ │ │ ├── 01-tokenize │ │ │ │ │ │ ├── 07-weekday.js │ │ │ │ │ │ └── 05-section.js │ │ │ │ │ └── 02-parse │ │ │ │ │ │ ├── index.js │ │ │ │ │ │ └── 02-holidays.js │ │ │ │ ├── range │ │ │ │ │ └── _reverse.js │ │ │ │ ├── index.js │ │ │ │ └── normalize.js │ │ │ └── toJSON.js │ │ ├── model │ │ │ ├── words │ │ │ │ ├── dates.js │ │ │ │ ├── times.js │ │ │ │ ├── index.js │ │ │ │ └── durations.js │ │ │ ├── tags.js │ │ │ └── regex.js │ │ └── plugin.js │ ├── index.d.cts │ ├── scripts │ │ └── version.js │ ├── tests │ │ └── _lib.js │ └── demo │ │ └── index.html ├── speed │ ├── src │ │ ├── _version.js │ │ ├── lazyParse │ │ │ ├── plugin.js │ │ │ ├── lazyParse.js │ │ │ └── maybeMatch.js │ │ ├── workerPool │ │ │ ├── plugin.js │ │ │ └── pool │ │ │ │ └── create.js │ │ └── plugin.js │ ├── scripts │ │ └── version.js │ ├── tests │ │ ├── _lib.js │ │ └── stream.test.js │ └── index.d.ts ├── paragraphs │ ├── src │ │ └── plugin.js │ ├── index.d.ts │ └── tests │ │ └── _lib.js ├── speech │ ├── src │ │ ├── plugin.js │ │ ├── compute │ │ 
│ ├── index.js │ │ │ ├── soundsLike │ │ │ │ ├── index.js │ │ │ │ └── metaphone.js │ │ │ └── syllables │ │ │ │ └── index.js │ │ └── api.js │ ├── index.d.ts │ ├── tests │ │ ├── _lib.js │ │ └── soundsLike.test.js │ ├── scratch.js │ └── demo │ │ └── index.html ├── stats │ ├── src │ │ ├── ngram │ │ │ ├── tokenize.js │ │ │ └── sort.js │ │ ├── plugin.js │ │ ├── tfidf │ │ │ ├── unpack.js │ │ │ └── tf.js │ │ └── compute.js │ ├── tests │ │ ├── _lib.js │ │ └── misc.test.js │ ├── scratch.js │ ├── demo │ │ └── index.html │ └── scripts │ │ └── generate.js └── payload │ ├── src │ └── debug.js │ ├── scratch.js │ ├── tests │ └── _lib.js │ └── index.d.ts ├── src ├── _version.js ├── 3-three │ ├── sentences │ │ ├── plugin.js │ │ └── conjugate │ │ │ ├── toInfinitive.js │ │ │ └── toNegative.js │ ├── normalize │ │ └── plugin.js │ ├── nouns │ │ ├── plugin.js │ │ └── api │ │ │ ├── hasPlural.js │ │ │ ├── toSingular.js │ │ │ ├── parse.js │ │ │ ├── toJSON.js │ │ │ └── isPlural.js │ ├── verbs │ │ ├── plugin.js │ │ └── api │ │ │ └── parse │ │ │ ├── root.js │ │ │ └── adverbs.js │ ├── coreference │ │ ├── compute │ │ │ ├── findIt.js │ │ │ ├── lib.js │ │ │ └── findThey.js │ │ └── plugin.js │ ├── chunker │ │ ├── plugin.js │ │ ├── compute │ │ │ ├── index.js │ │ │ └── 05-fixUp.js │ │ └── api │ │ │ └── chunks.js │ ├── topics │ │ ├── orgs │ │ │ └── api.js │ │ ├── places │ │ │ ├── api.js │ │ │ └── find.js │ │ ├── people │ │ │ ├── find.js │ │ │ └── parse.js │ │ ├── plugin.js │ │ └── topics.js │ ├── numbers │ │ ├── fractions │ │ │ ├── convert │ │ │ │ ├── toCardinal.js │ │ │ │ └── toOrdinal.js │ │ │ └── find.js │ │ ├── plugin.js │ │ └── numbers │ │ │ ├── parse │ │ │ └── toNumber │ │ │ │ ├── validate.js │ │ │ │ ├── parseNumeric.js │ │ │ │ ├── parseDecimals.js │ │ │ │ └── findModifiers.js │ │ │ ├── _toString.js │ │ │ ├── format │ │ │ ├── toOrdinal │ │ │ │ └── numOrdinal.js │ │ │ └── index.js │ │ │ └── isUnit.js │ ├── misc │ │ ├── parentheses │ │ │ └── index.js │ │ ├── quotations │ │ │ └── index.js │ │ ├── 
plugin.js │ │ └── slashes │ │ │ └── index.js │ └── redact │ │ └── plugin.js ├── 4-four │ ├── facts │ │ ├── plugin.js │ │ └── parse │ │ │ ├── pivot.js │ │ │ ├── noun.js │ │ │ ├── adjective.js │ │ │ ├── postProcess.js │ │ │ └── verb.js │ └── sense │ │ ├── plugin.js │ │ ├── api │ │ └── api.js │ │ └── model │ │ ├── index.js │ │ └── senses │ │ ├── verb.js │ │ ├── adjective.js │ │ └── index.js ├── 2-two │ ├── lazy │ │ ├── plugin.js │ │ ├── lazyParse.js │ │ └── maybeMatch.js │ ├── preTagger │ │ ├── model │ │ │ ├── personWords.js │ │ │ ├── clues │ │ │ │ ├── person-adj.js │ │ │ │ ├── unit-noun.js │ │ │ │ ├── actor-verb.js │ │ │ │ ├── person-noun.js │ │ │ │ └── person-verb.js │ │ │ ├── _expand │ │ │ │ └── irregulars.js │ │ │ └── regex │ │ │ │ └── regex-text.js │ │ ├── compute │ │ │ ├── index.js │ │ │ └── tagger │ │ │ │ ├── 3rd-pass │ │ │ │ ├── 07-verb-type.js │ │ │ │ └── 05-fallback.js │ │ │ │ ├── 2nd-pass │ │ │ │ └── 00-tagSwitch.js │ │ │ │ └── 1st-pass │ │ │ │ ├── 02-hyphens.js │ │ │ │ └── 01-colons.js │ │ ├── methods │ │ │ ├── transform │ │ │ │ ├── index.js │ │ │ │ ├── verbs │ │ │ │ │ ├── index.js │ │ │ │ │ └── getTense │ │ │ │ │ │ └── index.js │ │ │ │ ├── nouns │ │ │ │ │ ├── index.js │ │ │ │ │ └── toSingular │ │ │ │ │ │ └── index.js │ │ │ │ └── adjectives │ │ │ │ │ ├── conjugate │ │ │ │ │ └── lib.js │ │ │ │ │ └── index.js │ │ │ └── index.js │ │ ├── plugin.js │ │ └── tagSet │ │ │ ├── index.js │ │ │ ├── values.js │ │ │ └── dates.js │ ├── swap │ │ ├── plugin.js │ │ └── api │ │ │ └── swap-verb.js │ ├── contraction-two │ │ └── plugin.js │ └── postTagger │ │ ├── plugin.js │ │ ├── model │ │ ├── verbs │ │ │ ├── adj-gerund.js │ │ │ └── passive.js │ │ └── numbers │ │ │ └── money.js │ │ ├── api.js │ │ └── compute │ │ └── index.js ├── 1-one │ ├── tokenize │ │ ├── model │ │ │ ├── abbreviations │ │ │ │ ├── organizations.js │ │ │ │ ├── months.js │ │ │ │ ├── nouns.js │ │ │ │ ├── honorifics.js │ │ │ │ ├── places.js │ │ │ │ └── units.js │ │ │ ├── aliases.js │ │ │ ├── suffixes.js │ │ │ ├── 
index.js │ │ │ └── prefixes.js │ │ ├── methods │ │ │ ├── unicode.js │ │ │ ├── 03-whitespace │ │ │ │ └── index.js │ │ │ ├── 02-terms │ │ │ │ ├── 02-slashes.js │ │ │ │ └── 03-ranges.js │ │ │ ├── index.js │ │ │ ├── parse.js │ │ │ └── 01-sentences │ │ │ │ └── 03-smart-merge.js │ │ ├── compute │ │ │ ├── wordCount.js │ │ │ ├── normal │ │ │ │ ├── index.js │ │ │ │ └── 02-acronyms.js │ │ │ ├── offset.js │ │ │ ├── reindex.js │ │ │ ├── freq.js │ │ │ ├── machine.js │ │ │ └── index.js │ │ └── plugin.js │ ├── contraction-one │ │ ├── compute │ │ │ ├── index.js │ │ │ └── contractions │ │ │ │ ├── apostrophe-t.js │ │ │ │ ├── number-unit.js │ │ │ │ └── number-range.js │ │ ├── model │ │ │ ├── index.js │ │ │ └── number-suffix.js │ │ └── plugin.js │ ├── output │ │ ├── methods │ │ │ ├── index.js │ │ │ └── debug │ │ │ │ ├── index.js │ │ │ │ ├── _color.js │ │ │ │ └── client-side.js │ │ ├── plugin.js │ │ └── api │ │ │ ├── index.js │ │ │ ├── debug.js │ │ │ └── _fmts.js │ ├── cache │ │ ├── methods │ │ │ └── index.js │ │ ├── compute.js │ │ ├── plugin.js │ │ └── api.js │ ├── lexicon │ │ ├── methods │ │ │ └── index.js │ │ ├── plugin.js │ │ └── compute │ │ │ └── index.js │ ├── change │ │ ├── plugin.js │ │ ├── compute │ │ │ └── index.js │ │ └── api │ │ │ ├── harden.js │ │ │ └── index.js │ ├── pointers │ │ ├── plugin.js │ │ ├── api │ │ │ └── lib │ │ │ │ └── difference.js │ │ └── methods │ │ │ └── index.js │ ├── tag │ │ ├── api │ │ │ └── index.js │ │ ├── methods │ │ │ ├── index.js │ │ │ ├── addTags │ │ │ │ └── _colors.js │ │ │ ├── canBe.js │ │ │ └── unTag.js │ │ ├── lib.js │ │ └── plugin.js │ ├── match │ │ ├── plugin.js │ │ ├── methods │ │ │ ├── index.js │ │ │ ├── match │ │ │ │ ├── _lib.js │ │ │ │ ├── 03-notIf.js │ │ │ │ ├── steps │ │ │ │ │ ├── greedy-match.js │ │ │ │ │ ├── optional-match.js │ │ │ │ │ └── contraction-skip.js │ │ │ │ └── 03-getGroup.js │ │ │ └── parseMatch │ │ │ │ └── 03-splitHyphens.js │ │ ├── lib.js │ │ └── api │ │ │ └── index.js │ ├── sweep │ │ ├── plugin.js │ │ ├── methods │ │ │ 
├── index.js │ │ │ ├── tagger │ │ │ │ └── canBe.js │ │ │ └── sweep │ │ │ │ └── 01-getHooks.js │ │ └── lib.js │ ├── typeahead │ │ ├── plugin.js │ │ └── api.js │ ├── lookup │ │ ├── plugin.js │ │ └── api │ │ │ ├── index.js │ │ │ └── buildTrie │ │ │ └── compress.js │ └── freeze │ │ └── debug.js ├── four.js ├── API │ ├── methods │ │ └── index.js │ ├── world.js │ └── _lib.js └── two.js ├── codecov.yml ├── types ├── one.d.cts ├── two.d.cts ├── three.d.cts └── view │ ├── two.d.cts │ ├── one.d.cts │ ├── three.d.cts │ └── two.d.ts ├── .npmignore ├── data ├── lexicon │ ├── people │ │ └── honorifics.js │ ├── verbs │ │ ├── verbs.js │ │ └── modals.js │ ├── numbers │ │ ├── multiples.js │ │ ├── ordinals.js │ │ └── cardinals.js │ ├── switches │ │ ├── person-date.js │ │ ├── person-adj.js │ │ ├── person-place.js │ │ ├── person-verb.js │ │ ├── unit-noun.js │ │ └── actor-verb.js │ ├── dates │ │ ├── dates.js │ │ ├── months.js │ │ ├── weekdays.js │ │ └── durations.js │ ├── nouns │ │ ├── pronouns.js │ │ ├── possessives.js │ │ ├── relative-prounoun.js │ │ └── properNouns.js │ └── misc │ │ └── determiners.js └── pairs │ └── index.js ├── scripts ├── chunks.js ├── patterns │ └── patterns.js ├── debug.js ├── version.js ├── perf │ ├── flame │ │ └── index.js │ ├── pool │ │ └── _lib.js │ └── _fetch.js ├── typescript │ └── two.ts ├── test │ ├── coverage.js │ └── stress.js ├── coreference │ └── index.js ├── match.js └── plugins.js ├── tests ├── one │ ├── _lib.js │ ├── misc │ │ └── misc.test.js │ ├── change │ │ └── fork.ignore.js │ ├── match │ │ ├── punctuation-match.test.js │ │ └── sweep-not.test.js │ ├── tokenize │ │ └── term-split.test.js │ └── miss.test.js ├── two │ ├── _lib.js │ ├── misc │ │ ├── misc.test.js │ │ ├── lazy.test.js │ │ ├── confidence.test.js │ │ └── canBe.test.js │ └── match │ │ └── soft-match.test.js ├── three │ ├── _lib.js │ ├── sentences │ │ └── negative.test.js │ ├── redact.test.js │ ├── verbs │ │ ├── phrasals.test.js │ │ ├── isplural.test.js │ │ └── phrasal.test.js │ ├── 
numbers │ │ └── backlog │ │ │ ├── overlap.ignore.js │ │ │ ├── conversion.ignore.js │ │ │ └── agreement.ignore.js │ ├── subsets.test.js │ └── nouns │ │ └── adjectives.test.js ├── four │ ├── _lib.js │ ├── match.ignore.js │ └── misc.ignore.js └── hmm.js ├── .gitignore ├── demos └── web-worker │ └── _worker.js ├── one └── package.json ├── two └── package.json ├── tokenize └── package.json ├── three └── package.json ├── tsconfig.json └── .github └── workflows └── coverage.yml /plugins/wikipedia/.gitignore: -------------------------------------------------------------------------------- 1 | files -------------------------------------------------------------------------------- /plugins/_experiments/markdown/src/Wrap.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/_version.js: -------------------------------------------------------------------------------- 1 | export default '14.14.5' -------------------------------------------------------------------------------- /plugins/dates/src/_version.js: -------------------------------------------------------------------------------- 1 | export default '3.7.1' -------------------------------------------------------------------------------- /plugins/speed/src/_version.js: -------------------------------------------------------------------------------- 1 | export default '0.1.2' -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | branches: 3 | - 'master' 4 | -------------------------------------------------------------------------------- /types/one.d.cts: -------------------------------------------------------------------------------- 1 | import nlp from "./one.d"; 2 | 3 | export = nlp 4 | 5 | 
-------------------------------------------------------------------------------- /types/two.d.cts: -------------------------------------------------------------------------------- 1 | import nlp from "./two.d"; 2 | 3 | export = nlp 4 | 5 | -------------------------------------------------------------------------------- /types/three.d.cts: -------------------------------------------------------------------------------- 1 | import nlp from "./three.d"; 2 | 3 | export = nlp 4 | 5 | -------------------------------------------------------------------------------- /types/view/two.d.cts: -------------------------------------------------------------------------------- 1 | import Two from "./two.d"; 2 | 3 | export = Two 4 | 5 | -------------------------------------------------------------------------------- /types/view/one.d.cts: -------------------------------------------------------------------------------- 1 | import View from "./one.d"; 2 | 3 | export = View 4 | 5 | -------------------------------------------------------------------------------- /plugins/dates/index.d.cts: -------------------------------------------------------------------------------- 1 | import dates from './index.d'; 2 | 3 | export = dates 4 | -------------------------------------------------------------------------------- /types/view/three.d.cts: -------------------------------------------------------------------------------- 1 | import Three from "./three.d"; 2 | 3 | export = Three 4 | 5 | -------------------------------------------------------------------------------- /src/3-three/sentences/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api.js' 2 | 3 | export default { api } 4 | -------------------------------------------------------------------------------- /src/4-four/facts/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api.js' 2 | 3 | export default { 4 | api 5 
| } -------------------------------------------------------------------------------- /plugins/paragraphs/src/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api.js' 2 | 3 | export default { 4 | api, 5 | } -------------------------------------------------------------------------------- /src/3-three/normalize/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api.js' 2 | 3 | export default { 4 | api 5 | } -------------------------------------------------------------------------------- /src/3-three/nouns/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/api.js' 2 | 3 | export default { 4 | api, 5 | } 6 | -------------------------------------------------------------------------------- /src/3-three/verbs/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/api.js' 2 | 3 | export default { 4 | api, 5 | } 6 | -------------------------------------------------------------------------------- /plugins/_experiments/markdown/README.md: -------------------------------------------------------------------------------- 1 | experimental nlp on a [unified/remark](https://unifiedjs.com/) AST. 
2 | -------------------------------------------------------------------------------- /plugins/_experiments/sentiment/README.md: -------------------------------------------------------------------------------- 1 | experimental rule-based, compressed-data sentiment analysis by Scott Cram 2 | -------------------------------------------------------------------------------- /src/2-two/lazy/plugin.js: -------------------------------------------------------------------------------- 1 | import lazy from './lazyParse.js' 2 | 3 | export default { 4 | lib: { 5 | lazy 6 | } 7 | } -------------------------------------------------------------------------------- /src/1-one/tokenize/model/abbreviations/organizations.js: -------------------------------------------------------------------------------- 1 | export default ['dept', 'univ', 'assn', 'bros', 'inc', 'ltd', 'co'] 2 | -------------------------------------------------------------------------------- /src/3-three/coreference/compute/findIt.js: -------------------------------------------------------------------------------- 1 | const findIt = function (m) { 2 | return m.none() 3 | } 4 | export default findIt -------------------------------------------------------------------------------- /plugins/speed/src/lazyParse/plugin.js: -------------------------------------------------------------------------------- 1 | import lazy from './lazyParse.js' 2 | 3 | export default { 4 | lib: { 5 | lazy 6 | } 7 | } -------------------------------------------------------------------------------- /src/1-one/contraction-one/compute/index.js: -------------------------------------------------------------------------------- 1 | import contractions from './contractions/index.js' 2 | 3 | export default { contractions } 4 | -------------------------------------------------------------------------------- /src/1-one/output/methods/index.js: -------------------------------------------------------------------------------- 1 | import hash from './hash.js' 2 
| import debug from './debug/index.js' 3 | 4 | export { hash, debug } 5 | -------------------------------------------------------------------------------- /src/1-one/cache/methods/index.js: -------------------------------------------------------------------------------- 1 | import cacheDoc from './cacheDoc.js' 2 | 3 | export default { 4 | one: { 5 | cacheDoc, 6 | }, 7 | } 8 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/personWords.js: -------------------------------------------------------------------------------- 1 | // extended professions, for #Actor tag 2 | // 'x therapist', 'y engineer' 3 | export default [ 4 | 5 | ] -------------------------------------------------------------------------------- /plugins/speed/src/workerPool/plugin.js: -------------------------------------------------------------------------------- 1 | import workerPool from './index.js' 2 | 3 | export default { 4 | lib: { 5 | workerPool 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/1-one/lexicon/methods/index.js: -------------------------------------------------------------------------------- 1 | import expandLexicon from './expand.js' 2 | 3 | export default { 4 | one: { 5 | expandLexicon, 6 | } 7 | } -------------------------------------------------------------------------------- /src/1-one/tokenize/model/abbreviations/months.js: -------------------------------------------------------------------------------- 1 | export default ['jan', 'feb', 'mar', 'apr', 'jun', 'jul', 'aug', 'sep', 'sept', 'oct', 'nov', 'dec'] 2 | -------------------------------------------------------------------------------- /plugins/speech/src/plugin.js: -------------------------------------------------------------------------------- 1 | import compute from './compute/index.js' 2 | import api from './api.js' 3 | 4 | export default { 5 | api, 6 | compute 7 | } 
-------------------------------------------------------------------------------- /src/1-one/cache/compute.js: -------------------------------------------------------------------------------- 1 | 2 | export default { 3 | cache: function (view) { 4 | view._cache = view.methods.one.cacheDoc(view.document) 5 | } 6 | } -------------------------------------------------------------------------------- /plugins/_experiments/compress/README.md: -------------------------------------------------------------------------------- 1 | they say that compression and intellegence are the same thing, 2 | but I'm not small-enough to understand that. 3 | 4 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | data 2 | demo 3 | plugins 4 | scripts 5 | tests 6 | .eslintrc 7 | .gitignore 8 | changelog.md 9 | hmm.md 10 | rollup.config.js 11 | scratch.js 12 | -------------------------------------------------------------------------------- /src/1-one/change/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import compute from './compute/index.js' 3 | 4 | export default { 5 | api, 6 | compute, 7 | } 8 | -------------------------------------------------------------------------------- /src/1-one/pointers/plugin.js: -------------------------------------------------------------------------------- 1 | import methods from './methods/index.js' 2 | import api from './api/index.js' 3 | 4 | export default { 5 | methods, 6 | api, 7 | } 8 | -------------------------------------------------------------------------------- /plugins/_experiments/cmd-k/src/plugin.js: -------------------------------------------------------------------------------- 1 | import searchBang from './searchBang.js' 2 | import slashCmd from './slashCmd.js' 3 | 4 | export default [searchBang, slashCmd] 5 | 
-------------------------------------------------------------------------------- /src/1-one/tag/api/index.js: -------------------------------------------------------------------------------- 1 | import tag from './tag.js' 2 | 3 | const tagAPI = function (View) { 4 | Object.assign(View.prototype, tag) 5 | } 6 | export default tagAPI 7 | -------------------------------------------------------------------------------- /src/2-two/swap/plugin.js: -------------------------------------------------------------------------------- 1 | import swap from './api/swap.js' 2 | 3 | const api = function (View) { 4 | View.prototype.swap = swap 5 | } 6 | 7 | export default { 8 | api 9 | } -------------------------------------------------------------------------------- /src/1-one/tokenize/model/aliases.js: -------------------------------------------------------------------------------- 1 | const aliases = { 2 | '&': 'and', 3 | '@': 'at', 4 | '%': 'percent', 5 | 'plz': 'please', 6 | 'bein': 'being', 7 | } 8 | export default aliases 9 | -------------------------------------------------------------------------------- /src/3-three/coreference/plugin.js: -------------------------------------------------------------------------------- 1 | import coreference from './compute/index.js' 2 | import api from './api/pronouns.js' 3 | 4 | export default { 5 | compute: { coreference }, 6 | api 7 | } -------------------------------------------------------------------------------- /plugins/speech/src/compute/index.js: -------------------------------------------------------------------------------- 1 | import soundsLike from './soundsLike/index.js' 2 | import syllables from './syllables/index.js' 3 | 4 | export default { 5 | soundsLike, 6 | syllables 7 | } -------------------------------------------------------------------------------- /src/2-two/preTagger/compute/index.js: -------------------------------------------------------------------------------- 1 | import preTagger from './tagger/index.js' 2 | 
import root from './root.js' 3 | import penn from './penn.js' 4 | 5 | export default { preTagger, root, penn } 6 | -------------------------------------------------------------------------------- /src/3-three/sentences/conjugate/toInfinitive.js: -------------------------------------------------------------------------------- 1 | const toInfinitive = function (s) { 2 | s.verbs().toInfinitive() 3 | // s.compute('chunks') 4 | return s 5 | } 6 | export default toInfinitive -------------------------------------------------------------------------------- /data/lexicon/people/honorifics.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'lieutenant general', 3 | 'field marshal', 4 | 'rear admiral', 5 | 'vice admiral', 6 | 'sergeant major', 7 | 'director general', 8 | ] -------------------------------------------------------------------------------- /src/2-two/contraction-two/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import compute from './compute/index.js' 3 | 4 | export default { 5 | compute, 6 | api, 7 | hooks: ['contractionTwo'] 8 | } -------------------------------------------------------------------------------- /src/3-three/chunker/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/api.js' 2 | import compute from './compute/index.js' 3 | 4 | export default { 5 | compute: compute, 6 | api: api, 7 | hooks: ['chunks'], 8 | } 9 | -------------------------------------------------------------------------------- /scripts/chunks.js: -------------------------------------------------------------------------------- 1 | import corpus from 'nlp-corpus' 2 | import nlp from '../src/three.js' 3 | const docs = corpus.some(13) 4 | docs.forEach(str => { 5 | nlp(str).debug({ tags: false, chunks: true }) 6 | }) 7 | 
-------------------------------------------------------------------------------- /scripts/patterns/patterns.js: -------------------------------------------------------------------------------- 1 | // list of all match patterns, Nov 2020 2 | let patterns = [] 3 | patterns = patterns.reduce((h, str) => { 4 | h[str] = 0 5 | return h 6 | }, {}) 7 | 8 | module.exports = patterns 9 | -------------------------------------------------------------------------------- /src/1-one/match/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import methods from './methods/index.js' 3 | import lib from './lib.js' 4 | 5 | export default { 6 | api, 7 | methods, 8 | lib, 9 | } 10 | -------------------------------------------------------------------------------- /plugins/_experiments/ast/README.md: -------------------------------------------------------------------------------- 1 | attempt to create a [unist-formatted](https://github.com/syntax-tree/unist) Abstract Syntax Tree via some [dependency parsing](http://nlpprogress.com/english/dependency_parsing.html) 2 | -------------------------------------------------------------------------------- /src/1-one/cache/plugin.js: -------------------------------------------------------------------------------- 1 | import methods from './methods/index.js' 2 | import api from './api.js' 3 | import compute from './compute.js' 4 | 5 | export default { 6 | api, 7 | compute, 8 | methods, 9 | } 10 | -------------------------------------------------------------------------------- /src/four.js: -------------------------------------------------------------------------------- 1 | import nlp from './three.js' 2 | import sense from './4-four/sense/plugin.js' 3 | import facts from './4-four/facts/plugin.js' 4 | 5 | nlp.plugin(sense) 6 | nlp.plugin(facts) 7 | 8 | export default nlp 9 | -------------------------------------------------------------------------------- 
/plugins/wikipedia/config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | lang: 'en', 3 | project: 'wikipedia', 4 | // min_pageviews: 3 // remove 378,151 5 | min_pageviews: 200 // remove 1,159,783 6 | // fresh: true, //start fresh 7 | } 8 | -------------------------------------------------------------------------------- /plugins/wikipedia/scripts/generate/index.js: -------------------------------------------------------------------------------- 1 | // import download from './01-download.js' 2 | import filter from './02-filter.js' 3 | import compress from './03-compress.js' 4 | 5 | // await download() 6 | filter() 7 | compress() -------------------------------------------------------------------------------- /src/3-three/topics/orgs/api.js: -------------------------------------------------------------------------------- 1 | 2 | const api = function (View) { 3 | View.prototype.organizations = function (n) { 4 | const m = this.match('#Organization+') 5 | return m.getNth(n) 6 | } 7 | } 8 | export default api 9 | -------------------------------------------------------------------------------- /data/lexicon/verbs/verbs.js: -------------------------------------------------------------------------------- 1 | //verbs we shouldn't conjugate, for whatever reason 2 | export default [ 3 | 'has', 4 | 'keep tabs', 5 | 'born', 6 | 'cannot', 7 | 'gonna', 8 | 'msg', 9 | 'make sure', 10 | 11 | ] 12 | -------------------------------------------------------------------------------- /scripts/debug.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import nlp from '../src/three.js' 3 | const txt = process.argv.slice(2).join(' ') 4 | console.log(`\n\n======== '${txt}' ======\n`) 5 | nlp.verbose(true) 6 | 7 | nlp(txt).debug() 8 | -------------------------------------------------------------------------------- /src/3-three/nouns/api/hasPlural.js: 
-------------------------------------------------------------------------------- 1 | 2 | const hasPlural = function (root) { 3 | if (root.has('^(#Uncountable|#ProperNoun|#Place|#Pronoun|#Acronym)+$')) { 4 | return false 5 | } 6 | return true 7 | } 8 | export default hasPlural -------------------------------------------------------------------------------- /data/lexicon/numbers/multiples.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'hundred', 3 | 'thousand', 4 | 'million', 5 | 'billion', 6 | 'trillion', 7 | 'quadrillion', 8 | 'quintillion', 9 | 'sextillion', 10 | 'septillion', 11 | ] 12 | -------------------------------------------------------------------------------- /src/1-one/sweep/plugin.js: -------------------------------------------------------------------------------- 1 | import lib from './lib.js' 2 | import api from './api.js' 3 | import methods from './methods/index.js' 4 | 5 | export default { 6 | lib, 7 | api, 8 | methods: { 9 | one: methods, 10 | } 11 | } -------------------------------------------------------------------------------- /data/lexicon/switches/person-date.js: -------------------------------------------------------------------------------- 1 | // person-names that can be dates 2 | export default [ 3 | // clues: [person, date], 4 | // fallback: 'Month', 5 | 'april', 'august', 'jan', 'january', 'june', 'sep', 'avril', 6 | // 'may' 7 | ] 8 | 9 | -------------------------------------------------------------------------------- /src/1-one/sweep/methods/index.js: -------------------------------------------------------------------------------- 1 | import buildNet from './buildNet/index.js' 2 | import bulkMatch from './sweep/index.js' 3 | import bulkTagger from './tagger/index.js' 4 | 5 | export default { 6 | buildNet, 7 | bulkMatch, 8 | bulkTagger 9 | } -------------------------------------------------------------------------------- /src/1-one/contraction-one/model/index.js: 
-------------------------------------------------------------------------------- 1 | import contractions from './contractions.js' 2 | import numberSuffixes from './number-suffix.js' 3 | 4 | export default { 5 | one: { 6 | contractions, 7 | numberSuffixes 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /data/lexicon/switches/person-adj.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'misty', 3 | 'rusty', 4 | 'dusty', 5 | 'rich', 6 | 'randy', 7 | 'sandy', 8 | 'earnest', 9 | 'frank', 10 | // 'young', 11 | 'brown', 12 | 'bella', 13 | 'woody' 14 | ] -------------------------------------------------------------------------------- /plugins/stats/src/ngram/tokenize.js: -------------------------------------------------------------------------------- 1 | // tokenize by term 2 | const tokenize = function (doc) { 3 | const list = doc.json({ text: false }).map(o => { 4 | return o.terms.map(t => t.normal) 5 | }) 6 | return list 7 | } 8 | export default tokenize 9 | -------------------------------------------------------------------------------- /src/1-one/output/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import { debug, hash } from './methods/index.js' 3 | 4 | export default { 5 | api, 6 | methods: { 7 | one: { 8 | hash, 9 | debug, 10 | }, 11 | }, 12 | } 13 | -------------------------------------------------------------------------------- /src/1-one/contraction-one/plugin.js: -------------------------------------------------------------------------------- 1 | import model from './model/index.js' 2 | import compute from './compute/index.js' 3 | 4 | const plugin = { 5 | model: model, 6 | compute: compute, 7 | hooks: ['contractions'], 8 | } 9 | export default plugin 10 | -------------------------------------------------------------------------------- /src/4-four/sense/plugin.js: 
import dates from './dates.js'
import times from './times.js'
import durations from './durations/index.js'

// attach every date-related method group onto the View prototype
const api = function (View) {
  const registrars = [dates, times, durations]
  registrars.forEach((addTo) => addTo(View))
}
export default api
// library-level methods (called on nlp itself, not on a document)
const lib = {
  /** compile a list of match statements into a reusable match-net */
  buildNet(matches) {
    const world = this.world()
    const net = this.methods().one.buildNet(matches, world)
    net.isNet = true
    return net
  },
}
export default lib
import fs from 'fs'
// stamp the current package.json version into a tiny module,
// so builds don't need to bundle the whole package.json file
const raw = fs.readFileSync('./package.json', 'utf8')
const { version } = JSON.parse(raw)

fs.writeFileSync('./src/_version.js', `export default '${version}'`)
// flip the first verb of a sentence to its negative form, in-place
const toNegative = (s) => {
  const firstVerb = s.verbs().first()
  firstVerb.toNegative().compute('chunks')
  return s
}
// flip the first verb of a sentence to its positive form, in-place
const toPositive = (s) => {
  const firstVerb = s.verbs().first()
  firstVerb.toPositive().compute('chunks')
  return s
}
export { toNegative, toPositive }
import find from './find.js'

// register a `.places()` method on the View prototype
const addMethod = function (View) {
  /** return matched place-names, optionally picking the nth one */
  View.prototype.places = function (n) {
    const found = find(this).getNth(n)
    return new View(this.document, found.pointer)
  }
}
export default addMethod
const senseMethods = function (View) {
  /** tag every term in this view with a word-sense label
   *  returns the view, so calls can be chained (matches the
   *  convention of the other prototype methods, e.g. .cache()) */
  View.prototype.sense = function (s) {
    this.docs.forEach(terms =>
      terms.forEach(t => {
        t.sense = s
      })
    )
    return this
  }
}
export default senseMethods
// discourse connectives that signal a logical break between clauses
// (was 'therefor' — a typo that silently never matched 'therefore')
const breaks = '(but|however|and|so|thus|therefore)'

/** parse a pivot/connective chunk into a fact-structure node
 *  @returns {{breakPoint: boolean, root: string}} */
const parsePivot = function (chunk) {
  const str = chunk.text('normal')
  const breakPoint = chunk.has(breaks)
  return {
    breakPoint,
    root: str
  }
}
export default parsePivot
// build a fact-structure node from a noun chunk
const parseNoun = function (chunk) {
  // root form of the noun, ignoring demonyms like 'Canadian'
  const root = chunk.match('#Noun').not('#Demonym').text('root')
  const [first] = chunk.nouns().json()
  // decorate (and return) the parsed noun object itself
  return Object.assign(first.noun, {
    chunk: 'Noun',
    ptr: chunk.ptrs[0],
    root,
  })
}

export default parseNoun
/* eslint-disable no-console */
import build from '../../builds/three/compromise-three.mjs'
import src from '../../src/three.js'
// test against the bundled build in prod-mode, the raw source otherwise
const isProd = process.env.TESTENV === 'prod'
if (isProd) {
  console.warn('== production build test 🚀 ==')
}
const nlp = isProd ? build : src
export default nlp
// build a fact-structure node from an adjective chunk
const parseAdjective = function (chunk) {
  const root = chunk.text('normal')
  const desc = chunk.adjectives().out('array')
  return {
    chunk: 'Adjective',
    ptr: chunk.ptrs[0],
    root,
    desc,
  }
}

export default parseAdjective
// register additional pos-tags into the world's tag-set
const addTags = function (tags) {
  const world = this.world()
  const merged = world.methods.one.addTags(tags, world.model.one.tagSet)
  world.model.one.tagSet = merged
  return this
}

export default { addTags }
/* eslint-disable no-console */
// import build from '../../builds/four/compromise-four.mjs'
import src from '../../src/four.js'
// no bundled build exists for 'four' yet — bail out of prod-mode tests
if (process.env.TESTENV === 'prod') {
  console.warn('== production build test 🚀 ==')
  // nlp = build
  process.exit()
}
const nlp = src
export default nlp
import methods from './methods/index.js'
import api from './api/index.js'
import lib from './lib.js'
import tagRank from './compute/tagRank.js'

// tag plugin: starts with an empty tag-set, filled-in later by addTags()
const plugin = {
  model: {
    one: { tagSet: {} },
  },
  compute: { tagRank },
  methods,
  api,
  lib,
}
export default plugin
// expand "n't" contractions — "don't" -> ['do', 'not']
const apostropheT = function (terms, i) {
  const word = terms[i].normal
  // "ain't" is ambiguous (am/is/are not) — handled later, in ./two/
  if (word === "ain't" || word === 'aint') {
    return null
  }
  return [word.replace(/n't/, ''), 'not']
}

export default apostropheT
import toInfinitive from './toInfinitive/index.js'
import conjugate from './conjugate/index.js'

// list every conjugated form of a verb (skipping future-tense,
// which is just 'will' + infinitive)
const all = function (str, model) {
  const forms = conjugate(str, model)
  delete forms.FutureTense
  return Object.values(forms).filter(Boolean)
}
export default {
  toInfinitive,
  conjugate,
  all,
}
// normalize accented characters — 'Björk' to 'Bjork'
// using the world's one.unicode char-mapping, when present
const killUnicode = function (str, world) {
  const unicode = world.model.one.unicode || {}
  str = str || ''
  let res = ''
  // iterate by code-point (not .split('')) so surrogate pairs
  // like emoji pass through intact instead of being split in half
  for (const ch of str) {
    res += unicode[ch] || ch
  }
  return res
}
export default killUnicode
import uuid from './uuid.js'

const compute = {
  // assign a unique id to any term that doesn't already have one
  id: function (view) {
    view.docs.forEach((terms) => {
      terms.forEach((term) => {
        term.id = term.id || uuid(term)
      })
    })
  },
}

export default compute
-------------------------------------------------------------------------------- 1 | // number suffixes that are not units 2 | const t = true 3 | export default { 4 | 'st': t, 5 | 'nd': t, 6 | 'rd': t, 7 | 'th': t, 8 | 'am': t, 9 | 'pm': t, 10 | 'max': t, 11 | '°': t, 12 | 's': t, // 1990s 13 | 'e': t, // 18e - french/spanish ordinal 14 | 'er': t, //french 1er 15 | 'ère': t, //'' 16 | 'ème': t, //french 2ème 17 | } -------------------------------------------------------------------------------- /src/1-one/tokenize/methods/03-whitespace/index.js: -------------------------------------------------------------------------------- 1 | import tokenize from './tokenize.js' 2 | 3 | const parseTerm = (txt, model) => { 4 | // cleanup any punctuation as whitespace 5 | const { str, pre, post } = tokenize(txt, model) 6 | const parsed = { 7 | text: str, 8 | pre: pre, 9 | post: post, 10 | tags: new Set(), 11 | } 12 | return parsed 13 | } 14 | export default parseTerm 15 | -------------------------------------------------------------------------------- /plugins/payload/src/debug.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | 3 | // pretty-print each match that has a payload 4 | const debug = function (view) { 5 | view.getPayloads().forEach(res => { 6 | const { match, val } = res 7 | console.log('\n────────') 8 | match.debug('highlight') 9 | console.log(' ', JSON.stringify(val)) 10 | console.log('\n') 11 | }) 12 | } 13 | export default debug 14 | -------------------------------------------------------------------------------- /data/lexicon/dates/weekdays.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'monday', 3 | 'tuesday', 4 | 'wednesday', 5 | 'thursday', 6 | 'friday', 7 | 'saturday', 8 | 'sunday', 9 | 'mon', 10 | 'tues', 11 | // 'wed', 12 | 'thurs', 13 | 'fri', 14 | // 'sat', 15 | // 'sun', 16 | 'mondays', 17 | 'tuesdays', 18 | 'wednesdays', 
19 | 'thursdays', 20 | 'fridays', 21 | 'saturdays', 22 | 'sundays', 23 | ] 24 | -------------------------------------------------------------------------------- /src/1-one/lexicon/plugin.js: -------------------------------------------------------------------------------- 1 | import compute from './compute/index.js' 2 | import methods from './methods/index.js' 3 | import lib from './lib.js' 4 | 5 | const model = { 6 | one: { 7 | lexicon: {}, //setup blank lexicon 8 | _multiCache: {}, 9 | frozenLex: {}, //2nd lexicon 10 | }, 11 | } 12 | 13 | export default { 14 | model, 15 | methods, 16 | compute, 17 | lib, 18 | hooks: ['lexicon'], 19 | } 20 | -------------------------------------------------------------------------------- /src/1-one/change/api/harden.js: -------------------------------------------------------------------------------- 1 | // add indexes to pointers 2 | const harden = function () { 3 | this.ptrs = this.fullPointer 4 | return this 5 | } 6 | // remove indexes from pointers 7 | const soften = function () { 8 | let ptr = this.ptrs 9 | if (!ptr || ptr.length < 1) { 10 | return this 11 | } 12 | ptr = ptr.map(a => a.slice(0, 3)) 13 | this.ptrs = ptr 14 | return this 15 | } 16 | export default { harden, soften } -------------------------------------------------------------------------------- /src/1-one/tokenize/methods/02-terms/02-slashes.js: -------------------------------------------------------------------------------- 1 | const isSlash = /\p{L} ?\/ ?\p{L}+$/u 2 | 3 | // 'he / she' should be one word 4 | const combineSlashes = function (arr) { 5 | for (let i = 1; i < arr.length - 1; i++) { 6 | if (isSlash.test(arr[i])) { 7 | arr[i - 1] += arr[i] + arr[i + 1] 8 | arr[i] = null 9 | arr[i + 1] = null 10 | } 11 | } 12 | return arr 13 | } 14 | export default combineSlashes 15 | -------------------------------------------------------------------------------- /plugins/stats/src/tfidf/tf.js: 
-------------------------------------------------------------------------------- 1 | const tf = function (view, opts = {}) { 2 | const counts = {} 3 | const form = opts.form || 'root' 4 | view.docs.forEach(terms => { 5 | terms.forEach(term => { 6 | const str = term[form] || term.implicit || term.normal 7 | if (str) { 8 | counts[str] = counts[str] || 0 9 | counts[str] += 1 10 | } 11 | }) 12 | }) 13 | return counts 14 | } 15 | export default tf -------------------------------------------------------------------------------- /src/1-one/tokenize/compute/wordCount.js: -------------------------------------------------------------------------------- 1 | const wordCount = function (view) { 2 | let n = 0 3 | const docs = view.docs 4 | for (let i = 0; i < docs.length; i += 1) { 5 | for (let t = 0; t < docs[i].length; t += 1) { 6 | if (docs[i][t].normal === '') { 7 | continue //skip implicit words 8 | } 9 | n += 1 10 | docs[i][t].wordCount = n 11 | } 12 | } 13 | } 14 | 15 | export default wordCount 16 | -------------------------------------------------------------------------------- /tests/one/change/fork.ignore.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[one/fork] ' 4 | 5 | test('fork basic:', function (t) { 6 | const before = nlp.fork() 7 | const a = before('hello donkey kong', { donkey: 'Person' }) 8 | const b = nlp('donkey') 9 | t.equal(a.has('#Person'), true, here + 'has person') 10 | t.equal(b.has('#Person'), false, here + 'does not have person') 11 | t.end() 12 | }) 13 | -------------------------------------------------------------------------------- /plugins/_experiments/ast/src/plugin.js: -------------------------------------------------------------------------------- 1 | import toAst from './ast.js' 2 | import toLines from './lines.js' 3 | import compute from './compute/index.js' 4 | 5 | 6 | 7 | export default { 8 | compute, 9 | 10 | api: function 
(View) { 11 | 12 | View.prototype.lines = function () { 13 | return toLines(this) 14 | } 15 | 16 | 17 | View.prototype.ast = function (opts) { 18 | return toAst(this, opts) 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /src/3-three/numbers/fractions/find.js: -------------------------------------------------------------------------------- 1 | const findFractions = function (doc, n) { 2 | // five eighths 3 | let m = doc.match('#Fraction+') 4 | // remove 'two and five eights' 5 | m = m.filter(r => { 6 | return !r.lookBehind('#Value and$').found 7 | }) 8 | // thirty seconds 9 | m = m.notIf('#Value seconds') 10 | 11 | if (typeof n === 'number') { 12 | m = m.eq(n) 13 | } 14 | return m 15 | } 16 | export default findFractions 17 | -------------------------------------------------------------------------------- /demos/web-worker/_worker.js: -------------------------------------------------------------------------------- 1 | /* global importScripts */ 2 | 3 | //loads and runs compromise inside the worker-instance 4 | self.addEventListener( 5 | 'message', 6 | function (e) { 7 | // importScripts('https://unpkg.com/compromise@next') 8 | importScripts('../../builds/compromise.js') 9 | const doc = self.nlp(e.data) 10 | const m = doc.places() 11 | self.postMessage(m.json({ count: true, unique: true })) 12 | }, 13 | false 14 | ) -------------------------------------------------------------------------------- /plugins/wikipedia/scratch.js: -------------------------------------------------------------------------------- 1 | // import corpus from 'nlp-corpus' 2 | import nlp from '../../src/one.js' 3 | import plugin from './src/plugin.js' 4 | nlp.extend(plugin) 5 | 6 | let txt = '' 7 | txt = `Moreover, it is always possible to consolidate for discovery different cases that involve construction of the same claims.` 8 | 9 | txt = 'i saw the toronto raptors play a cleveland foops' 10 | const doc = nlp(txt) 11 | const m = 
doc.wikipedia() 12 | m.debug() -------------------------------------------------------------------------------- /scripts/perf/flame/index.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import txt from './_sotu-text.js' 3 | 4 | console.log('\n-- testing: --') 5 | console.time('load') 6 | import nlp from '../../../src/three.js' 7 | console.timeEnd('load') 8 | 9 | console.time('parse') 10 | const doc = nlp(txt) 11 | console.timeEnd('parse') 12 | 13 | console.time('match') 14 | doc.match('#Noun') 15 | console.timeEnd('match') 16 | console.log('\n v' + nlp.version, '\n') 17 | -------------------------------------------------------------------------------- /src/1-one/tag/methods/canBe.js: -------------------------------------------------------------------------------- 1 | // quick check if this tag will require any untagging 2 | const canBe = function (term, tag, tagSet) { 3 | if (!tagSet.hasOwnProperty(tag)) { 4 | return true // everything can be an unknown tag 5 | } 6 | const not = tagSet[tag].not || [] 7 | for (let i = 0; i < not.length; i += 1) { 8 | if (term.tags.has(not[i])) { 9 | return false 10 | } 11 | } 12 | return true 13 | } 14 | export default canBe 15 | -------------------------------------------------------------------------------- /plugins/payload/scratch.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console, no-unused-vars */ 2 | 3 | import nlp from '../../src/three.js' 4 | // import plugin from './src/plugin.js' 5 | import plugin from './builds/compromise-payload.mjs' 6 | nlp.extend(plugin) 7 | 8 | const doc = nlp('i saw John Lennon, and john smith and bob dylan') 9 | 10 | doc.match('(john|bob|dave) .').addPayload(m => { 11 | return { lastName: m.terms().last().text() } 12 | }) 13 | console.log(doc.getPayloads()) 14 | -------------------------------------------------------------------------------- 
/src/2-two/preTagger/methods/transform/nouns/index.js: -------------------------------------------------------------------------------- 1 | import toPlural from './toPlural/index.js' 2 | import toSingular from './toSingular/index.js' 3 | 4 | const all = function (str, model) { 5 | const arr = [str] 6 | const p = toPlural(str, model) 7 | if (p !== str) { 8 | arr.push(p) 9 | } 10 | const s = toSingular(str, model) 11 | if (s !== str) { 12 | arr.push(s) 13 | } 14 | return arr 15 | } 16 | 17 | export default { toPlural, toSingular, all } 18 | -------------------------------------------------------------------------------- /src/API/_lib.js: -------------------------------------------------------------------------------- 1 | 2 | /** log the decision-making to console */ 3 | const verbose = function (set) { 4 | const env = typeof process === 'undefined' || !process.env ? self.env || {} : process.env //use window, in browser 5 | env.DEBUG_TAGS = set === 'tagger' || set === true ? true : '' 6 | env.DEBUG_MATCH = set === 'match' || set === true ? true : '' 7 | env.DEBUG_CHUNKS = set === 'chunker' || set === true ? 
true : '' 8 | return this 9 | } 10 | 11 | export { verbose } -------------------------------------------------------------------------------- /tests/hmm.js: -------------------------------------------------------------------------------- 1 | // test('tokenize() accepts lexicon param', function (t) { 2 | // let doc = nlp.tokenize('spencer kelly is working here', { 3 | // 'spencer kelly': 'Person', 4 | // working: 'NotFun', 5 | // }) 6 | // t.equal(doc.match('#Person+').text(), 'spencer kelly', here + 'used tag') 7 | // t.equal(doc.match('#NotFun').text(), 'working', here + 'used 2nd tag') 8 | // t.equal(doc.has('#Verb'), false, here + 'not a full tag') 9 | // t.end() 10 | // }) 11 | -------------------------------------------------------------------------------- /data/pairs/index.js: -------------------------------------------------------------------------------- 1 | import Comparative from './Comparative.js' 2 | import Gerund from './Gerund.js' 3 | import Participle from './Participle.js' 4 | import PastTense from './PastTense.js' 5 | import PresentTense from './PresentTense.js' 6 | import Superlative from './Superlative.js' 7 | import AdjToNoun from './AdjToNoun.js' 8 | 9 | export default { 10 | Comparative, 11 | Gerund, 12 | Participle, 13 | PastTense, 14 | PresentTense, 15 | Superlative, 16 | AdjToNoun, 17 | } -------------------------------------------------------------------------------- /scripts/perf/pool/_lib.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import src from '../../../src/three.js' 3 | import build from '../../../builds/three/compromise-three.mjs' 4 | 5 | let nlp 6 | if (process.env.TESTENV === 'prod') { 7 | console.warn('== production build test 🚀 ==') 8 | // nlp = require('../../../builds/compromise.min.js') 9 | nlp = build 10 | } else { 11 | nlp = src 12 | // nlp.extend(require('../plugins/numbers/src')) 13 | } 14 | 15 | export default nlp 16 | 
-------------------------------------------------------------------------------- /src/two.js: -------------------------------------------------------------------------------- 1 | import nlp from './one.js' 2 | import preTag from './2-two/preTagger/plugin.js' 3 | import contractionTwo from './2-two/contraction-two/plugin.js' 4 | import postTag from './2-two/postTagger/plugin.js' 5 | import lazy from './2-two/lazy/plugin.js' 6 | import swap from './2-two/swap/plugin.js' 7 | 8 | nlp.plugin(preTag) //~103kb 9 | nlp.plugin(contractionTwo) // 10 | nlp.plugin(postTag) //~33kb 11 | nlp.plugin(lazy) // 12 | nlp.plugin(swap) // 13 | 14 | export default nlp 15 | -------------------------------------------------------------------------------- /plugins/stats/src/compute.js: -------------------------------------------------------------------------------- 1 | 2 | const compute = { 3 | // this is just the same thing 4 | // but written to Term objects 5 | tfidf: (view) => { 6 | let res = view.tfidf() 7 | res = res.reduce((h, a) => { 8 | h[a[0]] = a[1] 9 | return h 10 | }, {}) 11 | view.docs.forEach(terms => { 12 | terms.forEach(term => { 13 | term.tfidf = res[term.root || term.implicit || term.normal] || 0 14 | }) 15 | }) 16 | } 17 | } 18 | export default compute -------------------------------------------------------------------------------- /plugins/stats/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/one/compromise-one.mjs' 2 | import src from '../../../src/one.js' 3 | import plgBuild from '../builds/compromise-stats.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | 
-------------------------------------------------------------------------------- /src/2-two/preTagger/model/clues/person-adj.js: -------------------------------------------------------------------------------- 1 | import person from './_person.js' 2 | import adj from './_adj.js' 3 | 4 | // 'rusty nail' - 'rusty smith' 5 | const clues = { 6 | beforeTags: Object.assign({}, person.beforeTags, adj.beforeTags), 7 | afterTags: Object.assign({}, person.afterTags, adj.afterTags), 8 | beforeWords: Object.assign({}, person.beforeWords, adj.beforeWords), 9 | afterWords: Object.assign({}, person.afterWords, adj.afterWords), 10 | } 11 | export default clues -------------------------------------------------------------------------------- /plugins/dates/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/three/compromise-three.mjs' 2 | import src from '../../../src/three.js' 3 | import plgBuild from '../builds/compromise-dates.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | -------------------------------------------------------------------------------- /plugins/speech/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/one/compromise-one.mjs' 2 | import src from '../../../src/one.js' 3 | import plgBuild from '../builds/compromise-speech.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | 
-------------------------------------------------------------------------------- /plugins/stats/src/ngram/sort.js: -------------------------------------------------------------------------------- 1 | const sort = function (arr) { 2 | arr = arr.sort((a, b) => { 3 | //first sort them by count 4 | if (a.count > b.count) { 5 | return -1 6 | } 7 | if (a.count < b.count) { 8 | return 1 9 | } 10 | // in a tie, sort them by size 11 | if (a.size > b.size) { 12 | return -1 13 | } 14 | if (a.size < b.size) { 15 | return 1 16 | } 17 | return 0 18 | }) 19 | return arr 20 | } 21 | export default sort 22 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/clues/unit-noun.js: -------------------------------------------------------------------------------- 1 | // '5 oz' - 'dr oz' 2 | const un = 'Unit' 3 | const clues = { 4 | beforeTags: { Value: un }, 5 | afterTags: {}, 6 | beforeWords: { 7 | per: un, 8 | every: un, 9 | each: un, 10 | square: un, //square km 11 | cubic: un, 12 | sq: un, 13 | metric: un //metric ton 14 | }, 15 | afterWords: { 16 | per: un, 17 | squared: un, 18 | cubed: un, 19 | long: un //foot long 20 | }, 21 | } 22 | export default clues -------------------------------------------------------------------------------- /plugins/paragraphs/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/one/compromise-one.mjs' 2 | import src from '../../../src/one.js' 3 | import plgBuild from '../builds/compromise-paragraphs.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | -------------------------------------------------------------------------------- /plugins/payload/tests/_lib.js: 
-------------------------------------------------------------------------------- 1 | import src from '../../../src/one.js' 2 | import build from '../../../builds/one/compromise-one.mjs' 3 | 4 | import plg from '../src/plugin.js' 5 | import plgBuild from '../builds/compromise-payload.mjs' 6 | let nlp 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | 16 | export default nlp 17 | -------------------------------------------------------------------------------- /plugins/wikipedia/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/one/compromise-one.mjs' 2 | import src from '../../../src/one.js' 3 | import plgBuild from '../builds/compromise-wikipedia.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | -------------------------------------------------------------------------------- /plugins/dates/src/api/parse/one/units/index.js: -------------------------------------------------------------------------------- 1 | 2 | export { default as Unit } from './Unit.js' 3 | export { Day, WeekDay, CalendarDate, Holiday } from './_day.js' 4 | export { Hour, Minute, Moment } from './_time.js' 5 | export { AnyMonth, Month, Quarter, AnyQuarter, Season, Year } from './_year.js' 6 | export { Week, WeekEnd } from './_week.js' 7 | 8 | // export { Unit, Day, WeekDay, CalendarDate, Holiday, Hour, Minute, Moment, AnyMonth, Month, Quarter, AnyQuarter, Season, Year, Week, WeekEnd } 9 | 10 | -------------------------------------------------------------------------------- 
/plugins/dates/src/model/tags.js: -------------------------------------------------------------------------------- 1 | export default { 2 | FinancialQuarter: { 3 | is: 'Date', 4 | not: ['Fraction'], 5 | }, 6 | // 'summer' 7 | Season: { 8 | is: 'Date', 9 | }, 10 | // '1982' 11 | Year: { 12 | is: 'Date', 13 | not: ['RomanNumeral'], 14 | }, 15 | // 'easter' 16 | Holiday: { 17 | is: 'Date', 18 | also: 'Noun', 19 | }, 20 | // 'two weeks before' 21 | DateShift: { 22 | is: 'Date', 23 | not: ['Timezone', 'Holiday'], 24 | }, 25 | } 26 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/clues/actor-verb.js: -------------------------------------------------------------------------------- 1 | import noun from './_noun.js' 2 | import verb from './_verb.js' 3 | // 'the pilot' vs 'pilot the plane' 4 | const clue = { 5 | beforeTags: Object.assign({}, verb.beforeTags, noun.beforeTags, { 6 | }), 7 | afterTags: Object.assign({}, verb.afterTags, noun.afterTags, {}), 8 | beforeWords: Object.assign({}, verb.beforeWords, noun.beforeWords, {}), 9 | afterWords: Object.assign({}, verb.afterWords, noun.afterWords, {}), 10 | } 11 | 12 | export default clue -------------------------------------------------------------------------------- /tests/three/sentences/negative.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[three/sentence-negative] ' 4 | 5 | test('sentences.toPositive', function (t) { 6 | const doc = nlp(`do not use reverse psychology.`) 7 | doc.sentences().toPositive() 8 | t.equal(doc.text(), 'use reverse psychology.', here + 'neg') 9 | 10 | doc.sentences().toNegative() 11 | t.equal(doc.text(), 'do not use reverse psychology.', here + 'back to neg') 12 | 13 | t.end() 14 | }) 15 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/_expand/irregulars.js: 
-------------------------------------------------------------------------------- 1 | // import irregularVerbs from './conjugations.js' 2 | // harvest list of irregulars for any juicy word-data 3 | const expandIrregulars = function (model) { 4 | const { irregularPlurals } = model.two 5 | const { lexicon } = model.one 6 | Object.entries(irregularPlurals).forEach(a => { 7 | lexicon[a[0]] = lexicon[a[0]] || 'Singular' 8 | lexicon[a[1]] = lexicon[a[1]] || 'Plural' 9 | }) 10 | return model 11 | } 12 | export default expandIrregulars 13 | -------------------------------------------------------------------------------- /src/2-two/lazy/lazyParse.js: -------------------------------------------------------------------------------- 1 | import maybeMatch from './maybeMatch.js' 2 | 3 | // tokenize first, then only tag sentences required 4 | const lazyParse = function (input, reg) { 5 | let net = reg 6 | if (typeof reg === 'string') { 7 | net = this.buildNet([{ match: reg }]) 8 | } 9 | const doc = this.tokenize(input) 10 | const m = maybeMatch(doc, net) 11 | if (m.found) { 12 | m.compute(['index', 'tagger']) 13 | return m.match(reg) 14 | } 15 | return doc.none() 16 | } 17 | export default lazyParse -------------------------------------------------------------------------------- /src/4-four/facts/parse/postProcess.js: -------------------------------------------------------------------------------- 1 | const postProcess = function (parts) { 2 | 3 | for (let i = 1; i < parts.length; i += 1) { 4 | // is it missing a subject? 
5 | // borrow the last one 6 | if (!parts[i].subj && parts[i].verb) { 7 | for (let o = i; o >= 0; o -= 1) { 8 | if (parts[o].subj) { 9 | parts[i].subj = Object.assign({ borrowed: true }, parts[o].subj) 10 | break 11 | } 12 | } 13 | } 14 | } 15 | return parts 16 | } 17 | export default postProcess -------------------------------------------------------------------------------- /scripts/typescript/two.ts: -------------------------------------------------------------------------------- 1 | // import nlp from '../../src/three.js' 2 | import nlp from '../../types/two' 3 | 4 | const doc = nlp('okay cool') 5 | 6 | // ### Pre-tagger 7 | doc.compute('preTagger') 8 | doc.compute('root') 9 | doc.compute('penn') 10 | 11 | // ### Contraction-two 12 | doc.compute('contractionTwo') 13 | doc.contractions() 14 | doc.contractions().expand() 15 | doc.contract() 16 | 17 | // ### Post-tagger 18 | doc.compute('postTagger') 19 | doc.confidence() 20 | 21 | 22 | // ### Swap 23 | doc.swap('', '') 24 | -------------------------------------------------------------------------------- /src/4-four/facts/parse/verb.js: -------------------------------------------------------------------------------- 1 | const parseVerb = function (chunk) { 2 | const obj = chunk.verbs().json()[0].verb 3 | return { 4 | chunk: 'Verb', 5 | ptr: obj.ptr = chunk.ptrs[0], 6 | desc: obj.preAdverbs.concat(obj.postAdverbs), 7 | negative: obj.negative, 8 | root: obj.infinitive, 9 | tense: obj.grammar.tense, 10 | copula: obj.grammar.copula, 11 | imperative: chunk.has('#Imperative'), 12 | hypothetical: chunk.has('(would|could) #Adverb? 
have') 13 | } 14 | } 15 | export default parseVerb -------------------------------------------------------------------------------- /tests/four/match.ignore.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from './_lib.js' 3 | const here = '[one/match] ' 4 | 5 | const arr = [ 6 | ['cold', '{cold/temperature}'], 7 | ['cold demeanor', '{cold/attitude}'], 8 | ['he will plug his book', '{plug/sell}'], 9 | ] 10 | test('match:', function (t) { 11 | arr.forEach(function (a) { 12 | const doc = nlp(a[0]) 13 | const msg = `'${(a[0] + "' ").padEnd(20, '.')} - '${a[1]}'` 14 | t.equal(doc.has(a[1]), true, here + msg) 15 | }) 16 | t.end() 17 | }) 18 | -------------------------------------------------------------------------------- /plugins/speed/src/lazyParse/lazyParse.js: -------------------------------------------------------------------------------- 1 | import maybeMatch from './maybeMatch.js' 2 | 3 | // tokenize first, then only tag sentences required 4 | const lazyParse = function (input, reg) { 5 | let net = reg 6 | if (typeof reg === 'string') { 7 | net = this.buildNet([{ match: reg }]) 8 | } 9 | const doc = this.tokenize(input) 10 | const m = maybeMatch(doc, net) 11 | if (m.found) { 12 | m.compute(['index', 'tagger']) 13 | return m.match(reg) 14 | } 15 | return doc.none() 16 | } 17 | export default lazyParse -------------------------------------------------------------------------------- /src/1-one/lookup/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import compress from './api/buildTrie/compress.js' 3 | import build from './api/buildTrie/index.js' 4 | 5 | /** pre-compile a list of matches to lookup */ 6 | const lib = { 7 | /** turn an array or object into a compressed trie*/ 8 | buildTrie: function (input) { 9 | const trie = build(input, this.world()) 10 | return compress(trie) 11 | } 12 | } 13 | // add 
alias 14 | lib.compile = lib.buildTrie 15 | 16 | export default { 17 | api, 18 | lib 19 | } 20 | -------------------------------------------------------------------------------- /src/2-two/preTagger/compute/tagger/3rd-pass/07-verb-type.js: -------------------------------------------------------------------------------- 1 | const verbType = function (terms, i, model, world) { 2 | const setTag = world.methods.one.setTag 3 | const term = terms[i] 4 | const types = ['PastTense', 'PresentTense', 'Auxiliary', 'Modal', 'Particle'] 5 | if (term.tags.has('Verb')) { 6 | const type = types.find(typ => term.tags.has(typ)) 7 | // is it a bare #Verb tag? 8 | if (!type) { 9 | setTag([term], 'Infinitive', world, null, `2-verb-type''`) 10 | } 11 | } 12 | } 13 | export default verbType -------------------------------------------------------------------------------- /plugins/speed/src/plugin.js: -------------------------------------------------------------------------------- 1 | import streamFile from './stream/streamFile.js' 2 | import keyPress from './keypress/index.js' 3 | import workerPool from './workerPool/plugin.js' 4 | import lazyParse from './lazyParse/plugin.js' 5 | import version from './_version.js' 6 | 7 | // combine all the plugins 8 | const plugin = { 9 | lib: Object.assign({}, streamFile.lib, keyPress.lib, workerPool.lib, lazyParse.lib), 10 | version: version 11 | } 12 | 13 | export { streamFile, keyPress, workerPool, lazyParse } 14 | export default plugin 15 | 16 | -------------------------------------------------------------------------------- /scripts/test/coverage.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | const exec = require('shelljs').exec 3 | const codecov = '15039ad1-b495-48cd-b4a0-bcf124c9b318' //i don't really care if you steal this. 
4 | 5 | //let cmd=`./node_modules/.bin/nyc --reporter=text ./node_modules/.bin/tape ./test/**/*.test.js` 6 | //run all the tests 7 | const cmd = `./node_modules/.bin/c8 --reporter=text-lcov ./node_modules/.bin/tape ./tests/**/*.test.js > coverage.lcov && ./node_modules/.bin/codecov -t ${codecov}` 8 | exec(cmd) 9 | console.log('\n 🏃 done!') 10 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/clues/person-noun.js: -------------------------------------------------------------------------------- 1 | import person from './_person.js' 2 | import noun from './_noun.js' 3 | 4 | // 'babling brook' vs 'brook sheilds' 5 | 6 | const clue = { 7 | beforeTags: Object.assign({}, noun.beforeTags, person.beforeTags), 8 | afterTags: Object.assign({}, noun.afterTags, person.afterTags), 9 | beforeWords: Object.assign({}, noun.beforeWords, person.beforeWords, { i: 'Infinitive', we: 'Infinitive' }), 10 | afterWords: Object.assign({}, noun.afterWords, person.afterWords), 11 | } 12 | export default clue -------------------------------------------------------------------------------- /tests/two/misc/misc.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[two/misc] ' 4 | 5 | test('non-coercive-lex', function (t) { 6 | let doc = nlp('the Spencer Kelly', { kelly: 'Verb' }) 7 | t.equal(doc.has('#Verb'), false, here + 'still a person') 8 | 9 | doc = nlp('i kelly', { kelly: 'Verb' }) 10 | t.equal(doc.has('#Verb'), true, here + 'now coerced') 11 | 12 | doc = nlp('the Kelly', { kelly: 'Verb' }) 13 | t.equal(doc.has('#ProperNoun'), true, here + 'titlecase') 14 | t.end() 15 | }) 16 | -------------------------------------------------------------------------------- /plugins/_experiments/markdown/src/parse/index.js: -------------------------------------------------------------------------------- 1 | import { fromMarkdown } 
import { fromMarkdown } from 'mdast-util-from-markdown'
import { gfmTable } from 'micromark-extension-gfm-table'
import { gfmTableFromMarkdown } from 'mdast-util-gfm-table'
// import { frontmatterFromMarkdown, frontmatterToMarkdown } from 'mdast-util-frontmatter'

// parse a markdown string into an mdast syntax-tree,
// with github-flavoured table support enabled
const parseMd = function (md) {
  return fromMarkdown(md, {
    extensions: [gfmTable],
    mdastExtensions: [gfmTableFromMarkdown],
  })
}
export default parseMd
// combine '2 - 5' like '2-5' is
// 2-4: 2, 4
const combineRanges = function (tokens) {
  // '2 - ' / '2:30pm - ' style left-hand side
  const rangeStart = /^[0-9]{1,4}(:[0-9][0-9])?([a-z]{1,2})? ?[-–—] ?$/
  // '5' / '5pm ' style right-hand side
  const rangeEnd = /^[0-9]{1,4}([a-z]{1,2})? ?$/
  let k = 0
  while (k < tokens.length - 1) {
    const next = tokens[k + 1]
    if (next && rangeStart.test(tokens[k]) && rangeEnd.test(next)) {
      // merge the pair into the left token; blank the right one
      tokens[k] += next
      tokens[k + 1] = null
    }
    k += 1
  }
  return tokens
}
export default combineRanges
// assume any discovered prefixes -
// fill-in the full word for a trailing typeahead term
const autoFill = function () {
  const docs = this.docs
  if (docs.length === 0) {
    return this
  }
  const lastPhrase = docs[docs.length - 1] || []
  const term = lastPhrase[lastPhrase.length - 1]
  // fix: guard against an empty last sentence, where `term` is undefined
  if (term && term.typeahead === true && term.machine) {
    term.text = term.machine
    term.normal = term.machine
  }
  return this
}

const api = function (View) {
  View.prototype.autoFill = autoFill
}
export default api
// pull-out 'thurs' from 'thurs next week'
const parseWeekday = function (doc) {
  const day = doc.match('#WeekDay')
  // bail when there is no weekday, or the phrase is nothing but one
  if (day.found === false || doc.has('^#WeekDay$')) {
    return { result: null, m: doc.none() }
  }
  // handle relative-day logic elsewhere.
  if (doc.has('(this|next|last) (next|upcoming|coming|past)? #WeekDay')) {
    return { result: null, m: doc.none() }
  }
  return { result: day.text('reduced'), m: day }
}
export default parseWeekday
import { find, strip } from './fns.js'

const api = function (View) {
  /** a sub-view of text wrapped in (parentheses) */
  class Parentheses extends View {
    constructor(document, pointer, groups) {
      super(document, pointer, groups)
      // fix: was mistakenly 'Possessives' (copy-paste from possessives/index.js)
      this.viewType = 'Parentheses'
    }
    /** remove the wrapping parenthesis punctuation */
    strip() {
      return strip(this)
    }
  }

  /** find parenthetical statements */
  View.prototype.parentheses = function (n) {
    let m = find(this)
    m = m.getNth(n)
    return new Parentheses(m.document, m.pointer)
  }
}
export default api
import { find, strip } from './fns.js'

const api = function (View) {
  /** a sub-view of "quoted" text */
  class Quotations extends View {
    constructor(document, pointer, groups) {
      super(document, pointer, groups)
      // fix: was mistakenly 'Possessives' (copy-paste from possessives/index.js)
      this.viewType = 'Quotations'
    }
    /** remove the wrapping quotation punctuation */
    strip() {
      return strip(this)
    }
  }

  /** find quoted statements */
  View.prototype.quotations = function (n) {
    let m = find(this)
    m = m.getNth(n)
    return new Quotations(m.document, m.pointer)
  }
}
export default api
// [pattern, Tag, example] triples for date-ish token regexes
const regexList = [
  // 30sec
  [/^[0-9]+(min|sec|hr|d)s?$/i, 'Duration', '30min'],
  // 2012-06
  [/^[0-9]{4}-[0-9]{2}$/, 'Date', '2012-06'],
  // 13h30
  [/^[0-9]{2}h[0-9]{2}$/i, 'Time', '13h30'],
  // @4:30
  [/^@[0-9]+:[0-9]{2}$/, 'Time', '@5:30'],
  // @4pm — fix: old /^@[1-9]+(am|pm)$/ missed @10pm/@11am/@12pm and matched invalid @99pm
  [/^@(?:1[0-2]|[1-9])(am|pm)$/, 'Time', '@5pm'],
  // 03/02
  [/^(?:0[1-9]|[12]\d|3[01])\/(?:0[1-9]|[12]\d|3[01])$/, 'Date', '03/02'],
  // iso-time
  // [/^[0-9]{4}[:-][0-9]{2}[:-][0-9]{2}T[0-9]/i, 'Time', 'iso-time-tag']
]
export default regexList
import addAcronyms from './acronyms/index.js'
import addParentheses from './parentheses/index.js'
import addPossessives from './possessives/index.js'
import addQuotations from './quotations/index.js'
import addSelections from './selections/index.js'
import addSlashes from './slashes/index.js'

// bundle the misc sub-view plugins into one api hook
export default {
  api: function (View) {
    const extenders = [addAcronyms, addParentheses, addPossessives, addQuotations, addSelections, addSlashes]
    extenders.forEach(fn => fn(View))
  },
}
import https from 'https'

// GET a url and resolve with the response body parsed as JSON
const fetch = function (url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, resp => {
        let data = ''
        resp.on('data', chunk => {
          data += chunk
        })
        resp.on('end', () => {
          // fix: a malformed body used to throw uncaught inside this handler
          try {
            resolve(JSON.parse(data))
          } catch (err) {
            reject(new Error(`could not parse response from ${url}`, { cause: err }))
          }
        })
      })
      .on('error', err => {
        console.error('Error: ' + err.message) // eslint-disable-line
        // fix: propagate the actual error instead of rejecting with undefined
        reject(err)
      })
  })
}

export default fetch
//combine them with .topics() method
const find = function (n) {
  const r = this.clauses()
  // Find people, places, and organizations
  let m = r.people()
  m = m.concat(r.places())
  m = m.concat(r.organizations())
  // drop generic person-words
  m = m.not('(someone|man|woman|mother|brother|sister|father)')
  //return them to normal ordering
  m = m.sort('seq')
  // m = m.unique()
  return m.getNth(n)
}

const api = function (View) {
  View.prototype.topics = find
}
export default api
//sweep-through all suffixes, longest (6 chars) first
const suffixLoop = function (str = '', suffixes = []) {
  const size = str.length
  // never consume the whole word
  let longest = 6
  if (size <= 6) {
    longest = size - 1
  }
  for (let n = longest; n >= 1; n -= 1) {
    const ending = str.substring(size - n, str.length)
    const table = suffixes[ending.length]
    if (table.hasOwnProperty(ending) === true) {
      // found one - swap the ending for its replacement
      return str.slice(0, size - n) + table[ending]
    }
  }
  return null
}
export default suffixLoop
// post-tagger match-rules for money amounts
const money = [
  { match: '#Money and #Money #Currency?', tag: 'Money', reason: 'money-and-money' },
  // 6 dollars and 5 cents
  // fix: tag was lowercase 'money' — tags are case-sensitive; siblings use 'Money'
  { match: '#Value #Currency [and] #Value (cents|ore|centavos|sens)', group: 0, tag: 'Money', reason: 'and-5-cents' },
  // maybe currencies
  { match: '#Value (mark|rand|won|rub|ore)', tag: '#Money #Currency', reason: '4-mark' },
  // 3 pounds
  { match: 'a pound', tag: '#Money #Unit', reason: 'a-pound' },
  { match: '#Value (pound|pounds)', tag: '#Money #Unit', reason: '4-pounds' },
]
export default money
// is this tag consistent with the tags they already have?
const canBe = function (terms, tag, model) {
  const tagSet = model.one.tagSet
  // a tag we don't know about can't conflict
  if (!tagSet.hasOwnProperty(tag)) {
    return true
  }
  const conflicts = tagSet[tag].not || []
  // every term must be free of every conflicting tag
  return terms.every(term => conflicts.every(no => term.tags.has(no) !== true))
}
export default canBe
import guess from './_guess.js'

/** it helps to know what we're conjugating from */
const getTense = function (str) {
  // try the 3-letter suffix, then the 2-letter suffix
  for (const n of [3, 2]) {
    const suffix = str.substring(str.length - n)
    if (guess.hasOwnProperty(suffix) === true) {
      return guess[suffix]
    }
  }
  // a trailing 's' is a decent present-tense hint
  if (str.endsWith('s')) {
    return 'PresentTense'
  }
  return null
}
export default getTense
// plain words (not tags/switches) listed in the net's hooks
const getWords = function (net) {
  return Object.keys(net.hooks).filter(w => !w.startsWith('#') && !w.startsWith('%'))
}

// cheaply drop sentences that can't possibly match this net
const maybeMatch = function (doc, net) {
  // must have *atleast* one of these words
  const wanted = getWords(net)
  if (wanted.length === 0) {
    return doc
  }
  if (!doc._cache) {
    doc.cache()
  }
  const lookup = doc._cache
  // return sentences that have one of our needed words
  return doc.filter((_m, i) => wanted.some(str => lookup[i].has(str)))
}
export default maybeMatch
// keep existing tags when replacing the root text
const keep = { tags: true }

// turn a plural noun phrase into its singular form
const nounToSingular = function (m, parsed) {
  // already singular?
  if (parsed.isPlural === false) {
    return m
  }
  const { methods, model } = m.world
  const { toSingular } = methods.two.transform.noun
  // inflect the root noun
  const str = parsed.root.text('normal')
  const single = toSingular(str, model)
  // fix: reason was mistakenly 'toPlural' (copy-pasted from toPlural.js)
  m.replace(parsed.root, single, keep).tag('Singular', 'toSingular')
  // should we change the determiner/article?
  // m.debug()
  return m
}
export default nounToSingular
// common verb/noun prefixes we can peel off
const prefix = /^(under|over|mis|re|un|dis|semi)-?/

// mark terms that have a registered tag-switch (ambiguous pos), like 'strike'
const tagSwitch = function (terms, i, model) {
  const switches = model.two.switches
  const t = terms[i]
  // direct lookup first
  if (switches.hasOwnProperty(t.normal)) {
    t.switch = switches[t.normal]
    return
  }
  // support 'restrike' -> 'strike'
  if (prefix.test(t.normal) === true) {
    const base = t.normal.replace(prefix, '')
    if (base.length > 3 && switches.hasOwnProperty(base)) {
      t.switch = switches[base]
    }
  }
}
export default tagSwitch
// borrow a reference from another pronoun in this sentence -
// 'mike is tall, [he] climbs and [he] swims'
const findChained = function (want, s) {
  const hit = s.match(want)
  if (!hit.found) {
    return s.none()
  }
  const ref = hit.pronouns().refersTo()
  return ref.found ? ref : s.none()
}

// step the pointer back one sentence, when possible
const prevSentence = function (m) {
  if (!m.found) {
    return m
  }
  const [n] = m.fullPointer[0]
  // already at the first sentence?
  if (!n || n <= 0) {
    return m.none()
  }
  return m.update([[n - 1]])
}
export { prevSentence, findChained }
// list a net's plain-word hooks (skip '#Tag' and '%Switch%' keys)
const getWords = function (net) {
  const words = []
  for (const key of Object.keys(net.hooks)) {
    if (!key.startsWith('#') && !key.startsWith('%')) {
      words.push(key)
    }
  }
  return words
}

// keep only the sentences that could possibly match this net -
// a sentence must contain at-least one of the net's word hooks
const maybeMatch = function (doc, net) {
  const words = getWords(net)
  // no word-hooks? we can't pre-filter anything
  if (words.length === 0) {
    return doc
  }
  // lazily build the per-sentence word cache
  if (!doc._cache) {
    doc.cache()
  }
  const cache = doc._cache
  return doc.filter((_m, i) => words.some(str => cache[i].has(str)))
}
export default maybeMatch
(2019) foo').match(regs) 8 | t.equal(m.text(), 'may, (2019)', here + '(pre') 9 | 10 | regs = [{ word: 'may' }, { post: ')' }] 11 | m = nlp('may, (2019) foo').match(regs) 12 | t.equal(m.text(), 'may, (2019)', here + 'post)') 13 | 14 | regs = [{ post: ',' }] 15 | m = nlp('may, (2019) foo').match(regs) 16 | t.equal(m.text(), 'may,', here + 'post,') 17 | t.end() 18 | }) -------------------------------------------------------------------------------- /types/view/two.d.ts: -------------------------------------------------------------------------------- 1 | import type View from './one.d.ts' 2 | 3 | interface Two extends View { 4 | /** return any multi-word terms, like "didn't" */ 5 | contractions: (n?: number) => Contractions 6 | /** contract words that can combine, like "did not" */ 7 | contract: () => View 8 | /** Average measure of tag confidence */ 9 | confidence: () => number 10 | /** smart-replace root forms */ 11 | swap: (fromLemma: string, toLemma: string, guardTag?: string) => View 12 | } 13 | 14 | 15 | interface Contractions extends View { 16 | /** turn "i've" into "i have" */ 17 | expand: () => View 18 | } 19 | 20 | export default Two 21 | -------------------------------------------------------------------------------- /tests/three/redact.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from './_lib.js' 3 | const here = '[three/redact] ' 4 | 5 | test('redact:', function (t) { 6 | const arr = [ 7 | [`spencer from 234 Main st at 423-3242 and spencer@gmail.com.`, '██████████ from ██████████ at ███████ and ██████████.'], 8 | [`in Toronto, Canada!`, `in ██████████!`], 9 | [`with Dr. 
// https://stackoverflow.com/questions/9781218/how-to-change-node-jss-console-font-color
// minimal ansi-escape helpers (cheaper than requiring chalk)
const reset = '\x1b[0m'

// build a colorizer for one ansi code
const paint = code => str => '\x1b[' + code + 'm' + str + reset

const cli = {
  green: paint('32'),
  red: paint('31'),
  blue: paint('34'),
  magenta: paint('35'),
  cyan: paint('36'),
  yellow: paint('33'),
  black: paint('30'),
  dim: paint('2'),
  i: paint('3'),
}
export default cli
/**
 * turn big numbers, like 2.3e+22, into a full decimal string
 * with all of their trailing 0's written-out.
 * accepts a Number or a numeric String; small numbers pass through unchanged.
 * */
const numToString = function (n) {
  // small numbers never use e-notation - return as-is.
  // (fix: use abs() so negative big-numbers also get expanded)
  if (typeof n === 'number' && Math.abs(n) < 1000000) {
    return String(n)
  }
  let str = typeof n === 'number' ? n.toFixed(0) : String(n)
  // no exponent? nothing to expand
  if (str.indexOf('e+') === -1) {
    return str
  }
  // pull the sign off, so it doesn't throw-off the digit count below
  let sign = ''
  if (str[0] === '-') {
    sign = '-'
    str = str.slice(1)
  }
  // '2.5e+22' -> digits '25', exponent 22 -> '25' followed by 21 zeroes
  const [digits, exp] = str.replace('.', '').split('e+')
  return sign + digits + '0'.repeat(Number(exp) - digits.length + 1)
}
export default numToString
// console.log(numToString(2.5e+22));
import easyMode from './01-easy.js'
import byNeighbour from './02-neighbours.js'
import matcher from './03-matcher.js'
import fallback from './04-fallback.js'
import fixUp from './05-fixUp.js'
/* Chunks:
  Noun
  Verb
  Adjective
  Pivot
*/

// run the chunking passes over the document, in a fixed order.
// each pass mutates the document in-place; later passes
// presumably fill-in what earlier ones left unresolved - see the pass files.
const findChunks = function (view) {
  const { document, world } = view
  easyMode(document)
  byNeighbour(document)
  matcher(view, document, world)
  // matcher(view, document, world) //run it 2nd time
  fallback(document, world)
  fixUp(document, world)
}
export default { chunks: findChunks }
// clean-up replacements, applied in order
const steps = [
  // remove ordinal suffixes - 'st/nd/rd/th'
  [/1st$/, '1'],
  [/2nd$/, '2'],
  [/3rd$/, '3'],
  [/([4567890])r?th$/, '$1'],
  // remove currency prefixes
  [/^[$€¥£¢]/, ''],
  // remove percent/currency suffixes
  [/[%$€¥£¢]$/, ''],
  // remove commas
  [/,/g, ''],
  // split '5kg' into '5'
  [/([0-9])([a-z\u00C0-\u00FF]{1,2})$/, '$1'],
]

// normalize a string like "4,200.1" so it can be parsed as Number 4200.1
const parseNumeric = str => {
  return steps.reduce((out, [reg, repl]) => out.replace(reg, repl), str)
}

export default parseNumeric
// for each cached-sentence, collect a de-duplicated list of possible matches
// docCaches: one word-cache (Set-like) per sentence; hooks: word -> [match-objects]
const getHooks = function (docCaches, hooks) {
  const hookWords = Object.keys(hooks)
  return docCaches.map(cache => {
    const found = []
    const seen = {}
    hookWords.forEach(k => {
      if (!cache.has(k)) {
        return
      }
      hooks[k].forEach(h => {
        // keep only the first occurrence of each match-string
        if (seen[h.match] !== true) {
          seen[h.match] = true
          found.push(h)
        }
      })
    })
    return found
  })
}

export default getHooks
"default": "./../builds/one/compromise-one.cjs" 19 | } 20 | } 21 | }, 22 | "author": "Spencer Kelly (http://spencermounta.in)", 23 | "license": "MIT", 24 | "sideEffects": true 25 | } -------------------------------------------------------------------------------- /plugins/dates/src/model/words/durations.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'centuries', 3 | 'century', 4 | 'day', 5 | 'days', 6 | 'decade', 7 | 'decades', 8 | 'hour', 9 | 'hours', 10 | 'hr', 11 | 'hrs', 12 | 'millisecond', 13 | 'milliseconds', 14 | 'minute', 15 | 'minutes', 16 | 'min', 17 | 'mins', 18 | 'month', 19 | 'months', 20 | 'seconds', 21 | 'sec', 22 | 'secs', 23 | 'week end', 24 | 'week ends', 25 | 'weekend', 26 | 'weekends', 27 | 'week', 28 | 'weeks', 29 | 'wk', 30 | 'wks', 31 | 'year', 32 | 'years', 33 | 'yr', 34 | 'yrs', 35 | 'quarter', 36 | // 'quarters', 37 | 'qtr', 38 | 'qtrs', 39 | 'season', 40 | 'seasons', 41 | ] 42 | -------------------------------------------------------------------------------- /tests/two/misc/lazy.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[two/lazy] ' 4 | import penn from '../tagger/_pennSample.js' 5 | const txt = penn.map(a => a.text).join('\n') 6 | 7 | test('lazy matches are equal', function (t) { 8 | const arr = [ 9 | 'captain .', 10 | '. 
of the #Noun', 11 | '#Adverb #Adverb+', 12 | '#Url #Noun .?', 13 | 'certain !#Plural' 14 | ] 15 | arr.forEach(str => { 16 | const reg = nlp(txt).match(str) 17 | const lazy = nlp.lazy(txt, str) 18 | t.equal(reg.length, lazy.length, here + ' ' + str) 19 | t.deepEqual(reg.out('array'), lazy.out('array'), here + ' ' + str) 20 | }) 21 | t.end() 22 | }) -------------------------------------------------------------------------------- /two/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "compromise-two", 3 | "version": "14.14.4", 4 | "description": "", 5 | "type": "module", 6 | "module": "./../src/two.js", 7 | "main": "./../src/two.js", 8 | "types": "./../types/two.d.ts", 9 | "exports": { 10 | "./package.json": "./package.json", 11 | ".": { 12 | "import": { 13 | "types": "./../types/two/two.d.ts", 14 | "default": "./../src/two.js" 15 | }, 16 | "require": { 17 | "types": "./../types/two.d.cts", 18 | "default": "./../builds/two/compromise-two.cjs" 19 | } 20 | } 21 | }, 22 | "author": "Spencer Kelly (http://spencermounta.in)", 23 | "license": "MIT", 24 | "sideEffects": true 25 | } -------------------------------------------------------------------------------- /tests/three/numbers/backlog/overlap.ignore.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../../_lib.js' 3 | const here = '[three/number-overlap] ' 4 | 5 | test('number-fraction overlap', function (t) { 6 | const arr = [ 7 | ['fifty five and two eighths', 55.25], 8 | ['two fifty five and a third', 255.333], 9 | ['two fifty five and five thirds', 256.667], 10 | ] 11 | 12 | arr.forEach((a) => { 13 | const doc = nlp(a[0]) 14 | const values = doc.numbers().get()[0] 15 | const fractions = doc.fractions().get()[0] 16 | t.equal(values, a[1], here + 'Value: ' + a[0]) 17 | t.equal(fractions, null, here + 'Fraction: no-fraction') 18 | }) 19 | 20 | t.end() 21 | }) 22 | 
// cheat- write each term's [sentence, word] position directly onto it
const index = function (view) {
  view.document.forEach((terms, n) => {
    terms.forEach((term, i) => {
      term.index = [n, i]
    })
  })
}

export default index
14 | 'cal', 15 | 'calif', 16 | 'colo', 17 | 'conn', 18 | 'fla', 19 | 'fl', 20 | 'ga', 21 | 'ida', 22 | 'ia', 23 | 'kan', 24 | 'kans', 25 | 26 | 'minn', 27 | 'neb', 28 | 'nebr', 29 | 'okla', 30 | 'penna', 31 | 'penn', 32 | 'pa', 33 | 'dak', 34 | 'tenn', 35 | 'tex', 36 | 'ut', 37 | 'vt', 38 | 'va', 39 | 'wis', 40 | 'wisc', 41 | 'wy', 42 | 'wyo', 43 | 'usafa', 44 | 'alta', 45 | 'ont', 46 | 'que', 47 | 'sask', 48 | ] 49 | -------------------------------------------------------------------------------- /src/1-one/tokenize/plugin.js: -------------------------------------------------------------------------------- 1 | import methods from './methods/index.js' 2 | import model from './model/index.js' 3 | import compute from './compute/index.js' 4 | 5 | export default { 6 | compute, 7 | methods, 8 | model, 9 | hooks: ['alias', 'machine', 'index', 'id'], 10 | } 11 | 12 | // const plugin = function (world) { 13 | // let { methods, model, parsers } = world 14 | // Object.assign({}, methods, _methods) 15 | // Object.assign(model, _model) 16 | // methods.one.tokenize.fromString = tokenize 17 | // parsers.push('normal') 18 | // parsers.push('alias') 19 | // parsers.push('machine') 20 | // // extend View class 21 | // // addMethods(View) 22 | // } 23 | // export default plugin 24 | -------------------------------------------------------------------------------- /src/2-two/preTagger/compute/tagger/1st-pass/02-hyphens.js: -------------------------------------------------------------------------------- 1 | const byHyphen = function (terms, i, model, world) { 2 | const setTag = world.methods.one.setTag 3 | // two words w/ a dash 4 | if (terms[i].post === '-' && terms[i + 1]) { 5 | setTag([terms[i], terms[i + 1]], 'Hyphenated', world, null, `1-punct-hyphen''`) 6 | 7 | // bone-headed, man-made, good-tempered, coursely-ground 8 | // if (terms[i + 1].tags.has('PastTense')) { 9 | // let tags = terms[i].tags 10 | // if (tags.has('Noun') || tags.has('Adverb')) { 11 | // setTag([terms[i], 
// split a verb-phrase's adverbs into those before/after the root verb
const getAdverbs = function (vb, root) {
  const res = { pre: vb.none(), post: vb.none() }
  // no adverbs at all? return two empty views
  if (!vb.has('#Adverb')) {
    return res
  }
  // pivot on the main verb
  const parts = vb.splitOn(root)
  // adverbs on both sides of the root
  if (parts.length === 3) {
    return { pre: parts.eq(0).adverbs(), post: parts.eq(2).adverbs() }
  }
  // root came first - the adverbs follow it
  if (parts.eq(0).isDoc(root)) {
    res.post = parts.eq(1).adverbs()
    return res
  }
  // otherwise, the adverbs precede the root
  res.pre = parts.eq(0).adverbs()
  return res
}
export default getAdverbs
import today from './01-today.js'
import holiday from './02-holidays.js'
import nextLast from './03-next-last.js'
import yearly from './04-yearly.js'
import explicit from './05-explicit.js'

// try each date-parser in turn, returning the first result found
const parse = function (doc, context, parts) {
  // 'in two days'
  let unit = today(doc, context, parts)
  // 'this haloween'
  if (!unit) {
    unit = holiday(doc, context)
  }
  // 'this month'
  if (!unit) {
    unit = nextLast(doc, context)
  }
  // 'q2 2002'
  if (!unit) {
    unit = yearly(doc, context)
  }
  // 'this june 2nd'
  if (!unit) {
    unit = explicit(doc, context)
  }
  return unit
}
export default parse
// tally how often each word appears in the document,
// and stamp that count onto every term as `term.freq`
const freq = function (view) {
  const docs = view.docs
  const counts = {}
  const wordOf = term => term.machine || term.normal
  // first pass - count each word
  docs.forEach(terms => {
    terms.forEach(term => {
      const word = wordOf(term)
      counts[word] = (counts[word] || 0) + 1
    })
  })
  // second pass - write the counts back
  docs.forEach(terms => {
    terms.forEach(term => {
      term.freq = counts[wordOf(term)]
    })
  })
}
export default freq
import { toSuperlative, toComparative, fromSuperlative, fromComparative, toNoun } from './inflect.js'
import fromAdverb from './conjugate/fromAdverb.js'
import toAdverb from './conjugate/toAdverb.js'

// produce every known inflected form of an adjective,
// deduped, with the input form first
const all = function (str, model) {
  const forms = [str, toSuperlative(str, model), toComparative(str, model), toAdverb(str)]
  // drop null/empty results, then dedupe while keeping order
  return Array.from(new Set(forms.filter(s => s)))
}


export default {
  toSuperlative, toComparative, toAdverb, toNoun,
  fromAdverb, fromSuperlative, fromComparative,
  all,
}
import multiWord from './multi-word.js'
import singleWord from './single-word.js'

// tag any words in our lexicon - even if it hasn't been filled-up yet
// rest of pre-tagger is in ./two/preTagger
const lexicon = function (view) {
  const world = view.world
  // walk every term of every phrase
  view.docs.forEach(terms => {
    for (let i = 0; i < terms.length; i += 1) {
      // only attempt terms that are still untagged
      if (terms[i].tags.size === 0) {
        // multi-word expressions take priority over single-word lookups
        const found = multiWord(terms, i, world)
        if (!found) {
          singleWord(terms, i, world)
        }
      }
    }
  })
}

export default {
  lexicon,
}
fmts.normal 33 | fmts.reduced = fmts.root 34 | export default fmts -------------------------------------------------------------------------------- /src/1-one/tokenize/compute/machine.js: -------------------------------------------------------------------------------- 1 | const hasDash = /^\p{Letter}+-\p{Letter}+$/u 2 | // 'machine' is a normalized form that looses human-readability 3 | const doMachine = function (term) { 4 | let str = term.implicit || term.normal || term.text 5 | // remove apostrophes 6 | str = str.replace(/['’]s$/, '') 7 | str = str.replace(/s['’]$/, 's') 8 | //lookin'->looking (make it easier for conjugation) 9 | str = str.replace(/([aeiou][ktrp])in'$/, '$1ing') 10 | //turn re-enactment to reenactment 11 | if (hasDash.test(str)) { 12 | str = str.replace(/-/g, '') 13 | } 14 | //#tags, @mentions 15 | str = str.replace(/^[#@]/, '') 16 | if (str !== term.normal) { 17 | term.machine = str 18 | } 19 | } 20 | export default doMachine 21 | -------------------------------------------------------------------------------- /src/2-two/swap/api/swap-verb.js: -------------------------------------------------------------------------------- 1 | const matchVerb = function (m, lemma) { 2 | const conjugate = m.methods.two.transform.verb.conjugate 3 | const all = conjugate(lemma, m.model) 4 | if (m.has('#Gerund')) { 5 | return all.Gerund 6 | } 7 | if (m.has('#PastTense')) { 8 | return all.PastTense 9 | } 10 | if (m.has('#PresentTense')) { 11 | return all.PresentTense 12 | } 13 | if (m.has('#Gerund')) { 14 | return all.Gerund 15 | } 16 | return lemma 17 | } 18 | 19 | const swapVerb = function (vb, lemma) { 20 | let str = lemma 21 | vb.forEach(m => { 22 | if (!m.has('#Infinitive')) { 23 | str = matchVerb(m, lemma) 24 | } 25 | m.replaceWith(str) 26 | }) 27 | return vb 28 | } 29 | export default swapVerb -------------------------------------------------------------------------------- /src/3-three/misc/slashes/index.js: 
// a slash character anywhere in a term, like 'red/blue'
const hasSlash = /\//

const api = function (View) {

  /** a view of slash-joined terms, like 'left/right' */
  class Slashes extends View {
    constructor(document, pointer, groups) {
      super(document, pointer, groups)
      this.viewType = 'Slashes'
    }
    // turn 'a/b' into the separate words 'a b', and grow the
    // match to cover all the resulting words
    split() {
      return this.map(m => {
        const txt = m.text()
        const words = txt.split(hasSlash)
        m = m.replaceWith(words.join(' '))
        return m.growRight('(' + words.join('|') + ')+')
      })
    }
  }

  /** find terms containing a slash, optionally the nth one */
  View.prototype.slashes = function (n) {
    const m = this.match('#SlashedTerm').getNth(n)
    return new Slashes(m.document, m.pointer)
  }
}
export default api
// words that act either as a unit ('10 cups') or as a plain noun ('the cup')
// duplicates removed - 'kb' and 'mph' each appeared twice
const unitNouns = [
  'cm',
  'cup',
  'cups',
  'feet',
  'foot',
  'ft',
  'gal',
  'gb',
  'hg',
  'inch',
  'inches',
  'k',
  'kb',
  'kelvin',
  'kg',
  'km',
  'lb',
  'm',
  'mb',
  'mg',
  'mi',
  'hz',
  'mps',
  'mph',
  'miles',
  'ml',
  'mm',
  'newton',
  'newtons',
  'oz',
  'pa',
  // 'pound',
  // 'pounds',
  'pt',
  'px',
  'qt',
  'tablespoon',
  'tablespoons',
  'tb',
  'tbl',
  'tbsp',
  'teaspoon',
  'teaspoons',
  'tsp',
  'yard',
  'yards',
  'yd',
]
export default unitNouns
import toText from '../../numbers/format/toText/index.js'
import textOrdinal from '../../numbers/format/toOrdinal/textOrdinal.js'

// render a fraction object as ordinal text, like {2,5} -> 'two fifths'
const toOrdinal = function (obj) {
  // don't divide by zero!
  if (!obj.numerator || !obj.denominator) {
    return ''
  }
  // create [two] [fifth]
  const start = toText({ num: obj.numerator })
  let end = textOrdinal({ num: obj.denominator })
  if (!start || !end) {
    return ''
  }
  if (obj.denominator === 2) {
    // 'one secondth' -> 'one half', 'three seconds' -> 'three halves'
    end = obj.numerator === 1 ? 'half' : 'halves'
  } else if (obj.numerator !== 1) {
    // pluralize the ordinal - 'two fifth' -> 'two fifths'
    end += 's'
  }
  return `${start} ${end}`
}
export default toOrdinal
import fastTag from '../_fastTag.js'
import fillTag from './_fillTags.js'

// last-resort pass: tag a term as a Noun when nothing better was found
const nounFallback = function (terms, i, model) {
  const tags = terms[i].tags
  // a lone weak tag counts the same as no tag at all
  const onlyWeakTag = tags.size === 1 &&
    (tags.has('Hyphenated') || tags.has('HashTag') || tags.has('Prefix') || tags.has('SlashedTerm'))
  if (tags.size === 0 || onlyWeakTag) {
    fastTag(terms[i], 'Noun', '3-[fallback]')
    // try to give it singular/plural tags, too
    fillTag(terms, i, model)
    // fallback guesses get low confidence
    terms[i].confidence = 0.1
  }
}
export default nounFallback
// split terms into Nounphrase, verbphrase, etc groups
const chunks = function (doc) {
  const found = []
  let prev = null
  // first, split by comma, etc, then walk each clause
  doc.clauses().docs.forEach(terms => {
    terms.forEach(term => {
      if (term.chunk && term.chunk === prev) {
        // same chunk - extend the current span by one term
        found[found.length - 1][2] = term.index[1] + 1
      } else {
        // new (or missing) chunk - start a fresh one-term span
        prev = term.chunk
        found.push([term.index[0], term.index[1], term.index[1] + 1])
      }
    })
    // a chunk never crosses a clause boundary
    prev = null
  })
  return doc.update(found)
}
export default chunks
// known proper-nouns, used to seed the lexicon
const properNouns = [
  'mercedes',
  'barbie',
  'catalina',
  'christi',
  'diego',
  'elmo',
  'franco',
  'kirby',
  'mickey',
  'finn',
  'missy',
  'florence',
  'stevens',
  'abid',
  'mcgill',
  'hudson',
  'chesley',
  'carling',
  'berkeley',
  'beeton',
  'carleton',
  'ajax',
  'weston',
  'sherwood',
  'wembley',
  'hinton',
  'bentley',
  'landsdowne',
  'brock',
  'dalhousie',
  'spalding',
  'charlton',
  'rothwell',
  'gosford',
  'frampton',
  'fairview',
  // currencies
  'nis',
  'riel',
  'euro',
  // bands
  'iron maiden',
]
export default properNouns
// consume as many terms as a greedy reg ('foo+' / 'foo*') will allow
const greedyMatch = function (state) {
  const { regs, phrase_length } = state
  const reg = regs[state.r]
  // advance t to the end of the greedy run
  state.t = getGreedy(state, regs[state.r + 1])
  if (state.t === null) {
    return null // greedy was too short
  }
  // foo{2,4} - enforce the stated greed-minimum
  if (reg.min && reg.min > state.t) {
    return null // greedy was too short
  }
  // 'foo+$' - an end-anchored greedy must actually reach the end
  if (reg.end === true && state.start_i + state.t !== phrase_length) {
    return null // greedy didn't reach the end
  }
  return true
}
export default greedyMatch

compromise-speech demo:

14 |
    15 | chocolate microscopes 16 |
    loading
    17 |
18 | 19 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /plugins/stats/scratch.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console, no-unused-vars */ 2 | // import corpus from 'nlp-corpus' 3 | import nlp from '../../src/three.js' 4 | import plugin from './src/plugin.js' 5 | nlp.extend(plugin) 6 | 7 | // let txt = 'toronto raptors play a toronto maple leafs' 8 | // let doc = nlp(txt) 9 | // console.log(doc.ngrams()) 10 | 11 | const doc = nlp('one two three. one two foo.') 12 | const res = doc.ngrams({ min: 3 }) 13 | /*[ 14 | { size: 3, count: 1, normal: 'one two three' }, 15 | { size: 3, count: 1, normal: 'one two foo' } 16 | ] 17 | */ 18 | console.log(res) 19 | // let txt = 'no, my son is also named Bort' 20 | 21 | // let doc = nlp(txt) 22 | // // console.log(doc.tfidf()) 23 | // doc.compute('tfidf') 24 | // console.log(JSON.stringify(doc.json()[0].terms[6])) -------------------------------------------------------------------------------- /plugins/dates/src/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import compute from './compute/index.js' 3 | import tags from './model/tags.js' 4 | import words from './model/words/index.js' 5 | import regex from './model/regex.js' 6 | import version from './_version.js' 7 | import debug from './debug.js' 8 | 9 | export default { 10 | tags, 11 | words, 12 | compute, 13 | api, 14 | mutate: world => { 15 | // add our regexes 16 | world.model.two.regexText = world.model.two.regexText || [] 17 | world.model.two.regexText = world.model.two.regexText.concat(regex) 18 | // add our debug('dates') method 19 | world.methods.one.debug = world.methods.one.debug || {} 20 | world.methods.one.debug.dates = debug 21 | }, 22 | hooks: ['dates'], 23 | version, 24 | } 25 | 
-------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2015" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019' or 'ESNEXT'. */, 4 | "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */, 5 | "strict": false /* Enable all strict type-checking options. */, 6 | "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */, 7 | "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */ 8 | }, 9 | "files": ["types/three.d.ts"], 10 | "include": ["types/index.d.ts"] 11 | } 12 | -------------------------------------------------------------------------------- /plugins/dates/src/api/toJSON.js: -------------------------------------------------------------------------------- 1 | const getDuration = function (range) { 2 | const end = range.end.d.add(1, 'millisecond') 3 | const diff = end.since(range.start.d).diff 4 | delete diff.milliseconds 5 | delete diff.seconds 6 | return diff 7 | } 8 | 9 | const toJSON = function (range) { 10 | if (!range.start) { 11 | return { 12 | start: null, 13 | end: null, 14 | timezone: null, 15 | duration: {}, 16 | // range: null 17 | } 18 | } 19 | const diff = range.end ? getDuration(range) : {} 20 | return { 21 | start: range.start.format('iso'), 22 | end: range.end ? 
range.end.format('iso') : null, 23 | timezone: range.start.d.format('timezone'), 24 | duration: diff, 25 | // range: getRange(diff) 26 | } 27 | } 28 | export default toJSON -------------------------------------------------------------------------------- /scripts/match.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import repl from 'repl' 3 | import corpus from 'nlp-corpus' 4 | import nlp from '../src/three.js' 5 | 6 | const n = 12000 7 | console.log(` -- pre-processing ${n} sentences-`) 8 | let docs = corpus.some(n) 9 | docs = docs.map(str => nlp(str).compute('offset')) 10 | console.log(` -- ok, ready --`) 11 | 12 | const doMatch = function (match) { 13 | docs.forEach(doc => { 14 | const m = doc.match(match) 15 | if (m.found) { 16 | m.debug({ highlight: true, tags: false }) 17 | } 18 | }) 19 | console.log('--') 20 | } 21 | 22 | let arg = process.argv.slice(2).join(' ') 23 | arg = arg.trim() 24 | if (arg) { 25 | doMatch(arg) 26 | } 27 | 28 | repl.start({ 29 | eval: function (match) { 30 | doMatch(match) 31 | }, 32 | }) 33 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/regex/regex-text.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | // #coolguy 3 | [/^#[\p{Number}_]*\p{Letter}/u, 'HashTag'], // can't be all numbers 4 | 5 | // @spencermountain 6 | [/^@\w{2,}$/, 'AtMention'], 7 | 8 | // period-ones acronyms - f.b.i. 
9 | [/^([A-Z]\.){2}[A-Z]?/i, ['Acronym', 'Noun'], 'F.B.I'], //ascii-only 10 | 11 | // ending-apostrophes 12 | [/.{3}[lkmnp]in['‘’‛‵′`´]$/, 'Gerund', "chillin'"], 13 | [/.{4}s['‘’‛‵′`´]$/, 'Possessive', "flanders'"], 14 | 15 | //from https://www.regextester.com/106421 16 | // [/^([\u00a9\u00ae\u2319-\u3300]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff])/, 'Emoji', 'emoji-range'] 17 | // unicode character range 18 | [/^[\p{Emoji_Presentation}\p{Extended_Pictographic}]/u, 'Emoji', 'emoji-class'], 19 | ] 20 | -------------------------------------------------------------------------------- /plugins/dates/src/api/parse/one/units/_time.js: -------------------------------------------------------------------------------- 1 | import Unit from './Unit.js' 2 | 3 | class Hour extends Unit { 4 | constructor(input, unit, context) { 5 | super(input, unit, context, true) 6 | this.unit = 'hour' 7 | if (this.d.isValid()) { 8 | this.d = this.d.startOf('hour') 9 | } 10 | } 11 | } 12 | class Minute extends Unit { 13 | constructor(input, unit, context) { 14 | super(input, unit, context, true) 15 | this.unit = 'minute' 16 | if (this.d.isValid()) { 17 | this.d = this.d.startOf('minute') 18 | } 19 | } 20 | } 21 | class Moment extends Unit { 22 | constructor(input, unit, context) { 23 | super(input, unit, context, true) 24 | this.unit = 'millisecond' 25 | } 26 | } 27 | 28 | export { 29 | Hour, 30 | Minute, 31 | Moment 32 | } 33 | -------------------------------------------------------------------------------- /plugins/stats/demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

compromise-stats demo:

14 |
    15 | Who keeps the metric system down? We do, we do! 16 |
    loading
    17 |
18 | 19 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/2-two/postTagger/model/verbs/passive.js: -------------------------------------------------------------------------------- 1 | // ==== Passive voice === 2 | export default [ 3 | // got walked, was walked, were walked 4 | { match: '(got|were|was|is|are|am) (#PastTense|#Participle)', tag: 'Passive', reason: 'got-walked' }, 5 | // was being walked 6 | { match: '(was|were|is|are|am) being (#PastTense|#Participle)', tag: 'Passive', reason: 'was-being' }, 7 | // had been walked, have been eaten 8 | { match: '(had|have|has) been (#PastTense|#Participle)', tag: 'Passive', reason: 'had-been' }, 9 | // will be cleaned 10 | { match: 'will be being? (#PastTense|#Participle)', tag: 'Passive', reason: 'will-be-cleaned' }, 11 | // suffered by the country 12 | { match: '#Noun [(#PastTense|#Participle)] by (the|a) #Noun', group: 0, tag: 'Passive', reason: 'suffered-by' }, 13 | 14 | ] -------------------------------------------------------------------------------- /plugins/wikipedia/demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

compromise-wikipedia demo:

14 |
    15 | you could still go to McGill, the Harvard of Canada! 16 |
    loading
    17 |
// support returning a subset of a match
// like 'foo [bar] baz' -> bar
const getGroup = function (res, group) {
  const ptrs = []
  const byGroup = {}
  if (res.length === 0) {
    return { ptrs, byGroup }
  }
  // allow numbered groups, like [0]
  if (typeof group === 'number') {
    group = String(group)
  }
  if (group) {
    // asked for one named group: collect just that group's pointers
    res.forEach(r => {
      if (r.groups[group]) {
        ptrs.push(r.groups[group])
      }
    })
    return { ptrs, byGroup }
  }
  // no group asked-for: full pointers, plus every group keyed by name
  res.forEach(r => {
    ptrs.push(r.pointer)
    Object.entries(r.groups).forEach(([k, v]) => {
      byGroup[k] = byGroup[k] || []
      byGroup[k].push(v)
    })
  })
  return { ptrs, byGroup }
}
export default getGroup
-------------------------------------------------------------------------------- 1 | # sends test-coverage data to codecov.io 2 | # https://codecov.io/gh/spencermountain/compromise 3 | name: Coverage 4 | 5 | on: 6 | release: 7 | types: [created] 8 | 9 | jobs: 10 | getCoverage: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | contents: read 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | persist-credentials: false 19 | 20 | - uses: actions/setup-node@v4 21 | with: 22 | node-version: '20' 23 | cache: npm 24 | check-latest: true 25 | 26 | - run: npm ci 27 | - run: npm i -g c8 codecov 28 | - run: c8 -r lcov -n 'src/**/*' -n 'plugins/**/*' npm run test && codecov -t 15039ad1-b495-48cd-b4a0-bcf124c9b318 29 | # - run: npm run codecov 30 | -------------------------------------------------------------------------------- /plugins/dates/demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

compromise-dates demo:

14 |
    15 | lets meet in 32 days 16 |

    loading

    17 |
18 | 19 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/1-one/match/methods/match/steps/optional-match.js: -------------------------------------------------------------------------------- 1 | import matchTerm from '../term/doesMatch.js' 2 | 3 | // 'foo? foo' matches are tricky. 4 | const foundOptional = function (state) { 5 | const { regs } = state 6 | const reg = regs[state.r] 7 | const term = state.terms[state.t] 8 | // does the next reg match it too? 9 | const nextRegMatched = matchTerm(term, regs[state.r + 1], state.start_i + state.t, state.phrase_length) 10 | if (reg.negative || nextRegMatched) { 11 | // but does the next reg match the next term?? 12 | // only skip if it doesn't 13 | const nextTerm = state.terms[state.t + 1] 14 | if (!nextTerm || !matchTerm(nextTerm, regs[state.r + 1], state.start_i + state.t, state.phrase_length)) { 15 | state.r += 1 16 | } 17 | } 18 | } 19 | 20 | export default foundOptional -------------------------------------------------------------------------------- /src/3-three/numbers/numbers/format/toOrdinal/numOrdinal.js: -------------------------------------------------------------------------------- 1 | import toString from '../../_toString.js' 2 | 3 | /** 4 | * turn a number like 5 into an ordinal like 5th 5 | */ 6 | const numOrdinal = function (obj) { 7 | const num = obj.num 8 | if (!num && num !== 0) { 9 | return null 10 | } 11 | //the teens are all 'th' 12 | const tens = num % 100 13 | if (tens > 10 && tens < 20) { 14 | return String(num) + 'th' 15 | } 16 | //the rest of 'em 17 | const mapping = { 18 | 0: 'th', 19 | 1: 'st', 20 | 2: 'nd', 21 | 3: 'rd', 22 | } 23 | let str = toString(num) 24 | const last = str.slice(str.length - 1, str.length) 25 | if (mapping[last]) { 26 | str += mapping[last] 27 | } else { 28 | str += 'th' 29 | } 30 | return str 31 | } 32 | 33 | export default numOrdinal 34 | -------------------------------------------------------------------------------- 
// support global multipliers, like 'half-million': strip the leading
// modifier word and report the factor to multiply the parsed number by
const findModifiers = str => {
  const mults = [
    {
      reg: /^(minus|negative)[\s-]/i,
      mult: -1,
    },
    {
      reg: /^(a\s)?half[\s-](of\s)?/i,
      mult: 0.5,
    },
    // {
    //   reg: /^(a\s)?quarter[\s\-]/i,
    //   mult: 0.25
    // }
  ]
  const found = mults.find(m => m.reg.test(str))
  if (found) {
    return {
      amount: found.mult,
      str: str.replace(found.reg, ''),
    }
  }
  // no modifier found - leave the string alone, multiply by 1
  return {
    amount: 1,
    str: str,
  }
}

export default findModifiers
// `rules` is imported from './_rules.js' at the top of this file

// flip a {singular: plural} map into {plural: singular}
const invertObj = function (obj) {
  return Object.keys(obj).reduce((h, k) => {
    h[obj[k]] = k
    return h
  }, {})
}

// cache the inverted irregulars per-model, so we don't rebuild the
// whole map on every single call (it was previously O(n) per word)
// NOTE(review): assumes model.two.irregularPlurals is not mutated after first use - confirm
const invertedCache = new WeakMap()
const getInverted = function (irregularPlurals) {
  if (!invertedCache.has(irregularPlurals)) {
    invertedCache.set(irregularPlurals, invertObj(irregularPlurals))
  }
  return invertedCache.get(irregularPlurals)
}

/**
 * turn a plural noun into its singular form
 * @param {string} str - a normalized plural word, like 'cities'
 * @param {object} model - the world model, providing model.two.irregularPlurals
 * @returns {string} the singular form, or the input unchanged if no rule applies
 */
const toSingular = function (str, model) {
  const { irregularPlurals } = model.two
  const invert = getInverted(irregularPlurals)
  // check irregulars list first
  if (Object.prototype.hasOwnProperty.call(invert, str)) {
    return invert[str]
  }
  // go through our suffix regexes
  for (let i = 0; i < rules.length; i += 1) {
    if (rules[i][0].test(str) === true) {
      return str.replace(rules[i][0], rules[i][1])
    }
  }
  // no rule matched - leave it alone
  return str
}
export default toSingular
// `normalize` is imported from '../compute/normal/index.js' at the top of this file

// turn a string input into a 'document' json format:
// an array of sentences, each an array of term objects
const parse = function (input, world) {
  const { methods, model } = world
  const { splitSentences, splitTerms, splitWhitespace } = methods.one.tokenize
  const text = input || ''
  // split into sentence strings, then each sentence into terms
  return splitSentences(text, world).map(sentence => {
    // split into [pre-text-post] word objects
    const terms = splitTerms(sentence, model).map(t => splitWhitespace(t, model))
    // add the normalized term format, always
    terms.forEach(t => normalize(t, world))
    return terms
  })
}
export default parse
>= 0; i -= 1) { 7 | const reg = regs[i] 8 | if (reg.word && hasDash.test(reg.word)) { 9 | let words = reg.word.split(/[-–—]/g) 10 | // don't split 're-cycle', etc 11 | if (prefixes.hasOwnProperty(words[0])) { 12 | continue 13 | } 14 | words = words.filter(w => w).reverse() 15 | regs.splice(i, 1) 16 | words.forEach(w => { 17 | const obj = Object.assign({}, reg) 18 | obj.word = w 19 | regs.splice(i, 0, obj) 20 | }) 21 | } 22 | } 23 | return regs 24 | } 25 | export default splitHyphens -------------------------------------------------------------------------------- /tests/one/match/sweep-not.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[one/sweep] ' 4 | 5 | test('sweep-not:', function (t) { 6 | let doc = nlp('The service is fast really') 7 | let net = nlp.buildNet([{ match: 'is fast .', notIf: 'psych' }]) 8 | let m = doc.match(net) 9 | t.equal(m.text(), 'is fast really', here + 'no-psych') 10 | 11 | doc = nlp('The service is fast psych') 12 | net = nlp.buildNet([{ match: 'is fast .', notIf: 'psych' }]) 13 | m = doc.match(net) 14 | t.equal(m.text(), '', here + 'psych-found') 15 | 16 | doc = nlp('i swim in the lake and walk in the road') 17 | net = nlp.buildNet([{ match: 'i (swim|walk) in the .', notIf: 'in the (park|lake)' }]) 18 | m = doc.match(net) 19 | t.equal(m.text(), '', here + 'notIf optional') 20 | 21 | t.end() 22 | }) 23 | 24 | -------------------------------------------------------------------------------- /plugins/wikipedia/scripts/stat.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import sh from 'shelljs' 3 | import fs from 'fs' 4 | 5 | import conf from '../config.js' 6 | const { lang, project } = conf 7 | const file = `./files/${lang}.${project}-pageviews.json` 8 | // const file = './files/pageviews.tsv' 9 | 10 | const round = n => Math.round(n * 10) / 10 
// cheat-method for a quick loop over every term in the view,
// calling fn(term, world) on each one in document order
const termLoop = function (view, fn) {
  const { docs } = view
  for (const terms of docs) {
    for (const term of terms) {
      fn(term, view.world)
    }
  }
}
(m.has('(we|they)')) { 13 | return true 14 | } 15 | // these can't be plural 16 | if (root.has(notPlural) === true) { 17 | return false 18 | } 19 | if (m.has('#Singular')) { 20 | return false 21 | } 22 | // word-reg fallback 23 | const str = root.text('normal') 24 | // ends with a brutal s fallback 25 | return str.length > 3 && str.endsWith('s') && !str.endsWith('ss') 26 | } 27 | export default isPlural 28 | -------------------------------------------------------------------------------- /tests/three/numbers/backlog/agreement.ignore.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../../_lib.js' 3 | const here = '[three/number-agreement] ' 4 | 5 | test('misc agreement', function (t) { 6 | let doc = nlp('i ate 7 kilos of fruit') 7 | .numbers() 8 | .units() 9 | t.equal(doc.text('trim'), 'kilos', here + 'found unit') 10 | 11 | doc = nlp('i ate 7 of them, kilos are kilograms') 12 | .numbers() 13 | .units() 14 | t.equal(doc.text('trim'), '', here + 'found no unit') 15 | 16 | t.end() 17 | }) 18 | 19 | test('ordinal agreement', function (t) { 20 | const doc = nlp('seventeen beers') 21 | doc.values().toOrdinal() 22 | t.equal(doc.text(), 'seventeenth beer', here + 'ord-agreement') 23 | 24 | doc.values().toCardinal() 25 | t.equal(doc.text(), 'seventeen beers', here + 'card-agreement') 26 | t.end() 27 | }) 28 | -------------------------------------------------------------------------------- /tests/two/misc/confidence.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[two/confidence] ' 4 | 5 | test('confidence', function (t) { 6 | const arr = [ 7 | ['', 1], 8 | ['asdfasdf', 0.1], 9 | ['google', 1], 10 | ['jlcekehj is', 0.6], 11 | ['yelpily good', 0.85], 12 | 13 | // [ 'Striking revenue workers threaten gherao',null] 14 | // [ 'Madhuri goes dhak-dhak again',null] 15 | // [ 
// '2-4', '2 - 4', '1.5-2km'
const isRange = /^([0-9.]{1,4}[a-z]{0,2}) ?[-–—] ?([0-9]{1,4}[a-z]{0,2})$/i
// '9am-5pm', '9:30-10:30'
const timeRange = /^([0-9]{1,2}(:[0-9][0-9])?(am|pm)?) ?[-–—] ?([0-9]{1,2}(:[0-9][0-9])?(am|pm)?)$/i
// '555-1234' shape
const phoneNum = /^[0-9]{3}-[0-9]{4}$/

// expand a dashed range like '2-4' into ['2', 'to', '4'], or return null
const numberRange = function (terms, i) {
  const term = terms[i]
  const rangeParts = term.text.match(isRange)
  if (rangeParts !== null) {
    // 123-1234 is a phone number, not a number-range
    if (term.tags.has('PhoneNumber') === true || phoneNum.test(term.text)) {
      return null
    }
    return [rangeParts[1], 'to', rangeParts[2]]
  }
  // maybe it's a time-range instead
  const timeParts = term.text.match(timeRange)
  if (timeParts !== null) {
    return [timeParts[1], 'to', timeParts[4]]
  }
  return null
}
export default numberRange
words: ['job', 'union', 'worker'] }, 9 | }, 10 | charge: { 11 | money: { words: ['fee', 'bank', 'price', 'service'] }, 12 | run: { words: ['toward', 'run', 'flee'] }, 13 | }, 14 | fire: { 15 | job: { words: ['job', 'boss', 'contract'] }, 16 | gun: { words: ['gun', 'weapon', 'bullet', 'away'] }, 17 | }, 18 | trip: { 19 | drug: { words: ['lsd', 'acid'] }, 20 | fall: { fallback: true, words: ['stumble', 'hurt'] }, 21 | }, 22 | tie: { 23 | knot: { words: ['bow', 'rope', 'lace'] }, 24 | game: { words: ['point', 'score', 'match'] }, 25 | }, 26 | } 27 | -------------------------------------------------------------------------------- /tests/one/miss.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from './_lib.js' 3 | const here = '[one/miss] ' 4 | 5 | const arr = [ 6 | // no tags 7 | // [`toronto`, '#City'], 8 | // [`i went to Toronto`, '#Noun'], 9 | // // no chunks 10 | // [`toronto`, ''], 11 | // [`i went to Toronto`, ''], 12 | // min-length 13 | ['mexico', '.{2}'], 14 | ['mexico', '.{2,3}'], 15 | //word-word 16 | ['mexico city', 'foo city'], 17 | ['mexico city', 'city foo'], 18 | ['mexico city', 'city .'], 19 | ['mexico city', 'mexico city .'], 20 | ['mexico city', '. 
// for: ['we', 'have']
// a match for "we have" should work as normal,
// but matching "we've" should skip over the implicit terms
const contractionSkip = function (state) {
  const { terms, regs } = state
  const current = terms[state.t]
  const next = terms[state.t + 1]
  // only act when we matched the first part of a contraction
  if (!current.implicit || !next) {
    return
  }
  // ensure the following word is implicit too
  if (!next.implicit) {
    return
  }
  const reg = regs[state.r]
  // we matched "we've" - skip-over [we, have]
  if (reg.word === current.normal) {
    state.t += 1
  }
  // also skip for @hasContraction
  if (reg.method === 'hasContraction') {
    state.t += 1
  }
}
export default contractionSkip
// group consecutive terms that share a chunk label,
// then drop any 'Verb' chunk that contains no actual #Verb term
const fixUp = function (docs) {
  const groups = []
  let lastChunk = null
  docs.forEach(terms => {
    terms.forEach(term => {
      if (lastChunk && term.chunk === lastChunk) {
        // extend the current run
        groups[groups.length - 1].terms.push(term)
      } else {
        // start a new run
        groups.push({ chunk: term.chunk, terms: [term] })
        lastChunk = term.chunk
      }
    })
  })
  // ensure every verb-phrase actually has a verb
  groups.forEach(group => {
    if (group.chunk !== 'Verb') {
      return
    }
    const hasVerb = group.terms.some(t => t.tags.has('Verb'))
    if (hasVerb === false) {
      group.terms.forEach(t => {
        t.chunk = null
      })
    }
  })
}
export default fixUp
He walks quickly.`) 7 | 8 | const canBeNoun = doc.canBe('Noun') 9 | t.equal(canBeNoun.length, 2, here + 'two results') 10 | t.equal(canBeNoun.terms(0).text('normal'), 'spencer', here + 'first result') 11 | t.equal(canBeNoun.terms(1).text(), 'He', here + 'first result') 12 | 13 | const canBeVerb = nlp('spencer kelly').canBe('Verb') 14 | t.equal(canBeVerb.length, 0, here + 'no results') 15 | 16 | const canBeMisc = nlp('spencer kelly').canBe('asdf') 17 | t.equal(canBeMisc.length, 1, here + 'all results are one') 18 | 19 | 20 | const found = nlp("Moe Sizlak.").terms().canBe('#Verb').found 21 | t.equal(found, false, here + 'no verb') 22 | t.end() 23 | }) 24 | -------------------------------------------------------------------------------- /plugins/dates/src/api/parse/one/02-parse/02-holidays.js: -------------------------------------------------------------------------------- 1 | import { Holiday } from '../units/index.js' 2 | import spacetimeHoliday from 'spacetime-holiday' 3 | 4 | const parseHoliday = function (doc, context) { 5 | let unit = null 6 | const m = doc.match('[#Holiday+] [#Year?]') 7 | let year = context.today.year() 8 | if (m.groups('year').found) { 9 | year = Number(m.groups('year').text('reduced')) || year 10 | } 11 | const str = m.groups('holiday').text('reduced') 12 | let s = spacetimeHoliday(str, year, context.timezone) 13 | if (s !== null) { 14 | // assume the year in the future.. 
// chop-off a run of `val` at the tail of the array
// NOTE(review): if EVERY element equals `val`, the list is returned
// un-chopped (not emptied) - confirm that's the intended behavior
const truncate = (list, val) => {
  let keep = 0 // index just past the last non-`val` element
  for (let i = 0; i < list.length; i += 1) {
    if (list[i] !== val) {
      keep = i + 1
    }
  }
  return keep === 0 ? list : list.slice(0, keep)
}

// prune the trie a bit, to shrink its serialized size
const compress = function (trie) {
  // empty transition objects become undefined
  trie.goNext = trie.goNext.map(node => {
    return Object.keys(node).length === 0 ? undefined : node
  })
  // chop-off tail of undefined vals in goNext array
  trie.goNext = truncate(trie.goNext, undefined)
  // chop-off tail of zeros in failTo array
  trie.failTo = truncate(trie.failTo, 0)
  // chop-off tail of nulls in endAs array
  trie.endAs = truncate(trie.endAs, null)
  return trie
}
export default compress
// a js version of the metaphone (#1) algorithm
// adapted from the work of Chris Umbel
// https://github.com/NaturalNode/natural/blob/master/lib/natural/phonetics/metaphone.js
// (`m` is imported from './transformations.js' at the top of this file)

const metaphone = function (s) {
  // the transformations, applied strictly in this order
  const steps = [
    m.dedup,
    m.dropInitialLetters,
    m.dropBafterMAtEnd,
    m.changeCK,
    m.cchange,
    m.dchange,
    m.dropG,
    m.changeG,
    m.dropH,
    m.changePH,
    m.changeQ,
    m.changeS,
    m.changeX,
    m.changeT,
    m.dropT,
    m.changeV,
    m.changeWH,
    m.dropW,
    m.dropY,
    m.changeZ,
    m.dropVowels,
  ]
  const out = steps.reduce((acc, step) => step(acc), s)
  return out.trim()
}

export default metaphone
import test from 'tape'
import nlp from '../_lib.js'
const here = '[three/phrasal]'

test('phrasal-verbs:', function (t) {
  // [input text, expected term-array after phrasal-verb merging]
  const cases = [
    [`he is really good`, ['he', 'is', 'really', 'good']],
    [`he is upset about it`, ['he', 'is', 'upset', 'about', 'it']],
    [`he will mess about with it`, ['he', 'will', 'mess about', 'with', 'it']],

    [`come forward`, ['come forward']],
    [`come together`, ['come together']],
    [`come apart`, ['come apart']],

    [`frighten back`, ['frighten', 'back']],
    [`frighten away`, ['frighten away']],
  ]
  for (const [input, want] of cases) {
    const got = nlp(input).out('array')
    const msg = got.join(' ') + ' -- ' + want.join(' ')
    t.equal(got.join(' '), want.join(' '), here + msg)
  }
  t.end()
})
// tag-definitions for date-related tags
// NOTE(review): assumed field semantics - confirm against the tagSet compiler:
//   is   - parent tag this tag implies
//   also - extra tags applied alongside this one
//   not  - conflicting tags that cannot co-exist with this one
export default {
  Date: {
    not: ['Verb', 'Adverb', 'Adjective'],
  },
  // 'june'
  Month: {
    is: 'Date',
    also: ['Noun'],
    not: ['Year', 'WeekDay', 'Time'],
  },
  // 'tuesday'
  WeekDay: {
    is: 'Date',
    also: ['Noun'],
  },
  // '1998'
  Year: {
    is: 'Date',
    not: ['RomanNumeral'],
  },
  // 'q2'
  FinancialQuarter: {
    is: 'Date',
    not: 'Fraction',
  },
  // 'easter'
  Holiday: {
    is: 'Date',
    also: ['Noun'],
  },
  // 'summer'
  Season: {
    is: 'Date',
  },
  // 'est'
  Timezone: {
    is: 'Date',
    also: ['Noun'],
    not: ['ProperNoun'],
  },
  // '4:30pm'
  Time: {
    is: 'Date',
    not: ['AtMention'],
  },
  // 'months'
  Duration: {
    is: 'Date',
    also: ['Noun'],
  },
}
import { findChained } from './lib.js'

// choose the most-likely antecedent for 'they' & 'their'
const getThey = function (s) {
  const nouns = s.nouns()

  // best case: a plural, non-pronoun noun - 'the bananas'
  const plural = nouns.isPlural().notIf('#Pronoun')
  if (plural.found) {
    return plural.last()
  }

  // otherwise, chain to an existing they/their reference
  const prior = findChained('(they|their|theirs)', s)
  if (prior.found) {
    return prior
  }

  // they can also refer to a singular noun
  // "the restaurant sold their food"
  // "a choir sang their song"

  // singular indefinite pronouns - 'somebody shaved their head'
  const indefinite = nouns.match('(somebody|nobody|everybody|anybody|someone|noone|everyone|anyone)')
  if (indefinite.found) {
    return indefinite.last()
  }
  return s.none()
}


export default getThey
// strip a tag (and its child-tags) from each of the given terms
const unTag = function (terms, tag, tagSet) {
  // normalize '#Verb' -> 'Verb'
  const name = tag.trim().replace(/^#/, '')
  for (const term of terms) {
    // frozen terms keep all of their tags
    if (term.frozen === true) {
      continue
    }
    // support clearing all tags, with '*'
    if (name === '*') {
      term.tags.clear()
      continue
    }
    // for known tags, do logical dependencies first
    const known = tagSet[name]
    // removing #Verb should also remove #PastTense
    if (known && known.children.length > 0) {
      known.children.forEach((child) => {
        term.tags.delete(child)
      })
    }
    term.tags.delete(name)
  }
}
export default unTag
// does this chunk end on a newline?
const endsWithNewline = function (chunk) {
  return /\n$/.test(chunk)
}

// loop through these chunks, and join the non-sentence chunks back together.
// mutates `chunks` as it goes; returns the merged sentence list
const smartMerge = function (chunks, world) {
  const isSentence = world.methods.one.tokenize.isSentence
  const abbrevs = world.model.one.abbreviations || new Set()

  const results = []
  for (let i = 0; i < chunks.length; i += 1) {
    const chunk = chunks[i]
    const next = chunks[i + 1]
    // should this chunk be combined with the next one?
    if (next && !isSentence(chunk, abbrevs) && !endsWithNewline(chunk)) {
      chunks[i + 1] = chunk + next
    } else if (chunk && chunk.length > 0) {
      // this chunk is a proper sentence..
      results.push(chunk)
      chunks[i] = ''
    }
  }
  return results
}
export default smartMerge
// do acronyms need to be ASCII? ... kind of?
const periodAcronym = /([A-Z]\.)+[A-Z]?,?$/ // like N.D.A.
const oneLetterAcronym = /^[A-Z]\.,?$/ // like 'F.'
const noPeriodAcronym = /[A-Z]{2,}('s|,)?$/ // like NDA
const lowerCaseAcronym = /([a-z]\.)+[a-z]\.?$/ // like c.e.o

// checked in the same order as before: period-form, lowercase, one-letter, no-period
const forms = [periodAcronym, lowerCaseAcronym, oneLetterAcronym, noPeriodAcronym]

// does this word look like an acronym in any supported form?
const isAcronym = function (str) {
  return forms.some((reg) => reg.test(str))
}

// strip periods from anything that looks like an acronym
const doAcronym = function (str) {
  return isAcronym(str) ? str.replace(/\./g, '') : str
}
export default doAcronym
\`inline stuff\` 13 | 14 | \`\`\` 15 | block stuff 16 | \`\`\` 17 | afterwards 18 | 19 | > Alpha bravo charlie. 20 | 21 | and then a cool: 22 | * list 1 23 | * list 2 24 | * list 3 25 | 26 | hello ![alpha](https://example.com/favicon.ico "bravo") world 27 | 28 | ` 29 | 30 | 31 | 32 | md = `| cool | also | here | | | 33 | |------|------|-------|---|---| 34 | | one | two | three | | | 35 | | four | five | | | | 36 | | | | | | |` 37 | 38 | md = `ok **cool** after. 39 | 40 | below` 41 | const doc = nlp.fromMarkdown(md) 42 | console.log(doc) -------------------------------------------------------------------------------- /plugins/_experiments/cmd-k/README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
a plugin for compromise
5 | 6 | 7 | 8 | 9 | 10 | v 11 | 12 | 13 | 14 | 15 |
16 |
17 | 18 |
19 | npm install compromise-cmd-k 20 |
import test from 'tape'
import nlp from './_lib.js'

test('misc ngrams', function (t) {
  // [method name, input text, expected count, message]
  const counts = [
    ['ngrams', `quickly, suddenly`, 3, 'found three ngrams'],
    ['unigrams', `john, bill, joe`, 3, 'found three unigrams'],
    ['bigrams', `john, bill, joe`, 2, 'found 2 bigrams'],
    ['trigrams', `john, bill, joe`, 1, 'found 1 trigrams'],
  ]
  for (const [method, input, want, msg] of counts) {
    t.equal(nlp(input)[method]().length, want, msg)
  }

  // endgrams - repeated sentence-endings
  const doc = nlp('i am in houston texas. i am a good person. so i think he is a good person.')
  const arr = doc.endgrams({ size: 2 }) || []
  t.equal(arr.length, 2, 'found 2 endgrams of size-2')
  const first = arr[0] || {}
  t.equal(first.normal, 'good person', 'found good person')
  t.equal(first.count, 2, 'found 2 good person results')
  t.end()
})