├── plugins ├── wikipedia │ ├── .gitignore │ ├── config.js │ ├── scripts │ │ ├── generate │ │ │ └── index.js │ │ └── stat.js │ ├── index.d.ts │ ├── scratch.js │ ├── tests │ │ ├── _lib.js │ │ └── misc.test.js │ ├── src │ │ └── plugin.js │ └── demo │ │ └── index.html ├── _experiments │ ├── markdown │ │ ├── src │ │ │ ├── Wrap.js │ │ │ ├── parse │ │ │ │ └── index.js │ │ │ └── plugin.js │ │ ├── README.md │ │ └── scratch.js │ ├── sentiment │ │ ├── README.md │ │ ├── scratch.js │ │ └── src │ │ │ └── escape.js │ ├── compress │ │ ├── README.md │ │ └── src │ │ │ └── index.js │ ├── cmd-k │ │ ├── src │ │ │ ├── plugin.js │ │ │ └── slashCmd.js │ │ ├── scratch.js │ │ └── README.md │ └── ast │ │ ├── README.md │ │ ├── src │ │ ├── compute │ │ │ └── index.js │ │ ├── plugin.js │ │ └── lines.js │ │ └── scratch.js ├── dates │ ├── src │ │ ├── _version.js │ │ ├── api │ │ │ ├── index.js │ │ │ ├── parse │ │ │ │ ├── one │ │ │ │ │ ├── units │ │ │ │ │ │ ├── index.js │ │ │ │ │ │ └── _time.js │ │ │ │ │ ├── 01-tokenize │ │ │ │ │ │ ├── 07-weekday.js │ │ │ │ │ │ └── 05-section.js │ │ │ │ │ └── 02-parse │ │ │ │ │ │ ├── index.js │ │ │ │ │ │ └── 02-holidays.js │ │ │ │ ├── range │ │ │ │ │ └── _reverse.js │ │ │ │ ├── index.js │ │ │ │ └── normalize.js │ │ │ └── toJSON.js │ │ ├── model │ │ │ ├── words │ │ │ │ ├── dates.js │ │ │ │ ├── times.js │ │ │ │ ├── index.js │ │ │ │ └── durations.js │ │ │ ├── tags.js │ │ │ └── regex.js │ │ └── plugin.js │ ├── index.d.cts │ ├── scripts │ │ └── version.js │ ├── tests │ │ └── _lib.js │ └── demo │ │ └── index.html ├── speed │ ├── src │ │ ├── _version.js │ │ ├── lazyParse │ │ │ ├── plugin.js │ │ │ ├── lazyParse.js │ │ │ └── maybeMatch.js │ │ ├── workerPool │ │ │ ├── plugin.js │ │ │ └── pool │ │ │ │ └── create.js │ │ └── plugin.js │ ├── scripts │ │ └── version.js │ ├── tests │ │ ├── _lib.js │ │ └── stream.test.js │ └── index.d.ts ├── paragraphs │ ├── src │ │ └── plugin.js │ ├── index.d.ts │ └── tests │ │ └── _lib.js ├── speech │ ├── src │ │ ├── plugin.js │ │ ├── compute │ │ 
│ ├── index.js │ │ │ ├── soundsLike │ │ │ │ ├── index.js │ │ │ │ └── metaphone.js │ │ │ └── syllables │ │ │ │ └── index.js │ │ └── api.js │ ├── index.d.ts │ ├── tests │ │ ├── _lib.js │ │ └── soundsLike.test.js │ ├── scratch.js │ └── demo │ │ └── index.html ├── stats │ ├── src │ │ ├── ngram │ │ │ ├── tokenize.js │ │ │ └── sort.js │ │ ├── plugin.js │ │ ├── tfidf │ │ │ ├── unpack.js │ │ │ └── tf.js │ │ └── compute.js │ ├── tests │ │ ├── _lib.js │ │ └── misc.test.js │ ├── scratch.js │ ├── demo │ │ └── index.html │ └── scripts │ │ └── generate.js └── payload │ ├── src │ └── debug.js │ ├── scratch.js │ ├── tests │ └── _lib.js │ └── index.d.ts ├── src ├── _version.js ├── 3-three │ ├── sentences │ │ ├── plugin.js │ │ └── conjugate │ │ │ ├── toInfinitive.js │ │ │ └── toNegative.js │ ├── normalize │ │ └── plugin.js │ ├── nouns │ │ ├── plugin.js │ │ └── api │ │ │ ├── hasPlural.js │ │ │ ├── toSingular.js │ │ │ ├── parse.js │ │ │ ├── toJSON.js │ │ │ └── isPlural.js │ ├── verbs │ │ ├── plugin.js │ │ └── api │ │ │ └── parse │ │ │ ├── root.js │ │ │ └── adverbs.js │ ├── coreference │ │ ├── compute │ │ │ ├── findIt.js │ │ │ ├── lib.js │ │ │ └── findThey.js │ │ └── plugin.js │ ├── chunker │ │ ├── plugin.js │ │ ├── compute │ │ │ ├── index.js │ │ │ └── 05-fixUp.js │ │ └── api │ │ │ └── chunks.js │ ├── topics │ │ ├── orgs │ │ │ └── api.js │ │ ├── places │ │ │ ├── api.js │ │ │ └── find.js │ │ ├── people │ │ │ ├── find.js │ │ │ └── parse.js │ │ ├── plugin.js │ │ └── topics.js │ ├── numbers │ │ ├── fractions │ │ │ ├── convert │ │ │ │ ├── toCardinal.js │ │ │ │ └── toOrdinal.js │ │ │ └── find.js │ │ ├── plugin.js │ │ └── numbers │ │ │ ├── parse │ │ │ └── toNumber │ │ │ │ ├── validate.js │ │ │ │ ├── parseNumeric.js │ │ │ │ ├── parseDecimals.js │ │ │ │ └── findModifiers.js │ │ │ ├── _toString.js │ │ │ ├── format │ │ │ ├── toOrdinal │ │ │ │ └── numOrdinal.js │ │ │ └── index.js │ │ │ └── isUnit.js │ ├── misc │ │ ├── parentheses │ │ │ └── index.js │ │ ├── quotations │ │ │ └── index.js │ │ ├── 
plugin.js │ │ └── slashes │ │ │ └── index.js │ └── redact │ │ └── plugin.js ├── 4-four │ ├── facts │ │ ├── plugin.js │ │ └── parse │ │ │ ├── pivot.js │ │ │ ├── noun.js │ │ │ ├── adjective.js │ │ │ ├── postProcess.js │ │ │ └── verb.js │ └── sense │ │ ├── plugin.js │ │ ├── api │ │ └── api.js │ │ └── model │ │ ├── index.js │ │ └── senses │ │ ├── verb.js │ │ ├── adjective.js │ │ └── index.js ├── 2-two │ ├── lazy │ │ ├── plugin.js │ │ ├── lazyParse.js │ │ └── maybeMatch.js │ ├── preTagger │ │ ├── model │ │ │ ├── personWords.js │ │ │ ├── clues │ │ │ │ ├── person-adj.js │ │ │ │ ├── unit-noun.js │ │ │ │ ├── actor-verb.js │ │ │ │ ├── person-noun.js │ │ │ │ └── person-verb.js │ │ │ ├── _expand │ │ │ │ └── irregulars.js │ │ │ └── regex │ │ │ │ └── regex-text.js │ │ ├── compute │ │ │ ├── index.js │ │ │ └── tagger │ │ │ │ ├── 3rd-pass │ │ │ │ ├── 07-verb-type.js │ │ │ │ └── 05-fallback.js │ │ │ │ ├── 2nd-pass │ │ │ │ └── 00-tagSwitch.js │ │ │ │ └── 1st-pass │ │ │ │ ├── 02-hyphens.js │ │ │ │ └── 01-colons.js │ │ ├── methods │ │ │ ├── transform │ │ │ │ ├── index.js │ │ │ │ ├── verbs │ │ │ │ │ ├── index.js │ │ │ │ │ └── getTense │ │ │ │ │ │ └── index.js │ │ │ │ ├── nouns │ │ │ │ │ ├── index.js │ │ │ │ │ └── toSingular │ │ │ │ │ │ └── index.js │ │ │ │ └── adjectives │ │ │ │ │ ├── conjugate │ │ │ │ │ └── lib.js │ │ │ │ │ └── index.js │ │ │ └── index.js │ │ ├── plugin.js │ │ └── tagSet │ │ │ ├── index.js │ │ │ ├── values.js │ │ │ └── dates.js │ ├── swap │ │ ├── plugin.js │ │ └── api │ │ │ └── swap-verb.js │ ├── contraction-two │ │ └── plugin.js │ └── postTagger │ │ ├── plugin.js │ │ ├── model │ │ ├── verbs │ │ │ ├── adj-gerund.js │ │ │ └── passive.js │ │ └── numbers │ │ │ └── money.js │ │ ├── api.js │ │ └── compute │ │ └── index.js ├── 1-one │ ├── tokenize │ │ ├── model │ │ │ ├── abbreviations │ │ │ │ ├── organizations.js │ │ │ │ ├── months.js │ │ │ │ ├── nouns.js │ │ │ │ ├── honorifics.js │ │ │ │ ├── places.js │ │ │ │ └── units.js │ │ │ ├── aliases.js │ │ │ ├── suffixes.js │ │ │ ├── 
index.js │ │ │ └── prefixes.js │ │ ├── methods │ │ │ ├── unicode.js │ │ │ ├── 03-whitespace │ │ │ │ └── index.js │ │ │ ├── 02-terms │ │ │ │ ├── 02-slashes.js │ │ │ │ └── 03-ranges.js │ │ │ ├── index.js │ │ │ ├── parse.js │ │ │ └── 01-sentences │ │ │ │ └── 03-smart-merge.js │ │ ├── compute │ │ │ ├── wordCount.js │ │ │ ├── normal │ │ │ │ ├── index.js │ │ │ │ └── 02-acronyms.js │ │ │ ├── offset.js │ │ │ ├── reindex.js │ │ │ ├── freq.js │ │ │ ├── machine.js │ │ │ └── index.js │ │ └── plugin.js │ ├── contraction-one │ │ ├── compute │ │ │ ├── index.js │ │ │ └── contractions │ │ │ │ ├── apostrophe-t.js │ │ │ │ ├── number-unit.js │ │ │ │ └── number-range.js │ │ ├── model │ │ │ ├── index.js │ │ │ └── number-suffix.js │ │ └── plugin.js │ ├── output │ │ ├── methods │ │ │ ├── index.js │ │ │ └── debug │ │ │ │ ├── index.js │ │ │ │ ├── _color.js │ │ │ │ └── client-side.js │ │ ├── plugin.js │ │ └── api │ │ │ ├── index.js │ │ │ ├── debug.js │ │ │ └── _fmts.js │ ├── cache │ │ ├── methods │ │ │ └── index.js │ │ ├── compute.js │ │ ├── plugin.js │ │ └── api.js │ ├── lexicon │ │ ├── methods │ │ │ └── index.js │ │ ├── plugin.js │ │ └── compute │ │ │ └── index.js │ ├── change │ │ ├── plugin.js │ │ ├── compute │ │ │ └── index.js │ │ └── api │ │ │ ├── harden.js │ │ │ └── index.js │ ├── pointers │ │ ├── plugin.js │ │ ├── api │ │ │ └── lib │ │ │ │ └── difference.js │ │ └── methods │ │ │ └── index.js │ ├── tag │ │ ├── api │ │ │ └── index.js │ │ ├── methods │ │ │ ├── index.js │ │ │ ├── addTags │ │ │ │ └── _colors.js │ │ │ ├── canBe.js │ │ │ └── unTag.js │ │ ├── lib.js │ │ └── plugin.js │ ├── match │ │ ├── plugin.js │ │ ├── methods │ │ │ ├── index.js │ │ │ ├── match │ │ │ │ ├── _lib.js │ │ │ │ ├── 03-notIf.js │ │ │ │ ├── steps │ │ │ │ │ ├── greedy-match.js │ │ │ │ │ ├── optional-match.js │ │ │ │ │ └── contraction-skip.js │ │ │ │ └── 03-getGroup.js │ │ │ └── parseMatch │ │ │ │ └── 03-splitHyphens.js │ │ ├── lib.js │ │ └── api │ │ │ └── index.js │ ├── sweep │ │ ├── plugin.js │ │ ├── methods │ │ │ 
├── index.js │ │ │ ├── tagger │ │ │ │ └── canBe.js │ │ │ └── sweep │ │ │ │ └── 01-getHooks.js │ │ └── lib.js │ ├── typeahead │ │ ├── plugin.js │ │ └── api.js │ ├── lookup │ │ ├── plugin.js │ │ └── api │ │ │ ├── index.js │ │ │ └── buildTrie │ │ │ └── compress.js │ └── freeze │ │ └── debug.js ├── four.js ├── API │ ├── methods │ │ └── index.js │ ├── world.js │ └── _lib.js └── two.js ├── codecov.yml ├── types ├── one.d.cts ├── two.d.cts ├── three.d.cts └── view │ ├── two.d.cts │ ├── one.d.cts │ ├── three.d.cts │ └── two.d.ts ├── .npmignore ├── data ├── lexicon │ ├── people │ │ └── honorifics.js │ ├── verbs │ │ ├── verbs.js │ │ └── modals.js │ ├── numbers │ │ ├── multiples.js │ │ ├── ordinals.js │ │ └── cardinals.js │ ├── switches │ │ ├── person-date.js │ │ ├── person-adj.js │ │ ├── person-place.js │ │ ├── person-verb.js │ │ ├── unit-noun.js │ │ └── actor-verb.js │ ├── dates │ │ ├── dates.js │ │ ├── months.js │ │ ├── weekdays.js │ │ └── durations.js │ ├── nouns │ │ ├── pronouns.js │ │ ├── possessives.js │ │ ├── relative-prounoun.js │ │ └── properNouns.js │ └── misc │ │ └── determiners.js └── pairs │ └── index.js ├── scripts ├── chunks.js ├── patterns │ └── patterns.js ├── debug.js ├── version.js ├── perf │ ├── flame │ │ └── index.js │ ├── pool │ │ └── _lib.js │ └── _fetch.js ├── typescript │ └── two.ts ├── test │ ├── coverage.js │ └── stress.js ├── coreference │ └── index.js ├── match.js └── plugins.js ├── tests ├── one │ ├── _lib.js │ ├── misc │ │ └── misc.test.js │ ├── change │ │ └── fork.ignore.js │ ├── match │ │ ├── punctuation-match.test.js │ │ └── sweep-not.test.js │ ├── tokenize │ │ └── term-split.test.js │ └── miss.test.js ├── two │ ├── _lib.js │ ├── misc │ │ ├── misc.test.js │ │ ├── lazy.test.js │ │ ├── confidence.test.js │ │ └── canBe.test.js │ └── match │ │ └── soft-match.test.js ├── three │ ├── _lib.js │ ├── sentences │ │ └── negative.test.js │ ├── redact.test.js │ ├── verbs │ │ ├── phrasals.test.js │ │ ├── isplural.test.js │ │ └── phrasal.test.js │ ├── 
numbers │ │ └── backlog │ │ │ ├── overlap.ignore.js │ │ │ ├── conversion.ignore.js │ │ │ └── agreement.ignore.js │ ├── subsets.test.js │ └── nouns │ │ └── adjectives.test.js ├── four │ ├── _lib.js │ ├── match.ignore.js │ └── misc.ignore.js └── hmm.js ├── .gitignore ├── demos └── web-worker │ └── _worker.js ├── one └── package.json ├── two └── package.json ├── tokenize └── package.json ├── three └── package.json ├── tsconfig.json └── .github └── workflows └── coverage.yml /plugins/wikipedia/.gitignore: -------------------------------------------------------------------------------- 1 | files -------------------------------------------------------------------------------- /plugins/_experiments/markdown/src/Wrap.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/_version.js: -------------------------------------------------------------------------------- 1 | export default '14.14.5' -------------------------------------------------------------------------------- /plugins/dates/src/_version.js: -------------------------------------------------------------------------------- 1 | export default '3.7.1' -------------------------------------------------------------------------------- /plugins/speed/src/_version.js: -------------------------------------------------------------------------------- 1 | export default '0.1.2' -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | branches: 3 | - 'master' 4 | -------------------------------------------------------------------------------- /types/one.d.cts: -------------------------------------------------------------------------------- 1 | import nlp from "./one.d"; 2 | 3 | export = nlp 4 | 5 | 
-------------------------------------------------------------------------------- /types/two.d.cts: -------------------------------------------------------------------------------- 1 | import nlp from "./two.d"; 2 | 3 | export = nlp 4 | 5 | -------------------------------------------------------------------------------- /types/three.d.cts: -------------------------------------------------------------------------------- 1 | import nlp from "./three.d"; 2 | 3 | export = nlp 4 | 5 | -------------------------------------------------------------------------------- /types/view/two.d.cts: -------------------------------------------------------------------------------- 1 | import Two from "./two.d"; 2 | 3 | export = Two 4 | 5 | -------------------------------------------------------------------------------- /types/view/one.d.cts: -------------------------------------------------------------------------------- 1 | import View from "./one.d"; 2 | 3 | export = View 4 | 5 | -------------------------------------------------------------------------------- /plugins/dates/index.d.cts: -------------------------------------------------------------------------------- 1 | import dates from './index.d'; 2 | 3 | export = dates 4 | -------------------------------------------------------------------------------- /types/view/three.d.cts: -------------------------------------------------------------------------------- 1 | import Three from "./three.d"; 2 | 3 | export = Three 4 | 5 | -------------------------------------------------------------------------------- /src/3-three/sentences/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api.js' 2 | 3 | export default { api } 4 | -------------------------------------------------------------------------------- /src/4-four/facts/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api.js' 2 | 3 | export default { 4 | api 5 
| } -------------------------------------------------------------------------------- /plugins/paragraphs/src/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api.js' 2 | 3 | export default { 4 | api, 5 | } -------------------------------------------------------------------------------- /src/3-three/normalize/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api.js' 2 | 3 | export default { 4 | api 5 | } -------------------------------------------------------------------------------- /src/3-three/nouns/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/api.js' 2 | 3 | export default { 4 | api, 5 | } 6 | -------------------------------------------------------------------------------- /src/3-three/verbs/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/api.js' 2 | 3 | export default { 4 | api, 5 | } 6 | -------------------------------------------------------------------------------- /plugins/_experiments/markdown/README.md: -------------------------------------------------------------------------------- 1 | experimental nlp on a [unified/remark](https://unifiedjs.com/) AST. 
2 | -------------------------------------------------------------------------------- /plugins/_experiments/sentiment/README.md: -------------------------------------------------------------------------------- 1 | experimental rule-based, compressed-data sentiment analysis by Scott Cram 2 | -------------------------------------------------------------------------------- /src/2-two/lazy/plugin.js: -------------------------------------------------------------------------------- 1 | import lazy from './lazyParse.js' 2 | 3 | export default { 4 | lib: { 5 | lazy 6 | } 7 | } -------------------------------------------------------------------------------- /src/1-one/tokenize/model/abbreviations/organizations.js: -------------------------------------------------------------------------------- 1 | export default ['dept', 'univ', 'assn', 'bros', 'inc', 'ltd', 'co'] 2 | -------------------------------------------------------------------------------- /src/3-three/coreference/compute/findIt.js: -------------------------------------------------------------------------------- 1 | const findIt = function (m) { 2 | return m.none() 3 | } 4 | export default findIt -------------------------------------------------------------------------------- /plugins/speed/src/lazyParse/plugin.js: -------------------------------------------------------------------------------- 1 | import lazy from './lazyParse.js' 2 | 3 | export default { 4 | lib: { 5 | lazy 6 | } 7 | } -------------------------------------------------------------------------------- /src/1-one/contraction-one/compute/index.js: -------------------------------------------------------------------------------- 1 | import contractions from './contractions/index.js' 2 | 3 | export default { contractions } 4 | -------------------------------------------------------------------------------- /src/1-one/output/methods/index.js: -------------------------------------------------------------------------------- 1 | import hash from './hash.js' 2 
| import debug from './debug/index.js' 3 | 4 | export { hash, debug } 5 | -------------------------------------------------------------------------------- /src/1-one/cache/methods/index.js: -------------------------------------------------------------------------------- 1 | import cacheDoc from './cacheDoc.js' 2 | 3 | export default { 4 | one: { 5 | cacheDoc, 6 | }, 7 | } 8 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/personWords.js: -------------------------------------------------------------------------------- 1 | // extended professions, for #Actor tag 2 | // 'x therapist', 'y engineer' 3 | export default [ 4 | 5 | ] -------------------------------------------------------------------------------- /plugins/speed/src/workerPool/plugin.js: -------------------------------------------------------------------------------- 1 | import workerPool from './index.js' 2 | 3 | export default { 4 | lib: { 5 | workerPool 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/1-one/lexicon/methods/index.js: -------------------------------------------------------------------------------- 1 | import expandLexicon from './expand.js' 2 | 3 | export default { 4 | one: { 5 | expandLexicon, 6 | } 7 | } -------------------------------------------------------------------------------- /src/1-one/tokenize/model/abbreviations/months.js: -------------------------------------------------------------------------------- 1 | export default ['jan', 'feb', 'mar', 'apr', 'jun', 'jul', 'aug', 'sep', 'sept', 'oct', 'nov', 'dec'] 2 | -------------------------------------------------------------------------------- /plugins/speech/src/plugin.js: -------------------------------------------------------------------------------- 1 | import compute from './compute/index.js' 2 | import api from './api.js' 3 | 4 | export default { 5 | api, 6 | compute 7 | } 
-------------------------------------------------------------------------------- /src/1-one/cache/compute.js: -------------------------------------------------------------------------------- 1 | 2 | export default { 3 | cache: function (view) { 4 | view._cache = view.methods.one.cacheDoc(view.document) 5 | } 6 | } -------------------------------------------------------------------------------- /plugins/_experiments/compress/README.md: -------------------------------------------------------------------------------- 1 | they say that compression and intellegence are the same thing, 2 | but I'm not small-enough to understand that. 3 | 4 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | data 2 | demo 3 | plugins 4 | scripts 5 | tests 6 | .eslintrc 7 | .gitignore 8 | changelog.md 9 | hmm.md 10 | rollup.config.js 11 | scratch.js 12 | -------------------------------------------------------------------------------- /src/1-one/change/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import compute from './compute/index.js' 3 | 4 | export default { 5 | api, 6 | compute, 7 | } 8 | -------------------------------------------------------------------------------- /src/1-one/pointers/plugin.js: -------------------------------------------------------------------------------- 1 | import methods from './methods/index.js' 2 | import api from './api/index.js' 3 | 4 | export default { 5 | methods, 6 | api, 7 | } 8 | -------------------------------------------------------------------------------- /plugins/_experiments/cmd-k/src/plugin.js: -------------------------------------------------------------------------------- 1 | import searchBang from './searchBang.js' 2 | import slashCmd from './slashCmd.js' 3 | 4 | export default [searchBang, slashCmd] 5 | 
-------------------------------------------------------------------------------- /src/1-one/tag/api/index.js: -------------------------------------------------------------------------------- 1 | import tag from './tag.js' 2 | 3 | const tagAPI = function (View) { 4 | Object.assign(View.prototype, tag) 5 | } 6 | export default tagAPI 7 | -------------------------------------------------------------------------------- /src/2-two/swap/plugin.js: -------------------------------------------------------------------------------- 1 | import swap from './api/swap.js' 2 | 3 | const api = function (View) { 4 | View.prototype.swap = swap 5 | } 6 | 7 | export default { 8 | api 9 | } -------------------------------------------------------------------------------- /src/1-one/tokenize/model/aliases.js: -------------------------------------------------------------------------------- 1 | const aliases = { 2 | '&': 'and', 3 | '@': 'at', 4 | '%': 'percent', 5 | 'plz': 'please', 6 | 'bein': 'being', 7 | } 8 | export default aliases 9 | -------------------------------------------------------------------------------- /src/3-three/coreference/plugin.js: -------------------------------------------------------------------------------- 1 | import coreference from './compute/index.js' 2 | import api from './api/pronouns.js' 3 | 4 | export default { 5 | compute: { coreference }, 6 | api 7 | } -------------------------------------------------------------------------------- /plugins/speech/src/compute/index.js: -------------------------------------------------------------------------------- 1 | import soundsLike from './soundsLike/index.js' 2 | import syllables from './syllables/index.js' 3 | 4 | export default { 5 | soundsLike, 6 | syllables 7 | } -------------------------------------------------------------------------------- /src/2-two/preTagger/compute/index.js: -------------------------------------------------------------------------------- 1 | import preTagger from './tagger/index.js' 2 | 
import root from './root.js' 3 | import penn from './penn.js' 4 | 5 | export default { preTagger, root, penn } 6 | -------------------------------------------------------------------------------- /src/3-three/sentences/conjugate/toInfinitive.js: -------------------------------------------------------------------------------- 1 | const toInfinitive = function (s) { 2 | s.verbs().toInfinitive() 3 | // s.compute('chunks') 4 | return s 5 | } 6 | export default toInfinitive -------------------------------------------------------------------------------- /data/lexicon/people/honorifics.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'lieutenant general', 3 | 'field marshal', 4 | 'rear admiral', 5 | 'vice admiral', 6 | 'sergeant major', 7 | 'director general', 8 | ] -------------------------------------------------------------------------------- /src/2-two/contraction-two/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import compute from './compute/index.js' 3 | 4 | export default { 5 | compute, 6 | api, 7 | hooks: ['contractionTwo'] 8 | } -------------------------------------------------------------------------------- /src/3-three/chunker/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/api.js' 2 | import compute from './compute/index.js' 3 | 4 | export default { 5 | compute: compute, 6 | api: api, 7 | hooks: ['chunks'], 8 | } 9 | -------------------------------------------------------------------------------- /scripts/chunks.js: -------------------------------------------------------------------------------- 1 | import corpus from 'nlp-corpus' 2 | import nlp from '../src/three.js' 3 | const docs = corpus.some(13) 4 | docs.forEach(str => { 5 | nlp(str).debug({ tags: false, chunks: true }) 6 | }) 7 | 
-------------------------------------------------------------------------------- /scripts/patterns/patterns.js: -------------------------------------------------------------------------------- 1 | // list of all match patterns, Nov 2020 2 | let patterns = [] 3 | patterns = patterns.reduce((h, str) => { 4 | h[str] = 0 5 | return h 6 | }, {}) 7 | 8 | module.exports = patterns 9 | -------------------------------------------------------------------------------- /src/1-one/match/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import methods from './methods/index.js' 3 | import lib from './lib.js' 4 | 5 | export default { 6 | api, 7 | methods, 8 | lib, 9 | } 10 | -------------------------------------------------------------------------------- /plugins/_experiments/ast/README.md: -------------------------------------------------------------------------------- 1 | attempt to create a [unist-formatted](https://github.com/syntax-tree/unist) Abstract Syntax Tree via some [dependency parsing](http://nlpprogress.com/english/dependency_parsing.html) 2 | -------------------------------------------------------------------------------- /src/1-one/cache/plugin.js: -------------------------------------------------------------------------------- 1 | import methods from './methods/index.js' 2 | import api from './api.js' 3 | import compute from './compute.js' 4 | 5 | export default { 6 | api, 7 | compute, 8 | methods, 9 | } 10 | -------------------------------------------------------------------------------- /src/four.js: -------------------------------------------------------------------------------- 1 | import nlp from './three.js' 2 | import sense from './4-four/sense/plugin.js' 3 | import facts from './4-four/facts/plugin.js' 4 | 5 | nlp.plugin(sense) 6 | nlp.plugin(facts) 7 | 8 | export default nlp 9 | -------------------------------------------------------------------------------- 
/plugins/wikipedia/config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | lang: 'en', 3 | project: 'wikipedia', 4 | // min_pageviews: 3 // remove 378,151 5 | min_pageviews: 200 // remove 1,159,783 6 | // fresh: true, //start fresh 7 | } 8 | -------------------------------------------------------------------------------- /plugins/wikipedia/scripts/generate/index.js: -------------------------------------------------------------------------------- 1 | // import download from './01-download.js' 2 | import filter from './02-filter.js' 3 | import compress from './03-compress.js' 4 | 5 | // await download() 6 | filter() 7 | compress() -------------------------------------------------------------------------------- /src/3-three/topics/orgs/api.js: -------------------------------------------------------------------------------- 1 | 2 | const api = function (View) { 3 | View.prototype.organizations = function (n) { 4 | const m = this.match('#Organization+') 5 | return m.getNth(n) 6 | } 7 | } 8 | export default api 9 | -------------------------------------------------------------------------------- /data/lexicon/verbs/verbs.js: -------------------------------------------------------------------------------- 1 | //verbs we shouldn't conjugate, for whatever reason 2 | export default [ 3 | 'has', 4 | 'keep tabs', 5 | 'born', 6 | 'cannot', 7 | 'gonna', 8 | 'msg', 9 | 'make sure', 10 | 11 | ] 12 | -------------------------------------------------------------------------------- /scripts/debug.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import nlp from '../src/three.js' 3 | const txt = process.argv.slice(2).join(' ') 4 | console.log(`\n\n======== '${txt}' ======\n`) 5 | nlp.verbose(true) 6 | 7 | nlp(txt).debug() 8 | -------------------------------------------------------------------------------- /src/3-three/nouns/api/hasPlural.js: 
-------------------------------------------------------------------------------- 1 | 2 | const hasPlural = function (root) { 3 | if (root.has('^(#Uncountable|#ProperNoun|#Place|#Pronoun|#Acronym)+$')) { 4 | return false 5 | } 6 | return true 7 | } 8 | export default hasPlural -------------------------------------------------------------------------------- /data/lexicon/numbers/multiples.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'hundred', 3 | 'thousand', 4 | 'million', 5 | 'billion', 6 | 'trillion', 7 | 'quadrillion', 8 | 'quintillion', 9 | 'sextillion', 10 | 'septillion', 11 | ] 12 | -------------------------------------------------------------------------------- /src/1-one/sweep/plugin.js: -------------------------------------------------------------------------------- 1 | import lib from './lib.js' 2 | import api from './api.js' 3 | import methods from './methods/index.js' 4 | 5 | export default { 6 | lib, 7 | api, 8 | methods: { 9 | one: methods, 10 | } 11 | } -------------------------------------------------------------------------------- /data/lexicon/switches/person-date.js: -------------------------------------------------------------------------------- 1 | // person-names that can be dates 2 | export default [ 3 | // clues: [person, date], 4 | // fallback: 'Month', 5 | 'april', 'august', 'jan', 'january', 'june', 'sep', 'avril', 6 | // 'may' 7 | ] 8 | 9 | -------------------------------------------------------------------------------- /src/1-one/sweep/methods/index.js: -------------------------------------------------------------------------------- 1 | import buildNet from './buildNet/index.js' 2 | import bulkMatch from './sweep/index.js' 3 | import bulkTagger from './tagger/index.js' 4 | 5 | export default { 6 | buildNet, 7 | bulkMatch, 8 | bulkTagger 9 | } -------------------------------------------------------------------------------- /src/1-one/contraction-one/model/index.js: 
-------------------------------------------------------------------------------- 1 | import contractions from './contractions.js' 2 | import numberSuffixes from './number-suffix.js' 3 | 4 | export default { 5 | one: { 6 | contractions, 7 | numberSuffixes 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /data/lexicon/switches/person-adj.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'misty', 3 | 'rusty', 4 | 'dusty', 5 | 'rich', 6 | 'randy', 7 | 'sandy', 8 | 'earnest', 9 | 'frank', 10 | // 'young', 11 | 'brown', 12 | 'bella', 13 | 'woody' 14 | ] -------------------------------------------------------------------------------- /plugins/stats/src/ngram/tokenize.js: -------------------------------------------------------------------------------- 1 | // tokenize by term 2 | const tokenize = function (doc) { 3 | const list = doc.json({ text: false }).map(o => { 4 | return o.terms.map(t => t.normal) 5 | }) 6 | return list 7 | } 8 | export default tokenize 9 | -------------------------------------------------------------------------------- /src/1-one/output/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import { debug, hash } from './methods/index.js' 3 | 4 | export default { 5 | api, 6 | methods: { 7 | one: { 8 | hash, 9 | debug, 10 | }, 11 | }, 12 | } 13 | -------------------------------------------------------------------------------- /src/1-one/contraction-one/plugin.js: -------------------------------------------------------------------------------- 1 | import model from './model/index.js' 2 | import compute from './compute/index.js' 3 | 4 | const plugin = { 5 | model: model, 6 | compute: compute, 7 | hooks: ['contractions'], 8 | } 9 | export default plugin 10 | -------------------------------------------------------------------------------- /src/4-four/sense/plugin.js: 
import dates from './dates.js'
import times from './times.js'
import durations from './durations/index.js'

// attach every date-related method group onto the View prototype
const api = function (View) {
  const registrars = [dates, times, durations]
  registrars.forEach((addTo) => addTo(View))
}
export default api
// library-level methods (called on nlp itself, not on a document)
const lib = {
  /** compile a list of match statements into a reusable match-net */
  buildNet(matches) {
    const world = this.world()
    const net = this.methods().one.buildNet(matches, world)
    net.isNet = true
    return net
  },
}
export default lib
import fs from 'fs'
// stamp the current package.json version into a tiny module,
// so builds don't need to bundle the whole package.json file
const raw = fs.readFileSync('./package.json', 'utf8')
const { version } = JSON.parse(raw)

fs.writeFileSync('./src/_version.js', `export default '${version}'`)
// flip the first verb of a sentence to its negative form, in-place
const toNegative = (s) => {
  const firstVerb = s.verbs().first()
  firstVerb.toNegative().compute('chunks')
  return s
}
// flip the first verb of a sentence to its positive form, in-place
const toPositive = (s) => {
  const firstVerb = s.verbs().first()
  firstVerb.toPositive().compute('chunks')
  return s
}
export { toNegative, toPositive }
import find from './find.js'

// register a `.places()` method on the View prototype
const addMethod = function (View) {
  /** return matched place-names, optionally picking the nth one */
  View.prototype.places = function (n) {
    const found = find(this).getNth(n)
    return new View(this.document, found.pointer)
  }
}
export default addMethod
const senseMethods = function (View) {
  /** tag every term in this view with a word-sense label
   *  returns the view, so calls can be chained (matches the
   *  convention of the other prototype methods, e.g. .cache()) */
  View.prototype.sense = function (s) {
    this.docs.forEach(terms =>
      terms.forEach(t => {
        t.sense = s
      })
    )
    return this
  }
}
export default senseMethods
// discourse connectives that signal a logical break between clauses
// (was 'therefor' — a typo that silently never matched 'therefore')
const breaks = '(but|however|and|so|thus|therefore)'

/** parse a pivot/connective chunk into a fact-structure node
 *  @returns {{breakPoint: boolean, root: string}} */
const parsePivot = function (chunk) {
  const str = chunk.text('normal')
  const breakPoint = chunk.has(breaks)
  return {
    breakPoint,
    root: str
  }
}
export default parsePivot
// build a fact-structure node from a noun chunk
const parseNoun = function (chunk) {
  // root form of the noun, ignoring demonyms like 'Canadian'
  const root = chunk.match('#Noun').not('#Demonym').text('root')
  const [first] = chunk.nouns().json()
  // decorate (and return) the parsed noun object itself
  return Object.assign(first.noun, {
    chunk: 'Noun',
    ptr: chunk.ptrs[0],
    root,
  })
}

export default parseNoun
/* eslint-disable no-console */
import build from '../../builds/three/compromise-three.mjs'
import src from '../../src/three.js'
// test against the bundled build in prod-mode, the raw source otherwise
const isProd = process.env.TESTENV === 'prod'
if (isProd) {
  console.warn('== production build test 🚀 ==')
}
const nlp = isProd ? build : src
export default nlp
// build a fact-structure node from an adjective chunk
const parseAdjective = function (chunk) {
  const root = chunk.text('normal')
  const desc = chunk.adjectives().out('array')
  return {
    chunk: 'Adjective',
    ptr: chunk.ptrs[0],
    root,
    desc,
  }
}

export default parseAdjective
// register additional pos-tags into the world's tag-set
const addTags = function (tags) {
  const world = this.world()
  const merged = world.methods.one.addTags(tags, world.model.one.tagSet)
  world.model.one.tagSet = merged
  return this
}

export default { addTags }
/* eslint-disable no-console */
// import build from '../../builds/four/compromise-four.mjs'
import src from '../../src/four.js'
// no bundled build exists for 'four' yet — bail out of prod-mode tests
if (process.env.TESTENV === 'prod') {
  console.warn('== production build test 🚀 ==')
  // nlp = build
  process.exit()
}
const nlp = src
export default nlp
import methods from './methods/index.js'
import api from './api/index.js'
import lib from './lib.js'
import tagRank from './compute/tagRank.js'

// tag plugin: starts with an empty tag-set, filled-in later by addTags()
const plugin = {
  model: {
    one: { tagSet: {} },
  },
  compute: { tagRank },
  methods,
  api,
  lib,
}
export default plugin
// expand "n't" contractions — "don't" -> ['do', 'not']
const apostropheT = function (terms, i) {
  const word = terms[i].normal
  // "ain't" is ambiguous (am/is/are not) — handled later, in ./two/
  if (word === "ain't" || word === 'aint') {
    return null
  }
  return [word.replace(/n't/, ''), 'not']
}

export default apostropheT
import toInfinitive from './toInfinitive/index.js'
import conjugate from './conjugate/index.js'

// list every conjugated form of a verb (skipping future-tense,
// which is just 'will' + infinitive)
const all = function (str, model) {
  const forms = conjugate(str, model)
  delete forms.FutureTense
  return Object.values(forms).filter(Boolean)
}
export default {
  toInfinitive,
  conjugate,
  all,
}
// normalize accented characters — 'Björk' to 'Bjork'
// using the world's one.unicode char-mapping, when present
const killUnicode = function (str, world) {
  const unicode = world.model.one.unicode || {}
  str = str || ''
  let res = ''
  // iterate by code-point (not .split('')) so surrogate pairs
  // like emoji pass through intact instead of being split in half
  for (const ch of str) {
    res += unicode[ch] || ch
  }
  return res
}
export default killUnicode
import uuid from './uuid.js'

const compute = {
  // assign a unique id to any term that doesn't already have one
  id: function (view) {
    view.docs.forEach((terms) => {
      terms.forEach((term) => {
        term.id = term.id || uuid(term)
      })
    })
  },
}

export default compute
-------------------------------------------------------------------------------- 1 | // number suffixes that are not units 2 | const t = true 3 | export default { 4 | 'st': t, 5 | 'nd': t, 6 | 'rd': t, 7 | 'th': t, 8 | 'am': t, 9 | 'pm': t, 10 | 'max': t, 11 | '°': t, 12 | 's': t, // 1990s 13 | 'e': t, // 18e - french/spanish ordinal 14 | 'er': t, //french 1er 15 | 'ère': t, //'' 16 | 'ème': t, //french 2ème 17 | } -------------------------------------------------------------------------------- /src/1-one/tokenize/methods/03-whitespace/index.js: -------------------------------------------------------------------------------- 1 | import tokenize from './tokenize.js' 2 | 3 | const parseTerm = (txt, model) => { 4 | // cleanup any punctuation as whitespace 5 | const { str, pre, post } = tokenize(txt, model) 6 | const parsed = { 7 | text: str, 8 | pre: pre, 9 | post: post, 10 | tags: new Set(), 11 | } 12 | return parsed 13 | } 14 | export default parseTerm 15 | -------------------------------------------------------------------------------- /plugins/payload/src/debug.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | 3 | // pretty-print each match that has a payload 4 | const debug = function (view) { 5 | view.getPayloads().forEach(res => { 6 | const { match, val } = res 7 | console.log('\n────────') 8 | match.debug('highlight') 9 | console.log(' ', JSON.stringify(val)) 10 | console.log('\n') 11 | }) 12 | } 13 | export default debug 14 | -------------------------------------------------------------------------------- /data/lexicon/dates/weekdays.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'monday', 3 | 'tuesday', 4 | 'wednesday', 5 | 'thursday', 6 | 'friday', 7 | 'saturday', 8 | 'sunday', 9 | 'mon', 10 | 'tues', 11 | // 'wed', 12 | 'thurs', 13 | 'fri', 14 | // 'sat', 15 | // 'sun', 16 | 'mondays', 17 | 'tuesdays', 18 | 'wednesdays', 
19 | 'thursdays', 20 | 'fridays', 21 | 'saturdays', 22 | 'sundays', 23 | ] 24 | -------------------------------------------------------------------------------- /src/1-one/lexicon/plugin.js: -------------------------------------------------------------------------------- 1 | import compute from './compute/index.js' 2 | import methods from './methods/index.js' 3 | import lib from './lib.js' 4 | 5 | const model = { 6 | one: { 7 | lexicon: {}, //setup blank lexicon 8 | _multiCache: {}, 9 | frozenLex: {}, //2nd lexicon 10 | }, 11 | } 12 | 13 | export default { 14 | model, 15 | methods, 16 | compute, 17 | lib, 18 | hooks: ['lexicon'], 19 | } 20 | -------------------------------------------------------------------------------- /src/1-one/change/api/harden.js: -------------------------------------------------------------------------------- 1 | // add indexes to pointers 2 | const harden = function () { 3 | this.ptrs = this.fullPointer 4 | return this 5 | } 6 | // remove indexes from pointers 7 | const soften = function () { 8 | let ptr = this.ptrs 9 | if (!ptr || ptr.length < 1) { 10 | return this 11 | } 12 | ptr = ptr.map(a => a.slice(0, 3)) 13 | this.ptrs = ptr 14 | return this 15 | } 16 | export default { harden, soften } -------------------------------------------------------------------------------- /src/1-one/tokenize/methods/02-terms/02-slashes.js: -------------------------------------------------------------------------------- 1 | const isSlash = /\p{L} ?\/ ?\p{L}+$/u 2 | 3 | // 'he / she' should be one word 4 | const combineSlashes = function (arr) { 5 | for (let i = 1; i < arr.length - 1; i++) { 6 | if (isSlash.test(arr[i])) { 7 | arr[i - 1] += arr[i] + arr[i + 1] 8 | arr[i] = null 9 | arr[i + 1] = null 10 | } 11 | } 12 | return arr 13 | } 14 | export default combineSlashes 15 | -------------------------------------------------------------------------------- /plugins/stats/src/tfidf/tf.js: 
-------------------------------------------------------------------------------- 1 | const tf = function (view, opts = {}) { 2 | const counts = {} 3 | const form = opts.form || 'root' 4 | view.docs.forEach(terms => { 5 | terms.forEach(term => { 6 | const str = term[form] || term.implicit || term.normal 7 | if (str) { 8 | counts[str] = counts[str] || 0 9 | counts[str] += 1 10 | } 11 | }) 12 | }) 13 | return counts 14 | } 15 | export default tf -------------------------------------------------------------------------------- /src/1-one/tokenize/compute/wordCount.js: -------------------------------------------------------------------------------- 1 | const wordCount = function (view) { 2 | let n = 0 3 | const docs = view.docs 4 | for (let i = 0; i < docs.length; i += 1) { 5 | for (let t = 0; t < docs[i].length; t += 1) { 6 | if (docs[i][t].normal === '') { 7 | continue //skip implicit words 8 | } 9 | n += 1 10 | docs[i][t].wordCount = n 11 | } 12 | } 13 | } 14 | 15 | export default wordCount 16 | -------------------------------------------------------------------------------- /tests/one/change/fork.ignore.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[one/fork] ' 4 | 5 | test('fork basic:', function (t) { 6 | const before = nlp.fork() 7 | const a = before('hello donkey kong', { donkey: 'Person' }) 8 | const b = nlp('donkey') 9 | t.equal(a.has('#Person'), true, here + 'has person') 10 | t.equal(b.has('#Person'), false, here + 'does not have person') 11 | t.end() 12 | }) 13 | -------------------------------------------------------------------------------- /plugins/_experiments/ast/src/plugin.js: -------------------------------------------------------------------------------- 1 | import toAst from './ast.js' 2 | import toLines from './lines.js' 3 | import compute from './compute/index.js' 4 | 5 | 6 | 7 | export default { 8 | compute, 9 | 10 | api: function 
(View) { 11 | 12 | View.prototype.lines = function () { 13 | return toLines(this) 14 | } 15 | 16 | 17 | View.prototype.ast = function (opts) { 18 | return toAst(this, opts) 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /src/3-three/numbers/fractions/find.js: -------------------------------------------------------------------------------- 1 | const findFractions = function (doc, n) { 2 | // five eighths 3 | let m = doc.match('#Fraction+') 4 | // remove 'two and five eights' 5 | m = m.filter(r => { 6 | return !r.lookBehind('#Value and$').found 7 | }) 8 | // thirty seconds 9 | m = m.notIf('#Value seconds') 10 | 11 | if (typeof n === 'number') { 12 | m = m.eq(n) 13 | } 14 | return m 15 | } 16 | export default findFractions 17 | -------------------------------------------------------------------------------- /demos/web-worker/_worker.js: -------------------------------------------------------------------------------- 1 | /* global importScripts */ 2 | 3 | //loads and runs compromise inside the worker-instance 4 | self.addEventListener( 5 | 'message', 6 | function (e) { 7 | // importScripts('https://unpkg.com/compromise@next') 8 | importScripts('../../builds/compromise.js') 9 | const doc = self.nlp(e.data) 10 | const m = doc.places() 11 | self.postMessage(m.json({ count: true, unique: true })) 12 | }, 13 | false 14 | ) -------------------------------------------------------------------------------- /plugins/wikipedia/scratch.js: -------------------------------------------------------------------------------- 1 | // import corpus from 'nlp-corpus' 2 | import nlp from '../../src/one.js' 3 | import plugin from './src/plugin.js' 4 | nlp.extend(plugin) 5 | 6 | let txt = '' 7 | txt = `Moreover, it is always possible to consolidate for discovery different cases that involve construction of the same claims.` 8 | 9 | txt = 'i saw the toronto raptors play a cleveland foops' 10 | const doc = nlp(txt) 11 | const m = 
doc.wikipedia() 12 | m.debug() -------------------------------------------------------------------------------- /scripts/perf/flame/index.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import txt from './_sotu-text.js' 3 | 4 | console.log('\n-- testing: --') 5 | console.time('load') 6 | import nlp from '../../../src/three.js' 7 | console.timeEnd('load') 8 | 9 | console.time('parse') 10 | const doc = nlp(txt) 11 | console.timeEnd('parse') 12 | 13 | console.time('match') 14 | doc.match('#Noun') 15 | console.timeEnd('match') 16 | console.log('\n v' + nlp.version, '\n') 17 | -------------------------------------------------------------------------------- /src/1-one/tag/methods/canBe.js: -------------------------------------------------------------------------------- 1 | // quick check if this tag will require any untagging 2 | const canBe = function (term, tag, tagSet) { 3 | if (!tagSet.hasOwnProperty(tag)) { 4 | return true // everything can be an unknown tag 5 | } 6 | const not = tagSet[tag].not || [] 7 | for (let i = 0; i < not.length; i += 1) { 8 | if (term.tags.has(not[i])) { 9 | return false 10 | } 11 | } 12 | return true 13 | } 14 | export default canBe 15 | -------------------------------------------------------------------------------- /plugins/payload/scratch.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console, no-unused-vars */ 2 | 3 | import nlp from '../../src/three.js' 4 | // import plugin from './src/plugin.js' 5 | import plugin from './builds/compromise-payload.mjs' 6 | nlp.extend(plugin) 7 | 8 | const doc = nlp('i saw John Lennon, and john smith and bob dylan') 9 | 10 | doc.match('(john|bob|dave) .').addPayload(m => { 11 | return { lastName: m.terms().last().text() } 12 | }) 13 | console.log(doc.getPayloads()) 14 | -------------------------------------------------------------------------------- 
/src/2-two/preTagger/methods/transform/nouns/index.js: -------------------------------------------------------------------------------- 1 | import toPlural from './toPlural/index.js' 2 | import toSingular from './toSingular/index.js' 3 | 4 | const all = function (str, model) { 5 | const arr = [str] 6 | const p = toPlural(str, model) 7 | if (p !== str) { 8 | arr.push(p) 9 | } 10 | const s = toSingular(str, model) 11 | if (s !== str) { 12 | arr.push(s) 13 | } 14 | return arr 15 | } 16 | 17 | export default { toPlural, toSingular, all } 18 | -------------------------------------------------------------------------------- /src/API/_lib.js: -------------------------------------------------------------------------------- 1 | 2 | /** log the decision-making to console */ 3 | const verbose = function (set) { 4 | const env = typeof process === 'undefined' || !process.env ? self.env || {} : process.env //use window, in browser 5 | env.DEBUG_TAGS = set === 'tagger' || set === true ? true : '' 6 | env.DEBUG_MATCH = set === 'match' || set === true ? true : '' 7 | env.DEBUG_CHUNKS = set === 'chunker' || set === true ? 
true : '' 8 | return this 9 | } 10 | 11 | export { verbose } -------------------------------------------------------------------------------- /tests/hmm.js: -------------------------------------------------------------------------------- 1 | // test('tokenize() accepts lexicon param', function (t) { 2 | // let doc = nlp.tokenize('spencer kelly is working here', { 3 | // 'spencer kelly': 'Person', 4 | // working: 'NotFun', 5 | // }) 6 | // t.equal(doc.match('#Person+').text(), 'spencer kelly', here + 'used tag') 7 | // t.equal(doc.match('#NotFun').text(), 'working', here + 'used 2nd tag') 8 | // t.equal(doc.has('#Verb'), false, here + 'not a full tag') 9 | // t.end() 10 | // }) 11 | -------------------------------------------------------------------------------- /data/pairs/index.js: -------------------------------------------------------------------------------- 1 | import Comparative from './Comparative.js' 2 | import Gerund from './Gerund.js' 3 | import Participle from './Participle.js' 4 | import PastTense from './PastTense.js' 5 | import PresentTense from './PresentTense.js' 6 | import Superlative from './Superlative.js' 7 | import AdjToNoun from './AdjToNoun.js' 8 | 9 | export default { 10 | Comparative, 11 | Gerund, 12 | Participle, 13 | PastTense, 14 | PresentTense, 15 | Superlative, 16 | AdjToNoun, 17 | } -------------------------------------------------------------------------------- /scripts/perf/pool/_lib.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import src from '../../../src/three.js' 3 | import build from '../../../builds/three/compromise-three.mjs' 4 | 5 | let nlp 6 | if (process.env.TESTENV === 'prod') { 7 | console.warn('== production build test 🚀 ==') 8 | // nlp = require('../../../builds/compromise.min.js') 9 | nlp = build 10 | } else { 11 | nlp = src 12 | // nlp.extend(require('../plugins/numbers/src')) 13 | } 14 | 15 | export default nlp 16 | 
-------------------------------------------------------------------------------- /src/two.js: -------------------------------------------------------------------------------- 1 | import nlp from './one.js' 2 | import preTag from './2-two/preTagger/plugin.js' 3 | import contractionTwo from './2-two/contraction-two/plugin.js' 4 | import postTag from './2-two/postTagger/plugin.js' 5 | import lazy from './2-two/lazy/plugin.js' 6 | import swap from './2-two/swap/plugin.js' 7 | 8 | nlp.plugin(preTag) //~103kb 9 | nlp.plugin(contractionTwo) // 10 | nlp.plugin(postTag) //~33kb 11 | nlp.plugin(lazy) // 12 | nlp.plugin(swap) // 13 | 14 | export default nlp 15 | -------------------------------------------------------------------------------- /plugins/stats/src/compute.js: -------------------------------------------------------------------------------- 1 | 2 | const compute = { 3 | // this is just the same thing 4 | // but written to Term objects 5 | tfidf: (view) => { 6 | let res = view.tfidf() 7 | res = res.reduce((h, a) => { 8 | h[a[0]] = a[1] 9 | return h 10 | }, {}) 11 | view.docs.forEach(terms => { 12 | terms.forEach(term => { 13 | term.tfidf = res[term.root || term.implicit || term.normal] || 0 14 | }) 15 | }) 16 | } 17 | } 18 | export default compute -------------------------------------------------------------------------------- /plugins/stats/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/one/compromise-one.mjs' 2 | import src from '../../../src/one.js' 3 | import plgBuild from '../builds/compromise-stats.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | 
-------------------------------------------------------------------------------- /src/2-two/preTagger/model/clues/person-adj.js: -------------------------------------------------------------------------------- 1 | import person from './_person.js' 2 | import adj from './_adj.js' 3 | 4 | // 'rusty nail' - 'rusty smith' 5 | const clues = { 6 | beforeTags: Object.assign({}, person.beforeTags, adj.beforeTags), 7 | afterTags: Object.assign({}, person.afterTags, adj.afterTags), 8 | beforeWords: Object.assign({}, person.beforeWords, adj.beforeWords), 9 | afterWords: Object.assign({}, person.afterWords, adj.afterWords), 10 | } 11 | export default clues -------------------------------------------------------------------------------- /plugins/dates/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/three/compromise-three.mjs' 2 | import src from '../../../src/three.js' 3 | import plgBuild from '../builds/compromise-dates.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | -------------------------------------------------------------------------------- /plugins/speech/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/one/compromise-one.mjs' 2 | import src from '../../../src/one.js' 3 | import plgBuild from '../builds/compromise-speech.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | 
-------------------------------------------------------------------------------- /plugins/stats/src/ngram/sort.js: -------------------------------------------------------------------------------- 1 | const sort = function (arr) { 2 | arr = arr.sort((a, b) => { 3 | //first sort them by count 4 | if (a.count > b.count) { 5 | return -1 6 | } 7 | if (a.count < b.count) { 8 | return 1 9 | } 10 | // in a tie, sort them by size 11 | if (a.size > b.size) { 12 | return -1 13 | } 14 | if (a.size < b.size) { 15 | return 1 16 | } 17 | return 0 18 | }) 19 | return arr 20 | } 21 | export default sort 22 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/clues/unit-noun.js: -------------------------------------------------------------------------------- 1 | // '5 oz' - 'dr oz' 2 | const un = 'Unit' 3 | const clues = { 4 | beforeTags: { Value: un }, 5 | afterTags: {}, 6 | beforeWords: { 7 | per: un, 8 | every: un, 9 | each: un, 10 | square: un, //square km 11 | cubic: un, 12 | sq: un, 13 | metric: un //metric ton 14 | }, 15 | afterWords: { 16 | per: un, 17 | squared: un, 18 | cubed: un, 19 | long: un //foot long 20 | }, 21 | } 22 | export default clues -------------------------------------------------------------------------------- /plugins/paragraphs/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/one/compromise-one.mjs' 2 | import src from '../../../src/one.js' 3 | import plgBuild from '../builds/compromise-paragraphs.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | -------------------------------------------------------------------------------- /plugins/payload/tests/_lib.js: 
-------------------------------------------------------------------------------- 1 | import src from '../../../src/one.js' 2 | import build from '../../../builds/one/compromise-one.mjs' 3 | 4 | import plg from '../src/plugin.js' 5 | import plgBuild from '../builds/compromise-payload.mjs' 6 | let nlp 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | 16 | export default nlp 17 | -------------------------------------------------------------------------------- /plugins/wikipedia/tests/_lib.js: -------------------------------------------------------------------------------- 1 | import build from '../../../builds/one/compromise-one.mjs' 2 | import src from '../../../src/one.js' 3 | import plgBuild from '../builds/compromise-wikipedia.mjs' 4 | import plg from '../src/plugin.js' 5 | let nlp; 6 | 7 | if (process.env.TESTENV === 'prod') { 8 | console.warn('== production build test 🚀 ==') // eslint-disable-line 9 | nlp = build 10 | nlp.plugin(plgBuild) 11 | } else { 12 | nlp = src 13 | nlp.plugin(plg) 14 | } 15 | export default nlp 16 | -------------------------------------------------------------------------------- /plugins/dates/src/api/parse/one/units/index.js: -------------------------------------------------------------------------------- 1 | 2 | export { default as Unit } from './Unit.js' 3 | export { Day, WeekDay, CalendarDate, Holiday } from './_day.js' 4 | export { Hour, Minute, Moment } from './_time.js' 5 | export { AnyMonth, Month, Quarter, AnyQuarter, Season, Year } from './_year.js' 6 | export { Week, WeekEnd } from './_week.js' 7 | 8 | // export { Unit, Day, WeekDay, CalendarDate, Holiday, Hour, Minute, Moment, AnyMonth, Month, Quarter, AnyQuarter, Season, Year, Week, WeekEnd } 9 | 10 | -------------------------------------------------------------------------------- 
/plugins/dates/src/model/tags.js: -------------------------------------------------------------------------------- 1 | export default { 2 | FinancialQuarter: { 3 | is: 'Date', 4 | not: ['Fraction'], 5 | }, 6 | // 'summer' 7 | Season: { 8 | is: 'Date', 9 | }, 10 | // '1982' 11 | Year: { 12 | is: 'Date', 13 | not: ['RomanNumeral'], 14 | }, 15 | // 'easter' 16 | Holiday: { 17 | is: 'Date', 18 | also: 'Noun', 19 | }, 20 | // 'two weeks before' 21 | DateShift: { 22 | is: 'Date', 23 | not: ['Timezone', 'Holiday'], 24 | }, 25 | } 26 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/clues/actor-verb.js: -------------------------------------------------------------------------------- 1 | import noun from './_noun.js' 2 | import verb from './_verb.js' 3 | // 'the pilot' vs 'pilot the plane' 4 | const clue = { 5 | beforeTags: Object.assign({}, verb.beforeTags, noun.beforeTags, { 6 | }), 7 | afterTags: Object.assign({}, verb.afterTags, noun.afterTags, {}), 8 | beforeWords: Object.assign({}, verb.beforeWords, noun.beforeWords, {}), 9 | afterWords: Object.assign({}, verb.afterWords, noun.afterWords, {}), 10 | } 11 | 12 | export default clue -------------------------------------------------------------------------------- /tests/three/sentences/negative.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[three/sentence-negative] ' 4 | 5 | test('sentences.toPositive', function (t) { 6 | const doc = nlp(`do not use reverse psychology.`) 7 | doc.sentences().toPositive() 8 | t.equal(doc.text(), 'use reverse psychology.', here + 'neg') 9 | 10 | doc.sentences().toNegative() 11 | t.equal(doc.text(), 'do not use reverse psychology.', here + 'back to neg') 12 | 13 | t.end() 14 | }) 15 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/_expand/irregulars.js: 
-------------------------------------------------------------------------------- 1 | // import irregularVerbs from './conjugations.js' 2 | // harvest list of irregulars for any juicy word-data 3 | const expandIrregulars = function (model) { 4 | const { irregularPlurals } = model.two 5 | const { lexicon } = model.one 6 | Object.entries(irregularPlurals).forEach(a => { 7 | lexicon[a[0]] = lexicon[a[0]] || 'Singular' 8 | lexicon[a[1]] = lexicon[a[1]] || 'Plural' 9 | }) 10 | return model 11 | } 12 | export default expandIrregulars 13 | -------------------------------------------------------------------------------- /src/2-two/lazy/lazyParse.js: -------------------------------------------------------------------------------- 1 | import maybeMatch from './maybeMatch.js' 2 | 3 | // tokenize first, then only tag sentences required 4 | const lazyParse = function (input, reg) { 5 | let net = reg 6 | if (typeof reg === 'string') { 7 | net = this.buildNet([{ match: reg }]) 8 | } 9 | const doc = this.tokenize(input) 10 | const m = maybeMatch(doc, net) 11 | if (m.found) { 12 | m.compute(['index', 'tagger']) 13 | return m.match(reg) 14 | } 15 | return doc.none() 16 | } 17 | export default lazyParse -------------------------------------------------------------------------------- /src/4-four/facts/parse/postProcess.js: -------------------------------------------------------------------------------- 1 | const postProcess = function (parts) { 2 | 3 | for (let i = 1; i < parts.length; i += 1) { 4 | // is it missing a subject? 
5 | // borrow the last one 6 | if (!parts[i].subj && parts[i].verb) { 7 | for (let o = i; o >= 0; o -= 1) { 8 | if (parts[o].subj) { 9 | parts[i].subj = Object.assign({ borrowed: true }, parts[o].subj) 10 | break 11 | } 12 | } 13 | } 14 | } 15 | return parts 16 | } 17 | export default postProcess -------------------------------------------------------------------------------- /scripts/typescript/two.ts: -------------------------------------------------------------------------------- 1 | // import nlp from '../../src/three.js' 2 | import nlp from '../../types/two' 3 | 4 | const doc = nlp('okay cool') 5 | 6 | // ### Pre-tagger 7 | doc.compute('preTagger') 8 | doc.compute('root') 9 | doc.compute('penn') 10 | 11 | // ### Contraction-two 12 | doc.compute('contractionTwo') 13 | doc.contractions() 14 | doc.contractions().expand() 15 | doc.contract() 16 | 17 | // ### Post-tagger 18 | doc.compute('postTagger') 19 | doc.confidence() 20 | 21 | 22 | // ### Swap 23 | doc.swap('', '') 24 | -------------------------------------------------------------------------------- /src/4-four/facts/parse/verb.js: -------------------------------------------------------------------------------- 1 | const parseVerb = function (chunk) { 2 | const obj = chunk.verbs().json()[0].verb 3 | return { 4 | chunk: 'Verb', 5 | ptr: obj.ptr = chunk.ptrs[0], 6 | desc: obj.preAdverbs.concat(obj.postAdverbs), 7 | negative: obj.negative, 8 | root: obj.infinitive, 9 | tense: obj.grammar.tense, 10 | copula: obj.grammar.copula, 11 | imperative: chunk.has('#Imperative'), 12 | hypothetical: chunk.has('(would|could) #Adverb? 
have') 13 | } 14 | } 15 | export default parseVerb -------------------------------------------------------------------------------- /tests/four/match.ignore.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from './_lib.js' 3 | const here = '[one/match] ' 4 | 5 | const arr = [ 6 | ['cold', '{cold/temperature}'], 7 | ['cold demeanor', '{cold/attitude}'], 8 | ['he will plug his book', '{plug/sell}'], 9 | ] 10 | test('match:', function (t) { 11 | arr.forEach(function (a) { 12 | const doc = nlp(a[0]) 13 | const msg = `'${(a[0] + "' ").padEnd(20, '.')} - '${a[1]}'` 14 | t.equal(doc.has(a[1]), true, here + msg) 15 | }) 16 | t.end() 17 | }) 18 | -------------------------------------------------------------------------------- /plugins/speed/src/lazyParse/lazyParse.js: -------------------------------------------------------------------------------- 1 | import maybeMatch from './maybeMatch.js' 2 | 3 | // tokenize first, then only tag sentences required 4 | const lazyParse = function (input, reg) { 5 | let net = reg 6 | if (typeof reg === 'string') { 7 | net = this.buildNet([{ match: reg }]) 8 | } 9 | const doc = this.tokenize(input) 10 | const m = maybeMatch(doc, net) 11 | if (m.found) { 12 | m.compute(['index', 'tagger']) 13 | return m.match(reg) 14 | } 15 | return doc.none() 16 | } 17 | export default lazyParse -------------------------------------------------------------------------------- /src/1-one/lookup/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import compress from './api/buildTrie/compress.js' 3 | import build from './api/buildTrie/index.js' 4 | 5 | /** pre-compile a list of matches to lookup */ 6 | const lib = { 7 | /** turn an array or object into a compressed trie*/ 8 | buildTrie: function (input) { 9 | const trie = build(input, this.world()) 10 | return compress(trie) 11 | } 12 | } 13 | // add 
alias 14 | lib.compile = lib.buildTrie 15 | 16 | export default { 17 | api, 18 | lib 19 | } 20 | -------------------------------------------------------------------------------- /src/2-two/preTagger/compute/tagger/3rd-pass/07-verb-type.js: -------------------------------------------------------------------------------- 1 | const verbType = function (terms, i, model, world) { 2 | const setTag = world.methods.one.setTag 3 | const term = terms[i] 4 | const types = ['PastTense', 'PresentTense', 'Auxiliary', 'Modal', 'Particle'] 5 | if (term.tags.has('Verb')) { 6 | const type = types.find(typ => term.tags.has(typ)) 7 | // is it a bare #Verb tag? 8 | if (!type) { 9 | setTag([term], 'Infinitive', world, null, `2-verb-type''`) 10 | } 11 | } 12 | } 13 | export default verbType -------------------------------------------------------------------------------- /plugins/speed/src/plugin.js: -------------------------------------------------------------------------------- 1 | import streamFile from './stream/streamFile.js' 2 | import keyPress from './keypress/index.js' 3 | import workerPool from './workerPool/plugin.js' 4 | import lazyParse from './lazyParse/plugin.js' 5 | import version from './_version.js' 6 | 7 | // combine all the plugins 8 | const plugin = { 9 | lib: Object.assign({}, streamFile.lib, keyPress.lib, workerPool.lib, lazyParse.lib), 10 | version: version 11 | } 12 | 13 | export { streamFile, keyPress, workerPool, lazyParse } 14 | export default plugin 15 | 16 | -------------------------------------------------------------------------------- /scripts/test/coverage.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | const exec = require('shelljs').exec 3 | const codecov = '15039ad1-b495-48cd-b4a0-bcf124c9b318' //i don't really care if you steal this. 
4 | 5 | //let cmd=`./node_modules/.bin/nyc --reporter=text ./node_modules/.bin/tape ./test/**/*.test.js` 6 | //run all the tests 7 | const cmd = `./node_modules/.bin/c8 --reporter=text-lcov ./node_modules/.bin/tape ./tests/**/*.test.js > coverage.lcov && ./node_modules/.bin/codecov -t ${codecov}` 8 | exec(cmd) 9 | console.log('\n 🏃 done!') 10 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/clues/person-noun.js: -------------------------------------------------------------------------------- 1 | import person from './_person.js' 2 | import noun from './_noun.js' 3 | 4 | // 'babling brook' vs 'brook sheilds' 5 | 6 | const clue = { 7 | beforeTags: Object.assign({}, noun.beforeTags, person.beforeTags), 8 | afterTags: Object.assign({}, noun.afterTags, person.afterTags), 9 | beforeWords: Object.assign({}, noun.beforeWords, person.beforeWords, { i: 'Infinitive', we: 'Infinitive' }), 10 | afterWords: Object.assign({}, noun.afterWords, person.afterWords), 11 | } 12 | export default clue -------------------------------------------------------------------------------- /tests/two/misc/misc.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[two/misc] ' 4 | 5 | test('non-coercive-lex', function (t) { 6 | let doc = nlp('the Spencer Kelly', { kelly: 'Verb' }) 7 | t.equal(doc.has('#Verb'), false, here + 'still a person') 8 | 9 | doc = nlp('i kelly', { kelly: 'Verb' }) 10 | t.equal(doc.has('#Verb'), true, here + 'now coerced') 11 | 12 | doc = nlp('the Kelly', { kelly: 'Verb' }) 13 | t.equal(doc.has('#ProperNoun'), true, here + 'titlecase') 14 | t.end() 15 | }) 16 | -------------------------------------------------------------------------------- /plugins/_experiments/markdown/src/parse/index.js: -------------------------------------------------------------------------------- 1 | import { fromMarkdown } 
import { fromMarkdown } from 'mdast-util-from-markdown'
import { gfmTable } from 'micromark-extension-gfm-table'
import { gfmTableFromMarkdown } from 'mdast-util-gfm-table'
// import { frontmatterFromMarkdown, frontmatterToMarkdown } from 'mdast-util-frontmatter'

// parse a markdown string into an mdast syntax-tree,
// with github-flavoured table support enabled
const parseMd = function (md) {
  return fromMarkdown(md, {
    extensions: [gfmTable],
    mdastExtensions: [gfmTableFromMarkdown],
  })
}
export default parseMd
// combine '2 - 5' like '2-5' is
// 2-4: 2, 4
const combineRanges = function (tokens) {
  // '2 - ' / '2:30pm - ' style left-hand side
  const rangeStart = /^[0-9]{1,4}(:[0-9][0-9])?([a-z]{1,2})? ?[-–—] ?$/
  // '5' / '5pm ' style right-hand side
  const rangeEnd = /^[0-9]{1,4}([a-z]{1,2})? ?$/
  let k = 0
  while (k < tokens.length - 1) {
    const next = tokens[k + 1]
    if (next && rangeStart.test(tokens[k]) && rangeEnd.test(next)) {
      // merge the pair into the left token; blank the right one
      tokens[k] += next
      tokens[k + 1] = null
    }
    k += 1
  }
  return tokens
}
export default combineRanges
// assume any discovered prefixes -
// fill-in the full word for a trailing typeahead term
const autoFill = function () {
  const docs = this.docs
  if (docs.length === 0) {
    return this
  }
  const lastPhrase = docs[docs.length - 1] || []
  const term = lastPhrase[lastPhrase.length - 1]
  // fix: guard against an empty last sentence, where `term` is undefined
  if (term && term.typeahead === true && term.machine) {
    term.text = term.machine
    term.normal = term.machine
  }
  return this
}

const api = function (View) {
  View.prototype.autoFill = autoFill
}
export default api
// pull-out 'thurs' from 'thurs next week'
const parseWeekday = function (doc) {
  const day = doc.match('#WeekDay')
  // bail when there is no weekday, or the phrase is nothing but one
  if (day.found === false || doc.has('^#WeekDay$')) {
    return { result: null, m: doc.none() }
  }
  // handle relative-day logic elsewhere.
  if (doc.has('(this|next|last) (next|upcoming|coming|past)? #WeekDay')) {
    return { result: null, m: doc.none() }
  }
  return { result: day.text('reduced'), m: day }
}
export default parseWeekday
import { find, strip } from './fns.js'

const api = function (View) {
  /** a sub-view of text wrapped in (parentheses) */
  class Parentheses extends View {
    constructor(document, pointer, groups) {
      super(document, pointer, groups)
      // fix: was mistakenly 'Possessives' (copy-paste from possessives/index.js)
      this.viewType = 'Parentheses'
    }
    /** remove the wrapping parenthesis punctuation */
    strip() {
      return strip(this)
    }
  }

  /** find parenthetical statements */
  View.prototype.parentheses = function (n) {
    let m = find(this)
    m = m.getNth(n)
    return new Parentheses(m.document, m.pointer)
  }
}
export default api
import { find, strip } from './fns.js'

const api = function (View) {
  /** a sub-view of "quoted" text */
  class Quotations extends View {
    constructor(document, pointer, groups) {
      super(document, pointer, groups)
      // fix: was mistakenly 'Possessives' (copy-paste from possessives/index.js)
      this.viewType = 'Quotations'
    }
    /** remove the wrapping quotation punctuation */
    strip() {
      return strip(this)
    }
  }

  /** find quoted statements */
  View.prototype.quotations = function (n) {
    let m = find(this)
    m = m.getNth(n)
    return new Quotations(m.document, m.pointer)
  }
}
export default api
// [pattern, Tag, example] triples for date-ish token regexes
const regexList = [
  // 30sec
  [/^[0-9]+(min|sec|hr|d)s?$/i, 'Duration', '30min'],
  // 2012-06
  [/^[0-9]{4}-[0-9]{2}$/, 'Date', '2012-06'],
  // 13h30
  [/^[0-9]{2}h[0-9]{2}$/i, 'Time', '13h30'],
  // @4:30
  [/^@[0-9]+:[0-9]{2}$/, 'Time', '@5:30'],
  // @4pm — fix: old /^@[1-9]+(am|pm)$/ missed @10pm/@11am/@12pm and matched invalid @99pm
  [/^@(?:1[0-2]|[1-9])(am|pm)$/, 'Time', '@5pm'],
  // 03/02
  [/^(?:0[1-9]|[12]\d|3[01])\/(?:0[1-9]|[12]\d|3[01])$/, 'Date', '03/02'],
  // iso-time
  // [/^[0-9]{4}[:-][0-9]{2}[:-][0-9]{2}T[0-9]/i, 'Time', 'iso-time-tag']
]
export default regexList
import addAcronyms from './acronyms/index.js'
import addParentheses from './parentheses/index.js'
import addPossessives from './possessives/index.js'
import addQuotations from './quotations/index.js'
import addSelections from './selections/index.js'
import addSlashes from './slashes/index.js'

// bundle the misc sub-view plugins into one api hook
export default {
  api: function (View) {
    const extenders = [addAcronyms, addParentheses, addPossessives, addQuotations, addSelections, addSlashes]
    extenders.forEach(fn => fn(View))
  },
}
import https from 'https'

// GET a url and resolve with the response body parsed as JSON
const fetch = function (url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, resp => {
        let data = ''
        resp.on('data', chunk => {
          data += chunk
        })
        resp.on('end', () => {
          // fix: a malformed body used to throw uncaught inside this handler
          try {
            resolve(JSON.parse(data))
          } catch (err) {
            reject(new Error(`could not parse response from ${url}`, { cause: err }))
          }
        })
      })
      .on('error', err => {
        console.error('Error: ' + err.message) // eslint-disable-line
        // fix: propagate the actual error instead of rejecting with undefined
        reject(err)
      })
  })
}

export default fetch
//combine them with .topics() method
const find = function (n) {
  const r = this.clauses()
  // Find people, places, and organizations
  let m = r.people()
  m = m.concat(r.places())
  m = m.concat(r.organizations())
  // drop generic person-words
  m = m.not('(someone|man|woman|mother|brother|sister|father)')
  //return them to normal ordering
  m = m.sort('seq')
  // m = m.unique()
  return m.getNth(n)
}

const api = function (View) {
  View.prototype.topics = find
}
export default api
//sweep-through all suffixes, longest (6 chars) first
const suffixLoop = function (str = '', suffixes = []) {
  const size = str.length
  // never consume the whole word
  let longest = 6
  if (size <= 6) {
    longest = size - 1
  }
  for (let n = longest; n >= 1; n -= 1) {
    const ending = str.substring(size - n, str.length)
    const table = suffixes[ending.length]
    if (table.hasOwnProperty(ending) === true) {
      // found one - swap the ending for its replacement
      return str.slice(0, size - n) + table[ending]
    }
  }
  return null
}
export default suffixLoop
// post-tagger match-rules for money amounts
const money = [
  { match: '#Money and #Money #Currency?', tag: 'Money', reason: 'money-and-money' },
  // 6 dollars and 5 cents
  // fix: tag was lowercase 'money' — tags are case-sensitive; siblings use 'Money'
  { match: '#Value #Currency [and] #Value (cents|ore|centavos|sens)', group: 0, tag: 'Money', reason: 'and-5-cents' },
  // maybe currencies
  { match: '#Value (mark|rand|won|rub|ore)', tag: '#Money #Currency', reason: '4-mark' },
  // 3 pounds
  { match: 'a pound', tag: '#Money #Unit', reason: 'a-pound' },
  { match: '#Value (pound|pounds)', tag: '#Money #Unit', reason: '4-pounds' },
]
export default money
// is this tag consistent with the tags they already have?
const canBe = function (terms, tag, model) {
  const tagSet = model.one.tagSet
  // a tag we don't know about can't conflict
  if (!tagSet.hasOwnProperty(tag)) {
    return true
  }
  const conflicts = tagSet[tag].not || []
  // every term must be free of every conflicting tag
  return terms.every(term => conflicts.every(no => term.tags.has(no) !== true))
}
export default canBe
import guess from './_guess.js'

/** it helps to know what we're conjugating from */
const getTense = function (str) {
  // try the 3-letter suffix, then the 2-letter suffix
  for (const n of [3, 2]) {
    const suffix = str.substring(str.length - n)
    if (guess.hasOwnProperty(suffix) === true) {
      return guess[suffix]
    }
  }
  // a trailing 's' is a decent present-tense hint
  if (str.endsWith('s')) {
    return 'PresentTense'
  }
  return null
}
export default getTense
// plain words (not tags/switches) listed in the net's hooks
const getWords = function (net) {
  return Object.keys(net.hooks).filter(w => !w.startsWith('#') && !w.startsWith('%'))
}

// cheaply drop sentences that can't possibly match this net
const maybeMatch = function (doc, net) {
  // must have *atleast* one of these words
  const wanted = getWords(net)
  if (wanted.length === 0) {
    return doc
  }
  if (!doc._cache) {
    doc.cache()
  }
  const lookup = doc._cache
  // return sentences that have one of our needed words
  return doc.filter((_m, i) => wanted.some(str => lookup[i].has(str)))
}
export default maybeMatch
// keep existing tags when replacing the root text
const keep = { tags: true }

// turn a plural noun phrase into its singular form
const nounToSingular = function (m, parsed) {
  // already singular?
  if (parsed.isPlural === false) {
    return m
  }
  const { methods, model } = m.world
  const { toSingular } = methods.two.transform.noun
  // inflect the root noun
  const str = parsed.root.text('normal')
  const single = toSingular(str, model)
  // fix: reason was mistakenly 'toPlural' (copy-pasted from toPlural.js)
  m.replace(parsed.root, single, keep).tag('Singular', 'toSingular')
  // should we change the determiner/article?
  // m.debug()
  return m
}
export default nounToSingular
// common verb/noun prefixes we can peel off
const prefix = /^(under|over|mis|re|un|dis|semi)-?/

// mark terms that have a registered tag-switch (ambiguous pos), like 'strike'
const tagSwitch = function (terms, i, model) {
  const switches = model.two.switches
  const t = terms[i]
  // direct lookup first
  if (switches.hasOwnProperty(t.normal)) {
    t.switch = switches[t.normal]
    return
  }
  // support 'restrike' -> 'strike'
  if (prefix.test(t.normal) === true) {
    const base = t.normal.replace(prefix, '')
    if (base.length > 3 && switches.hasOwnProperty(base)) {
      t.switch = switches[base]
    }
  }
}
export default tagSwitch
// borrow a reference from another pronoun in this sentence -
// 'mike is tall, [he] climbs and [he] swims'
const findChained = function (want, s) {
  const hit = s.match(want)
  if (!hit.found) {
    return s.none()
  }
  const ref = hit.pronouns().refersTo()
  return ref.found ? ref : s.none()
}

// step the pointer back one sentence, when possible
const prevSentence = function (m) {
  if (!m.found) {
    return m
  }
  const [n] = m.fullPointer[0]
  // already at the first sentence?
  if (!n || n <= 0) {
    return m.none()
  }
  return m.update([[n - 1]])
}
export { prevSentence, findChained }
// list a net's plain-word hooks (skip '#Tag' and '%Switch%' keys)
const getWords = function (net) {
  const words = []
  for (const key of Object.keys(net.hooks)) {
    if (!key.startsWith('#') && !key.startsWith('%')) {
      words.push(key)
    }
  }
  return words
}

// keep only the sentences that could possibly match this net -
// a sentence must contain at-least one of the net's word hooks
const maybeMatch = function (doc, net) {
  const words = getWords(net)
  // no word-hooks? we can't pre-filter anything
  if (words.length === 0) {
    return doc
  }
  // lazily build the per-sentence word cache
  if (!doc._cache) {
    doc.cache()
  }
  const cache = doc._cache
  return doc.filter((_m, i) => words.some(str => cache[i].has(str)))
}
export default maybeMatch
(2019) foo').match(regs) 8 | t.equal(m.text(), 'may, (2019)', here + '(pre') 9 | 10 | regs = [{ word: 'may' }, { post: ')' }] 11 | m = nlp('may, (2019) foo').match(regs) 12 | t.equal(m.text(), 'may, (2019)', here + 'post)') 13 | 14 | regs = [{ post: ',' }] 15 | m = nlp('may, (2019) foo').match(regs) 16 | t.equal(m.text(), 'may,', here + 'post,') 17 | t.end() 18 | }) -------------------------------------------------------------------------------- /types/view/two.d.ts: -------------------------------------------------------------------------------- 1 | import type View from './one.d.ts' 2 | 3 | interface Two extends View { 4 | /** return any multi-word terms, like "didn't" */ 5 | contractions: (n?: number) => Contractions 6 | /** contract words that can combine, like "did not" */ 7 | contract: () => View 8 | /** Average measure of tag confidence */ 9 | confidence: () => number 10 | /** smart-replace root forms */ 11 | swap: (fromLemma: string, toLemma: string, guardTag?: string) => View 12 | } 13 | 14 | 15 | interface Contractions extends View { 16 | /** turn "i've" into "i have" */ 17 | expand: () => View 18 | } 19 | 20 | export default Two 21 | -------------------------------------------------------------------------------- /tests/three/redact.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from './_lib.js' 3 | const here = '[three/redact] ' 4 | 5 | test('redact:', function (t) { 6 | const arr = [ 7 | [`spencer from 234 Main st at 423-3242 and spencer@gmail.com.`, '██████████ from ██████████ at ███████ and ██████████.'], 8 | [`in Toronto, Canada!`, `in ██████████!`], 9 | [`with Dr. 
// https://stackoverflow.com/questions/9781218/how-to-change-node-jss-console-font-color
// minimal ansi-escape helpers (cheaper than requiring chalk)
const reset = '\x1b[0m'

// build a colorizer for one ansi code
const paint = code => str => '\x1b[' + code + 'm' + str + reset

const cli = {
  green: paint('32'),
  red: paint('31'),
  blue: paint('34'),
  magenta: paint('35'),
  cyan: paint('36'),
  yellow: paint('33'),
  black: paint('30'),
  dim: paint('2'),
  i: paint('3'),
}
export default cli
/**
 * turn big numbers, like 2.3e+22, into a full decimal string
 * with all of their trailing 0's written-out.
 * accepts a Number or a numeric String; small numbers pass through unchanged.
 * */
const numToString = function (n) {
  // small numbers never use e-notation - return as-is.
  // (fix: use abs() so negative big-numbers also get expanded)
  if (typeof n === 'number' && Math.abs(n) < 1000000) {
    return String(n)
  }
  let str = typeof n === 'number' ? n.toFixed(0) : String(n)
  // no exponent? nothing to expand
  if (str.indexOf('e+') === -1) {
    return str
  }
  // pull the sign off, so it doesn't throw-off the digit count below
  let sign = ''
  if (str[0] === '-') {
    sign = '-'
    str = str.slice(1)
  }
  // '2.5e+22' -> digits '25', exponent 22 -> '25' followed by 21 zeroes
  const [digits, exp] = str.replace('.', '').split('e+')
  return sign + digits + '0'.repeat(Number(exp) - digits.length + 1)
}
export default numToString
// console.log(numToString(2.5e+22));
import easyMode from './01-easy.js'
import byNeighbour from './02-neighbours.js'
import matcher from './03-matcher.js'
import fallback from './04-fallback.js'
import fixUp from './05-fixUp.js'
/* Chunks:
  Noun
  Verb
  Adjective
  Pivot
*/

// run the chunking passes over the document, in a fixed order.
// each pass mutates the document in-place; later passes
// presumably fill-in what earlier ones left unresolved - see the pass files.
const findChunks = function (view) {
  const { document, world } = view
  easyMode(document)
  byNeighbour(document)
  matcher(view, document, world)
  // matcher(view, document, world) //run it 2nd time
  fallback(document, world)
  fixUp(document, world)
}
export default { chunks: findChunks }
// clean-up replacements, applied in order
const steps = [
  // remove ordinal suffixes - 'st/nd/rd/th'
  [/1st$/, '1'],
  [/2nd$/, '2'],
  [/3rd$/, '3'],
  [/([4567890])r?th$/, '$1'],
  // remove currency prefixes
  [/^[$€¥£¢]/, ''],
  // remove percent/currency suffixes
  [/[%$€¥£¢]$/, ''],
  // remove commas
  [/,/g, ''],
  // split '5kg' into '5'
  [/([0-9])([a-z\u00C0-\u00FF]{1,2})$/, '$1'],
]

// normalize a string like "4,200.1" so it can be parsed as Number 4200.1
const parseNumeric = str => {
  return steps.reduce((out, [reg, repl]) => out.replace(reg, repl), str)
}

export default parseNumeric
// for each cached-sentence, collect a de-duplicated list of possible matches
// docCaches: one word-cache (Set-like) per sentence; hooks: word -> [match-objects]
const getHooks = function (docCaches, hooks) {
  const hookWords = Object.keys(hooks)
  return docCaches.map(cache => {
    const found = []
    const seen = {}
    hookWords.forEach(k => {
      if (!cache.has(k)) {
        return
      }
      hooks[k].forEach(h => {
        // keep only the first occurrence of each match-string
        if (seen[h.match] !== true) {
          seen[h.match] = true
          found.push(h)
        }
      })
    })
    return found
  })
}

export default getHooks
"default": "./../builds/one/compromise-one.cjs" 19 | } 20 | } 21 | }, 22 | "author": "Spencer Kelly (http://spencermounta.in)", 23 | "license": "MIT", 24 | "sideEffects": true 25 | } -------------------------------------------------------------------------------- /plugins/dates/src/model/words/durations.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | 'centuries', 3 | 'century', 4 | 'day', 5 | 'days', 6 | 'decade', 7 | 'decades', 8 | 'hour', 9 | 'hours', 10 | 'hr', 11 | 'hrs', 12 | 'millisecond', 13 | 'milliseconds', 14 | 'minute', 15 | 'minutes', 16 | 'min', 17 | 'mins', 18 | 'month', 19 | 'months', 20 | 'seconds', 21 | 'sec', 22 | 'secs', 23 | 'week end', 24 | 'week ends', 25 | 'weekend', 26 | 'weekends', 27 | 'week', 28 | 'weeks', 29 | 'wk', 30 | 'wks', 31 | 'year', 32 | 'years', 33 | 'yr', 34 | 'yrs', 35 | 'quarter', 36 | // 'quarters', 37 | 'qtr', 38 | 'qtrs', 39 | 'season', 40 | 'seasons', 41 | ] 42 | -------------------------------------------------------------------------------- /tests/two/misc/lazy.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[two/lazy] ' 4 | import penn from '../tagger/_pennSample.js' 5 | const txt = penn.map(a => a.text).join('\n') 6 | 7 | test('lazy matches are equal', function (t) { 8 | const arr = [ 9 | 'captain .', 10 | '. 
of the #Noun', 11 | '#Adverb #Adverb+', 12 | '#Url #Noun .?', 13 | 'certain !#Plural' 14 | ] 15 | arr.forEach(str => { 16 | const reg = nlp(txt).match(str) 17 | const lazy = nlp.lazy(txt, str) 18 | t.equal(reg.length, lazy.length, here + ' ' + str) 19 | t.deepEqual(reg.out('array'), lazy.out('array'), here + ' ' + str) 20 | }) 21 | t.end() 22 | }) -------------------------------------------------------------------------------- /two/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "compromise-two", 3 | "version": "14.14.4", 4 | "description": "", 5 | "type": "module", 6 | "module": "./../src/two.js", 7 | "main": "./../src/two.js", 8 | "types": "./../types/two.d.ts", 9 | "exports": { 10 | "./package.json": "./package.json", 11 | ".": { 12 | "import": { 13 | "types": "./../types/two/two.d.ts", 14 | "default": "./../src/two.js" 15 | }, 16 | "require": { 17 | "types": "./../types/two.d.cts", 18 | "default": "./../builds/two/compromise-two.cjs" 19 | } 20 | } 21 | }, 22 | "author": "Spencer Kelly (http://spencermounta.in)", 23 | "license": "MIT", 24 | "sideEffects": true 25 | } -------------------------------------------------------------------------------- /tests/three/numbers/backlog/overlap.ignore.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../../_lib.js' 3 | const here = '[three/number-overlap] ' 4 | 5 | test('number-fraction overlap', function (t) { 6 | const arr = [ 7 | ['fifty five and two eighths', 55.25], 8 | ['two fifty five and a third', 255.333], 9 | ['two fifty five and five thirds', 256.667], 10 | ] 11 | 12 | arr.forEach((a) => { 13 | const doc = nlp(a[0]) 14 | const values = doc.numbers().get()[0] 15 | const fractions = doc.fractions().get()[0] 16 | t.equal(values, a[1], here + 'Value: ' + a[0]) 17 | t.equal(fractions, null, here + 'Fraction: no-fraction') 18 | }) 19 | 20 | t.end() 21 | }) 22 | 
// cheat- write each term's [sentence, word] position directly onto it
const index = function (view) {
  view.document.forEach((terms, n) => {
    terms.forEach((term, i) => {
      term.index = [n, i]
    })
  })
}

export default index
14 | 'cal', 15 | 'calif', 16 | 'colo', 17 | 'conn', 18 | 'fla', 19 | 'fl', 20 | 'ga', 21 | 'ida', 22 | 'ia', 23 | 'kan', 24 | 'kans', 25 | 26 | 'minn', 27 | 'neb', 28 | 'nebr', 29 | 'okla', 30 | 'penna', 31 | 'penn', 32 | 'pa', 33 | 'dak', 34 | 'tenn', 35 | 'tex', 36 | 'ut', 37 | 'vt', 38 | 'va', 39 | 'wis', 40 | 'wisc', 41 | 'wy', 42 | 'wyo', 43 | 'usafa', 44 | 'alta', 45 | 'ont', 46 | 'que', 47 | 'sask', 48 | ] 49 | -------------------------------------------------------------------------------- /src/1-one/tokenize/plugin.js: -------------------------------------------------------------------------------- 1 | import methods from './methods/index.js' 2 | import model from './model/index.js' 3 | import compute from './compute/index.js' 4 | 5 | export default { 6 | compute, 7 | methods, 8 | model, 9 | hooks: ['alias', 'machine', 'index', 'id'], 10 | } 11 | 12 | // const plugin = function (world) { 13 | // let { methods, model, parsers } = world 14 | // Object.assign({}, methods, _methods) 15 | // Object.assign(model, _model) 16 | // methods.one.tokenize.fromString = tokenize 17 | // parsers.push('normal') 18 | // parsers.push('alias') 19 | // parsers.push('machine') 20 | // // extend View class 21 | // // addMethods(View) 22 | // } 23 | // export default plugin 24 | -------------------------------------------------------------------------------- /src/2-two/preTagger/compute/tagger/1st-pass/02-hyphens.js: -------------------------------------------------------------------------------- 1 | const byHyphen = function (terms, i, model, world) { 2 | const setTag = world.methods.one.setTag 3 | // two words w/ a dash 4 | if (terms[i].post === '-' && terms[i + 1]) { 5 | setTag([terms[i], terms[i + 1]], 'Hyphenated', world, null, `1-punct-hyphen''`) 6 | 7 | // bone-headed, man-made, good-tempered, coursely-ground 8 | // if (terms[i + 1].tags.has('PastTense')) { 9 | // let tags = terms[i].tags 10 | // if (tags.has('Noun') || tags.has('Adverb')) { 11 | // setTag([terms[i], 
// split a verb-phrase's adverbs into those before/after the root verb
const getAdverbs = function (vb, root) {
  const res = { pre: vb.none(), post: vb.none() }
  // no adverbs at all? return two empty views
  if (!vb.has('#Adverb')) {
    return res
  }
  // pivot on the main verb
  const parts = vb.splitOn(root)
  // adverbs on both sides of the root
  if (parts.length === 3) {
    return { pre: parts.eq(0).adverbs(), post: parts.eq(2).adverbs() }
  }
  // root came first - the adverbs follow it
  if (parts.eq(0).isDoc(root)) {
    res.post = parts.eq(1).adverbs()
    return res
  }
  // otherwise, the adverbs precede the root
  res.pre = parts.eq(0).adverbs()
  return res
}
export default getAdverbs
import today from './01-today.js'
import holiday from './02-holidays.js'
import nextLast from './03-next-last.js'
import yearly from './04-yearly.js'
import explicit from './05-explicit.js'

// try each date-parser in turn, returning the first result found
const parse = function (doc, context, parts) {
  // 'in two days'
  let unit = today(doc, context, parts)
  // 'this haloween'
  if (!unit) {
    unit = holiday(doc, context)
  }
  // 'this month'
  if (!unit) {
    unit = nextLast(doc, context)
  }
  // 'q2 2002'
  if (!unit) {
    unit = yearly(doc, context)
  }
  // 'this june 2nd'
  if (!unit) {
    unit = explicit(doc, context)
  }
  return unit
}
export default parse
// tally how often each word appears in the document,
// and stamp that count onto every term as `term.freq`
const freq = function (view) {
  const docs = view.docs
  const counts = {}
  const wordOf = term => term.machine || term.normal
  // first pass - count each word
  docs.forEach(terms => {
    terms.forEach(term => {
      const word = wordOf(term)
      counts[word] = (counts[word] || 0) + 1
    })
  })
  // second pass - write the counts back
  docs.forEach(terms => {
    terms.forEach(term => {
      term.freq = counts[wordOf(term)]
    })
  })
}
export default freq
import { toSuperlative, toComparative, fromSuperlative, fromComparative, toNoun } from './inflect.js'
import fromAdverb from './conjugate/fromAdverb.js'
import toAdverb from './conjugate/toAdverb.js'

// produce every known inflected form of an adjective,
// deduped, with the input form first
const all = function (str, model) {
  const forms = [str, toSuperlative(str, model), toComparative(str, model), toAdverb(str)]
  // drop null/empty results, then dedupe while keeping order
  return Array.from(new Set(forms.filter(s => s)))
}


export default {
  toSuperlative, toComparative, toAdverb, toNoun,
  fromAdverb, fromSuperlative, fromComparative,
  all,
}
import multiWord from './multi-word.js'
import singleWord from './single-word.js'

// tag any words in our lexicon - even if it hasn't been filled-up yet
// rest of pre-tagger is in ./two/preTagger
const lexicon = function (view) {
  const world = view.world
  // walk every term of every phrase
  view.docs.forEach(terms => {
    for (let i = 0; i < terms.length; i += 1) {
      // only attempt terms that are still untagged
      if (terms[i].tags.size === 0) {
        // multi-word expressions take priority over single-word lookups
        const found = multiWord(terms, i, world)
        if (!found) {
          singleWord(terms, i, world)
        }
      }
    }
  })
}

export default {
  lexicon,
}
fmts.normal 33 | fmts.reduced = fmts.root 34 | export default fmts -------------------------------------------------------------------------------- /src/1-one/tokenize/compute/machine.js: -------------------------------------------------------------------------------- 1 | const hasDash = /^\p{Letter}+-\p{Letter}+$/u 2 | // 'machine' is a normalized form that looses human-readability 3 | const doMachine = function (term) { 4 | let str = term.implicit || term.normal || term.text 5 | // remove apostrophes 6 | str = str.replace(/['’]s$/, '') 7 | str = str.replace(/s['’]$/, 's') 8 | //lookin'->looking (make it easier for conjugation) 9 | str = str.replace(/([aeiou][ktrp])in'$/, '$1ing') 10 | //turn re-enactment to reenactment 11 | if (hasDash.test(str)) { 12 | str = str.replace(/-/g, '') 13 | } 14 | //#tags, @mentions 15 | str = str.replace(/^[#@]/, '') 16 | if (str !== term.normal) { 17 | term.machine = str 18 | } 19 | } 20 | export default doMachine 21 | -------------------------------------------------------------------------------- /src/2-two/swap/api/swap-verb.js: -------------------------------------------------------------------------------- 1 | const matchVerb = function (m, lemma) { 2 | const conjugate = m.methods.two.transform.verb.conjugate 3 | const all = conjugate(lemma, m.model) 4 | if (m.has('#Gerund')) { 5 | return all.Gerund 6 | } 7 | if (m.has('#PastTense')) { 8 | return all.PastTense 9 | } 10 | if (m.has('#PresentTense')) { 11 | return all.PresentTense 12 | } 13 | if (m.has('#Gerund')) { 14 | return all.Gerund 15 | } 16 | return lemma 17 | } 18 | 19 | const swapVerb = function (vb, lemma) { 20 | let str = lemma 21 | vb.forEach(m => { 22 | if (!m.has('#Infinitive')) { 23 | str = matchVerb(m, lemma) 24 | } 25 | m.replaceWith(str) 26 | }) 27 | return vb 28 | } 29 | export default swapVerb -------------------------------------------------------------------------------- /src/3-three/misc/slashes/index.js: 
// a slash character anywhere in a term, like 'red/blue'
const hasSlash = /\//

const api = function (View) {

  /** a view of slash-joined terms, like 'left/right' */
  class Slashes extends View {
    constructor(document, pointer, groups) {
      super(document, pointer, groups)
      this.viewType = 'Slashes'
    }
    // turn 'a/b' into the separate words 'a b', and grow the
    // match to cover all the resulting words
    split() {
      return this.map(m => {
        const txt = m.text()
        const words = txt.split(hasSlash)
        m = m.replaceWith(words.join(' '))
        return m.growRight('(' + words.join('|') + ')+')
      })
    }
  }

  /** find terms containing a slash, optionally the nth one */
  View.prototype.slashes = function (n) {
    const m = this.match('#SlashedTerm').getNth(n)
    return new Slashes(m.document, m.pointer)
  }
}
export default api
// words that act either as a unit ('10 cups') or as a plain noun ('the cup')
// duplicates removed - 'kb' and 'mph' each appeared twice
const unitNouns = [
  'cm',
  'cup',
  'cups',
  'feet',
  'foot',
  'ft',
  'gal',
  'gb',
  'hg',
  'inch',
  'inches',
  'k',
  'kb',
  'kelvin',
  'kg',
  'km',
  'lb',
  'm',
  'mb',
  'mg',
  'mi',
  'hz',
  'mps',
  'mph',
  'miles',
  'ml',
  'mm',
  'newton',
  'newtons',
  'oz',
  'pa',
  // 'pound',
  // 'pounds',
  'pt',
  'px',
  'qt',
  'tablespoon',
  'tablespoons',
  'tb',
  'tbl',
  'tbsp',
  'teaspoon',
  'teaspoons',
  'tsp',
  'yard',
  'yards',
  'yd',
]
export default unitNouns
import toText from '../../numbers/format/toText/index.js'
import textOrdinal from '../../numbers/format/toOrdinal/textOrdinal.js'

// render a fraction object as ordinal text, like {2,5} -> 'two fifths'
const toOrdinal = function (obj) {
  // don't divide by zero!
  if (!obj.numerator || !obj.denominator) {
    return ''
  }
  // create [two] [fifth]
  const start = toText({ num: obj.numerator })
  let end = textOrdinal({ num: obj.denominator })
  if (!start || !end) {
    return ''
  }
  if (obj.denominator === 2) {
    // 'one secondth' -> 'one half', 'three seconds' -> 'three halves'
    end = obj.numerator === 1 ? 'half' : 'halves'
  } else if (obj.numerator !== 1) {
    // pluralize the ordinal - 'two fifth' -> 'two fifths'
    end += 's'
  }
  return `${start} ${end}`
}
export default toOrdinal
import fastTag from '../_fastTag.js'
import fillTag from './_fillTags.js'

// last-resort pass: tag a term as a Noun when nothing better was found
const nounFallback = function (terms, i, model) {
  const tags = terms[i].tags
  // a lone weak tag counts the same as no tag at all
  const onlyWeakTag = tags.size === 1 &&
    (tags.has('Hyphenated') || tags.has('HashTag') || tags.has('Prefix') || tags.has('SlashedTerm'))
  if (tags.size === 0 || onlyWeakTag) {
    fastTag(terms[i], 'Noun', '3-[fallback]')
    // try to give it singular/plural tags, too
    fillTag(terms, i, model)
    // fallback guesses get low confidence
    terms[i].confidence = 0.1
  }
}
export default nounFallback
// split terms into Nounphrase, verbphrase, etc groups
const chunks = function (doc) {
  const found = []
  let prev = null
  // first, split by comma, etc, then walk each clause
  doc.clauses().docs.forEach(terms => {
    terms.forEach(term => {
      if (term.chunk && term.chunk === prev) {
        // same chunk - extend the current span by one term
        found[found.length - 1][2] = term.index[1] + 1
      } else {
        // new (or missing) chunk - start a fresh one-term span
        prev = term.chunk
        found.push([term.index[0], term.index[1], term.index[1] + 1])
      }
    })
    // a chunk never crosses a clause boundary
    prev = null
  })
  return doc.update(found)
}
export default chunks
// known proper-nouns, used to seed the lexicon
const properNouns = [
  'mercedes',
  'barbie',
  'catalina',
  'christi',
  'diego',
  'elmo',
  'franco',
  'kirby',
  'mickey',
  'finn',
  'missy',
  'florence',
  'stevens',
  'abid',
  'mcgill',
  'hudson',
  'chesley',
  'carling',
  'berkeley',
  'beeton',
  'carleton',
  'ajax',
  'weston',
  'sherwood',
  'wembley',
  'hinton',
  'bentley',
  'landsdowne',
  'brock',
  'dalhousie',
  'spalding',
  'charlton',
  'rothwell',
  'gosford',
  'frampton',
  'fairview',
  // currencies
  'nis',
  'riel',
  'euro',
  // bands
  'iron maiden',
]
export default properNouns
// consume as many terms as a greedy reg ('foo+' / 'foo*') will allow
const greedyMatch = function (state) {
  const { regs, phrase_length } = state
  const reg = regs[state.r]
  // advance t to the end of the greedy run
  state.t = getGreedy(state, regs[state.r + 1])
  if (state.t === null) {
    return null // greedy was too short
  }
  // foo{2,4} - enforce the stated greed-minimum
  if (reg.min && reg.min > state.t) {
    return null // greedy was too short
  }
  // 'foo+$' - an end-anchored greedy must actually reach the end
  if (reg.end === true && state.start_i + state.t !== phrase_length) {
    return null // greedy didn't reach the end
  }
  return true
}
export default greedyMatch

compromise-speech demo:

14 |
    15 | chocolate microscopes 16 |
    loading
    17 |
18 | 19 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /plugins/stats/scratch.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console, no-unused-vars */ 2 | // import corpus from 'nlp-corpus' 3 | import nlp from '../../src/three.js' 4 | import plugin from './src/plugin.js' 5 | nlp.extend(plugin) 6 | 7 | // let txt = 'toronto raptors play a toronto maple leafs' 8 | // let doc = nlp(txt) 9 | // console.log(doc.ngrams()) 10 | 11 | const doc = nlp('one two three. one two foo.') 12 | const res = doc.ngrams({ min: 3 }) 13 | /*[ 14 | { size: 3, count: 1, normal: 'one two three' }, 15 | { size: 3, count: 1, normal: 'one two foo' } 16 | ] 17 | */ 18 | console.log(res) 19 | // let txt = 'no, my son is also named Bort' 20 | 21 | // let doc = nlp(txt) 22 | // // console.log(doc.tfidf()) 23 | // doc.compute('tfidf') 24 | // console.log(JSON.stringify(doc.json()[0].terms[6])) -------------------------------------------------------------------------------- /plugins/dates/src/plugin.js: -------------------------------------------------------------------------------- 1 | import api from './api/index.js' 2 | import compute from './compute/index.js' 3 | import tags from './model/tags.js' 4 | import words from './model/words/index.js' 5 | import regex from './model/regex.js' 6 | import version from './_version.js' 7 | import debug from './debug.js' 8 | 9 | export default { 10 | tags, 11 | words, 12 | compute, 13 | api, 14 | mutate: world => { 15 | // add our regexes 16 | world.model.two.regexText = world.model.two.regexText || [] 17 | world.model.two.regexText = world.model.two.regexText.concat(regex) 18 | // add our debug('dates') method 19 | world.methods.one.debug = world.methods.one.debug || {} 20 | world.methods.one.debug.dates = debug 21 | }, 22 | hooks: ['dates'], 23 | version, 24 | } 25 | 
-------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2015" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019' or 'ESNEXT'. */, 4 | "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */, 5 | "strict": false /* Enable all strict type-checking options. */, 6 | "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */, 7 | "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */ 8 | }, 9 | "files": ["types/three.d.ts"], 10 | "include": ["types/index.d.ts"] 11 | } 12 | -------------------------------------------------------------------------------- /plugins/dates/src/api/toJSON.js: -------------------------------------------------------------------------------- 1 | const getDuration = function (range) { 2 | const end = range.end.d.add(1, 'millisecond') 3 | const diff = end.since(range.start.d).diff 4 | delete diff.milliseconds 5 | delete diff.seconds 6 | return diff 7 | } 8 | 9 | const toJSON = function (range) { 10 | if (!range.start) { 11 | return { 12 | start: null, 13 | end: null, 14 | timezone: null, 15 | duration: {}, 16 | // range: null 17 | } 18 | } 19 | const diff = range.end ? getDuration(range) : {} 20 | return { 21 | start: range.start.format('iso'), 22 | end: range.end ? 
range.end.format('iso') : null, 23 | timezone: range.start.d.format('timezone'), 24 | duration: diff, 25 | // range: getRange(diff) 26 | } 27 | } 28 | export default toJSON -------------------------------------------------------------------------------- /scripts/match.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import repl from 'repl' 3 | import corpus from 'nlp-corpus' 4 | import nlp from '../src/three.js' 5 | 6 | const n = 12000 7 | console.log(` -- pre-processing ${n} sentences-`) 8 | let docs = corpus.some(n) 9 | docs = docs.map(str => nlp(str).compute('offset')) 10 | console.log(` -- ok, ready --`) 11 | 12 | const doMatch = function (match) { 13 | docs.forEach(doc => { 14 | const m = doc.match(match) 15 | if (m.found) { 16 | m.debug({ highlight: true, tags: false }) 17 | } 18 | }) 19 | console.log('--') 20 | } 21 | 22 | let arg = process.argv.slice(2).join(' ') 23 | arg = arg.trim() 24 | if (arg) { 25 | doMatch(arg) 26 | } 27 | 28 | repl.start({ 29 | eval: function (match) { 30 | doMatch(match) 31 | }, 32 | }) 33 | -------------------------------------------------------------------------------- /src/2-two/preTagger/model/regex/regex-text.js: -------------------------------------------------------------------------------- 1 | export default [ 2 | // #coolguy 3 | [/^#[\p{Number}_]*\p{Letter}/u, 'HashTag'], // can't be all numbers 4 | 5 | // @spencermountain 6 | [/^@\w{2,}$/, 'AtMention'], 7 | 8 | // period-ones acronyms - f.b.i. 
9 | [/^([A-Z]\.){2}[A-Z]?/i, ['Acronym', 'Noun'], 'F.B.I'], //ascii-only 10 | 11 | // ending-apostrophes 12 | [/.{3}[lkmnp]in['‘’‛‵′`´]$/, 'Gerund', "chillin'"], 13 | [/.{4}s['‘’‛‵′`´]$/, 'Possessive', "flanders'"], 14 | 15 | //from https://www.regextester.com/106421 16 | // [/^([\u00a9\u00ae\u2319-\u3300]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff])/, 'Emoji', 'emoji-range'] 17 | // unicode character range 18 | [/^[\p{Emoji_Presentation}\p{Extended_Pictographic}]/u, 'Emoji', 'emoji-class'], 19 | ] 20 | -------------------------------------------------------------------------------- /plugins/dates/src/api/parse/one/units/_time.js: -------------------------------------------------------------------------------- 1 | import Unit from './Unit.js' 2 | 3 | class Hour extends Unit { 4 | constructor(input, unit, context) { 5 | super(input, unit, context, true) 6 | this.unit = 'hour' 7 | if (this.d.isValid()) { 8 | this.d = this.d.startOf('hour') 9 | } 10 | } 11 | } 12 | class Minute extends Unit { 13 | constructor(input, unit, context) { 14 | super(input, unit, context, true) 15 | this.unit = 'minute' 16 | if (this.d.isValid()) { 17 | this.d = this.d.startOf('minute') 18 | } 19 | } 20 | } 21 | class Moment extends Unit { 22 | constructor(input, unit, context) { 23 | super(input, unit, context, true) 24 | this.unit = 'millisecond' 25 | } 26 | } 27 | 28 | export { 29 | Hour, 30 | Minute, 31 | Moment 32 | } 33 | -------------------------------------------------------------------------------- /plugins/stats/demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

compromise-stats demo:

14 |
    15 | Who keeps the metric system down? We do, we do! 16 |
    loading
    17 |
18 | 19 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/2-two/postTagger/model/verbs/passive.js: -------------------------------------------------------------------------------- 1 | // ==== Passive voice === 2 | export default [ 3 | // got walked, was walked, were walked 4 | { match: '(got|were|was|is|are|am) (#PastTense|#Participle)', tag: 'Passive', reason: 'got-walked' }, 5 | // was being walked 6 | { match: '(was|were|is|are|am) being (#PastTense|#Participle)', tag: 'Passive', reason: 'was-being' }, 7 | // had been walked, have been eaten 8 | { match: '(had|have|has) been (#PastTense|#Participle)', tag: 'Passive', reason: 'had-been' }, 9 | // will be cleaned 10 | { match: 'will be being? (#PastTense|#Participle)', tag: 'Passive', reason: 'will-be-cleaned' }, 11 | // suffered by the country 12 | { match: '#Noun [(#PastTense|#Participle)] by (the|a) #Noun', group: 0, tag: 'Passive', reason: 'suffered-by' }, 13 | 14 | ] -------------------------------------------------------------------------------- /plugins/wikipedia/demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

compromise-wikipedia demo:

14 |
    15 | you could still go to McGill, the Harvard of Canada! 16 |
    loading
    17 |
// support returning a subset of a match
// like 'foo [bar] baz' -> bar
const getGroup = function (res, group) {
  const ptrs = []
  const byGroup = {}
  if (res.length === 0) {
    return { ptrs, byGroup }
  }
  // allow numbered groups, like [0]
  if (typeof group === 'number') {
    group = String(group)
  }
  if (group) {
    // asked for one named group: collect just that group's pointers
    res.forEach(r => {
      if (r.groups[group]) {
        ptrs.push(r.groups[group])
      }
    })
    return { ptrs, byGroup }
  }
  // no group asked-for: full pointers, plus every group keyed by name
  res.forEach(r => {
    ptrs.push(r.pointer)
    Object.entries(r.groups).forEach(([k, v]) => {
      byGroup[k] = byGroup[k] || []
      byGroup[k].push(v)
    })
  })
  return { ptrs, byGroup }
}
export default getGroup
-------------------------------------------------------------------------------- 1 | # sends test-coverage data to codecov.io 2 | # https://codecov.io/gh/spencermountain/compromise 3 | name: Coverage 4 | 5 | on: 6 | release: 7 | types: [created] 8 | 9 | jobs: 10 | getCoverage: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | contents: read 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | persist-credentials: false 19 | 20 | - uses: actions/setup-node@v4 21 | with: 22 | node-version: '20' 23 | cache: npm 24 | check-latest: true 25 | 26 | - run: npm ci 27 | - run: npm i -g c8 codecov 28 | - run: c8 -r lcov -n 'src/**/*' -n 'plugins/**/*' npm run test && codecov -t 15039ad1-b495-48cd-b4a0-bcf124c9b318 29 | # - run: npm run codecov 30 | -------------------------------------------------------------------------------- /plugins/dates/demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

compromise-dates demo:

14 |
    15 | lets meet in 32 days 16 |

    loading

    17 |
18 | 19 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/1-one/match/methods/match/steps/optional-match.js: -------------------------------------------------------------------------------- 1 | import matchTerm from '../term/doesMatch.js' 2 | 3 | // 'foo? foo' matches are tricky. 4 | const foundOptional = function (state) { 5 | const { regs } = state 6 | const reg = regs[state.r] 7 | const term = state.terms[state.t] 8 | // does the next reg match it too? 9 | const nextRegMatched = matchTerm(term, regs[state.r + 1], state.start_i + state.t, state.phrase_length) 10 | if (reg.negative || nextRegMatched) { 11 | // but does the next reg match the next term?? 12 | // only skip if it doesn't 13 | const nextTerm = state.terms[state.t + 1] 14 | if (!nextTerm || !matchTerm(nextTerm, regs[state.r + 1], state.start_i + state.t, state.phrase_length)) { 15 | state.r += 1 16 | } 17 | } 18 | } 19 | 20 | export default foundOptional -------------------------------------------------------------------------------- /src/3-three/numbers/numbers/format/toOrdinal/numOrdinal.js: -------------------------------------------------------------------------------- 1 | import toString from '../../_toString.js' 2 | 3 | /** 4 | * turn a number like 5 into an ordinal like 5th 5 | */ 6 | const numOrdinal = function (obj) { 7 | const num = obj.num 8 | if (!num && num !== 0) { 9 | return null 10 | } 11 | //the teens are all 'th' 12 | const tens = num % 100 13 | if (tens > 10 && tens < 20) { 14 | return String(num) + 'th' 15 | } 16 | //the rest of 'em 17 | const mapping = { 18 | 0: 'th', 19 | 1: 'st', 20 | 2: 'nd', 21 | 3: 'rd', 22 | } 23 | let str = toString(num) 24 | const last = str.slice(str.length - 1, str.length) 25 | if (mapping[last]) { 26 | str += mapping[last] 27 | } else { 28 | str += 'th' 29 | } 30 | return str 31 | } 32 | 33 | export default numOrdinal 34 | -------------------------------------------------------------------------------- 
// support global multipliers, like 'half-million': strip the leading
// modifier word and report the factor to multiply the parsed number by
const findModifiers = str => {
  const mults = [
    {
      reg: /^(minus|negative)[\s-]/i,
      mult: -1,
    },
    {
      reg: /^(a\s)?half[\s-](of\s)?/i,
      mult: 0.5,
    },
    // {
    //   reg: /^(a\s)?quarter[\s\-]/i,
    //   mult: 0.25
    // }
  ]
  const found = mults.find(m => m.reg.test(str))
  if (found) {
    return {
      amount: found.mult,
      str: str.replace(found.reg, ''),
    }
  }
  // no modifier found - leave the string alone, multiply by 1
  return {
    amount: 1,
    str: str,
  }
}

export default findModifiers
// `rules` is imported from './_rules.js' at the top of this file

// flip a {singular: plural} map into {plural: singular}
const invertObj = function (obj) {
  return Object.keys(obj).reduce((h, k) => {
    h[obj[k]] = k
    return h
  }, {})
}

// cache the inverted irregulars per-model, so we don't rebuild the
// whole map on every single call (it was previously O(n) per word)
// NOTE(review): assumes model.two.irregularPlurals is not mutated after first use - confirm
const invertedCache = new WeakMap()
const getInverted = function (irregularPlurals) {
  if (!invertedCache.has(irregularPlurals)) {
    invertedCache.set(irregularPlurals, invertObj(irregularPlurals))
  }
  return invertedCache.get(irregularPlurals)
}

/**
 * turn a plural noun into its singular form
 * @param {string} str - a normalized plural word, like 'cities'
 * @param {object} model - the world model, providing model.two.irregularPlurals
 * @returns {string} the singular form, or the input unchanged if no rule applies
 */
const toSingular = function (str, model) {
  const { irregularPlurals } = model.two
  const invert = getInverted(irregularPlurals)
  // check irregulars list first
  if (Object.prototype.hasOwnProperty.call(invert, str)) {
    return invert[str]
  }
  // go through our suffix regexes
  for (let i = 0; i < rules.length; i += 1) {
    if (rules[i][0].test(str) === true) {
      return str.replace(rules[i][0], rules[i][1])
    }
  }
  // no rule matched - leave it alone
  return str
}
export default toSingular
// `normalize` is imported from '../compute/normal/index.js' at the top of this file

// turn a string input into a 'document' json format:
// an array of sentences, each an array of term objects
const parse = function (input, world) {
  const { methods, model } = world
  const { splitSentences, splitTerms, splitWhitespace } = methods.one.tokenize
  const text = input || ''
  // split into sentence strings, then each sentence into terms
  return splitSentences(text, world).map(sentence => {
    // split into [pre-text-post] word objects
    const terms = splitTerms(sentence, model).map(t => splitWhitespace(t, model))
    // add the normalized term format, always
    terms.forEach(t => normalize(t, world))
    return terms
  })
}
export default parse
>= 0; i -= 1) { 7 | const reg = regs[i] 8 | if (reg.word && hasDash.test(reg.word)) { 9 | let words = reg.word.split(/[-–—]/g) 10 | // don't split 're-cycle', etc 11 | if (prefixes.hasOwnProperty(words[0])) { 12 | continue 13 | } 14 | words = words.filter(w => w).reverse() 15 | regs.splice(i, 1) 16 | words.forEach(w => { 17 | const obj = Object.assign({}, reg) 18 | obj.word = w 19 | regs.splice(i, 0, obj) 20 | }) 21 | } 22 | } 23 | return regs 24 | } 25 | export default splitHyphens -------------------------------------------------------------------------------- /tests/one/match/sweep-not.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[one/sweep] ' 4 | 5 | test('sweep-not:', function (t) { 6 | let doc = nlp('The service is fast really') 7 | let net = nlp.buildNet([{ match: 'is fast .', notIf: 'psych' }]) 8 | let m = doc.match(net) 9 | t.equal(m.text(), 'is fast really', here + 'no-psych') 10 | 11 | doc = nlp('The service is fast psych') 12 | net = nlp.buildNet([{ match: 'is fast .', notIf: 'psych' }]) 13 | m = doc.match(net) 14 | t.equal(m.text(), '', here + 'psych-found') 15 | 16 | doc = nlp('i swim in the lake and walk in the road') 17 | net = nlp.buildNet([{ match: 'i (swim|walk) in the .', notIf: 'in the (park|lake)' }]) 18 | m = doc.match(net) 19 | t.equal(m.text(), '', here + 'notIf optional') 20 | 21 | t.end() 22 | }) 23 | 24 | -------------------------------------------------------------------------------- /plugins/wikipedia/scripts/stat.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import sh from 'shelljs' 3 | import fs from 'fs' 4 | 5 | import conf from '../config.js' 6 | const { lang, project } = conf 7 | const file = `./files/${lang}.${project}-pageviews.json` 8 | // const file = './files/pageviews.tsv' 9 | 10 | const round = n => Math.round(n * 10) / 10 
// cheat-method for a quick loop over every term in the view,
// calling fn(term, world) on each one in document order
const termLoop = function (view, fn) {
  const { docs } = view
  for (const terms of docs) {
    for (const term of terms) {
      fn(term, view.world)
    }
  }
}
(m.has('(we|they)')) { 13 | return true 14 | } 15 | // these can't be plural 16 | if (root.has(notPlural) === true) { 17 | return false 18 | } 19 | if (m.has('#Singular')) { 20 | return false 21 | } 22 | // word-reg fallback 23 | const str = root.text('normal') 24 | // ends with a brutal s fallback 25 | return str.length > 3 && str.endsWith('s') && !str.endsWith('ss') 26 | } 27 | export default isPlural 28 | -------------------------------------------------------------------------------- /tests/three/numbers/backlog/agreement.ignore.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../../_lib.js' 3 | const here = '[three/number-agreement] ' 4 | 5 | test('misc agreement', function (t) { 6 | let doc = nlp('i ate 7 kilos of fruit') 7 | .numbers() 8 | .units() 9 | t.equal(doc.text('trim'), 'kilos', here + 'found unit') 10 | 11 | doc = nlp('i ate 7 of them, kilos are kilograms') 12 | .numbers() 13 | .units() 14 | t.equal(doc.text('trim'), '', here + 'found no unit') 15 | 16 | t.end() 17 | }) 18 | 19 | test('ordinal agreement', function (t) { 20 | const doc = nlp('seventeen beers') 21 | doc.values().toOrdinal() 22 | t.equal(doc.text(), 'seventeenth beer', here + 'ord-agreement') 23 | 24 | doc.values().toCardinal() 25 | t.equal(doc.text(), 'seventeen beers', here + 'card-agreement') 26 | t.end() 27 | }) 28 | -------------------------------------------------------------------------------- /tests/two/misc/confidence.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from '../_lib.js' 3 | const here = '[two/confidence] ' 4 | 5 | test('confidence', function (t) { 6 | const arr = [ 7 | ['', 1], 8 | ['asdfasdf', 0.1], 9 | ['google', 1], 10 | ['jlcekehj is', 0.6], 11 | ['yelpily good', 0.85], 12 | 13 | // [ 'Striking revenue workers threaten gherao',null] 14 | // [ 'Madhuri goes dhak-dhak again',null] 15 | // [ 
// '2-4', '2 - 4', '1.5-2km'
const isRange = /^([0-9.]{1,4}[a-z]{0,2}) ?[-–—] ?([0-9]{1,4}[a-z]{0,2})$/i
// '9am-5pm', '9:30-10:30'
const timeRange = /^([0-9]{1,2}(:[0-9][0-9])?(am|pm)?) ?[-–—] ?([0-9]{1,2}(:[0-9][0-9])?(am|pm)?)$/i
// '555-1234' shape
const phoneNum = /^[0-9]{3}-[0-9]{4}$/

// expand a dashed range like '2-4' into ['2', 'to', '4'], or return null
const numberRange = function (terms, i) {
  const term = terms[i]
  const rangeParts = term.text.match(isRange)
  if (rangeParts !== null) {
    // 123-1234 is a phone number, not a number-range
    if (term.tags.has('PhoneNumber') === true || phoneNum.test(term.text)) {
      return null
    }
    return [rangeParts[1], 'to', rangeParts[2]]
  }
  // maybe it's a time-range instead
  const timeParts = term.text.match(timeRange)
  if (timeParts !== null) {
    return [timeParts[1], 'to', timeParts[4]]
  }
  return null
}
export default numberRange
words: ['job', 'union', 'worker'] }, 9 | }, 10 | charge: { 11 | money: { words: ['fee', 'bank', 'price', 'service'] }, 12 | run: { words: ['toward', 'run', 'flee'] }, 13 | }, 14 | fire: { 15 | job: { words: ['job', 'boss', 'contract'] }, 16 | gun: { words: ['gun', 'weapon', 'bullet', 'away'] }, 17 | }, 18 | trip: { 19 | drug: { words: ['lsd', 'acid'] }, 20 | fall: { fallback: true, words: ['stumble', 'hurt'] }, 21 | }, 22 | tie: { 23 | knot: { words: ['bow', 'rope', 'lace'] }, 24 | game: { words: ['point', 'score', 'match'] }, 25 | }, 26 | } 27 | -------------------------------------------------------------------------------- /tests/one/miss.test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import nlp from './_lib.js' 3 | const here = '[one/miss] ' 4 | 5 | const arr = [ 6 | // no tags 7 | // [`toronto`, '#City'], 8 | // [`i went to Toronto`, '#Noun'], 9 | // // no chunks 10 | // [`toronto`, ''], 11 | // [`i went to Toronto`, ''], 12 | // min-length 13 | ['mexico', '.{2}'], 14 | ['mexico', '.{2,3}'], 15 | //word-word 16 | ['mexico city', 'foo city'], 17 | ['mexico city', 'city foo'], 18 | ['mexico city', 'city .'], 19 | ['mexico city', 'mexico city .'], 20 | ['mexico city', '. 
// for: ['we', 'have']
// a match for "we have" should work as normal,
// but matching "we've" should skip over the implicit terms
const contractionSkip = function (state) {
  const { terms, regs } = state
  const current = terms[state.t]
  const next = terms[state.t + 1]
  // only act when we matched the first part of a contraction
  if (!current.implicit || !next) {
    return
  }
  // ensure the following word is implicit too
  if (!next.implicit) {
    return
  }
  const reg = regs[state.r]
  // we matched "we've" - skip-over [we, have]
  if (reg.word === current.normal) {
    state.t += 1
  }
  // also skip for @hasContraction
  if (reg.method === 'hasContraction') {
    state.t += 1
  }
}
export default contractionSkip
// group consecutive terms that share a chunk label,
// then drop any 'Verb' chunk that contains no actual #Verb term
const fixUp = function (docs) {
  const groups = []
  let lastChunk = null
  docs.forEach(terms => {
    terms.forEach(term => {
      if (lastChunk && term.chunk === lastChunk) {
        // extend the current run
        groups[groups.length - 1].terms.push(term)
      } else {
        // start a new run
        groups.push({ chunk: term.chunk, terms: [term] })
        lastChunk = term.chunk
      }
    })
  })
  // ensure every verb-phrase actually has a verb
  groups.forEach(group => {
    if (group.chunk !== 'Verb') {
      return
    }
    const hasVerb = group.terms.some(t => t.tags.has('Verb'))
    if (hasVerb === false) {
      group.terms.forEach(t => {
        t.chunk = null
      })
    }
  })
}
export default fixUp
He walks quickly.`) 7 | 8 | const canBeNoun = doc.canBe('Noun') 9 | t.equal(canBeNoun.length, 2, here + 'two results') 10 | t.equal(canBeNoun.terms(0).text('normal'), 'spencer', here + 'first result') 11 | t.equal(canBeNoun.terms(1).text(), 'He', here + 'first result') 12 | 13 | const canBeVerb = nlp('spencer kelly').canBe('Verb') 14 | t.equal(canBeVerb.length, 0, here + 'no results') 15 | 16 | const canBeMisc = nlp('spencer kelly').canBe('asdf') 17 | t.equal(canBeMisc.length, 1, here + 'all results are one') 18 | 19 | 20 | const found = nlp("Moe Sizlak.").terms().canBe('#Verb').found 21 | t.equal(found, false, here + 'no verb') 22 | t.end() 23 | }) 24 | -------------------------------------------------------------------------------- /plugins/dates/src/api/parse/one/02-parse/02-holidays.js: -------------------------------------------------------------------------------- 1 | import { Holiday } from '../units/index.js' 2 | import spacetimeHoliday from 'spacetime-holiday' 3 | 4 | const parseHoliday = function (doc, context) { 5 | let unit = null 6 | const m = doc.match('[#Holiday+] [#Year?]') 7 | let year = context.today.year() 8 | if (m.groups('year').found) { 9 | year = Number(m.groups('year').text('reduced')) || year 10 | } 11 | const str = m.groups('holiday').text('reduced') 12 | let s = spacetimeHoliday(str, year, context.timezone) 13 | if (s !== null) { 14 | // assume the year in the future.. 
// chop-off a run of `val` at the tail of the array
// NOTE(review): if EVERY element equals `val`, the list is returned
// un-chopped (not emptied) - confirm that's the intended behavior
const truncate = (list, val) => {
  let keep = 0 // index just past the last non-`val` element
  for (let i = 0; i < list.length; i += 1) {
    if (list[i] !== val) {
      keep = i + 1
    }
  }
  return keep === 0 ? list : list.slice(0, keep)
}

// prune the trie a bit, to shrink its serialized size
const compress = function (trie) {
  // empty transition objects become undefined
  trie.goNext = trie.goNext.map(node => {
    return Object.keys(node).length === 0 ? undefined : node
  })
  // chop-off tail of undefined vals in goNext array
  trie.goNext = truncate(trie.goNext, undefined)
  // chop-off tail of zeros in failTo array
  trie.failTo = truncate(trie.failTo, 0)
  // chop-off tail of nulls in endAs array
  trie.endAs = truncate(trie.endAs, null)
  return trie
}
export default compress
// a js version of the metaphone (#1) algorithm
// adapted from the work of Chris Umbel
// https://github.com/NaturalNode/natural/blob/master/lib/natural/phonetics/metaphone.js
// (`m` is imported from './transformations.js' at the top of this file)

const metaphone = function (s) {
  // the transformations, applied strictly in this order
  const steps = [
    m.dedup,
    m.dropInitialLetters,
    m.dropBafterMAtEnd,
    m.changeCK,
    m.cchange,
    m.dchange,
    m.dropG,
    m.changeG,
    m.dropH,
    m.changePH,
    m.changeQ,
    m.changeS,
    m.changeX,
    m.changeT,
    m.dropT,
    m.changeV,
    m.changeWH,
    m.dropW,
    m.dropY,
    m.changeZ,
    m.dropVowels,
  ]
  const out = steps.reduce((acc, step) => step(acc), s)
  return out.trim()
}

export default metaphone
import test from 'tape'
import nlp from '../_lib.js'
const here = '[three/phrasal]'

test('phrasal-verbs:', function (t) {
  // [input text, expected term-array after phrasal-verb merging]
  const cases = [
    [`he is really good`, ['he', 'is', 'really', 'good']],
    [`he is upset about it`, ['he', 'is', 'upset', 'about', 'it']],
    [`he will mess about with it`, ['he', 'will', 'mess about', 'with', 'it']],

    [`come forward`, ['come forward']],
    [`come together`, ['come together']],
    [`come apart`, ['come apart']],

    [`frighten back`, ['frighten', 'back']],
    [`frighten away`, ['frighten away']],
  ]
  for (const [input, want] of cases) {
    const got = nlp(input).out('array')
    const msg = got.join(' ') + ' -- ' + want.join(' ')
    t.equal(got.join(' '), want.join(' '), here + msg)
  }
  t.end()
})
// tag-definitions for date-related tags
// NOTE(review): assumed field semantics - confirm against the tagSet compiler:
//   is   - parent tag this tag implies
//   also - extra tags applied alongside this one
//   not  - conflicting tags that cannot co-exist with this one
export default {
  Date: {
    not: ['Verb', 'Adverb', 'Adjective'],
  },
  // 'june'
  Month: {
    is: 'Date',
    also: ['Noun'],
    not: ['Year', 'WeekDay', 'Time'],
  },
  // 'tuesday'
  WeekDay: {
    is: 'Date',
    also: ['Noun'],
  },
  // '1998'
  Year: {
    is: 'Date',
    not: ['RomanNumeral'],
  },
  // 'q2'
  FinancialQuarter: {
    is: 'Date',
    not: 'Fraction',
  },
  // 'easter'
  Holiday: {
    is: 'Date',
    also: ['Noun'],
  },
  // 'summer'
  Season: {
    is: 'Date',
  },
  // 'est'
  Timezone: {
    is: 'Date',
    also: ['Noun'],
    not: ['ProperNoun'],
  },
  // '4:30pm'
  Time: {
    is: 'Date',
    not: ['AtMention'],
  },
  // 'months'
  Duration: {
    is: 'Date',
    also: ['Noun'],
  },
}
import { findChained } from './lib.js'

// choose the most-likely antecedent for 'they' & 'their'
const getThey = function (s) {
  const nouns = s.nouns()

  // best case: a plural, non-pronoun noun - 'the bananas'
  const plural = nouns.isPlural().notIf('#Pronoun')
  if (plural.found) {
    return plural.last()
  }

  // otherwise, chain to an existing they/their reference
  const prior = findChained('(they|their|theirs)', s)
  if (prior.found) {
    return prior
  }

  // they can also refer to a singular noun
  // "the restaurant sold their food"
  // "a choir sang their song"

  // singular indefinite pronouns - 'somebody shaved their head'
  const indefinite = nouns.match('(somebody|nobody|everybody|anybody|someone|noone|everyone|anyone)')
  if (indefinite.found) {
    return indefinite.last()
  }
  return s.none()
}


export default getThey
// strip a tag (and its child-tags) from each of the given terms
const unTag = function (terms, tag, tagSet) {
  // normalize '#Verb' -> 'Verb'
  const name = tag.trim().replace(/^#/, '')
  for (const term of terms) {
    // frozen terms keep all of their tags
    if (term.frozen === true) {
      continue
    }
    // support clearing all tags, with '*'
    if (name === '*') {
      term.tags.clear()
      continue
    }
    // for known tags, do logical dependencies first
    const known = tagSet[name]
    // removing #Verb should also remove #PastTense
    if (known && known.children.length > 0) {
      known.children.forEach((child) => {
        term.tags.delete(child)
      })
    }
    term.tags.delete(name)
  }
}
export default unTag
// does this chunk end on a newline?
const endsWithNewline = function (chunk) {
  return /\n$/.test(chunk)
}

// loop through these chunks, and join the non-sentence chunks back together.
// mutates `chunks` as it goes; returns the merged sentence list
const smartMerge = function (chunks, world) {
  const isSentence = world.methods.one.tokenize.isSentence
  const abbrevs = world.model.one.abbreviations || new Set()

  const results = []
  for (let i = 0; i < chunks.length; i += 1) {
    const chunk = chunks[i]
    const next = chunks[i + 1]
    // should this chunk be combined with the next one?
    if (next && !isSentence(chunk, abbrevs) && !endsWithNewline(chunk)) {
      chunks[i + 1] = chunk + next
    } else if (chunk && chunk.length > 0) {
      // this chunk is a proper sentence..
      results.push(chunk)
      chunks[i] = ''
    }
  }
  return results
}
export default smartMerge
// do acronyms need to be ASCII? ... kind of?
const periodAcronym = /([A-Z]\.)+[A-Z]?,?$/ // like N.D.A.
const oneLetterAcronym = /^[A-Z]\.,?$/ // like 'F.'
const noPeriodAcronym = /[A-Z]{2,}('s|,)?$/ // like NDA
const lowerCaseAcronym = /([a-z]\.)+[a-z]\.?$/ // like c.e.o

// checked in the same order as before: period-form, lowercase, one-letter, no-period
const forms = [periodAcronym, lowerCaseAcronym, oneLetterAcronym, noPeriodAcronym]

// does this word look like an acronym in any supported form?
const isAcronym = function (str) {
  return forms.some((reg) => reg.test(str))
}

// strip periods from anything that looks like an acronym
const doAcronym = function (str) {
  return isAcronym(str) ? str.replace(/\./g, '') : str
}
export default doAcronym
\`inline stuff\` 13 | 14 | \`\`\` 15 | block stuff 16 | \`\`\` 17 | afterwards 18 | 19 | > Alpha bravo charlie. 20 | 21 | and then a cool: 22 | * list 1 23 | * list 2 24 | * list 3 25 | 26 | hello ![alpha](https://example.com/favicon.ico "bravo") world 27 | 28 | ` 29 | 30 | 31 | 32 | md = `| cool | also | here | | | 33 | |------|------|-------|---|---| 34 | | one | two | three | | | 35 | | four | five | | | | 36 | | | | | | |` 37 | 38 | md = `ok **cool** after. 39 | 40 | below` 41 | const doc = nlp.fromMarkdown(md) 42 | console.log(doc) -------------------------------------------------------------------------------- /plugins/_experiments/cmd-k/README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
a plugin for compromise
5 | 6 | 7 | 8 | 9 | 10 | v 11 | 12 | 13 | 14 | 15 |
16 |
17 | 18 |
19 | npm install compromise-cmd-k 20 |
import test from 'tape'
import nlp from './_lib.js'

test('misc ngrams', function (t) {
  // [method name, input text, expected count, message]
  const counts = [
    ['ngrams', `quickly, suddenly`, 3, 'found three ngrams'],
    ['unigrams', `john, bill, joe`, 3, 'found three unigrams'],
    ['bigrams', `john, bill, joe`, 2, 'found 2 bigrams'],
    ['trigrams', `john, bill, joe`, 1, 'found 1 trigrams'],
  ]
  for (const [method, input, want, msg] of counts) {
    t.equal(nlp(input)[method]().length, want, msg)
  }

  // endgrams - repeated sentence-endings
  const doc = nlp('i am in houston texas. i am a good person. so i think he is a good person.')
  const arr = doc.endgrams({ size: 2 }) || []
  t.equal(arr.length, 2, 'found 2 endgrams of size-2')
  const first = arr[0] || {}
  t.equal(first.normal, 'good person', 'found good person')
  t.equal(first.count, 2, 'found 2 good person results')
  t.end()
})