├── .gitignore
├── src
    ├── config.js
    ├── default_config.js
    ├── index.js
    ├── methods.js
    ├── utils.js
    └── stop_words.js
├── webpack.config.js
├── package.json
├── README.md
└── dist
    └── index.js


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .tgz
3 | npm-debug.log


--------------------------------------------------------------------------------
/src/config.js:
--------------------------------------------------------------------------------
1 | import DefaultConfig from './default_config'
2 | import deepmerge from 'deepmerge'
3 | 
4 | export default function Config(opts) {  
5 |   return deepmerge(DefaultConfig, opts || {})
6 | }


--------------------------------------------------------------------------------
/src/default_config.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |   // default options; Config() merges user-supplied options over these
 3 |   htmlTags: ['p', 'b', 'em', 'title'], // tags whose text parseHtml extracts
 4 |   method: 'combined', // keyword generation method selected in methods.js
 5 |   useDefaultStopWords: true, // when true, clean() removes the bundled stop words
 6 |   maxNumberOfKeywords: 10, // cap on the number of keywords filterWords returns
 7 |   minKeywordLength: 3, // length threshold filterWords applies to each keyword
 8 |   ngram: { // passed to nlp.ngram by generateNGrams
 9 |     min_count: 3, // minimum occurrences before an n-gram is considered (per README)
10 |     max_size: 1 // largest n-gram size kept by sortNGrams
11 |   },
12 |   progressiveGeneration: true // lets generateNGrams lower min_count when too few n-grams are found
13 | }


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
 1 | import Utils from './utils'
 2 | import startProcess from './methods'
 3 | import Config from './config'
 4 | 
 5 | export default class Topick {
 6 | 
 7 |   static getKeywords(uri,opts) {
 8 |     const config = Config(opts)
 9 |     let cb = (typeof arguments[arguments.length-1] === "function") ? arguments[arguments.length-1] : undefined
10 |     return Utils.httpGet(uri)
11 |     .then((res) => {
12 |       let result = startProcess(res.text,config)
13 |       if (cb) { cb(result) }
14 |       return result
15 |     })
16 |     .catch(() => {
17 |       let result = startProcess(uri,config)
18 |       if (cb) { cb(result) }
19 |       return result
20 |     })
21 |   }
22 | 
23 |   static getDomain(uri) {
24 |     return Utils.getDomainString(uri)
25 |   }
26 | 
27 | }


--------------------------------------------------------------------------------
/webpack.config.js:
--------------------------------------------------------------------------------
 1 | var webpack = require('webpack')
 2 | var path = require('path')
 3 | 
 4 | module.exports = { // webpack build configuration
 5 |   entry: [
 6 |     path.resolve(__dirname, 'src/index.js')
 7 |   ],
 8 |   // the bundle is written to dist/index.js as a commonjs2 library named "topick"
 9 |   output: {
10 |     path: path.resolve(__dirname, 'dist'),
11 |     filename: "index.js",
12 |     library: "topick",
13 |     libraryTarget: "commonjs2"
14 |   },
15 |   // runtime dependencies declared as externals instead of being bundled
16 |   externals: {
17 |     "nlp_compromise": "nlp_compromise",
18 |     "superagent-bluebird-promise": "superagent-bluebird-promise",
19 |     "htmlparser2": "htmlparser2",
20 |     "text-miner": "text-miner"
21 |   },
22 |   // .js files under src/ are run through babel-loader
23 |   module: {
24 |     loaders: [
25 |       {
26 |         test: /\.js$/,
27 |         loaders: ['babel-loader'],
28 |         exclude: /node_modules/,
29 |         include: path.resolve(__dirname, 'src')
30 |       }
31 |     ]
32 |   }
33 | }


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "topick",
 3 |   "version": "0.1.3",
 4 |   "author": "Lau Siaw Young <lausiawyoung@gmail.com>",
 5 |   "description": "One trick pony NLP library for extracting keywords from HTML documents",
 6 |   "main": "./dist/index.js",
 7 |   "dependencies": {
 8 |     "deepmerge": "^0.2.10",
 9 |     "htmlparser2": "^3.8.3",
10 |     "nlp_compromise": "^1.1.2",
11 |     "superagent-bluebird-promise": "^2.0.2",
12 |     "text-miner": "^1.0.3"
13 |   },
14 |   "devDependencies": {
15 |     "babel-core": "^5.8.3",
16 |     "babel-loader": "^5.3.2",
17 |     "webpack": "^1.10.3",
18 |     "webpack-dev-server": "^1.10.1"
19 |   },
20 |   "repository": {
21 |     "type": "git",
22 |     "url": "git@github.com:siawyoung/topick.git"
23 |   },
24 |   "bugs": {
25 |     "url": "https://github.com/siawyoung/topick/issues"
26 |   },
27 |   "keywords": [
28 |     "nlp",
29 |     "keywords",
30 |     "tags",
31 |     "extract"
32 |   ],
33 |   "scripts": {
34 |     "start": "webpack-dev-server -d",
35 |     "clean": "rm -rf dist/",
36 |     "test": "echo \"Error: no test specified\" && exit 1"
37 |   },
38 |   "license": "MIT"
39 | }
40 | 


--------------------------------------------------------------------------------
/src/methods.js:
--------------------------------------------------------------------------------
 1 | import Utils from './utils'
 2 | 
 3 | export default function(text,opts) {
 4 | 
 5 |   let method = opts.method
 6 |   let cleanedText = Utils.clean(Utils.parseHtml(text, opts), opts)
 7 | 
 8 |   switch(method) {
 9 | 
10 |     case "combined":
11 |       return Methods.combineNGramsAndNamedEntities(cleanedText,opts)
12 |     case "ngram":
13 |       return Methods.useNGrams(cleanedText,opts)
14 |     case "namedentites":
15 |       return Methods.useNamedEntities(cleanedText,ops)
16 |     default:
17 |       return Methods.combineNGramsAndNamedEntities(cleanedText,opts)
18 |   
19 |   }
20 | 
21 | }
22 | 
23 | class Methods {
24 | 
25 |   static useNGrams(text,opts) {
26 |     return Utils.filterWords(Utils.sortNGrams(Utils.generateNGrams(text,opts), opts), opts)
27 |   }
28 | 
29 |   static useNamedEntities(text,opts) {
30 |     return Utils.useNGrams(Utils.generateNamedEntitiesString(text),opts)
31 |   }
32 | 
33 |   static combineNGramsAndNamedEntities(text,opts) {
34 |     return Utils.filterWords(Utils.sortNGrams(
35 |       Utils.generateNGrams(text,opts)
36 |       .concat(Utils.generateNGrams(Utils.generateNamedEntitiesString(text), opts)), opts
37 |     ), opts)
38 |   }
39 | 
40 | }


--------------------------------------------------------------------------------
/src/utils.js:
--------------------------------------------------------------------------------
  1 | import nlp from 'nlp_compromise'
  2 | import request from 'superagent-bluebird-promise'
  3 | import htmlparser from 'htmlparser2'
  4 | import tm from 'text-miner'
  5 | 
  6 | export default class Utils {
  7 | 
  8 |   // returns a get request wrapped in a promise
  9 |   static httpGet(uri) {
 10 |     return request.get(uri)
 11 |   }
 12 | 
 13 |   static httpGetSync(uri) {
 14 |     let req = new XMLHttpRequest()
 15 |     req.open('GET', uri, false)
 16 |     req.send(null)
 17 |     return req.status === 200 ? req.responseText : uri
 18 |   }
 19 | 
 20 |   // parses and extracts text from the html tags supplied in opts
 21 |   static parseHtml(rawHtml,opts) {
 22 |     let outputString = ""
 23 |     let writeFlag = false
 24 |     let tags = opts.htmlTags
 25 |     let parser = new htmlparser.Parser({
 26 | 
 27 |       onopentag: (name, attribs) => {
 28 |         if (tags.includes(name)) {
 29 |           writeFlag = true
 30 |         }
 31 |       },
 32 |       ontext: (text) =>  {
 33 |         if (writeFlag) {
 34 |           outputString += ` ${text}`
 35 |         }
 36 |       },
 37 |       onclosetag: (tagname) => {
 38 |         writeFlag = false
 39 |       }
 40 |     }, {decodeEntities: true})
 41 | 
 42 |     parser.write(rawHtml)
 43 |     parser.end()
 44 |     return outputString
 45 |   }
 46 | 
 47 |   // custom compare function for comparing ngram objects by their count property:
 48 |   // [{ word: 'asd', count: 3 }, { word: 'asdf', count: 2 }]
 49 |   static compareNGramByCount(a,b) {
 50 |     if (a.count < b.count) { return -1 }
 51 |     else if (a.count > b.count) { return 1 }
 52 |     else { return 0 }
 53 |   }
 54 | 
 55 |   // this does two things
 56 |   // first sort by count
 57 |   // then it retrieves the actual word from each ngram object
 58 |   // accepts array of NGram objects: [{word: 'adsf', count: 1}, ...]
 59 |   // returns array of strings: ['asdf', ...]
 60 |   static sortNGrams(ngrams,opts) {
 61 |     return ngrams.filter((ngram) => ngram.size <= opts.ngram.max_size).sort(this.compareNGramByCount).map((ngram) => ngram.word)
 62 |   }
 63 | 
 64 |   // while taking the first n items, ignores duplicates
 65 |   // accepts and returns array of strings
 66 |   static filterWords(wordArray,opts) {
 67 |     let output = []
 68 |     for (var i = wordArray.length - 1; i >= 0; i--) {
 69 |       let currWord = wordArray[i]
 70 |       if (output.length >= opts.maxNumberOfKeywords) { return output }
 71 |       if (output.includes(currWord)) { continue }
 72 |       if (currWord.length <= opts.minKeywordLength) { continue }
 73 |       output.push(currWord)
 74 |     }
 75 |     return output
 76 |   }
 77 | 
 78 |   // generates ngrams with settings specified by opts
 79 |   static generateNGrams(text,opts) {
 80 |     let ngrams = nlp.ngram(text, opts.ngram).reduce((init,curr) => init.concat(curr))
 81 |     if ( ngrams.length <= opts.maxNumberOfKeywords && opts.progressiveGeneration && opts.ngram.min_count >= 1 ) {
 82 |       opts.ngram.min_count -= 1
 83 |       ngrams = this.generateNGrams(text,opts)
 84 |     }
 85 |     return ngrams
 86 |   }
 87 | 
 88 |   // identifies named entities using nlp_compromise's spot function
 89 |   // returns a single string concatenating all the named entities for further processing using ngrams
 90 |   static generateNamedEntitiesString(text) {
 91 |     return nlp.spot(text).map((kw) => { return kw.text }).join(" ")
 92 |   }
 93 | 
 94 |   // this function performs cleaning on the document by:
 95 |   // expanding contractions (from i'll to I will)
 96 |   // removing inter punctuations (such as ? and !)
 97 |   // removing whitespace between words
 98 |   // removing stop words using the default stop word dictionary
 99 |   // removing custom stop words specified in the user supplied opts
100 |   static clean(text,opts) {
101 |     let c = new tm.Corpus([tm.utils.expandContractions(text)]).removeInterpunctuation().clean()
102 |     if (opts.useDefaultStopWords === true) {
103 |       let stop_words = require("./stop_words").stop_words
104 |       c = c.removeWords(stop_words, 'gi')
105 |     }
106 |     let custom_stop_words = opts.customStopWords
107 |     if (custom_stop_words) {
108 |       c = c.removeWords(custom_stop_words)
109 |     }
110 |     return c.documents[0];
111 |   }
112 | 
113 | 
114 |   // given a uri string http://google.com
115 |   // return 'google'
116 |   static getDomainString(uri) {
117 |     let domain;
118 |     // find & remove protocol (http, ftp, etc.) and get domain
119 |     if (uri.indexOf("://") > -1) {
120 |       domain = uri.split('/')[2]
121 |     } else {
122 |       domain = uri.split('/')[0]
123 |     }
124 |     // find & remove port number
125 |     // find and remove TLD
126 |     let splitDomain = domain.split(":")[0].split(".")
127 |     if (splitDomain.length >= 2) {
128 |       return splitDomain[splitDomain.length-2]  
129 |     } else {
130 |       return splitDomain[0]
131 |     }
132 |   }
133 | 
134 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Topick
  2 | 
  3 | One trick pony NLP library for extracting keywords from HTML documents. It uses `htmlparser2` for HTML parsing, `nlp_compromise` for NLP and `text-miner` for text cleaning and removing stop words.
  4 | 
  5 | Here is Topick in action, being used in a Telegram bot to autosuggest tags for links mentioned in a conversation:
  6 | 
  7 | <img src="http://siawyoung.com/coding/javascript/2015-07-28-topick/mure-sneak-peek-1.jpg" width="250">
  8 | <img src="http://siawyoung.com/coding/javascript/2015-07-28-topick/mure-sneak-peek-2.jpg" width="250">
  9 | 
 10 | ## Installation
 11 | 
 12 | `npm install topick`
 13 | 
 14 | Topick is intended primarily for server-side use because of cross-domain issues, although I'm working on making the codebase isomorphic so that browser use is possible as well (with an appropriate module loader such as webpack).
 15 | 
 16 | ## Usage
 17 | 
 18 | The simplest way to use Topick:
 19 | 
 20 | ```js
 21 | import Topick from 'topick'
 22 | 
 23 | Topick.getKeywords('http://example.com/').then((keywords) => {
 24 |   console.log(keywords); // ['most relevant keyword', 'very relevant keyword', 'somewhat relevant keyword']
 25 |   // do something with your keywords
 26 | })
 27 | ```
 28 | 
 29 | The keywords are arranged in order of decreasing relevance.
 30 | 
 31 | ### `getKeywords(uri[,opts,cb])`
 32 | 
 33 | #### Options
 34 | 
 35 | `getKeywords` takes either a valid `HTTP` URI or an HTML string, and returns a promise that resolves to an array of keywords. `getKeywords` also accepts an optional options object:
 36 | 
 37 | ```js
 38 | Topick.getKeywords('http://example.com/', {
 39 |   htmlTags: ['p'],
 40 |   ngram: {
 41 |     min_count: 4,
 42 |     max_size: 2
 43 |   }
 44 | }).then((keywords) => {
 45 |   console.log(keywords);
 46 | })
 47 | ```
 48 | 
 49 | Currently available options are:
 50 | 
 51 | ##### `htmlTags`
 52 | 
 53 | Default: `['p', 'b', 'em', 'title']`
 54 | 
 55 | An array of HTML tags that should be parsed.
 56 | 
 57 | ##### `method`
 58 | 
 59 | Default: `combined`
 60 | 
 61 | Topick includes three methods for generating keywords. 
 62 | 
 63 | `ngram`
 64 | 
 65 | Generates n-grams from the content string and ranks them in terms of frequency.
 66 | 
 67 | `namedentities`
 68 | 
 69 | Uses `nlp_compromise`'s `spot` method to identify [named entities](https://en.wikipedia.org/wiki/Named-entity_recognition) before generating n-grams based on these named entities.
 70 | 
 71 | `combined`
 72 | 
 73 | Runs both `ngram` and `namedentities` methods, then combines their ranking.
 74 | 
 75 | ##### `useDefaultStopWords`
 76 | 
 77 | Default: `true`
 78 | 
 79 | If true, uses Topick's internal stop words dictionary to remove stop words. If false, no stop word removal will be performed unless you supply your own stop word array (see `customStopWords`).
 80 | 
 81 | Topick's dictionary is a set union of all six English collections found [here](https://code.google.com/p/stop-words/).
 82 | 
 83 | ##### `customStopWords`
 84 | 
 85 | Default: `[]`
 86 | 
 87 | An array of strings that should be used as stop words. This has no bearing on `useDefaultStopWords`, although it should be populated with your own stop word array if `useDefaultStopWords` is set to `false`, else Topick will generate a lot of irrelevant keywords.
 88 | 
 89 | ##### `maxNumberOfKeywords`
 90 | 
 91 | Default: 10
 92 | 
 93 | Maximum number of keywords to generate.
 94 | 
 95 | ##### `minKeywordLength`
 96 | 
 97 | Default: 3
 98 | 
 99 | Minimum length of generated keywords.
100 | 
101 | ##### `ngram`
102 | 
103 | Default:
104 | 
105 | ```
106 | { min_count: 3, max_size: 1 }
107 | ```
108 | 
109 | Defines options for n-gram generation. 
110 | 
111 | `min_count` is the minimum number of times a particular n-gram should appear in the document before being considered. There should be no need to change this number.
112 | 
113 | `max_size` is the maximum size of n-grams that should be generated (defaults to generating unigrams).
114 | 
115 | ##### `progressiveGeneration`
116 | 
117 | Default: `true`
118 | 
119 | If set to true, `progressiveGeneration` will progressively generate n-grams with weaker settings until the specified number of keywords set in `maxNumberOfKeywords` is hit.
120 | 
121 | For example: if, with a `min_count` of 3 and a `maxNumberOfKeywords` of 10, Topick only generates 5 keywords, then `progressiveGeneration` will decrease the `min_count` to 2, and then to 1, until 10 keywords can be generated.
122 | 
123 | `progressiveGeneration` does not guarantee that `maxNumberOfKeywords` keywords will be generated (like if even at `min_count` of 1, your specified `maxNumberOfKeywords` still cannot be reached).
124 | 
125 | #### Callback
126 | 
127 | In case you're not familiar with promises or are unable to use them, `getKeywords` also accepts a callback function as its **last** argument:
128 | 
129 | ```js
130 | Topick.getKeywords("http://example.com", {
131 |   customStopWords: []
132 | }, (keywords) => {
133 |   console.log("This is the callback function");
134 |   console.log(keywords);
135 | })
136 | .then((keywords) => {
137 |   console.log("This is the promise");
138 |   console.log(keywords)
139 | })
140 | 
141 | // "This is the callback function"
142 | // ["cool keyword", "another cool keyword"]
143 | // "This is the promise"
144 | // ["cool keyword", "another cool keyword"]
145 | ```
146 | 
147 | Notice that regardless of whether a callback function is specified, `getKeywords` continues to return a Promise.
148 | 
149 | ### `getKeywordsSync(uri[, opts])`
150 | 
151 | There are no plans to support a synchronous version of `getKeywords`.
152 | 
153 | ### `getDomain(uri)`
154 | 
155 | Example:
156 | 
157 | ```js
158 | Topick.getDomain('http://example.com')
159 | ```
160 | 
161 | Given `http://example.com`, returns `example`. Removes URI scheme, port number, and TLD.
162 | 
163 | ## Contributing
164 | 
165 | Contributions are welcome!
166 | 
167 | Topick is written in ES6 wherever possible. The development workflow is centered primarily around webpack, so be sure to check out `webpack.config.js`.


--------------------------------------------------------------------------------
/src/stop_words.js:
--------------------------------------------------------------------------------
1 | export const stop_words = ["﻿able","about","above","abroad","according","accordingly","across","actually","adj","after","afterwards","again","against","ago","ahead","ain't","all","allow","allows","almost","alone","along","alongside","already","also","although","always","am","amid","amidst","among","amongst","an","and","another","any","anybody","anyhow","anyone","anything","anyway","anyways","anywhere","apart","appear","appreciate","appropriate","are","aren't","around","as","a's","aside","ask","asking","associated","at","available","away","awfully","back","backward","backwards","be","became","because","become","becomes","becoming","been","before","beforehand","begin","behind","being","believe","below","beside","besides","best","better","between","beyond","both","brief","but","by","came","can","cannot","cant","can't","caption","cause","causes","certain","certainly","changes","clearly","c'mon","co","co.","com","come","comes","concerning","consequently","consider","considering","contain","containing","contains","corresponding","could","couldn't","course","c's","currently","dare","daren't","definitely","described","despite","did","didn't","different","directly","do","does","doesn't","doing","done","don't","down","downwards","during","each","edu","eg","eight","eighty","either","else","elsewhere","end","ending","enough","entirely","especially","et","etc","even","ever","evermore","every","everybody","everyone","everything","everywhere","ex","exactly","example","except","fairly","far","farther","few","fewer","fifth","first","five","followed","following","follows","for","forever","former","formerly","forth","forward","found","four","from","further","furthermore","get","gets","getting","given","gives","go","goes","going","gone","got","gotten","greetings","had","hadn't","half","happens","hardly","has","hasn't","have","haven't","having","he","he'd","he'll","hello","help","hence","her","here","hereafter","hereby","herein","here's","hereupon","hers","herself","he's","hi","him"
,"himself","his","hither","hopefully","how","howbeit","however","hundred","i'd","ie","if","ignored","i'll","i'm","immediate","in","inasmuch","inc","inc.","indeed","indicate","indicated","indicates","inner","inside","insofar","instead","into","inward","is","isn't","it","it'd","it'll","its","it's","itself","i've","just","k","keep","keeps","kept","know","known","knows","last","lately","later","latter","latterly","least","less","lest","let","let's","like","liked","likely","likewise","little","look","looking","looks","low","lower","ltd","made","mainly","make","makes","many","may","maybe","mayn't","me","mean","meantime","meanwhile","merely","might","mightn't","mine","minus","miss","more","moreover","most","mostly","mr","mrs","much","must","mustn't","my","myself","name","namely","nd","near","nearly","necessary","need","needn't","needs","neither","never","neverf","neverless","nevertheless","new","next","nine","ninety","no","nobody","non","none","nonetheless","noone","no-one","nor","normally","not","nothing","notwithstanding","novel","now","nowhere","obviously","of","off","often","oh","ok","okay","old","on","once","one","ones","one's","only","onto","opposite","or","other","others","otherwise","ought","oughtn't","our","ours","ourselves","out","outside","over","overall","own","particular","particularly","past","per","perhaps","placed","please","plus","possible","presumably","probably","provided","provides","que","quite","qv","rather","rd","re","really","reasonably","recent","recently","regarding","regardless","regards","relatively","respectively","right","round","said","same","saw","say","saying","says","second","secondly","see","seeing","seem","seemed","seeming","seems","seen","self","selves","sensible","sent","serious","seriously","seven","several","shall","shan't","she","she'd","she'll","she's","should","shouldn't","since","six","so","some","somebody","someday","somehow","someone","something","sometime","sometimes","somewhat","somewhere","soon","sorry","specified","specify"
,"specifying","still","sub","such","sup","sure","take","taken","taking","tell","tends","th","than","thank","thanks","thanx","that","that'll","thats","that's","that've","the","their","theirs","them","themselves","then","thence","there","thereafter","thereby","there'd","therefore","therein","there'll","there're","theres","there's","thereupon","there've","these","they","they'd","they'll","they're","they've","thing","things","think","third","thirty","this","thorough","thoroughly","those","though","three","through","throughout","thru","thus","till","to","together","too","took","toward","towards","tried","tries","truly","try","trying","t's","twice","two","un","under","underneath","undoing","unfortunately","unless","unlike","unlikely","until","unto","up","upon","upwards","us","use","used","useful","uses","using","usually","v","value","various","versus","very","via","viz","vs","want","wants","was","wasn't","way","we","we'd","welcome","well","we'll","went","were","we're","weren't","we've","what","whatever","what'll","what's","what've","when","whence","whenever","where","whereafter","whereas","whereby","wherein","where's","whereupon","wherever","whether","which","whichever","while","whilst","whither","who","who'd","whoever","whole","who'll","whom","whomever","who's","whose","why","will","willing","wish","with","within","without","wonder","won't","would","wouldn't","yes","yet","you","you'd","you'll","your","you're","yours","yourself","yourselves","you've","zero","﻿a","how's","i","ours 
","when's","why's","a","able","b","c","d","e","f","g","h","j","l","m","n","o","p","q","r","s","t","u","uucp","w","x","y","z","zero","﻿I","www","amoungst","amount","bill","bottom","call","computer","con","couldnt","cry","de","describe","detail","due","eleven","empty","fifteen","fify","fill","find","fire","forty","front","full","give","hasnt","herse”","himse”","interest","itse”","mill","move","myse”","part","put","show","side","sincere","sixty","system","ten","thick","thin","top","twelve","twenty","abst","accordance","act","added","adopted","affected","affecting","affects","ah","announce","anymore","apparently","approximately","aren","arent","arise","auth","beginning","beginnings","begins","biol","briefly","ca","date","ed","effect","et-al","ff","fix","gave","giving","hed","heres","hes","hid","home","id","im","immediately","importance","important","index","information","invention","itd","keys","kg","km","largely","lets","line","'ll","means","mg","million","ml","mug","na","nay","necessarily","nos","noted","obtain","obtained","omitted","ord","owing","page","pages","poorly","possibly","potentially","pp","predominantly","present","previously","primarily","promptly","proud","quickly","ran","readily","ref","refs","related","research","resulted","resulting","results","run","sec","section","shed","shes","showed","shown","showns","shows","significant","significantly","similar","similarly","slightly","somethan","specifically","state","states","stop","strongly","substantially","successfully","sufficiently","suggest","thered","thereof","therere","thereto","theyd","theyre","thou","thoughh","thousand","throug","til","tip","ts","ups","usefully","usefulness","'ve","vol","vols","wed","whats","wheres","whim","whod","whos","widely","words","world","youd","youre"]


--------------------------------------------------------------------------------
/dist/index.js:
--------------------------------------------------------------------------------
  1 | module.exports =
  2 | /******/ (function(modules) { // webpackBootstrap
  3 | /******/ 	// The module cache
  4 | /******/ 	var installedModules = {};
  5 | 
  6 | /******/ 	// The require function
  7 | /******/ 	function __webpack_require__(moduleId) {
  8 | 
  9 | /******/ 		// Check if module is in cache
 10 | /******/ 		if(installedModules[moduleId])
 11 | /******/ 			return installedModules[moduleId].exports;
 12 | 
 13 | /******/ 		// Create a new module (and put it into the cache)
 14 | /******/ 		var module = installedModules[moduleId] = {
 15 | /******/ 			exports: {},
 16 | /******/ 			id: moduleId,
 17 | /******/ 			loaded: false
 18 | /******/ 		};
 19 | 
 20 | /******/ 		// Execute the module function
 21 | /******/ 		modules[moduleId].call(module.exports, module, module.exports, __webpack_require__);
 22 | 
 23 | /******/ 		// Flag the module as loaded
 24 | /******/ 		module.loaded = true;
 25 | 
 26 | /******/ 		// Return the exports of the module
 27 | /******/ 		return module.exports;
 28 | /******/ 	}
 29 | 
 30 | 
 31 | /******/ 	// expose the modules object (__webpack_modules__)
 32 | /******/ 	__webpack_require__.m = modules;
 33 | 
 34 | /******/ 	// expose the module cache
 35 | /******/ 	__webpack_require__.c = installedModules;
 36 | 
 37 | /******/ 	// __webpack_public_path__
 38 | /******/ 	__webpack_require__.p = "";
 39 | 
 40 | /******/ 	// Load entry module and return exports
 41 | /******/ 	return __webpack_require__(0);
 42 | /******/ })
 43 | /************************************************************************/
 44 | /******/ ([
 45 | /* 0 */
 46 | /***/ function(module, exports, __webpack_require__) {
 47 | 
 48 | 	module.exports = __webpack_require__(1);
 49 | 
 50 | 
 51 | /***/ },
 52 | /* 1 */
 53 | /***/ function(module, exports, __webpack_require__) {
 54 | 
 55 | 	'use strict';
 56 | 
 57 | 	Object.defineProperty(exports, '__esModule', {
 58 | 	  value: true
 59 | 	});
 60 | 
 61 | 	var _createClass = (function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ('value' in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })();
 62 | 
 63 | 	function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { 'default': obj }; }
 64 | 
 65 | 	function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError('Cannot call a class as a function'); } }
 66 | 
 67 | 	var _utils = __webpack_require__(2);
 68 | 
 69 | 	var _utils2 = _interopRequireDefault(_utils);
 70 | 
 71 | 	var _methods = __webpack_require__(8);
 72 | 
 73 | 	var _methods2 = _interopRequireDefault(_methods);
 74 | 
 75 | 	var _config = __webpack_require__(9);
 76 | 
 77 | 	var _config2 = _interopRequireDefault(_config);
 78 | 
 79 | 	var Topick = (function () {
 80 | 	  function Topick() {
 81 | 	    _classCallCheck(this, Topick);
 82 | 	  }
 83 | 
 84 | 	  _createClass(Topick, null, [{
 85 | 	    key: 'getKeywords',
 86 | 	    value: function getKeywords(uri, opts) {
 87 | 	      var config = (0, _config2['default'])(opts);
 88 | 	      var cb = typeof arguments[arguments.length - 1] === "function" ? arguments[arguments.length - 1] : undefined;
 89 | 	      return _utils2['default'].httpGet(uri).then(function (res) {
 90 | 	        var result = (0, _methods2['default'])(res.text, config);
 91 | 	        if (cb) {
 92 | 	          cb(result);
 93 | 	        }
 94 | 	        return result;
 95 | 	      })['catch'](function () {
 96 | 	        var result = (0, _methods2['default'])(uri, config);
 97 | 	        if (cb) {
 98 | 	          cb(result);
 99 | 	        }
100 | 	        return result;
101 | 	      });
102 | 	    }
103 | 	  }, {
104 | 	    key: 'getDomain',
105 | 	    value: function getDomain(uri) {
106 | 	      return _utils2['default'].getDomainString(uri);
107 | 	    }
108 | 	  }]);
109 | 
110 | 	  return Topick;
111 | 	})();
112 | 
113 | 	exports['default'] = Topick;
114 | 	module.exports = exports['default'];
115 | 
116 | /***/ },
117 | /* 2 */
118 | /***/ function(module, exports, __webpack_require__) {
119 | 
120 | 	'use strict';
121 | 
122 | 	Object.defineProperty(exports, '__esModule', {
123 | 	  value: true
124 | 	});
125 | 
126 | 	var _createClass = (function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ('value' in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })();
127 | 
128 | 	function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { 'default': obj }; }
129 | 
130 | 	function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError('Cannot call a class as a function'); } }
131 | 
132 | 	var _nlp_compromise = __webpack_require__(3);
133 | 
134 | 	var _nlp_compromise2 = _interopRequireDefault(_nlp_compromise);
135 | 
136 | 	var _superagentBluebirdPromise = __webpack_require__(4);
137 | 
138 | 	var _superagentBluebirdPromise2 = _interopRequireDefault(_superagentBluebirdPromise);
139 | 
140 | 	var _htmlparser2 = __webpack_require__(5);
141 | 
142 | 	var _htmlparser22 = _interopRequireDefault(_htmlparser2);
143 | 
144 | 	var _textMiner = __webpack_require__(6);
145 | 
146 | 	var _textMiner2 = _interopRequireDefault(_textMiner);
147 | 
148 | 	var Utils = (function () {
149 | 	  function Utils() {
150 | 	    _classCallCheck(this, Utils);
151 | 	  }
152 | 
153 | 	  _createClass(Utils, null, [{
154 | 	    key: 'httpGet',
155 | 
156 | 	    // returns a get request wrapped in a promise
157 | 	    value: function httpGet(uri) {
158 | 	      return _superagentBluebirdPromise2['default'].get(uri);
159 | 	    }
160 | 	  }, {
161 | 	    key: 'httpGetSync',
162 | 	    value: function httpGetSync(uri) {
163 | 	      var req = new XMLHttpRequest();
164 | 	      req.open('GET', uri, false);
165 | 	      req.send(null);
166 | 	      return req.status === 200 ? req.responseText : uri;
167 | 	    }
168 | 
169 | 	    // parses and extracts text from the html tags supplied in opts
170 | 	  }, {
171 | 	    key: 'parseHtml',
172 | 	    value: function parseHtml(rawHtml, opts) {
173 | 	      var outputString = "";
174 | 	      var writeFlag = false;
175 | 	      var tags = opts.htmlTags;
176 | 	      var parser = new _htmlparser22['default'].Parser({
177 | 
178 | 	        onopentag: function onopentag(name, attribs) {
179 | 	          if (tags.includes(name)) {
180 | 	            writeFlag = true;
181 | 	          }
182 | 	        },
183 | 	        ontext: function ontext(text) {
184 | 	          if (writeFlag) {
185 | 	            outputString += ' ' + text;
186 | 	          }
187 | 	        },
188 | 	        onclosetag: function onclosetag(tagname) {
189 | 	          writeFlag = false;
190 | 	        }
191 | 	      }, { decodeEntities: true });
192 | 
193 | 	      parser.write(rawHtml);
194 | 	      parser.end();
195 | 	      return outputString;
196 | 	    }
197 | 
198 | 	    // custom compare function for comparing ngram objects by their count property:
199 | 	    // [{ word: 'asd', count: 3 }, { word: 'asdf', count: 2 }]
200 | 	  }, {
201 | 	    key: 'compareNGramByCount',
202 | 	    value: function compareNGramByCount(a, b) {
203 | 	      if (a.count < b.count) {
204 | 	        return -1;
205 | 	      } else if (a.count > b.count) {
206 | 	        return 1;
207 | 	      } else {
208 | 	        return 0;
209 | 	      }
210 | 	    }
211 | 
212 | 	    // this does two things
213 | 	    // first sort by count
214 | 	    // then it retrieves the actual word from each ngram object
215 | 	    // accepts array of NGram objects: [{word: 'adsf', count: 1}, ...]
216 | 	    // returns array of strings: ['asdf', ...]
217 | 	  }, {
218 | 	    key: 'sortNGrams',
219 | 	    value: function sortNGrams(ngrams, opts) {
220 | 	      return ngrams.filter(function (ngram) {
221 | 	        return ngram.size <= opts.ngram.max_size;
222 | 	      }).sort(this.compareNGramByCount).map(function (ngram) {
223 | 	        return ngram.word;
224 | 	      });
225 | 	    }
226 | 
227 | 	    // while taking the first n items, ignores duplicates
228 | 	    // accepts and returns array of strings
229 | 	  }, {
230 | 	    key: 'filterWords',
231 | 	    value: function filterWords(wordArray, opts) {
232 | 	      var output = [];
233 | 	      for (var i = wordArray.length - 1; i >= 0; i--) {
234 | 	        var currWord = wordArray[i];
235 | 	        if (output.length >= opts.maxNumberOfKeywords) {
236 | 	          return output;
237 | 	        }
238 | 	        if (output.includes(currWord)) {
239 | 	          continue;
240 | 	        }
241 | 	        if (currWord.length <= opts.minKeywordLength) {
242 | 	          continue;
243 | 	        }
244 | 	        output.push(currWord);
245 | 	      }
246 | 	      return output;
247 | 	    }
248 | 
249 | 	    // generates ngrams with settings specified by opts
250 | 	  }, {
251 | 	    key: 'generateNGrams',
252 | 	    value: function generateNGrams(text, opts) {
253 | 	      var ngrams = _nlp_compromise2['default'].ngram(text, opts.ngram).reduce(function (init, curr) {
254 | 	        return init.concat(curr);
255 | 	      });
256 | 	      if (ngrams.length <= opts.maxNumberOfKeywords && opts.progressiveGeneration && opts.ngram.min_count >= 1) {
257 | 	        opts.ngram.min_count -= 1;
258 | 	        ngrams = this.generateNGrams(text, opts);
259 | 	      }
260 | 	      return ngrams;
261 | 	    }
262 | 
263 | 	    // identifies named entities using nlp_compromise's spot function
264 | 	    // returns a single string concatenating all the named entities for further processing using ngrams
265 | 	  }, {
266 | 	    key: 'generateNamedEntitiesString',
267 | 	    value: function generateNamedEntitiesString(text) {
268 | 	      return _nlp_compromise2['default'].spot(text).map(function (kw) {
269 | 	        return kw.text;
270 | 	      }).join(" ");
271 | 	    }
272 | 
273 | 	    // this function performs cleaning on the document by:
274 | 	    // expanding contractions (from i'll to I will)
275 | 	    // removing inter punctuations (such as ? and !)
276 | 	    // removing whitespace between words
277 | 	    // removing stop words using the default stop word dictionary
278 | 	    // removing custom stop words specified in the user supplied opts
279 | 	  }, {
280 | 	    key: 'clean',
281 | 	    value: function clean(text, opts) {
282 | 	      var c = new _textMiner2['default'].Corpus([_textMiner2['default'].utils.expandContractions(text)]).removeInterpunctuation().clean();
283 | 	      if (opts.useDefaultStopWords === true) {
284 | 	        var stop_words = __webpack_require__(7).stop_words;
285 | 	        c = c.removeWords(stop_words, 'gi');
286 | 	      }
287 | 	      var custom_stop_words = opts.customStopWords;
288 | 	      if (custom_stop_words) {
289 | 	        c = c.removeWords(custom_stop_words);
290 | 	      }
291 | 	      return c.documents[0];
292 | 	    }
293 | 
294 | 	    // given a uri string http://google.com
295 | 	    // return 'google'
296 | 	  }, {
297 | 	    key: 'getDomainString',
298 | 	    value: function getDomainString(uri) {
299 | 	      var domain = undefined;
300 | 	      // find & remove protocol (http, ftp, etc.) and get domain
301 | 	      if (uri.indexOf("://") > -1) {
302 | 	        domain = uri.split('/')[2];
303 | 	      } else {
304 | 	        domain = uri.split('/')[0];
305 | 	      }
306 | 	      // find & remove port number
307 | 	      // find and remove TLD
308 | 	      var splitDomain = domain.split(":")[0].split(".");
309 | 	      if (splitDomain.length >= 2) {
310 | 	        return splitDomain[splitDomain.length - 2];
311 | 	      } else {
312 | 	        return splitDomain[0];
313 | 	      }
314 | 	    }
315 | 	  }]);
316 | 
317 | 	  return Utils;
318 | 	})();
319 | 
320 | 	exports['default'] = Utils;
321 | 	module.exports = exports['default'];
322 | 
323 | /***/ },
324 | /* 3 */
325 | /***/ function(module, exports) {
326 | 
327 | 	module.exports = require("nlp_compromise");
328 | 
329 | /***/ },
330 | /* 4 */
331 | /***/ function(module, exports) {
332 | 
333 | 	module.exports = require("superagent-bluebird-promise");
334 | 
335 | /***/ },
336 | /* 5 */
337 | /***/ function(module, exports) {
338 | 
339 | 	module.exports = require("htmlparser2");
340 | 
341 | /***/ },
342 | /* 6 */
343 | /***/ function(module, exports) {
344 | 
345 | 	module.exports = require("text-miner");
346 | 
347 | /***/ },
348 | /* 7 */
349 | /***/ function(module, exports) {
350 | 
351 | 	"use strict";
352 | 
353 | 	Object.defineProperty(exports, "__esModule", {
354 | 	  value: true
355 | 	});
356 | 	var stop_words = ["﻿able", "about", "above", "abroad", "according", "accordingly", "across", "actually", "adj", "after", "afterwards", "again", "against", "ago", "ahead", "ain't", "all", "allow", "allows", "almost", "alone", "along", "alongside", "already", "also", "although", "always", "am", "amid", "amidst", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "a's", "aside", "ask", "asking", "associated", "at", "available", "away", "awfully", "back", "backward", "backwards", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "begin", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both", "brief", "but", "by", "came", "can", "cannot", "cant", "can't", "caption", "cause", "causes", "certain", "certainly", "changes", "clearly", "c'mon", "co", "co.", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldn't", "course", "c's", "currently", "dare", "daren't", "definitely", "described", "despite", "did", "didn't", "different", "directly", "do", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "during", "each", "edu", "eg", "eight", "eighty", "either", "else", "elsewhere", "end", "ending", "enough", "entirely", "especially", "et", "etc", "even", "ever", "evermore", "every", "everybody", "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "fairly", "far", "farther", "few", "fewer", "fifth", "first", "five", "followed", "following", "follows", "for", "forever", "former", "formerly", "forth", "forward", "found", "four", "from", "further", "furthermore", "get", "gets", "getting", "given", "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "had", "hadn't", "half", 
"happens", "hardly", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "hello", "help", "hence", "her", "here", "hereafter", "hereby", "herein", "here's", "hereupon", "hers", "herself", "he's", "hi", "him", "himself", "his", "hither", "hopefully", "how", "howbeit", "however", "hundred", "i'd", "ie", "if", "ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc", "inc.", "indeed", "indicate", "indicated", "indicates", "inner", "inside", "insofar", "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "its", "it's", "itself", "i've", "just", "k", "keep", "keeps", "kept", "know", "known", "knows", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like", "liked", "likely", "likewise", "little", "look", "looking", "looks", "low", "lower", "ltd", "made", "mainly", "make", "makes", "many", "may", "maybe", "mayn't", "me", "mean", "meantime", "meanwhile", "merely", "might", "mightn't", "mine", "minus", "miss", "more", "moreover", "most", "mostly", "mr", "mrs", "much", "must", "mustn't", "my", "myself", "name", "namely", "nd", "near", "nearly", "necessary", "need", "needn't", "needs", "neither", "never", "neverf", "neverless", "nevertheless", "new", "next", "nine", "ninety", "no", "nobody", "non", "none", "nonetheless", "noone", "no-one", "nor", "normally", "not", "nothing", "notwithstanding", "novel", "now", "nowhere", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones", "one's", "only", "onto", "opposite", "or", "other", "others", "otherwise", "ought", "oughtn't", "our", "ours", "ourselves", "out", "outside", "over", "overall", "own", "particular", "particularly", "past", "per", "perhaps", "placed", "please", "plus", "possible", "presumably", "probably", "provided", "provides", "que", "quite", "qv", "rather", "rd", "re", "really", "reasonably", "recent", "recently", "regarding", "regardless", "regards", "relatively", "respectively", "right", "round", "said", 
"same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "since", "six", "so", "some", "somebody", "someday", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "sub", "such", "sup", "sure", "take", "taken", "taking", "tell", "tends", "th", "than", "thank", "thanks", "thanx", "that", "that'll", "thats", "that's", "that've", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "there'd", "therefore", "therein", "there'll", "there're", "theres", "there's", "thereupon", "there've", "these", "they", "they'd", "they'll", "they're", "they've", "thing", "things", "think", "third", "thirty", "this", "thorough", "thoroughly", "those", "though", "three", "through", "throughout", "thru", "thus", "till", "to", "together", "too", "took", "toward", "towards", "tried", "tries", "truly", "try", "trying", "t's", "twice", "two", "un", "under", "underneath", "undoing", "unfortunately", "unless", "unlike", "unlikely", "until", "unto", "up", "upon", "upwards", "us", "use", "used", "useful", "uses", "using", "usually", "v", "value", "various", "versus", "very", "via", "viz", "vs", "want", "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well", "we'll", "went", "were", "we're", "weren't", "we've", "what", "whatever", "what'll", "what's", "what've", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "where's", "whereupon", "wherever", "whether", "which", "whichever", "while", "whilst", "whither", "who", "who'd", "whoever", "whole", "who'll", "whom", "whomever", "who's", "whose", "why", "will", "willing", "wish", "with", "within", "without", "wonder", "won't", "would", "wouldn't", 
"yes", "yet", "you", "you'd", "you'll", "your", "you're", "yours", "yourself", "yourselves", "you've", "zero", "﻿a", "how's", "i", "ours ", "when's", "why's", "a", "able", "b", "c", "d", "e", "f", "g", "h", "j", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "uucp", "w", "x", "y", "z", "zero", "﻿I", "www", "amoungst", "amount", "bill", "bottom", "call", "computer", "con", "couldnt", "cry", "de", "describe", "detail", "due", "eleven", "empty", "fifteen", "fify", "fill", "find", "fire", "forty", "front", "full", "give", "hasnt", "herse”", "himse”", "interest", "itse”", "mill", "move", "myse”", "part", "put", "show", "side", "sincere", "sixty", "system", "ten", "thick", "thin", "top", "twelve", "twenty", "abst", "accordance", "act", "added", "adopted", "affected", "affecting", "affects", "ah", "announce", "anymore", "apparently", "approximately", "aren", "arent", "arise", "auth", "beginning", "beginnings", "begins", "biol", "briefly", "ca", "date", "ed", "effect", "et-al", "ff", "fix", "gave", "giving", "hed", "heres", "hes", "hid", "home", "id", "im", "immediately", "importance", "important", "index", "information", "invention", "itd", "keys", "kg", "km", "largely", "lets", "line", "'ll", "means", "mg", "million", "ml", "mug", "na", "nay", "necessarily", "nos", "noted", "obtain", "obtained", "omitted", "ord", "owing", "page", "pages", "poorly", "possibly", "potentially", "pp", "predominantly", "present", "previously", "primarily", "promptly", "proud", "quickly", "ran", "readily", "ref", "refs", "related", "research", "resulted", "resulting", "results", "run", "sec", "section", "shed", "shes", "showed", "shown", "showns", "shows", "significant", "significantly", "similar", "similarly", "slightly", "somethan", "specifically", "state", "states", "stop", "strongly", "substantially", "successfully", "sufficiently", "suggest", "thered", "thereof", "therere", "thereto", "theyd", "theyre", "thou", "thoughh", "thousand", "throug", "til", "tip", "ts", "ups", "usefully", 
"usefulness", "'ve", "vol", "vols", "wed", "whats", "wheres", "whim", "whod", "whos", "widely", "words", "world", "youd", "youre"];
357 | 	exports.stop_words = stop_words;
358 | 
359 | /***/ },
360 | /* 8 */
361 | /***/ function(module, exports, __webpack_require__) {
362 | 
363 | 	"use strict";
364 | 
365 | 	Object.defineProperty(exports, "__esModule", {
366 | 	  value: true
367 | 	});
368 | 
369 | 	var _createClass = (function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })();
370 | 
371 | 	function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; }
372 | 
373 | 	function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
374 | 
375 | 	var _utils = __webpack_require__(2);
376 | 
377 | 	var _utils2 = _interopRequireDefault(_utils);
378 | 
379 | 	exports["default"] = function (text, opts) {
380 | 
381 | 	  var method = opts.method;
382 | 	  var cleanedText = _utils2["default"].clean(_utils2["default"].parseHtml(text, opts), opts);
383 | 
384 | 	  switch (method) {
385 | 
386 | 	    case "combined":
387 | 	      return Methods.combineNGramsAndNamedEntities(cleanedText, opts);
388 | 	    case "ngram":
389 | 	      return Methods.useNGrams(cleanedText, opts);
390 | 	    case "namedentites":
391 | 	      return Methods.useNamedEntities(cleanedText, ops);
392 | 	    default:
393 | 	      return Methods.combineNGramsAndNamedEntities(cleanedText, opts);
394 | 
395 | 	  }
396 | 	};
397 | 
398 | 	var Methods = (function () {
399 | 	  function Methods() {
400 | 	    _classCallCheck(this, Methods);
401 | 	  }
402 | 
403 | 	  _createClass(Methods, null, [{
404 | 	    key: "useNGrams",
405 | 	    value: function useNGrams(text, opts) {
406 | 	      return _utils2["default"].filterWords(_utils2["default"].sortNGrams(_utils2["default"].generateNGrams(text, opts), opts), opts);
407 | 	    }
408 | 	  }, {
409 | 	    key: "useNamedEntities",
410 | 	    value: function useNamedEntities(text, opts) {
411 | 	      return _utils2["default"].useNGrams(_utils2["default"].generateNamedEntitiesString(text), opts);
412 | 	    }
413 | 	  }, {
414 | 	    key: "combineNGramsAndNamedEntities",
415 | 	    value: function combineNGramsAndNamedEntities(text, opts) {
416 | 	      return _utils2["default"].filterWords(_utils2["default"].sortNGrams(_utils2["default"].generateNGrams(text, opts).concat(_utils2["default"].generateNGrams(_utils2["default"].generateNamedEntitiesString(text), opts)), opts), opts);
417 | 	    }
418 | 	  }]);
419 | 
420 | 	  return Methods;
421 | 	})();
422 | 
423 | 	module.exports = exports["default"];
424 | 
425 | /***/ },
426 | /* 9 */
427 | /***/ function(module, exports, __webpack_require__) {
428 | 
429 | 	'use strict';
430 | 
431 | 	Object.defineProperty(exports, '__esModule', {
432 | 	  value: true
433 | 	});
434 | 	exports['default'] = Config;
435 | 
436 | 	function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { 'default': obj }; }
437 | 
438 | 	var _default_config = __webpack_require__(10);
439 | 
440 | 	var _default_config2 = _interopRequireDefault(_default_config);
441 | 
442 | 	var _deepmerge = __webpack_require__(11);
443 | 
444 | 	var _deepmerge2 = _interopRequireDefault(_deepmerge);
445 | 
446 | 	function Config(opts) {
447 | 	  return (0, _deepmerge2['default'])(_default_config2['default'], opts || {});
448 | 	}
449 | 
450 | 	module.exports = exports['default'];
451 | 
452 | /***/ },
453 | /* 10 */
454 | /***/ function(module, exports) {
455 | 
456 | 	'use strict';
457 | 
458 | 	module.exports = {
459 | 
460 | 	  htmlTags: ['p', 'b', 'em', 'title'],
461 | 	  method: 'combined',
462 | 	  useDefaultStopWords: true,
463 | 	  maxNumberOfKeywords: 10,
464 | 	  minKeywordLength: 3,
465 | 	  ngram: {
466 | 	    min_count: 3,
467 | 	    max_size: 1
468 | 	  },
469 | 	  progressiveGeneration: true
470 | 	};
471 | 
472 | /***/ },
473 | /* 11 */
474 | /***/ function(module, exports, __webpack_require__) {
475 | 
476 | 	var __WEBPACK_AMD_DEFINE_FACTORY__, __WEBPACK_AMD_DEFINE_RESULT__;(function (root, factory) {
477 | 	    if (true) {
478 | 	        !(__WEBPACK_AMD_DEFINE_FACTORY__ = (factory), __WEBPACK_AMD_DEFINE_RESULT__ = (typeof __WEBPACK_AMD_DEFINE_FACTORY__ === 'function' ? (__WEBPACK_AMD_DEFINE_FACTORY__.call(exports, __webpack_require__, exports, module)) : __WEBPACK_AMD_DEFINE_FACTORY__), __WEBPACK_AMD_DEFINE_RESULT__ !== undefined && (module.exports = __WEBPACK_AMD_DEFINE_RESULT__));
479 | 	    } else if (typeof exports === 'object') {
480 | 	        module.exports = factory();
481 | 	    } else {
482 | 	        root.deepmerge = factory();
483 | 	    }
484 | 	}(this, function () {
485 | 
486 | 	return function deepmerge(target, src) {
487 | 	    var array = Array.isArray(src);
488 | 	    var dst = array && [] || {};
489 | 
490 | 	    if (array) {
491 | 	        target = target || [];
492 | 	        dst = dst.concat(target);
493 | 	        src.forEach(function(e, i) {
494 | 	            if (typeof dst[i] === 'undefined') {
495 | 	                dst[i] = e;
496 | 	            } else if (typeof e === 'object') {
497 | 	                dst[i] = deepmerge(target[i], e);
498 | 	            } else {
499 | 	                if (target.indexOf(e) === -1) {
500 | 	                    dst.push(e);
501 | 	                }
502 | 	            }
503 | 	        });
504 | 	    } else {
505 | 	        if (target && typeof target === 'object') {
506 | 	            Object.keys(target).forEach(function (key) {
507 | 	                dst[key] = target[key];
508 | 	            })
509 | 	        }
510 | 	        Object.keys(src).forEach(function (key) {
511 | 	            if (typeof src[key] !== 'object' || !src[key]) {
512 | 	                dst[key] = src[key];
513 | 	            }
514 | 	            else {
515 | 	                if (!target[key]) {
516 | 	                    dst[key] = src[key];
517 | 	                } else {
518 | 	                    dst[key] = deepmerge(target[key], src[key]);
519 | 	                }
520 | 	            }
521 | 	        });
522 | 	    }
523 | 
524 | 	    return dst;
525 | 	}
526 | 
527 | 	}));
528 | 
529 | 
530 | /***/ }
531 | /******/ ]);


--------------------------------------------------------------------------------