├── .gitignore ├── src ├── config.js ├── default_config.js ├── index.js ├── methods.js ├── utils.js └── stop_words.js ├── webpack.config.js ├── package.json ├── README.md └── dist └── index.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .tgz 3 | npm-debug.log -------------------------------------------------------------------------------- /src/config.js: -------------------------------------------------------------------------------- 1 | import DefaultConfig from './default_config' 2 | import deepmerge from 'deepmerge' 3 | 4 | export default function Config(opts) { 5 | return deepmerge(DefaultConfig, opts || {}) 6 | } -------------------------------------------------------------------------------- /src/default_config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | 3 | htmlTags: ['p', 'b', 'em', 'title'], 4 | method: 'combined', 5 | useDefaultStopWords: true, 6 | maxNumberOfKeywords: 10, 7 | minKeywordLength: 3, 8 | ngram: { 9 | min_count: 3, 10 | max_size: 1 11 | }, 12 | progressiveGeneration: true 13 | } -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import Utils from './utils' 2 | import startProcess from './methods' 3 | import Config from './config' 4 | 5 | export default class Topick { 6 | 7 | static getKeywords(uri,opts) { 8 | const config = Config(opts) 9 | let cb = (typeof arguments[arguments.length-1] === "function") ? arguments[arguments.length-1] : undefined 10 | return Utils.httpGet(uri) 11 | .then((res) => { 12 | let result = startProcess(res.text,config) 13 | if (cb) { cb(result) } 14 | return result 15 | }) 16 | .catch(() => { 17 | let result = startProcess(uri,config) 18 | if (cb) { cb(result) } 19 | return result 20 | }) 21 | } 22 | 23 | static getDomain(uri) { 24 | return Utils.getDomainString(uri) 25 | } 26 | 27 | } -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | var webpack = require('webpack') 2 | var path = require('path') 3 | 4 | module.exports = { 5 | entry: [ 6 | path.resolve(__dirname, 'src/index.js') 7 | ], 8 | 9 | output: { 10 | path: path.resolve(__dirname, 'dist'), 11 | filename: "index.js", 12 | library: "topick", 13 | libraryTarget: "commonjs2" 14 | }, 15 | 16 | externals: { 17 | "nlp_compromise": "nlp_compromise", 18 | "superagent-bluebird-promise": "superagent-bluebird-promise", 19 | "htmlparser2": "htmlparser2", 20 | "text-miner": "text-miner" 21 | }, 22 | 23 | module: { 24 | loaders: [ 25 | { 26 | test: /\.js$/, 27 | loaders: ['babel-loader'], 28 | exclude: /node_modules/, 29 | include: path.resolve(__dirname, 'src') 30 | } 31 | ] 32 | } 33 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "topick", 3 | "version": "0.1.3", 4 | "author": "Lau Siaw Young ", 5 | "description": "One trick pony NLP library for extracting keywords from HTML documents", 6 | "main": "./dist/index.js", 7 | "dependencies": { 8 | "deepmerge": "^0.2.10", 9 | "htmlparser2": "^3.8.3", 10 | "nlp_compromise": "^1.1.2", 11 | "superagent-bluebird-promise": "^2.0.2", 12 | "text-miner": "^1.0.3" 13 | }, 14 | "devDependencies": { 15 | "babel-core": "^5.8.3", 16 | "babel-loader": "^5.3.2", 17 | "webpack": "^1.10.3", 18 | "webpack-dev-server": "^1.10.1" 19 | }, 20 | "repository": { 21 | "type": "git", 22 | "url": "git@github.com:siawyoung/topick.git" 23 | }, 24 | "bugs": { 25 | "url": "https://github.com/siawyoung/topick/issues" 26 | }, 27 | "keywords": [ 28 | "nlp", 29 | "keywords", 30 | "tags", 31 | "extract" 32 | ], 33 | "scripts": { 34 | "start": "webpack-dev-server -d", 35 | "clean": "rm -rf dist/", 36 | "test": "echo \"Error: no test specified\" && exit 1" 37 | }, 38 | "license": "MIT" 39 | } 40 | -------------------------------------------------------------------------------- /src/methods.js: -------------------------------------------------------------------------------- 1 | import Utils from './utils' 2 | 3 | export default function(text,opts) { 4 | 5 | let method = opts.method 6 | let cleanedText = Utils.clean(Utils.parseHtml(text, opts), opts) 7 | 8 | switch(method) { 9 | 10 | case "combined": 11 | return Methods.combineNGramsAndNamedEntities(cleanedText,opts) 12 | case "ngram": 13 | return Methods.useNGrams(cleanedText,opts) 14 | case "namedentites": 15 | return Methods.useNamedEntities(cleanedText,ops) 16 | default: 17 | return Methods.combineNGramsAndNamedEntities(cleanedText,opts) 18 | 19 | } 20 | 21 | } 22 | 23 | class Methods { 24 | 25 | static useNGrams(text,opts) { 26 | return Utils.filterWords(Utils.sortNGrams(Utils.generateNGrams(text,opts), opts), opts) 27 | } 28 | 29 | static useNamedEntities(text,opts) { 30 | return Utils.useNGrams(Utils.generateNamedEntitiesString(text),opts) 31 | } 32 | 33 | static combineNGramsAndNamedEntities(text,opts) { 34 | return Utils.filterWords(Utils.sortNGrams( 35 | Utils.generateNGrams(text,opts) 36 | .concat(Utils.generateNGrams(Utils.generateNamedEntitiesString(text), opts)), opts 37 | ), opts) 38 | } 39 | 40 | } -------------------------------------------------------------------------------- /src/utils.js: -------------------------------------------------------------------------------- 1 | import nlp from 'nlp_compromise' 2 | import request from 'superagent-bluebird-promise' 3 | import htmlparser from 'htmlparser2' 4 | import tm from 'text-miner' 5 | 6 | export default class Utils { 7 | 8 | // returns a get request wrapped in a promise 9 | static httpGet(uri) { 10 | return request.get(uri) 11 | } 12 | 13 | static httpGetSync(uri) { 14 | let req = new XMLHttpRequest() 15 | req.open('GET', uri, false) 16 | req.send(null) 17 | return req.status === 200 ? req.responseText : uri 18 | } 19 | 20 | // parses and extracts text from the html tags supplied in opts 21 | static parseHtml(rawHtml,opts) { 22 | let outputString = "" 23 | let writeFlag = false 24 | let tags = opts.htmlTags 25 | let parser = new htmlparser.Parser({ 26 | 27 | onopentag: (name, attribs) => { 28 | if (tags.includes(name)) { 29 | writeFlag = true 30 | } 31 | }, 32 | ontext: (text) => { 33 | if (writeFlag) { 34 | outputString += ` ${text}` 35 | } 36 | }, 37 | onclosetag: (tagname) => { 38 | writeFlag = false 39 | } 40 | }, {decodeEntities: true}) 41 | 42 | parser.write(rawHtml) 43 | parser.end() 44 | return outputString 45 | } 46 | 47 | // custom compare function for comparing ngram objects by their count property: 48 | // [{ word: 'asd', count: 3 }, { word: 'asdf', count: 2 }] 49 | static compareNGramByCount(a,b) { 50 | if (a.count < b.count) { return -1 } 51 | else if (a.count > b.count) { return 1 } 52 | else { return 0 } 53 | } 54 | 55 | // this does two things 56 | // first sort by count 57 | // then it retrieves the actual word from each ngram object 58 | // accepts array of NGram objects: [{word: 'adsf', count: 1}, ...] 59 | // returns array of strings: ['asdf', ...] 60 | static sortNGrams(ngrams,opts) { 61 | return ngrams.filter((ngram) => ngram.size <= opts.ngram.max_size).sort(this.compareNGramByCount).map((ngram) => ngram.word) 62 | } 63 | 64 | // while taking the first n items, ignores duplicates 65 | // accepts and returns array of strings 66 | static filterWords(wordArray,opts) { 67 | let output = [] 68 | for (var i = wordArray.length - 1; i >= 0; i--) { 69 | let currWord = wordArray[i] 70 | if (output.length >= opts.maxNumberOfKeywords) { return output } 71 | if (output.includes(currWord)) { continue } 72 | if (currWord.length <= opts.minKeywordLength) { continue } 73 | output.push(currWord) 74 | } 75 | return output 76 | } 77 | 78 | // generates ngrams with settings specified by opts 79 | static generateNGrams(text,opts) { 80 | let ngrams = nlp.ngram(text, opts.ngram).reduce((init,curr) => init.concat(curr)) 81 | if ( ngrams.length <= opts.maxNumberOfKeywords && opts.progressiveGeneration && opts.ngram.min_count >= 1 ) { 82 | opts.ngram.min_count -= 1 83 | ngrams = this.generateNGrams(text,opts) 84 | } 85 | return ngrams 86 | } 87 | 88 | // identifies named entities using nlp_compromise's spot function 89 | // returns a single string concatenating all the named entities for further processing using ngrams 90 | static generateNamedEntitiesString(text) { 91 | return nlp.spot(text).map((kw) => { return kw.text }).join(" ") 92 | } 93 | 94 | // this function performs cleaning on the document by: 95 | // expanding contractions (from i'll to I will) 96 | // removing inter punctuations (such as ? and !) 97 | // removing whitespace between words 98 | // removing stop words using the default stop word dictionary 99 | // removing custom stop words specified in the user supplied opts 100 | static clean(text,opts) { 101 | let c = new tm.Corpus([tm.utils.expandContractions(text)]).removeInterpunctuation().clean() 102 | if (opts.useDefaultStopWords === true) { 103 | let stop_words = require("./stop_words").stop_words 104 | c = c.removeWords(stop_words, 'gi') 105 | } 106 | let custom_stop_words = opts.customStopWords 107 | if (custom_stop_words) { 108 | c = c.removeWords(custom_stop_words) 109 | } 110 | return c.documents[0]; 111 | } 112 | 113 | 114 | // given a uri string http://google.com 115 | // return 'google' 116 | static getDomainString(uri) { 117 | let domain; 118 | // find & remove protocol (http, ftp, etc.) and get domain 119 | if (uri.indexOf("://") > -1) { 120 | domain = uri.split('/')[2] 121 | } else { 122 | domain = uri.split('/')[0] 123 | } 124 | // find & remove port number 125 | // find and remove TLD 126 | let splitDomain = domain.split(":")[0].split(".") 127 | if (splitDomain.length >= 2) { 128 | return splitDomain[splitDomain.length-2] 129 | } else { 130 | return splitDomain[0] 131 | } 132 | } 133 | 134 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Topick 2 | 3 | One trick pony NLP library for extracting keywords from HTML documents. It uses `htmlparser2` for HTML parsing, `nlp_compromise` for NLP and `text-miner` for text cleaning and removing stop words. 4 | 5 | Here is Topick in action, being used in a Telegram bot to autosuggest tags for links mentioned in a conversation: 6 | 7 | 8 | 9 | 10 | ## Installation 11 | 12 | `npm install topick` 13 | 14 | Topick is intended primarily for server-side use because of cross-domain issues, although I'm working on making the codebase isomorphic so that browser use is possible as well (with an appropriate module loader such as webpack). 15 | 16 | ## Usage 17 | 18 | The simplest way to use Topick: 19 | 20 | ```js 21 | import Topick from 'topick' 22 | 23 | Topick.getKeywords('http://example.com/').then((keywords) => { 24 | console.log(keywords); // ['most relevant keyword', 'very relevant keyword', 'somewhat relevant keyword'] 25 | // do something with your keywords 26 | }) 27 | ``` 28 | 29 | The keywords are arranged in order of decreasing relevance. 30 | 31 | ### `getKeywords(uri[,opts,cb])` 32 | 33 | #### Options 34 | 35 | `getKeywords` takes either a valid `HTTP` URI, or a HTML string, and returns a promise that can be resolved appropriately. `getKeywords` also accepts an optional options object: 36 | 37 | ```js 38 | Topick.getKeywords('http://example.com/', { 39 | htmlTags: ['p'], 40 | ngram: { 41 | min_count: 4, 42 | max_size: 2 43 | } 44 | }).then((keywords) => { 45 | console.log(keywords); 46 | }) 47 | ``` 48 | 49 | Currently available options are: 50 | 51 | ##### `htmlTags` 52 | 53 | Default: `['p', 'b', 'em', 'title']` 54 | 55 | An array of HTML tags that should be parsed. 56 | 57 | #### `method` 58 | 59 | Default: `combined` 60 | 61 | Topick includes three methods for generating keywords. 62 | 63 | `ngram` 64 | 65 | Generates n-grams from the content string and ranks them in terms of frequency. 66 | 67 | `namedentities` 68 | 69 | Uses `nlp_compromise`'s `spot` method to identify [named entities](https://en.wikipedia.org/wiki/Named-entity_recognition) before generating n-grams based on these named entities. 70 | 71 | `combined` 72 | 73 | Runs both `ngram` and `namedentities` methods, then combines their ranking. 74 | 75 | ##### `useDefaultStopWords` 76 | 77 | Default: `true` 78 | 79 | If true, uses Topick's internal stop words dictionary to remove stop words. If false, no stop word removal will be performed unless you supply your own stop word array (see `customStopWords`). 80 | 81 | Topick's dictionary is a set union of all six English collections found [here](https://code.google.com/p/stop-words/). 82 | 83 | ##### `customStopWords` 84 | 85 | Default: `[]` 86 | 87 | An array of strings that should be used as stop words. This has no bearing on `useDefaultStopWords`, although it should be populated with your own stop word array if `useDefaultStopWords` is set to `false`, else Topick will generate a lot of irrelevant keywords. 88 | 89 | ##### `maxNumberOfKeywords` 90 | 91 | Default: 10 92 | 93 | Maximum number of keywords to generate. 94 | 95 | ##### `minKeywordLength` 96 | 97 | Default: 3 98 | 99 | Minimum length of generated keywords. 100 | 101 | ##### `ngram` 102 | 103 | Default: 104 | 105 | ``` 106 | { min_count: 3, max_size: 1 } 107 | ``` 108 | 109 | Defines options for n-gram generation. 110 | 111 | `min_count` is the minimum number of times a particular n-gram should appear in the document before being considered. There should be no need to change this number. 112 | 113 | `max_size` is the maximum size of n-grams that should be generated (defaults to generating unigrams). 114 | 115 | ##### `progressiveGeneration` 116 | 117 | Default: `true` 118 | 119 | If set to true, `progressiveGeneration` will progressively generate n-grams with weaker settings until the specified number of keywords set in `maxNumberOfKeywords` is hit. 120 | 121 | For example: for a `min_count` of 3 and `maxNumberOfKeywords` of 10, Topick only generates 5 keywords, then `progressiveGeneration` will decrease the `min_count` to 2, and then to 1, until 10 keywords can be generated. 122 | 123 | `progressiveGeneration` does not guarantee that `maxNumberOfKeywords` keywords will be generated (like if even at `min_count` of 1, your specified `maxNumberOfKeywords` still cannot be reached). 124 | 125 | #### Callback 126 | 127 | In case you're not familar with promises or are unable to use them, `getKeywords` also accepts a callback function as its **last** argument: 128 | 129 | ```js 130 | topick.getKeywords("http://example.com", { 131 | customStopWords: [] 132 | }, (keywords) => { 133 | console.log("This is the callback function"); 134 | console.log(keywords); 135 | }) 136 | .then((keywords) => { 137 | console.log("This is the promise"); 138 | console.log(keywords) 139 | }) 140 | 141 | // "This is the callback function" 142 | // ["cool keyword", "another cool keyword"] 143 | // "This is the promise" 144 | // ["cool keyword", "another cool keyword"] 145 | ``` 146 | 147 | Notice that regardless of whether a callback function is specified, `getKeywords` continues to return a Promise. 148 | 149 | ### `getKeywordsSync(uri[, opts])` 150 | 151 | There are no plans to support a synchronous version of `getKeywords`. 152 | 153 | ### `getDomain(uri)` 154 | 155 | Example: 156 | 157 | ```js 158 | Topick.getDomain('http://example.com') 159 | ``` 160 | 161 | Given `http://example.com`, returns `example`. Removes URI scheme, port number, and TLD. 162 | 163 | ## Contributing 164 | 165 | Contributions are welcome! 166 | 167 | Topick is written in ES6 wherever possible. The development workflow is centered primarily around webpack, so be sure to check out `webpack.config.js`. -------------------------------------------------------------------------------- /src/stop_words.js: -------------------------------------------------------------------------------- 1 | export const stop_words = ["able","about","above","abroad","according","accordingly","across","actually","adj","after","afterwards","again","against","ago","ahead","ain't","all","allow","allows","almost","alone","along","alongside","already","also","although","always","am","amid","amidst","among","amongst","an","and","another","any","anybody","anyhow","anyone","anything","anyway","anyways","anywhere","apart","appear","appreciate","appropriate","are","aren't","around","as","a's","aside","ask","asking","associated","at","available","away","awfully","back","backward","backwards","be","became","because","become","becomes","becoming","been","before","beforehand","begin","behind","being","believe","below","beside","besides","best","better","between","beyond","both","brief","but","by","came","can","cannot","cant","can't","caption","cause","causes","certain","certainly","changes","clearly","c'mon","co","co.","com","come","comes","concerning","consequently","consider","considering","contain","containing","contains","corresponding","could","couldn't","course","c's","currently","dare","daren't","definitely","described","despite","did","didn't","different","directly","do","does","doesn't","doing","done","don't","down","downwards","during","each","edu","eg","eight","eighty","either","else","elsewhere","end","ending","enough","entirely","especially","et","etc","even","ever","evermore","every","everybody","everyone","everything","everywhere","ex","exactly","example","except","fairly","far","farther","few","fewer","fifth","first","five","followed","following","follows","for","forever","former","formerly","forth","forward","found","four","from","further","furthermore","get","gets","getting","given","gives","go","goes","going","gone","got","gotten","greetings","had","hadn't","half","happens","hardly","has","hasn't","have","haven't","having","he","he'd","he'll","hello","help","hence","her","here","hereafter","hereby","herein","here's","hereupon","hers","herself","he's","hi","him","himself","his","hither","hopefully","how","howbeit","however","hundred","i'd","ie","if","ignored","i'll","i'm","immediate","in","inasmuch","inc","inc.","indeed","indicate","indicated","indicates","inner","inside","insofar","instead","into","inward","is","isn't","it","it'd","it'll","its","it's","itself","i've","just","k","keep","keeps","kept","know","known","knows","last","lately","later","latter","latterly","least","less","lest","let","let's","like","liked","likely","likewise","little","look","looking","looks","low","lower","ltd","made","mainly","make","makes","many","may","maybe","mayn't","me","mean","meantime","meanwhile","merely","might","mightn't","mine","minus","miss","more","moreover","most","mostly","mr","mrs","much","must","mustn't","my","myself","name","namely","nd","near","nearly","necessary","need","needn't","needs","neither","never","neverf","neverless","nevertheless","new","next","nine","ninety","no","nobody","non","none","nonetheless","noone","no-one","nor","normally","not","nothing","notwithstanding","novel","now","nowhere","obviously","of","off","often","oh","ok","okay","old","on","once","one","ones","one's","only","onto","opposite","or","other","others","otherwise","ought","oughtn't","our","ours","ourselves","out","outside","over","overall","own","particular","particularly","past","per","perhaps","placed","please","plus","possible","presumably","probably","provided","provides","que","quite","qv","rather","rd","re","really","reasonably","recent","recently","regarding","regardless","regards","relatively","respectively","right","round","said","same","saw","say","saying","says","second","secondly","see","seeing","seem","seemed","seeming","seems","seen","self","selves","sensible","sent","serious","seriously","seven","several","shall","shan't","she","she'd","she'll","she's","should","shouldn't","since","six","so","some","somebody","someday","somehow","someone","something","sometime","sometimes","somewhat","somewhere","soon","sorry","specified","specify","specifying","still","sub","such","sup","sure","take","taken","taking","tell","tends","th","than","thank","thanks","thanx","that","that'll","thats","that's","that've","the","their","theirs","them","themselves","then","thence","there","thereafter","thereby","there'd","therefore","therein","there'll","there're","theres","there's","thereupon","there've","these","they","they'd","they'll","they're","they've","thing","things","think","third","thirty","this","thorough","thoroughly","those","though","three","through","throughout","thru","thus","till","to","together","too","took","toward","towards","tried","tries","truly","try","trying","t's","twice","two","un","under","underneath","undoing","unfortunately","unless","unlike","unlikely","until","unto","up","upon","upwards","us","use","used","useful","uses","using","usually","v","value","various","versus","very","via","viz","vs","want","wants","was","wasn't","way","we","we'd","welcome","well","we'll","went","were","we're","weren't","we've","what","whatever","what'll","what's","what've","when","whence","whenever","where","whereafter","whereas","whereby","wherein","where's","whereupon","wherever","whether","which","whichever","while","whilst","whither","who","who'd","whoever","whole","who'll","whom","whomever","who's","whose","why","will","willing","wish","with","within","without","wonder","won't","would","wouldn't","yes","yet","you","you'd","you'll","your","you're","yours","yourself","yourselves","you've","zero","a","how's","i","ours ","when's","why's","a","able","b","c","d","e","f","g","h","j","l","m","n","o","p","q","r","s","t","u","uucp","w","x","y","z","zero","I","www","amoungst","amount","bill","bottom","call","computer","con","couldnt","cry","de","describe","detail","due","eleven","empty","fifteen","fify","fill","find","fire","forty","front","full","give","hasnt","herse”","himse”","interest","itse”","mill","move","myse”","part","put","show","side","sincere","sixty","system","ten","thick","thin","top","twelve","twenty","abst","accordance","act","added","adopted","affected","affecting","affects","ah","announce","anymore","apparently","approximately","aren","arent","arise","auth","beginning","beginnings","begins","biol","briefly","ca","date","ed","effect","et-al","ff","fix","gave","giving","hed","heres","hes","hid","home","id","im","immediately","importance","important","index","information","invention","itd","keys","kg","km","largely","lets","line","'ll","means","mg","million","ml","mug","na","nay","necessarily","nos","noted","obtain","obtained","omitted","ord","owing","page","pages","poorly","possibly","potentially","pp","predominantly","present","previously","primarily","promptly","proud","quickly","ran","readily","ref","refs","related","research","resulted","resulting","results","run","sec","section","shed","shes","showed","shown","showns","shows","significant","significantly","similar","similarly","slightly","somethan","specifically","state","states","stop","strongly","substantially","successfully","sufficiently","suggest","thered","thereof","therere","thereto","theyd","theyre","thou","thoughh","thousand","throug","til","tip","ts","ups","usefully","usefulness","'ve","vol","vols","wed","whats","wheres","whim","whod","whos","widely","words","world","youd","youre"] -------------------------------------------------------------------------------- /dist/index.js: -------------------------------------------------------------------------------- 1 | module.exports = 2 | /******/ (function(modules) { // webpackBootstrap 3 | /******/ // The module cache 4 | /******/ var installedModules = {}; 5 | 6 | /******/ // The require function 7 | /******/ function __webpack_require__(moduleId) { 8 | 9 | /******/ // Check if module is in cache 10 | /******/ if(installedModules[moduleId]) 11 | /******/ return installedModules[moduleId].exports; 12 | 13 | /******/ // Create a new module (and put it into the cache) 14 | /******/ var module = installedModules[moduleId] = { 15 | /******/ exports: {}, 16 | /******/ id: moduleId, 17 | /******/ loaded: false 18 | /******/ }; 19 | 20 | /******/ // Execute the module function 21 | /******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); 22 | 23 | /******/ // Flag the module as loaded 24 | /******/ module.loaded = true; 25 | 26 | /******/ // Return the exports of the module 27 | /******/ return module.exports; 28 | /******/ } 29 | 30 | 31 | /******/ // expose the modules object (__webpack_modules__) 32 | /******/ __webpack_require__.m = modules; 33 | 34 | /******/ // expose the module cache 35 | /******/ __webpack_require__.c = installedModules; 36 | 37 | /******/ // __webpack_public_path__ 38 | /******/ __webpack_require__.p = ""; 39 | 40 | /******/ // Load entry module and return exports 41 | /******/ return __webpack_require__(0); 42 | /******/ }) 43 | /************************************************************************/ 44 | /******/ ([ 45 | /* 0 */ 46 | /***/ function(module, exports, __webpack_require__) { 47 | 48 | module.exports = __webpack_require__(1); 49 | 50 | 51 | /***/ }, 52 | /* 1 */ 53 | /***/ function(module, exports, __webpack_require__) { 54 | 55 | 'use strict'; 56 | 57 | Object.defineProperty(exports, '__esModule', { 58 | value: true 59 | }); 60 | 61 | var _createClass = (function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ('value' in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })(); 62 | 63 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { 'default': obj }; } 64 | 65 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError('Cannot call a class as a function'); } } 66 | 67 | var _utils = __webpack_require__(2); 68 | 69 | var _utils2 = _interopRequireDefault(_utils); 70 | 71 | var _methods = __webpack_require__(8); 72 | 73 | var _methods2 = _interopRequireDefault(_methods); 74 | 75 | var _config = __webpack_require__(9); 76 | 77 | var _config2 = _interopRequireDefault(_config); 78 | 79 | var Topick = (function () { 80 | function Topick() { 81 | _classCallCheck(this, Topick); 82 | } 83 | 84 | _createClass(Topick, null, [{ 85 | key: 'getKeywords', 86 | value: function getKeywords(uri, opts) { 87 | var config = (0, _config2['default'])(opts); 88 | var cb = typeof arguments[arguments.length - 1] === "function" ? arguments[arguments.length - 1] : undefined; 89 | return _utils2['default'].httpGet(uri).then(function (res) { 90 | var result = (0, _methods2['default'])(res.text, config); 91 | if (cb) { 92 | cb(result); 93 | } 94 | return result; 95 | })['catch'](function () { 96 | var result = (0, _methods2['default'])(uri, config); 97 | if (cb) { 98 | cb(result); 99 | } 100 | return result; 101 | }); 102 | } 103 | }, { 104 | key: 'getDomain', 105 | value: function getDomain(uri) { 106 | return _utils2['default'].getDomainString(uri); 107 | } 108 | }]); 109 | 110 | return Topick; 111 | })(); 112 | 113 | exports['default'] = Topick; 114 | module.exports = exports['default']; 115 | 116 | /***/ }, 117 | /* 2 */ 118 | /***/ function(module, exports, __webpack_require__) { 119 | 120 | 'use strict'; 121 | 122 | Object.defineProperty(exports, '__esModule', { 123 | value: true 124 | }); 125 | 126 | var _createClass = (function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ('value' in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })(); 127 | 128 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { 'default': obj }; } 129 | 130 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError('Cannot call a class as a function'); } } 131 | 132 | var _nlp_compromise = __webpack_require__(3); 133 | 134 | var _nlp_compromise2 = _interopRequireDefault(_nlp_compromise); 135 | 136 | var _superagentBluebirdPromise = __webpack_require__(4); 137 | 138 | var _superagentBluebirdPromise2 = _interopRequireDefault(_superagentBluebirdPromise); 139 | 140 | var _htmlparser2 = __webpack_require__(5); 141 | 142 | var _htmlparser22 = _interopRequireDefault(_htmlparser2); 143 | 144 | var _textMiner = __webpack_require__(6); 145 | 146 | var _textMiner2 = _interopRequireDefault(_textMiner); 147 | 148 | var Utils = (function () { 149 | function Utils() { 150 | _classCallCheck(this, Utils); 151 | } 152 | 153 | _createClass(Utils, null, [{ 154 | key: 'httpGet', 155 | 156 | // returns a get request wrapped in a promise 157 | value: function httpGet(uri) { 158 | return _superagentBluebirdPromise2['default'].get(uri); 159 | } 160 | }, { 161 | key: 'httpGetSync', 162 | value: function httpGetSync(uri) { 163 | var req = new XMLHttpRequest(); 164 | req.open('GET', uri, false); 165 | req.send(null); 166 | return req.status === 200 ? req.responseText : uri; 167 | } 168 | 169 | // parses and extracts text from the html tags supplied in opts 170 | }, { 171 | key: 'parseHtml', 172 | value: function parseHtml(rawHtml, opts) { 173 | var outputString = ""; 174 | var writeFlag = false; 175 | var tags = opts.htmlTags; 176 | var parser = new _htmlparser22['default'].Parser({ 177 | 178 | onopentag: function onopentag(name, attribs) { 179 | if (tags.includes(name)) { 180 | writeFlag = true; 181 | } 182 | }, 183 | ontext: function ontext(text) { 184 | if (writeFlag) { 185 | outputString += ' ' + text; 186 | } 187 | }, 188 | onclosetag: function onclosetag(tagname) { 189 | writeFlag = false; 190 | } 191 | }, { decodeEntities: true }); 192 | 193 | parser.write(rawHtml); 194 | parser.end(); 195 | return outputString; 196 | } 197 | 198 | // custom compare function for comparing ngram objects by their count property: 199 | // [{ word: 'asd', count: 3 }, { word: 'asdf', count: 2 }] 200 | }, { 201 | key: 'compareNGramByCount', 202 | value: function compareNGramByCount(a, b) { 203 | if (a.count < b.count) { 204 | return -1; 205 | } else if (a.count > b.count) { 206 | return 1; 207 | } else { 208 | return 0; 209 | } 210 | } 211 | 212 | // this does two things 213 | // first sort by count 214 | // then it retrieves the actual word from each ngram object 215 | // accepts array of NGram objects: [{word: 'adsf', count: 1}, ...] 216 | // returns array of strings: ['asdf', ...] 217 | }, { 218 | key: 'sortNGrams', 219 | value: function sortNGrams(ngrams, opts) { 220 | return ngrams.filter(function (ngram) { 221 | return ngram.size <= opts.ngram.max_size; 222 | }).sort(this.compareNGramByCount).map(function (ngram) { 223 | return ngram.word; 224 | }); 225 | } 226 | 227 | // while taking the first n items, ignores duplicates 228 | // accepts and returns array of strings 229 | }, { 230 | key: 'filterWords', 231 | value: function filterWords(wordArray, opts) { 232 | var output = []; 233 | for (var i = wordArray.length - 1; i >= 0; i--) { 234 | var currWord = wordArray[i]; 235 | if (output.length >= opts.maxNumberOfKeywords) { 236 | return output; 237 | } 238 | if (output.includes(currWord)) { 239 | continue; 240 | } 241 | if (currWord.length <= opts.minKeywordLength) { 242 | continue; 243 | } 244 | output.push(currWord); 245 | } 246 | return output; 247 | } 248 | 249 | // generates ngrams with settings specified by opts 250 | }, { 251 | key: 'generateNGrams', 252 | value: function generateNGrams(text, opts) { 253 | var ngrams = _nlp_compromise2['default'].ngram(text, opts.ngram).reduce(function (init, curr) { 254 | return init.concat(curr); 255 | }); 256 | if (ngrams.length <= opts.maxNumberOfKeywords && opts.progressiveGeneration && opts.ngram.min_count >= 1) { 257 | opts.ngram.min_count -= 1; 258 | ngrams = this.generateNGrams(text, opts); 259 | } 260 | return ngrams; 261 | } 262 | 263 | // identifies named entities using nlp_compromise's spot function 264 | // returns a single string concatenating all the named entities for further processing using ngrams 265 | }, { 266 | key: 'generateNamedEntitiesString', 267 | value: function generateNamedEntitiesString(text) { 268 | return _nlp_compromise2['default'].spot(text).map(function (kw) { 269 | return kw.text; 270 | }).join(" "); 271 | } 272 | 273 | // this function performs cleaning on the document by: 274 | // expanding contractions (from i'll to I will) 275 | // removing inter punctuations (such as ? and !) 276 | // removing whitespace between words 277 | // removing stop words using the default stop word dictionary 278 | // removing custom stop words specified in the user supplied opts 279 | }, { 280 | key: 'clean', 281 | value: function clean(text, opts) { 282 | var c = new _textMiner2['default'].Corpus([_textMiner2['default'].utils.expandContractions(text)]).removeInterpunctuation().clean(); 283 | if (opts.useDefaultStopWords === true) { 284 | var stop_words = __webpack_require__(7).stop_words; 285 | c = c.removeWords(stop_words, 'gi'); 286 | } 287 | var custom_stop_words = opts.customStopWords; 288 | if (custom_stop_words) { 289 | c = c.removeWords(custom_stop_words); 290 | } 291 | return c.documents[0]; 292 | } 293 | 294 | // given a uri string http://google.com 295 | // return 'google' 296 | }, { 297 | key: 'getDomainString', 298 | value: function getDomainString(uri) { 299 | var domain = undefined; 300 | // find & remove protocol (http, ftp, etc.) and get domain 301 | if (uri.indexOf("://") > -1) { 302 | domain = uri.split('/')[2]; 303 | } else { 304 | domain = uri.split('/')[0]; 305 | } 306 | // find & remove port number 307 | // find and remove TLD 308 | var splitDomain = domain.split(":")[0].split("."); 309 | if (splitDomain.length >= 2) { 310 | return splitDomain[splitDomain.length - 2]; 311 | } else { 312 | return splitDomain[0]; 313 | } 314 | } 315 | }]); 316 | 317 | return Utils; 318 | })(); 319 | 320 | exports['default'] = Utils; 321 | module.exports = exports['default']; 322 | 323 | /***/ }, 324 | /* 3 */ 325 | /***/ function(module, exports) { 326 | 327 | module.exports = require("nlp_compromise"); 328 | 329 | /***/ }, 330 | /* 4 */ 331 | /***/ function(module, exports) { 332 | 333 | module.exports = require("superagent-bluebird-promise"); 334 | 335 | /***/ }, 336 | /* 5 */ 337 | /***/ function(module, exports) { 338 | 339 | module.exports = require("htmlparser2"); 340 | 341 | /***/ }, 342 | /* 6 */ 343 | /***/ function(module, exports) { 344 | 345 | module.exports = require("text-miner"); 346 | 347 | /***/ }, 348 | /* 7 */ 349 | /***/ function(module, exports) { 350 | 351 | "use strict"; 352 | 353 | Object.defineProperty(exports, "__esModule", { 354 | value: true 355 | }); 356 | var stop_words = ["able", "about", "above", "abroad", "according", "accordingly", "across", "actually", "adj", "after", "afterwards", "again", "against", "ago", "ahead", "ain't", "all", "allow", "allows", "almost", "alone", "along", "alongside", "already", "also", "although", "always", "am", "amid", "amidst", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "a's", "aside", "ask", "asking", "associated", "at", "available", "away", "awfully", "back", "backward", "backwards", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "begin", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both", "brief", "but", "by", "came", "can", "cannot", "cant", "can't", "caption", "cause", "causes", "certain", "certainly", "changes", "clearly", "c'mon", "co", "co.", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldn't", "course", "c's", "currently", "dare", "daren't", "definitely", "described", "despite", "did", "didn't", "different", "directly", "do", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "during", "each", "edu", "eg", "eight", "eighty", "either", "else", "elsewhere", "end", "ending", "enough", "entirely", "especially", "et", "etc", "even", "ever", "evermore", "every", "everybody", "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "fairly", "far", "farther", "few", "fewer", "fifth", "first", "five", "followed", "following", "follows", "for", "forever", "former", "formerly", "forth", "forward", "found", "four", "from", "further", "furthermore", "get", "gets", "getting", "given", "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "had", "hadn't", "half", "happens", "hardly", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "hello", "help", "hence", "her", "here", "hereafter", "hereby", "herein", "here's", "hereupon", "hers", "herself", "he's", "hi", "him", "himself", "his", "hither", "hopefully", "how", "howbeit", "however", "hundred", "i'd", "ie", "if", "ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc", "inc.", "indeed", "indicate", "indicated", "indicates", "inner", "inside", "insofar", "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "its", "it's", "itself", "i've", "just", "k", "keep", "keeps", "kept", "know", "known", "knows", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like", "liked", "likely", "likewise", "little", "look", "looking", "looks", "low", "lower", "ltd", "made", "mainly", "make", "makes", "many", "may", "maybe", "mayn't", "me", "mean", "meantime", "meanwhile", "merely", "might", "mightn't", "mine", "minus", "miss", "more", "moreover", "most", "mostly", "mr", "mrs", "much", "must", "mustn't", "my", "myself", "name", "namely", "nd", "near", "nearly", "necessary", "need", "needn't", "needs", "neither", "never", "neverf", "neverless", "nevertheless", "new", "next", "nine", "ninety", "no", "nobody", "non", "none", "nonetheless", "noone", "no-one", "nor", "normally", "not", "nothing", "notwithstanding", "novel", "now", "nowhere", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones", "one's", "only", "onto", "opposite", "or", "other", "others", "otherwise", "ought", "oughtn't", "our", "ours", "ourselves", "out", "outside", "over", "overall", "own", "particular", "particularly", "past", "per", "perhaps", "placed", "please", "plus", "possible", "presumably", "probably", "provided", "provides", "que", "quite", "qv", "rather", "rd", "re", "really", "reasonably", "recent", "recently", "regarding", "regardless", "regards", "relatively", "respectively", "right", "round", "said", "same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "since", "six", "so", "some", "somebody", "someday", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "sub", "such", "sup", "sure", "take", "taken", "taking", "tell", "tends", "th", "than", "thank", "thanks", "thanx", "that", "that'll", "thats", "that's", "that've", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "there'd", "therefore", "therein", "there'll", "there're", "theres", "there's", "thereupon", "there've", "these", "they", "they'd", "they'll", "they're", "they've", "thing", "things", "think", "third", "thirty", "this", "thorough", "thoroughly", "those", "though", "three", "through", "throughout", "thru", "thus", "till", "to", "together", "too", "took", "toward", "towards", "tried", "tries", "truly", "try", "trying", "t's", "twice", "two", "un", "under", "underneath", "undoing", "unfortunately", "unless", "unlike", "unlikely", "until", "unto", "up", "upon", "upwards", "us", "use", "used", "useful", "uses", "using", "usually", "v", "value", "various", "versus", "very", "via", "viz", "vs", "want", "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well", "we'll", "went", "were", "we're", "weren't", "we've", "what", "whatever", "what'll", "what's", "what've", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "where's", "whereupon", "wherever", "whether", "which", "whichever", "while", "whilst", "whither", "who", "who'd", "whoever", "whole", "who'll", "whom", "whomever", "who's", "whose", "why", "will", "willing", "wish", "with", "within", "without", "wonder", "won't", "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "your", "you're", "yours", "yourself", "yourselves", "you've", "zero", "a", "how's", "i", "ours ", "when's", "why's", "a", "able", "b", "c", "d", "e", "f", "g", "h", "j", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "uucp", "w", "x", "y", "z", "zero", "I", "www", "amoungst", "amount", "bill", "bottom", "call", "computer", "con", "couldnt", "cry", "de", "describe", "detail", "due", "eleven", "empty", "fifteen", "fify", "fill", "find", "fire", "forty", "front", "full", "give", "hasnt", "herse”", "himse”", "interest", "itse”", "mill", "move", "myse”", "part", "put", "show", "side", "sincere", "sixty", "system", "ten", "thick", "thin", "top", "twelve", "twenty", "abst", "accordance", "act", "added", "adopted", "affected", "affecting", "affects", "ah", "announce", "anymore", "apparently", "approximately", "aren", "arent", "arise", "auth", "beginning", "beginnings", "begins", "biol", "briefly", "ca", "date", "ed", "effect", "et-al", "ff", "fix", "gave", "giving", "hed", "heres", "hes", "hid", "home", "id", "im", "immediately", "importance", "important", "index", "information", "invention", "itd", "keys", "kg", "km", "largely", "lets", "line", "'ll", "means", "mg", "million", "ml", "mug", "na", "nay", "necessarily", "nos", "noted", "obtain", "obtained", "omitted", "ord", "owing", "page", "pages", "poorly", "possibly", "potentially", "pp", "predominantly", "present", "previously", "primarily", "promptly", "proud", "quickly", "ran", "readily", "ref", "refs", "related", "research", "resulted", "resulting", "results", "run", "sec", "section", "shed", "shes", "showed", "shown", "showns", "shows", "significant", "significantly", "similar", "similarly", "slightly", "somethan", "specifically", "state", "states", "stop", "strongly", "substantially", "successfully", "sufficiently", "suggest", "thered", "thereof", "therere", "thereto", "theyd", "theyre", "thou", "thoughh", "thousand", "throug", "til", "tip", "ts", "ups", "usefully", "usefulness", "'ve", "vol", "vols", "wed", "whats", "wheres", "whim", "whod", "whos", "widely", "words", "world", "youd", "youre"]; 357 | exports.stop_words = stop_words; 358 | 359 | /***/ }, 360 | /* 8 */ 361 | /***/ function(module, exports, __webpack_require__) { 362 | 363 | "use strict"; 364 | 365 | Object.defineProperty(exports, "__esModule", { 366 | value: true 367 | }); 368 | 369 | var _createClass = (function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })(); 370 | 371 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } 372 | 373 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 374 | 375 | var _utils = __webpack_require__(2); 376 | 377 | var _utils2 = _interopRequireDefault(_utils); 378 | 379 | exports["default"] = function (text, opts) { 380 | 381 | var method = opts.method; 382 | var cleanedText = _utils2["default"].clean(_utils2["default"].parseHtml(text, opts), opts); 383 | 384 | switch (method) { 385 | 386 | case "combined": 387 | return Methods.combineNGramsAndNamedEntities(cleanedText, opts); 388 | case "ngram": 389 | return Methods.useNGrams(cleanedText, opts); 390 | case "namedentites": 391 | return Methods.useNamedEntities(cleanedText, ops); 392 | default: 393 | return Methods.combineNGramsAndNamedEntities(cleanedText, opts); 394 | 395 | } 396 | }; 397 | 398 | var Methods = (function () { 399 | function Methods() { 400 | _classCallCheck(this, Methods); 401 | } 402 | 403 | _createClass(Methods, null, [{ 404 | key: "useNGrams", 405 | value: function useNGrams(text, opts) { 406 | return _utils2["default"].filterWords(_utils2["default"].sortNGrams(_utils2["default"].generateNGrams(text, opts), opts), opts); 407 | } 408 | }, { 409 | key: "useNamedEntities", 410 | value: function useNamedEntities(text, opts) { 411 | return _utils2["default"].useNGrams(_utils2["default"].generateNamedEntitiesString(text), opts); 412 | } 413 | }, { 414 | key: "combineNGramsAndNamedEntities", 415 | value: function combineNGramsAndNamedEntities(text, opts) { 416 | return _utils2["default"].filterWords(_utils2["default"].sortNGrams(_utils2["default"].generateNGrams(text, opts).concat(_utils2["default"].generateNGrams(_utils2["default"].generateNamedEntitiesString(text), opts)), opts), opts); 417 | } 418 | }]); 419 | 420 | return Methods; 421 | })(); 422 | 423 | module.exports = exports["default"]; 424 | 425 | /***/ }, 426 | /* 9 */ 427 | /***/ function(module, exports, __webpack_require__) { 428 | 429 | 'use strict'; 430 | 431 | Object.defineProperty(exports, '__esModule', { 432 | value: true 433 | }); 434 | exports['default'] = Config; 435 | 436 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { 'default': obj }; } 437 | 438 | var _default_config = __webpack_require__(10); 439 | 440 | var _default_config2 = _interopRequireDefault(_default_config); 441 | 442 | var _deepmerge = __webpack_require__(11); 443 | 444 | var _deepmerge2 = _interopRequireDefault(_deepmerge); 445 | 446 | function Config(opts) { 447 | return (0, _deepmerge2['default'])(_default_config2['default'], opts || {}); 448 | } 449 | 450 | module.exports = exports['default']; 451 | 452 | /***/ }, 453 | /* 10 */ 454 | /***/ function(module, exports) { 455 | 456 | 'use strict'; 457 | 458 | module.exports = { 459 | 460 | htmlTags: ['p', 'b', 'em', 'title'], 461 | method: 'combined', 462 | useDefaultStopWords: true, 463 | maxNumberOfKeywords: 10, 464 | minKeywordLength: 3, 465 | ngram: { 466 | min_count: 3, 467 | max_size: 1 468 | }, 469 | progressiveGeneration: true 470 | }; 471 | 472 | /***/ }, 473 | /* 11 */ 474 | /***/ function(module, exports, __webpack_require__) { 475 | 476 | var __WEBPACK_AMD_DEFINE_FACTORY__, __WEBPACK_AMD_DEFINE_RESULT__;(function (root, factory) { 477 | if (true) { 478 | !(__WEBPACK_AMD_DEFINE_FACTORY__ = (factory), __WEBPACK_AMD_DEFINE_RESULT__ = (typeof __WEBPACK_AMD_DEFINE_FACTORY__ === 'function' ? (__WEBPACK_AMD_DEFINE_FACTORY__.call(exports, __webpack_require__, exports, module)) : __WEBPACK_AMD_DEFINE_FACTORY__), __WEBPACK_AMD_DEFINE_RESULT__ !== undefined && (module.exports = __WEBPACK_AMD_DEFINE_RESULT__)); 479 | } else if (typeof exports === 'object') { 480 | module.exports = factory(); 481 | } else { 482 | root.deepmerge = factory(); 483 | } 484 | }(this, function () { 485 | 486 | return function deepmerge(target, src) { 487 | var array = Array.isArray(src); 488 | var dst = array && [] || {}; 489 | 490 | if (array) { 491 | target = target || []; 492 | dst = dst.concat(target); 493 | src.forEach(function(e, i) { 494 | if (typeof dst[i] === 'undefined') { 495 | dst[i] = e; 496 | } else if (typeof e === 'object') { 497 | dst[i] = deepmerge(target[i], e); 498 | } else { 499 | if (target.indexOf(e) === -1) { 500 | dst.push(e); 501 | } 502 | } 503 | }); 504 | } else { 505 | if (target && typeof target === 'object') { 506 | Object.keys(target).forEach(function (key) { 507 | dst[key] = target[key]; 508 | }) 509 | } 510 | Object.keys(src).forEach(function (key) { 511 | if (typeof src[key] !== 'object' || !src[key]) { 512 | dst[key] = src[key]; 513 | } 514 | else { 515 | if (!target[key]) { 516 | dst[key] = src[key]; 517 | } else { 518 | dst[key] = deepmerge(target[key], src[key]); 519 | } 520 | } 521 | }); 522 | } 523 | 524 | return dst; 525 | } 526 | 527 | })); 528 | 529 | 530 | /***/ } 531 | /******/ ]); --------------------------------------------------------------------------------