├── .gitattributes ├── .gitignore ├── .jshintrc ├── LICENSE-MIT ├── README.md ├── index.js ├── lib └── words.js ├── package.json └── words.json /.gitattributes: -------------------------------------------------------------------------------- 1 | # Enforce Unix newlines 2 | *.* text eol=lf 3 | *.css text eol=lf 4 | *.html text eol=lf 5 | *.js text eol=lf 6 | *.json text eol=lf 7 | *.less text eol=lf 8 | *.md text eol=lf 9 | *.yml text eol=lf 10 | 11 | *.jpg binary 12 | *.gif binary 13 | *.png binary 14 | *.jpeg binary -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Numerous always-ignore extensions 2 | *.csv 3 | *.dat 4 | *.diff 5 | *.err 6 | *.gz 7 | *.log 8 | *.orig 9 | *.out 10 | *.pid 11 | *.rej 12 | *.seed 13 | *.swo 14 | *.swp 15 | *.vi 16 | *.yo-rc.json 17 | *.zip 18 | *~ 19 | .ruby-version 20 | lib-cov 21 | 22 | # OS or Editor folders 23 | *.esproj 24 | *.sublime-project 25 | *.sublime-workspace 26 | ._* 27 | .cache 28 | .DS_Store 29 | .idea 30 | .project 31 | .settings 32 | .tmproj 33 | nbproject 34 | Thumbs.db 35 | 36 | # Komodo 37 | *.komodoproject 38 | .komodotools 39 | 40 | # grunt-html-validation 41 | validation-status.json 42 | validation-report.json 43 | 44 | # Vendor packages 45 | node_modules 46 | bower_components 47 | vendor 48 | 49 | # General folders and files to ignore 50 | _gh_pages 51 | tmp 52 | temp 53 | TODO.md -------------------------------------------------------------------------------- /.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | "esnext": true, 3 | "boss": true, 4 | "curly": true, 5 | "eqeqeq": true, 6 | "eqnull": true, 7 | "immed": true, 8 | "latedef": true, 9 | "newcap": true, 10 | "noarg": true, 11 | "node": true, 12 | "sub": true, 13 | "undef": true, 14 | "unused": true, 15 | "globals": { 16 | "define": true, 17 | "before": true, 18 | "after": 
true, 19 | "describe": true, 20 | "it": true 21 | } 22 | } -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Jon Schlinkert, contributors. 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # common-words [![NPM version](https://badge.fury.io/js/common-words.png)](http://badge.fury.io/js/common-words) 2 | 3 | > Updated list (JSON) of the 100 most common words in the english language. Useful for excluding these words from arrays. 
4 | 5 | From [Wikipedia: Most common words in English](https://en.wikipedia.org/wiki/Most_common_words_in_English). 6 | 7 | Example: 8 | 9 | ```js 10 | [ 11 | { 12 | "rank": "1", 13 | "word": "the" 14 | }, 15 | { 16 | "rank": "2", 17 | "word": "be" 18 | }, 19 | { 20 | "rank": "3", 21 | "word": "to" 22 | }, 23 | ... 24 | ] 25 | ``` 26 | 27 | ## Install 28 | Install with [npm](https://www.npmjs.com/): 29 | 30 | ```bash 31 | npm i common-words --save-dev 32 | ``` 33 | 34 | ## Usage 35 | 36 | ```js 37 | var common = require('common-words'); 38 | 39 | function removeCommonWords(words, common) { 40 | common.forEach(function(obj) { 41 | var word = obj.word; 42 | while (words.indexOf(word) !== -1) { 43 | words.splice(words.indexOf(word), 1); 44 | } 45 | }); 46 | return words; 47 | }; 48 | removeCommonWords(yourWords, common); 49 | ``` 50 | 51 | ## Author 52 | 53 | **Jon Schlinkert** 54 | 55 | * [github/jonschlinkert](https://github.com/jonschlinkert) 56 | * [twitter/jonschlinkert](https://twitter.com/jonschlinkert) 57 | 58 | 59 | ## License 60 | Copyright (c) 2014 Jon Schlinkert, contributors. 61 | Released under the MIT license 62 | 63 | *** 64 | 65 | _This file was generated by [verb-cli](https://github.com/assemble/verb-cli) on April 13, 2014._ -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./words.json'); -------------------------------------------------------------------------------- /lib/words.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const path = require('path'); 4 | const log = require('verbalize'); 5 | const cheerio = require('cheerio'); 6 | const request = require('request'); 7 | const file = require('fs-utils'); 8 | 9 | log.runner = 'repos'; 10 | 11 | var options = { 12 | url: 'https://en.wikipedia.org/wiki/Most_common_words_in_English', 13 | headers: { 14 | 'User-Agent': 'request' 15 | } 16 | }; 17 | 18 | log.writeln(); 19 | log.inform('reading', 
options.url); 20 | 21 | var dest = 'words.json'; 22 | 23 | function callback(err, response, body) { 24 | if (!err && response.statusCode === 200) { 25 | var $ = cheerio.load(body); 26 | var content = ''; 27 | 28 | // Iterate over TR elements in the Wikipedia infobox 29 | $("table.wikitable tr").each(function (i, ele) { 30 | content += $(this).find("td").text(); 31 | content += '\n'; 32 | }); 33 | 34 | var words = []; 35 | content.replace(/([\d]+)([\S]+)/g, function(match, num, word) { 36 | words = words.concat({ 37 | rank: num, 38 | word: word 39 | }); 40 | }); 41 | 42 | file.writeJSONSync(dest, words); 43 | 44 | dest = path.relative(process.cwd(), dest).replace(/\\/g, '/'); 45 | log.inform('writing', dest); 46 | } else { 47 | log.error(err); 48 | } 49 | 50 | // Success message. 51 | log.done('done'); 52 | } 53 | request(options, callback); -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "common-words", 3 | "description": "Updated list (JSON) of the 100 most common words in the English language. 
Useful for excluding these words from arrays.", 4 | "version": "0.1.3", 5 | "homepage": "https://github.com/jonschlinkert/common-words", 6 | "author": { 7 | "name": "Jon Schlinkert", 8 | "url": "https://github.com/jonschlinkert" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "git://github.com/jonschlinkert/common-words.git" 13 | }, 14 | "bugs": { 15 | "url": "https://github.com/jonschlinkert/common-words/issues" 16 | }, 17 | "licenses": [ 18 | { 19 | "type": "MIT", 20 | "url": "https://github.com/jonschlinkert/common-words/blob/master/LICENSE-MIT" 21 | } 22 | ], 23 | "keywords": [ 24 | "words", 25 | "common", 26 | "english" 27 | ], 28 | "main": "index.js", 29 | "engines": { 30 | "node": ">=0.8" 31 | }, 32 | "scripts": { 33 | "test": "mocha -R test" 34 | }, 35 | "devDependencies": { 36 | "verb": "~0.2.0" 37 | }, 38 | "devDependencies": { 39 | "cheerio": "~0.15.0", 40 | "fs-utils": "~0.4.0", 41 | "request": "~2.34.0", 42 | "verbalize": "~0.1.2" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /words.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "rank": "1", 4 | "word": "the" 5 | }, 6 | { 7 | "rank": "2", 8 | "word": "be" 9 | }, 10 | { 11 | "rank": "3", 12 | "word": "to" 13 | }, 14 | { 15 | "rank": "4", 16 | "word": "of" 17 | }, 18 | { 19 | "rank": "5", 20 | "word": "and" 21 | }, 22 | { 23 | "rank": "6", 24 | "word": "a" 25 | }, 26 | { 27 | "rank": "7", 28 | "word": "in" 29 | }, 30 | { 31 | "rank": "8", 32 | "word": "that" 33 | }, 34 | { 35 | "rank": "9", 36 | "word": "have" 37 | }, 38 | { 39 | "rank": "10", 40 | "word": "I" 41 | }, 42 | { 43 | "rank": "11", 44 | "word": "it" 45 | }, 46 | { 47 | "rank": "12", 48 | "word": "for" 49 | }, 50 | { 51 | "rank": "13", 52 | "word": "not" 53 | }, 54 | { 55 | "rank": "14", 56 | "word": "on" 57 | }, 58 | { 59 | "rank": "15", 60 | "word": "with" 61 | }, 62 | { 63 | "rank": "16", 64 | "word": "he" 65 | }, 66 | { 
67 | "rank": "17", 68 | "word": "as" 69 | }, 70 | { 71 | "rank": "18", 72 | "word": "you" 73 | }, 74 | { 75 | "rank": "19", 76 | "word": "do" 77 | }, 78 | { 79 | "rank": "20", 80 | "word": "at" 81 | }, 82 | { 83 | "rank": "21", 84 | "word": "this" 85 | }, 86 | { 87 | "rank": "22", 88 | "word": "but" 89 | }, 90 | { 91 | "rank": "23", 92 | "word": "his" 93 | }, 94 | { 95 | "rank": "24", 96 | "word": "by" 97 | }, 98 | { 99 | "rank": "25", 100 | "word": "from" 101 | }, 102 | { 103 | "rank": "26", 104 | "word": "they" 105 | }, 106 | { 107 | "rank": "27", 108 | "word": "we" 109 | }, 110 | { 111 | "rank": "28", 112 | "word": "say" 113 | }, 114 | { 115 | "rank": "29", 116 | "word": "her" 117 | }, 118 | { 119 | "rank": "30", 120 | "word": "she" 121 | }, 122 | { 123 | "rank": "31", 124 | "word": "or" 125 | }, 126 | { 127 | "rank": "32", 128 | "word": "an" 129 | }, 130 | { 131 | "rank": "33", 132 | "word": "will" 133 | }, 134 | { 135 | "rank": "34", 136 | "word": "my" 137 | }, 138 | { 139 | "rank": "35", 140 | "word": "one" 141 | }, 142 | { 143 | "rank": "36", 144 | "word": "all" 145 | }, 146 | { 147 | "rank": "37", 148 | "word": "would" 149 | }, 150 | { 151 | "rank": "38", 152 | "word": "there" 153 | }, 154 | { 155 | "rank": "39", 156 | "word": "their" 157 | }, 158 | { 159 | "rank": "40", 160 | "word": "what" 161 | }, 162 | { 163 | "rank": "41", 164 | "word": "so" 165 | }, 166 | { 167 | "rank": "42", 168 | "word": "up" 169 | }, 170 | { 171 | "rank": "43", 172 | "word": "out" 173 | }, 174 | { 175 | "rank": "44", 176 | "word": "if" 177 | }, 178 | { 179 | "rank": "45", 180 | "word": "about" 181 | }, 182 | { 183 | "rank": "46", 184 | "word": "who" 185 | }, 186 | { 187 | "rank": "47", 188 | "word": "get" 189 | }, 190 | { 191 | "rank": "48", 192 | "word": "which" 193 | }, 194 | { 195 | "rank": "49", 196 | "word": "go" 197 | }, 198 | { 199 | "rank": "50", 200 | "word": "me" 201 | }, 202 | { 203 | "rank": "51", 204 | "word": "when" 205 | }, 206 | { 207 | "rank": "52", 208 | "word": 
"make" 209 | }, 210 | { 211 | "rank": "53", 212 | "word": "can" 213 | }, 214 | { 215 | "rank": "54", 216 | "word": "like" 217 | }, 218 | { 219 | "rank": "55", 220 | "word": "time" 221 | }, 222 | { 223 | "rank": "56", 224 | "word": "no" 225 | }, 226 | { 227 | "rank": "57", 228 | "word": "just" 229 | }, 230 | { 231 | "rank": "58", 232 | "word": "him" 233 | }, 234 | { 235 | "rank": "59", 236 | "word": "know" 237 | }, 238 | { 239 | "rank": "60", 240 | "word": "take" 241 | }, 242 | { 243 | "rank": "61", 244 | "word": "people" 245 | }, 246 | { 247 | "rank": "62", 248 | "word": "into" 249 | }, 250 | { 251 | "rank": "63", 252 | "word": "year" 253 | }, 254 | { 255 | "rank": "64", 256 | "word": "your" 257 | }, 258 | { 259 | "rank": "65", 260 | "word": "good" 261 | }, 262 | { 263 | "rank": "66", 264 | "word": "some" 265 | }, 266 | { 267 | "rank": "67", 268 | "word": "could" 269 | }, 270 | { 271 | "rank": "68", 272 | "word": "them" 273 | }, 274 | { 275 | "rank": "69", 276 | "word": "see" 277 | }, 278 | { 279 | "rank": "70", 280 | "word": "other" 281 | }, 282 | { 283 | "rank": "71", 284 | "word": "than" 285 | }, 286 | { 287 | "rank": "72", 288 | "word": "then" 289 | }, 290 | { 291 | "rank": "73", 292 | "word": "now" 293 | }, 294 | { 295 | "rank": "74", 296 | "word": "look" 297 | }, 298 | { 299 | "rank": "75", 300 | "word": "only" 301 | }, 302 | { 303 | "rank": "76", 304 | "word": "come" 305 | }, 306 | { 307 | "rank": "77", 308 | "word": "its" 309 | }, 310 | { 311 | "rank": "78", 312 | "word": "over" 313 | }, 314 | { 315 | "rank": "79", 316 | "word": "think" 317 | }, 318 | { 319 | "rank": "80", 320 | "word": "also" 321 | }, 322 | { 323 | "rank": "81", 324 | "word": "back" 325 | }, 326 | { 327 | "rank": "82", 328 | "word": "after" 329 | }, 330 | { 331 | "rank": "83", 332 | "word": "use" 333 | }, 334 | { 335 | "rank": "84", 336 | "word": "two" 337 | }, 338 | { 339 | "rank": "85", 340 | "word": "how" 341 | }, 342 | { 343 | "rank": "86", 344 | "word": "our" 345 | }, 346 | { 347 | 
"rank": "87", 348 | "word": "work" 349 | }, 350 | { 351 | "rank": "88", 352 | "word": "first" 353 | }, 354 | { 355 | "rank": "89", 356 | "word": "well" 357 | }, 358 | { 359 | "rank": "90", 360 | "word": "way" 361 | }, 362 | { 363 | "rank": "91", 364 | "word": "even" 365 | }, 366 | { 367 | "rank": "92", 368 | "word": "new" 369 | }, 370 | { 371 | "rank": "93", 372 | "word": "want" 373 | }, 374 | { 375 | "rank": "94", 376 | "word": "because" 377 | }, 378 | { 379 | "rank": "95", 380 | "word": "any" 381 | }, 382 | { 383 | "rank": "96", 384 | "word": "these" 385 | }, 386 | { 387 | "rank": "97", 388 | "word": "give" 389 | }, 390 | { 391 | "rank": "98", 392 | "word": "day" 393 | }, 394 | { 395 | "rank": "99", 396 | "word": "most" 397 | }, 398 | { 399 | "rank": "100", 400 | "word": "us" 401 | } 402 | ] --------------------------------------------------------------------------------