├── 1.jpg ├── 2.jpg ├── package.json ├── index.js ├── README.md ├── LICENSE ├── .gitignore ├── crawler.js └── words.txt /1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yelexin/english-words-notes/HEAD/1.jpg -------------------------------------------------------------------------------- /2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yelexin/english-words-notes/HEAD/2.jpg -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dict", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "license": "ISC", 12 | "dependencies": { 13 | "puppeteer": "^1.7.0" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const go = require('./crawler') 2 | const fs = require('fs') 3 | 4 | fs.readFile('./words.txt', async (err, buff) => { 5 | if (err) throw err 6 | let data = buff.toString() 7 | data = data.replace(/\r/g, '') 8 | let arr = data.split('\n') 9 | for (let i = 0; i < arr.length; i++) { 10 | let res = await go(arr[i]) 11 | fs.writeFile('./new.txt', arr[i] + '\n' + res + '\n------------------------------------------------\n',{flag: 'a+'}, function (err) { 12 | if (err) console.log(err) 13 | }) 14 | } 15 | }) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 运行效果 2 | 3 | #### 使用前 4 | ![使用前](https://github.com/yelexin/english-words-notes/blob/master/1.jpg) 5 | 6 | #### 使用后 7 | ![使用后](https://github.com/yelexin/english-words-notes/blob/master/2.jpg) 8 | 9 | ### 本程序可以做什么 10 | 将成批的英语单词从词典中查出释义、音标、例句等内容 11 | 12 | ### 如何使用 13 | 在程序目录下创建 words.txt,内容为需要查询的英语单词,一行一个。 14 | node index.js 15 | 16 | 查询结果将在 new.txt 中给出 17 | 18 | ### 注意事项 19 | 本程序使用无头浏览器在有道词典中查询,查询的内容为柯林斯词典内容。使用了最简单的实现方式(注意不是最好),实现方式效率较低,查询可能花费时间较长。 20 | 21 | 输出的 new.txt 中的换行符为 `\n` 而不是 `\r\n`。words.txt 中的换行符无所谓,程序会将其中的`\r`删掉。如果结果 new.txt 无法正常显示换行符,windows 用户请自行转换 22 | 23 | 本程序可能最近不会更新 24 | 25 | ### 为什么不直接用 get 请求做爬虫? 26 | 27 | get 到的释义结果页面 html 中存在大量空白字符,处理较为麻烦。不如 innerText 来的快。 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ye Lexin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | # next.js build output 61 | .next 62 | 63 | .idea -------------------------------------------------------------------------------- /crawler.js: -------------------------------------------------------------------------------- 1 | const puppeteer = require('puppeteer') 2 | 3 | function go(word) { 4 | return new Promise(async function (resolve, reject) { 5 | try { 6 | let browser = await puppeteer.launch({ 7 | executablePath: '/usr/bin/chromium-browser', 8 | headless: true, 9 | }) 10 | let page = await browser.newPage() 11 | await page.goto('http://dict.youdao.com/w/' + word) 12 | let txt = await page.evaluate(() => { 13 | let m = [] 14 | let i = 1 15 | let collins = document.querySelector('.collinsToggle') 16 | if (collins) { 17 | for (let j = 0; j < collins.children.length; j++) { 18 | collins.children[j].classList.remove('wt-collapse') 19 | } 20 | return collins.innerText 21 | } else { 22 | if (document.querySelector('#phrsListTab > div > ul > li')) { 23 | return document.querySelector('#phrsListTab > div > ul > li').innerText 24 | 25 | } else { 26 | return 'no' 27 | } 28 | } 29 | }) 30 | browser.close() 31 | resolve(txt) 32 | } catch (e) { 33 | console.log('oops', e) 34 | } 35 | }) 36 | } 37 | 38 | module.exports = go -------------------------------------------------------------------------------- /words.txt: -------------------------------------------------------------------------------- 1 | abduction 2 | adulteress 3 | aglow 4 | alas 5 | assimilate 6 | austere 7 | ballad 8 | balustrade 9 | banmishment 10 | beak 11 | behold 12 | blister 13 | bluff 14 | blunder 15 | blush 16 | boon 17 | braced 18 | brag 19 | braid 20 | brethren 21 | bristly 22 | brooding 23 | brusque 24 | burrow 25 | cairns 26 | caste 27 | chalice 28 | chasten 29 | chinless 30 | chucked 31 | churning 32 | clad 33 | clamoring 34 | clash 35 | clasp 36 | coarse 37 | colossal 38 | compunction 39 | comrade 40 | concession 41 | conclave 42 | conquest 43 | conspired 44 | contempt 45 | contemptuous 46 | copse 47 | crab 48 | crackling 49 | craggy 50 | craven 51 | creases 52 | creek 53 | crescent 54 | crested 55 | cringe 56 | crust 57 | culling 58 | cunning 59 | cursory 60 | curt 61 | dangled 62 | debark encampment 63 | decimating 64 | decreed 65 | deft 66 | deftly 67 | derision 68 | desultory 69 | disdain 70 | disheartening 71 | disregard 72 | diversion 73 | dominion 74 | doughty 75 | drained 76 | draped 77 | emanation 78 | embroidered 79 | enameled 80 | encased 81 | endaevor 82 | engraved 83 | epitath 84 | eradicate 85 | erratic 86 | espied 87 | falsehood 88 | faltered 89 | farce 90 | fealty 91 | fend 92 | fidgeting 93 | fig 94 | flagon 95 | flanked 96 | flared 97 | flaunt 98 | fluttered 99 | foe 100 | foul 101 | frail 102 | fulrry 103 | futile 104 | gaily 105 | garb 106 | garbed 107 | garnish 108 | gashes 109 | gleamed 110 | glided 111 | glimmer 112 | glob 113 | gorged 114 | gout 115 | gouty 116 | grimace 117 | grimly 118 | grit 119 | groped 120 | growled 121 | gruffly 122 | grumbled 123 | guffawed 124 | gust 125 | guttered 126 | hallow 127 | hamper 128 | hauled 129 | hedges 130 | herbs 131 | hilt 132 | hobbling 133 | honed 134 | hooting 135 | illiterates 136 | impoverished 137 | impregnable 138 | impugning 139 | incinerate 140 | incredulous 141 | incubation 142 | indigo 143 | infiltrate 144 | insurgent 145 | intimate 146 | intoned 147 | irritation 148 | jammed 149 | jape 150 | jerked 151 | jest 152 | juncture 153 | jut 154 | kitten 155 | lame 156 | lance 157 | lash 158 | latent 159 | leech 160 | lick 161 | lining 162 | lithely 163 | lobstered 164 | lounged 165 | lunged 166 | lurched 167 | lure 168 | manhood 169 | mantle 170 | manure 171 | mare 172 | meekly 173 | merrily 174 | mettle 175 | millennia 176 | mole 177 | mortal 178 | motley 179 | muzzle 180 | nibbled 181 | notch 182 | notoriously 183 | oaf 184 | oak 185 | on 186 | onslaught 187 | orbit 188 | ornament 189 | outpost 190 | overpowering 191 | overween 192 | pacity 193 | pack...through 194 | parched 195 | parry 196 | peeved 197 | pelting 198 | pennons 199 | perished 200 | petulant 201 | pholegm 202 | pimples 203 | pitchers 204 | plaintively 205 | plastered 206 | plinth 207 | plucked 208 | plump 209 | pock 210 | poised 211 | poked 212 | pommel 213 | ponderously 214 | potent 215 | preeminent 216 | prevail 217 | profane 218 | prudent 219 | psionic 220 | quarried 221 | quell 222 | quivered 223 | rack 224 | rally 225 | rampage 226 | rampent 227 | ranger 228 | rash 229 | rattling 230 | ravage 231 | ravenous 232 | reconnoiter 233 | recruits 234 | reincarnation 235 | relent 236 | remission 237 | renegade 238 | reprisal 239 | reptile 240 | retainers 241 | retribution 242 | reverence 243 | robe 244 | rogue 245 | rookery 246 | sabotage 247 | sapphires 248 | sarcasm 249 | scarcely 250 | sconces 251 | scour 252 | scowl 253 | scrambling 254 | scrubbed 255 | sentiment 256 | sheared 257 | shed 258 | shiverd 259 | shrieked 260 | shrill 261 | shrubs 262 | shudder 263 | shuddered 264 | silos 265 | sire 266 | skirling 267 | slap 268 | slit 269 | smacked 270 | smoldering 271 | smuggle 272 | snap 273 | snatched 274 | snorted 275 | snot-nose 276 | solace 277 | somber 278 | soothed 279 | sorely 280 | spare 281 | spasm 282 | spat 283 | speckled 284 | splinted 285 | sprigs 286 | sprouted 287 | sputtered 288 | squinted 289 | squirted 290 | stag 291 | staggered 292 | staggering 293 | stallion 294 | stalwart 295 | statuary 296 | stirring 297 | stoop 298 | stout 299 | strap 300 | stray 301 | studs 302 | stunted 303 | submission 304 | subside 305 | succumb 306 | suitors 307 | superstitious 308 | swine 309 | swirled 310 | swiveled 311 | taint 312 | tangled 313 | tarnish 314 | templar 315 | tenacity 316 | terrace 317 | thorny 318 | throb 319 | thrust 320 | tiara 321 | timid 322 | tin 323 | torso 324 | tottered 325 | tracery 326 | trailing 327 | treachery 328 | trickle 329 | trout 330 | tugged 331 | tumble 332 | turnip 333 | twitch 334 | twitched 335 | vaguely 336 | vanquish 337 | vaunt 338 | veined 339 | velvet 340 | verdict 341 | vicious 342 | vigil 343 | vintage 344 | viper 345 | visor 346 | wanton 347 | warily 348 | warp 349 | weakling 350 | weary 351 | wedge 352 | wheezed 353 | whetstone 354 | whiskers 355 | whoop 356 | wile 357 | wincing 358 | windswept 359 | woe 360 | wraiths 361 | wrenched 362 | wretch 363 | wry 364 | --------------------------------------------------------------------------------