├── .gitignore ├── .npmignore ├── README.md ├── index.js ├── package.json ├── raw_dict ├── data.adj ├── data.adv ├── data.noun └── data.verb └── setup.js /.gitignore: -------------------------------------------------------------------------------- 1 | wordnet.dict 2 | 3 | # Created by .ignore support plugin (hsz.mobi) 4 | ### Node template 5 | # Logs 6 | logs 7 | *.log 8 | 9 | # Runtime data 10 | pids 11 | *.pid 12 | *.seed 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 21 | .grunt 22 | 23 | # node-waf configuration 24 | .lock-wscript 25 | 26 | # Compiled binary addons (http://nodejs.org/api/addons.html) 27 | build/Release 28 | 29 | # Dependency directory 30 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 31 | node_modules 32 | 33 | .idea 34 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Node template 3 | # Logs 4 | logs 5 | *.log 6 | 7 | # Runtime data 8 | pids 9 | *.pid 10 | *.seed 11 | 12 | # Directory for instrumented libs generated by jscoverage/JSCover 13 | lib-cov 14 | 15 | # Coverage directory used by tools like istanbul 16 | coverage 17 | 18 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 19 | .grunt 20 | 21 | # node-waf configuration 22 | .lock-wscript 23 | 24 | # Compiled binary addons (http://nodejs.org/api/addons.html) 25 | build/Release 26 | 27 | # Dependency directory 28 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 29 | node_modules 30 | 31 | .idea 32 | 33 | #The raw dictionary used to generate the sqlite database 
(http://wordnet.princeton.edu/wordnet/download/current-version/) 34 | raw_dict 35 | 36 | setup.js -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WordNet SQLite 2 | 3 | A node package exposing an SQLite database of the Princeton University WordNet database 4 | 5 | ## Installation 6 | 7 | Just run 8 | ```javascript 9 | npm install wordnet-sqlite 10 | ``` 11 | 12 | Note that installing the module will install the SQLite3 module as a dependency, which requires compilation using node-gyp, 13 | so a working toolchain is required to install this module. 14 | 15 | ## API 16 | 17 | On requiring the module, an instance of an [sqlite3](https://github.com/mapbox/node-sqlite3) client is created and 18 | connected to the local WordNet database. This client is then returned, and can be used according to the 19 | [sqlite3 API](https://github.com/mapbox/node-sqlite3/wiki/API#databaseclosecallback). The returned object is an instance 20 | of *Database*, so methods like #run and #each can be called directly from it. 21 | 22 | Currently the database consists of only one table called `words`, which has the following columns: 23 | 24 | * `word`. A `text` field that contains the dictionary word in its most basic form (without a prefix or suffix) i.e. 25 | *child* will appear but not *children*. Note that spaces are replaced with underscores, e.g. *out_of_the_way*. 26 | For further information, have a look at the [WordNet documentation](http://wordnet.princeton.edu/wordnet/) 27 | 28 | * `definition`. A `text` field that contains a *gloss*, a string which may contain a definition, one or more example 29 | sentences, or both. For example the `definition` field for *implicit* is the string *being without doubt or reserve; "implicit trust"*, 30 | consisting of a definition and one example sentence. 31 | 32 | * `type`. 
Also a text field that contains a string indicating the type of word this row is. Either "adj", "adv", "noun", or "verb". 33 | Note that types such as conjunctions and interjections are not part of the WordNet project so are not present in the database. 34 | 35 | * `rowid`. An `integer` field created automatically by SQLite. Corresponds to the index of the word, so the first entry 36 | has a `rowid` of 1. However the words are in no particular order so this is not likely to be of any use. 37 | 38 | Here's a simple example usage of the database. See the [Example](#example) section for another example. 39 | 40 | ```javascript 41 | var db = require("wordnet-sqlite"); 42 | db.get("SELECT definition FROM words WHERE word = 'pulpy' LIMIT 1;", function (err, row) { 43 | console.log(row.definition); 44 | }); 45 | ``` 46 | Outputs: 47 | 48 | >like a pulp or overripe; not having stiffness 49 | 50 | ## Example 51 | 52 | Here's a bot I wrote to email people random compliments (well actually they're more like insults): 53 | ```javascript 54 | var nodemailer = require('nodemailer'); 55 | var db = require("wordnet-sqlite"); 56 | 57 | // create reusable transporter object using SMTP transport 58 | var transporter = nodemailer.createTransport({ 59 | service: 'Gmail', 60 | auth: { 61 | user: '', 62 | pass: 'mypassword' 63 | } 64 | }); 65 | 66 | db.get("SELECT * FROM words WHERE type = 'adj' ORDER BY RANDOM() LIMIT 1;", function (err, row) { 67 | var mailOptions = { 68 | from: 'Me ', 69 | to: 'My Victim ', 70 | subject: 'You are ' + row.word, 71 | text: row.word + ": " + row.definition 72 | }; 73 | 74 | transporter.sendMail(mailOptions, function (error, info) { 75 | if (error) 76 | console.log(error); 77 | else 78 | console.log('Message sent: ' + info.response); 79 | 80 | db.close(); 81 | }); 82 | }); 83 | ``` 84 | 85 | Outputs: 86 | ![Mailer Results](http://i.imgur.com/2irUI0x.png) 87 | 88 | ## Contributions 89 | 90 | The GitHub repository contains the raw_dict directory, which 
contains the data.adj, data.adv, data.noun and data.verb 91 | files from the [WordNet website](http://wordnet.princeton.edu/wordnet/download/current-version/) (version 3.1). If WordNet 92 | is updated, download the new files and replace those in raw_dict, then run the setup.js script to rebuild the database. 93 | 94 | Any other contributions are welcome, especially improvements to the database schema itself. -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var sqlite3 = require('sqlite3'); 2 | var path = require("path"); 3 | 4 | var db = path.join(__dirname, 'wordnet.dict'); 5 | module.exports = new sqlite3.Database(db); -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wordnet-sqlite", 3 | "description": "A node package exposing an SQLite database of the Princeton University WordNet database", 4 | "repository": { 5 | "type": "git", 6 | "url": "https://github.com/TMiguelT/wordnet-sqlite" 7 | }, 8 | "scripts":{ 9 | "prepare": "./setup.js" 10 | }, 11 | "keywords": [ 12 | "wordnet", 13 | "dictionary", 14 | "lookup", 15 | "word", 16 | "words", 17 | "word-list", 18 | "list", 19 | "en", 20 | "english", 21 | "dict", 22 | "dictionary" 23 | ], 24 | "version": "1.0.3", 25 | "dependencies": { 26 | "sqlite3": "^3.0.5" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /setup.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | var sqlite3 = require('sqlite3').verbose(); 3 | var fs = require('fs'); 4 | var readline = require('readline'); 5 | 6 | //Make the database and run it serially 7 | var db = new sqlite3.Database('wordnet.dict'); 8 | db.serialize(function () { 9 | 10 | //Create the main table 11 | 
db.run("DROP TABLE IF EXISTS words"); 12 | db.run("CREATE TABLE words (word TEXT, definition TEXT, type TEXT)"); 13 | db.run("CREATE INDEX word_idx ON words (word ASC)"); 14 | db.run("CREATE INDEX type_idx ON words (type ASC)"); 15 | db.run("BEGIN TRANSACTION"); 16 | 17 | //Prepare the insert statement 18 | var stmt = db.prepare("INSERT INTO words VALUES (?, ?, ?)"); 19 | 20 | //For each input file 21 | var types = ["adj", "adv", "noun", "verb"]; 22 | var counter = 0; 23 | 24 | types.forEach(function (type) { 25 | 26 | //Read each line of the file 27 | var rl = readline.createInterface({input: fs.createReadStream('raw_dict/data.' + type)}); 28 | 29 | var rows = 0; 30 | 31 | //Find the relevant variables and insert them 32 | rl.on('line', function (line) { 33 | //Skip the comment lines 34 | if (line.substr(0, 2) === " ") 35 | return; 36 | 37 | //Split the line to find relevant variables 38 | var sections = line.split(/\s+\|\s+/); 39 | var cols = sections[0].split(/\s/); 40 | var words = cols 41 | .filter(col => col.match(/^[^\d!"#$%&'()\*\+\-\.,\/:;<=>?@\[\\\]^_`{|}~]/gm)) // doesn't start with number or special letter 42 | .filter(col => col.length > 1); // has two or more charactors 43 | 44 | //Preserve cols[4] which always has a vaild meaning 45 | if(words.indexOf(cols[4]) === -1){ 46 | words.push(cols[4]) 47 | } 48 | 49 | words.forEach(word => stmt.run(word, sections[1], type)); 50 | rows++; 51 | }); 52 | 53 | rl.on('close', function () { 54 | counter++; 55 | if (counter >= types.length) { 56 | stmt.finalize(()=>{ 57 | db.run("END"); 58 | db.exec("VACUUM"); 59 | db.close(); 60 | }); 61 | } 62 | }); 63 | }); 64 | }); 65 | --------------------------------------------------------------------------------