├── .gitignore ├── tests ├── .lev_history ├── indexing-tests.js └── word-phoneme-map-tests.js ├── Makefile ├── index.js ├── collect-words-from-phoneme-subtree.js ├── package.json ├── forward-phoneme-map.js ├── reverse-phoneme-map.js ├── word-phoneme-map.js ├── forward-phoneme-indexer.js ├── reverse-phoneme-indexer.js ├── index-words-and-phonemes.js ├── phoneme-indexer.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | tests/test.db 4 | -------------------------------------------------------------------------------- /tests/.lev_history: -------------------------------------------------------------------------------- 1 | get !p 2 | gte !p 3 | limit 20 4 | ls 5 | keys 6 | ls 7 | rm gte 8 | ls 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | test: 2 | node tests/indexing-tests.js 3 | node tests/word-phoneme-map-tests.js 4 | 5 | pushall: 6 | git push origin master && npm publish 7 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var indexWordsAndPhonemes = require('./index-words-and-phonemes'); 2 | var createWordPhonemeMap = require('./word-phoneme-map'); 3 | 4 | module.exports = { 5 | setUpDatabase: indexWordsAndPhonemes, 6 | createWordPhonemeMap: createWordPhonemeMap 7 | }; 8 | -------------------------------------------------------------------------------- /collect-words-from-phoneme-subtree.js: -------------------------------------------------------------------------------- 1 | function collectWordsFromPhonemeSubtree(subtree) { 2 | var collected = []; 3 | var currentNodes = [subtree]; 4 | var nextNodes = []; 5 | 6 | while (currentNodes.length > 0) { 7 | currentNodes.forEach(visitCurrentNode); 8 | currentNodes = nextNodes.slice(); 9 | nextNodes.length = 0; 10 | } 11 | 12 | function visitCurrentNode(node) { 13 | if (node) { 14 | if (node.value && node.value.words) { 15 | collected = collected.concat(node.value.words); 16 | } 17 | if (node.children) { 18 | nextNodes = nextNodes.concat(node.children); 19 | } 20 | } 21 | } 22 | 23 | return collected; 24 | } 25 | 26 | module.exports = collectWordsFromPhonemeSubtree; 27 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "word-phoneme-map", 3 | "version": "1.4.0", 4 | "description": "Provides a two-way map between the words and phonemes listed in the CMU Pronouncing Dictionary.", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "make test" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git@github.com:jimkang/word-phoneme-map.git" 12 | }, 13 | "keywords": [ 14 | "phoneme", 15 | "word" 16 | ], 17 | "author": "Jim Kang", 18 | "license": "MIT", 19 | "bugs": { 20 | "url": "https://github.com/jimkang/word-phoneme-map/issues" 21 | }, 22 | "homepage": "https://github.com/jimkang/word-phoneme-map", 23 | "devDependencies": { 24 | "rimraf": "^2.3.4", 25 | "tape": "^3.0.3" 26 | }, 27 | "dependencies": { 28 | "basic-level-tree": "^1.2.1", 29 | "basic-subleveler": "^1.2.0", 30 | "call-next-tick": "^1.1.2", 31 | "export-methods": "^1.1.0", 32 | "level": "^1.1.0", 33 | "phoneme-types": "^1.2.2", 34 | "queue-async": "^1.0.7", 35 | "split": "^1.0.0" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /tests/indexing-tests.js: -------------------------------------------------------------------------------- 1 | var test = require('tape'); 2 | var indexWordsAndPhonemes = require('../index-words-and-phonemes'); 3 | var createWordPhonemeMap = require('../word-phoneme-map'); 4 | var fs = require('fs'); 5 | var rimraf = require('rimraf'); 6 | var callNextTick = require('call-next-tick'); 7 | 8 | var indexOpts = { 9 | dbLocation: __dirname + '/test.db', 10 | numberOfLinesToIndex: 6000 11 | }; 12 | 13 | rimraf.sync(indexOpts.dbLocation); 14 | 15 | function setUpMiniIndex(done) { 16 | if (fs.existsSync(indexOpts.dbLocation)) { 17 | callNextTick(done); 18 | } 19 | else { 20 | indexWordsAndPhonemes(indexOpts, done); 21 | } 22 | } 23 | 24 | test('Try map without db', function noDb(t) { 25 | t.plan(1); 26 | 27 | t.throws(createMapWithNoDb); 28 | 29 | function createMapWithNoDb() { 30 | createWordPhonemeMap({ 31 | dbLocation: null 32 | }); 33 | } 34 | }); 35 | 36 | test('Index', function indexTest(t) { 37 | t.plan(2); 38 | setUpMiniIndex(checkDb); 39 | 40 | function checkDb(error) { 41 | t.ok(!error, 'No error occurred while indexing.'); 42 | t.ok(fs.existsSync(indexOpts.dbLocation), 'Database file was created.'); 43 | } 44 | }); 45 | 46 | -------------------------------------------------------------------------------- /forward-phoneme-map.js: -------------------------------------------------------------------------------- 1 | var createLevelTree = require('basic-level-tree'); 2 | var collectWordsFromPhonemeSubtree = require('./collect-words-from-phoneme-subtree'); 3 | 4 | function createForwardPhonemeMap(opts, createDone) { 5 | var db; 6 | var root; 7 | 8 | if (opts) { 9 | db = opts.db; 10 | } 11 | 12 | if (!db) { 13 | createDone(new Error('Cannot create forward phonemes map without db.')); 14 | return; 15 | } 16 | 17 | var levelTree = createLevelTree( 18 | { 19 | db: db, 20 | treeName: 'forward-phonemes' 21 | }, 22 | passBackMethod 23 | ); 24 | 25 | function passBackMethod(error, levelTreeRoot) { 26 | if (error) { 27 | createDone(error); 28 | } 29 | else { 30 | root = levelTreeRoot; 31 | createDone(error, wordsForPhonemeStartSequence); 32 | } 33 | } 34 | 35 | function wordsForPhonemeStartSequence(phonemesInOrder, done) { 36 | root.getSubtreeAtPath(phonemesInOrder, gatherWords); 37 | 38 | function gatherWords(error, subtree) { 39 | if (error) { 40 | done(error); 41 | } 42 | else { 43 | var words = collectWordsFromPhonemeSubtree(subtree); 44 | if (words) { 45 | done(error, words); 46 | } 47 | else { 48 | done(error); 49 | } 50 | } 51 | } 52 | } 53 | } 54 | 55 | module.exports = createForwardPhonemeMap; 56 | -------------------------------------------------------------------------------- /reverse-phoneme-map.js: -------------------------------------------------------------------------------- 1 | var createLevelTree = require('basic-level-tree'); 2 | var collectWordsFromPhonemeSubtree = require('./collect-words-from-phoneme-subtree'); 3 | 4 | function createReversePhonemeMap(opts, createDone) { 5 | var db; 6 | var root; 7 | 8 | if (opts) { 9 | db = opts.db; 10 | } 11 | 12 | if (!db) { 13 | createDone(new Error('Cannot create reverse phonemes map without db.')); 14 | return; 15 | } 16 | 17 | var levelTree = createLevelTree( 18 | { 19 | db: db, 20 | treeName: 'reverse-phonemes' 21 | }, 22 | passBackMethod 23 | ); 24 | 25 | function passBackMethod(error, levelTreeRoot) { 26 | if (error) { 27 | createDone(error); 28 | } 29 | else { 30 | root = levelTreeRoot; 31 | createDone(error, wordsForPhonemeEndSequence); 32 | } 33 | } 34 | 35 | function wordsForPhonemeEndSequence(phonemesInOrder, done) { 36 | root.getSubtreeAtPath(phonemesInOrder.slice().reverse(), gatherWords); 37 | 38 | function gatherWords(error, subtree) { 39 | if (error) { 40 | done(error); 41 | } 42 | else { 43 | var words = collectWordsFromPhonemeSubtree(subtree); 44 | if (words) { 45 | done(error, words); 46 | } 47 | else { 48 | done(error); 49 | } 50 | } 51 | } 52 | } 53 | } 54 | 55 | module.exports = createReversePhonemeMap; 56 | -------------------------------------------------------------------------------- /word-phoneme-map.js: -------------------------------------------------------------------------------- 1 | var basicSubleveler = require('basic-subleveler'); 2 | var level = require('level'); 3 | var createReversePhonemeMap = require('./reverse-phoneme-map'); 4 | var createForwardPhonemeMap = require('./forward-phoneme-map'); 5 | var queue = require('queue-async'); 6 | 7 | function createWordPhonemeMap(opts, createDone) { 8 | if (!opts || !opts.dbLocation) { 9 | createDone(new Error('Cannot create wordPhonemeMap without dbLocation.')); 10 | return; 11 | } 12 | 13 | var db = level( 14 | opts.dbLocation, 15 | { 16 | valueEncoding: 'json' 17 | } 18 | ); 19 | 20 | var db = basicSubleveler.setUpSubleveledDB({ 21 | db: db, 22 | sublevels: { 23 | words: 'w', 24 | phonemes: 'p' 25 | } 26 | }); 27 | 28 | var mapOpts = { 29 | db: db 30 | }; 31 | 32 | var q = queue(); 33 | q.defer(createReversePhonemeMap, mapOpts); 34 | q.defer(createForwardPhonemeMap, mapOpts); 35 | q.await(passBackMethods); 36 | 37 | function passBackMethods( 38 | error, wordsForPhonemeEndSequence, wordsForPhonemeStartSequence) { 39 | 40 | if (error) { 41 | createDone(error); 42 | } 43 | else { 44 | createDone( 45 | error, 46 | { 47 | wordsForPhonemeSequence: wordsForPhonemeSequence, 48 | phonemeSequencesForWord: phonemeSequencesForWord, 49 | wordsForPhonemeEndSequence: wordsForPhonemeEndSequence, 50 | wordsForPhonemeStartSequence: wordsForPhonemeStartSequence, 51 | close: db.close.bind(db) 52 | } 53 | ); 54 | } 55 | } 56 | 57 | function wordsForPhonemeSequence(sequence, done) { 58 | var sequenceString = sequence.join('_'); 59 | var seqLevel = db.phonemes.sublevel(sequenceString); 60 | basicSubleveler.readAllValuesFromSublevel(seqLevel, done); 61 | } 62 | 63 | function phonemeSequencesForWord(word, done) { 64 | var wordLevel = db.words.sublevel(word); 65 | basicSubleveler.readAllValuesFromSublevel(wordLevel, done); 66 | } 67 | } 68 | 69 | module.exports = createWordPhonemeMap; 70 | -------------------------------------------------------------------------------- /forward-phoneme-indexer.js: -------------------------------------------------------------------------------- 1 | var createLevelTree = require('basic-level-tree'); 2 | var callNextTick = require('call-next-tick'); 3 | 4 | function createForwardPhonemeIndexer(opts, createDone) { 5 | var db; 6 | var root; 7 | 8 | if (opts) { 9 | db = opts.db; 10 | } 11 | 12 | if (!db) { 13 | createDone(new Error('Cannot create forward indexer without db.')); 14 | return; 15 | } 16 | 17 | var levelTree = createLevelTree( 18 | { 19 | db: db, 20 | treeName: 'forward-phonemes', 21 | root: { 22 | name: 'START' 23 | } 24 | }, 25 | passBackMethod 26 | ); 27 | 28 | function passBackMethod(error, levelTreeRoot) { 29 | if (error) { 30 | createDone(error); 31 | } 32 | else { 33 | root = levelTreeRoot; 34 | createDone(error, indexWordByForwardPhonemes); 35 | } 36 | } 37 | 38 | function indexWordByForwardPhonemes(word, phonemesInOrder, done) { 39 | mapToTree(root, word, phonemesInOrder, done); 40 | } 41 | 42 | // Maps the phonemes to the tree, using one node for each phoneme. When it 43 | // gets to the last phoneme, it stores the word in that node. 44 | function mapToTree(node, word, phonemes, done) { 45 | if (phonemes.length < 1) { 46 | callNextTick(done); 47 | } 48 | else { 49 | node.addChildIfNotThere( 50 | { 51 | value: { 52 | name: phonemes[0] 53 | }, 54 | equalityFn: nodeNamesAreEqual 55 | }, 56 | updateChild 57 | ); 58 | } 59 | 60 | function updateChild(error, child) { 61 | if (error) { 62 | done(error); 63 | } 64 | else if (phonemes.length === 1) { 65 | if (child.value.words === undefined) { 66 | child.value.words = []; 67 | } 68 | child.value.words.push(word); 69 | child.save(done); 70 | } 71 | else { 72 | mapToTree(child, word, phonemes.slice(1), done); 73 | } 74 | } 75 | } 76 | 77 | return indexWordByForwardPhonemes; 78 | } 79 | 80 | function nodeNamesAreEqual(a, b) { 81 | return a.name === b.name; 82 | } 83 | 84 | module.exports = createForwardPhonemeIndexer; 85 | -------------------------------------------------------------------------------- /reverse-phoneme-indexer.js: -------------------------------------------------------------------------------- 1 | var createLevelTree = require('basic-level-tree'); 2 | var callNextTick = require('call-next-tick'); 3 | 4 | function createReversePhonemeIndexer(opts, createDone) { 5 | var db; 6 | var root; 7 | 8 | if (opts) { 9 | db = opts.db; 10 | } 11 | 12 | if (!db) { 13 | createDone(new Error('Cannot create reverse indexer without db.')); 14 | return; 15 | } 16 | 17 | var levelTree = createLevelTree( 18 | { 19 | db: db, 20 | treeName: 'reverse-phonemes', 21 | root: { 22 | name: 'END' 23 | } 24 | }, 25 | passBackMethod 26 | ); 27 | 28 | function passBackMethod(error, levelTreeRoot) { 29 | if (error) { 30 | createDone(error); 31 | } 32 | else { 33 | root = levelTreeRoot; 34 | createDone(error, indexWordByReversePhonemes); 35 | } 36 | } 37 | 38 | function indexWordByReversePhonemes(word, phonemesInOrder, done) { 39 | mapToTree(root, word, phonemesInOrder.slice().reverse(), done); 40 | } 41 | 42 | // Maps the phonemes to the tree, using one node for each phoneme. When it 43 | // gets to the last phoneme, it stores the word in that node. 44 | function mapToTree(node, word, phonemes, done) { 45 | if (phonemes.length < 1) { 46 | callNextTick(done); 47 | } 48 | else { 49 | node.addChildIfNotThere( 50 | { 51 | value: { 52 | name: phonemes[0] 53 | }, 54 | equalityFn: nodeNamesAreEqual 55 | }, 56 | updateChild 57 | ); 58 | } 59 | 60 | function updateChild(error, child) { 61 | if (error) { 62 | done(error); 63 | } 64 | else if (phonemes.length === 1) { 65 | if (child.value.words === undefined) { 66 | child.value.words = []; 67 | } 68 | child.value.words.push(word); 69 | child.save(done); 70 | } 71 | else { 72 | mapToTree(child, word, phonemes.slice(1), done); 73 | } 74 | } 75 | } 76 | 77 | return indexWordByReversePhonemes; 78 | } 79 | 80 | function nodeNamesAreEqual(a, b) { 81 | return a.name === b.name; 82 | } 83 | 84 | module.exports = createReversePhonemeIndexer; 85 | -------------------------------------------------------------------------------- /index-words-and-phonemes.js: -------------------------------------------------------------------------------- 1 | var callNextTick = require('call-next-tick'); 2 | var fs = require('fs'); 3 | var split = require('split'); 4 | var queue = require('queue-async'); 5 | var createPhonemeIndexer = require('./phoneme-indexer'); 6 | var Writable = require('stream').Writable; 7 | 8 | function indexWordsAndPhonemes(opts, done) { 9 | createPhonemeIndexer( 10 | { 11 | dbLocation: opts.dbLocation 12 | }, 13 | startIndexing 14 | ); 15 | 16 | function startIndexing(error, indexer) { 17 | if (error) { 18 | done(error); 19 | return; 20 | } 21 | 22 | var q = queue(4); 23 | var linesIndexed = 0; 24 | var readStream = fs.createReadStream(__dirname + '/data/cmudict.0.7a'); 25 | var lineStream = split(); 26 | var indexStream = Writable({ 27 | objectMode: true 28 | }); 29 | indexStream._write = writeChunkToIndex; 30 | 31 | readStream.pipe(lineStream); 32 | lineStream.pipe(indexStream); 33 | 34 | lineStream.on('end', cleanUp); 35 | 36 | function writeChunkToIndex(chunk, enc, callback) { 37 | if (opts.numberOfLinesToIndex === undefined || 38 | linesIndexed < opts.numberOfLinesToIndex) { 39 | 40 | linesIndexed += 1; 41 | indexLine(chunk, callback) 42 | } 43 | else { 44 | callback(); 45 | } 46 | } 47 | 48 | function indexLine(line, indexDone) { 49 | if (!line || line.indexOf(';;;') === 0) { 50 | indexDone(); 51 | return; 52 | } 53 | 54 | var pieces = line.split(' '); 55 | if (pieces.length < 2) { 56 | indexDone(); 57 | return; 58 | } 59 | 60 | var word = pieces[0]; 61 | var phonemeString = pieces[1]; 62 | 63 | if (stringIsValid(word) && stringIsValid(phonemeString)) { 64 | indexer.index(word, phonemeString, indexDone); 65 | } 66 | else { 67 | // It is not an error if the line is not parseable. 68 | callNextTick(indexDone); 69 | } 70 | } 71 | 72 | function cleanUp(error) { 73 | indexer.closeDb(passError); 74 | 75 | function passError() { 76 | done(error); 77 | } 78 | } 79 | } 80 | } 81 | 82 | function stringIsValid(s) { 83 | return (typeof s === 'string' && s.length > 0); 84 | } 85 | 86 | module.exports = indexWordsAndPhonemes; 87 | -------------------------------------------------------------------------------- /phoneme-indexer.js: -------------------------------------------------------------------------------- 1 | var queue = require('queue-async'); 2 | var level = require('level'); 3 | var basicSubleveler = require('basic-subleveler'); 4 | var phonemeTypes = require('phoneme-types'); 5 | var callNextTick = require('call-next-tick'); 6 | var createReversePhonemeIndexer = require('./reverse-phoneme-indexer'); 7 | var createForwardPhonemeIndexer = require('./forward-phoneme-indexer') 8 | var queue = require('queue-async'); 9 | 10 | function createPhonemeIndexer(opts, done) { 11 | var indexWordByReversePhonemes; 12 | var indexWordByForwardPhonemes; 13 | 14 | var db = level( 15 | opts.dbLocation, 16 | { 17 | valueEncoding: 'json' 18 | } 19 | ); 20 | 21 | var db = basicSubleveler.setUpSubleveledDB({ 22 | db: db, 23 | sublevels: { 24 | words: 'w', 25 | phonemes: 'p' 26 | } 27 | }); 28 | 29 | var specialIndexerOpts = { 30 | db: db 31 | }; 32 | 33 | var specialIndexerQueue = queue(); 34 | specialIndexerQueue.defer(createReversePhonemeIndexer, specialIndexerOpts); 35 | specialIndexerQueue.defer(createForwardPhonemeIndexer, specialIndexerOpts); 36 | specialIndexerQueue.await(passBackMethods); 37 | 38 | function passBackMethods(error, reverseIndexMethod, forwardIndexMethod) { 39 | if (error) { 40 | done(error); 41 | } 42 | else { 43 | indexWordByReversePhonemes = reverseIndexMethod; 44 | indexWordByForwardPhonemes = forwardIndexMethod; 45 | 46 | var indexerMethods = { 47 | index: index, 48 | closeDb: db.close.bind(db) 49 | }; 50 | done(error, indexerMethods); 51 | } 52 | } 53 | 54 | function index(word, cmuDictPhonemeString, done) { 55 | var phonemeString = phonemeTypes.stripStressor(cmuDictPhonemeString); 56 | var phonemes = phonemeString.split(' '); 57 | phonemeString = phonemes.join('_'); 58 | 59 | if (stringIsEmpty(word)) { 60 | callNextTick(done, new Error('Missing word.')); 61 | return; 62 | } 63 | if (stringIsEmpty(phonemeString)) { 64 | callNextTick(done, new Error('Missing phonemeString.')); 65 | return; 66 | } 67 | 68 | var q = queue(); 69 | 70 | // Index by word. 71 | var cleanedWord = stripOrdinal(word); 72 | var wordLevel = db.words.sublevel(cleanedWord); 73 | q.defer(wordLevel.put, phonemeString, phonemes); 74 | 75 | // Index by phoneme string. 76 | var phonemeLevel = db.phonemes.sublevel(phonemeString); 77 | 78 | q.defer(phonemeLevel.put, cleanedWord, cleanedWord); 79 | 80 | // Reverse index. 81 | q.defer(indexWordByReversePhonemes, cleanedWord, phonemes); 82 | // Forward index. 83 | q.defer(indexWordByForwardPhonemes, cleanedWord, phonemes); 84 | 85 | q.awaitAll(done); 86 | } 87 | } 88 | 89 | function stringIsEmpty(s) { 90 | return (typeof s !== 'string' || s.length < 1); 91 | } 92 | 93 | var ordinalRegex = /\(\d\)/; 94 | 95 | function stripOrdinal(word) { 96 | return word.replace(ordinalRegex, ''); 97 | } 98 | 99 | 100 | module.exports = createPhonemeIndexer; 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | word-phoneme-map 2 | ================== 3 | 4 | Provides a two-way map between the words and phonemes listed in the [CMU Pronouncing Dictionary](http://www.speech.cs.cmu.edu/cgi-bin/cmudict). You can look up words that correspond to phoneme sequences and look up phoneme sequences that correspond to words. 5 | 6 | Installation 7 | ------------ 8 | 9 | npm install word-phoneme-map 10 | 11 | Usage 12 | ----- 13 | 14 | Sometime before you use the map, you need to call `setUpDatabase` to set up a database, like so: 15 | 16 | var setUpDatabase = require('word-phoneme-map').setUpDatabase; 17 | setUpDatabase( 18 | { 19 | dbLocation: __dirname + '/where-you-want-it/a-word-phoneme.db' 20 | }, 21 | done 22 | ); 23 | 24 | function done(error) { 25 | if (error) { 26 | console.log(error) 27 | } 28 | else { 29 | console.log('Successfully set up database.'); 30 | } 31 | } 32 | 33 | You only need to do this once. 34 | 35 | From then on, you can use the map like this: 36 | 37 | var createWordPhonemeMap = require('word-phoneme-map').createWordPhonemeMap; 38 | var wordPhonemeMap = createWordPhonemeMap({ 39 | dbLocation: __dirname + '/where-you-want-it-/a-word-phoneme.db' 40 | }); 41 | 42 | wordPhonemeMap.wordsForPhonemeSequence(['AA', 'R', 'K'], showWords); 43 | 44 | function showWords(error, words) { 45 | if (error) { 46 | console.log(error); 47 | } 48 | else { 49 | console.log(words); 50 | } 51 | } 52 | 53 | Output: 54 | 55 | [ 56 | 'ARC', 57 | 'ARK' 58 | ] 59 | 60 | To do lookups in the opposite direction, do this: 61 | 62 | wordPhonemeMap.phonemeSequencesForWord('ARK', showPhonemes); 63 | 64 | function showPhonemes(error, phonemeSequences) { 65 | if (error) { 66 | console.log(error); 67 | } 68 | else { 69 | console.log(phonemeSequences); 70 | } 71 | } 72 | 73 | Output: 74 | 75 | [ 76 | ['AA', 'R', 'K'] 77 | ] 78 | 79 | Tests 80 | ----- 81 | 82 | Run tests with `make test`. 83 | 84 | About CMUdict (the Carnegie Mellon Pronouncing Dictionary) 85 | ---------------------------------------------------------- 86 | 87 | "[It is a free pronouncing dictionary of English](http://www.speech.cs.cmu.edu/cgi-bin/cmudict), suitable for uses in speech 88 | technology and is maintained by the Speech Group in the School of 89 | Computer Science at Carnegie Mellon University." 90 | 91 | "The Carnegie Mellon Pronouncing Dictionary, in its current and 92 | previous versions is Copyright (C) 1993-2014 by Carnegie Mellon 93 | University. Use of this dictionary for any research or commercial 94 | purpose is completely unrestricted. If you make use of or 95 | redistribute this material we request that you acknowledge its 96 | origin in your descriptions." 97 | 98 | License 99 | ------- 100 | 101 | The MIT License (MIT) 102 | 103 | Copyright (c) 2015 Jim Kang 104 | 105 | Permission is hereby granted, free of charge, to any person obtaining a copy 106 | of this software and associated documentation files (the "Software"), to deal 107 | in the Software without restriction, including without limitation the rights 108 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 109 | copies of the Software, and to permit persons to whom the Software is 110 | furnished to do so, subject to the following conditions: 111 | 112 | The above copyright notice and this permission notice shall be included in 113 | all copies or substantial portions of the Software. 114 | 115 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 116 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 117 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 118 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 119 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 120 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 121 | THE SOFTWARE. 122 | -------------------------------------------------------------------------------- /tests/word-phoneme-map-tests.js: -------------------------------------------------------------------------------- 1 | // Depends on indexing-tests having been run first, unfortunately. 2 | 3 | var test = require('tape'); 4 | var createWordPhonemeMap = require('../word-phoneme-map'); 5 | var callNextTick = require('call-next-tick'); 6 | 7 | var dbLocation = __dirname + '/test.db'; 8 | 9 | test('Create and use map', function fullPhonemeSequenceMatch(t) { 10 | var expectedWordsForSequences = [ 11 | { 12 | sequences: [ 13 | ['AA', 'R', 'K'] 14 | ], 15 | endWords: ['ARC', 'ARK'] 16 | }, 17 | { 18 | sequences: [ 19 | ['AE', 'B', 'N', 'AO', 'R', 'M', 'AH', 'L', 'IY'] 20 | ], 21 | endWords: ['ABNORMALLY'] 22 | }, 23 | { 24 | sequences: [ 25 | ['EY', 'B', 'AH', 'L', 'ER'], 26 | ['EY', 'B', 'L', 'ER'] 27 | ], 28 | endWords: ['ABLER'] 29 | } 30 | ]; 31 | 32 | 33 | t.plan(expectedWordsForSequences.length * 4 + 2); 34 | 35 | createWordPhonemeMap( 36 | { 37 | dbLocation: dbLocation 38 | }, 39 | useMap 40 | ); 41 | 42 | function useMap(error, wordPhonemeMap) { 43 | t.ok(!error, 'No error while creating map.'); 44 | 45 | expectedWordsForSequences.forEach(runWordsForSequenceTest); 46 | 47 | function runWordsForSequenceTest(pair) { 48 | wordPhonemeMap.wordsForPhonemeSequence(pair.sequences[0], checkWords); 49 | 50 | function checkWords(error, words) { 51 | t.ok(!error, 'No error occured while looking for words.'); 52 | t.deepEqual(words, pair.endWords, 'Expected words are returned.'); 53 | } 54 | } 55 | 56 | expectedWordsForSequences.forEach(runSequencesForWordsTest); 57 | 58 | function runSequencesForWordsTest(pair) { 59 | wordPhonemeMap.phonemeSequencesForWord(pair.endWords[0], checkSequences); 60 | 61 | function checkSequences(error, sequences) { 62 | t.ok(!error, 'No error occured while looking for sequence.'); 63 | t.deepEqual( 64 | sequences, pair.sequences, 'Expected sequence is returned.' 65 | ); 66 | } 67 | } 68 | 69 | wordPhonemeMap.close(checkClose); 70 | } 71 | 72 | function checkClose(error) { 73 | t.ok(!error, 'Database closes successfully.'); 74 | } 75 | }); 76 | 77 | var expectedWordsForSequences = [ 78 | { 79 | sequence: ['AA', 'R', 'K'], 80 | endWords: ['ARC', 'ARK', 'AARDVARK', '?QUESTION-MARK'], 81 | startWords: ['ARC', 'ARK', 'ARCHEY', 'ARCO', 'ARKO', 'ARCS', 'ARX', 'ARCADE', 'ARCANE', 'ARCHERD', 'ARKIN', 'ARCHIVE', 'ARCO\'S', 'ARCOS', 'ARKOSE', 'ARKLA', 'ARQUETTE', 'ARCADES', 'ARKADY', 'ARCHAIC', 'ARCANA', 'ARCAND', 'ARKADI', 'ARKAROW', 'ARCARO', 'ARCATA', 'ARCTIC', 'ARKIN\'S', 'ARCHIVES', 'ARCOLA', 'ARCURI', 'ARKLA\'S', 'ARKWRIGHT', 'ARCADIA', 'ARCANUM', 'ARKANSAS', 'ARCTURUS', 'ARCHETYPE', 'ARCHIVAL', 'ARQUILLA', 'ARCADIAN', 'ARCHANGEL', 'ARKANSAN', 'ARCANUM\'S', 'ARKANSAS\'', 'ARKANSAS\'S', 'ARCHITECT', 'ARCHITRAVE', 'ARCHIVIST', 'ARCHETYPAL', 'ARCTURUS', 'ARCHETYPES', 'ARCHIVIST', 'ARCHULETA', 'ARCHULETTA', 'ARCADIANS', 'ARCHANGELS', 'ARKANSANS', 'ARCHIMEDES', 'ARCHITECT\'S', 'ARCHITECTS', 'ARCHITECTURE', 'ARCHITRAVES', 'ARCHIVISTS', 'ARCOSANTI', 'ARCHAEOLOGY', 'ARCHEOLOGY', 'ARCHIVISTS', 'ARCHIPELAGO', 'ARCHITECTURE\'S', 'ARCHITECTURES', 'ARKADELPHIA', 'ARCHITECTURAL', 'ARCHAEOLOGIST', 'ARCHAEOLOGICAL', 'ARCHEOLOGICAL', 'ARCHAEOLOGISTS', 'ARCHITECTURALLY', 'ARCHITECTURALLY'] 82 | }, 83 | { 84 | sequence: ['AH', 'L', 'IY'], 85 | endWords: ['ALLEE', 'AMALIE', 'ACTUALLY', 'ANOMALY', 'ACTUALLY', 'ANNUALLY', 'ANGRILY', 'ARTFULLY', 'ABYSMALLY', 'ADDITIONALLY', 'ABNORMALLY', 'ADDITIONALLY', 'ANECDOTALLY', 'ANECDOTALLY', 'ACCIDENTALLY', 'ARTIFICIALLY', 'ANENCEPHALY', 'ACCIDENTALLY', 'ARBITRARILY', 'AGRICULTURALLY', 'ARCHITECTURALLY', 'AGRICULTURALLY', 'ARCHITECTURALLY'], 86 | startWords: ['ALLEE', 'ALEEN', 'ALENE', 'ALEVE', 'ALIYAH', 'ALEDO', 'ALINA', 'ALETHA', 'ALISA', 'ALITO', 'ALENIA', 'ALERIA', 'ALLEVIATE', 'ALLEGIANCE', 'ALLEVIATES', 'ALLEVIATED', 'ALLEVIATED', 'ALLEVIATING', 'ALLEVIATION', 'ALLEGIANCES'] 87 | }, 88 | { 89 | sequence: ['L', 'ER'], 90 | endWords: ['AILOR', 'ALLER', 'ALLOR', 'ABLER', 'ADLER', 'AGLER', 'ABLER', 'AMBLER', 'ANDLER', 'ANGLER', 'AKSLER', 'AMSLER', 'ANTLER', 'ANNULAR', 'ALACHLOR', 'ALTSCHILLER', 'ALTSCHULER', 'ALTSHULER', 'ALVEOLAR', 'ANGULAR', 'ALTSCHULER', 'ALTSHULER', 'APPENZELLER'], 91 | startWords: [] 92 | }, 93 | { 94 | sequence: ['AA', 'AA'], 95 | endWords: [], 96 | startWords: [] 97 | } 98 | ]; 99 | 100 | expectedWordsForSequences.forEach(runReverseMatchTest); 101 | 102 | function runReverseMatchTest(pair) { 103 | test('Partial matching from end', function matchingFromEnd(t) { 104 | t.plan(6); 105 | 106 | createWordPhonemeMap( 107 | { 108 | dbLocation: dbLocation 109 | }, 110 | useMap 111 | ); 112 | 113 | function useMap(error, wordPhonemeMap) { 114 | t.ok(!error, 'No error while creating map.'); 115 | 116 | var backwardWordsChecked = false; 117 | var forwardWordsChecked = false; 118 | 119 | wordPhonemeMap.wordsForPhonemeEndSequence( 120 | pair.sequence, checkWords 121 | ); 122 | wordPhonemeMap.wordsForPhonemeStartSequence( 123 | pair.sequence, checkStartWords 124 | ); 125 | 126 | function checkWords(error, words) { 127 | // console.log('words!', words); 128 | t.ok(!error, 'No error occured while looking for words.'); 129 | t.deepEqual(words, pair.endWords, 'Expected words are returned.'); 130 | 131 | backwardWordsChecked = true; 132 | closeIfChecksAreDone(); 133 | } 134 | 135 | function checkStartWords(error, words) { 136 | // console.log('words!', words); 137 | t.ok(!error, 'No error occured while looking for words.'); 138 | t.deepEqual(words, pair.startWords, 'Expected words are returned.'); 139 | 140 | forwardWordsChecked = true; 141 | closeIfChecksAreDone(); 142 | } 143 | 144 | function closeIfChecksAreDone() { 145 | if (backwardWordsChecked && forwardWordsChecked) { 146 | wordPhonemeMap.close(checkClose); 147 | } 148 | } 149 | } 150 | 151 | function checkClose(error) { 152 | t.ok(!error, 'Database closes successfully.'); 153 | } 154 | }); 155 | } 156 | --------------------------------------------------------------------------------