├── .editorconfig ├── .github └── workflows │ └── main.yml ├── .gitignore ├── .npmrc ├── .prettierignore ├── build.js ├── index.js ├── license ├── package.json ├── readme.md ├── test.js └── tsconfig.json /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | on: 3 | - pull_request 4 | - push 5 | jobs: 6 | main: 7 | name: '${{matrix.node}} on ${{matrix.os}}' 8 | runs-on: ${{matrix.os}} 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: dcodeIO/setup-node-nvm@master 12 | with: 13 | node-version: ${{matrix.node}} 14 | - run: npm install 15 | - run: npm test 16 | strategy: 17 | matrix: 18 | os: 19 | - ubuntu-latest 20 | - windows-latest 21 | node: 22 | - lts/erbium 23 | - node 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.log 3 | node_modules/ 4 | yarn.lock 5 | *.d.ts 6 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | *.md 2 | index.js 3 | -------------------------------------------------------------------------------- /build.js: -------------------------------------------------------------------------------- 1 | import {writeFile} from 'node:fs' 2 | 
import {request} from 'node:https' 3 | import concat from 'concat-stream' 4 | import {bail} from 'bail' 5 | 6 | const endpoint = 7 | 'https://raw.githubusercontent.com/cmusphinx/cmudict/master/cmudict.dict' 8 | 9 | request(endpoint, onrequest).end() 10 | 11 | /** 12 | * Response handler for the `endpoint` request: pipes the response body into `concat(onconcat)` and registers `bail` as the `error` listener on the stream returned by `pipe`. NOTE(review): the response status code is not checked here, so any response body would be parsed - confirm that is intended. 13 | * @param {import("http").IncomingMessage} response Response passed by `request(endpoint, onrequest)`. 14 | */ 15 | function onrequest(response) { 16 | response.pipe(concat(onconcat)).on('error', bail) 17 | } 18 | 19 | /** 20 | * Build a word-to-pronunciation map from the downloaded text and write it to `index.js`. The text is split on `\n`; each line is cut at its first space, the part before becoming the key and the rest the value; lines without a space are skipped. The map is written as `export const dictionary = ...` using 2-space-indented JSON, and `bail` is the `writeFile` callback. 21 | * @param {Buffer} buffer Data handed over by `concat`; converted with `String(buffer)`. 22 | */ 23 | function onconcat(buffer) { 24 | const words = {} 25 | 26 | for (const d of String(buffer).split('\n')) { 27 | const space = d.indexOf(' ') 28 | 29 | if (space !== -1) { 30 | words[d.slice(0, space)] = d.slice(space + 1) 31 | } 32 | } 33 | 34 | writeFile( 35 | 'index.js', 36 | '/** @type {{ [word: string]: string }} */\nexport const dictionary = ' + 37 | JSON.stringify(words, null, 2) + 38 | '\n', 39 | bail 40 | ) 41 | } 42 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2015 Zeke Sikelianos 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
16 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cmu-pronouncing-dictionary", 3 | "version": "3.0.0", 4 | "description": "The 134,000+ words and their pronunciations in the CMU pronouncing dictionary", 5 | "license": "ISC", 6 | "keywords": [ 7 | "english", 8 | "language", 9 | "words", 10 | "arpabet", 11 | "transcription", 12 | "spelling", 13 | "cmu", 14 | "dictionary", 15 | "rhyme" 16 | ], 17 | "repository": "words/cmu-pronouncing-dictionary", 18 | "bugs": "https://github.com/words/cmu-pronouncing-dictionary/issues", 19 | "author": "Zeke Sikelianos (http://zeke.sikelianos.com)", 20 | "contributors": [ 21 | "Zeke Sikelianos (http://zeke.sikelianos.com)", 22 | "Titus Wormer (https://wooorm.com)", 23 | "Christian Murphy " 24 | ], 25 | "type": "module", 26 | "main": "index.js", 27 | "types": "index.d.ts", 28 | "files": [ 29 | "index.js", 30 | "index.d.ts" 31 | ], 32 | "devDependencies": { 33 | "@types/tape": "^4.0.0", 34 | "bail": "^2.0.0", 35 | "concat-stream": "^2.0.0", 36 | "prettier": "^2.0.0", 37 | "remark-cli": "^9.0.0", 38 | "remark-preset-wooorm": "^8.0.0", 39 | "rimraf": "^3.0.2", 40 | "tape": "^5.2.2", 41 | "type-coverage": "^2.0.0", 42 | "typescript": "~4.3.0", 43 | "xo": "^0.40.0" 44 | }, 45 | "scripts": { 46 | "generate": "node build", 47 | "format": "remark . -qfo && prettier . 
-w --loglevel warn && xo --fix", 48 | "test-api": "node test", 49 | "test-types": "rimraf \"*.d.ts\" && tsc && type-coverage", 50 | "test": "npm run format && npm run test-api && npm run test-types", 51 | "prepublishOnly": "npm run generate && npm run test" 52 | }, 53 | "prettier": { 54 | "tabWidth": 2, 55 | "useTabs": false, 56 | "singleQuote": true, 57 | "bracketSpacing": false, 58 | "semi": false, 59 | "trailingComma": "none" 60 | }, 61 | "xo": { 62 | "prettier": true, 63 | "ignore": [ 64 | "index.js" 65 | ] 66 | }, 67 | "remarkConfig": { 68 | "plugins": [ 69 | "preset-wooorm" 70 | ] 71 | }, 72 | "typeCoverage": { 73 | "atLeast": 100, 74 | "detail": true, 75 | "strict": true 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # `cmu-pronouncing-dictionary` 2 | 3 | [![Build][build-badge]][build] 4 | [![Downloads][downloads-badge]][downloads] 5 | [![Size][size-badge]][size] 6 | 7 | The 134,000+ words and their pronunciations in the CMU pronouncing dictionary. 8 | 9 | > The CMU Pronouncing Dictionary (also known as cmudict) is a public domain 10 | > pronouncing dictionary created by Carnegie Mellon University (CMU). 11 | > It defines a mapping from English words to their North American 12 | > pronunciations, and is commonly used in speech processing applications. 13 | 14 | Crawled from [`cmusphinx/cmudict`][cmudict]. 15 | 16 | ## Install 17 | 18 | This package is [ESM only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c): 19 | Node 12+ is needed to use it and it must be `import`ed instead of `require`d. 
20 | 21 | [npm][]: 22 | 23 | ```sh 24 | npm install cmu-pronouncing-dictionary 25 | ``` 26 | 27 | ## Use 28 | 29 | ```js 30 | import { dictionary } from 'cmu-pronouncing-dictionary' 31 | 32 | console.log(dictionary) 33 | ``` 34 | 35 | Yields: 36 | 37 | ```js 38 | { 39 | a: 'AH0', 40 | 'a(1)': 'EY1', 41 | "a's": 'EY1 Z', 42 | 'a.': 'EY1', 43 | "a.'s": 'EY1 Z', 44 | 'a.s': 'EY1 Z', 45 | a42128: 'EY1 F AO1 R T UW1 W AH1 N T UW1 EY1 T', 46 | aa: 'EY2 EY1', 47 | aaa: 'T R IH2 P AH0 L EY1', 48 | aaberg: 'AA1 B ER0 G', 49 | aachen: 'AA1 K AH0 N', 50 | aachener: 'AA1 K AH0 N ER0', 51 | aah: 'AA1', 52 | aaker: 'AA1 K ER0', 53 | aaliyah: 'AA2 L IY1 AA2', 54 | aalseth: 'AA1 L S EH0 TH', 55 | aamodt: 'AA1 M AH0 T', 56 | aancor: 'AA1 N K AO2 R', 57 | // …and many more 58 | } 59 | ``` 60 | 61 | ## API 62 | 63 | This package exports the following identifiers: `dictionary`. 64 | There is no default export. 65 | 66 | ### `dictionary` 67 | 68 | `Object.<string, string>` — Map of English words to [ARPABET][] phonetic transcription 69 | codes. 70 | 71 | Note that sometimes there are multiple possible pronunciations. 
72 | Those are represented as `$word($counter)`, like so: 73 | 74 | ```js 75 | { 76 | // … 77 | "unnatural": "AH0 N N AE1 CH ER0 AH0 L", 78 | "unnaturally": "AH0 N N AE1 CH ER0 AH0 L IY0", 79 | "unnaturally(2)": "AH0 N N AE1 CH ER0 L IY0", 80 | "unnaturally(3)": "AH0 N AE1 CH ER0 L IY0", 81 | "unnaturally(4)": "AH0 N N AE1 CH R AH0 L IY0", 82 | "unnecessarily": "AH0 N N EH1 S AH0 S EH2 R AH0 L IY0", 83 | "unnecessary": "AH0 N N EH1 S AH0 S EH2 R IY0", 84 | // … 85 | } 86 | ``` 87 | 88 | ## License 89 | 90 | [ISC][license] © [Zeke Sikelianos][author] 91 | 92 | 93 | 94 | [build-badge]: https://img.shields.io/travis/words/cmu-pronouncing-dictionary.svg 95 | 96 | [build]: https://travis-ci.org/words/cmu-pronouncing-dictionary 97 | 98 | [downloads-badge]: https://img.shields.io/npm/dm/cmu-pronouncing-dictionary.svg 99 | 100 | [downloads]: https://www.npmjs.com/package/cmu-pronouncing-dictionary 101 | 102 | [size-badge]: https://img.shields.io/bundlephobia/minzip/cmu-pronouncing-dictionary.svg 103 | 104 | [size]: https://bundlephobia.com/result?p=cmu-pronouncing-dictionary 105 | 106 | [npm]: https://docs.npmjs.com/cli/install 107 | 108 | [license]: license 109 | 110 | [author]: http://zeke.sikelianos.com 111 | 112 | [cmudict]: https://github.com/cmusphinx/cmudict 113 | 114 | [arpabet]: https://en.wikipedia.org/wiki/ARPABET 115 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | import test from 'tape' 2 | import {dictionary as cmu} from './index.js' 3 | /** Smoke test for the `dictionary` export of `./index.js`: it must be an object, `fun`, `perilous` and `monkey` must map to strings, `bought` must equal 'B AA1 T', and there must be more than 130,000 keys. */ 4 | test('cmu-pronouncing-dictionary', (t) => { 5 | t.ok(typeof cmu === 'object', 'should be an object') 6 | 7 | t.ok(typeof cmu.fun === 'string', 'there should be a fun,') 8 | t.ok(typeof cmu.perilous === 'string', 'perilous,') 9 | t.ok(typeof cmu.monkey === 'string', 'monkey.') 10 | t.equal(cmu.bought, 'B AA1 T', 'should have arpabet transcriptions') 11 | t.ok(Object.keys(cmu).length > 130_000, 
'should have loads of words') 12 | 13 | t.end() 14 | }) 15 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["*.js", "lib/**/*.js"], 3 | "compilerOptions": { 4 | "target": "ES2020", 5 | "lib": ["ES2020"], 6 | "module": "ES2020", 7 | "moduleResolution": "node", 8 | "allowJs": true, 9 | "checkJs": true, 10 | "declaration": true, 11 | "emitDeclarationOnly": true, 12 | "allowSyntheticDefaultImports": true 13 | } 14 | } 15 | --------------------------------------------------------------------------------